├── .gitattributes ├── .github ├── actions-rs │ └── grcov.yml └── workflows │ ├── codecov.yaml │ ├── push.yaml │ └── release.yaml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── benches └── benchmarks.rs ├── out.test ├── src ├── cli.rs ├── compare.rs ├── file_io.rs ├── hash_functions.rs ├── hasher.rs ├── heed.rs ├── heed_codec.rs ├── lib.rs ├── main.rs ├── signature.rs ├── sketch.rs ├── sketcher.rs └── varintencoding.rs └── tests ├── hash_statistics.rs ├── sketching.rs └── testfiles ├── output.bin ├── short.fa ├── test.short.fa.sourmash_k33.sig ├── test.small.fa ├── test.small.fasta.sourmash_k33.sig └── testsketch.bin /.gitattributes: -------------------------------------------------------------------------------- 1 | *.fa -linguist-detectable 2 | *.fasta -linguist-detectable 3 | *.sig -linguist-detectable -------------------------------------------------------------------------------- /.github/actions-rs/grcov.yml: -------------------------------------------------------------------------------- 1 | output-type: lcov 2 | output-file: ./lcov.info -------------------------------------------------------------------------------- /.github/workflows/codecov.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: [main] 4 | pull_request: 5 | name: coverage 6 | jobs: 7 | test: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: arduino/setup-protoc@v1 11 | - uses: actions-rs/toolchain@v1 12 | with: 13 | toolchain: nightly 14 | override: true 15 | - uses: actions/checkout@v2 16 | with: 17 | submodules: recursive 18 | - uses: actions-rs/cargo@v1 19 | with: 20 | command: test 21 | args: --no-fail-fast -- --include-ignored 22 | env: 23 | CARGO_INCREMENTAL: '0' 24 | RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' 25 | RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code 
-Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' 26 | - uses: actions-rs/grcov@v0.1.5 27 | - name: Upload to codecov.io 28 | uses: codecov/codecov-action@v3 -------------------------------------------------------------------------------- /.github/workflows/push.yaml: -------------------------------------------------------------------------------- 1 | name: test_build_push 2 | on: push 3 | jobs: 4 | build: # This does not build the whole proto files for production, it tests if rust / tonic 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v3 8 | - uses: dtolnay/rust-toolchain@nightly 9 | - run: cargo test -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: automatic_test_build_publish 2 | on: 3 | release: 4 | types: [created] 5 | workflow_dispatch: {} 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout repository with submodules 11 | uses: actions/checkout@v3 12 | with: 13 | submodules: recursive 14 | - name: Cargo publish 15 | run: cargo publish --token ${CRATES_TOKEN} 16 | env: 17 | CRATES_TOKEN: ${{ secrets.CRATES_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /*.sketch 3 | /*.gz 4 | /*.fasta 5 | /*.fa 6 | /*.sig 7 | /test.* 8 | /test* 9 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "adler2" 7 | version = "2.0.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 10 | 11 | [[package]] 12 | name = "aho-corasick" 13 | version = "1.1.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 16 | dependencies = [ 17 | "memchr", 18 | ] 19 | 20 | [[package]] 21 | name = "aliasable" 22 | version = "0.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" 25 | 26 | [[package]] 27 | name = "android-tzdata" 28 | version = "0.1.1" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" 31 | 32 | [[package]] 33 | name = "android_system_properties" 34 | version = "0.1.5" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" 37 | dependencies = [ 38 | "libc", 39 | ] 40 | 41 | [[package]] 42 | name = "anes" 43 | version = "0.1.6" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 46 | 47 | [[package]] 48 | name = "anstream" 49 | version = "0.6.15" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" 52 | dependencies = [ 53 | "anstyle", 54 | "anstyle-parse", 55 | "anstyle-query", 56 | "anstyle-wincon", 57 | "colorchoice", 58 | "is_terminal_polyfill", 59 | "utf8parse", 60 | ] 61 | 62 | [[package]] 63 | name = "anstyle" 64 | version = "1.0.8" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = 
"1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" 67 | 68 | [[package]] 69 | name = "anstyle-parse" 70 | version = "0.2.2" 71 | source = "registry+https://github.com/rust-lang/crates.io-index" 72 | checksum = "317b9a89c1868f5ea6ff1d9539a69f45dffc21ce321ac1fd1160dfa48c8e2140" 73 | dependencies = [ 74 | "utf8parse", 75 | ] 76 | 77 | [[package]] 78 | name = "anstyle-query" 79 | version = "1.0.0" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" 82 | dependencies = [ 83 | "windows-sys 0.48.0", 84 | ] 85 | 86 | [[package]] 87 | name = "anstyle-wincon" 88 | version = "3.0.1" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "f0699d10d2f4d628a98ee7b57b289abbc98ff3bad977cb3152709d4bf2330628" 91 | dependencies = [ 92 | "anstyle", 93 | "windows-sys 0.48.0", 94 | ] 95 | 96 | [[package]] 97 | name = "anyhow" 98 | version = "1.0.94" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" 101 | 102 | [[package]] 103 | name = "approx" 104 | version = "0.5.1" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" 107 | dependencies = [ 108 | "num-traits", 109 | ] 110 | 111 | [[package]] 112 | name = "autocfg" 113 | version = "1.1.0" 114 | source = "registry+https://github.com/rust-lang/crates.io-index" 115 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 116 | 117 | [[package]] 118 | name = "az" 119 | version = "1.2.1" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973" 122 | 123 | [[package]] 124 | name = "binary-merge" 125 | version = "0.1.2" 126 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 127 | checksum = "597bb81c80a54b6a4381b23faba8d7774b144c94cbd1d6fe3f1329bd776554ab" 128 | 129 | [[package]] 130 | name = "bincode" 131 | version = "1.3.3" 132 | source = "registry+https://github.com/rust-lang/crates.io-index" 133 | checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 134 | dependencies = [ 135 | "serde", 136 | ] 137 | 138 | [[package]] 139 | name = "bitflags" 140 | version = "2.6.0" 141 | source = "registry+https://github.com/rust-lang/crates.io-index" 142 | checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" 143 | dependencies = [ 144 | "serde", 145 | ] 146 | 147 | [[package]] 148 | name = "buffer-redux" 149 | version = "1.0.0" 150 | source = "registry+https://github.com/rust-lang/crates.io-index" 151 | checksum = "d2886ea01509598caac116942abd33ab5a88fa32acdf7e4abfa0fc489ca520c9" 152 | dependencies = [ 153 | "memchr", 154 | "safemem", 155 | ] 156 | 157 | [[package]] 158 | name = "bumpalo" 159 | version = "3.14.0" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" 162 | 163 | [[package]] 164 | name = "bytecount" 165 | version = "0.6.7" 166 | source = "registry+https://github.com/rust-lang/crates.io-index" 167 | checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" 168 | 169 | [[package]] 170 | name = "bytemuck" 171 | version = "1.20.0" 172 | source = "registry+https://github.com/rust-lang/crates.io-index" 173 | checksum = "8b37c88a63ffd85d15b406896cc343916d7cf57838a847b3a6f2ca5d39a5695a" 174 | 175 | [[package]] 176 | name = "byteorder" 177 | version = "1.5.0" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 180 | 181 | [[package]] 182 | name = "bzip2" 183 | version = "0.4.4" 184 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 185 | checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" 186 | dependencies = [ 187 | "bzip2-sys", 188 | "libc", 189 | ] 190 | 191 | [[package]] 192 | name = "bzip2-sys" 193 | version = "0.1.11+1.0.8" 194 | source = "registry+https://github.com/rust-lang/crates.io-index" 195 | checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" 196 | dependencies = [ 197 | "cc", 198 | "libc", 199 | "pkg-config", 200 | ] 201 | 202 | [[package]] 203 | name = "camino" 204 | version = "1.1.9" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3" 207 | dependencies = [ 208 | "serde", 209 | ] 210 | 211 | [[package]] 212 | name = "cast" 213 | version = "0.3.0" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 216 | 217 | [[package]] 218 | name = "cc" 219 | version = "1.1.24" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "812acba72f0a070b003d3697490d2b55b837230ae7c6c6497f05cc2ddbb8d938" 222 | dependencies = [ 223 | "jobserver", 224 | "libc", 225 | "shlex", 226 | ] 227 | 228 | [[package]] 229 | name = "cfg-if" 230 | version = "1.0.0" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 233 | 234 | [[package]] 235 | name = "chrono" 236 | version = "0.4.38" 237 | source = "registry+https://github.com/rust-lang/crates.io-index" 238 | checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" 239 | dependencies = [ 240 | "android-tzdata", 241 | "iana-time-zone", 242 | "js-sys", 243 | "num-traits", 244 | "wasm-bindgen", 245 | "windows-targets 0.52.6", 246 | ] 247 | 248 | [[package]] 249 | name = "ciborium" 250 | version = 
"0.2.1" 251 | source = "registry+https://github.com/rust-lang/crates.io-index" 252 | checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" 253 | dependencies = [ 254 | "ciborium-io", 255 | "ciborium-ll", 256 | "serde", 257 | ] 258 | 259 | [[package]] 260 | name = "ciborium-io" 261 | version = "0.2.1" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" 264 | 265 | [[package]] 266 | name = "ciborium-ll" 267 | version = "0.2.1" 268 | source = "registry+https://github.com/rust-lang/crates.io-index" 269 | checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" 270 | dependencies = [ 271 | "ciborium-io", 272 | "half", 273 | ] 274 | 275 | [[package]] 276 | name = "clap" 277 | version = "4.5.23" 278 | source = "registry+https://github.com/rust-lang/crates.io-index" 279 | checksum = "3135e7ec2ef7b10c6ed8950f0f792ed96ee093fa088608f1c76e569722700c84" 280 | dependencies = [ 281 | "clap_builder", 282 | "clap_derive", 283 | ] 284 | 285 | [[package]] 286 | name = "clap_builder" 287 | version = "4.5.23" 288 | source = "registry+https://github.com/rust-lang/crates.io-index" 289 | checksum = "30582fc632330df2bd26877bde0c1f4470d57c582bbc070376afcd04d8cb4838" 290 | dependencies = [ 291 | "anstream", 292 | "anstyle", 293 | "clap_lex", 294 | "strsim", 295 | ] 296 | 297 | [[package]] 298 | name = "clap_derive" 299 | version = "4.5.18" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab" 302 | dependencies = [ 303 | "heck 0.5.0", 304 | "proc-macro2", 305 | "quote", 306 | "syn", 307 | ] 308 | 309 | [[package]] 310 | name = "clap_lex" 311 | version = "0.7.4" 312 | source = "registry+https://github.com/rust-lang/crates.io-index" 313 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 314 | 315 | [[package]] 316 | name 
= "codepage-437" 317 | version = "0.1.0" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "e40c1169585d8d08e5675a39f2fc056cd19a258fc4cba5e3bbf4a9c1026de535" 320 | dependencies = [ 321 | "csv", 322 | ] 323 | 324 | [[package]] 325 | name = "colorchoice" 326 | version = "1.0.0" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" 329 | 330 | [[package]] 331 | name = "console" 332 | version = "0.15.10" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" 335 | dependencies = [ 336 | "encode_unicode", 337 | "libc", 338 | "once_cell", 339 | "unicode-width", 340 | "windows-sys 0.59.0", 341 | ] 342 | 343 | [[package]] 344 | name = "core-foundation-sys" 345 | version = "0.8.7" 346 | source = "registry+https://github.com/rust-lang/crates.io-index" 347 | checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" 348 | 349 | [[package]] 350 | name = "counter" 351 | version = "0.6.0" 352 | source = "registry+https://github.com/rust-lang/crates.io-index" 353 | checksum = "f009fcafa949dc1fc46a762dae84d0c2687d3b550906b633c4979d58d2c6ae52" 354 | dependencies = [ 355 | "num-traits", 356 | ] 357 | 358 | [[package]] 359 | name = "crc32fast" 360 | version = "1.3.2" 361 | source = "registry+https://github.com/rust-lang/crates.io-index" 362 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 363 | dependencies = [ 364 | "cfg-if", 365 | ] 366 | 367 | [[package]] 368 | name = "criterion" 369 | version = "0.5.1" 370 | source = "registry+https://github.com/rust-lang/crates.io-index" 371 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 372 | dependencies = [ 373 | "anes", 374 | "cast", 375 | "ciborium", 376 | "clap", 377 | "criterion-plot", 378 | "is-terminal", 379 | "itertools 
0.10.5", 380 | "num-traits", 381 | "once_cell", 382 | "oorandom", 383 | "plotters", 384 | "rayon", 385 | "regex", 386 | "serde", 387 | "serde_derive", 388 | "serde_json", 389 | "tinytemplate", 390 | "walkdir", 391 | ] 392 | 393 | [[package]] 394 | name = "criterion-plot" 395 | version = "0.5.0" 396 | source = "registry+https://github.com/rust-lang/crates.io-index" 397 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 398 | dependencies = [ 399 | "cast", 400 | "itertools 0.10.5", 401 | ] 402 | 403 | [[package]] 404 | name = "crossbeam-deque" 405 | version = "0.8.3" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" 408 | dependencies = [ 409 | "cfg-if", 410 | "crossbeam-epoch", 411 | "crossbeam-utils", 412 | ] 413 | 414 | [[package]] 415 | name = "crossbeam-epoch" 416 | version = "0.9.15" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" 419 | dependencies = [ 420 | "autocfg", 421 | "cfg-if", 422 | "crossbeam-utils", 423 | "memoffset", 424 | "scopeguard", 425 | ] 426 | 427 | [[package]] 428 | name = "crossbeam-queue" 429 | version = "0.3.11" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "df0346b5d5e76ac2fe4e327c5fd1118d6be7c51dfb18f9b7922923f287471e35" 432 | dependencies = [ 433 | "crossbeam-utils", 434 | ] 435 | 436 | [[package]] 437 | name = "crossbeam-utils" 438 | version = "0.8.20" 439 | source = "registry+https://github.com/rust-lang/crates.io-index" 440 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 441 | 442 | [[package]] 443 | name = "csv" 444 | version = "1.3.1" 445 | source = "registry+https://github.com/rust-lang/crates.io-index" 446 | checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" 447 | dependencies = [ 448 | 
"csv-core", 449 | "itoa", 450 | "ryu", 451 | "serde", 452 | ] 453 | 454 | [[package]] 455 | name = "csv-core" 456 | version = "0.1.11" 457 | source = "registry+https://github.com/rust-lang/crates.io-index" 458 | checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" 459 | dependencies = [ 460 | "memchr", 461 | ] 462 | 463 | [[package]] 464 | name = "displaydoc" 465 | version = "0.2.5" 466 | source = "registry+https://github.com/rust-lang/crates.io-index" 467 | checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" 468 | dependencies = [ 469 | "proc-macro2", 470 | "quote", 471 | "syn", 472 | ] 473 | 474 | [[package]] 475 | name = "doxygen-rs" 476 | version = "0.4.2" 477 | source = "registry+https://github.com/rust-lang/crates.io-index" 478 | checksum = "415b6ec780d34dcf624666747194393603d0373b7141eef01d12ee58881507d9" 479 | dependencies = [ 480 | "phf", 481 | ] 482 | 483 | [[package]] 484 | name = "either" 485 | version = "1.9.0" 486 | source = "registry+https://github.com/rust-lang/crates.io-index" 487 | checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" 488 | 489 | [[package]] 490 | name = "encode_unicode" 491 | version = "1.0.0" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" 494 | 495 | [[package]] 496 | name = "enum_dispatch" 497 | version = "0.3.13" 498 | source = "registry+https://github.com/rust-lang/crates.io-index" 499 | checksum = "aa18ce2bc66555b3218614519ac839ddb759a7d6720732f979ef8d13be147ecd" 500 | dependencies = [ 501 | "once_cell", 502 | "proc-macro2", 503 | "quote", 504 | "syn", 505 | ] 506 | 507 | [[package]] 508 | name = "errno" 509 | version = "0.3.7" 510 | source = "registry+https://github.com/rust-lang/crates.io-index" 511 | checksum = "f258a7194e7f7c2a7837a8913aeab7fd8c383457034fa20ce4dd3dcb813e8eb8" 512 | dependencies = [ 513 | "libc", 514 | "windows-sys 0.48.0", 
515 | ] 516 | 517 | [[package]] 518 | name = "fastmurmur3" 519 | version = "0.2.0" 520 | source = "registry+https://github.com/rust-lang/crates.io-index" 521 | checksum = "2d7e9bc68be4cdabbb8938140b01a8b5bc1191937f2c7e7ecc2fcebbe2d749df" 522 | 523 | [[package]] 524 | name = "fixedbitset" 525 | version = "0.4.2" 526 | source = "registry+https://github.com/rust-lang/crates.io-index" 527 | checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" 528 | 529 | [[package]] 530 | name = "flate2" 531 | version = "1.0.35" 532 | source = "registry+https://github.com/rust-lang/crates.io-index" 533 | checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" 534 | dependencies = [ 535 | "crc32fast", 536 | "miniz_oxide", 537 | ] 538 | 539 | [[package]] 540 | name = "form_urlencoded" 541 | version = "1.2.1" 542 | source = "registry+https://github.com/rust-lang/crates.io-index" 543 | checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" 544 | dependencies = [ 545 | "percent-encoding", 546 | ] 547 | 548 | [[package]] 549 | name = "getrandom" 550 | version = "0.2.15" 551 | source = "registry+https://github.com/rust-lang/crates.io-index" 552 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 553 | dependencies = [ 554 | "cfg-if", 555 | "js-sys", 556 | "libc", 557 | "wasi", 558 | "wasm-bindgen", 559 | ] 560 | 561 | [[package]] 562 | name = "getset" 563 | version = "0.1.3" 564 | source = "registry+https://github.com/rust-lang/crates.io-index" 565 | checksum = "f636605b743120a8d32ed92fc27b6cde1a769f8f936c065151eb66f88ded513c" 566 | dependencies = [ 567 | "proc-macro-error2", 568 | "proc-macro2", 569 | "quote", 570 | "syn", 571 | ] 572 | 573 | [[package]] 574 | name = "half" 575 | version = "1.8.2" 576 | source = "registry+https://github.com/rust-lang/crates.io-index" 577 | checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" 578 | 579 | [[package]] 580 | name = "heck" 
581 | version = "0.4.1" 582 | source = "registry+https://github.com/rust-lang/crates.io-index" 583 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 584 | 585 | [[package]] 586 | name = "heck" 587 | version = "0.5.0" 588 | source = "registry+https://github.com/rust-lang/crates.io-index" 589 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 590 | 591 | [[package]] 592 | name = "heed" 593 | version = "0.21.0" 594 | source = "registry+https://github.com/rust-lang/crates.io-index" 595 | checksum = "bd54745cfacb7b97dee45e8fdb91814b62bccddb481debb7de0f9ee6b7bf5b43" 596 | dependencies = [ 597 | "bitflags", 598 | "byteorder", 599 | "heed-traits", 600 | "heed-types", 601 | "libc", 602 | "lmdb-master-sys", 603 | "once_cell", 604 | "page_size", 605 | "serde", 606 | "synchronoise", 607 | "url", 608 | ] 609 | 610 | [[package]] 611 | name = "heed-traits" 612 | version = "0.20.0" 613 | source = "registry+https://github.com/rust-lang/crates.io-index" 614 | checksum = "eb3130048d404c57ce5a1ac61a903696e8fcde7e8c2991e9fcfc1f27c3ef74ff" 615 | 616 | [[package]] 617 | name = "heed-types" 618 | version = "0.21.0" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "13c255bdf46e07fb840d120a36dcc81f385140d7191c76a7391672675c01a55d" 621 | dependencies = [ 622 | "bincode", 623 | "byteorder", 624 | "heed-traits", 625 | "serde", 626 | "serde_json", 627 | ] 628 | 629 | [[package]] 630 | name = "hermit-abi" 631 | version = "0.3.3" 632 | source = "registry+https://github.com/rust-lang/crates.io-index" 633 | checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" 634 | 635 | [[package]] 636 | name = "histogram" 637 | version = "0.11.1" 638 | source = "registry+https://github.com/rust-lang/crates.io-index" 639 | checksum = "58cf6b99a250776d813cdf2f0b478a053a822d078e7a2baf5cb36afc88c41a7c" 640 | dependencies = [ 641 | "thiserror 1.0.50", 642 | ] 643 | 644 | [[package]] 645 | name = 
"iana-time-zone" 646 | version = "0.1.60" 647 | source = "registry+https://github.com/rust-lang/crates.io-index" 648 | checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" 649 | dependencies = [ 650 | "android_system_properties", 651 | "core-foundation-sys", 652 | "iana-time-zone-haiku", 653 | "js-sys", 654 | "wasm-bindgen", 655 | "windows-core", 656 | ] 657 | 658 | [[package]] 659 | name = "iana-time-zone-haiku" 660 | version = "0.1.2" 661 | source = "registry+https://github.com/rust-lang/crates.io-index" 662 | checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" 663 | dependencies = [ 664 | "cc", 665 | ] 666 | 667 | [[package]] 668 | name = "icu_collections" 669 | version = "1.5.0" 670 | source = "registry+https://github.com/rust-lang/crates.io-index" 671 | checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" 672 | dependencies = [ 673 | "displaydoc", 674 | "yoke", 675 | "zerofrom", 676 | "zerovec", 677 | ] 678 | 679 | [[package]] 680 | name = "icu_locid" 681 | version = "1.5.0" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" 684 | dependencies = [ 685 | "displaydoc", 686 | "litemap", 687 | "tinystr", 688 | "writeable", 689 | "zerovec", 690 | ] 691 | 692 | [[package]] 693 | name = "icu_locid_transform" 694 | version = "1.5.0" 695 | source = "registry+https://github.com/rust-lang/crates.io-index" 696 | checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" 697 | dependencies = [ 698 | "displaydoc", 699 | "icu_locid", 700 | "icu_locid_transform_data", 701 | "icu_provider", 702 | "tinystr", 703 | "zerovec", 704 | ] 705 | 706 | [[package]] 707 | name = "icu_locid_transform_data" 708 | version = "1.5.0" 709 | source = "registry+https://github.com/rust-lang/crates.io-index" 710 | checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" 711 | 712 | 
[[package]] 713 | name = "icu_normalizer" 714 | version = "1.5.0" 715 | source = "registry+https://github.com/rust-lang/crates.io-index" 716 | checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" 717 | dependencies = [ 718 | "displaydoc", 719 | "icu_collections", 720 | "icu_normalizer_data", 721 | "icu_properties", 722 | "icu_provider", 723 | "smallvec", 724 | "utf16_iter", 725 | "utf8_iter", 726 | "write16", 727 | "zerovec", 728 | ] 729 | 730 | [[package]] 731 | name = "icu_normalizer_data" 732 | version = "1.5.0" 733 | source = "registry+https://github.com/rust-lang/crates.io-index" 734 | checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" 735 | 736 | [[package]] 737 | name = "icu_properties" 738 | version = "1.5.1" 739 | source = "registry+https://github.com/rust-lang/crates.io-index" 740 | checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" 741 | dependencies = [ 742 | "displaydoc", 743 | "icu_collections", 744 | "icu_locid_transform", 745 | "icu_properties_data", 746 | "icu_provider", 747 | "tinystr", 748 | "zerovec", 749 | ] 750 | 751 | [[package]] 752 | name = "icu_properties_data" 753 | version = "1.5.0" 754 | source = "registry+https://github.com/rust-lang/crates.io-index" 755 | checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" 756 | 757 | [[package]] 758 | name = "icu_provider" 759 | version = "1.5.0" 760 | source = "registry+https://github.com/rust-lang/crates.io-index" 761 | checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" 762 | dependencies = [ 763 | "displaydoc", 764 | "icu_locid", 765 | "icu_provider_macros", 766 | "stable_deref_trait", 767 | "tinystr", 768 | "writeable", 769 | "yoke", 770 | "zerofrom", 771 | "zerovec", 772 | ] 773 | 774 | [[package]] 775 | name = "icu_provider_macros" 776 | version = "1.5.0" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = 
"1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" 779 | dependencies = [ 780 | "proc-macro2", 781 | "quote", 782 | "syn", 783 | ] 784 | 785 | [[package]] 786 | name = "idna" 787 | version = "1.0.3" 788 | source = "registry+https://github.com/rust-lang/crates.io-index" 789 | checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" 790 | dependencies = [ 791 | "idna_adapter", 792 | "smallvec", 793 | "utf8_iter", 794 | ] 795 | 796 | [[package]] 797 | name = "idna_adapter" 798 | version = "1.2.0" 799 | source = "registry+https://github.com/rust-lang/crates.io-index" 800 | checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" 801 | dependencies = [ 802 | "icu_normalizer", 803 | "icu_properties", 804 | ] 805 | 806 | [[package]] 807 | name = "indicatif" 808 | version = "0.17.9" 809 | source = "registry+https://github.com/rust-lang/crates.io-index" 810 | checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" 811 | dependencies = [ 812 | "console", 813 | "number_prefix", 814 | "portable-atomic", 815 | "rayon", 816 | "unicode-width", 817 | "web-time", 818 | ] 819 | 820 | [[package]] 821 | name = "inplace-vec-builder" 822 | version = "0.1.1" 823 | source = "registry+https://github.com/rust-lang/crates.io-index" 824 | checksum = "cf64c2edc8226891a71f127587a2861b132d2b942310843814d5001d99a1d307" 825 | dependencies = [ 826 | "smallvec", 827 | ] 828 | 829 | [[package]] 830 | name = "integer-encoding" 831 | version = "4.0.2" 832 | source = "registry+https://github.com/rust-lang/crates.io-index" 833 | checksum = "0d762194228a2f1c11063e46e32e5acb96e66e906382b9eb5441f2e0504bbd5a" 834 | 835 | [[package]] 836 | name = "is-terminal" 837 | version = "0.4.9" 838 | source = "registry+https://github.com/rust-lang/crates.io-index" 839 | checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" 840 | dependencies = [ 841 | "hermit-abi", 842 | "rustix", 843 | "windows-sys 0.48.0", 844 | ] 
845 | 846 | [[package]] 847 | name = "is_terminal_polyfill" 848 | version = "1.70.1" 849 | source = "registry+https://github.com/rust-lang/crates.io-index" 850 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 851 | 852 | [[package]] 853 | name = "itertools" 854 | version = "0.10.5" 855 | source = "registry+https://github.com/rust-lang/crates.io-index" 856 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 857 | dependencies = [ 858 | "either", 859 | ] 860 | 861 | [[package]] 862 | name = "itertools" 863 | version = "0.12.1" 864 | source = "registry+https://github.com/rust-lang/crates.io-index" 865 | checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" 866 | dependencies = [ 867 | "either", 868 | ] 869 | 870 | [[package]] 871 | name = "itertools" 872 | version = "0.13.0" 873 | source = "registry+https://github.com/rust-lang/crates.io-index" 874 | checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" 875 | dependencies = [ 876 | "either", 877 | ] 878 | 879 | [[package]] 880 | name = "itoa" 881 | version = "1.0.9" 882 | source = "registry+https://github.com/rust-lang/crates.io-index" 883 | checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" 884 | 885 | [[package]] 886 | name = "jam-rs" 887 | version = "0.2.0" 888 | dependencies = [ 889 | "anyhow", 890 | "bincode", 891 | "bytemuck", 892 | "byteorder", 893 | "clap", 894 | "criterion", 895 | "fastmurmur3", 896 | "flate2", 897 | "heed", 898 | "indicatif", 899 | "integer-encoding", 900 | "itertools 0.13.0", 901 | "murmurhash3", 902 | "needletail", 903 | "rayon", 904 | "serde", 905 | "serde_json", 906 | "sourmash", 907 | "xxhash-rust", 908 | ] 909 | 910 | [[package]] 911 | name = "jobserver" 912 | version = "0.1.32" 913 | source = "registry+https://github.com/rust-lang/crates.io-index" 914 | checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" 915 | dependencies = [ 
916 | "libc", 917 | ] 918 | 919 | [[package]] 920 | name = "js-sys" 921 | version = "0.3.76" 922 | source = "registry+https://github.com/rust-lang/crates.io-index" 923 | checksum = "6717b6b5b077764fb5966237269cb3c64edddde4b14ce42647430a78ced9e7b7" 924 | dependencies = [ 925 | "once_cell", 926 | "wasm-bindgen", 927 | ] 928 | 929 | [[package]] 930 | name = "lazy_static" 931 | version = "1.5.0" 932 | source = "registry+https://github.com/rust-lang/crates.io-index" 933 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 934 | 935 | [[package]] 936 | name = "libc" 937 | version = "0.2.168" 938 | source = "registry+https://github.com/rust-lang/crates.io-index" 939 | checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d" 940 | 941 | [[package]] 942 | name = "liblzma" 943 | version = "0.3.5" 944 | source = "registry+https://github.com/rust-lang/crates.io-index" 945 | checksum = "603222e049bf0da71529325ada5d02dc3871cbd3679cf905429f7f0de93da87b" 946 | dependencies = [ 947 | "liblzma-sys", 948 | ] 949 | 950 | [[package]] 951 | name = "liblzma-sys" 952 | version = "0.3.11" 953 | source = "registry+https://github.com/rust-lang/crates.io-index" 954 | checksum = "41e2171ce6827cbab9bc97238a58361bf9a526080475f21dbc470e1842258b2d" 955 | dependencies = [ 956 | "cc", 957 | "libc", 958 | "pkg-config", 959 | ] 960 | 961 | [[package]] 962 | name = "libm" 963 | version = "0.2.8" 964 | source = "registry+https://github.com/rust-lang/crates.io-index" 965 | checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" 966 | 967 | [[package]] 968 | name = "linux-raw-sys" 969 | version = "0.4.11" 970 | source = "registry+https://github.com/rust-lang/crates.io-index" 971 | checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" 972 | 973 | [[package]] 974 | name = "litemap" 975 | version = "0.7.4" 976 | source = "registry+https://github.com/rust-lang/crates.io-index" 977 | checksum = 
"4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" 978 | 979 | [[package]] 980 | name = "lmdb-master-sys" 981 | version = "0.2.4" 982 | source = "registry+https://github.com/rust-lang/crates.io-index" 983 | checksum = "472c3760e2a8d0f61f322fb36788021bb36d573c502b50fa3e2bcaac3ec326c9" 984 | dependencies = [ 985 | "cc", 986 | "doxygen-rs", 987 | "libc", 988 | ] 989 | 990 | [[package]] 991 | name = "log" 992 | version = "0.4.22" 993 | source = "registry+https://github.com/rust-lang/crates.io-index" 994 | checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" 995 | 996 | [[package]] 997 | name = "matrixmultiply" 998 | version = "0.3.9" 999 | source = "registry+https://github.com/rust-lang/crates.io-index" 1000 | checksum = "9380b911e3e96d10c1f415da0876389aaf1b56759054eeb0de7df940c456ba1a" 1001 | dependencies = [ 1002 | "autocfg", 1003 | "rawpointer", 1004 | ] 1005 | 1006 | [[package]] 1007 | name = "md5" 1008 | version = "0.7.0" 1009 | source = "registry+https://github.com/rust-lang/crates.io-index" 1010 | checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" 1011 | 1012 | [[package]] 1013 | name = "memchr" 1014 | version = "2.7.4" 1015 | source = "registry+https://github.com/rust-lang/crates.io-index" 1016 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 1017 | 1018 | [[package]] 1019 | name = "memmap2" 1020 | version = "0.9.5" 1021 | source = "registry+https://github.com/rust-lang/crates.io-index" 1022 | checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" 1023 | dependencies = [ 1024 | "libc", 1025 | ] 1026 | 1027 | [[package]] 1028 | name = "memoffset" 1029 | version = "0.9.0" 1030 | source = "registry+https://github.com/rust-lang/crates.io-index" 1031 | checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" 1032 | dependencies = [ 1033 | "autocfg", 1034 | ] 1035 | 1036 | [[package]] 1037 | name = "miniz_oxide" 1038 | version 
= "0.8.0" 1039 | source = "registry+https://github.com/rust-lang/crates.io-index" 1040 | checksum = "e2d80299ef12ff69b16a84bb182e3b9df68b5a91574d3d4fa6e41b65deec4df1" 1041 | dependencies = [ 1042 | "adler2", 1043 | ] 1044 | 1045 | [[package]] 1046 | name = "murmurhash3" 1047 | version = "0.0.5" 1048 | source = "registry+https://github.com/rust-lang/crates.io-index" 1049 | checksum = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664" 1050 | 1051 | [[package]] 1052 | name = "nalgebra" 1053 | version = "0.32.6" 1054 | source = "registry+https://github.com/rust-lang/crates.io-index" 1055 | checksum = "7b5c17de023a86f59ed79891b2e5d5a94c705dbe904a5b5c9c952ea6221b03e4" 1056 | dependencies = [ 1057 | "approx", 1058 | "matrixmultiply", 1059 | "nalgebra-macros", 1060 | "num-complex", 1061 | "num-rational", 1062 | "num-traits", 1063 | "rand", 1064 | "rand_distr", 1065 | "simba", 1066 | "typenum", 1067 | ] 1068 | 1069 | [[package]] 1070 | name = "nalgebra-macros" 1071 | version = "0.2.2" 1072 | source = "registry+https://github.com/rust-lang/crates.io-index" 1073 | checksum = "254a5372af8fc138e36684761d3c0cdb758a4410e938babcff1c860ce14ddbfc" 1074 | dependencies = [ 1075 | "proc-macro2", 1076 | "quote", 1077 | "syn", 1078 | ] 1079 | 1080 | [[package]] 1081 | name = "needletail" 1082 | version = "0.6.1" 1083 | source = "registry+https://github.com/rust-lang/crates.io-index" 1084 | checksum = "de3de09e373770238e3d30eb1a9f09f4754134d0ef354d0570bc1203d2517257" 1085 | dependencies = [ 1086 | "buffer-redux", 1087 | "bytecount", 1088 | "bzip2", 1089 | "flate2", 1090 | "liblzma", 1091 | "memchr", 1092 | "zstd", 1093 | ] 1094 | 1095 | [[package]] 1096 | name = "niffler" 1097 | version = "2.5.0" 1098 | source = "registry+https://github.com/rust-lang/crates.io-index" 1099 | checksum = "470dd05a938a5ad42c2cb80ceea4255e275990ee530b86ca164e6d8a19fa407f" 1100 | dependencies = [ 1101 | "cfg-if", 1102 | "flate2", 1103 | "thiserror 1.0.50", 1104 | ] 1105 | 1106 | [[package]] 1107 
| name = "nohash-hasher" 1108 | version = "0.2.0" 1109 | source = "registry+https://github.com/rust-lang/crates.io-index" 1110 | checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" 1111 | 1112 | [[package]] 1113 | name = "num-complex" 1114 | version = "0.4.6" 1115 | source = "registry+https://github.com/rust-lang/crates.io-index" 1116 | checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" 1117 | dependencies = [ 1118 | "num-traits", 1119 | ] 1120 | 1121 | [[package]] 1122 | name = "num-integer" 1123 | version = "0.1.46" 1124 | source = "registry+https://github.com/rust-lang/crates.io-index" 1125 | checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" 1126 | dependencies = [ 1127 | "num-traits", 1128 | ] 1129 | 1130 | [[package]] 1131 | name = "num-iter" 1132 | version = "0.1.45" 1133 | source = "registry+https://github.com/rust-lang/crates.io-index" 1134 | checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" 1135 | dependencies = [ 1136 | "autocfg", 1137 | "num-integer", 1138 | "num-traits", 1139 | ] 1140 | 1141 | [[package]] 1142 | name = "num-rational" 1143 | version = "0.4.2" 1144 | source = "registry+https://github.com/rust-lang/crates.io-index" 1145 | checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" 1146 | dependencies = [ 1147 | "num-integer", 1148 | "num-traits", 1149 | ] 1150 | 1151 | [[package]] 1152 | name = "num-traits" 1153 | version = "0.2.19" 1154 | source = "registry+https://github.com/rust-lang/crates.io-index" 1155 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 1156 | dependencies = [ 1157 | "autocfg", 1158 | "libm", 1159 | ] 1160 | 1161 | [[package]] 1162 | name = "number_prefix" 1163 | version = "0.4.0" 1164 | source = "registry+https://github.com/rust-lang/crates.io-index" 1165 | checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" 1166 | 1167 | [[package]] 1168 
| name = "once_cell" 1169 | version = "1.20.2" 1170 | source = "registry+https://github.com/rust-lang/crates.io-index" 1171 | checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" 1172 | 1173 | [[package]] 1174 | name = "oorandom" 1175 | version = "11.1.3" 1176 | source = "registry+https://github.com/rust-lang/crates.io-index" 1177 | checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" 1178 | 1179 | [[package]] 1180 | name = "ouroboros" 1181 | version = "0.18.4" 1182 | source = "registry+https://github.com/rust-lang/crates.io-index" 1183 | checksum = "944fa20996a25aded6b4795c6d63f10014a7a83f8be9828a11860b08c5fc4a67" 1184 | dependencies = [ 1185 | "aliasable", 1186 | "ouroboros_macro", 1187 | "static_assertions", 1188 | ] 1189 | 1190 | [[package]] 1191 | name = "ouroboros_macro" 1192 | version = "0.18.4" 1193 | source = "registry+https://github.com/rust-lang/crates.io-index" 1194 | checksum = "39b0deead1528fd0e5947a8546a9642a9777c25f6e1e26f34c97b204bbb465bd" 1195 | dependencies = [ 1196 | "heck 0.4.1", 1197 | "itertools 0.12.1", 1198 | "proc-macro2", 1199 | "proc-macro2-diagnostics", 1200 | "quote", 1201 | "syn", 1202 | ] 1203 | 1204 | [[package]] 1205 | name = "page_size" 1206 | version = "0.6.0" 1207 | source = "registry+https://github.com/rust-lang/crates.io-index" 1208 | checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" 1209 | dependencies = [ 1210 | "libc", 1211 | "winapi", 1212 | ] 1213 | 1214 | [[package]] 1215 | name = "paste" 1216 | version = "1.0.15" 1217 | source = "registry+https://github.com/rust-lang/crates.io-index" 1218 | checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" 1219 | 1220 | [[package]] 1221 | name = "percent-encoding" 1222 | version = "2.3.1" 1223 | source = "registry+https://github.com/rust-lang/crates.io-index" 1224 | checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" 1225 | 1226 | [[package]] 1227 | name = 
"phf" 1228 | version = "0.11.2" 1229 | source = "registry+https://github.com/rust-lang/crates.io-index" 1230 | checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" 1231 | dependencies = [ 1232 | "phf_macros", 1233 | "phf_shared", 1234 | ] 1235 | 1236 | [[package]] 1237 | name = "phf_generator" 1238 | version = "0.11.2" 1239 | source = "registry+https://github.com/rust-lang/crates.io-index" 1240 | checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" 1241 | dependencies = [ 1242 | "phf_shared", 1243 | "rand", 1244 | ] 1245 | 1246 | [[package]] 1247 | name = "phf_macros" 1248 | version = "0.11.2" 1249 | source = "registry+https://github.com/rust-lang/crates.io-index" 1250 | checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b" 1251 | dependencies = [ 1252 | "phf_generator", 1253 | "phf_shared", 1254 | "proc-macro2", 1255 | "quote", 1256 | "syn", 1257 | ] 1258 | 1259 | [[package]] 1260 | name = "phf_shared" 1261 | version = "0.11.2" 1262 | source = "registry+https://github.com/rust-lang/crates.io-index" 1263 | checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" 1264 | dependencies = [ 1265 | "siphasher", 1266 | ] 1267 | 1268 | [[package]] 1269 | name = "piz" 1270 | version = "0.5.1" 1271 | source = "registry+https://github.com/rust-lang/crates.io-index" 1272 | checksum = "898b071c1938a2c92b95c18708cbf38f2566a01f0ab9dd7bdf4329987e5c2e17" 1273 | dependencies = [ 1274 | "camino", 1275 | "chrono", 1276 | "codepage-437", 1277 | "crc32fast", 1278 | "flate2", 1279 | "log", 1280 | "memchr", 1281 | "thiserror 1.0.50", 1282 | ] 1283 | 1284 | [[package]] 1285 | name = "pkg-config" 1286 | version = "0.3.27" 1287 | source = "registry+https://github.com/rust-lang/crates.io-index" 1288 | checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" 1289 | 1290 | [[package]] 1291 | name = "plotters" 1292 | version = "0.3.5" 1293 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1294 | checksum = "d2c224ba00d7cadd4d5c660deaf2098e5e80e07846537c51f9cfa4be50c1fd45" 1295 | dependencies = [ 1296 | "num-traits", 1297 | "plotters-backend", 1298 | "plotters-svg", 1299 | "wasm-bindgen", 1300 | "web-sys", 1301 | ] 1302 | 1303 | [[package]] 1304 | name = "plotters-backend" 1305 | version = "0.3.5" 1306 | source = "registry+https://github.com/rust-lang/crates.io-index" 1307 | checksum = "9e76628b4d3a7581389a35d5b6e2139607ad7c75b17aed325f210aa91f4a9609" 1308 | 1309 | [[package]] 1310 | name = "plotters-svg" 1311 | version = "0.3.5" 1312 | source = "registry+https://github.com/rust-lang/crates.io-index" 1313 | checksum = "38f6d39893cca0701371e3c27294f09797214b86f1fb951b89ade8ec04e2abab" 1314 | dependencies = [ 1315 | "plotters-backend", 1316 | ] 1317 | 1318 | [[package]] 1319 | name = "portable-atomic" 1320 | version = "1.10.0" 1321 | source = "registry+https://github.com/rust-lang/crates.io-index" 1322 | checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" 1323 | 1324 | [[package]] 1325 | name = "ppv-lite86" 1326 | version = "0.2.20" 1327 | source = "registry+https://github.com/rust-lang/crates.io-index" 1328 | checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" 1329 | dependencies = [ 1330 | "zerocopy", 1331 | ] 1332 | 1333 | [[package]] 1334 | name = "primal-check" 1335 | version = "0.3.4" 1336 | source = "registry+https://github.com/rust-lang/crates.io-index" 1337 | checksum = "dc0d895b311e3af9902528fbb8f928688abbd95872819320517cc24ca6b2bd08" 1338 | dependencies = [ 1339 | "num-integer", 1340 | ] 1341 | 1342 | [[package]] 1343 | name = "proc-macro-error-attr2" 1344 | version = "2.0.0" 1345 | source = "registry+https://github.com/rust-lang/crates.io-index" 1346 | checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" 1347 | dependencies = [ 1348 | "proc-macro2", 1349 | "quote", 1350 | ] 1351 | 1352 | [[package]] 1353 | 
name = "proc-macro-error2" 1354 | version = "2.0.1" 1355 | source = "registry+https://github.com/rust-lang/crates.io-index" 1356 | checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" 1357 | dependencies = [ 1358 | "proc-macro-error-attr2", 1359 | "proc-macro2", 1360 | "quote", 1361 | "syn", 1362 | ] 1363 | 1364 | [[package]] 1365 | name = "proc-macro2" 1366 | version = "1.0.92" 1367 | source = "registry+https://github.com/rust-lang/crates.io-index" 1368 | checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" 1369 | dependencies = [ 1370 | "unicode-ident", 1371 | ] 1372 | 1373 | [[package]] 1374 | name = "proc-macro2-diagnostics" 1375 | version = "0.10.1" 1376 | source = "registry+https://github.com/rust-lang/crates.io-index" 1377 | checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" 1378 | dependencies = [ 1379 | "proc-macro2", 1380 | "quote", 1381 | "syn", 1382 | "version_check", 1383 | "yansi", 1384 | ] 1385 | 1386 | [[package]] 1387 | name = "quote" 1388 | version = "1.0.37" 1389 | source = "registry+https://github.com/rust-lang/crates.io-index" 1390 | checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" 1391 | dependencies = [ 1392 | "proc-macro2", 1393 | ] 1394 | 1395 | [[package]] 1396 | name = "rand" 1397 | version = "0.8.5" 1398 | source = "registry+https://github.com/rust-lang/crates.io-index" 1399 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 1400 | dependencies = [ 1401 | "libc", 1402 | "rand_chacha", 1403 | "rand_core", 1404 | ] 1405 | 1406 | [[package]] 1407 | name = "rand_chacha" 1408 | version = "0.3.1" 1409 | source = "registry+https://github.com/rust-lang/crates.io-index" 1410 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 1411 | dependencies = [ 1412 | "ppv-lite86", 1413 | "rand_core", 1414 | ] 1415 | 1416 | [[package]] 1417 | name = "rand_core" 1418 | version = "0.6.4" 1419 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 1420 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 1421 | dependencies = [ 1422 | "getrandom", 1423 | ] 1424 | 1425 | [[package]] 1426 | name = "rand_distr" 1427 | version = "0.4.3" 1428 | source = "registry+https://github.com/rust-lang/crates.io-index" 1429 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" 1430 | dependencies = [ 1431 | "num-traits", 1432 | "rand", 1433 | ] 1434 | 1435 | [[package]] 1436 | name = "rawpointer" 1437 | version = "0.2.1" 1438 | source = "registry+https://github.com/rust-lang/crates.io-index" 1439 | checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" 1440 | 1441 | [[package]] 1442 | name = "rayon" 1443 | version = "1.10.0" 1444 | source = "registry+https://github.com/rust-lang/crates.io-index" 1445 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 1446 | dependencies = [ 1447 | "either", 1448 | "rayon-core", 1449 | ] 1450 | 1451 | [[package]] 1452 | name = "rayon-core" 1453 | version = "1.12.1" 1454 | source = "registry+https://github.com/rust-lang/crates.io-index" 1455 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 1456 | dependencies = [ 1457 | "crossbeam-deque", 1458 | "crossbeam-utils", 1459 | ] 1460 | 1461 | [[package]] 1462 | name = "regex" 1463 | version = "1.10.2" 1464 | source = "registry+https://github.com/rust-lang/crates.io-index" 1465 | checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" 1466 | dependencies = [ 1467 | "aho-corasick", 1468 | "memchr", 1469 | "regex-automata", 1470 | "regex-syntax", 1471 | ] 1472 | 1473 | [[package]] 1474 | name = "regex-automata" 1475 | version = "0.4.3" 1476 | source = "registry+https://github.com/rust-lang/crates.io-index" 1477 | checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" 1478 | dependencies = [ 1479 | 
"aho-corasick", 1480 | "memchr", 1481 | "regex-syntax", 1482 | ] 1483 | 1484 | [[package]] 1485 | name = "regex-syntax" 1486 | version = "0.8.2" 1487 | source = "registry+https://github.com/rust-lang/crates.io-index" 1488 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 1489 | 1490 | [[package]] 1491 | name = "roaring" 1492 | version = "0.10.9" 1493 | source = "registry+https://github.com/rust-lang/crates.io-index" 1494 | checksum = "41589aba99537475bf697f2118357cad1c31590c5a1b9f6d9fc4ad6d07503661" 1495 | dependencies = [ 1496 | "bytemuck", 1497 | "byteorder", 1498 | ] 1499 | 1500 | [[package]] 1501 | name = "roots" 1502 | version = "0.0.8" 1503 | source = "registry+https://github.com/rust-lang/crates.io-index" 1504 | checksum = "082f11ffa03bbef6c2c6ea6bea1acafaade2fd9050ae0234ab44a2153742b058" 1505 | 1506 | [[package]] 1507 | name = "rustix" 1508 | version = "0.38.25" 1509 | source = "registry+https://github.com/rust-lang/crates.io-index" 1510 | checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" 1511 | dependencies = [ 1512 | "bitflags", 1513 | "errno", 1514 | "libc", 1515 | "linux-raw-sys", 1516 | "windows-sys 0.48.0", 1517 | ] 1518 | 1519 | [[package]] 1520 | name = "ryu" 1521 | version = "1.0.15" 1522 | source = "registry+https://github.com/rust-lang/crates.io-index" 1523 | checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" 1524 | 1525 | [[package]] 1526 | name = "safe_arch" 1527 | version = "0.7.2" 1528 | source = "registry+https://github.com/rust-lang/crates.io-index" 1529 | checksum = "c3460605018fdc9612bce72735cba0d27efbcd9904780d44c7e3a9948f96148a" 1530 | dependencies = [ 1531 | "bytemuck", 1532 | ] 1533 | 1534 | [[package]] 1535 | name = "safemem" 1536 | version = "0.3.3" 1537 | source = "registry+https://github.com/rust-lang/crates.io-index" 1538 | checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" 1539 | 1540 | [[package]] 1541 | name = 
"same-file" 1542 | version = "1.0.6" 1543 | source = "registry+https://github.com/rust-lang/crates.io-index" 1544 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 1545 | dependencies = [ 1546 | "winapi-util", 1547 | ] 1548 | 1549 | [[package]] 1550 | name = "scopeguard" 1551 | version = "1.2.0" 1552 | source = "registry+https://github.com/rust-lang/crates.io-index" 1553 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" 1554 | 1555 | [[package]] 1556 | name = "serde" 1557 | version = "1.0.216" 1558 | source = "registry+https://github.com/rust-lang/crates.io-index" 1559 | checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e" 1560 | dependencies = [ 1561 | "serde_derive", 1562 | ] 1563 | 1564 | [[package]] 1565 | name = "serde_derive" 1566 | version = "1.0.216" 1567 | source = "registry+https://github.com/rust-lang/crates.io-index" 1568 | checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e" 1569 | dependencies = [ 1570 | "proc-macro2", 1571 | "quote", 1572 | "syn", 1573 | ] 1574 | 1575 | [[package]] 1576 | name = "serde_json" 1577 | version = "1.0.133" 1578 | source = "registry+https://github.com/rust-lang/crates.io-index" 1579 | checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" 1580 | dependencies = [ 1581 | "itoa", 1582 | "memchr", 1583 | "ryu", 1584 | "serde", 1585 | ] 1586 | 1587 | [[package]] 1588 | name = "shlex" 1589 | version = "1.3.0" 1590 | source = "registry+https://github.com/rust-lang/crates.io-index" 1591 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1592 | 1593 | [[package]] 1594 | name = "simba" 1595 | version = "0.8.1" 1596 | source = "registry+https://github.com/rust-lang/crates.io-index" 1597 | checksum = "061507c94fc6ab4ba1c9a0305018408e312e17c041eb63bef8aa726fa33aceae" 1598 | dependencies = [ 1599 | "approx", 1600 | "num-complex", 1601 | "num-traits", 1602 | "paste", 1603 | 
"wide", 1604 | ] 1605 | 1606 | [[package]] 1607 | name = "siphasher" 1608 | version = "0.3.11" 1609 | source = "registry+https://github.com/rust-lang/crates.io-index" 1610 | checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" 1611 | 1612 | [[package]] 1613 | name = "smallvec" 1614 | version = "1.13.2" 1615 | source = "registry+https://github.com/rust-lang/crates.io-index" 1616 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" 1617 | 1618 | [[package]] 1619 | name = "sorted-iter" 1620 | version = "0.1.11" 1621 | source = "registry+https://github.com/rust-lang/crates.io-index" 1622 | checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" 1623 | 1624 | [[package]] 1625 | name = "sourmash" 1626 | version = "0.17.2" 1627 | source = "registry+https://github.com/rust-lang/crates.io-index" 1628 | checksum = "54e30f752d984b1d8456024973f8d89772b4ba248f592b77b57d59ad27a232a0" 1629 | dependencies = [ 1630 | "az", 1631 | "byteorder", 1632 | "camino", 1633 | "cfg-if", 1634 | "chrono", 1635 | "counter", 1636 | "csv", 1637 | "enum_dispatch", 1638 | "fixedbitset", 1639 | "getrandom", 1640 | "getset", 1641 | "histogram", 1642 | "itertools 0.13.0", 1643 | "js-sys", 1644 | "log", 1645 | "md5", 1646 | "memmap2", 1647 | "murmurhash3", 1648 | "needletail", 1649 | "niffler", 1650 | "nohash-hasher", 1651 | "num-iter", 1652 | "once_cell", 1653 | "ouroboros", 1654 | "piz", 1655 | "primal-check", 1656 | "roaring", 1657 | "roots", 1658 | "serde", 1659 | "serde_json", 1660 | "statrs", 1661 | "streaming-stats", 1662 | "thiserror 2.0.7", 1663 | "twox-hash", 1664 | "typed-builder", 1665 | "vec-collections", 1666 | "wasm-bindgen", 1667 | "web-sys", 1668 | ] 1669 | 1670 | [[package]] 1671 | name = "stable_deref_trait" 1672 | version = "1.2.0" 1673 | source = "registry+https://github.com/rust-lang/crates.io-index" 1674 | checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" 1675 | 1676 | [[package]] 
1677 | name = "static_assertions" 1678 | version = "1.1.0" 1679 | source = "registry+https://github.com/rust-lang/crates.io-index" 1680 | checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" 1681 | 1682 | [[package]] 1683 | name = "statrs" 1684 | version = "0.17.1" 1685 | source = "registry+https://github.com/rust-lang/crates.io-index" 1686 | checksum = "f697a07e4606a0a25c044de247e583a330dbb1731d11bc7350b81f48ad567255" 1687 | dependencies = [ 1688 | "approx", 1689 | "nalgebra", 1690 | "num-traits", 1691 | "rand", 1692 | ] 1693 | 1694 | [[package]] 1695 | name = "streaming-stats" 1696 | version = "0.2.3" 1697 | source = "registry+https://github.com/rust-lang/crates.io-index" 1698 | checksum = "b0d670ce4e348a2081843569e0f79b21c99c91bb9028b3b3ecb0f050306de547" 1699 | dependencies = [ 1700 | "num-traits", 1701 | ] 1702 | 1703 | [[package]] 1704 | name = "strsim" 1705 | version = "0.11.1" 1706 | source = "registry+https://github.com/rust-lang/crates.io-index" 1707 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1708 | 1709 | [[package]] 1710 | name = "syn" 1711 | version = "2.0.90" 1712 | source = "registry+https://github.com/rust-lang/crates.io-index" 1713 | checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" 1714 | dependencies = [ 1715 | "proc-macro2", 1716 | "quote", 1717 | "unicode-ident", 1718 | ] 1719 | 1720 | [[package]] 1721 | name = "synchronoise" 1722 | version = "1.0.1" 1723 | source = "registry+https://github.com/rust-lang/crates.io-index" 1724 | checksum = "3dbc01390fc626ce8d1cffe3376ded2b72a11bb70e1c75f404a210e4daa4def2" 1725 | dependencies = [ 1726 | "crossbeam-queue", 1727 | ] 1728 | 1729 | [[package]] 1730 | name = "synstructure" 1731 | version = "0.13.1" 1732 | source = "registry+https://github.com/rust-lang/crates.io-index" 1733 | checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" 1734 | dependencies = [ 1735 | "proc-macro2", 1736 | "quote", 
1737 | "syn", 1738 | ] 1739 | 1740 | [[package]] 1741 | name = "thiserror" 1742 | version = "1.0.50" 1743 | source = "registry+https://github.com/rust-lang/crates.io-index" 1744 | checksum = "f9a7210f5c9a7156bb50aa36aed4c95afb51df0df00713949448cf9e97d382d2" 1745 | dependencies = [ 1746 | "thiserror-impl 1.0.50", 1747 | ] 1748 | 1749 | [[package]] 1750 | name = "thiserror" 1751 | version = "2.0.7" 1752 | source = "registry+https://github.com/rust-lang/crates.io-index" 1753 | checksum = "93605438cbd668185516ab499d589afb7ee1859ea3d5fc8f6b0755e1c7443767" 1754 | dependencies = [ 1755 | "thiserror-impl 2.0.7", 1756 | ] 1757 | 1758 | [[package]] 1759 | name = "thiserror-impl" 1760 | version = "1.0.50" 1761 | source = "registry+https://github.com/rust-lang/crates.io-index" 1762 | checksum = "266b2e40bc00e5a6c09c3584011e08b06f123c00362c92b975ba9843aaaa14b8" 1763 | dependencies = [ 1764 | "proc-macro2", 1765 | "quote", 1766 | "syn", 1767 | ] 1768 | 1769 | [[package]] 1770 | name = "thiserror-impl" 1771 | version = "2.0.7" 1772 | source = "registry+https://github.com/rust-lang/crates.io-index" 1773 | checksum = "e1d8749b4531af2117677a5fcd12b1348a3fe2b81e36e61ffeac5c4aa3273e36" 1774 | dependencies = [ 1775 | "proc-macro2", 1776 | "quote", 1777 | "syn", 1778 | ] 1779 | 1780 | [[package]] 1781 | name = "tinystr" 1782 | version = "0.7.6" 1783 | source = "registry+https://github.com/rust-lang/crates.io-index" 1784 | checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" 1785 | dependencies = [ 1786 | "displaydoc", 1787 | "zerovec", 1788 | ] 1789 | 1790 | [[package]] 1791 | name = "tinytemplate" 1792 | version = "1.2.1" 1793 | source = "registry+https://github.com/rust-lang/crates.io-index" 1794 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1795 | dependencies = [ 1796 | "serde", 1797 | "serde_json", 1798 | ] 1799 | 1800 | [[package]] 1801 | name = "twox-hash" 1802 | version = "1.6.3" 1803 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1804 | checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" 1805 | dependencies = [ 1806 | "cfg-if", 1807 | "rand", 1808 | "static_assertions", 1809 | ] 1810 | 1811 | [[package]] 1812 | name = "typed-builder" 1813 | version = "0.18.2" 1814 | source = "registry+https://github.com/rust-lang/crates.io-index" 1815 | checksum = "77739c880e00693faef3d65ea3aad725f196da38b22fdc7ea6ded6e1ce4d3add" 1816 | dependencies = [ 1817 | "typed-builder-macro", 1818 | ] 1819 | 1820 | [[package]] 1821 | name = "typed-builder-macro" 1822 | version = "0.18.2" 1823 | source = "registry+https://github.com/rust-lang/crates.io-index" 1824 | checksum = "1f718dfaf347dcb5b983bfc87608144b0bad87970aebcbea5ce44d2a30c08e63" 1825 | dependencies = [ 1826 | "proc-macro2", 1827 | "quote", 1828 | "syn", 1829 | ] 1830 | 1831 | [[package]] 1832 | name = "typenum" 1833 | version = "1.17.0" 1834 | source = "registry+https://github.com/rust-lang/crates.io-index" 1835 | checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" 1836 | 1837 | [[package]] 1838 | name = "unicode-ident" 1839 | version = "1.0.12" 1840 | source = "registry+https://github.com/rust-lang/crates.io-index" 1841 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 1842 | 1843 | [[package]] 1844 | name = "unicode-width" 1845 | version = "0.2.0" 1846 | source = "registry+https://github.com/rust-lang/crates.io-index" 1847 | checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" 1848 | 1849 | [[package]] 1850 | name = "url" 1851 | version = "2.5.4" 1852 | source = "registry+https://github.com/rust-lang/crates.io-index" 1853 | checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" 1854 | dependencies = [ 1855 | "form_urlencoded", 1856 | "idna", 1857 | "percent-encoding", 1858 | ] 1859 | 1860 | [[package]] 1861 | name = "utf16_iter" 1862 | version = "1.0.5" 1863 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1864 | checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" 1865 | 1866 | [[package]] 1867 | name = "utf8_iter" 1868 | version = "1.0.4" 1869 | source = "registry+https://github.com/rust-lang/crates.io-index" 1870 | checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" 1871 | 1872 | [[package]] 1873 | name = "utf8parse" 1874 | version = "0.2.1" 1875 | source = "registry+https://github.com/rust-lang/crates.io-index" 1876 | checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" 1877 | 1878 | [[package]] 1879 | name = "vec-collections" 1880 | version = "0.4.3" 1881 | source = "registry+https://github.com/rust-lang/crates.io-index" 1882 | checksum = "3c9965c8f2ffed1dbcd16cafe18a009642f540fa22661c6cfd6309ddb02e4982" 1883 | dependencies = [ 1884 | "binary-merge", 1885 | "inplace-vec-builder", 1886 | "lazy_static", 1887 | "num-traits", 1888 | "serde", 1889 | "smallvec", 1890 | "sorted-iter", 1891 | ] 1892 | 1893 | [[package]] 1894 | name = "version_check" 1895 | version = "0.9.4" 1896 | source = "registry+https://github.com/rust-lang/crates.io-index" 1897 | checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 1898 | 1899 | [[package]] 1900 | name = "walkdir" 1901 | version = "2.4.0" 1902 | source = "registry+https://github.com/rust-lang/crates.io-index" 1903 | checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" 1904 | dependencies = [ 1905 | "same-file", 1906 | "winapi-util", 1907 | ] 1908 | 1909 | [[package]] 1910 | name = "wasi" 1911 | version = "0.11.0+wasi-snapshot-preview1" 1912 | source = "registry+https://github.com/rust-lang/crates.io-index" 1913 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 1914 | 1915 | [[package]] 1916 | name = "wasm-bindgen" 1917 | version = "0.2.99" 1918 | source = "registry+https://github.com/rust-lang/crates.io-index" 1919 | 
checksum = "a474f6281d1d70c17ae7aa6a613c87fce69a127e2624002df63dcb39d6cf6396" 1920 | dependencies = [ 1921 | "cfg-if", 1922 | "once_cell", 1923 | "wasm-bindgen-macro", 1924 | ] 1925 | 1926 | [[package]] 1927 | name = "wasm-bindgen-backend" 1928 | version = "0.2.99" 1929 | source = "registry+https://github.com/rust-lang/crates.io-index" 1930 | checksum = "5f89bb38646b4f81674e8f5c3fb81b562be1fd936d84320f3264486418519c79" 1931 | dependencies = [ 1932 | "bumpalo", 1933 | "log", 1934 | "proc-macro2", 1935 | "quote", 1936 | "syn", 1937 | "wasm-bindgen-shared", 1938 | ] 1939 | 1940 | [[package]] 1941 | name = "wasm-bindgen-macro" 1942 | version = "0.2.99" 1943 | source = "registry+https://github.com/rust-lang/crates.io-index" 1944 | checksum = "2cc6181fd9a7492eef6fef1f33961e3695e4579b9872a6f7c83aee556666d4fe" 1945 | dependencies = [ 1946 | "quote", 1947 | "wasm-bindgen-macro-support", 1948 | ] 1949 | 1950 | [[package]] 1951 | name = "wasm-bindgen-macro-support" 1952 | version = "0.2.99" 1953 | source = "registry+https://github.com/rust-lang/crates.io-index" 1954 | checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2" 1955 | dependencies = [ 1956 | "proc-macro2", 1957 | "quote", 1958 | "syn", 1959 | "wasm-bindgen-backend", 1960 | "wasm-bindgen-shared", 1961 | ] 1962 | 1963 | [[package]] 1964 | name = "wasm-bindgen-shared" 1965 | version = "0.2.99" 1966 | source = "registry+https://github.com/rust-lang/crates.io-index" 1967 | checksum = "943aab3fdaaa029a6e0271b35ea10b72b943135afe9bffca82384098ad0e06a6" 1968 | 1969 | [[package]] 1970 | name = "web-sys" 1971 | version = "0.3.76" 1972 | source = "registry+https://github.com/rust-lang/crates.io-index" 1973 | checksum = "04dd7223427d52553d3702c004d3b2fe07c148165faa56313cb00211e31c12bc" 1974 | dependencies = [ 1975 | "js-sys", 1976 | "wasm-bindgen", 1977 | ] 1978 | 1979 | [[package]] 1980 | name = "web-time" 1981 | version = "1.1.0" 1982 | source = "registry+https://github.com/rust-lang/crates.io-index" 
1983 | checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" 1984 | dependencies = [ 1985 | "js-sys", 1986 | "wasm-bindgen", 1987 | ] 1988 | 1989 | [[package]] 1990 | name = "wide" 1991 | version = "0.7.28" 1992 | source = "registry+https://github.com/rust-lang/crates.io-index" 1993 | checksum = "b828f995bf1e9622031f8009f8481a85406ce1f4d4588ff746d872043e855690" 1994 | dependencies = [ 1995 | "bytemuck", 1996 | "safe_arch", 1997 | ] 1998 | 1999 | [[package]] 2000 | name = "winapi" 2001 | version = "0.3.9" 2002 | source = "registry+https://github.com/rust-lang/crates.io-index" 2003 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 2004 | dependencies = [ 2005 | "winapi-i686-pc-windows-gnu", 2006 | "winapi-x86_64-pc-windows-gnu", 2007 | ] 2008 | 2009 | [[package]] 2010 | name = "winapi-i686-pc-windows-gnu" 2011 | version = "0.4.0" 2012 | source = "registry+https://github.com/rust-lang/crates.io-index" 2013 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 2014 | 2015 | [[package]] 2016 | name = "winapi-util" 2017 | version = "0.1.6" 2018 | source = "registry+https://github.com/rust-lang/crates.io-index" 2019 | checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" 2020 | dependencies = [ 2021 | "winapi", 2022 | ] 2023 | 2024 | [[package]] 2025 | name = "winapi-x86_64-pc-windows-gnu" 2026 | version = "0.4.0" 2027 | source = "registry+https://github.com/rust-lang/crates.io-index" 2028 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 2029 | 2030 | [[package]] 2031 | name = "windows-core" 2032 | version = "0.52.0" 2033 | source = "registry+https://github.com/rust-lang/crates.io-index" 2034 | checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" 2035 | dependencies = [ 2036 | "windows-targets 0.52.6", 2037 | ] 2038 | 2039 | [[package]] 2040 | name = "windows-sys" 2041 | version = "0.48.0" 2042 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2043 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 2044 | dependencies = [ 2045 | "windows-targets 0.48.5", 2046 | ] 2047 | 2048 | [[package]] 2049 | name = "windows-sys" 2050 | version = "0.59.0" 2051 | source = "registry+https://github.com/rust-lang/crates.io-index" 2052 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 2053 | dependencies = [ 2054 | "windows-targets 0.52.6", 2055 | ] 2056 | 2057 | [[package]] 2058 | name = "windows-targets" 2059 | version = "0.48.5" 2060 | source = "registry+https://github.com/rust-lang/crates.io-index" 2061 | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 2062 | dependencies = [ 2063 | "windows_aarch64_gnullvm 0.48.5", 2064 | "windows_aarch64_msvc 0.48.5", 2065 | "windows_i686_gnu 0.48.5", 2066 | "windows_i686_msvc 0.48.5", 2067 | "windows_x86_64_gnu 0.48.5", 2068 | "windows_x86_64_gnullvm 0.48.5", 2069 | "windows_x86_64_msvc 0.48.5", 2070 | ] 2071 | 2072 | [[package]] 2073 | name = "windows-targets" 2074 | version = "0.52.6" 2075 | source = "registry+https://github.com/rust-lang/crates.io-index" 2076 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 2077 | dependencies = [ 2078 | "windows_aarch64_gnullvm 0.52.6", 2079 | "windows_aarch64_msvc 0.52.6", 2080 | "windows_i686_gnu 0.52.6", 2081 | "windows_i686_gnullvm", 2082 | "windows_i686_msvc 0.52.6", 2083 | "windows_x86_64_gnu 0.52.6", 2084 | "windows_x86_64_gnullvm 0.52.6", 2085 | "windows_x86_64_msvc 0.52.6", 2086 | ] 2087 | 2088 | [[package]] 2089 | name = "windows_aarch64_gnullvm" 2090 | version = "0.48.5" 2091 | source = "registry+https://github.com/rust-lang/crates.io-index" 2092 | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" 2093 | 2094 | [[package]] 2095 | name = "windows_aarch64_gnullvm" 2096 | version = "0.52.6" 2097 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2098 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 2099 | 2100 | [[package]] 2101 | name = "windows_aarch64_msvc" 2102 | version = "0.48.5" 2103 | source = "registry+https://github.com/rust-lang/crates.io-index" 2104 | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" 2105 | 2106 | [[package]] 2107 | name = "windows_aarch64_msvc" 2108 | version = "0.52.6" 2109 | source = "registry+https://github.com/rust-lang/crates.io-index" 2110 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 2111 | 2112 | [[package]] 2113 | name = "windows_i686_gnu" 2114 | version = "0.48.5" 2115 | source = "registry+https://github.com/rust-lang/crates.io-index" 2116 | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 2117 | 2118 | [[package]] 2119 | name = "windows_i686_gnu" 2120 | version = "0.52.6" 2121 | source = "registry+https://github.com/rust-lang/crates.io-index" 2122 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 2123 | 2124 | [[package]] 2125 | name = "windows_i686_gnullvm" 2126 | version = "0.52.6" 2127 | source = "registry+https://github.com/rust-lang/crates.io-index" 2128 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 2129 | 2130 | [[package]] 2131 | name = "windows_i686_msvc" 2132 | version = "0.48.5" 2133 | source = "registry+https://github.com/rust-lang/crates.io-index" 2134 | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 2135 | 2136 | [[package]] 2137 | name = "windows_i686_msvc" 2138 | version = "0.52.6" 2139 | source = "registry+https://github.com/rust-lang/crates.io-index" 2140 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 2141 | 2142 | [[package]] 2143 | name = "windows_x86_64_gnu" 2144 | version = "0.48.5" 2145 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 2146 | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 2147 | 2148 | [[package]] 2149 | name = "windows_x86_64_gnu" 2150 | version = "0.52.6" 2151 | source = "registry+https://github.com/rust-lang/crates.io-index" 2152 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 2153 | 2154 | [[package]] 2155 | name = "windows_x86_64_gnullvm" 2156 | version = "0.48.5" 2157 | source = "registry+https://github.com/rust-lang/crates.io-index" 2158 | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" 2159 | 2160 | [[package]] 2161 | name = "windows_x86_64_gnullvm" 2162 | version = "0.52.6" 2163 | source = "registry+https://github.com/rust-lang/crates.io-index" 2164 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 2165 | 2166 | [[package]] 2167 | name = "windows_x86_64_msvc" 2168 | version = "0.48.5" 2169 | source = "registry+https://github.com/rust-lang/crates.io-index" 2170 | checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" 2171 | 2172 | [[package]] 2173 | name = "windows_x86_64_msvc" 2174 | version = "0.52.6" 2175 | source = "registry+https://github.com/rust-lang/crates.io-index" 2176 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 2177 | 2178 | [[package]] 2179 | name = "write16" 2180 | version = "1.0.0" 2181 | source = "registry+https://github.com/rust-lang/crates.io-index" 2182 | checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" 2183 | 2184 | [[package]] 2185 | name = "writeable" 2186 | version = "0.5.5" 2187 | source = "registry+https://github.com/rust-lang/crates.io-index" 2188 | checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" 2189 | 2190 | [[package]] 2191 | name = "xxhash-rust" 2192 | version = "0.8.12" 2193 | source = "registry+https://github.com/rust-lang/crates.io-index" 2194 | 
checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" 2195 | 2196 | [[package]] 2197 | name = "yansi" 2198 | version = "1.0.1" 2199 | source = "registry+https://github.com/rust-lang/crates.io-index" 2200 | checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" 2201 | 2202 | [[package]] 2203 | name = "yoke" 2204 | version = "0.7.5" 2205 | source = "registry+https://github.com/rust-lang/crates.io-index" 2206 | checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" 2207 | dependencies = [ 2208 | "serde", 2209 | "stable_deref_trait", 2210 | "yoke-derive", 2211 | "zerofrom", 2212 | ] 2213 | 2214 | [[package]] 2215 | name = "yoke-derive" 2216 | version = "0.7.5" 2217 | source = "registry+https://github.com/rust-lang/crates.io-index" 2218 | checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" 2219 | dependencies = [ 2220 | "proc-macro2", 2221 | "quote", 2222 | "syn", 2223 | "synstructure", 2224 | ] 2225 | 2226 | [[package]] 2227 | name = "zerocopy" 2228 | version = "0.7.35" 2229 | source = "registry+https://github.com/rust-lang/crates.io-index" 2230 | checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" 2231 | dependencies = [ 2232 | "byteorder", 2233 | "zerocopy-derive", 2234 | ] 2235 | 2236 | [[package]] 2237 | name = "zerocopy-derive" 2238 | version = "0.7.35" 2239 | source = "registry+https://github.com/rust-lang/crates.io-index" 2240 | checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" 2241 | dependencies = [ 2242 | "proc-macro2", 2243 | "quote", 2244 | "syn", 2245 | ] 2246 | 2247 | [[package]] 2248 | name = "zerofrom" 2249 | version = "0.1.5" 2250 | source = "registry+https://github.com/rust-lang/crates.io-index" 2251 | checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" 2252 | dependencies = [ 2253 | "zerofrom-derive", 2254 | ] 2255 | 2256 | [[package]] 2257 | name = "zerofrom-derive" 2258 | 
version = "0.1.5" 2259 | source = "registry+https://github.com/rust-lang/crates.io-index" 2260 | checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" 2261 | dependencies = [ 2262 | "proc-macro2", 2263 | "quote", 2264 | "syn", 2265 | "synstructure", 2266 | ] 2267 | 2268 | [[package]] 2269 | name = "zerovec" 2270 | version = "0.10.4" 2271 | source = "registry+https://github.com/rust-lang/crates.io-index" 2272 | checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" 2273 | dependencies = [ 2274 | "yoke", 2275 | "zerofrom", 2276 | "zerovec-derive", 2277 | ] 2278 | 2279 | [[package]] 2280 | name = "zerovec-derive" 2281 | version = "0.10.3" 2282 | source = "registry+https://github.com/rust-lang/crates.io-index" 2283 | checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" 2284 | dependencies = [ 2285 | "proc-macro2", 2286 | "quote", 2287 | "syn", 2288 | ] 2289 | 2290 | [[package]] 2291 | name = "zstd" 2292 | version = "0.13.2" 2293 | source = "registry+https://github.com/rust-lang/crates.io-index" 2294 | checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9" 2295 | dependencies = [ 2296 | "zstd-safe", 2297 | ] 2298 | 2299 | [[package]] 2300 | name = "zstd-safe" 2301 | version = "7.2.1" 2302 | source = "registry+https://github.com/rust-lang/crates.io-index" 2303 | checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" 2304 | dependencies = [ 2305 | "zstd-sys", 2306 | ] 2307 | 2308 | [[package]] 2309 | name = "zstd-sys" 2310 | version = "2.0.13+zstd.1.5.6" 2311 | source = "registry+https://github.com/rust-lang/crates.io-index" 2312 | checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" 2313 | dependencies = [ 2314 | "cc", 2315 | "pkg-config", 2316 | ] 2317 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | 
[package] 2 | name = "jam-rs" 3 | version = "0.2.0" 4 | edition = "2021" 5 | repository = "https://github.com/St4NNi/jam-rs" 6 | license = "MIT" 7 | authors = ["Sebastian Beyvers "] 8 | description = "Just another (genomic) minhash (Jam) implementation in Rust" 9 | keywords = ["minhash", "metagenomics", "bioinformatics", "containment", "jaccard"] 10 | categories = ["command-line-utilities", "science", "algorithms"] 11 | exclude=["/tests"] 12 | readme = "README.md" 13 | 14 | 15 | [[bin]] 16 | name = "jam" 17 | path = "src/main.rs" 18 | 19 | [lib] 20 | name = "jam_rs" 21 | path = "src/lib.rs" 22 | 23 | [dependencies] 24 | anyhow = "1.0.94" 25 | bincode = "1.3.3" 26 | flate2 = "1.0.35" 27 | needletail = "0.6.1" 28 | rayon = "1.10.0" 29 | xxhash-rust = { version = "0.8.12", features = ["xxh3"]} 30 | bytemuck = "1.20.0" 31 | serde = { version = "1.0.216", features = ["derive"] } 32 | clap = { version = "4.5.23", features = ["derive"] } 33 | fastmurmur3 = {version = "0.2.0"} 34 | sourmash = "0.17.2" 35 | serde_json = "1.0.133" 36 | itertools = "0.13.0" 37 | heed = "0.21.0" 38 | byteorder = "1.5.0" 39 | integer-encoding = "4.0.2" 40 | indicatif = { version = "0.17.9", features = ["rayon"] } 41 | 42 | [dev-dependencies] 43 | criterion = { version = "0.5.1", features = ["html_reports"] } 44 | murmurhash3 = {version = "0.0.5"} 45 | 46 | [[bench]] 47 | name = "benchmarks" 48 | harness = false 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Sebastian Beyvers 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, 
and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Rust](https://img.shields.io/badge/built_with-Rust-dca282.svg)](https://www.rust-lang.org/) 2 | [![License](https://img.shields.io/badge/License-MIT-brightgreen.svg)](https://github.com/St4NNi/jam-rs/blob/main/LICENSE) 3 | [![Crates.io](https://img.shields.io/crates/v/jam-rs.svg)](https://crates.io/crates/jam-rs) 4 | [![Codecov](https://codecov.io/github/St4NNi/jam-rs/coverage.svg?branch=main)](https://codecov.io/gh/St4NNi/jam-rs) 5 | [![Dependency status](https://deps.rs/repo/github/St4NNi/jam-rs/status.svg)](https://deps.rs/repo/github/St4NNi/jam-rs) 6 | # jam-rs 7 | 8 | Just another minhash (jam) implementation. A high performance minhash variant to screen extremely large (metagenomic) datasets in a very short timeframe. 9 | Implements parts of the ScaledMinHash / FracMinHash algorithm described in [sourmash](https://joss.theoj.org/papers/10.21105/joss.00027). 
10 | 11 | Unlike traditional implementations like [sourmash](https://joss.theoj.org/papers/10.21105/joss.00027) or [mash](https://doi.org/10.1186/s13059-016-0997-x) this version tries to focus on estimating the containment of small sequences in large sets by (optionally) introducing an intentional bias towards smaller sequences and enforcing a certain frequency. This is intended to be used to screen terabytes of data in just a few seconds / minutes. 12 | 13 | ### Installation 14 | 15 | A pre-release is published via [crates.io](https://crates.io/). To install it, run the command below (you need to have `cargo` and the `rust-toolchain` installed, the easiest way is via [rustup.rs](https://rustup.rs/)): 16 | 17 | ```bash 18 | cargo install jam-rs@0.1.0-beta.2 19 | ``` 20 | 21 | If you want the bleeding edge development release you can install it via git: 22 | 23 | ```bash 24 | cargo install --git https://github.com/St4NNi/jam-rs 25 | ``` 26 | 27 | ### Comparison 28 | 29 | - Multiple algorithms: [xxhash3](https://github.com/DoumanAsh/xxhash-rust), [ahash-fallback](https://github.com/tkaitchuck/aHash/wiki/AHash-fallback-algorithm) (for kmer < 32) and legacy [murmurhash3](https://github.com/mhallin/murmurhash3-rs) 30 | - Additional filter and sketching options to increase specificity and sensitivity for small sequences in collections of large assembled metagenomes 31 | - Sketch to a memory mapped database including additional metadata 32 | 33 | ### Scaling methods 34 | 35 | Multiple different scaling methods: 36 | - FracMinHash (`fscale`): Restricts the hash-space to a (lower) maximum fraction of `u64::MAX` / `fscale` 37 | - Bias scaling, introduce a bias towards smaller sequences, this increases the Database size but ensures that smaller sequences are better covered 38 | 39 | If `KmerCountScaling` and `MinMaxAbsoluteScaling` are used together the minimum number of hashes (per sequence record) will be guaranteed. 
`FracMinHash` and `KmerCountScaling` produce similar results; the first is mainly provided for sourmash compatibility. 40 | 41 | ### Usage 42 | 43 | ```console 44 | $ jam 45 | Just another (genomic) minhasher (jam), obviously blazingly fast 46 | 47 | Usage: jam [OPTIONS] 48 | 49 | Commands: 50 | sketch Sketch one or more files and write result to output file (or stdout) 51 | merge Merge multiple input sketches into a single sketch 52 | dist Estimate distance of a (small) sketch against a subset of one or more sketches as database. Requires all sketches to have the same kmer size 53 | help Print this message or the help of the given subcommand(s) 54 | 55 | Options: 56 | -t, --threads Number of threads to use [default: 1] 57 | -f, --force Overwrite output files 58 | -h, --help Print help (see more with '--help') 59 | -V, --version Print version 60 | ``` 61 | 62 | #### Sketching 63 | 64 | The easiest way to sketch files is to use the `jam sketch` command. This accepts one or more input files (fastx / fastx.gz) or a `.list` file with a full list of input files, and sketches all inputs to a specific output sketch file. 65 | 66 | ```console 67 | $ jam sketch 68 | Sketch one or more files and write the result to an output file (or stdout) 69 | 70 | Usage: jam sketch [OPTIONS] [INPUT]... 71 | 72 | Arguments: 73 | [INPUT]... 
Input file(s), one directory or one file with list of files to be hashed 74 | 75 | Options: 76 | -o, --output Output file 77 | -k, --kmer-size kmer size, all sketches must have the same size to be compared [default: 21] 78 | --fscale Scale the hash space to a minimum fraction of the maximum hash value (FracMinHash) 79 | -t, --threads Number of threads to use [default: 1] 80 | -f, --force Overwrite output files 81 | --nmin Minimum number of k-mers (per record) to be hashed, bottom cut-off 82 | --nmax Maximum number of k-mers (per record) to be hashed, top cut-off 83 | --format Change to other output formats [default: bin] [possible values: bin, sourmash] 84 | --algorithm Change the hashing algorithm [default: default] [possible values: default, ahash, xxhash, murmur3] 85 | --singleton Create a separate sketch for each sequence record 86 | -s, --stats Keep extra stats for each sequence record 87 | -h, --help Print help 88 | ``` 89 | 90 | #### Dist 91 | 92 | Calculate the distance for one or more inputs vs. a large set of database sketches. Optionally specify a minimum cutoff in percent of matching kmers. Output is optional if not specified the result will be printed to stdout. 93 | 94 | ```console 95 | $ jam dist 96 | Estimate containment of a (small) sketch against a subset of one or more sketches as database. 
Requires all sketches to have the same kmer size 97 | 98 | Usage: jam dist [OPTIONS] --input 99 | 100 | Options: 101 | -i, --input Input sketch or raw file 102 | -d, --database Database sketch(es) 103 | -o, --output Output to file instead of stdout 104 | -c, --cutoff Cut-off value for similarity [default: 0.0] 105 | -t, --threads Number of threads to use [default: 1] 106 | -f, --force Overwrite output files 107 | --stats Use the Stats params for restricting results 108 | --gc-lower Use GC stats with a lower bound of x% (gc_lower and gc_upper must be set) 109 | --gc-upper Use GC stats with an upper bound of y% (gc_lower and gc_upper must be set) 110 | -h, --help Print help 111 | ``` 112 | 113 | 114 | #### Merge 115 | 116 | Merge multiple sketches into one large one. 117 | 118 | ```console 119 | $ jam merge 120 | Merge multiple input sketches into a single sketch 121 | 122 | Usage: jam merge [OPTIONS] --output [INPUTS]... 123 | 124 | Arguments: 125 | [INPUTS]... One or more input sketches 126 | 127 | Options: 128 | -o, --output Output file 129 | -t, --threads Number of threads to use [default: 1] 130 | -f, --force Overwrite output files 131 | -h, --help Print help 132 | ``` 133 | 134 | ### License 135 | 136 | This project is licensed under the MIT license. See the [LICENSE](LICENSE) file for more info. 137 | 138 | ### Disclaimer 139 | 140 | jam-rs is still in active development and not ready for production use. Use at your own risk. 141 | 142 | ### Feedback & Contributions 143 | 144 | If you have any ideas, suggestions, or issues, please don't hesitate to open an issue and/or PR. Contributions to this project are always welcome! 
We appreciate your help in making this project better 145 | 146 | ### Credits 147 | 148 | This tool is heavily inspired by [finch-rs](https://github.com/onecodex/finch-rs)/[License](https://github.com/onecodex/finch-rs/blob/master/LICENSE.txt) and [sourmash](https://github.com/sourmash-bio/sourmash)/[License](https://github.com/sourmash-bio/sourmash/blob/latest/LICENSE). Check them out if you need a more mature ecosystem with well tested hash functions and more features. 149 | -------------------------------------------------------------------------------- /benches/benchmarks.rs: -------------------------------------------------------------------------------- 1 | use criterion::{criterion_group, criterion_main, Criterion}; 2 | use std::time::Duration; 3 | 4 | #[inline] 5 | pub fn murmur3_old(kmer: &[u8]) -> u64 { 6 | murmurhash3::murmurhash3_x64_128(kmer, 42).0 7 | } 8 | 9 | #[inline] 10 | pub fn murmur3_new(kmer: &[u8]) -> u64 { 11 | fastmurmur3::murmur3_x64_128(kmer, 42) as u64 12 | } 13 | 14 | fn criterion_benchmark(c: &mut Criterion) { 15 | let mut group = c.benchmark_group("Hashes"); 16 | group.warm_up_time(Duration::from_millis(100)); 17 | group.measurement_time(Duration::from_millis(100)); 18 | 19 | for x in u64::MAX - 20..u64::MAX { 20 | group.bench_with_input(format!("xxhash_{}", x), &x, |b, &x| { 21 | b.iter(|| jam_rs::hash_functions::xxhash3(&x.to_be_bytes())); 22 | }); 23 | group.bench_with_input(format!("ahash_{}", x), &x, |b, &x| { 24 | b.iter(|| jam_rs::hash_functions::ahash(x)); 25 | }); 26 | group.bench_with_input(format!("murmur3_old_{}", x), &x, |b, &x| { 27 | b.iter(|| murmur3_old(&x.to_be_bytes())); 28 | }); 29 | group.bench_with_input(format!("murmur3_new_{}", x), &x, |b, &x| { 30 | b.iter(|| murmur3_new(&x.to_be_bytes())); 31 | }); 32 | } 33 | group.finish(); 34 | } 35 | criterion_group!(benches, criterion_benchmark); 36 | criterion_main!(benches); 37 | -------------------------------------------------------------------------------- 
/src/cli.rs: -------------------------------------------------------------------------------- 1 | use clap::{Parser, Subcommand, ValueEnum}; 2 | use serde::{Deserialize, Serialize}; 3 | use std::path::PathBuf; 4 | 5 | #[derive(Debug, Parser)] 6 | #[command(name = "jam")] 7 | #[command(bin_name = "jam")] 8 | #[command(version = "0.2.0")] 9 | #[command( 10 | about = "Just another (genomic) minhasher (jam), obviously blazingly fast", 11 | long_about = "An optimized minhash implementation that focuses on quick scans for small sequences in large datasets." 12 | )] 13 | pub struct Cli { 14 | #[command(subcommand)] 15 | pub command: Commands, 16 | /// Number of threads to use 17 | #[arg(short, long, global = true, default_value = "1")] 18 | pub threads: Option, 19 | /// Overwrite output files 20 | #[arg(short, long, global = true, default_value = "false")] 21 | pub force: bool, 22 | 23 | /// Silent mode, no (additional) output to stdout 24 | /// Only errors and output files will be printed 25 | #[arg(short, long, global = true, default_value = "false")] 26 | pub silent: bool, 27 | } 28 | 29 | #[derive(ValueEnum, Debug, Clone)] 30 | pub enum OutputFormats { 31 | // Lmdb format, memory mapped database 32 | Lmdb, 33 | // Sourmash compatible json 34 | Sourmash, 35 | } 36 | 37 | #[derive(ValueEnum, Debug, Clone, Deserialize, Serialize)] 38 | pub enum HashAlgorithms { 39 | Default, // AHash < 32 | Xxhash >= 32 40 | Ahash, 41 | Xxhash, 42 | Murmur3, 43 | } 44 | 45 | #[derive(Debug, Subcommand, Clone)] 46 | pub enum Commands { 47 | /// Sketch one or more files and write the result to an output file (or stdout) 48 | #[command(arg_required_else_help = true)] 49 | Sketch { 50 | /// Input file(s), one directory or one file with list of files to be hashed 51 | #[arg(value_parser = clap::value_parser!(std::path::PathBuf))] 52 | input: Vec, 53 | /// Output file 54 | #[arg(short, long)] 55 | #[arg(value_parser = clap::value_parser!(std::path::PathBuf))] 56 | output: Option, 57 | /// kmer 
size, all sketches must have the same size to be compared 58 | #[arg(short = 'k', long = "kmer-size", default_value = "21")] 59 | kmer_size: u8, 60 | /// Scale the hash space to a minimum fraction of the maximum hash value (FracMinHash) 61 | #[arg(long)] 62 | fscale: Option, 63 | /// Maximum number of k-mers (per record) to be hashed, top cut-off 64 | #[arg(long)] 65 | nmax: Option, 66 | /// Change to other output formats 67 | #[arg(long, default_value = "lmdb")] 68 | format: OutputFormats, 69 | /// Change the hashing algorithm 70 | #[arg(long, default_value = "default")] 71 | algorithm: HashAlgorithms, 72 | /// Create a separate sketch for each sequence record 73 | /// Will increase the size of the output file if lmdb is used 74 | #[arg(long)] 75 | singleton: bool, 76 | }, 77 | // Disabled `Merge` subcommand (plain comment so this text is not merged into the `Dist` help): Merge multiple input sketches into a single sketch 78 | // #[command(arg_required_else_help = true)] 79 | // Merge { 80 | // /// One or more input sketches 81 | // #[arg(value_parser = clap::value_parser!(std::path::PathBuf))] 82 | // inputs: Vec, 83 | // /// Output file 84 | // #[arg(short, long, required = true)] 85 | // #[arg(value_parser = clap::value_parser!(std::path::PathBuf))] 86 | // output: PathBuf, 87 | // }, 88 | /// Estimate containment of a (small) sketch against a subset of one or more sketches as database. 
89 | /// Requires all sketches to have the same kmer size 90 | #[command(arg_required_else_help = true)] 91 | Dist { 92 | /// Input sketch or raw file 93 | #[arg(short, long)] 94 | input: PathBuf, 95 | /// Database sketch(es), 1 lmdb file or multiple sourmash json files 96 | #[arg(short, long)] 97 | database: Vec, 98 | /// Output to file instead of stdout 99 | #[arg(short, long)] 100 | #[arg(value_parser = clap::value_parser!(std::path::PathBuf))] 101 | output: Option, 102 | /// Cut-off value for similarity 103 | #[arg(short, long, default_value = "0.0")] 104 | cutoff: f64, 105 | }, 106 | 107 | #[command(arg_required_else_help = true)] 108 | Stats { 109 | /// Input lmdb database 110 | #[arg(short, long)] 111 | input: PathBuf, 112 | // Short 113 | #[arg(short, long)] 114 | short: bool, 115 | }, 116 | } 117 | -------------------------------------------------------------------------------- /src/compare.rs: -------------------------------------------------------------------------------- 1 | use crate::file_io::ShortSketchInfo; 2 | use crate::signature::Signature; 3 | use crate::sketch::Sketch; 4 | use anyhow::anyhow; 5 | use anyhow::Result; 6 | use byteorder::BigEndian; 7 | use heed::types::SerdeBincode; 8 | use heed::types::U32; 9 | use heed::types::U64; 10 | use heed::DatabaseFlags; 11 | use heed::EnvFlags; 12 | use indicatif::ParallelProgressIterator; 13 | use indicatif::ProgressBar; 14 | use indicatif::ProgressDrawTarget; 15 | use rayon::prelude::{IntoParallelRefIterator, ParallelIterator}; 16 | use serde::{Deserialize, Serialize}; 17 | use std::cmp::max; 18 | use std::collections::HashMap; 19 | use std::path::PathBuf; 20 | use std::sync::Arc; 21 | use std::sync::RwLock; 22 | use std::{ 23 | fmt::{self, Display, Formatter}, 24 | ops::DerefMut, 25 | sync::Mutex, 26 | }; 27 | 28 | #[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] 29 | pub struct CompareResult { 30 | pub from_name: String, 31 | pub to_name: String, 32 | pub num_common: usize, 33 | pub 
num_kmers: usize, 34 | pub reverse: bool, 35 | pub estimated_containment: f64, 36 | } 37 | 38 | impl Display for CompareResult { 39 | fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 40 | if self.reverse { 41 | write!( 42 | f, 43 | "{}\t{}\t{}\t{}\t{:.2}", 44 | self.to_name, 45 | self.from_name, 46 | self.num_common, 47 | self.num_kmers, 48 | self.estimated_containment, 49 | )?; 50 | Ok(()) 51 | } else { 52 | write!( 53 | f, 54 | "{}\t{}\t{}\t{}\t{:.2}", 55 | self.from_name, 56 | self.to_name, 57 | self.num_common, 58 | self.num_kmers, 59 | self.estimated_containment, 60 | ) 61 | } 62 | } 63 | } 64 | 65 | pub struct MultiComp { 66 | from: Vec, 67 | to: Vec, 68 | results: Vec, 69 | threads: usize, 70 | kmer_size: u8, 71 | cutoff: f64, 72 | } 73 | 74 | impl MultiComp { 75 | pub fn new( 76 | mut from: Vec, 77 | mut to: Vec, 78 | threads: usize, 79 | cutoff: f64, 80 | ) -> Result { 81 | let kmer_size = from 82 | .first() 83 | .ok_or_else(|| anyhow!("Empty from list"))? 84 | .kmer_size; 85 | 86 | Ok(MultiComp { 87 | from: from.iter_mut().map(|e| e.collapse()).collect(), 88 | to: to.iter_mut().map(|e| e.collapse()).collect(), 89 | results: Vec::new(), 90 | threads, 91 | kmer_size, 92 | cutoff, 93 | }) 94 | } 95 | 96 | pub fn compare(&mut self) -> Result<()> { 97 | let pool = rayon::ThreadPoolBuilder::new() 98 | .num_threads(self.threads) 99 | .build()?; 100 | 101 | let results = Mutex::new(Vec::new()); 102 | 103 | pool.install(|| { 104 | self.from.par_iter().try_for_each(|origin| { 105 | self.to.par_iter().try_for_each(|target| { 106 | if target.kmer_size != self.kmer_size || origin.kmer_size != self.kmer_size { 107 | return Err(anyhow!( 108 | "Kmer sizes do not match, expected: {}, got: {}", 109 | self.kmer_size, 110 | origin.kmer_size 111 | )); 112 | } 113 | let mut comparator = Comparator::new(origin, target); 114 | comparator.compare()?; 115 | results 116 | .lock() 117 | .unwrap() 118 | .deref_mut() 119 | .push(comparator.finalize()); 120 | Ok::<(), 
anyhow::Error>(()) 121 | }) 122 | }) 123 | })?; 124 | 125 | self.results = results.into_inner().unwrap(); 126 | Ok(()) 127 | } 128 | 129 | pub fn finalize(self) -> Vec { 130 | self.results 131 | .into_iter() 132 | .filter(|e| e.num_common as f64 / e.num_kmers as f64 * 100.0 > self.cutoff) 133 | .collect() 134 | } 135 | } 136 | 137 | pub struct Comparator<'a> { 138 | larger: &'a Sketch, 139 | smaller: &'a Sketch, 140 | num_kmers: usize, 141 | num_common: usize, 142 | num_skipped: usize, 143 | reverse: bool, 144 | } 145 | 146 | impl<'a> Comparator<'a> { 147 | pub fn new(sketch_a: &'a Sketch, sketch_b: &'a Sketch) -> Self { 148 | let (larger, smaller, reverse) = if sketch_a.hashes.len() >= sketch_b.hashes.len() { 149 | // DATABASE, INPUT -> Reverse = false 150 | (sketch_a, sketch_b, false) 151 | } else { 152 | // INPUT, DATABASE -> Reverse = true 153 | (sketch_b, sketch_a, true) 154 | }; 155 | Comparator { 156 | larger, 157 | smaller, 158 | num_kmers: 0, 159 | num_common: 0, 160 | num_skipped: 0, 161 | reverse, 162 | } 163 | } 164 | 165 | // Stats handling: 166 | // GC & Size for the original contig are stored in the Stats struct 167 | // This comparison is always in relation to the query sketch 168 | // If reverse is true, the query sketch is the larger sketch 169 | #[inline] 170 | pub fn compare(&mut self) -> Result<()> { 171 | self.num_kmers = max(self.larger.num_kmers, self.smaller.num_kmers); 172 | 173 | let mut larger = self.larger.hashes.iter(); 174 | let mut smaller = self.smaller.hashes.iter(); 175 | 176 | let mut larger_item = larger.next(); 177 | let mut smaller_item = smaller.next(); 178 | // Sorted-merge intersection counting hashes present in both sketches. NOTE(review): assumes `hashes` iterates in ascending order (e.g. 
a BTreeSet) - confirm in sketch.rs. 179 | loop { 180 | match (larger_item, smaller_item) { 181 | (Some(l), Some(s)) => { 182 | if l == s { 183 | self.num_common += 1; 184 | smaller_item = smaller.next(); 185 | larger_item = larger.next(); 186 | } else if l < s { // `larger` side is behind: advance it so a later equal hash is not skipped 187 | larger_item = larger.next(); 188 | } else { // `smaller` side is behind: advance it 189 | smaller_item = smaller.next(); 190 | } 191 | } 192 | (Some(_), None) => { 193 | 
larger_item = larger.next(); 194 | } 195 | (None, Some(_)) => { 196 | smaller_item = smaller.next(); 197 | } 198 | (None, None) => break, 199 | } 200 | } 201 | 202 | Ok(()) 203 | } 204 | 205 | pub fn finalize(self) -> CompareResult { 206 | // Eg 0.1 207 | let larger_fraction = self.larger.num_kmers as f64 / self.larger.hashes.len() as f64; 208 | // Eg 1.0 209 | let smaller_fraction = self.smaller.num_kmers as f64 / self.smaller.hashes.len() as f64; 210 | // How much smaller is the smaller sketch 211 | let fraction = if larger_fraction < smaller_fraction { 212 | smaller_fraction / larger_fraction 213 | } else { 214 | larger_fraction / smaller_fraction 215 | }; 216 | let estimated_containment = 217 | self.num_common as f64 / self.num_kmers as f64 * fraction * 100.0; 218 | 219 | CompareResult { 220 | from_name: self.larger.name.clone(), 221 | to_name: self.smaller.name.clone(), 222 | num_kmers: self.num_kmers, 223 | num_common: self.num_common, 224 | reverse: self.reverse, 225 | estimated_containment, 226 | } 227 | } 228 | 229 | #[allow(dead_code)] 230 | pub fn reset(&mut self) { 231 | self.num_kmers = 0; 232 | self.num_common = 0; 233 | self.num_skipped = 0; 234 | } 235 | } 236 | 237 | pub struct LmdbComparator { 238 | pub signatures: Vec, 239 | pub lmdb_env: heed::Env, 240 | pub threads: usize, 241 | pub cutoff: f64, 242 | pub infos: Arc>>, 243 | pub kmer_size: u8, 244 | pub fscale: Option, 245 | pub silent: bool, 246 | } 247 | 248 | impl LmdbComparator { 249 | pub fn new(lmdb_env: PathBuf, threads: usize, cutoff: f64, silent: bool) -> Result { 250 | let lmdb_env = unsafe { 251 | heed::EnvOpenOptions::new() 252 | .flags(EnvFlags::READ_ONLY | EnvFlags::NO_LOCK | EnvFlags::NO_SUB_DIR) 253 | .map_size(10 * 1024 * 1024 * 1024) 254 | .max_dbs(2) 255 | .open(lmdb_env) // a missing or unreadable database is reported to the caller instead of panicking 256 | .map_err(|e| anyhow!("Failed to open LMDB environment: {}", e))? 
257 | }; 258 | 259 | let txn = lmdb_env.read_txn()?; 260 | 261 | let sigs_db = lmdb_env 262 | .open_database::, SerdeBincode>(&txn, Some("sigs"))? 
263 | .ok_or_else(|| anyhow!("Database sigs not found"))?; 264 | 265 | let infos = RwLock::new(HashMap::new()); 266 | 267 | let mut kmer_size = None; 268 | let mut fscale = None; 269 | for sig in sigs_db.iter(&txn)? { 270 | let (key, value) = sig?; 271 | if let Some(kmer_size) = kmer_size { 272 | if kmer_size != value.kmer_size { 273 | return Err(anyhow!("Kmer sizes do not match")); 274 | } 275 | } else { 276 | kmer_size = Some(value.kmer_size); 277 | } 278 | 279 | if fscale.is_some() { 280 | if fscale != value.fscale { 281 | return Err(anyhow!("Fscale sizes do not match")); 282 | } 283 | } else { 284 | fscale = value.fscale; 285 | } 286 | 287 | infos.write().expect("poisoned lock").insert(key, value); 288 | } 289 | 290 | txn.commit()?; 291 | 292 | Ok(LmdbComparator { 293 | signatures: vec![], 294 | lmdb_env, 295 | threads, 296 | cutoff, 297 | infos: Arc::new(infos), 298 | kmer_size: kmer_size.unwrap(), 299 | fscale, 300 | silent, 301 | }) 302 | } 303 | 304 | pub fn set_signatures(&mut self, signatures: Vec) { 305 | self.signatures = signatures; 306 | } 307 | 308 | pub fn compare(&self) -> Result> { 309 | let pool = rayon::ThreadPoolBuilder::new() 310 | .num_threads(self.threads) 311 | .build()?; 312 | 313 | let results = Mutex::new(Vec::new()); 314 | 315 | let pb = ProgressBar::new(self.signatures.len() as u64); 316 | pb.set_style( 317 | indicatif::ProgressStyle::default_bar() 318 | .template("[{elapsed_precise}] {bar:40.cyan/blue} {pos:>7}/{len:7} {msg}")? 
319 | .progress_chars("##-"), 320 | ); 321 | if self.silent { 322 | pb.set_draw_target(ProgressDrawTarget::hidden()) 323 | } 324 | let infos = self.infos.clone(); 325 | 326 | pool.install(|| { 327 | self.signatures 328 | .par_iter() 329 | .progress_with(pb) 330 | .try_for_each(|origin| { 331 | origin.sketches.par_iter().try_for_each(|target| { 332 | let txn = self.lmdb_env.read_txn()?; 333 | 334 | let hashes = self 335 | .lmdb_env 336 | .database_options() 337 | .types::, U32>() 338 | .name("hashes") 339 | .flags(DatabaseFlags::DUP_SORT) 340 | .open(&txn)? 341 | .ok_or_else(|| anyhow!("Database hashes not found"))?; 342 | let mut result_map = HashMap::new(); 343 | 344 | for hash in target.hashes.iter() { 345 | if let Some(key) = hashes.get_duplicates(&txn, hash)? { 346 | for item in key { 347 | let (_, sketch) = item?; 348 | let entry = result_map.entry(sketch).or_insert(0); 349 | *entry += 1u64; 350 | } 351 | }; 352 | } 353 | 354 | let mut final_results = vec![]; 355 | for (idx, num_common) in result_map { 356 | let read_infos = infos.read().expect("poisoned lock"); 357 | let infos = read_infos.get(&idx).expect("Key not found"); 358 | let num_kmers = if target.hashes.len() < infos.num_hashes { 359 | target.hashes.len() 360 | } else { 361 | infos.num_hashes 362 | }; 363 | let estimated_containment = 364 | num_common as f64 / num_kmers as f64 * 100.0; 365 | final_results.push(CompareResult { 366 | from_name: target.name.clone(), 367 | to_name: infos.file_name.clone(), 368 | num_kmers, 369 | num_common: num_common as usize, 370 | reverse: false, 371 | estimated_containment, 372 | }) 373 | } 374 | 375 | results 376 | .lock() 377 | .unwrap() 378 | .extend(final_results.into_iter().filter(|e| { 379 | e.num_common as f64 / e.num_kmers as f64 * 100.0 > self.cutoff 380 | })); 381 | 382 | Ok::<(), anyhow::Error>(()) 383 | }) 384 | }) 385 | })?; 386 | Ok(results.into_inner().expect("poisoned lock")) 387 | } 388 | } 389 | 390 | #[cfg(test)] 391 | mod tests { 392 | use 
std::collections::BTreeSet; 393 | 394 | use crate::compare::CompareResult; 395 | 396 | #[test] 397 | fn test_comp_without_stats() { 398 | let mut bheap1 = BTreeSet::default(); 399 | bheap1.extend([1, 2, 3]); 400 | let sketch_a = crate::sketch::Sketch { 401 | name: "a".to_string(), 402 | hashes: bheap1, 403 | num_kmers: 3, 404 | kmer_size: 21, 405 | }; 406 | let mut bheap2 = BTreeSet::default(); 407 | bheap2.extend([1, 2, 4]); 408 | let sketch_b = crate::sketch::Sketch { 409 | name: "b".to_string(), 410 | hashes: bheap2, 411 | num_kmers: 3, 412 | kmer_size: 21, 413 | }; 414 | 415 | let mut comp = super::Comparator::new(&sketch_a, &sketch_b); 416 | comp.compare().unwrap(); 417 | let result = comp.finalize(); 418 | assert_eq!(result.num_kmers, 3); 419 | assert_eq!(result.num_common, 2); 420 | assert_eq!(result.estimated_containment, 66.66666666666666); 421 | 422 | let constructed_result = CompareResult { 423 | from_name: "a".to_string(), 424 | to_name: "b".to_string(), 425 | num_kmers: 3, 426 | num_common: 2, 427 | reverse: false, 428 | estimated_containment: 66.66666666666666, 429 | }; 430 | assert_eq!(result, constructed_result); 431 | } 432 | } 433 | -------------------------------------------------------------------------------- /src/file_io.rs: -------------------------------------------------------------------------------- 1 | use crate::cli::Commands; 2 | use crate::cli::HashAlgorithms; 3 | use crate::cli::OutputFormats; 4 | use crate::compare::CompareResult; 5 | use crate::hash_functions::Function; 6 | use crate::signature::Signature; 7 | use crate::sketch::Sketch; 8 | use crate::sketcher; 9 | use anyhow::anyhow; 10 | use anyhow::Result; 11 | use byteorder::BigEndian; 12 | use heed::types::SerdeBincode; 13 | use heed::types::U32; 14 | use heed::types::U64; 15 | use heed::DatabaseFlags; 16 | use heed::EnvFlags; 17 | use heed::PutFlags; 18 | use indicatif::MultiProgress; 19 | use indicatif::ParallelProgressIterator; 20 | use indicatif::ProgressBar; 21 | use 
needletail::parse_fastx_file; 22 | use rayon::prelude::IntoParallelRefIterator; 23 | use rayon::prelude::ParallelIterator; 24 | use serde::Deserialize; 25 | use serde::Serialize; 26 | use sourmash::signature::Signature as SourmashSignature; 27 | use std::collections::BTreeMap; 28 | use std::fs; 29 | use std::fs::remove_file; 30 | use std::io; 31 | use std::io::Write; 32 | use std::path; 33 | use std::sync::mpsc; 34 | use std::sync::mpsc::Receiver; 35 | use std::thread; 36 | use std::{ 37 | ffi::OsStr, 38 | fs::File, 39 | io::{BufRead, BufReader}, 40 | path::PathBuf, 41 | }; 42 | 43 | pub struct FileHandler {} 44 | 45 | #[derive(Debug, Serialize, Deserialize)] 46 | pub struct ShortSketchInfo { 47 | pub file_name: String, 48 | pub num_hashes: usize, 49 | pub kmer_size: u8, 50 | pub fscale: Option, 51 | } 52 | 53 | impl FileHandler { 54 | pub fn sketch_files(command: Commands, threads: Option) -> Result<()> { 55 | match command.to_owned() { 56 | Commands::Sketch { 57 | input, 58 | output, 59 | kmer_size, 60 | fscale, 61 | nmax, 62 | algorithm, 63 | format, 64 | singleton, 65 | } => { 66 | let files = FileHandler::test_and_collect_files(input, true)?; 67 | let pool = rayon::ThreadPoolBuilder::new() 68 | .num_threads(threads.unwrap_or_default()) 69 | .build()?; 70 | 71 | let function = Function::from_alg(algorithm.clone(), kmer_size); 72 | 73 | let (send, recv) = mpsc::sync_channel(10); 74 | 75 | let multi_bar = MultiProgress::new(); 76 | let multi_bar_clone = multi_bar.clone(); 77 | 78 | let is_stdout = output.is_none(); 79 | let handler = thread::spawn(move || { 80 | FileHandler::write_output(fscale, output, format, recv, multi_bar_clone) 81 | }); 82 | 83 | let pb = ProgressBar::new(files.len() as u64); 84 | let pb = multi_bar.add(pb); 85 | pb.set_style(indicatif::ProgressStyle::default_bar() 86 | .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta}) {msg}") 87 | .unwrap() 88 | .progress_chars("#>-")); 89 | let pb_clone = pb.clone(); 
90 | let _ = pool.install(|| { 91 | files 92 | .par_iter() 93 | .progress_with(pb) 94 | .try_for_each(|file_path| { 95 | pb_clone.set_message(format!("{:?}", file_path.clone())); 96 | match FileHandler::sketch_file( 97 | file_path, 98 | kmer_size, 99 | fscale, 100 | nmax, 101 | singleton, 102 | function.clone(), 103 | algorithm.clone(), 104 | is_stdout, 105 | ) { 106 | Ok(sig) => { 107 | send.send(sig).map_err(|_| anyhow!("Error while sending")) 108 | } 109 | Err(_) => { 110 | Err(anyhow!("Error while sketching file {:?}", file_path)) 111 | } 112 | } 113 | }) 114 | }); 115 | 116 | drop(send); 117 | 118 | Ok(handler 119 | .join() 120 | .map_err(|_| anyhow!("Unable to join threads"))??) 121 | } 122 | _ => Err(anyhow!("Wrong command")), 123 | } 124 | } 125 | 126 | pub fn sketch_file( 127 | input: &PathBuf, 128 | kmer_length: u8, 129 | fscale: Option, 130 | nmax: Option, 131 | singleton: bool, 132 | function: Function, 133 | algorithm: HashAlgorithms, 134 | _stdout: bool, 135 | ) -> Result { 136 | //let start = std::time::Instant::now(); 137 | let max_hash = if let Some(fscale) = fscale { 138 | (u64::MAX as f64 / fscale as f64) as u64 139 | } else { 140 | u64::MAX 141 | }; 142 | let mut sketcher = sketcher::Sketcher::new( 143 | kmer_length, 144 | input 145 | .to_str() 146 | .ok_or_else(|| anyhow!("Unknown path"))? 
147 | .to_string(), 148 | singleton, 149 | max_hash, 150 | nmax, 151 | function, 152 | algorithm, 153 | ); 154 | let mut reader = parse_fastx_file(input)?; 155 | //let mut counter = 0; 156 | while let Some(record) = reader.next() { 157 | sketcher.process(&record?); 158 | } 159 | //let elapsed = start.elapsed().as_millis(); 160 | // if !stdout { 161 | // println!( 162 | // "Processed {:?} with {} records, in {:?} seconds", 163 | // input, 164 | // counter, 165 | // elapsed as f64 / 1000.0, 166 | // ); 167 | // } 168 | Ok(sketcher.finish()) 169 | } 170 | 171 | pub fn write_output( 172 | fscale: Option, 173 | output: Option, 174 | output_format: OutputFormats, 175 | signature_recv: Receiver, 176 | multibar: MultiProgress, 177 | ) -> Result<()> { 178 | let stdout = output.is_none(); 179 | 180 | match output_format { 181 | OutputFormats::Sourmash => { 182 | let mut output: Box = match output { 183 | Some(o) => Box::new(std::io::BufWriter::new(File::create(o)?)), 184 | None => Box::new(std::io::BufWriter::new(io::stdout())), 185 | }; 186 | output.write_all(b"[\n")?; 187 | let mut first = true; 188 | while let Ok(sig) = signature_recv.recv() { 189 | let sourmash_sig: SourmashSignature = sig.into(); 190 | if !first { 191 | output.write_all(b",\n")?; 192 | } 193 | first = false; // every element after the first is preceded by a separator 194 | serde_json::to_writer(&mut output, &sourmash_sig)?; 195 | } 196 | output.write_all(b"]")?; output.flush()?; // flush explicitly so buffered write errors are not lost on drop 197 | } 198 | OutputFormats::Lmdb => { 199 | if stdout { 200 | return Err(anyhow!("Output format lmdb is not supported for stdout")); 201 | } 202 | let Some(output) = output else { 203 | return Err(anyhow!("Output folder is required for lmdb")); 204 | }; 205 | if !output.is_dir() { 206 | return Err(anyhow!( 207 | "Output folder {:?} does not exist or is no directory", 208 | output 209 | )); 210 | } 211 | 212 | let heed_env = unsafe { 213 | heed::EnvOpenOptions::new() 214 | .map_size(10 * 1024 * 1024 * 1024 * 1024) 215 | .max_dbs(2) 216 | .flags(EnvFlags::WRITE_MAP | EnvFlags::MAP_ASYNC) 217 | 
.open(output.clone())? 218 | }; 219 | { 220 | let mut write_txn = heed_env.write_txn()?; 221 | 222 | let sigs_db = heed_env 223 | .create_database::, SerdeBincode>( 224 | &mut write_txn, 225 | Some("sigs"), 226 | )?; 227 | let hashes_db = heed_env 228 | .database_options() 229 | .types::, U32>() 230 | .name("hashes") 231 | .flags(DatabaseFlags::DUP_SORT) 232 | .create(&mut write_txn)?; 233 | 234 | let mut counter: u32 = 0; 235 | let mut hashes = BTreeMap::new(); 236 | while let Ok(sig) = signature_recv.recv() { 237 | for sketch in sig.sketches { 238 | sigs_db.put( 239 | &mut write_txn, 240 | &counter, 241 | &ShortSketchInfo { 242 | file_name: sketch.name, 243 | num_hashes: sketch.num_kmers, 244 | kmer_size: sig.kmer_size, 245 | fscale, 246 | }, 247 | )?; 248 | for hash in sketch.hashes { 249 | hashes.entry(hash).or_insert_with(Vec::new).push(counter); 250 | } 251 | counter += 1; 252 | } 253 | write_txn.commit()?; 254 | write_txn = heed_env.write_txn()?; 255 | } 256 | let _ = multibar.println("Signatures finished, writing hashes"); 257 | 258 | let bar = multibar.add(ProgressBar::new(hashes.len() as u64)); 259 | bar.set_style(indicatif::ProgressStyle::default_bar() 260 | .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta}) {msg}") 261 | .unwrap() 262 | .progress_chars("#>-")); 263 | 264 | for (hash, sigs) in hashes { 265 | for sig in sigs { 266 | hashes_db.put_with_flags( 267 | &mut write_txn, 268 | PutFlags::APPEND_DUP, 269 | &hash, 270 | &sig, 271 | )?; 272 | } 273 | bar.inc(1); 274 | } 275 | write_txn.commit()?; 276 | } 277 | 278 | heed_env.prepare_for_closing().wait(); 279 | 280 | let heed_env = unsafe { 281 | heed::EnvOpenOptions::new() 282 | .map_size(10 * 1024 * 1024 * 1024 * 1024) 283 | .max_dbs(2) 284 | .open(output.clone())? 
285 | }; 286 | 287 | let canonical_path = fs::canonicalize(format!("{}/", output.to_string_lossy()))?; 288 | println!( 289 | "Compacting database to {:?}/compact.mdb", 290 | canonical_path.to_string_lossy() 291 | ); 292 | heed_env 293 | .copy_to_file( 294 | format!("{}/compact.mdb", canonical_path.to_string_lossy()), 295 | heed::CompactionOption::Enabled, 296 | ) 297 | .map_err(|e| { 298 | println!("Error in copy file: {e}"); 299 | e 300 | })?; 301 | 302 | remove_file(format!("{}/data.mdb", output.to_string_lossy())).map_err(|e| { 303 | println!("Error deleting data.mdb: {e}"); 304 | e 305 | })?; 306 | remove_file(format!("{}/lock.mdb", output.to_string_lossy())).map_err(|e| { 307 | println!("Error deleting lock.mdb: {e}"); 308 | e 309 | })?; 310 | } 311 | } 312 | 313 | Ok(()) 314 | } 315 | 316 | pub fn read_signatures(input: &PathBuf) -> Result> { 317 | Ok( 318 | sourmash::signature::Signature::from_path(path::Path::new(input))? 319 | .into_iter() 320 | .map(Signature::from) 321 | .collect(), 322 | ) 323 | } 324 | 325 | pub fn concat(inputs: Vec, output: PathBuf) -> Result<()> { 326 | let o_file = std::fs::File::create(output)?; 327 | let mut bufwriter = std::io::BufWriter::new(o_file); 328 | 329 | for input in inputs { 330 | let mut reader = BufReader::new(std::fs::File::open(input)?); 331 | while let Ok(result) = 332 | bincode::deserialize_from::<&mut BufReader, Sketch>(&mut reader) 333 | { 334 | bincode::serialize_into(&mut bufwriter, &result)?; 335 | } 336 | } 337 | Ok(()) 338 | } 339 | 340 | pub fn test_and_collect_files(input: Vec, check_ext: bool) -> Result> { 341 | let mut resulting_paths = Vec::new(); 342 | let mut found_list: Option = None; 343 | for path in input { 344 | if !path.exists() { 345 | return Err(anyhow::anyhow!("File {:?} does not exist", path)); 346 | } 347 | if path.is_dir() { 348 | for p in path.read_dir()? 
{ 349 | let p = p?; 350 | if p.path().is_file() { 351 | if let Some(ext) = p.path().extension() { 352 | if test_extension(ext) { 353 | resulting_paths.push(p.path()); 354 | } else { 355 | println!("Skipping file with invalid extension: {:?}", p.path()); 356 | } 357 | } else { 358 | println!("Skipping file without extension: {:?}", p.path()); 359 | } 360 | } else { 361 | println!("Skipping directory: {:?}", p.path()); 362 | } 363 | } 364 | } 365 | 366 | if path.is_file() { 367 | if let Some(ext) = path.extension() { 368 | if test_extension(ext) || !check_ext { 369 | resulting_paths.push(path); 370 | } else if ext == "list" { 371 | if resulting_paths.is_empty() { 372 | found_list = Some(path); 373 | break; 374 | } else { 375 | return Err(anyhow::anyhow!("Found multiple list files in {:?}", path)); 376 | } 377 | } else { 378 | return Err(anyhow::anyhow!("File with {:?} invalid extension", path)); 379 | } 380 | } else { 381 | return Err(anyhow::anyhow!( 382 | "File {:?} does not have an extension", 383 | path 384 | )); 385 | } 386 | } 387 | } 388 | 389 | if let Some(list) = found_list { 390 | let reader = BufReader::new(std::fs::File::open(list)?); 391 | for line in reader.lines() { 392 | let as_path_buf = PathBuf::from(line?); 393 | if as_path_buf.exists() 394 | && test_extension(as_path_buf.extension().ok_or_else(|| { 395 | anyhow::anyhow!("File {:?} does not have an extension", as_path_buf) 396 | })?) 
397 | || !check_ext 398 | { 399 | resulting_paths.push(as_path_buf); 400 | } 401 | } 402 | } 403 | Ok(resulting_paths) 404 | } 405 | 406 | pub fn write_result(result: &Vec, output: PathBuf) -> Result<()> { 407 | let o_file = std::fs::File::create(output)?; 408 | let mut bufwriter = std::io::BufWriter::new(o_file); 409 | for r in result { 410 | writeln!(bufwriter, "{}", r)?; 411 | } 412 | Ok(()) 413 | } 414 | } 415 | 416 | pub fn test_extension(ext: &OsStr) -> bool { 417 | !(ext != "fasta" && ext != "fa" && ext != "fastq" && ext != "fq" && ext != "gz") 418 | } 419 | 420 | #[cfg(test)] 421 | mod tests { 422 | use super::*; 423 | 424 | #[test] 425 | fn test_test_extension() { 426 | assert!(test_extension(OsStr::new("fasta"))); 427 | assert!(test_extension(OsStr::new("fa"))); 428 | assert!(test_extension(OsStr::new("fastq"))); 429 | assert!(test_extension(OsStr::new("fq"))); 430 | assert!(test_extension(OsStr::new("gz"))); 431 | assert!(!test_extension(OsStr::new("txt"))); 432 | assert!(!test_extension(OsStr::new("list"))); 433 | } 434 | } 435 | -------------------------------------------------------------------------------- /src/hash_functions.rs: -------------------------------------------------------------------------------- 1 | //! A list of hash functions to compare 2 | //! 3 | //! 
Constants chosen by testing different digits of pi; 4 | use crate::cli::HashAlgorithms; 5 | const KEY1: u64 = 0xe121_19c4_114f_22a7; // = 0x4528_21e6_38d0_1377 ^ 0xa409_3822_299f_31d0; 6 | const KEY2: u32 = 0x60e5; //(0xbe54_66cf_34e9_0c6c ^ 0x082e_fa98_ec4e_6c89) & 63; 7 | 8 | // Standard xxhash function for all sizes 9 | #[inline] 10 | pub fn xxhash3(kmer: &[u8]) -> u64 { 11 | xxhash_rust::xxh3::xxh3_64(kmer) 12 | } 13 | 14 | // Standard xxhash function for all sizes 15 | #[inline] 16 | pub fn xxhash3_u64(kmer: u64) -> u64 { 17 | xxhash_rust::xxh3::xxh3_64(&kmer.to_be_bytes()) 18 | } 19 | 20 | // Specialized hash function for kmers < 32 21 | // Simplified version of ahash-fallback from the ahash crate 22 | #[inline] 23 | pub fn ahash(kmer: u64) -> u64 { 24 | let temp = (kmer ^ KEY1) as u128 * 6364136223846793005_u128; 25 | let temp2 = ((temp & 0xffff_ffff_ffff_ffff) as u64) ^ ((temp >> 64) as u64); // XOR the lower 64 bits with the upper 64 bits. 26 | temp2.rotate_left(KEY2) 27 | } 28 | 29 | // Faster version of murmur3 with equivalent output 30 | #[inline] 31 | pub fn murmur3(kmer: &[u8]) -> u64 { 32 | fastmurmur3::murmur3_x64_128(kmer, 42) as u64 33 | } 34 | 35 | #[inline] 36 | pub fn murmur3_u64(kmer: u64) -> u64 { 37 | fastmurmur3::murmur3_x64_128(&kmer.to_be_bytes(), 42) as u64 38 | } 39 | 40 | /// Stores a function pointer to a hash function 41 | #[derive(Clone)] 42 | pub enum Function<'a> { 43 | Large(&'a (dyn Fn(&[u8]) -> u64 + Send + Sync)), 44 | Small(&'a (dyn Fn(u64) -> u64 + Send + Sync)), 45 | } 46 | 47 | impl Function<'_> { 48 | pub fn get_large(&self) -> Option<&dyn Fn(&[u8]) -> u64> { 49 | match self { 50 | Function::Large(f) => Some(f), 51 | _ => None, 52 | } 53 | } 54 | pub fn get_small(&self) -> Option<&dyn Fn(u64) -> u64> { 55 | match self { 56 | Function::Small(f) => Some(f), 57 | _ => None, 58 | } 59 | } 60 | 61 | pub fn from_alg(algo: HashAlgorithms, kmer_size: u8) -> Self { 62 | if kmer_size < 32 { 63 | match algo { 64 | 
HashAlgorithms::Ahash => Function::Small(&ahash), 65 | HashAlgorithms::Murmur3 => Function::Small(&murmur3_u64), 66 | HashAlgorithms::Xxhash => Function::Small(&xxhash3_u64), 67 | HashAlgorithms::Default => Function::Small(&ahash), 68 | } 69 | } else { 70 | match algo { 71 | HashAlgorithms::Murmur3 => Function::Large(&murmur3), 72 | HashAlgorithms::Xxhash | HashAlgorithms::Default => Function::Large(&xxhash3), 73 | _ => panic!("Hash function not supported for kmer size > 32"), 74 | } 75 | } 76 | } 77 | } 78 | 79 | #[cfg(test)] 80 | mod tests { 81 | use super::*; 82 | 83 | #[test] 84 | fn test_xxhash3() { 85 | assert_eq!(xxhash3(b"AAAAAAAAAAA"), 0x92994E9987384EE2); 86 | } 87 | 88 | #[test] 89 | fn test_ahash() { 90 | assert_eq!(ahash(0xAAAAAAAAAAAAAAA), 6369629604220809163); 91 | } 92 | 93 | #[test] 94 | fn test_murmur3() { 95 | assert_eq!(murmur3(b"AAAAAAAAAAA"), 7773142420371383521); 96 | } 97 | 98 | #[test] 99 | fn test_xxhash3_u64() { 100 | assert_eq!(xxhash3_u64(0xAAAAAAAAAAAAAAA), 5855080426738543665); 101 | } 102 | 103 | #[test] 104 | fn test_murmur3_u64() { 105 | assert_eq!(murmur3_u64(0xAAAAAAAAAAAAAAA), 442865051503200633); 106 | } 107 | 108 | #[test] 109 | fn function_test() { 110 | let f = Function::from_alg(HashAlgorithms::Ahash, 21); 111 | assert_eq!( 112 | f.get_small().unwrap()(0xAAAAAAAAAAAAAAA), 113 | 6369629604220809163 114 | ); 115 | let f = Function::from_alg(HashAlgorithms::Murmur3, 21); 116 | assert_eq!( 117 | f.get_small().unwrap()(0xAAAAAAAAAAAAAAA), 118 | 442865051503200633 119 | ); 120 | let f = Function::from_alg(HashAlgorithms::Xxhash, 21); 121 | assert_eq!( 122 | f.get_small().unwrap()(0xAAAAAAAAAAAAAAA), 123 | 5855080426738543665 124 | ); 125 | let f = Function::from_alg(HashAlgorithms::Default, 21); 126 | assert_eq!( 127 | f.get_small().unwrap()(0xAAAAAAAAAAAAAAA), 128 | 6369629604220809163 129 | ); 130 | let f = Function::from_alg(HashAlgorithms::Murmur3, 32); 131 | assert_eq!(f.get_large().unwrap()(b"AAAAAAAAAAA"), 
7773142420371383521); 132 | let f = Function::from_alg(HashAlgorithms::Xxhash, 32); 133 | assert_eq!(f.get_large().unwrap()(b"AAAAAAAAAAA"), 10563560822279786210); 134 | let f = Function::from_alg(HashAlgorithms::Default, 32); 135 | assert_eq!(f.get_large().unwrap()(b"AAAAAAAAAAA"), 10563560822279786210); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/hasher.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hasher; 2 | 3 | /// Adapted from finch-rs: https://github.com/onecodex/finch-rs/blob/master/lib/src/sketch_schemes/hashing.rs 4 | /// 5 | /// If we're using a `HashMap` where the keys themselves are hashes, it's 6 | /// a little silly to re-hash them. That's where the `NoHashHasher` comes in. 7 | #[derive(Default)] 8 | pub struct NoHashHasher(u64); 9 | 10 | impl Hasher for NoHashHasher { 11 | #[inline] 12 | fn write(&mut self, bytes: &[u8]) { 13 | *self = NoHashHasher(u64::from_be_bytes(bytes.try_into().unwrap())); // This unwrap is fine -> we know we have 8 bytes 14 | } 15 | 16 | #[inline] 17 | fn write_usize(&mut self, i: usize) { 18 | self.0 = i as u64; 19 | } 20 | 21 | fn finish(&self) -> u64 { 22 | self.0 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/heed.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use byteorder::BigEndian; 4 | use heed::{ 5 | types::{SerdeBincode, U32, U64}, 6 | DatabaseFlags, EnvFlags, 7 | }; 8 | 9 | use crate::file_io::ShortSketchInfo; 10 | 11 | pub struct HeedHandler { 12 | heed_env: heed::Env, 13 | signatures: heed::Database, SerdeBincode>, 14 | hashes: heed::Database, U32>, 15 | } 16 | 17 | impl HeedHandler { 18 | pub fn new_ro(path: PathBuf) -> anyhow::Result { 19 | let heed_env = if path.is_dir() { 20 | unsafe { 21 | heed::EnvOpenOptions::new() 22 | .map_size(10 * 1024 * 1024 * 1024 * 
1024) 23 | .max_dbs(2) 24 | .flags(EnvFlags::READ_ONLY) 25 | .open(path.clone())? 26 | } 27 | } else { 28 | unsafe { 29 | heed::EnvOpenOptions::new() 30 | .map_size(10 * 1024 * 1024 * 1024 * 1024) 31 | .max_dbs(2) 32 | .flags(EnvFlags::READ_ONLY | EnvFlags::NO_SUB_DIR) 33 | .open(path.clone())? 34 | } 35 | }; 36 | 37 | let rtxn = heed_env.read_txn()?; 38 | 39 | let sigs_db = heed_env 40 | .open_database::, SerdeBincode>(&rtxn, Some("sigs"))? 41 | .ok_or_else(|| anyhow::anyhow!("Unable to open signatures database"))?; 42 | let hashes = heed_env 43 | .database_options() 44 | .types::, U32>() 45 | .name("hashes") 46 | .flags(DatabaseFlags::DUP_SORT) 47 | .open(&rtxn)? 48 | .ok_or_else(|| anyhow::anyhow!("Unable to open hashes database"))?; 49 | rtxn.commit()?; 50 | Ok(HeedHandler { 51 | heed_env, 52 | signatures: sigs_db, 53 | hashes, 54 | }) 55 | } 56 | 57 | pub fn summarize_stats(&self) -> anyhow::Result<()> { 58 | let rtxn = self.heed_env.read_txn()?; 59 | let num_of_sigs = self.signatures.len(&rtxn)?; 60 | println!("Number of signatures: {}", num_of_sigs); 61 | let num_of_hashes = self.hashes.len(&rtxn)?; 62 | println!("Number of hashes: {}", num_of_hashes); 63 | Ok(()) 64 | } 65 | 66 | pub fn detail_sigs(&self) -> anyhow::Result<()> { 67 | let rtxn = self.heed_env.read_txn()?; 68 | for value in self.signatures.iter(&rtxn)? { 69 | let (_, value) = value?; 70 | println!( 71 | "{},{:?},{},{}", 72 | value.file_name, value.fscale, value.kmer_size, value.num_hashes 73 | ); 74 | } 75 | Ok(()) 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/heed_codec.rs: -------------------------------------------------------------------------------- 1 | use std::mem::size_of; 2 | /// This is a modified version of the CBORoaringBitmapCodec from the milli crate. 
3 | /// Used in meilisearch: Source: https://github.com/meilisearch/meilisearch/blob/main/crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs 4 | /// Licensed under MIT 5 | use std::{borrow::Cow, io}; 6 | 7 | use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt}; 8 | use heed::BoxedError; 9 | use roaring::RoaringBitmap; 10 | 11 | /// This is the limit where using a byteorder became less size efficient 12 | /// than using a direct roaring encoding, it is also the point where we are able 13 | /// to determine the encoding used only by using the array of bytes length. 14 | pub const THRESHOLD: usize = 7; 15 | 16 | /// A conditionnal codec that either use the RoaringBitmap 17 | /// or a lighter ByteOrder en/decoding method. 18 | pub struct CboRoaringBitmapCodec; 19 | 20 | impl CboRoaringBitmapCodec { 21 | pub fn serialized_size(roaring: &RoaringBitmap) -> usize { 22 | if roaring.len() <= THRESHOLD as u64 { 23 | roaring.len() as usize * size_of::() 24 | } else { 25 | roaring.serialized_size() 26 | } 27 | } 28 | 29 | pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec) { 30 | if roaring.len() <= THRESHOLD as u64 { 31 | // If the number of items (u32s) to encode is less than or equal to the threshold 32 | // it means that it would weigh the same or less than the RoaringBitmap 33 | // header, so we directly encode them using ByteOrder instead. 34 | for integer in roaring { 35 | vec.write_u32::(integer).unwrap(); 36 | } 37 | } else { 38 | // Otherwise, we use the classic RoaringBitmapCodec that writes a header. 39 | roaring.serialize_into(vec).unwrap(); 40 | } 41 | } 42 | 43 | pub fn deserialize_from(mut bytes: &[u8]) -> io::Result { 44 | if bytes.len() <= THRESHOLD * size_of::() { 45 | // If there is threshold or less than threshold integers that can fit into this array 46 | // of bytes it means that we used the ByteOrder codec serializer. 
47 | let mut bitmap = RoaringBitmap::new(); 48 | while let Ok(integer) = bytes.read_u32::() { 49 | bitmap.insert(integer); 50 | } 51 | Ok(bitmap) 52 | } else { 53 | // Otherwise, it means we used the classic RoaringBitmapCodec and 54 | // that the header takes threshold integers. 55 | RoaringBitmap::deserialize_unchecked_from(bytes) 56 | } 57 | } 58 | } 59 | 60 | impl heed::BytesDecode<'_> for CboRoaringBitmapCodec { 61 | type DItem = RoaringBitmap; 62 | 63 | fn bytes_decode(bytes: &[u8]) -> Result { 64 | Self::deserialize_from(bytes).map_err(Into::into) 65 | } 66 | } 67 | 68 | impl heed::BytesEncode<'_> for CboRoaringBitmapCodec { 69 | type EItem = RoaringBitmap; 70 | 71 | fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { 72 | let mut vec = Vec::with_capacity(Self::serialized_size(item)); 73 | Self::serialize_into(item, &mut vec); 74 | Ok(Cow::Owned(vec)) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod cli; 2 | pub mod compare; 3 | pub mod file_io; 4 | pub mod hash_functions; 5 | mod hasher; 6 | pub mod heed; 7 | //mod heed_codec; 8 | pub mod signature; 9 | mod sketch; 10 | pub mod sketcher; 11 | mod varintencoding; 12 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use clap::{error::ErrorKind, CommandFactory, Parser}; 4 | use indicatif::ProgressIterator; 5 | use jam_rs::{ 6 | cli::{Cli, Commands}, 7 | hash_functions::ahash, 8 | heed::HeedHandler, 9 | }; 10 | 11 | fn main() { 12 | let args = jam_rs::cli::Cli::parse(); 13 | 14 | match args.command { 15 | Commands::Sketch { .. 
} => { 16 | let mut cmd = Cli::command(); 17 | match jam_rs::file_io::FileHandler::sketch_files(args.command, args.threads) { 18 | Ok(_) => {} 19 | Err(e) => { 20 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 21 | } 22 | } 23 | } 24 | // Commands::Merge { inputs, output } => { 25 | // match jam_rs::file_io::FileHandler::concat(inputs, output) { 26 | // Ok(_) => {} 27 | // Err(e) => { 28 | // Cli::command().error(ErrorKind::ArgumentConflict, e).exit(); 29 | // } 30 | // } 31 | // } 32 | Commands::Dist { 33 | input, 34 | database, 35 | output, 36 | cutoff, 37 | } => { 38 | let mut cmd = Cli::command(); 39 | 40 | let input_files = 41 | jam_rs::file_io::FileHandler::test_and_collect_files(vec![input], false); 42 | let fs_input = match input_files { 43 | Ok(f) => f, 44 | Err(e) => { 45 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 46 | } 47 | }; 48 | 49 | if database.len() == 1 { 50 | let mut lmdb = false; 51 | if let Some(first) = database.first() { 52 | if first.is_file() { 53 | if first.extension() == Some("mdb".as_ref()) { 54 | lmdb = true; 55 | } 56 | } 57 | if lmdb { 58 | let mut lmdb_comparator = jam_rs::compare::LmdbComparator::new( 59 | first.clone(), 60 | args.threads.unwrap_or(1), 61 | cutoff, 62 | args.silent 63 | ) 64 | .unwrap_or_else(|e| cmd.error(ErrorKind::ArgumentConflict, e).exit()); // report like every other failure instead of panicking 65 | 66 | let mut input_sketch = Vec::new(); 67 | 68 | let iterator:Box> = if args.silent { 69 | Box::new(fs_input.into_iter()) 70 | } else { 71 | Box::new(fs_input.into_iter().progress()) 72 | }; 73 | 74 | for db_path in iterator { 75 | // TODO: Remove hardcoded kmer sizes / settings / parse from db 76 | match jam_rs::file_io::FileHandler::sketch_file( 77 | &db_path, 78 | lmdb_comparator.kmer_size, 79 | lmdb_comparator.fscale, 80 | None, 81 | false, 82 | jam_rs::hash_functions::Function::Small(&ahash), 83 | jam_rs::cli::HashAlgorithms::Ahash, 84 | false, 85 | ) { 86 | Ok(r) => { 87 | input_sketch.push(r); 88 | } 89 | Err(e) => { 90 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 91 | } 92 | } 93 | } 94 | 95 | 
lmdb_comparator.set_signatures(input_sketch); 96 | 97 | let mut result = match lmdb_comparator.compare() { 98 | Ok(r) => r, 99 | Err(e) => { 100 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 101 | } 102 | }; 103 | 104 | result.sort_by(|a, b| b.estimated_containment.total_cmp(&a.estimated_containment)); 105 | 106 | match output { 107 | Some(o) => { 108 | if let Err(e) = 109 | jam_rs::file_io::FileHandler::write_result(&result, o) 110 | { 111 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 112 | } 113 | } 114 | None => { 115 | for result in result { 116 | println!("{}", result); 117 | } 118 | } 119 | } 120 | return; 121 | } 122 | } 123 | }; 124 | 125 | let mut input_sketch = Vec::new(); 126 | eprintln!("Reading input sketches"); 127 | for db_path in fs_input { 128 | // TODO: Remove hardcoded kmer sizes / settings / parse from db 129 | match jam_rs::file_io::FileHandler::sketch_file( 130 | &db_path, 131 | 21, 132 | None, 133 | None, 134 | false, 135 | jam_rs::hash_functions::Function::Small(&ahash), 136 | jam_rs::cli::HashAlgorithms::Ahash, 137 | false, 138 | ) { 139 | Ok(r) => { 140 | input_sketch.push(r); 141 | } 142 | Err(e) => { 143 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 144 | } 145 | } 146 | } 147 | 148 | let database_files = 149 | jam_rs::file_io::FileHandler::test_and_collect_files(database, false); 150 | let fs = match database_files { 151 | Ok(f) => f, 152 | Err(e) => { 153 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 154 | } 155 | }; 156 | 157 | let mut db_sketches = Vec::new(); 158 | for db_path in fs { 159 | match jam_rs::file_io::FileHandler::read_signatures(&db_path) { 160 | Ok(r) => { 161 | db_sketches.extend(r); 162 | } 163 | Err(e) => { 164 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 165 | } 166 | } 167 | } 168 | 169 | match jam_rs::compare::MultiComp::new( 170 | input_sketch, 171 | db_sketches, 172 | args.threads.unwrap_or(1), // same single-thread fallback as the LMDB branch; no panic when --threads is omitted 173 | cutoff, 174 | ) { 175 | Ok(mut mc) => { 176 | if let Err(e) = mc.compare() { 177 | 
cmd.error(ErrorKind::ArgumentConflict, e).exit(); 178 | } 179 | let result = mc.finalize(); 180 | match output { 181 | Some(o) => { 182 | if let Err(e) = jam_rs::file_io::FileHandler::write_result(&result, o) { 183 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 184 | } 185 | } 186 | None => { 187 | for result in result { 188 | println!("{}", result); 189 | } 190 | } 191 | } 192 | } 193 | Err(e) => { 194 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 195 | } 196 | } 197 | } 198 | Commands::Stats { input, short } => { 199 | let mut cmd = Cli::command(); 200 | 201 | let heed_handler = match HeedHandler::new_ro(input) { 202 | Ok(heed_handler) => heed_handler, 203 | Err(e) => { 204 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 205 | } 206 | }; 207 | 208 | if short { 209 | match heed_handler.summarize_stats() { 210 | Ok(_) => {} 211 | Err(e) => { 212 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 213 | } 214 | } 215 | } else { 216 | match heed_handler.detail_sigs() { 217 | Ok(_) => {} 218 | Err(e) => { 219 | cmd.error(ErrorKind::ArgumentConflict, e).exit(); 220 | } 221 | } 222 | } 223 | } 224 | } 225 | } 226 | -------------------------------------------------------------------------------- /src/signature.rs: -------------------------------------------------------------------------------- 1 | use crate::{cli::HashAlgorithms, sketch::Sketch}; 2 | use serde::{Deserialize, Serialize}; 3 | use sourmash::signature::{Signature as SourmashSignature, SigsTrait}; 4 | use std::collections::BTreeSet; 5 | 6 | #[derive(Debug, Serialize, Deserialize, Clone)] 7 | pub struct Signature { 8 | pub file_name: String, 9 | pub sketches: Vec, 10 | pub algorithm: HashAlgorithms, 11 | pub kmer_size: u8, 12 | pub max_hash: u64, 13 | } 14 | 15 | impl From for SourmashSignature { 16 | fn from(val: Signature) -> Self { 17 | SourmashSignature::builder() 18 | .hash_function(format!("{:?}", val.algorithm)) 19 | .filename(Some(val.file_name)) 20 | .email("".to_string()) 21 | 
.license("CC0".to_string()) 22 | .name(None) 23 | .signatures( 24 | val.sketches 25 | .into_iter() 26 | .map(|sketch| sketch.into_sourmash(val.max_hash)) 27 | .collect(), 28 | ) 29 | .build() 30 | } 31 | } 32 | 33 | impl From for Signature { 34 | fn from(sourmash_signature: SourmashSignature) -> Self { 35 | let mut sketches = Vec::new(); 36 | let mut max_hash = None; 37 | let mut kmer_size = None; 38 | for sketch in sourmash_signature.sketches() { 39 | match sketch { 40 | sourmash::sketch::Sketch::MinHash(mash) => { 41 | if let Some(max_hash) = max_hash { 42 | if max_hash != mash.max_hash() { 43 | panic!("Max hash of sketches is not equal"); 44 | } 45 | } else { 46 | max_hash = Some(mash.max_hash()); 47 | } 48 | 49 | if let Some(kmer_size) = kmer_size { 50 | if kmer_size != mash.ksize() as u8 { 51 | panic!("Kmer size of sketches is not equal"); 52 | } 53 | } else { 54 | kmer_size = Some(mash.ksize() as u8); 55 | } 56 | 57 | let mut sketch = Sketch::new( 58 | sourmash_signature.filename(), 59 | mash.mins().len(), 60 | mash.ksize() as u8, 61 | ); 62 | sketch.hashes = mash.mins().into_iter().collect::>(); 63 | sketches.push(sketch); 64 | } 65 | sourmash::sketch::Sketch::LargeMinHash(mash) => { 66 | if let Some(max_hash) = max_hash { 67 | if max_hash != mash.max_hash() { 68 | panic!("Max hash of sketches is not equal"); 69 | } 70 | } else { 71 | max_hash = Some(mash.max_hash()); 72 | } 73 | 74 | if let Some(kmer_size) = kmer_size { 75 | if kmer_size != mash.ksize() as u8 { 76 | panic!("Kmer size of sketches is not equal"); 77 | } 78 | } else { 79 | kmer_size = Some(mash.ksize() as u8); 80 | } 81 | 82 | let mut sketch = Sketch::new( 83 | sourmash_signature.filename(), 84 | mash.mins().len(), 85 | mash.ksize() as u8, 86 | ); 87 | sketch.hashes = mash.mins().into_iter().collect::>(); 88 | sketches.push(sketch); 89 | } 90 | sourmash::sketch::Sketch::HyperLogLog(_) => { 91 | unimplemented!("HyperLogLog sketches are not supported") 92 | } 93 | } 94 | } 95 | Signature { 96 | 
file_name: sourmash_signature.filename(), 97 | sketches, 98 | algorithm: HashAlgorithms::Murmur3, 99 | kmer_size: kmer_size.expect("No sketch with kmer_size found"), 100 | max_hash: max_hash.expect("No sketch with max hash found"), 101 | } 102 | } 103 | } 104 | 105 | impl Signature { 106 | pub fn collapse(&mut self) -> Sketch { 107 | let mut sketch = Sketch::new(self.file_name.to_string(), 0, self.kmer_size); 108 | for old_sketch in self.sketches.drain(..) { 109 | sketch.hashes.extend(old_sketch.hashes); 110 | sketch.num_kmers += old_sketch.num_kmers; 111 | } 112 | sketch 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/sketch.rs: -------------------------------------------------------------------------------- 1 | use itertools::Itertools; 2 | use serde::{Deserialize, Serialize}; 3 | use sourmash::sketch::{minhash::KmerMinHash, Sketch as SourmashSketch}; 4 | use std::collections::BTreeSet; 5 | 6 | #[derive(Debug, Serialize, Deserialize, Clone, Default)] 7 | pub struct Sketch { 8 | pub name: String, // Name of file or sequence 9 | pub hashes: BTreeSet, // Hashes with stats 10 | pub num_kmers: usize, // Number of kmers (collected) 11 | pub kmer_size: u8, // Kmer size 12 | } 13 | 14 | impl Sketch { 15 | pub fn new(name: String, num_kmers: usize, kmer_size: u8) -> Self { 16 | Sketch { 17 | name, 18 | num_kmers, 19 | kmer_size, 20 | hashes: BTreeSet::new(), 21 | } 22 | } 23 | } 24 | 25 | impl Sketch { 26 | pub fn into_sourmash(self, max_hash: u64) -> SourmashSketch { 27 | let sketch = KmerMinHash::builder() 28 | .ksize(self.kmer_size as u32) 29 | .num(self.hashes.len() as u32) 30 | .max_hash(max_hash) 31 | .mins(self.hashes.into_iter().sorted().collect::>()) 32 | .build(); 33 | SourmashSketch::MinHash(sketch) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/sketcher.rs: -------------------------------------------------------------------------------- 1 | use 
crate::{cli::HashAlgorithms, hash_functions::Function, signature::Signature, sketch::Sketch}; 2 | use needletail::{parser::SequenceRecord, Sequence}; 3 | use std::collections::BTreeSet; 4 | 5 | #[derive(Debug, Default)] 6 | struct SketchHelper { 7 | pub max_hash: u64, 8 | hit_counter: u64, 9 | kmer_seq_counter: u64, 10 | pub nmax: u64, 11 | pub btree: BTreeSet, 12 | } 13 | 14 | impl SketchHelper { 15 | pub fn new(max_hash: u64, nmax: Option) -> Self { 16 | SketchHelper { 17 | nmax: nmax.unwrap_or(u64::MAX), 18 | hit_counter: 0, 19 | kmer_seq_counter: 0, 20 | max_hash, 21 | btree: BTreeSet::new(), 22 | } 23 | } 24 | 25 | pub fn push(&mut self, hash: u64) { 26 | // Increase the local sequence counter in any case 27 | self.kmer_seq_counter += 1; 28 | if hash < self.max_hash { 29 | self.hit_counter += 1; 30 | self.btree.insert(hash); 31 | if self.btree.len() > self.nmax as usize { 32 | self.btree.pop_last(); 33 | } 34 | } 35 | } 36 | 37 | pub fn reset(&mut self) { 38 | let nmax = self.nmax; 39 | *self = Self::default(); 40 | self.nmax = nmax; 41 | } 42 | 43 | pub fn into_sketch(&mut self, name: String, kmer_size: u8) -> Sketch { 44 | let mut sketch = Sketch::new(name, self.btree.len(), kmer_size); 45 | let old_map = std::mem::replace(&mut self.btree, BTreeSet::new()); 46 | sketch.hashes = old_map.into_iter().collect(); 47 | self.reset(); 48 | sketch 49 | } 50 | } 51 | 52 | pub struct Sketcher<'a> { 53 | name: String, 54 | kmer_length: u8, 55 | helper: SketchHelper, 56 | completed_sketches: Vec, 57 | singleton: bool, 58 | function: Function<'a>, 59 | algorithm: HashAlgorithms, 60 | } 61 | 62 | impl<'a> Sketcher<'a> { 63 | pub fn new( 64 | kmer_length: u8, 65 | name: String, 66 | singleton: bool, 67 | max_hash: u64, 68 | nmax: Option, 69 | function: Function<'a>, 70 | algorithm: HashAlgorithms, 71 | ) -> Self { 72 | Sketcher { 73 | name, 74 | kmer_length, 75 | helper: SketchHelper::new(max_hash, nmax), 76 | singleton, 77 | completed_sketches: Vec::new(), 78 | function, 
79 | algorithm, 80 | } 81 | } 82 | } 83 | 84 | impl Sketcher<'_> { 85 | // This is more or less derived from the `process` method in `finch-rs`: 86 | // https://github.com/onecodex/finch-rs/blob/master/lib/src/sketch_schemes/mash.rs 87 | pub fn process<'seq, 'a, 'inner>(&'a mut self, seq: &'seq SequenceRecord<'inner>) 88 | where 89 | 'a: 'seq, 90 | 'seq: 'inner, 91 | { 92 | let name = seq.id(); 93 | let seq = seq.normalize(false); 94 | if self.kmer_length <= 31 { 95 | let func_small = self.function.get_small().unwrap(); 96 | for (_, kmer, _) in seq.bit_kmers(self.kmer_length, true) { 97 | self.helper.push(func_small(kmer.0)); 98 | } 99 | } else { 100 | let func_large = self.function.get_large().unwrap(); 101 | let rc = seq.reverse_complement(); 102 | for (_, kmer, _) in seq.canonical_kmers(self.kmer_length, &rc) { 103 | self.helper.push(func_large(kmer)); 104 | } 105 | } 106 | if self.singleton { 107 | self.completed_sketches.push( 108 | self.helper 109 | .into_sketch(String::from_utf8_lossy(name).to_string(), self.kmer_length), 110 | ); 111 | } 112 | } 113 | 114 | pub fn finish(self) -> Signature { 115 | let max_hash = self.helper.max_hash; 116 | let file_name = self.name.to_string(); 117 | let algorithm = self.algorithm.clone(); 118 | let kmer_size = self.kmer_length; 119 | let mut sketches = self.completed_sketches; 120 | let mut helper = self.helper; 121 | sketches.push(helper.into_sketch(self.name, self.kmer_length)); 122 | Signature { 123 | file_name, 124 | sketches, 125 | max_hash, 126 | algorithm, 127 | kmer_size, 128 | } 129 | } 130 | } 131 | 132 | // #[cfg(test)] 133 | // mod tests { 134 | // use super::*; 135 | 136 | // #[test] 137 | // fn test_sketch_helper() { 138 | // let mut helper = SketchHelper::new(1, 100, None, None); 139 | // helper.initialize_record(Some(Stats::new(0, 0))); 140 | // helper.push(1); 141 | // helper.push(2); 142 | // helper.push(3); 143 | // assert_eq!( 144 | // helper.into_sketch("sketch".to_string(), 1), 145 | // Sketch { 146 | 
// name: "sketch".to_string(), 147 | // hashes: HashMap::from_iter(vec![(1, Some(Stats::new(0, 0)))]), 148 | // num_kmers: 1, 149 | // max_kmers: 3, 150 | // kmer_size: 1 151 | // } 152 | // ); 153 | // } 154 | // } 155 | -------------------------------------------------------------------------------- /src/varintencoding.rs: -------------------------------------------------------------------------------- 1 | use std::borrow::Cow; 2 | 3 | use heed::BoxedError; 4 | use integer_encoding::{VarInt, VarIntReader}; 5 | 6 | pub struct VarIntEncoder; 7 | 8 | impl heed::BytesEncode<'_> for VarIntEncoder { 9 | type EItem = Vec; 10 | 11 | fn bytes_encode(item: &Self::EItem) -> Result, BoxedError> { 12 | let mut vec = Vec::new(); 13 | for integer in item { 14 | vec.extend_from_slice(&integer.encode_var_vec()); 15 | } 16 | Ok(Cow::Owned(vec)) 17 | } 18 | } 19 | 20 | impl heed::BytesDecode<'_> for VarIntEncoder { 21 | type DItem = Vec; 22 | fn bytes_decode(bytes: &[u8]) -> Result { 23 | let mut vec = Vec::new(); 24 | let mut bytes = bytes; 25 | while !bytes.is_empty() { 26 | vec.push(VarIntReader::read_varint(&mut bytes).map_err(|e| e.to_string())?); 27 | } 28 | Ok(vec) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /tests/hash_statistics.rs: -------------------------------------------------------------------------------- 1 | // kolmogorov-smirnov from: https://github.com/tmmcguire/hashers/blob/master/examples/kolmogorov-smirnov.rs 2 | // See 3 | // - https://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm 4 | // - https://onlinecourses.science.psu.edu/stat414/node/322/ 5 | 6 | /// Hash a sequence of values, returning the hashes sorted. 
7 | #[inline] 8 | fn do_hashes_bytes(fcn: fn(&[u8]) -> u64, data: &[Vec<u8>]) -> Vec<u64> { 9 | let mut res: Vec<u64> = data.iter().map(|elt| fcn(elt)).collect(); 10 | res.sort(); 11 | res 12 | } 13 | /// Hash a sequence of `u64` values, returning the hashes sorted. 14 | #[inline] 15 | fn do_hashes_u64(fcn: fn(u64) -> u64, data: &[u64]) -> Vec<u64> { 16 | let mut res: Vec<u64> = data.iter().map(|elt| fcn(*elt)).collect(); 17 | res.sort(); 18 | res 19 | } 20 | 21 | /// Cumulative Distribution Function for the Uniform Distribution. 22 | fn cdf_uniform(x: u64) -> f64 { 23 | // Wish we had f128s. Gonna be issues here. 24 | (x as f64) / (u64::MAX as f64) 25 | } 26 | 27 | /// Compute the Kolmogorov-Smirnov test. 28 | /// 29 | /// ECDF: Experimental Cumulative Distribution Function. The distribution represented by the 30 | /// samples. 31 | /// 32 | /// TCDF: Theoretical Cumulative Distribution Function. The theoretical distribution to be tested 33 | /// against; in this case the uniform distribution. 34 | fn ks(samples: &[u64]) -> f64 { 35 | let n = samples.len() as f64; 36 | let mut last_ecdf = 0.0f64; 37 | let mut ks = f64::MIN; 38 | for (i, x) in samples.iter().enumerate() { 39 | let tcdf = (i as f64) / n; // NOTE(review): i/n is the empirical step and cdf_uniform(x) the theoretical value, so the two names look swapped - confirm 40 | let next_ecdf = cdf_uniform(*x); 41 | let d1 = (last_ecdf - tcdf).abs(); 42 | let d2 = (tcdf - next_ecdf).abs(); 43 | ks = ks.max(d1.max(d2)); 44 | last_ecdf = next_ecdf; 45 | } 46 | ks 47 | } 48 | 49 | fn print_ks(hash: &str, d: f64) { 50 | assert!(d < 0.005); // The KS statistic (largest gap between the two CDFs) must stay below 0.005.
51 | println!("{:10} {: <10.10}", hash, d); 52 | } 53 | 54 | #[inline] 55 | pub fn murmur3_old(kmer: &[u8]) -> u64 { 56 | murmurhash3::murmurhash3_x64_128(kmer, 42).0 57 | } 58 | 59 | #[inline] 60 | pub fn murmur3_new(kmer: &[u8]) -> u64 { 61 | fastmurmur3::murmur3_x64_128(kmer, 42) as u64 62 | } 63 | 64 | #[test] 65 | fn run_ks() { 66 | let samples = (100_000_000_000..100_000_100_000u64).collect::>(); 67 | 68 | let samples_bytes = samples 69 | .iter() 70 | .map(|x| x.to_be_bytes().to_vec()) 71 | .collect::>(); 72 | print_ks( 73 | "xxhash3", 74 | ks(&do_hashes_bytes( 75 | jam_rs::hash_functions::xxhash3, 76 | samples_bytes.as_slice(), 77 | )), 78 | ); 79 | print_ks( 80 | "ahash", 81 | ks(&do_hashes_u64(jam_rs::hash_functions::ahash, &samples)), 82 | ); 83 | print_ks( 84 | "murmur3_old", 85 | ks(&do_hashes_bytes(murmur3_old, &samples_bytes)), 86 | ); 87 | print_ks( 88 | "murmur3_new", 89 | ks(&do_hashes_bytes(murmur3_new, &samples_bytes)), 90 | ); 91 | } 92 | 93 | #[test] 94 | fn test_bit_distribution() { 95 | let samples = (100_000_000_000..100_010_000_000u64).collect::>(); 96 | 97 | let samples_bytes = samples 98 | .iter() 99 | .map(|x| x.to_be_bytes().to_vec()) 100 | .collect::>(); 101 | 102 | let mut xxhash3_bits = [0u64; 64]; 103 | let mut ahash_bits = [0u64; 64]; 104 | let mut murmur3_old_bits = [0u64; 64]; 105 | let mut murmur3_new_bits = [0u64; 64]; 106 | 107 | for x in 0..samples.len() { 108 | let xx = jam_rs::hash_functions::xxhash3(samples_bytes[x].as_slice()); 109 | unrolled_64bits(xx, &mut xxhash3_bits); 110 | let ah = jam_rs::hash_functions::ahash(samples[x]); 111 | unrolled_64bits(ah, &mut ahash_bits); 112 | let mo = murmur3_old(samples_bytes[x].as_slice()); 113 | unrolled_64bits(mo, &mut murmur3_old_bits); 114 | let mn = murmur3_new(samples_bytes[x].as_slice()); 115 | unrolled_64bits(mn, &mut murmur3_new_bits); 116 | } 117 | 118 | println!("bit|xxhash3|ahash|murmur3_old|murmur3_new"); 119 | for x in 0..64 { 120 | let xxhash_bit = xxhash3_bits[x] as 
f64 / 10_000_000f64; 121 | let ahash_bit = ahash_bits[x] as f64 / 10_000_000f64; 122 | let murmur3_old_bit = murmur3_old_bits[x] as f64 / 10_000_000f64; 123 | let murmur3_new_bit = murmur3_new_bits[x] as f64 / 10_000_000f64; 124 | assert!(xxhash_bit > 0.49); 125 | assert!(xxhash_bit < 0.51); 126 | assert!(ahash_bit > 0.49); 127 | assert!(ahash_bit < 0.51); 128 | assert!(murmur3_old_bit > 0.49); 129 | assert!(murmur3_old_bit < 0.51); 130 | assert!(murmur3_new_bit > 0.49); 131 | assert!(murmur3_new_bit < 0.51); 132 | println!( 133 | "{}|{}|{}|{}|{}", 134 | x, xxhash_bit, ahash_bit, murmur3_old_bit, murmur3_new_bit 135 | ); 136 | } 137 | } 138 | 139 | fn unrolled_64bits(num: u64, nums: &mut [u64; 64]) { 140 | for i in 0..64 { 141 | if num & (1u64 << i) != 0 { 142 | nums[i] += 1; 143 | } 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /tests/sketching.rs: -------------------------------------------------------------------------------- 1 | use jam_rs::file_io::FileHandler; 2 | use sourmash::sketch::Sketch; 3 | use std::{ 4 | fs, 5 | path::{self, PathBuf}, 6 | }; 7 | 8 | fn get_hashes_sketch(sketch: &Sketch) -> Vec { 9 | if let Sketch::MinHash(minhash) = sketch { 10 | minhash.mins() 11 | } else { 12 | panic!("Sketch is not a MinHash sketch"); 13 | } 14 | } 15 | 16 | #[test] 17 | fn test_file_sketching_basic() { 18 | let input_file = "tests/testfiles/test.small.fa"; 19 | FileHandler::sketch_files( 20 | jam_rs::cli::Commands::Sketch { 21 | input: vec![PathBuf::from(input_file)], 22 | output: Some(PathBuf::from("test.small.fa.test")), 23 | kmer_size: 33, 24 | fscale: None, 25 | nmax: None, 26 | format: jam_rs::cli::OutputFormats::Sourmash, 27 | algorithm: jam_rs::cli::HashAlgorithms::Murmur3, 28 | singleton: false, 29 | }, 30 | None, 31 | ) 32 | .unwrap(); 33 | 34 | let created_sketch = 35 | sourmash::signature::Signature::from_path(path::Path::new("test.small.fa.test")) 36 | .unwrap() 37 | .pop() 38 | .unwrap() 39 | 
.sketches() 40 | .pop() 41 | .unwrap(); 42 | 43 | let expected_sketch = sourmash::signature::Signature::from_path(path::Path::new( 44 | "tests/testfiles/test.small.fasta.sourmash_k33.sig", 45 | )) 46 | .unwrap() 47 | .pop() 48 | .unwrap() 49 | .sketches() 50 | .pop() 51 | .unwrap(); 52 | 53 | for (created, expected) in get_hashes_sketch(&created_sketch) 54 | .into_iter() 55 | .zip(get_hashes_sketch(&expected_sketch).into_iter()) 56 | { 57 | println!("{} == {}", created, expected); 58 | assert_eq!(created, expected); 59 | } 60 | } 61 | 62 | #[test] 63 | fn test_file_sketching_lmdb() { 64 | let input_file = "tests/testfiles/test.small.fa"; 65 | fs::create_dir_all("testout").unwrap(); // tolerate a directory left over from a previous run 66 | FileHandler::sketch_files( 67 | jam_rs::cli::Commands::Sketch { 68 | input: vec![PathBuf::from(input_file)], 69 | output: Some(PathBuf::from("testout")), 70 | kmer_size: 33, 71 | fscale: None, 72 | nmax: None, 73 | format: jam_rs::cli::OutputFormats::Lmdb, 74 | algorithm: jam_rs::cli::HashAlgorithms::Murmur3, 75 | singleton: false, 76 | }, 77 | None, 78 | ) 79 | .unwrap(); 80 | } 81 | 82 | // #[test] 83 | // fn test_file_sketching_comp() { 84 | // let input_file = "tests/testfiles/test.small.fa"; 85 | // FileHandler::sketch_files( 86 | // jam_rs::cli::Commands::Sketch { 87 | // input: vec![PathBuf::from(input_file)], 88 | // output: Some(PathBuf::from("test.small.fa.test.bin")), 89 | // kmer_size: 33, 90 | // fscale: None, 91 | // nmax: None, 92 | // format: jam_rs::cli::OutputFormats::Bin, 93 | // algorithm: jam_rs::cli::HashAlgorithms::Murmur3, 94 | // singleton: false, 95 | // }, 96 | // None, 97 | // ) 98 | // .unwrap(); 99 | 100 | // let read_to_bytes = std::fs::read("test.small.fa.test.bin").unwrap(); 101 | // let mut signature: Vec = 102 | // bincode::deserialize_from(read_to_bytes.as_slice()).unwrap(); 103 | // let signature = signature.pop().unwrap(); 104 | 105 | // let expected_signature = sourmash::signature::Signature::from_path(path::Path::new( 106 | //
"tests/testfiles/test.small.fasta.sourmash_k33.sig", 107 | // )) 108 | // .unwrap() 109 | // .pop() 110 | // .unwrap(); 111 | 112 | // let expected_signature = jam_rs::signature::Signature::from(expected_signature); 113 | 114 | // assert_eq!(signature.max_hash, expected_signature.max_hash); 115 | // assert_eq!(signature.kmer_size, expected_signature.kmer_size); 116 | // assert_eq!(signature.sketches.len(), expected_signature.sketches.len()); 117 | // assert_eq!( 118 | // signature.sketches[0].hashes, 119 | // expected_signature.sketches[0].hashes 120 | // ); 121 | // } 122 | -------------------------------------------------------------------------------- /tests/testfiles/output.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/St4NNi/jam-rs/2b5b72b9ec4dc4fbf02415bccf1cec0a352164a6/tests/testfiles/output.bin -------------------------------------------------------------------------------- /tests/testfiles/short.fa: -------------------------------------------------------------------------------- 1 | >Acinetobacter_guillouiae_strain_NBRC_110550_NZ_AP014630 [modified] 2 | GATAATTTATCCACAGCTTGTGGAAAACCTTATCCACAACTGCTTTGAATTTGAGTTTAT 3 | TTACAGAAGGTTGCTTTTTTGAGCGCAAATTCCTTTATTATTGAGTAATACTCCACTCCT 4 | TTTATTTTACTTCCTTCTCTTTTACGCACGTATAAAGAACCTATCTCTTTTTCAATTTCC 5 | TTTAAGAGTTCATCTTTTGTTATTTCGCTTTTTAGGAATAGCTCTTTAAAATAAGAAGGC 6 | AACCAAAAAAAACTATGTCCACTTTTTTATATTTTAATTCTTCCTCATACTCTTCTTTTC 7 | TTTTTTTCATATCTTCAAGAATGAGGCTAACATTACCATGTGGGGAGGATAAACTTTTTA 8 | AAATATCTGAATATGCTATAACAGTCATCTTTCTTGCTCCTTTATTTTTTTTAGTATAAC 9 | AATTAGATTGAATTAACCATGACAGATGTGGATAACTTGGTTAGAATGGCGACCCCTTCA 10 | CCTACAGGGTAGGGGTAAATAGTCTTTTTTGAATTTAAAATTGAATGAAGGGGTTTCACA 11 | TGCTTTGGACGGACTGCTTAACTCGCTTGCGACAAGAGCTCTCTGATAATGTCTTTGCCA 12 | TGTGG -------------------------------------------------------------------------------- /tests/testfiles/test.short.fa.sourmash_k33.sig: 
-------------------------------------------------------------------------------- 1 | [{"class":"sourmash_signature","email":"","hash_function":"0.murmur64","filename":"./tests/testfiles/short.fa","license":"CC0","signatures":[{"num":0,"ksize":33,"seed":42,"max_hash":18446744073709551615,"mins":[19916266331765313,49612108431566693,127080787518776276,184824699785807868,249509521076291171,273846231702273907,392607627696639588,441732197543629540,457860465617978005,478108538917657061,492063821842604528,550974657406355128,558730317439617572,575776869714153685,577947352252259573,669517759499673057,700041433997222244,705775560795855809,721355198710473189,772147852033344098,793094941595784795,800931906581166998,803588016591619921,849933317892824746,876564235110283402,896066887823311716,919929010823617106,1036932904444384450,1039407196391236783,1085357010898624629,1154417989718413060,1233963710184820041,1256865897567565897,1301400451068927722,1322417785719468686,1337553992212222913,1359855950800912891,1397965853122008848,1404639715893722866,1409827769064931553,1438646218874593536,1444130303324838702,1463433396534866672,1487758546564651400,1508262187434573335,1546634130785912461,1548790675619621178,1560425585266611596,1633748617492589652,1680359757195875230,1759399287717304962,1768497048937777426,1798076480218840095,1850918667119492251,1885574228889635180,1954306366777417458,1998189935364583704,2038481050798650725,2046291390754683618,2099931067835211552,2103835000783468055,2108503300247622997,2120795238149505456,2127705521995714720,2175056767918415155,2187518802413867058,2251338902931556614,2288204501513004936,2336092231849265884,2365024550793383311,2389589122034144606,2399284320085639445,2407042599637241487,2462787266635355507,2484815237861384963,2495568322228030988,2606949634577134846,2608746549672201530,2638780885497218083,2647324305428878173,2666261986452345323,2681059617517779823,2689108983491524531,2720870790692680123,2746922149382763379,2824655812327207761,2841072941517
905522,2851042739632825294,2881919225833409499,2906926555899357714,2966011766863548406,2973584922234801989,3002180308619982962,3015082165635034263,3055862015686114539,3056001185387579170,3066704456969969480,3082632388724773354,3091905504119940657,3091917605869791450,3133324021841957675,3142458638716234158,3152899548662330485,3191022469096960887,3250885700121199239,3282099267665675474,3334322060303981425,3360958698054053319,3364665369674998936,3419269181277301356,3427542872350566096,3451572618123171079,3494925097158484949,3527035106232494749,3543441199985373233,3556392384684668987,3573306021673377719,3608640268919387902,3628167572954404545,3636893040806471692,3677719700158927626,3720179377124240074,3744737777121144325,3755111633815318557,3783867418255091844,3794351403875544965,3795630112704602737,3831777874168892891,3837248188602077948,3845242577588624586,4042165333901114209,4093722374840151947,4154440279659089324,4161743417678644582,4205312673183562870,4216157624560118392,4287871897245921056,4416665075000717040,4446274323401792323,4459094407115386982,4494197922946912499,4503942099681952049,4563521484892247480,4563784121381558232,4632217856854400682,4703831783408755489,4707777511256837645,4787742269509929130,4819022274542544354,4836016550970969384,4854415218998201720,4862532907772833063,4895200345158520251,4912414376853217531,4949785200168551438,5138515530477383062,5220725632235549337,5232275160361588969,5232565931729975664,5237061088672893795,5267667625724956134,5295204026856639196,5306178932140292307,5316519619284176032,5335332564744972674,5370442164962854361,5375194018776705964,5401977219407360140,5452040879646259722,5500092896744448242,5510009548857055301,5551852227636351307,5575385884706231332,5578422687622403655,5643701912818157824,5655314803035362920,5674135534729555971,5684835108642287905,5693676586680067266,5731749364201611060,5740820936050921184,5787606507474686786,5795929184854578593,5849202809781409533,5942606413424689561,5946527859459118990,5950999778277
364245,6036495658323310384,6054812361677425737,6118908023903489918,6140991957271794029,6149420236791305416,6175204067734030814,6176689226052727670,6202187658831906143,6260590493139634199,6297545241868967139,6310176940882829712,6318678268447918078,6332057879881541381,6344984984979026327,6373973762626125247,6389194629242006384,6407494622934582203,6467421933807961017,6490250587169011160,6501743643396221591,6553638392583581536,6553642611631893398,6580579307505433708,6682226682948883480,6788754069437929896,6825049198549238780,6857025758942846552,6964525196616856777,6977373969402843827,6996784398275404853,7014626204966141621,7020720622028953698,7072634525053660993,7080492619613792425,7087027578526797716,7221840140773104720,7225572378812191380,7288722417407250042,7321659877298521926,7322282828204237052,7373848790007494496,7374374142513355719,7384738690348473012,7435539225191926734,7460422327455405239,7649780810760867722,7667446951757664075,7675009036605906071,7712793242620292773,7756059203829055070,7791006790328872465,7797701912535161769,7862746920229132041,7885256332100456459,7906346458404716762,7991854547654338766,8002465370975342804,8024135055347246306,8033453776240459639,8052444660905417004,8054941017952148746,8085283986686422729,8088160155707343758,8123064015188213569,8134615027416473446,8215128982714922592,8216985882995628326,8253209128120742607,8259432453722152162,8264994398628938423,8278390584834930984,8335017340342251385,8474962527937713893,8479691438310655780,8505488343657989297,8556151145258731125,8583331477070029313,8585953185103006574,8589589048167953527,8596235482750905421,8611442063750688830,8625634940017046631,8641778855914535948,8642825634621072834,8661242072889722902,8673140006337671649,8677177710142400106,8713909637218465909,8774210930072831878,8776507290780140782,8828218144219560186,8873952259538019306,8903084598052518353,8936280956977959230,8938442911362967575,8943588680742371448,8948174068287826068,8985833304452681816,9129512293406026872,9173015161922
897354,9177268141129878756,9234076221161116894,9267120117566206496,9292587874068537507,9293530576047558231,9299773834264238730,9336703648314491623,9406259194511199346,9420894671592025255,9421649469227980357,9436849194121066470,9468046097955614757,9494321057224192438,9535739936461692874,9604349825992391598,9637283011441590214,9676080231223254744,9694163472323894191,9756992690184494840,9760822378930877776,9766507151413004904,9771740573645244522,9781484177311992536,9811117617307381155,9992830640146707461,10068288678317140218,10069008009875940019,10080195247392662061,10177650966716474221,10188928265349676309,10236714606028346633,10283668735378141982,10298276350300583430,10337559740095339755,10375583210505758666,10375689216558874025,10385270776840595717,10444795504054974992,10488009160690286977,10559571552939244055,10568797382769005497,10570015561324638064,10575693877437757461,10609074192341866158,10609842446209328404,10611433736601856314,10622355434257850584,10671334340811255658,10742203296409414981,10763880067944057500,10773206851924069492,10802380959260023798,10844555159182566991,10874842465730316173,10886674959665807097,10918935113584131382,10964005467090165413,11030644594060457588,11052733244008540705,11054333095480698958,11108465589921999239,11110104833948719038,11156263814405577610,11192659385024926919,11225211049253031259,11272671813801700756,11283591073584822039,11283678811734114958,11297963550270579629,11344686343336701077,11349424175089732516,11381052962920056500,11413434613800281802,11434051399544365732,11516797908354295393,11523565165698951611,11544006174037889255,11579508061333040232,11596003247357798875,11613268541018232953,11656895317263158916,11679221848653469710,11687647855662476873,11749980968971558382,11760486663693630403,11819948220069039647,11844746866119674966,11878946861939761612,11914309906605217199,11949439093329169136,12008283042109677498,12027632174206353717,12089001591328871690,12094955261805409201,12095778283629141452,12100746225892552157,12
165778441902320956,12218271653292954352,12226836400326589496,12230436660699802311,12243906368875676934,12258900095113958406,12394100775687568308,12399897700555715293,12440841203024632444,12472635078497297458,12478894603234179661,12509582765204471474,12571517971145081014,12619468052851052570,12644599477752712445,12670028143945674319,12727138128276573618,12767418687585131865,12776000603076586364,12808345695648631611,12811919410304423957,12876269137473874148,12903632975926206636,12916610622842106633,12917905820689781390,12925199322207333970,12926928880176042683,13033086404116933893,13091733777034954857,13104972441105988291,13118301028035716516,13130163543433716675,13138714729783972040,13156685846436397304,13220092483189770554,13240982507096194295,13244323507431943534,13256991347983328314,13263081997646182857,13338189793887088020,13371831196316897577,13392069590083755771,13452756953245486838,13543003971006867524,13688592656690085605,13706362015959801497,13735870392281662262,13779989937992752799,13839981903801279080,13867240929135427443,13895970581395311244,13901062953701639413,13915799503531084014,13916594164004297909,14000680667087829024,14003908643487828660,14008637669612602471,14016558444711552908,14018693041858450215,14022051343857264960,14040252670202342011,14052620625985568511,14077809811276865899,14101460932922281972,14159107815706561141,14181236071340091022,14235683333435749021,14254876896304960346,14314964992902867567,14322856932728190652,14380782756459448757,14384821631186434383,14408283360413545523,14470889263943798873,14503567298103774471,14505810130123063797,14515592682934893261,14540948807293759107,14613969298858402315,14670562910188083332,14683511839826648772,14691745417347889670,14778028840445192925,14856594329341598817,14930363680741591164,14931519198144514542,15017374410656867947,15018442531225542876,15051780502580357385,15072201912763247422,15091291231401699097,15170773149220496818,15209468024841641575,15264313335046809728,15278836768305545399,1536536
8188161875300,15419004049492908671,15449049244663496497,15470261728944409096,15475852197642928497,15500771557669234614,15547171877958630088,15596201490609379615,15596465664591504360,15600164284762984056,15629395877060225442,15703629351446716457,15709856361840755406,15779308555475601847,15803023165200772957,15866463256785765210,15877268546749662201,15894160539583169113,15974433056706337991,16055488039883703794,16064375086587079102,16111493723105783042,16184411103015685801,16195979228247529829,16202150593167392775,16311848557230042265,16338715735181962595,16378072092690536971,16400588365541851135,16454093912866823424,16467183170273204531,16520278833063527489,16621407244749700738,16712781539073748746,16731821533107927774,16796842231190542151,16807495330227275193,16824349505117822100,16829257494336145315,16909616158758965006,17001618694445427215,17044502978573666200,17066109724032382576,17103461422982155723,17110560699824719100,17187262795421083645,17191602842810844697,17215031952395060766,17235582544104866339,17245446985389244878,17278171489076298159,17284196711350459245,17294336793505205709,17306839340757183761,17311341547728883632,17349752964868257395,17375059645567663317,17393978220905092366,17488919837835301361,17536303293298882146,17541903517862674564,17609600704932719774,17627771213282140295,17633079503077905216,17694462982157960929,17719657499074091680,17745774316977599051,17770058630050056818,17780591936330577958,17780861391906274672,17781333759721560574,17815213719522833860,17871596598298624470,17915823043653727025,17920607043232016644,17925134794459496259,17944851458282064372,17953002366581230022,17960507993086364660,17967757561459054565,17987436841326602324,18060094718475056284,18075711673846298843,18106272742041188303,18202984022144875648,18223866832642861982,18294283936199074000,18308153767075595097,18356685323625054317,18358468212862363390,18361753075473795307,18363013652799422992,18383482404232358480,18414382297074219500,18429209458639152291],"md5sum":"4
7a3b799175d7a7e4d6dfc508c3690ea","molecule":"dna"}],"version":0.4}] -------------------------------------------------------------------------------- /tests/testfiles/testsketch.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/St4NNi/jam-rs/2b5b72b9ec4dc4fbf02415bccf1cec0a352164a6/tests/testfiles/testsketch.bin --------------------------------------------------------------------------------