├── .cargo └── config.toml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── README.md ├── benches └── bench.rs ├── benchmarks ├── Cargo.toml ├── eval.py ├── paper │ ├── figs │ │ ├── sassy_edlib_k1_match0.svg │ │ ├── sassy_edlib_k1_match1.svg │ │ └── sassy_edlib_multi_k_match1.svg │ ├── sassy_edlib_k1_match1.1.csv │ └── sassy_edlib_k1_match1.csv └── src │ ├── crispr_bench │ ├── crispr_config.toml │ ├── mod.rs │ ├── runner.rs │ └── tools.rs │ ├── edlib_bench │ ├── edlib.rs │ ├── grid.rs │ ├── grid_config.toml │ ├── mod.rs │ ├── runner.rs │ └── sim_data.rs │ └── main.rs ├── bin ├── crispr.rs ├── main.rs └── search.rs ├── justfile ├── rustfmt.toml └── src ├── bitpacking.rs ├── delta_encoding.rs ├── lib.rs ├── minima.rs ├── profiles ├── ascii.rs ├── dna.rs ├── iupac.rs └── profile.rs ├── search.rs └── trace.rs /.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [build] 2 | rustflags = ["-C", "target-cpu=native"] 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /perf.data* 3 | /flame* 4 | /examples/crispr 5 | /data 6 | *.ipynb 7 | *.png 8 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "adler2" 7 | version = "2.0.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" 10 | 11 | [[package]] 12 | name = "aho-corasick" 13 | version = "1.1.3" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 16 | dependencies = [ 17 | "memchr", 18 | ] 19 | 20 | [[package]] 21 | name = "anes" 22 | version = "0.1.6" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 25 | 26 | [[package]] 27 | name = "anstream" 28 | version = "0.6.18" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" 31 | dependencies = [ 32 | "anstyle", 33 | "anstyle-parse", 34 | "anstyle-query", 35 | "anstyle-wincon", 36 | "colorchoice", 37 | "is_terminal_polyfill", 38 | "utf8parse", 39 | ] 40 | 41 | [[package]] 42 | name = "anstyle" 43 | version = "1.0.10" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" 46 | 47 | [[package]] 48 | name = "anstyle-parse" 49 | version = "0.2.6" 50 | source = "registry+https://github.com/rust-lang/crates.io-index" 51 | checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" 52 | dependencies = [ 53 | "utf8parse", 54 | ] 55 | 56 | [[package]] 57 | name = "anstyle-query" 58 | version = "1.1.2" 59 | source = "registry+https://github.com/rust-lang/crates.io-index" 60 | checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" 61 | dependencies = [ 62 | "windows-sys", 63 | ] 64 | 65 | [[package]] 66 | name = "anstyle-wincon" 67 | version = "3.0.7" 68 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 69 | checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" 70 | dependencies = [ 71 | "anstyle", 72 | "once_cell", 73 | "windows-sys", 74 | ] 75 | 76 | [[package]] 77 | name = "arrayvec" 78 | version = "0.7.6" 79 | source = "registry+https://github.com/rust-lang/crates.io-index" 80 | checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" 81 | 82 | [[package]] 83 | name = "autocfg" 84 | version = "1.4.0" 85 | source = "registry+https://github.com/rust-lang/crates.io-index" 86 | checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" 87 | 88 | [[package]] 89 | name = "benchmarks" 90 | version = "0.1.0" 91 | dependencies = [ 92 | "clap", 93 | "edlib_rs", 94 | "once_cell", 95 | "rand", 96 | "sassy", 97 | "serde", 98 | "tempfile", 99 | "toml", 100 | ] 101 | 102 | [[package]] 103 | name = "bindgen" 104 | version = "0.63.0" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "36d860121800b2a9a94f9b5604b332d5cffb234ce17609ea479d723dbc9d3885" 107 | dependencies = [ 108 | "bitflags 1.3.2", 109 | "cexpr", 110 | "clang-sys", 111 | "lazy_static", 112 | "lazycell", 113 | "log", 114 | "peeking_take_while", 115 | "proc-macro2", 116 | "quote", 117 | "regex", 118 | "rustc-hash", 119 | "shlex", 120 | "syn 1.0.109", 121 | "which", 122 | ] 123 | 124 | [[package]] 125 | name = "bitflags" 126 | version = "1.3.2" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 129 | 130 | [[package]] 131 | name = "bitflags" 132 | version = "2.9.0" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" 135 | 136 | [[package]] 137 | name = "buffer-redux" 138 | version = "1.0.2" 139 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 140 | checksum = "4e8acf87c5b9f5897cd3ebb9a327f420e0cae9dd4e5c1d2e36f2c84c571a58f1" 141 | dependencies = [ 142 | "memchr", 143 | ] 144 | 145 | [[package]] 146 | name = "bumpalo" 147 | version = "3.17.0" 148 | source = "registry+https://github.com/rust-lang/crates.io-index" 149 | checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" 150 | 151 | [[package]] 152 | name = "bytecount" 153 | version = "0.6.8" 154 | source = "registry+https://github.com/rust-lang/crates.io-index" 155 | checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" 156 | 157 | [[package]] 158 | name = "bzip2" 159 | version = "0.4.4" 160 | source = "registry+https://github.com/rust-lang/crates.io-index" 161 | checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" 162 | dependencies = [ 163 | "bzip2-sys", 164 | "libc", 165 | ] 166 | 167 | [[package]] 168 | name = "bzip2-sys" 169 | version = "0.1.13+1.0.8" 170 | source = "registry+https://github.com/rust-lang/crates.io-index" 171 | checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" 172 | dependencies = [ 173 | "cc", 174 | "pkg-config", 175 | ] 176 | 177 | [[package]] 178 | name = "cast" 179 | version = "0.3.0" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 182 | 183 | [[package]] 184 | name = "cc" 185 | version = "1.2.21" 186 | source = "registry+https://github.com/rust-lang/crates.io-index" 187 | checksum = "8691782945451c1c383942c4874dbe63814f61cb57ef773cda2972682b7bb3c0" 188 | dependencies = [ 189 | "jobserver", 190 | "libc", 191 | "shlex", 192 | ] 193 | 194 | [[package]] 195 | name = "cexpr" 196 | version = "0.6.0" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" 199 | dependencies = [ 
200 | "nom", 201 | ] 202 | 203 | [[package]] 204 | name = "cfg-if" 205 | version = "1.0.0" 206 | source = "registry+https://github.com/rust-lang/crates.io-index" 207 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 208 | 209 | [[package]] 210 | name = "ciborium" 211 | version = "0.2.2" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 214 | dependencies = [ 215 | "ciborium-io", 216 | "ciborium-ll", 217 | "serde", 218 | ] 219 | 220 | [[package]] 221 | name = "ciborium-io" 222 | version = "0.2.2" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 225 | 226 | [[package]] 227 | name = "ciborium-ll" 228 | version = "0.2.2" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 231 | dependencies = [ 232 | "ciborium-io", 233 | "half", 234 | ] 235 | 236 | [[package]] 237 | name = "clang-sys" 238 | version = "1.8.1" 239 | source = "registry+https://github.com/rust-lang/crates.io-index" 240 | checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4" 241 | dependencies = [ 242 | "glob", 243 | "libc", 244 | "libloading", 245 | ] 246 | 247 | [[package]] 248 | name = "clap" 249 | version = "4.5.37" 250 | source = "registry+https://github.com/rust-lang/crates.io-index" 251 | checksum = "eccb054f56cbd38340b380d4a8e69ef1f02f1af43db2f0cc817a4774d80ae071" 252 | dependencies = [ 253 | "clap_builder", 254 | "clap_derive", 255 | ] 256 | 257 | [[package]] 258 | name = "clap_builder" 259 | version = "4.5.37" 260 | source = "registry+https://github.com/rust-lang/crates.io-index" 261 | checksum = "efd9466fac8543255d3b1fcad4762c5e116ffe808c8a3043d4263cd4fd4862a2" 262 | dependencies = [ 263 | "anstream", 264 | "anstyle", 265 | "clap_lex", 
266 | "strsim", 267 | ] 268 | 269 | [[package]] 270 | name = "clap_derive" 271 | version = "4.5.32" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" 274 | dependencies = [ 275 | "heck", 276 | "proc-macro2", 277 | "quote", 278 | "syn 2.0.101", 279 | ] 280 | 281 | [[package]] 282 | name = "clap_lex" 283 | version = "0.7.4" 284 | source = "registry+https://github.com/rust-lang/crates.io-index" 285 | checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" 286 | 287 | [[package]] 288 | name = "cmake" 289 | version = "0.1.54" 290 | source = "registry+https://github.com/rust-lang/crates.io-index" 291 | checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0" 292 | dependencies = [ 293 | "cc", 294 | ] 295 | 296 | [[package]] 297 | name = "colorchoice" 298 | version = "1.0.3" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" 301 | 302 | [[package]] 303 | name = "crc32fast" 304 | version = "1.4.2" 305 | source = "registry+https://github.com/rust-lang/crates.io-index" 306 | checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" 307 | dependencies = [ 308 | "cfg-if", 309 | ] 310 | 311 | [[package]] 312 | name = "criterion" 313 | version = "0.5.1" 314 | source = "registry+https://github.com/rust-lang/crates.io-index" 315 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 316 | dependencies = [ 317 | "anes", 318 | "cast", 319 | "ciborium", 320 | "clap", 321 | "criterion-plot", 322 | "is-terminal", 323 | "itertools", 324 | "num-traits", 325 | "once_cell", 326 | "oorandom", 327 | "plotters", 328 | "rayon", 329 | "regex", 330 | "serde", 331 | "serde_derive", 332 | "serde_json", 333 | "tinytemplate", 334 | "walkdir", 335 | ] 336 | 337 | [[package]] 338 | name = "criterion-plot" 339 
| version = "0.5.0" 340 | source = "registry+https://github.com/rust-lang/crates.io-index" 341 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 342 | dependencies = [ 343 | "cast", 344 | "itertools", 345 | ] 346 | 347 | [[package]] 348 | name = "crossbeam-deque" 349 | version = "0.8.6" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" 352 | dependencies = [ 353 | "crossbeam-epoch", 354 | "crossbeam-utils", 355 | ] 356 | 357 | [[package]] 358 | name = "crossbeam-epoch" 359 | version = "0.9.18" 360 | source = "registry+https://github.com/rust-lang/crates.io-index" 361 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 362 | dependencies = [ 363 | "crossbeam-utils", 364 | ] 365 | 366 | [[package]] 367 | name = "crossbeam-utils" 368 | version = "0.8.21" 369 | source = "registry+https://github.com/rust-lang/crates.io-index" 370 | checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" 371 | 372 | [[package]] 373 | name = "crunchy" 374 | version = "0.2.3" 375 | source = "registry+https://github.com/rust-lang/crates.io-index" 376 | checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" 377 | 378 | [[package]] 379 | name = "derive_more" 380 | version = "0.99.20" 381 | source = "registry+https://github.com/rust-lang/crates.io-index" 382 | checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" 383 | dependencies = [ 384 | "proc-macro2", 385 | "quote", 386 | "syn 2.0.101", 387 | ] 388 | 389 | [[package]] 390 | name = "edlib_rs" 391 | version = "0.2.0" 392 | source = "git+https://github.com/pairwise-alignment/edlib-rs#57d6aad38f62d6002e228d539144958ca48faaa0" 393 | dependencies = [ 394 | "bindgen", 395 | "cmake", 396 | ] 397 | 398 | [[package]] 399 | name = "either" 400 | version = "1.15.0" 401 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 402 | checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" 403 | 404 | [[package]] 405 | name = "env_filter" 406 | version = "0.1.3" 407 | source = "registry+https://github.com/rust-lang/crates.io-index" 408 | checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" 409 | dependencies = [ 410 | "log", 411 | "regex", 412 | ] 413 | 414 | [[package]] 415 | name = "env_logger" 416 | version = "0.11.8" 417 | source = "registry+https://github.com/rust-lang/crates.io-index" 418 | checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" 419 | dependencies = [ 420 | "anstream", 421 | "anstyle", 422 | "env_filter", 423 | "jiff", 424 | "log", 425 | ] 426 | 427 | [[package]] 428 | name = "equivalent" 429 | version = "1.0.2" 430 | source = "registry+https://github.com/rust-lang/crates.io-index" 431 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 432 | 433 | [[package]] 434 | name = "errno" 435 | version = "0.3.11" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" 438 | dependencies = [ 439 | "libc", 440 | "windows-sys", 441 | ] 442 | 443 | [[package]] 444 | name = "fastrand" 445 | version = "2.3.0" 446 | source = "registry+https://github.com/rust-lang/crates.io-index" 447 | checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" 448 | 449 | [[package]] 450 | name = "flate2" 451 | version = "1.1.1" 452 | source = "registry+https://github.com/rust-lang/crates.io-index" 453 | checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" 454 | dependencies = [ 455 | "crc32fast", 456 | "miniz_oxide", 457 | ] 458 | 459 | [[package]] 460 | name = "getrandom" 461 | version = "0.3.2" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = 
"73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" 464 | dependencies = [ 465 | "cfg-if", 466 | "libc", 467 | "r-efi", 468 | "wasi", 469 | ] 470 | 471 | [[package]] 472 | name = "glob" 473 | version = "0.3.2" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" 476 | 477 | [[package]] 478 | name = "half" 479 | version = "2.6.0" 480 | source = "registry+https://github.com/rust-lang/crates.io-index" 481 | checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" 482 | dependencies = [ 483 | "cfg-if", 484 | "crunchy", 485 | ] 486 | 487 | [[package]] 488 | name = "hashbrown" 489 | version = "0.15.3" 490 | source = "registry+https://github.com/rust-lang/crates.io-index" 491 | checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" 492 | 493 | [[package]] 494 | name = "heck" 495 | version = "0.5.0" 496 | source = "registry+https://github.com/rust-lang/crates.io-index" 497 | checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" 498 | 499 | [[package]] 500 | name = "hermit-abi" 501 | version = "0.3.9" 502 | source = "registry+https://github.com/rust-lang/crates.io-index" 503 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 504 | 505 | [[package]] 506 | name = "hermit-abi" 507 | version = "0.5.0" 508 | source = "registry+https://github.com/rust-lang/crates.io-index" 509 | checksum = "fbd780fe5cc30f81464441920d82ac8740e2e46b29a6fad543ddd075229ce37e" 510 | 511 | [[package]] 512 | name = "home" 513 | version = "0.5.11" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "589533453244b0995c858700322199b2becb13b627df2851f64a2775d024abcf" 516 | dependencies = [ 517 | "windows-sys", 518 | ] 519 | 520 | [[package]] 521 | name = "indexmap" 522 | version = "2.9.0" 523 | source = "registry+https://github.com/rust-lang/crates.io-index" 524 
| checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" 525 | dependencies = [ 526 | "equivalent", 527 | "hashbrown", 528 | ] 529 | 530 | [[package]] 531 | name = "is-terminal" 532 | version = "0.4.16" 533 | source = "registry+https://github.com/rust-lang/crates.io-index" 534 | checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" 535 | dependencies = [ 536 | "hermit-abi 0.5.0", 537 | "libc", 538 | "windows-sys", 539 | ] 540 | 541 | [[package]] 542 | name = "is_terminal_polyfill" 543 | version = "1.70.1" 544 | source = "registry+https://github.com/rust-lang/crates.io-index" 545 | checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" 546 | 547 | [[package]] 548 | name = "itertools" 549 | version = "0.10.5" 550 | source = "registry+https://github.com/rust-lang/crates.io-index" 551 | checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 552 | dependencies = [ 553 | "either", 554 | ] 555 | 556 | [[package]] 557 | name = "itoa" 558 | version = "1.0.15" 559 | source = "registry+https://github.com/rust-lang/crates.io-index" 560 | checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" 561 | 562 | [[package]] 563 | name = "jiff" 564 | version = "0.2.13" 565 | source = "registry+https://github.com/rust-lang/crates.io-index" 566 | checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" 567 | dependencies = [ 568 | "jiff-static", 569 | "log", 570 | "portable-atomic", 571 | "portable-atomic-util", 572 | "serde", 573 | ] 574 | 575 | [[package]] 576 | name = "jiff-static" 577 | version = "0.2.13" 578 | source = "registry+https://github.com/rust-lang/crates.io-index" 579 | checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" 580 | dependencies = [ 581 | "proc-macro2", 582 | "quote", 583 | "syn 2.0.101", 584 | ] 585 | 586 | [[package]] 587 | name = "jobserver" 588 | version = "0.1.33" 589 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 590 | checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a" 591 | dependencies = [ 592 | "getrandom", 593 | "libc", 594 | ] 595 | 596 | [[package]] 597 | name = "js-sys" 598 | version = "0.3.77" 599 | source = "registry+https://github.com/rust-lang/crates.io-index" 600 | checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" 601 | dependencies = [ 602 | "once_cell", 603 | "wasm-bindgen", 604 | ] 605 | 606 | [[package]] 607 | name = "lazy_static" 608 | version = "1.5.0" 609 | source = "registry+https://github.com/rust-lang/crates.io-index" 610 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 611 | 612 | [[package]] 613 | name = "lazycell" 614 | version = "1.3.0" 615 | source = "registry+https://github.com/rust-lang/crates.io-index" 616 | checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" 617 | 618 | [[package]] 619 | name = "libc" 620 | version = "0.2.172" 621 | source = "registry+https://github.com/rust-lang/crates.io-index" 622 | checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" 623 | 624 | [[package]] 625 | name = "libloading" 626 | version = "0.8.6" 627 | source = "registry+https://github.com/rust-lang/crates.io-index" 628 | checksum = "fc2f4eb4bc735547cfed7c0a4922cbd04a4655978c09b54f1f7b228750664c34" 629 | dependencies = [ 630 | "cfg-if", 631 | "windows-targets", 632 | ] 633 | 634 | [[package]] 635 | name = "liblzma" 636 | version = "0.3.6" 637 | source = "registry+https://github.com/rust-lang/crates.io-index" 638 | checksum = "a631d2b24be269775ba8f7789a6afa1ac228346a20c9e87dbbbe4975a79fd764" 639 | dependencies = [ 640 | "liblzma-sys", 641 | ] 642 | 643 | [[package]] 644 | name = "liblzma-sys" 645 | version = "0.3.13" 646 | source = "registry+https://github.com/rust-lang/crates.io-index" 647 | checksum = "efdadf1a99aceff34553de1461674ab6ac7e7f0843ae9875e339f4a14eb43475" 648 | 
dependencies = [ 649 | "cc", 650 | "libc", 651 | "pkg-config", 652 | ] 653 | 654 | [[package]] 655 | name = "linux-raw-sys" 656 | version = "0.4.15" 657 | source = "registry+https://github.com/rust-lang/crates.io-index" 658 | checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" 659 | 660 | [[package]] 661 | name = "linux-raw-sys" 662 | version = "0.9.4" 663 | source = "registry+https://github.com/rust-lang/crates.io-index" 664 | checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12" 665 | 666 | [[package]] 667 | name = "log" 668 | version = "0.4.27" 669 | source = "registry+https://github.com/rust-lang/crates.io-index" 670 | checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" 671 | 672 | [[package]] 673 | name = "memchr" 674 | version = "2.7.4" 675 | source = "registry+https://github.com/rust-lang/crates.io-index" 676 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 677 | 678 | [[package]] 679 | name = "minimal-lexical" 680 | version = "0.2.1" 681 | source = "registry+https://github.com/rust-lang/crates.io-index" 682 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 683 | 684 | [[package]] 685 | name = "miniz_oxide" 686 | version = "0.8.8" 687 | source = "registry+https://github.com/rust-lang/crates.io-index" 688 | checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" 689 | dependencies = [ 690 | "adler2", 691 | ] 692 | 693 | [[package]] 694 | name = "needletail" 695 | version = "0.6.3" 696 | source = "registry+https://github.com/rust-lang/crates.io-index" 697 | checksum = "6aa22e1ae8bce4ecf257e2475ef2046026caea08d66b1848d073fe7bc77e4351" 698 | dependencies = [ 699 | "buffer-redux", 700 | "bytecount", 701 | "bzip2", 702 | "flate2", 703 | "liblzma", 704 | "memchr", 705 | "zstd", 706 | ] 707 | 708 | [[package]] 709 | name = "nom" 710 | version = "7.1.3" 711 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 713 | dependencies = [ 714 | "memchr", 715 | "minimal-lexical", 716 | ] 717 | 718 | [[package]] 719 | name = "num-traits" 720 | version = "0.2.19" 721 | source = "registry+https://github.com/rust-lang/crates.io-index" 722 | checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" 723 | dependencies = [ 724 | "autocfg", 725 | ] 726 | 727 | [[package]] 728 | name = "num_cpus" 729 | version = "1.16.0" 730 | source = "registry+https://github.com/rust-lang/crates.io-index" 731 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" 732 | dependencies = [ 733 | "hermit-abi 0.3.9", 734 | "libc", 735 | ] 736 | 737 | [[package]] 738 | name = "once_cell" 739 | version = "1.21.3" 740 | source = "registry+https://github.com/rust-lang/crates.io-index" 741 | checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" 742 | 743 | [[package]] 744 | name = "oorandom" 745 | version = "11.1.5" 746 | source = "registry+https://github.com/rust-lang/crates.io-index" 747 | checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" 748 | 749 | [[package]] 750 | name = "pa-types" 751 | version = "0.1.0" 752 | source = "git+https://github.com/pairwise-alignment/pa-types#e02484de627cf20400a9435f493a832ea34a259c" 753 | dependencies = [ 754 | "clap", 755 | "derive_more", 756 | "itertools", 757 | "parse-size", 758 | "serde", 759 | ] 760 | 761 | [[package]] 762 | name = "parse-size" 763 | version = "1.1.0" 764 | source = "registry+https://github.com/rust-lang/crates.io-index" 765 | checksum = "487f2ccd1e17ce8c1bfab3a65c89525af41cfad4c8659021a1e9a2aacd73b89b" 766 | 767 | [[package]] 768 | name = "peeking_take_while" 769 | version = "0.1.2" 770 | source = "registry+https://github.com/rust-lang/crates.io-index" 771 | checksum = 
"19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" 772 | 773 | [[package]] 774 | name = "pkg-config" 775 | version = "0.3.32" 776 | source = "registry+https://github.com/rust-lang/crates.io-index" 777 | checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" 778 | 779 | [[package]] 780 | name = "plotters" 781 | version = "0.3.7" 782 | source = "registry+https://github.com/rust-lang/crates.io-index" 783 | checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" 784 | dependencies = [ 785 | "num-traits", 786 | "plotters-backend", 787 | "plotters-svg", 788 | "wasm-bindgen", 789 | "web-sys", 790 | ] 791 | 792 | [[package]] 793 | name = "plotters-backend" 794 | version = "0.3.7" 795 | source = "registry+https://github.com/rust-lang/crates.io-index" 796 | checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" 797 | 798 | [[package]] 799 | name = "plotters-svg" 800 | version = "0.3.7" 801 | source = "registry+https://github.com/rust-lang/crates.io-index" 802 | checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" 803 | dependencies = [ 804 | "plotters-backend", 805 | ] 806 | 807 | [[package]] 808 | name = "portable-atomic" 809 | version = "1.11.0" 810 | source = "registry+https://github.com/rust-lang/crates.io-index" 811 | checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" 812 | 813 | [[package]] 814 | name = "portable-atomic-util" 815 | version = "0.2.4" 816 | source = "registry+https://github.com/rust-lang/crates.io-index" 817 | checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" 818 | dependencies = [ 819 | "portable-atomic", 820 | ] 821 | 822 | [[package]] 823 | name = "ppv-lite86" 824 | version = "0.2.21" 825 | source = "registry+https://github.com/rust-lang/crates.io-index" 826 | checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" 827 | dependencies = [ 828 | "zerocopy", 829 | ] 830 | 
831 | [[package]] 832 | name = "proc-macro2" 833 | version = "1.0.95" 834 | source = "registry+https://github.com/rust-lang/crates.io-index" 835 | checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" 836 | dependencies = [ 837 | "unicode-ident", 838 | ] 839 | 840 | [[package]] 841 | name = "quote" 842 | version = "1.0.40" 843 | source = "registry+https://github.com/rust-lang/crates.io-index" 844 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 845 | dependencies = [ 846 | "proc-macro2", 847 | ] 848 | 849 | [[package]] 850 | name = "r-efi" 851 | version = "5.2.0" 852 | source = "registry+https://github.com/rust-lang/crates.io-index" 853 | checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" 854 | 855 | [[package]] 856 | name = "rand" 857 | version = "0.9.1" 858 | source = "registry+https://github.com/rust-lang/crates.io-index" 859 | checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97" 860 | dependencies = [ 861 | "rand_chacha", 862 | "rand_core", 863 | ] 864 | 865 | [[package]] 866 | name = "rand_chacha" 867 | version = "0.9.0" 868 | source = "registry+https://github.com/rust-lang/crates.io-index" 869 | checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" 870 | dependencies = [ 871 | "ppv-lite86", 872 | "rand_core", 873 | ] 874 | 875 | [[package]] 876 | name = "rand_core" 877 | version = "0.9.3" 878 | source = "registry+https://github.com/rust-lang/crates.io-index" 879 | checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" 880 | dependencies = [ 881 | "getrandom", 882 | ] 883 | 884 | [[package]] 885 | name = "rayon" 886 | version = "1.10.0" 887 | source = "registry+https://github.com/rust-lang/crates.io-index" 888 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" 889 | dependencies = [ 890 | "either", 891 | "rayon-core", 892 | ] 893 | 894 | [[package]] 895 | name = "rayon-core" 896 | 
version = "1.12.1" 897 | source = "registry+https://github.com/rust-lang/crates.io-index" 898 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 899 | dependencies = [ 900 | "crossbeam-deque", 901 | "crossbeam-utils", 902 | ] 903 | 904 | [[package]] 905 | name = "regex" 906 | version = "1.11.1" 907 | source = "registry+https://github.com/rust-lang/crates.io-index" 908 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 909 | dependencies = [ 910 | "aho-corasick", 911 | "memchr", 912 | "regex-automata", 913 | "regex-syntax", 914 | ] 915 | 916 | [[package]] 917 | name = "regex-automata" 918 | version = "0.4.9" 919 | source = "registry+https://github.com/rust-lang/crates.io-index" 920 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 921 | dependencies = [ 922 | "aho-corasick", 923 | "memchr", 924 | "regex-syntax", 925 | ] 926 | 927 | [[package]] 928 | name = "regex-syntax" 929 | version = "0.8.5" 930 | source = "registry+https://github.com/rust-lang/crates.io-index" 931 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 932 | 933 | [[package]] 934 | name = "rustc-hash" 935 | version = "1.1.0" 936 | source = "registry+https://github.com/rust-lang/crates.io-index" 937 | checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" 938 | 939 | [[package]] 940 | name = "rustix" 941 | version = "0.38.44" 942 | source = "registry+https://github.com/rust-lang/crates.io-index" 943 | checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" 944 | dependencies = [ 945 | "bitflags 2.9.0", 946 | "errno", 947 | "libc", 948 | "linux-raw-sys 0.4.15", 949 | "windows-sys", 950 | ] 951 | 952 | [[package]] 953 | name = "rustix" 954 | version = "1.0.7" 955 | source = "registry+https://github.com/rust-lang/crates.io-index" 956 | checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" 957 | dependencies = [ 958 | 
"bitflags 2.9.0", 959 | "errno", 960 | "libc", 961 | "linux-raw-sys 0.9.4", 962 | "windows-sys", 963 | ] 964 | 965 | [[package]] 966 | name = "rustversion" 967 | version = "1.0.20" 968 | source = "registry+https://github.com/rust-lang/crates.io-index" 969 | checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" 970 | 971 | [[package]] 972 | name = "ryu" 973 | version = "1.0.20" 974 | source = "registry+https://github.com/rust-lang/crates.io-index" 975 | checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" 976 | 977 | [[package]] 978 | name = "same-file" 979 | version = "1.0.6" 980 | source = "registry+https://github.com/rust-lang/crates.io-index" 981 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 982 | dependencies = [ 983 | "winapi-util", 984 | ] 985 | 986 | [[package]] 987 | name = "sassy" 988 | version = "0.1.0" 989 | dependencies = [ 990 | "arrayvec", 991 | "clap", 992 | "criterion", 993 | "edlib_rs", 994 | "env_logger", 995 | "log", 996 | "needletail", 997 | "num_cpus", 998 | "once_cell", 999 | "pa-types", 1000 | "rand", 1001 | "serde", 1002 | "tempfile", 1003 | "toml", 1004 | ] 1005 | 1006 | [[package]] 1007 | name = "serde" 1008 | version = "1.0.219" 1009 | source = "registry+https://github.com/rust-lang/crates.io-index" 1010 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 1011 | dependencies = [ 1012 | "serde_derive", 1013 | ] 1014 | 1015 | [[package]] 1016 | name = "serde_derive" 1017 | version = "1.0.219" 1018 | source = "registry+https://github.com/rust-lang/crates.io-index" 1019 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 1020 | dependencies = [ 1021 | "proc-macro2", 1022 | "quote", 1023 | "syn 2.0.101", 1024 | ] 1025 | 1026 | [[package]] 1027 | name = "serde_json" 1028 | version = "1.0.140" 1029 | source = "registry+https://github.com/rust-lang/crates.io-index" 1030 | checksum = 
"20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" 1031 | dependencies = [ 1032 | "itoa", 1033 | "memchr", 1034 | "ryu", 1035 | "serde", 1036 | ] 1037 | 1038 | [[package]] 1039 | name = "serde_spanned" 1040 | version = "0.6.8" 1041 | source = "registry+https://github.com/rust-lang/crates.io-index" 1042 | checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" 1043 | dependencies = [ 1044 | "serde", 1045 | ] 1046 | 1047 | [[package]] 1048 | name = "shlex" 1049 | version = "1.3.0" 1050 | source = "registry+https://github.com/rust-lang/crates.io-index" 1051 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 1052 | 1053 | [[package]] 1054 | name = "strsim" 1055 | version = "0.11.1" 1056 | source = "registry+https://github.com/rust-lang/crates.io-index" 1057 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 1058 | 1059 | [[package]] 1060 | name = "syn" 1061 | version = "1.0.109" 1062 | source = "registry+https://github.com/rust-lang/crates.io-index" 1063 | checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" 1064 | dependencies = [ 1065 | "proc-macro2", 1066 | "quote", 1067 | "unicode-ident", 1068 | ] 1069 | 1070 | [[package]] 1071 | name = "syn" 1072 | version = "2.0.101" 1073 | source = "registry+https://github.com/rust-lang/crates.io-index" 1074 | checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 1075 | dependencies = [ 1076 | "proc-macro2", 1077 | "quote", 1078 | "unicode-ident", 1079 | ] 1080 | 1081 | [[package]] 1082 | name = "tempfile" 1083 | version = "3.19.1" 1084 | source = "registry+https://github.com/rust-lang/crates.io-index" 1085 | checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" 1086 | dependencies = [ 1087 | "fastrand", 1088 | "getrandom", 1089 | "once_cell", 1090 | "rustix 1.0.7", 1091 | "windows-sys", 1092 | ] 1093 | 1094 | [[package]] 1095 | name = "tinytemplate" 1096 | 
version = "1.2.1" 1097 | source = "registry+https://github.com/rust-lang/crates.io-index" 1098 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 1099 | dependencies = [ 1100 | "serde", 1101 | "serde_json", 1102 | ] 1103 | 1104 | [[package]] 1105 | name = "toml" 1106 | version = "0.7.8" 1107 | source = "registry+https://github.com/rust-lang/crates.io-index" 1108 | checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257" 1109 | dependencies = [ 1110 | "serde", 1111 | "serde_spanned", 1112 | "toml_datetime", 1113 | "toml_edit", 1114 | ] 1115 | 1116 | [[package]] 1117 | name = "toml_datetime" 1118 | version = "0.6.9" 1119 | source = "registry+https://github.com/rust-lang/crates.io-index" 1120 | checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" 1121 | dependencies = [ 1122 | "serde", 1123 | ] 1124 | 1125 | [[package]] 1126 | name = "toml_edit" 1127 | version = "0.19.15" 1128 | source = "registry+https://github.com/rust-lang/crates.io-index" 1129 | checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" 1130 | dependencies = [ 1131 | "indexmap", 1132 | "serde", 1133 | "serde_spanned", 1134 | "toml_datetime", 1135 | "winnow", 1136 | ] 1137 | 1138 | [[package]] 1139 | name = "unicode-ident" 1140 | version = "1.0.18" 1141 | source = "registry+https://github.com/rust-lang/crates.io-index" 1142 | checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" 1143 | 1144 | [[package]] 1145 | name = "utf8parse" 1146 | version = "0.2.2" 1147 | source = "registry+https://github.com/rust-lang/crates.io-index" 1148 | checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" 1149 | 1150 | [[package]] 1151 | name = "walkdir" 1152 | version = "2.5.0" 1153 | source = "registry+https://github.com/rust-lang/crates.io-index" 1154 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 1155 | dependencies = [ 1156 | "same-file", 
1157 | "winapi-util", 1158 | ] 1159 | 1160 | [[package]] 1161 | name = "wasi" 1162 | version = "0.14.2+wasi-0.2.4" 1163 | source = "registry+https://github.com/rust-lang/crates.io-index" 1164 | checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" 1165 | dependencies = [ 1166 | "wit-bindgen-rt", 1167 | ] 1168 | 1169 | [[package]] 1170 | name = "wasm-bindgen" 1171 | version = "0.2.100" 1172 | source = "registry+https://github.com/rust-lang/crates.io-index" 1173 | checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" 1174 | dependencies = [ 1175 | "cfg-if", 1176 | "once_cell", 1177 | "rustversion", 1178 | "wasm-bindgen-macro", 1179 | ] 1180 | 1181 | [[package]] 1182 | name = "wasm-bindgen-backend" 1183 | version = "0.2.100" 1184 | source = "registry+https://github.com/rust-lang/crates.io-index" 1185 | checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" 1186 | dependencies = [ 1187 | "bumpalo", 1188 | "log", 1189 | "proc-macro2", 1190 | "quote", 1191 | "syn 2.0.101", 1192 | "wasm-bindgen-shared", 1193 | ] 1194 | 1195 | [[package]] 1196 | name = "wasm-bindgen-macro" 1197 | version = "0.2.100" 1198 | source = "registry+https://github.com/rust-lang/crates.io-index" 1199 | checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" 1200 | dependencies = [ 1201 | "quote", 1202 | "wasm-bindgen-macro-support", 1203 | ] 1204 | 1205 | [[package]] 1206 | name = "wasm-bindgen-macro-support" 1207 | version = "0.2.100" 1208 | source = "registry+https://github.com/rust-lang/crates.io-index" 1209 | checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" 1210 | dependencies = [ 1211 | "proc-macro2", 1212 | "quote", 1213 | "syn 2.0.101", 1214 | "wasm-bindgen-backend", 1215 | "wasm-bindgen-shared", 1216 | ] 1217 | 1218 | [[package]] 1219 | name = "wasm-bindgen-shared" 1220 | version = "0.2.100" 1221 | source = "registry+https://github.com/rust-lang/crates.io-index" 1222 | 
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" 1223 | dependencies = [ 1224 | "unicode-ident", 1225 | ] 1226 | 1227 | [[package]] 1228 | name = "web-sys" 1229 | version = "0.3.77" 1230 | source = "registry+https://github.com/rust-lang/crates.io-index" 1231 | checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" 1232 | dependencies = [ 1233 | "js-sys", 1234 | "wasm-bindgen", 1235 | ] 1236 | 1237 | [[package]] 1238 | name = "which" 1239 | version = "4.4.2" 1240 | source = "registry+https://github.com/rust-lang/crates.io-index" 1241 | checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" 1242 | dependencies = [ 1243 | "either", 1244 | "home", 1245 | "once_cell", 1246 | "rustix 0.38.44", 1247 | ] 1248 | 1249 | [[package]] 1250 | name = "winapi-util" 1251 | version = "0.1.9" 1252 | source = "registry+https://github.com/rust-lang/crates.io-index" 1253 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 1254 | dependencies = [ 1255 | "windows-sys", 1256 | ] 1257 | 1258 | [[package]] 1259 | name = "windows-sys" 1260 | version = "0.59.0" 1261 | source = "registry+https://github.com/rust-lang/crates.io-index" 1262 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 1263 | dependencies = [ 1264 | "windows-targets", 1265 | ] 1266 | 1267 | [[package]] 1268 | name = "windows-targets" 1269 | version = "0.52.6" 1270 | source = "registry+https://github.com/rust-lang/crates.io-index" 1271 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 1272 | dependencies = [ 1273 | "windows_aarch64_gnullvm", 1274 | "windows_aarch64_msvc", 1275 | "windows_i686_gnu", 1276 | "windows_i686_gnullvm", 1277 | "windows_i686_msvc", 1278 | "windows_x86_64_gnu", 1279 | "windows_x86_64_gnullvm", 1280 | "windows_x86_64_msvc", 1281 | ] 1282 | 1283 | [[package]] 1284 | name = "windows_aarch64_gnullvm" 1285 | version = "0.52.6" 1286 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1287 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 1288 | 1289 | [[package]] 1290 | name = "windows_aarch64_msvc" 1291 | version = "0.52.6" 1292 | source = "registry+https://github.com/rust-lang/crates.io-index" 1293 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 1294 | 1295 | [[package]] 1296 | name = "windows_i686_gnu" 1297 | version = "0.52.6" 1298 | source = "registry+https://github.com/rust-lang/crates.io-index" 1299 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 1300 | 1301 | [[package]] 1302 | name = "windows_i686_gnullvm" 1303 | version = "0.52.6" 1304 | source = "registry+https://github.com/rust-lang/crates.io-index" 1305 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 1306 | 1307 | [[package]] 1308 | name = "windows_i686_msvc" 1309 | version = "0.52.6" 1310 | source = "registry+https://github.com/rust-lang/crates.io-index" 1311 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 1312 | 1313 | [[package]] 1314 | name = "windows_x86_64_gnu" 1315 | version = "0.52.6" 1316 | source = "registry+https://github.com/rust-lang/crates.io-index" 1317 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 1318 | 1319 | [[package]] 1320 | name = "windows_x86_64_gnullvm" 1321 | version = "0.52.6" 1322 | source = "registry+https://github.com/rust-lang/crates.io-index" 1323 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 1324 | 1325 | [[package]] 1326 | name = "windows_x86_64_msvc" 1327 | version = "0.52.6" 1328 | source = "registry+https://github.com/rust-lang/crates.io-index" 1329 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 1330 | 1331 | [[package]] 1332 | name = "winnow" 1333 | version = "0.5.40" 1334 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1335 | checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" 1336 | dependencies = [ 1337 | "memchr", 1338 | ] 1339 | 1340 | [[package]] 1341 | name = "wit-bindgen-rt" 1342 | version = "0.39.0" 1343 | source = "registry+https://github.com/rust-lang/crates.io-index" 1344 | checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" 1345 | dependencies = [ 1346 | "bitflags 2.9.0", 1347 | ] 1348 | 1349 | [[package]] 1350 | name = "zerocopy" 1351 | version = "0.8.25" 1352 | source = "registry+https://github.com/rust-lang/crates.io-index" 1353 | checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" 1354 | dependencies = [ 1355 | "zerocopy-derive", 1356 | ] 1357 | 1358 | [[package]] 1359 | name = "zerocopy-derive" 1360 | version = "0.8.25" 1361 | source = "registry+https://github.com/rust-lang/crates.io-index" 1362 | checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" 1363 | dependencies = [ 1364 | "proc-macro2", 1365 | "quote", 1366 | "syn 2.0.101", 1367 | ] 1368 | 1369 | [[package]] 1370 | name = "zstd" 1371 | version = "0.13.3" 1372 | source = "registry+https://github.com/rust-lang/crates.io-index" 1373 | checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a" 1374 | dependencies = [ 1375 | "zstd-safe", 1376 | ] 1377 | 1378 | [[package]] 1379 | name = "zstd-safe" 1380 | version = "7.2.4" 1381 | source = "registry+https://github.com/rust-lang/crates.io-index" 1382 | checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d" 1383 | dependencies = [ 1384 | "zstd-sys", 1385 | ] 1386 | 1387 | [[package]] 1388 | name = "zstd-sys" 1389 | version = "2.0.15+zstd.1.5.7" 1390 | source = "registry+https://github.com/rust-lang/crates.io-index" 1391 | checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237" 1392 | dependencies = [ 1393 | "cc", 1394 | "pkg-config", 1395 | ] 
1396 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "sassy" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | [profile.release] 7 | incremental = true 8 | opt-level = 3 9 | debug = true 10 | 11 | [[bin]] 12 | name = 'sassy' 13 | path = 'bin/main.rs' 14 | 15 | 16 | [dependencies] 17 | arrayvec = "0.7.6" 18 | criterion = "0.5.1" 19 | rand = "0.9.0" 20 | pa-types.git = "https://github.com/pairwise-alignment/pa-types" 21 | clap = { version = "4.5.37", features = ["derive"] } 22 | log = "0.4.27" 23 | env_logger = "0.11.8" 24 | num_cpus = "1.16.0" 25 | needletail = "0.6.3" 26 | once_cell = "1.19.0" 27 | 28 | [features] 29 | avx512 = [] 30 | # default = ["avx512"] 31 | 32 | [[bench]] 33 | name = "bench" 34 | harness = false 35 | 36 | [dev-dependencies] 37 | edlib_rs.git = "https://github.com/pairwise-alignment/edlib-rs" 38 | toml = "0.7" 39 | serde = { version = "1.0", features = ["derive"] } 40 | once_cell = "1.19.0" 41 | tempfile = "*" 42 | 43 | [workspace] 44 | members = [ 45 | ".", 46 | "benchmarks" 47 | ] 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Sassy: SIMD Approximate String Searching 2 | 3 | Sassy is a library and tool for approximately searching short patterns in texts, 4 | the problem that goes by many names: 5 | - approximate string matching, 6 | - pattern searching, 7 | - fuzzy searching. 8 | 9 | The motivating application is searching short (~20bp) DNA fragments in a human 10 | genome (3GB), but it also works well for longer patterns up to ~1Kbp, and 11 | shorter texts. 
12 | 13 | ## Usage 14 | 15 | ``` 16 | > cargo run -r -- search --help 17 | 18 | Usage: sassy [OPTIONS] --alphabet 19 | 20 | Arguments: 21 | Pattern to search for 22 | Report matches up to (and including) this distance threshold 23 | Fasta file to search. May be gzipped 24 | 25 | Options: 26 | --alphabet The alphabet to use. DNA=ACTG, or IUPAC=ACTG+NYR... [possible values: ascii, dna, iupac] 27 | --rc Whether to include matches of the reverse-complement string 28 | -j, --threads Number of threads to use. All CPUs by default 29 | -h, --help Print help 30 | ``` 31 | 32 | You can also first build with `cargo build -r` and then do 33 | `./target/release/sassy `. 34 | 35 | ## Example 36 | 37 | To find matches of a pattern with up to 1 edit: 38 | 39 | ``` sh 40 | cargo run -r -- search --alphabet dna --rc ACTGCTACTGTACA 1 hg.fa 41 | ``` 42 | 43 | Output is written as tab-separated values to stdout, containing the sequence id, 44 | distance, strand, start and end position, matched substring, and cigar string. 45 | (For matches to the reverse-complement strand, the query is reversed and matches 46 | are reported in the forward direction.) 47 | 48 | ``` 49 | chr1 2 + 74462851 74462868 ATCGGTGTCATCAATAA =D11=X4= 50 | chr1 2 + 97381917 97381934 ACTCGGTGTCCTCATAA 10=X2=D4= 51 | chr1 2 + 196285921 196285938 actggtgtcatcggtaa 3=D10=X3= 52 | chr1 2 - 199471583 199471601 TTATCGATGACACTGAAT 13=X2=X= 53 | chr1 2 - 229999068 229999085 ATATCGATGACACCAGT X13=D3= 54 | chr1 2 - 231082126 231082144 ttatcaatgacaacgagt 5=X6=X5= 55 | ``` 56 | 57 | ## Alphabets 58 | Three alphabets are supported: 59 | - *ASCII*: only equal characters match. 60 | - *DNA*: Only `ACTG` and `actg` characters are expected, and treated case-insensitively. 61 | - *IUPAC*: On top of the DNA characters, also supports `NYR` and further 62 | characters (again, case insensitive), so that `A` matches `N`.
63 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(portable_simd, array_chunks)] 2 | use criterion::{BenchmarkId, Criterion, Throughput, black_box, criterion_group, criterion_main}; 3 | use rand::Rng; 4 | use sassy::profiles::*; 5 | use sassy::search::Searcher; 6 | use std::time::Duration; 7 | 8 | fn generate_dna_sequence(size: usize) -> Vec { 9 | let mut rng = rand::rng(); 10 | let bases = [b'A', b'T', b'G', b'C']; 11 | let mut seq = vec![b'A'; size]; 12 | for i in 0..size { 13 | seq[i] = bases[rand::Rng::random_range(&mut rng, 0..4)]; 14 | } 15 | seq 16 | } 17 | 18 | fn generate_ascii_sequence(size: usize) -> Vec { 19 | let mut rng = rand::rng(); 20 | let mut seq = vec![0; size]; 21 | for i in 0..size { 22 | seq[i] = rand::Rng::random_range(&mut rng, 0..256) as u8; 23 | } 24 | seq 25 | } 26 | 27 | fn benchmark_base_lookup(c: &mut Criterion) { 28 | let mut group = c.benchmark_group("Base lookup"); 29 | group.warm_up_time(Duration::from_secs(1)); 30 | group.measurement_time(Duration::from_secs(1)); 31 | group.sample_size(10); 32 | 33 | // Test different sequence sizes, for now just one 34 | 35 | #[allow(clippy::single_element_loop)] 36 | for size in [1024 * 1024] { 37 | let dna_seq = generate_dna_sequence(size); 38 | 39 | // Dna sequence with middle isnert 40 | let mut dna_seq_inserted = dna_seq.clone(); 41 | // in the middle of the sequence insert a random query sequence 42 | let inserted_query = generate_dna_sequence(20); 43 | let inserted_query_len = inserted_query.len(); 44 | let middle = size / 2; 45 | dna_seq_inserted[middle..middle + inserted_query_len].copy_from_slice(&inserted_query); 46 | 47 | let ascii_seq = generate_ascii_sequence(size); 48 | group.throughput(Throughput::Bytes(size as u64)); 49 | 50 | group.bench_with_input( 51 | BenchmarkId::new("encode_iupac", size), 52 | &dna_seq, 53 | |b, seq| { 54 | 
let profiler = Iupac::encode_query(b"NY").0; 55 | let mut result = Iupac::alloc_out(); 56 | b.iter(|| { 57 | for chunk in seq.array_chunks() { 58 | profiler.encode_ref(chunk, &mut result); 59 | black_box(&mut result); 60 | } 61 | }) 62 | }, 63 | ); 64 | 65 | group.bench_with_input(BenchmarkId::new("encode_dna", size), &dna_seq, |b, seq| { 66 | let profiler = Dna::encode_query(b"ACTG").0; 67 | let mut result = Dna::alloc_out(); 68 | b.iter(|| { 69 | for chunk in seq.array_chunks() { 70 | profiler.encode_ref(chunk, &mut result); 71 | black_box(&mut result); 72 | } 73 | }) 74 | }); 75 | 76 | group.bench_with_input( 77 | BenchmarkId::new("encode_ascii", size), 78 | &ascii_seq, 79 | |b, seq| { 80 | let profiler = Ascii::::encode_query(b"ABCDEFGHIJKLMNOPQRSTUVWXYZ").0; 81 | let mut result = Ascii::::alloc_out(); 82 | b.iter(|| { 83 | for chunk in seq.array_chunks() { 84 | profiler.encode_ref(chunk, &mut result); 85 | black_box(&mut result); 86 | } 87 | }) 88 | }, 89 | ); 90 | 91 | let query = b"AHOVHSHJDHFAPVHAJDJ"; 92 | group.bench_with_input( 93 | BenchmarkId::new("ascii_search_20", size), 94 | &ascii_seq, 95 | |b, seq| { 96 | let mut searcher = Searcher::::new_fwd(); 97 | b.iter(|| { 98 | let res = searcher.search(black_box(query), seq, 20); 99 | black_box(&res); 100 | }) 101 | }, 102 | ); 103 | 104 | let query = b"ACTGCAACTGCAACGACGTA"; 105 | group.bench_with_input( 106 | BenchmarkId::new("dna_search_20", size), 107 | &dna_seq, 108 | |b, seq| { 109 | let mut searcher: Searcher = Searcher::::new_fwd(); 110 | b.iter(|| { 111 | let res = searcher.search(black_box(query), seq, 20); 112 | black_box(&res); 113 | }) 114 | }, 115 | ); 116 | 117 | let query = b"ACTGCAACTGCAACGACGTA"; 118 | group.bench_with_input( 119 | BenchmarkId::new("dna_search_20_k3", size), 120 | &dna_seq, 121 | |b, seq| { 122 | let mut searcher: Searcher = Searcher::::new_fwd(); 123 | b.iter(|| { 124 | let matches = searcher.search(black_box(query), seq, 3); 125 | black_box(&matches); 126 | }) 127 | }, 
128 | ); 129 | 130 | let mut searcher: Searcher = Searcher::::new_fwd(); 131 | 132 | let matches = searcher.search(black_box(&inserted_query), &dna_seq, 1); 133 | 134 | group.bench_with_input( 135 | BenchmarkId::new("dna_search_inserted_20_k1", size), 136 | &dna_seq_inserted, 137 | |b, seq| { 138 | black_box(&matches); 139 | b.iter(|| { 140 | let matches = searcher.search(black_box(&inserted_query), seq, 1); 141 | black_box(&matches); 142 | }) 143 | }, 144 | ); 145 | 146 | let query = b"ACTGCAACTGCAACGACGTA"; 147 | group.bench_with_input( 148 | BenchmarkId::new("dna_search_20_k1", size), 149 | &dna_seq, 150 | |b, seq| { 151 | let mut searcher: Searcher = Searcher::::new_fwd(); 152 | b.iter(|| { 153 | let matches = searcher.search(black_box(query), seq, 1); 154 | black_box(&matches); 155 | }) 156 | }, 157 | ); 158 | 159 | let query = b"ACTGCAACTGCAACGACGTA"; 160 | group.bench_with_input( 161 | BenchmarkId::new("iupac_search_20", size), 162 | &dna_seq, 163 | |b, seq| { 164 | let mut searcher: Searcher = Searcher::::new_fwd(); 165 | b.iter(|| { 166 | let res = searcher.search(black_box(query), seq, 20); 167 | black_box(&res); 168 | }) 169 | }, 170 | ); 171 | 172 | let query = b"ACTGCAANTGCAACGACGTA"; 173 | group.bench_with_input( 174 | BenchmarkId::new("iupac_search_20_N", size), 175 | &dna_seq, 176 | |b, seq| { 177 | let mut searcher: Searcher = Searcher::::new_fwd(); 178 | b.iter(|| { 179 | let res = searcher.search(black_box(query), seq, 20); 180 | black_box(&res); 181 | }) 182 | }, 183 | ); 184 | 185 | let query = b"ACTGCAACTGCAACGACGTAACACCTACTAAC"; 186 | group.bench_with_input( 187 | BenchmarkId::new("iupac_search_32", size), 188 | &dna_seq, 189 | |b, seq| { 190 | let mut searcher: Searcher = Searcher::::new_fwd(); 191 | b.iter(|| { 192 | let res = searcher.search(black_box(query), seq, 32); 193 | black_box(&res); 194 | }) 195 | }, 196 | ); 197 | 198 | let query = b"ACTGCAANTGCAACGAYGTAACARCTACTAAC"; 199 | group.bench_with_input( 200 | 
BenchmarkId::new("iupac_search_32_NRY", size), 201 | &dna_seq, 202 | |b, seq| { 203 | let mut searcher: Searcher = Searcher::::new_fwd(); 204 | b.iter(|| { 205 | let res = searcher.search(black_box(query), seq, 32); 206 | black_box(&res); 207 | }) 208 | }, 209 | ); 210 | 211 | // let query = b"ACTGCAACTGCAACGACGTAACACCTACTAAC"; 212 | // group.bench_with_input( 213 | // BenchmarkId::new("iupac_find_32", size), 214 | // &dna_seq, 215 | // |b, seq| { 216 | // let mut positions = vec![]; 217 | // b.iter(|| { 218 | // Searcher::::new().search(black_box(query), seq, 32); 219 | // positions.clear(); 220 | // let mut costs = vec![]; 221 | // sassy::private::find_below_threshold( 222 | // black_box(query), 223 | // 8, 224 | // &deltas, 225 | // &mut positions, 226 | // &mut costs, 227 | // ); 228 | // black_box(&positions); 229 | // }) 230 | // }, 231 | // ); 232 | 233 | group.bench_with_input( 234 | BenchmarkId::new("dna_input_validation_valid", size), 235 | &dna_seq, 236 | |b, seq| { 237 | b.iter(|| { 238 | let profiler = Dna::encode_query(b"").0; 239 | let is_valid = profiler.valid_seq(seq); 240 | black_box(is_valid); 241 | }) 242 | }, 243 | ); 244 | 245 | group.bench_with_input( 246 | BenchmarkId::new("iupac_input_validation_valid", size), 247 | &dna_seq, 248 | |b, seq| { 249 | b.iter(|| { 250 | let profiler = Iupac::encode_query(b"").0; 251 | let is_valid = profiler.valid_seq(seq); 252 | black_box(is_valid); 253 | }) 254 | }, 255 | ); 256 | } 257 | group.finish(); 258 | } 259 | 260 | fn benchmark_iupac_reverse_complement(c: &mut Criterion) { 261 | let mut group = c.benchmark_group("IUPAC reverse complement"); 262 | group.warm_up_time(Duration::from_secs(1)); 263 | group.measurement_time(Duration::from_secs(1)); 264 | group.sample_size(10); 265 | 266 | for size in [1024 * 1024] { 267 | let seq = generate_dna_sequence(size); 268 | group.throughput(Throughput::Bytes(size as u64)); 269 | group.bench_with_input( 270 | BenchmarkId::new("iupac_reverse_complement", size), 271 
| &seq, 272 | |b, seq| { 273 | b.iter(|| { 274 | let rc = Iupac::reverse_complement(black_box(seq)); 275 | black_box(rc); 276 | }) 277 | }, 278 | ); 279 | } 280 | group.finish(); 281 | } 282 | 283 | fn benchmark_prefix_min(c: &mut Criterion) { 284 | let mut group = c.benchmark_group("Prefix min"); 285 | group.warm_up_time(Duration::from_secs(1)); 286 | group.measurement_time(Duration::from_secs(1)); 287 | group.sample_size(10); 288 | 289 | use pa_types::Cost; 290 | use sassy::private::prefix_min; 291 | use sassy::private::prefix_min_k; 292 | use sassy::private::prefix_min_k_simd; 293 | 294 | // Generate 100 random test pairs where p and m have no overlapping bits 295 | let mut rng = rand::rng(); 296 | let mut test_pairs = Vec::with_capacity(100); 297 | for _ in 0..200_000 { 298 | let p: u64 = rng.random::(); 299 | let m: u64 = rng.random::() & !p; // Ensure no overlapping bits 300 | let start_cost: Cost = rng.random_range(0..20); 301 | test_pairs.push((p, m, start_cost)); 302 | } 303 | let k: i32 = 1; 304 | 305 | group.bench_function("prefix_min", |b| { 306 | b.iter(|| { 307 | for &(p, m, _) in &test_pairs { 308 | let result = prefix_min(black_box(p), black_box(m)); 309 | black_box(result); 310 | } 311 | }) 312 | }); 313 | 314 | group.bench_function("prefix_min_k", |b| { 315 | b.iter(|| { 316 | for &(p, m, start_cost) in &test_pairs { 317 | let result = prefix_min_k( 318 | black_box(start_cost), 319 | black_box(p), 320 | black_box(m), 321 | black_box(k), 322 | ); 323 | black_box(result); 324 | } 325 | }) 326 | }); 327 | 328 | group.bench_function("prefix_min_k_simd", |b| { 329 | b.iter(|| { 330 | for &(p, m, start_cost) in &test_pairs { 331 | let result = prefix_min_k_simd( 332 | black_box(start_cost), 333 | black_box(p), 334 | black_box(m), 335 | black_box(k), 336 | ); 337 | black_box(result); 338 | } 339 | }) 340 | }); 341 | 342 | group.finish(); 343 | } 344 | 345 | criterion_group!( 346 | benches, 347 | benchmark_base_lookup, 348 | 
benchmark_iupac_reverse_complement, 349 | benchmark_prefix_min 350 | ); 351 | criterion_main!(benches); 352 | -------------------------------------------------------------------------------- /benchmarks/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "benchmarks" 3 | version = "0.1.0" 4 | edition = "2024" 5 | 6 | [dependencies] 7 | edlib_rs.git = "https://github.com/pairwise-alignment/edlib-rs" 8 | toml = "0.7" 9 | serde = { version = "1.0", features = ["derive"] } 10 | once_cell = "1.19.0" 11 | tempfile = "*" 12 | rand = "*" 13 | sassy = { path = ".." } 14 | clap = { version = "4.5.37", features = ["derive"] } -------------------------------------------------------------------------------- /benchmarks/eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import matplotlib.pyplot as plt 3 | from matplotlib.colors import LinearSegmentedColormap 4 | import pandas as pd 5 | 6 | f = "benchmarks/results.csv" 7 | df = pd.read_csv(f, delimiter=",") 8 | 9 | profile = "dna" 10 | k = 1 11 | 12 | filtered = df[ 13 | (df["profile"] == profile) 14 | & (df["alphabet"] == "Dna") 15 | & (df["k"] == k) 16 | & (df["match_fraction"] == 2) 17 | ].copy() 18 | 19 | # print(filtered) 20 | 21 | # Create a custom colormap from gray to #fcc007 22 | colors = ["#fcc007", "black"] 23 | custom_cmap = LinearSegmentedColormap.from_list("custom_gray_yellow", colors) 24 | 25 | # Plot 26 | plt.figure(figsize=(12, 7)) 27 | 28 | # Plot each query length with a gradient color and connect points with lines 29 | for i, ql in enumerate(filtered["query_length"].unique()): 30 | subset = filtered[filtered["query_length"] == ql] 31 | color = custom_cmap(i / len(filtered["query_length"].unique())) 32 | 33 | # Plot Edlib times with dotted line 34 | plt.plot( 35 | subset["text_length"], 36 | subset["edlib_ms"], 37 | color=color, 38 | marker="^", 39 | markersize=10, 40 | 
linestyle=":", 41 | label=f"Edlib (query_length={ql})", 42 | ) 43 | 44 | # Plot Sassy times with solid line 45 | plt.plot( 46 | subset["text_length"], 47 | subset["sassy_ms"], 48 | color=color, 49 | marker="o", 50 | markersize=10, 51 | linestyle="-", 52 | label=f"Sassy (query_length={ql})", 53 | ) 54 | 55 | # Log scale for x-axis with original labels 56 | xticks = sorted(filtered["text_length"].unique()) 57 | plt.xscale("log") 58 | plt.yscale("log") 59 | plt.xticks(xticks, [str(x) for x in xticks]) 60 | 61 | plt.xlabel("Text Length (log scale)", fontsize=12) 62 | plt.ylabel("Execution Time (ms, log scale)", fontsize=12) 63 | plt.title(f"Edlib vs Sassy Execution Times (bounded, {profile}, k={k})", fontsize=14) 64 | plt.legend(loc="upper left", bbox_to_anchor=(1, 1), fontsize=10) 65 | plt.grid(True, which="both", linestyle="--", linewidth=0.5, alpha=0.7) 66 | plt.tight_layout() 67 | plt.savefig("results.png", dpi=300) 68 | plt.show() -------------------------------------------------------------------------------- /benchmarks/paper/sassy_edlib_k1_match1.1.csv: -------------------------------------------------------------------------------- 1 | query_length,text_length,k,match_fraction,max_edits,bench_iter,alphabet,profile,rc,edlib_ms,sassy_ms 2 | 20,1000,1,0,1,200,Dna,dna,withoutrc,5.930000,0.015000 3 | 20,1000,1,1,1,200,Dna,dna,withoutrc,9.010000,1.035000 4 | 20,1000,2,0,2,200,Dna,dna,withoutrc,6.040000,0.015000 5 | 20,1000,2,1,2,200,Dna,dna,withoutrc,8.845000,1.010000 6 | 20,1000,3,0,3,200,Dna,dna,withoutrc,6.005000,0.015000 7 | 20,1000,3,1,3,200,Dna,dna,withoutrc,9.045000,1.015000 8 | 20,1000,4,0,4,200,Dna,dna,withoutrc,6.015000,0.005000 9 | 20,1000,4,1,4,200,Dna,dna,withoutrc,9.025000,2.185000 10 | 20,1000,5,0,5,200,Dna,dna,withoutrc,6.035000,0.040000 11 | 20,1000,5,1,5,200,Dna,dna,withoutrc,9.115000,1.040000 12 | 20,2000,1,0,1,200,Dna,dna,withoutrc,11.210000,0.020000 13 | 20,2000,1,1,1,200,Dna,dna,withoutrc,15.050000,1.010000 14 | 
20,2000,2,0,2,200,Dna,dna,withoutrc,11.035000,0.005000 15 | 20,2000,2,1,2,200,Dna,dna,withoutrc,15.060000,1.010000 16 | 20,2000,3,0,3,200,Dna,dna,withoutrc,11.060000,1.015000 17 | 20,2000,3,1,3,200,Dna,dna,withoutrc,15.215000,1.040000 18 | 20,2000,4,0,4,200,Dna,dna,withoutrc,11.040000,1.010000 19 | 20,2000,4,1,4,200,Dna,dna,withoutrc,16.060000,3.045000 20 | 20,2000,5,0,5,200,Dna,dna,withoutrc,11.035000,1.005000 21 | 20,2000,5,1,5,200,Dna,dna,withoutrc,15.210000,2.020000 22 | 20,5000,1,0,1,200,Dna,dna,withoutrc,28.575000,1.020000 23 | 20,5000,1,1,1,200,Dna,dna,withoutrc,33.160000,2.045000 24 | 20,5000,2,0,2,200,Dna,dna,withoutrc,28.725000,1.065000 25 | 20,5000,2,1,2,200,Dna,dna,withoutrc,33.245000,2.035000 26 | 20,5000,3,0,3,200,Dna,dna,withoutrc,28.135000,2.110000 27 | 20,5000,3,1,3,200,Dna,dna,withoutrc,33.265000,2.120000 28 | 20,5000,4,0,4,200,Dna,dna,withoutrc,28.120000,2.005000 29 | 20,5000,4,1,4,200,Dna,dna,withoutrc,33.770000,2.760000 30 | 20,5000,5,0,5,200,Dna,dna,withoutrc,33.130000,4.020000 31 | 20,5000,5,1,5,200,Dna,dna,withoutrc,37.485000,5.105000 32 | 20,10000,1,0,1,200,Dna,dna,withoutrc,56.520000,3.395000 33 | 20,10000,1,1,1,200,Dna,dna,withoutrc,64.060000,4.015000 34 | 20,10000,2,0,2,200,Dna,dna,withoutrc,56.755000,3.050000 35 | 20,10000,2,1,2,200,Dna,dna,withoutrc,65.875000,4.075000 36 | 20,10000,3,0,3,200,Dna,dna,withoutrc,62.740000,5.480000 37 | 20,10000,3,1,3,200,Dna,dna,withoutrc,68.600000,5.060000 38 | 20,10000,4,0,4,200,Dna,dna,withoutrc,56.955000,5.110000 39 | 20,10000,4,1,4,200,Dna,dna,withoutrc,65.555000,5.045000 40 | 20,10000,5,0,5,200,Dna,dna,withoutrc,56.925000,5.465000 41 | 20,10000,5,1,5,200,Dna,dna,withoutrc,64.415000,7.070000 42 | 20,20000,1,0,1,200,Dna,dna,withoutrc,113.675000,6.075000 43 | 20,20000,1,1,1,200,Dna,dna,withoutrc,125.635000,7.070000 44 | 20,20000,2,0,2,200,Dna,dna,withoutrc,113.725000,7.015000 45 | 20,20000,2,1,2,200,Dna,dna,withoutrc,126.020000,7.100000 46 | 20,20000,3,0,3,200,Dna,dna,withoutrc,113.605000,8.185000 47 | 
20,20000,3,1,3,200,Dna,dna,withoutrc,126.015000,8.920000 48 | 20,20000,4,0,4,200,Dna,dna,withoutrc,125.490000,10.070000 49 | 20,20000,4,1,4,200,Dna,dna,withoutrc,126.050000,9.240000 50 | 20,20000,5,0,5,200,Dna,dna,withoutrc,113.935000,14.695000 51 | 20,20000,5,1,5,200,Dna,dna,withoutrc,126.415000,14.165000 52 | 20,50000,1,0,1,200,Dna,dna,withoutrc,296.325000,15.680000 53 | 20,50000,1,1,1,200,Dna,dna,withoutrc,311.280000,16.425000 54 | 20,50000,2,0,2,200,Dna,dna,withoutrc,284.570000,18.065000 55 | 20,50000,2,1,2,200,Dna,dna,withoutrc,316.450000,19.660000 56 | 20,50000,3,0,3,200,Dna,dna,withoutrc,297.180000,21.165000 57 | 20,50000,3,1,3,200,Dna,dna,withoutrc,316.000000,21.815000 58 | 20,50000,4,0,4,200,Dna,dna,withoutrc,285.425000,22.940000 59 | 20,50000,4,1,4,200,Dna,dna,withoutrc,318.600000,23.745000 60 | 20,50000,5,0,5,200,Dna,dna,withoutrc,316.230000,40.030000 61 | 20,50000,5,1,5,200,Dna,dna,withoutrc,323.460000,29.745000 62 | 20,100000,1,0,1,200,Dna,dna,withoutrc,598.680000,31.150000 63 | 20,100000,1,1,1,200,Dna,dna,withoutrc,652.765000,32.915000 64 | 20,100000,2,0,2,200,Dna,dna,withoutrc,568.340000,35.205000 65 | 20,100000,2,1,2,200,Dna,dna,withoutrc,657.475000,35.215000 66 | 20,100000,3,0,3,200,Dna,dna,withoutrc,576.145000,49.970000 67 | 20,100000,3,1,3,200,Dna,dna,withoutrc,663.995000,43.875000 68 | 20,100000,4,0,4,200,Dna,dna,withoutrc,572.045000,45.415000 69 | 20,100000,4,1,4,200,Dna,dna,withoutrc,655.220000,49.680000 70 | 20,100000,5,0,5,200,Dna,dna,withoutrc,657.705000,82.190000 71 | 20,100000,5,1,5,200,Dna,dna,withoutrc,657.940000,65.705000 72 | 20,200000,1,0,1,200,Dna,dna,withoutrc,1139.895000,70.785000 73 | 20,200000,1,1,1,200,Dna,dna,withoutrc,1359.315000,66.145000 74 | 20,200000,2,0,2,200,Dna,dna,withoutrc,1132.455000,72.775000 75 | 20,200000,2,1,2,200,Dna,dna,withoutrc,1369.210000,74.415000 76 | 20,200000,3,0,3,200,Dna,dna,withoutrc,1139.120000,91.815000 77 | 20,200000,3,1,3,200,Dna,dna,withoutrc,1328.265000,94.045000 78 | 
20,200000,4,0,4,200,Dna,dna,withoutrc,1327.740000,96.150000 79 | 20,200000,4,1,4,200,Dna,dna,withoutrc,1330.905000,94.855000 80 | 20,200000,5,0,5,200,Dna,dna,withoutrc,1329.195000,154.920000 81 | 20,200000,5,1,5,200,Dna,dna,withoutrc,1332.170000,195.420000 82 | 40,1000,1,0,1,200,Dna,dna,withoutrc,6.275000,0.005000 83 | 40,1000,1,1,1,200,Dna,dna,withoutrc,9.240000,1.035000 84 | 40,1000,2,0,2,200,Dna,dna,withoutrc,6.020000,0.000000 85 | 40,1000,2,1,2,200,Dna,dna,withoutrc,9.340000,4.190000 86 | 40,1000,3,0,3,200,Dna,dna,withoutrc,6.050000,0.005000 87 | 40,1000,3,1,3,200,Dna,dna,withoutrc,9.715000,1.020000 88 | 40,1000,4,0,4,200,Dna,dna,withoutrc,6.045000,0.045000 89 | 40,1000,4,1,4,200,Dna,dna,withoutrc,9.085000,1.030000 90 | 40,1000,5,0,5,200,Dna,dna,withoutrc,6.050000,0.070000 91 | 40,1000,5,1,5,200,Dna,dna,withoutrc,11.320000,1.330000 92 | 40,2000,1,0,1,200,Dna,dna,withoutrc,11.065000,0.005000 93 | 40,2000,1,1,1,200,Dna,dna,withoutrc,16.050000,1.115000 94 | 40,2000,2,0,2,200,Dna,dna,withoutrc,11.085000,0.005000 95 | 40,2000,2,1,2,200,Dna,dna,withoutrc,15.230000,1.285000 96 | 40,2000,3,0,3,200,Dna,dna,withoutrc,12.760000,0.235000 97 | 40,2000,3,1,3,200,Dna,dna,withoutrc,16.810000,2.025000 98 | 40,2000,4,0,4,200,Dna,dna,withoutrc,12.025000,1.010000 99 | 40,2000,4,1,4,200,Dna,dna,withoutrc,16.060000,2.030000 100 | 40,2000,5,0,5,200,Dna,dna,withoutrc,11.040000,1.000000 101 | 40,2000,5,1,5,200,Dna,dna,withoutrc,17.715000,2.200000 102 | 40,5000,1,0,1,200,Dna,dna,withoutrc,28.665000,1.045000 103 | 40,5000,1,1,1,200,Dna,dna,withoutrc,35.380000,2.080000 104 | 40,5000,2,0,2,200,Dna,dna,withoutrc,28.645000,1.030000 105 | 40,5000,2,1,2,200,Dna,dna,withoutrc,35.770000,4.070000 106 | 40,5000,3,0,3,200,Dna,dna,withoutrc,33.940000,2.080000 107 | 40,5000,3,1,3,200,Dna,dna,withoutrc,35.635000,5.300000 108 | 40,5000,4,0,4,200,Dna,dna,withoutrc,29.940000,2.275000 109 | 40,5000,4,1,4,200,Dna,dna,withoutrc,35.225000,3.100000 110 | 40,5000,5,0,5,200,Dna,dna,withoutrc,31.515000,2.815000 
111 | 40,5000,5,1,5,200,Dna,dna,withoutrc,35.785000,3.070000 112 | 40,10000,1,0,1,200,Dna,dna,withoutrc,60.550000,3.020000 113 | 40,10000,1,1,1,200,Dna,dna,withoutrc,67.155000,4.150000 114 | 40,10000,2,0,2,200,Dna,dna,withoutrc,57.880000,3.180000 115 | 40,10000,2,1,2,200,Dna,dna,withoutrc,67.955000,4.725000 116 | 40,10000,3,0,3,200,Dna,dna,withoutrc,58.175000,4.030000 117 | 40,10000,3,1,3,200,Dna,dna,withoutrc,68.215000,5.090000 118 | 40,10000,4,0,4,200,Dna,dna,withoutrc,59.250000,4.040000 119 | 40,10000,4,1,4,200,Dna,dna,withoutrc,67.905000,5.075000 120 | 40,10000,5,0,5,200,Dna,dna,withoutrc,60.660000,5.125000 121 | 40,10000,5,1,5,200,Dna,dna,withoutrc,67.910000,6.075000 122 | 40,20000,1,0,1,200,Dna,dna,withoutrc,114.585000,6.025000 123 | 40,20000,1,1,1,200,Dna,dna,withoutrc,130.675000,7.470000 124 | 40,20000,2,0,2,200,Dna,dna,withoutrc,120.530000,7.060000 125 | 40,20000,2,1,2,200,Dna,dna,withoutrc,131.845000,8.105000 126 | 40,20000,3,0,3,200,Dna,dna,withoutrc,115.450000,8.070000 127 | 40,20000,3,1,3,200,Dna,dna,withoutrc,131.030000,11.660000 128 | 40,20000,4,0,4,200,Dna,dna,withoutrc,123.095000,9.275000 129 | 40,20000,4,1,4,200,Dna,dna,withoutrc,129.425000,10.225000 130 | 40,20000,5,0,5,200,Dna,dna,withoutrc,115.845000,9.085000 131 | 40,20000,5,1,5,200,Dna,dna,withoutrc,128.975000,10.850000 132 | 40,50000,1,0,1,200,Dna,dna,withoutrc,296.420000,15.120000 133 | 40,50000,1,1,1,200,Dna,dna,withoutrc,315.885000,23.035000 134 | 40,50000,2,0,2,200,Dna,dna,withoutrc,275.315000,16.690000 135 | 40,50000,2,1,2,200,Dna,dna,withoutrc,302.930000,18.120000 136 | 40,50000,3,0,3,200,Dna,dna,withoutrc,286.925000,20.085000 137 | 40,50000,3,1,3,200,Dna,dna,withoutrc,304.205000,22.095000 138 | 40,50000,4,0,4,200,Dna,dna,withoutrc,276.260000,20.900000 139 | 40,50000,4,1,4,200,Dna,dna,withoutrc,301.370000,22.655000 140 | 40,50000,5,0,5,200,Dna,dna,withoutrc,275.325000,23.085000 141 | 40,50000,5,1,5,200,Dna,dna,withoutrc,303.525000,24.145000 142 | 
40,100000,1,0,1,200,Dna,dna,withoutrc,555.380000,30.305000 143 | 40,100000,1,1,1,200,Dna,dna,withoutrc,607.475000,32.250000 144 | 40,100000,2,0,2,200,Dna,dna,withoutrc,558.305000,37.880000 145 | 40,100000,2,1,2,200,Dna,dna,withoutrc,613.825000,38.910000 146 | 40,100000,3,0,3,200,Dna,dna,withoutrc,561.480000,44.510000 147 | 40,100000,3,1,3,200,Dna,dna,withoutrc,628.415000,46.440000 148 | 40,100000,4,0,4,200,Dna,dna,withoutrc,566.150000,45.215000 149 | 40,100000,4,1,4,200,Dna,dna,withoutrc,629.360000,49.485000 150 | 40,100000,5,0,5,200,Dna,dna,withoutrc,577.420000,48.385000 151 | 40,100000,5,1,5,200,Dna,dna,withoutrc,625.270000,50.140000 152 | 40,200000,1,0,1,200,Dna,dna,withoutrc,1123.790000,63.105000 153 | 40,200000,1,1,1,200,Dna,dna,withoutrc,1328.145000,63.575000 154 | 40,200000,2,0,2,200,Dna,dna,withoutrc,1103.630000,70.585000 155 | 40,200000,2,1,2,200,Dna,dna,withoutrc,1293.100000,72.755000 156 | 40,200000,3,0,3,200,Dna,dna,withoutrc,1098.030000,85.370000 157 | 40,200000,3,1,3,200,Dna,dna,withoutrc,1290.085000,85.175000 158 | 40,200000,4,0,4,200,Dna,dna,withoutrc,1108.365000,91.065000 159 | 40,200000,4,1,4,200,Dna,dna,withoutrc,1331.190000,89.515000 160 | 40,200000,5,0,5,200,Dna,dna,withoutrc,1129.930000,96.190000 161 | 40,200000,5,1,5,200,Dna,dna,withoutrc,1331.295000,97.700000 162 | 80,1000,1,0,1,200,Dna,dna,withoutrc,6.285000,0.035000 163 | 80,1000,1,1,1,200,Dna,dna,withoutrc,14.210000,2.050000 164 | 80,1000,2,0,2,200,Dna,dna,withoutrc,6.015000,0.005000 165 | 80,1000,2,1,2,200,Dna,dna,withoutrc,12.420000,3.020000 166 | 80,1000,3,0,3,200,Dna,dna,withoutrc,6.010000,0.005000 167 | 80,1000,3,1,3,200,Dna,dna,withoutrc,14.070000,7.400000 168 | 80,1000,4,0,4,200,Dna,dna,withoutrc,6.255000,0.380000 169 | 80,1000,4,1,4,200,Dna,dna,withoutrc,15.170000,2.120000 170 | 80,1000,5,0,5,200,Dna,dna,withoutrc,6.015000,0.005000 171 | 80,1000,5,1,5,200,Dna,dna,withoutrc,13.055000,7.045000 172 | 80,2000,1,0,1,200,Dna,dna,withoutrc,11.040000,0.010000 173 | 
80,2000,1,1,1,200,Dna,dna,withoutrc,19.080000,3.015000 174 | 80,2000,2,0,2,200,Dna,dna,withoutrc,11.035000,0.005000 175 | 80,2000,2,1,2,200,Dna,dna,withoutrc,21.155000,2.990000 176 | 80,2000,3,0,3,200,Dna,dna,withoutrc,12.480000,1.010000 177 | 80,2000,3,1,3,200,Dna,dna,withoutrc,21.200000,3.040000 178 | 80,2000,4,0,4,200,Dna,dna,withoutrc,12.660000,1.010000 179 | 80,2000,4,1,4,200,Dna,dna,withoutrc,22.135000,3.015000 180 | 80,2000,5,0,5,200,Dna,dna,withoutrc,11.040000,1.000000 181 | 80,2000,5,1,5,200,Dna,dna,withoutrc,21.095000,3.010000 182 | 80,5000,1,0,1,200,Dna,dna,withoutrc,30.160000,1.035000 183 | 80,5000,1,1,1,200,Dna,dna,withoutrc,37.370000,4.010000 184 | 80,5000,2,0,2,200,Dna,dna,withoutrc,29.730000,2.020000 185 | 80,5000,2,1,2,200,Dna,dna,withoutrc,38.130000,4.030000 186 | 80,5000,3,0,3,200,Dna,dna,withoutrc,28.945000,2.500000 187 | 80,5000,3,1,3,200,Dna,dna,withoutrc,37.230000,4.070000 188 | 80,5000,4,0,4,200,Dna,dna,withoutrc,28.295000,2.610000 189 | 80,5000,4,1,4,200,Dna,dna,withoutrc,37.430000,4.070000 190 | 80,5000,5,0,5,200,Dna,dna,withoutrc,28.080000,2.005000 191 | 80,5000,5,1,5,200,Dna,dna,withoutrc,42.080000,5.055000 192 | 80,10000,1,0,1,200,Dna,dna,withoutrc,57.605000,3.025000 193 | 80,10000,1,1,1,200,Dna,dna,withoutrc,72.010000,5.055000 194 | 80,10000,2,0,2,200,Dna,dna,withoutrc,56.165000,3.040000 195 | 80,10000,2,1,2,200,Dna,dna,withoutrc,70.395000,5.190000 196 | 80,10000,3,0,3,200,Dna,dna,withoutrc,58.625000,4.090000 197 | 80,10000,3,1,3,200,Dna,dna,withoutrc,70.915000,6.070000 198 | 80,10000,4,0,4,200,Dna,dna,withoutrc,56.540000,4.085000 199 | 80,10000,4,1,4,200,Dna,dna,withoutrc,70.950000,6.120000 200 | 80,10000,5,0,5,200,Dna,dna,withoutrc,56.385000,5.070000 201 | 80,10000,5,1,5,200,Dna,dna,withoutrc,69.195000,7.640000 202 | 80,20000,1,0,1,200,Dna,dna,withoutrc,113.555000,6.040000 203 | 80,20000,1,1,1,200,Dna,dna,withoutrc,129.730000,8.075000 204 | 80,20000,2,0,2,200,Dna,dna,withoutrc,112.170000,7.025000 205 | 
80,20000,2,1,2,200,Dna,dna,withoutrc,130.550000,9.080000 206 | 80,20000,3,0,3,200,Dna,dna,withoutrc,111.995000,8.070000 207 | 80,20000,3,1,3,200,Dna,dna,withoutrc,127.860000,11.000000 208 | 80,20000,4,0,4,200,Dna,dna,withoutrc,111.865000,8.085000 209 | 80,20000,4,1,4,200,Dna,dna,withoutrc,127.365000,11.065000 210 | 80,20000,5,0,5,200,Dna,dna,withoutrc,111.940000,9.125000 211 | 80,20000,5,1,5,200,Dna,dna,withoutrc,129.995000,11.515000 212 | 80,50000,1,0,1,200,Dna,dna,withoutrc,279.620000,15.060000 213 | 80,50000,1,1,1,200,Dna,dna,withoutrc,310.260000,17.095000 214 | 80,50000,2,0,2,200,Dna,dna,withoutrc,280.030000,17.145000 215 | 80,50000,2,1,2,200,Dna,dna,withoutrc,312.945000,20.160000 216 | 80,50000,3,0,3,200,Dna,dna,withoutrc,279.435000,20.905000 217 | 80,50000,3,1,3,200,Dna,dna,withoutrc,306.690000,23.275000 218 | 80,50000,4,0,4,200,Dna,dna,withoutrc,278.380000,22.110000 219 | 80,50000,4,1,4,200,Dna,dna,withoutrc,309.180000,25.165000 220 | 80,50000,5,0,5,200,Dna,dna,withoutrc,277.835000,24.070000 221 | 80,50000,5,1,5,200,Dna,dna,withoutrc,307.340000,24.625000 222 | 80,100000,1,0,1,200,Dna,dna,withoutrc,555.190000,30.470000 223 | 80,100000,1,1,1,200,Dna,dna,withoutrc,605.555000,32.180000 224 | 80,100000,2,0,2,200,Dna,dna,withoutrc,555.175000,34.120000 225 | 80,100000,2,1,2,200,Dna,dna,withoutrc,608.110000,36.865000 226 | 80,100000,3,0,3,200,Dna,dna,withoutrc,554.095000,43.600000 227 | 80,100000,3,1,3,200,Dna,dna,withoutrc,608.800000,45.945000 228 | 80,100000,4,0,4,200,Dna,dna,withoutrc,555.120000,43.710000 229 | 80,100000,4,1,4,200,Dna,dna,withoutrc,607.085000,46.200000 230 | 80,100000,5,0,5,200,Dna,dna,withoutrc,560.130000,47.415000 231 | 80,100000,5,1,5,200,Dna,dna,withoutrc,608.195000,48.815000 232 | 80,200000,1,0,1,200,Dna,dna,withoutrc,1109.820000,61.425000 233 | 80,200000,1,1,1,200,Dna,dna,withoutrc,1306.600000,64.185000 234 | 80,200000,2,0,2,200,Dna,dna,withoutrc,1113.250000,71.185000 235 | 80,200000,2,1,2,200,Dna,dna,withoutrc,1308.870000,70.145000 236 | 
80,200000,3,0,3,200,Dna,dna,withoutrc,1111.805000,83.565000 237 | 80,200000,3,1,3,200,Dna,dna,withoutrc,1311.275000,84.790000 238 | 80,200000,4,0,4,200,Dna,dna,withoutrc,1113.890000,85.565000 239 | 80,200000,4,1,4,200,Dna,dna,withoutrc,1305.780000,89.660000 240 | 80,200000,5,0,5,200,Dna,dna,withoutrc,1109.520000,96.300000 241 | 80,200000,5,1,5,200,Dna,dna,withoutrc,1310.235000,99.725000 242 | 100,1000,1,0,1,200,Dna,dna,withoutrc,6.070000,0.020000 243 | 100,1000,1,1,1,200,Dna,dna,withoutrc,13.195000,3.040000 244 | 100,1000,2,0,2,200,Dna,dna,withoutrc,6.015000,0.025000 245 | 100,1000,2,1,2,200,Dna,dna,withoutrc,14.180000,3.010000 246 | 100,1000,3,0,3,200,Dna,dna,withoutrc,6.025000,0.005000 247 | 100,1000,3,1,3,200,Dna,dna,withoutrc,14.000000,3.005000 248 | 100,1000,4,0,4,200,Dna,dna,withoutrc,6.015000,0.020000 249 | 100,1000,4,1,4,200,Dna,dna,withoutrc,14.070000,3.005000 250 | 100,1000,5,0,5,200,Dna,dna,withoutrc,6.030000,0.005000 251 | 100,1000,5,1,5,200,Dna,dna,withoutrc,13.055000,3.015000 252 | 100,2000,1,0,1,200,Dna,dna,withoutrc,11.065000,0.005000 253 | 100,2000,1,1,1,200,Dna,dna,withoutrc,21.085000,3.020000 254 | 100,2000,2,0,2,200,Dna,dna,withoutrc,11.115000,0.120000 255 | 100,2000,2,1,2,200,Dna,dna,withoutrc,22.115000,3.055000 256 | 100,2000,3,0,3,200,Dna,dna,withoutrc,11.045000,1.010000 257 | 100,2000,3,1,3,200,Dna,dna,withoutrc,19.100000,3.995000 258 | 100,2000,4,0,4,200,Dna,dna,withoutrc,11.055000,1.000000 259 | 100,2000,4,1,4,200,Dna,dna,withoutrc,21.080000,3.370000 260 | 100,2000,5,0,5,200,Dna,dna,withoutrc,11.105000,1.005000 261 | 100,2000,5,1,5,200,Dna,dna,withoutrc,21.115000,3.340000 262 | 100,5000,1,0,1,200,Dna,dna,withoutrc,28.070000,1.005000 263 | 100,5000,1,1,1,200,Dna,dna,withoutrc,38.260000,4.040000 264 | 100,5000,2,0,2,200,Dna,dna,withoutrc,28.285000,2.005000 265 | 100,5000,2,1,2,200,Dna,dna,withoutrc,37.775000,4.070000 266 | 100,5000,3,0,3,200,Dna,dna,withoutrc,28.085000,2.005000 267 | 100,5000,3,1,3,200,Dna,dna,withoutrc,40.625000,4.960000 
268 | 100,5000,4,0,4,200,Dna,dna,withoutrc,28.085000,2.025000 269 | 100,5000,4,1,4,200,Dna,dna,withoutrc,37.280000,5.050000 270 | 100,5000,5,0,5,200,Dna,dna,withoutrc,28.075000,2.020000 271 | 100,5000,5,1,5,200,Dna,dna,withoutrc,37.165000,5.045000 272 | 100,10000,1,0,1,200,Dna,dna,withoutrc,56.150000,3.020000 273 | 100,10000,1,1,1,200,Dna,dna,withoutrc,67.735000,6.050000 274 | 100,10000,2,0,2,200,Dna,dna,withoutrc,56.135000,3.020000 275 | 100,10000,2,1,2,200,Dna,dna,withoutrc,70.645000,6.045000 276 | 100,10000,3,0,3,200,Dna,dna,withoutrc,56.055000,4.030000 277 | 100,10000,3,1,3,200,Dna,dna,withoutrc,68.405000,7.080000 278 | 100,10000,4,0,4,200,Dna,dna,withoutrc,56.150000,4.025000 279 | 100,10000,4,1,4,200,Dna,dna,withoutrc,67.270000,7.140000 280 | 100,10000,5,0,5,200,Dna,dna,withoutrc,56.120000,4.920000 281 | 100,10000,5,1,5,200,Dna,dna,withoutrc,68.670000,8.095000 282 | 100,20000,1,0,1,200,Dna,dna,withoutrc,113.860000,6.185000 283 | 100,20000,1,1,1,200,Dna,dna,withoutrc,133.140000,9.125000 284 | 100,20000,2,0,2,200,Dna,dna,withoutrc,114.265000,7.140000 285 | 100,20000,2,1,2,200,Dna,dna,withoutrc,129.345000,9.620000 286 | 100,20000,3,0,3,200,Dna,dna,withoutrc,112.115000,8.095000 287 | 100,20000,3,1,3,200,Dna,dna,withoutrc,129.050000,11.735000 288 | 100,20000,4,0,4,200,Dna,dna,withoutrc,112.250000,8.780000 289 | 100,20000,4,1,4,200,Dna,dna,withoutrc,127.955000,11.350000 290 | 100,20000,5,0,5,200,Dna,dna,withoutrc,112.130000,9.045000 291 | 100,20000,5,1,5,200,Dna,dna,withoutrc,130.160000,12.105000 292 | 100,50000,1,0,1,200,Dna,dna,withoutrc,295.015000,16.415000 293 | 100,50000,1,1,1,200,Dna,dna,withoutrc,314.830000,18.700000 294 | 100,50000,2,0,2,200,Dna,dna,withoutrc,282.920000,17.085000 295 | 100,50000,2,1,2,200,Dna,dna,withoutrc,313.545000,19.740000 296 | 100,50000,3,0,3,200,Dna,dna,withoutrc,284.115000,21.465000 297 | 100,50000,3,1,3,200,Dna,dna,withoutrc,315.375000,23.390000 298 | 100,50000,4,0,4,200,Dna,dna,withoutrc,279.430000,22.080000 299 | 
100,50000,4,1,4,200,Dna,dna,withoutrc,309.615000,23.995000 300 | 100,50000,5,0,5,200,Dna,dna,withoutrc,278.370000,23.075000 301 | 100,50000,5,1,5,200,Dna,dna,withoutrc,308.780000,25.145000 302 | 100,100000,1,0,1,200,Dna,dna,withoutrc,554.530000,30.705000 303 | 100,100000,1,1,1,200,Dna,dna,withoutrc,610.645000,33.990000 304 | 100,100000,2,0,2,200,Dna,dna,withoutrc,556.840000,34.135000 305 | 100,100000,2,1,2,200,Dna,dna,withoutrc,619.060000,39.050000 306 | 100,100000,3,0,3,200,Dna,dna,withoutrc,597.755000,43.235000 307 | 100,100000,3,1,3,200,Dna,dna,withoutrc,629.255000,45.775000 308 | 100,100000,4,0,4,200,Dna,dna,withoutrc,569.310000,44.355000 309 | 100,100000,4,1,4,200,Dna,dna,withoutrc,630.560000,48.920000 310 | 100,100000,5,0,5,200,Dna,dna,withoutrc,574.475000,49.170000 311 | 100,100000,5,1,5,200,Dna,dna,withoutrc,626.885000,52.365000 312 | 100,200000,1,0,1,200,Dna,dna,withoutrc,1248.225000,62.560000 313 | 100,200000,1,1,1,200,Dna,dna,withoutrc,1323.410000,66.420000 314 | 100,200000,2,0,2,200,Dna,dna,withoutrc,1108.950000,68.560000 315 | 100,200000,2,1,2,200,Dna,dna,withoutrc,1311.785000,73.880000 316 | 100,200000,3,0,3,200,Dna,dna,withoutrc,1108.870000,83.575000 317 | 100,200000,3,1,3,200,Dna,dna,withoutrc,1303.425000,90.605000 318 | 100,200000,4,0,4,200,Dna,dna,withoutrc,1117.730000,85.750000 319 | 100,200000,4,1,4,200,Dna,dna,withoutrc,1315.770000,90.565000 320 | 100,200000,5,0,5,200,Dna,dna,withoutrc,1124.425000,94.045000 321 | 100,200000,5,1,5,200,Dna,dna,withoutrc,1350.720000,100.120000 322 | 200,1000,1,0,1,200,Dna,dna,withoutrc,6.120000,0.005000 323 | 200,1000,1,1,1,200,Dna,dna,withoutrc,20.740000,7.040000 324 | 200,1000,2,0,2,200,Dna,dna,withoutrc,6.050000,0.005000 325 | 200,1000,2,1,2,200,Dna,dna,withoutrc,20.050000,21.120000 326 | 200,1000,3,0,3,200,Dna,dna,withoutrc,6.815000,0.180000 327 | 200,1000,3,1,3,200,Dna,dna,withoutrc,20.585000,6.090000 328 | 200,1000,4,0,4,200,Dna,dna,withoutrc,6.030000,0.010000 329 | 
200,1000,4,1,4,200,Dna,dna,withoutrc,20.425000,9.070000 330 | 200,1000,5,0,5,200,Dna,dna,withoutrc,6.045000,0.005000 331 | 200,1000,5,1,5,200,Dna,dna,withoutrc,21.590000,7.090000 332 | 200,2000,1,0,1,200,Dna,dna,withoutrc,12.035000,0.030000 333 | 200,2000,1,1,1,200,Dna,dna,withoutrc,25.520000,7.035000 334 | 200,2000,2,0,2,200,Dna,dna,withoutrc,12.785000,1.005000 335 | 200,2000,2,1,2,200,Dna,dna,withoutrc,30.795000,7.050000 336 | 200,2000,3,0,3,200,Dna,dna,withoutrc,12.170000,1.005000 337 | 200,2000,3,1,3,200,Dna,dna,withoutrc,26.330000,8.240000 338 | 200,2000,4,0,4,200,Dna,dna,withoutrc,12.060000,1.005000 339 | 200,2000,4,1,4,200,Dna,dna,withoutrc,26.415000,21.725000 340 | 200,2000,5,0,5,200,Dna,dna,withoutrc,12.360000,1.250000 341 | 200,2000,5,1,5,200,Dna,dna,withoutrc,27.765000,7.065000 342 | 200,5000,1,0,1,200,Dna,dna,withoutrc,28.370000,1.020000 343 | 200,5000,1,1,1,200,Dna,dna,withoutrc,45.420000,8.065000 344 | 200,5000,2,0,2,200,Dna,dna,withoutrc,28.360000,1.440000 345 | 200,5000,2,1,2,200,Dna,dna,withoutrc,45.505000,8.050000 346 | 200,5000,3,0,3,200,Dna,dna,withoutrc,28.400000,2.020000 347 | 200,5000,3,1,3,200,Dna,dna,withoutrc,45.485000,8.540000 348 | 200,5000,4,0,4,200,Dna,dna,withoutrc,28.140000,2.005000 349 | 200,5000,4,1,4,200,Dna,dna,withoutrc,45.500000,9.300000 350 | 200,5000,5,0,5,200,Dna,dna,withoutrc,29.125000,2.545000 351 | 200,5000,5,1,5,200,Dna,dna,withoutrc,44.425000,9.060000 352 | 200,10000,1,0,1,200,Dna,dna,withoutrc,56.465000,3.010000 353 | 200,10000,1,1,1,200,Dna,dna,withoutrc,73.870000,9.125000 354 | 200,10000,2,0,2,200,Dna,dna,withoutrc,56.750000,3.915000 355 | 200,10000,2,1,2,200,Dna,dna,withoutrc,74.410000,10.125000 356 | 200,10000,3,0,3,200,Dna,dna,withoutrc,56.480000,4.020000 357 | 200,10000,3,1,3,200,Dna,dna,withoutrc,74.655000,10.105000 358 | 200,10000,4,0,4,200,Dna,dna,withoutrc,56.700000,4.035000 359 | 200,10000,4,1,4,200,Dna,dna,withoutrc,74.630000,10.820000 360 | 200,10000,5,0,5,200,Dna,dna,withoutrc,56.555000,5.025000 361 | 
200,10000,5,1,5,200,Dna,dna,withoutrc,74.295000,11.075000 362 | 200,20000,1,0,1,200,Dna,dna,withoutrc,112.625000,6.025000 363 | 200,20000,1,1,1,200,Dna,dna,withoutrc,134.965000,13.055000 364 | 200,20000,2,0,2,200,Dna,dna,withoutrc,115.555000,7.035000 365 | 200,20000,2,1,2,200,Dna,dna,withoutrc,137.645000,15.205000 366 | 200,20000,3,0,3,200,Dna,dna,withoutrc,114.620000,8.070000 367 | 200,20000,3,1,3,200,Dna,dna,withoutrc,136.740000,16.290000 368 | 200,20000,4,0,4,200,Dna,dna,withoutrc,114.090000,8.050000 369 | 200,20000,4,1,4,200,Dna,dna,withoutrc,138.205000,15.165000 370 | 200,20000,5,0,5,200,Dna,dna,withoutrc,115.030000,9.355000 371 | 200,20000,5,1,5,200,Dna,dna,withoutrc,136.825000,17.310000 372 | 200,50000,1,0,1,200,Dna,dna,withoutrc,283.835000,15.080000 373 | 200,50000,1,1,1,200,Dna,dna,withoutrc,319.915000,21.530000 374 | 200,50000,2,0,2,200,Dna,dna,withoutrc,285.040000,16.330000 375 | 200,50000,2,1,2,200,Dna,dna,withoutrc,315.375000,23.985000 376 | 200,50000,3,0,3,200,Dna,dna,withoutrc,279.820000,21.060000 377 | 200,50000,3,1,3,200,Dna,dna,withoutrc,314.660000,27.235000 378 | 200,50000,4,0,4,200,Dna,dna,withoutrc,279.845000,21.205000 379 | 200,50000,4,1,4,200,Dna,dna,withoutrc,314.115000,27.930000 380 | 200,50000,5,0,5,200,Dna,dna,withoutrc,279.755000,24.040000 381 | 200,50000,5,1,5,200,Dna,dna,withoutrc,314.195000,29.840000 382 | 200,100000,1,0,1,200,Dna,dna,withoutrc,558.900000,29.800000 383 | 200,100000,1,1,1,200,Dna,dna,withoutrc,626.150000,37.345000 384 | 200,100000,2,0,2,200,Dna,dna,withoutrc,573.290000,35.820000 385 | 200,100000,2,1,2,200,Dna,dna,withoutrc,650.195000,42.735000 386 | 200,100000,3,0,3,200,Dna,dna,withoutrc,574.390000,42.265000 387 | 200,100000,3,1,3,200,Dna,dna,withoutrc,630.970000,50.970000 388 | 200,100000,4,0,4,200,Dna,dna,withoutrc,576.140000,53.140000 389 | 200,100000,4,1,4,200,Dna,dna,withoutrc,626.590000,50.390000 390 | 200,100000,5,0,5,200,Dna,dna,withoutrc,566.935000,47.425000 391 | 
200,100000,5,1,5,200,Dna,dna,withoutrc,627.225000,57.285000 392 | 200,200000,1,0,1,200,Dna,dna,withoutrc,1133.345000,61.975000 393 | 200,200000,1,1,1,200,Dna,dna,withoutrc,1316.415000,68.755000 394 | 200,200000,2,0,2,200,Dna,dna,withoutrc,1124.975000,66.095000 395 | 200,200000,2,1,2,200,Dna,dna,withoutrc,1313.240000,73.585000 396 | 200,200000,3,0,3,200,Dna,dna,withoutrc,1117.745000,97.580000 397 | 200,200000,3,1,3,200,Dna,dna,withoutrc,1370.585000,92.170000 398 | 200,200000,4,0,4,200,Dna,dna,withoutrc,1112.175000,88.845000 399 | 200,200000,4,1,4,200,Dna,dna,withoutrc,1318.220000,109.435000 400 | 200,200000,5,0,5,200,Dna,dna,withoutrc,1115.510000,95.290000 401 | 200,200000,5,1,5,200,Dna,dna,withoutrc,1317.260000,101.830000 402 | 500,1000,1,0,1,200,Dna,dna,withoutrc,7.065000,0.025000 403 | 500,1000,1,1,1,200,Dna,dna,withoutrc,39.750000,27.320000 404 | 500,1000,2,0,2,200,Dna,dna,withoutrc,7.175000,0.005000 405 | 500,1000,2,1,2,200,Dna,dna,withoutrc,40.000000,28.395000 406 | 500,1000,3,0,3,200,Dna,dna,withoutrc,7.055000,0.340000 407 | 500,1000,3,1,3,200,Dna,dna,withoutrc,39.895000,27.130000 408 | 500,1000,4,0,4,200,Dna,dna,withoutrc,7.110000,0.025000 409 | 500,1000,4,1,4,200,Dna,dna,withoutrc,39.855000,27.310000 410 | 500,1000,5,0,5,200,Dna,dna,withoutrc,7.220000,0.880000 411 | 500,1000,5,1,5,200,Dna,dna,withoutrc,39.540000,31.030000 412 | 500,2000,1,0,1,200,Dna,dna,withoutrc,12.935000,1.000000 413 | 500,2000,1,1,1,200,Dna,dna,withoutrc,45.565000,26.025000 414 | 500,2000,2,0,2,200,Dna,dna,withoutrc,13.155000,1.015000 415 | 500,2000,2,1,2,200,Dna,dna,withoutrc,45.485000,28.175000 416 | 500,2000,3,0,3,200,Dna,dna,withoutrc,13.110000,1.025000 417 | 500,2000,3,1,3,200,Dna,dna,withoutrc,45.695000,27.335000 418 | 500,2000,4,0,4,200,Dna,dna,withoutrc,13.145000,1.035000 419 | 500,2000,4,1,4,200,Dna,dna,withoutrc,46.120000,27.810000 420 | 500,2000,5,0,5,200,Dna,dna,withoutrc,13.060000,1.020000 421 | 500,2000,5,1,5,200,Dna,dna,withoutrc,46.955000,35.870000 422 | 
500,5000,1,0,1,200,Dna,dna,withoutrc,29.655000,1.975000 423 | 500,5000,1,1,1,200,Dna,dna,withoutrc,85.490000,43.325000 424 | 500,5000,2,0,2,200,Dna,dna,withoutrc,49.925000,3.050000 425 | 500,5000,2,1,2,200,Dna,dna,withoutrc,116.990000,44.710000 426 | 500,5000,3,0,3,200,Dna,dna,withoutrc,42.810000,2.035000 427 | 500,5000,3,1,3,200,Dna,dna,withoutrc,74.625000,28.580000 428 | 500,5000,4,0,4,200,Dna,dna,withoutrc,30.425000,2.035000 429 | 500,5000,4,1,4,200,Dna,dna,withoutrc,64.565000,28.335000 430 | 500,5000,5,0,5,200,Dna,dna,withoutrc,30.215000,2.595000 431 | 500,5000,5,1,5,200,Dna,dna,withoutrc,63.840000,29.350000 432 | 500,10000,1,0,1,200,Dna,dna,withoutrc,57.900000,3.210000 433 | 500,10000,1,1,1,200,Dna,dna,withoutrc,95.015000,84.315000 434 | 500,10000,2,0,2,200,Dna,dna,withoutrc,58.100000,3.055000 435 | 500,10000,2,1,2,200,Dna,dna,withoutrc,94.715000,30.320000 436 | 500,10000,3,0,3,200,Dna,dna,withoutrc,58.330000,4.110000 437 | 500,10000,3,1,3,200,Dna,dna,withoutrc,94.700000,29.525000 438 | 500,10000,4,0,4,200,Dna,dna,withoutrc,57.965000,5.025000 439 | 500,10000,4,1,4,200,Dna,dna,withoutrc,94.375000,31.070000 440 | 500,10000,5,0,5,200,Dna,dna,withoutrc,57.790000,5.030000 441 | 500,10000,5,1,5,200,Dna,dna,withoutrc,95.080000,32.855000 442 | 500,20000,1,0,1,200,Dna,dna,withoutrc,114.390000,6.110000 443 | 500,20000,1,1,1,200,Dna,dna,withoutrc,154.930000,33.480000 444 | 500,20000,2,0,2,200,Dna,dna,withoutrc,114.580000,7.025000 445 | 500,20000,2,1,2,200,Dna,dna,withoutrc,155.260000,40.820000 446 | 500,20000,3,0,3,200,Dna,dna,withoutrc,114.430000,8.130000 447 | 500,20000,3,1,3,200,Dna,dna,withoutrc,155.645000,35.160000 448 | 500,20000,4,0,4,200,Dna,dna,withoutrc,116.190000,11.655000 449 | 500,20000,4,1,4,200,Dna,dna,withoutrc,166.640000,37.810000 450 | 500,20000,5,0,5,200,Dna,dna,withoutrc,135.865000,9.200000 451 | 500,20000,5,1,5,200,Dna,dna,withoutrc,157.710000,36.470000 452 | 500,50000,1,0,1,200,Dna,dna,withoutrc,283.960000,16.120000 453 | 
500,50000,1,1,1,200,Dna,dna,withoutrc,336.185000,43.500000 454 | 500,50000,2,0,2,200,Dna,dna,withoutrc,282.775000,17.160000 455 | 500,50000,2,1,2,200,Dna,dna,withoutrc,334.275000,44.235000 456 | 500,50000,3,0,3,200,Dna,dna,withoutrc,282.930000,21.200000 457 | 500,50000,3,1,3,200,Dna,dna,withoutrc,334.030000,46.425000 458 | 500,50000,4,0,4,200,Dna,dna,withoutrc,282.635000,22.315000 459 | 500,50000,4,1,4,200,Dna,dna,withoutrc,343.800000,49.745000 460 | 500,50000,5,0,5,200,Dna,dna,withoutrc,290.905000,23.185000 461 | 500,50000,5,1,5,200,Dna,dna,withoutrc,338.025000,50.505000 462 | 500,100000,1,0,1,200,Dna,dna,withoutrc,559.745000,30.780000 463 | 500,100000,1,1,1,200,Dna,dna,withoutrc,635.400000,57.455000 464 | 500,100000,2,0,2,200,Dna,dna,withoutrc,560.475000,40.100000 465 | 500,100000,2,1,2,200,Dna,dna,withoutrc,660.990000,62.400000 466 | 500,100000,3,0,3,200,Dna,dna,withoutrc,559.305000,43.185000 467 | 500,100000,3,1,3,200,Dna,dna,withoutrc,633.730000,69.465000 468 | 500,100000,4,0,4,200,Dna,dna,withoutrc,560.045000,44.365000 469 | 500,100000,4,1,4,200,Dna,dna,withoutrc,635.170000,70.885000 470 | 500,100000,5,0,5,200,Dna,dna,withoutrc,559.495000,48.980000 471 | 500,100000,5,1,5,200,Dna,dna,withoutrc,637.375000,73.420000 472 | 500,200000,1,0,1,200,Dna,dna,withoutrc,1128.420000,63.305000 473 | 500,200000,1,1,1,200,Dna,dna,withoutrc,1344.885000,90.765000 474 | 500,200000,2,0,2,200,Dna,dna,withoutrc,1116.825000,70.820000 475 | 500,200000,2,1,2,200,Dna,dna,withoutrc,1340.630000,96.230000 476 | 500,200000,3,0,3,200,Dna,dna,withoutrc,1117.680000,82.405000 477 | 500,200000,3,1,3,200,Dna,dna,withoutrc,1346.130000,111.650000 478 | 500,200000,4,0,4,200,Dna,dna,withoutrc,1118.010000,88.130000 479 | 500,200000,4,1,4,200,Dna,dna,withoutrc,1349.315000,118.700000 480 | 500,200000,5,0,5,200,Dna,dna,withoutrc,1120.650000,93.940000 481 | 500,200000,5,1,5,200,Dna,dna,withoutrc,1347.835000,124.815000 482 | 1000,1000,1,0,1,200,Dna,dna,withoutrc,9.185000,1.010000 483 | 
1000,1000,1,1,1,200,Dna,dna,withoutrc,82.605000,91.335000 484 | 1000,1000,2,0,2,200,Dna,dna,withoutrc,9.115000,1.010000 485 | 1000,1000,2,1,2,200,Dna,dna,withoutrc,82.260000,92.200000 486 | 1000,1000,3,0,3,200,Dna,dna,withoutrc,9.280000,1.025000 487 | 1000,1000,3,1,3,200,Dna,dna,withoutrc,80.490000,90.990000 488 | 1000,1000,4,0,4,200,Dna,dna,withoutrc,9.140000,1.005000 489 | 1000,1000,4,1,4,200,Dna,dna,withoutrc,9.125000,1.010000 490 | 1000,1000,5,0,5,200,Dna,dna,withoutrc,9.080000,1.000000 491 | 1000,1000,5,1,5,200,Dna,dna,withoutrc,81.670000,90.520000 492 | 1000,2000,1,0,1,200,Dna,dna,withoutrc,15.095000,1.010000 493 | 1000,2000,1,1,1,200,Dna,dna,withoutrc,90.575000,92.930000 494 | 1000,2000,2,0,2,200,Dna,dna,withoutrc,15.180000,1.100000 495 | 1000,2000,2,1,2,200,Dna,dna,withoutrc,91.465000,90.405000 496 | 1000,2000,3,0,3,200,Dna,dna,withoutrc,15.355000,1.230000 497 | 1000,2000,3,1,3,200,Dna,dna,withoutrc,91.845000,90.700000 498 | 1000,2000,4,0,4,200,Dna,dna,withoutrc,15.240000,1.010000 499 | 1000,2000,4,1,4,200,Dna,dna,withoutrc,91.900000,115.095000 500 | 1000,2000,5,0,5,200,Dna,dna,withoutrc,15.545000,1.035000 501 | 1000,2000,5,1,5,200,Dna,dna,withoutrc,89.455000,96.000000 502 | 1000,5000,1,0,1,200,Dna,dna,withoutrc,31.855000,2.030000 503 | 1000,5000,1,1,1,200,Dna,dna,withoutrc,109.415000,96.565000 504 | 1000,5000,2,0,2,200,Dna,dna,withoutrc,31.930000,2.005000 505 | 1000,5000,2,1,2,200,Dna,dna,withoutrc,110.185000,96.015000 506 | 1000,5000,3,0,3,200,Dna,dna,withoutrc,31.850000,2.045000 507 | 1000,5000,3,1,3,200,Dna,dna,withoutrc,111.130000,95.065000 508 | 1000,5000,4,0,4,200,Dna,dna,withoutrc,31.540000,2.800000 509 | 1000,5000,4,1,4,200,Dna,dna,withoutrc,110.545000,96.750000 510 | 1000,5000,5,0,5,200,Dna,dna,withoutrc,31.955000,3.040000 511 | 1000,5000,5,1,5,200,Dna,dna,withoutrc,110.650000,126.240000 512 | 1000,10000,1,0,1,200,Dna,dna,withoutrc,60.180000,3.100000 513 | 1000,10000,1,1,1,200,Dna,dna,withoutrc,140.280000,95.130000 514 | 
1000,10000,2,0,2,200,Dna,dna,withoutrc,59.745000,4.060000 515 | 1000,10000,2,1,2,200,Dna,dna,withoutrc,142.300000,93.905000 516 | 1000,10000,3,0,3,200,Dna,dna,withoutrc,60.430000,5.060000 517 | 1000,10000,3,1,3,200,Dna,dna,withoutrc,141.015000,96.865000 518 | 1000,10000,4,0,4,200,Dna,dna,withoutrc,60.445000,5.030000 519 | 1000,10000,4,1,4,200,Dna,dna,withoutrc,142.210000,97.890000 520 | 1000,10000,5,0,5,200,Dna,dna,withoutrc,59.735000,5.040000 521 | 1000,10000,5,1,5,200,Dna,dna,withoutrc,141.850000,96.720000 522 | 1000,20000,1,0,1,200,Dna,dna,withoutrc,117.155000,6.865000 523 | 1000,20000,1,1,1,200,Dna,dna,withoutrc,201.200000,95.165000 524 | 1000,20000,2,0,2,200,Dna,dna,withoutrc,117.030000,7.230000 525 | 1000,20000,2,1,2,200,Dna,dna,withoutrc,201.085000,95.595000 526 | 1000,20000,3,0,3,200,Dna,dna,withoutrc,117.295000,9.140000 527 | 1000,20000,3,1,3,200,Dna,dna,withoutrc,202.320000,99.830000 528 | 1000,20000,4,0,4,200,Dna,dna,withoutrc,116.895000,9.070000 529 | 1000,20000,4,1,4,200,Dna,dna,withoutrc,202.540000,101.995000 530 | 1000,20000,5,0,5,200,Dna,dna,withoutrc,118.065000,10.055000 531 | 1000,20000,5,1,5,200,Dna,dna,withoutrc,205.500000,101.730000 532 | 1000,50000,1,0,1,200,Dna,dna,withoutrc,293.960000,16.085000 533 | 1000,50000,1,1,1,200,Dna,dna,withoutrc,407.730000,105.830000 534 | 1000,50000,2,0,2,200,Dna,dna,withoutrc,284.285000,18.085000 535 | 1000,50000,2,1,2,200,Dna,dna,withoutrc,380.875000,108.895000 536 | 1000,50000,3,0,3,200,Dna,dna,withoutrc,285.110000,20.820000 537 | 1000,50000,3,1,3,200,Dna,dna,withoutrc,383.715000,111.980000 538 | 1000,50000,4,0,4,200,Dna,dna,withoutrc,288.070000,22.215000 539 | 1000,50000,4,1,4,200,Dna,dna,withoutrc,386.440000,115.520000 540 | 1000,50000,5,0,5,200,Dna,dna,withoutrc,285.885000,23.480000 541 | 1000,50000,5,1,5,200,Dna,dna,withoutrc,384.795000,117.660000 542 | 1000,100000,1,0,1,200,Dna,dna,withoutrc,565.975000,31.155000 543 | 1000,100000,1,1,1,200,Dna,dna,withoutrc,690.170000,123.840000 544 | 
1000,100000,2,0,2,200,Dna,dna,withoutrc,572.510000,33.830000 545 | 1000,100000,2,1,2,200,Dna,dna,withoutrc,684.780000,126.365000 546 | 1000,100000,3,0,3,200,Dna,dna,withoutrc,561.725000,42.105000 547 | 1000,100000,3,1,3,200,Dna,dna,withoutrc,685.205000,134.945000 548 | 1000,100000,4,0,4,200,Dna,dna,withoutrc,561.900000,43.630000 549 | 1000,100000,4,1,4,200,Dna,dna,withoutrc,684.750000,136.495000 550 | 1000,100000,5,0,5,200,Dna,dna,withoutrc,563.045000,47.340000 551 | 1000,100000,5,1,5,200,Dna,dna,withoutrc,686.660000,174.200000 552 | 1000,200000,1,0,1,200,Dna,dna,withoutrc,1118.535000,62.030000 553 | 1000,200000,1,1,1,200,Dna,dna,withoutrc,1392.230000,154.270000 554 | 1000,200000,2,0,2,200,Dna,dna,withoutrc,1121.050000,67.600000 555 | 1000,200000,2,1,2,200,Dna,dna,withoutrc,1387.425000,159.425000 556 | 1000,200000,3,0,3,200,Dna,dna,withoutrc,1113.285000,86.760000 557 | 1000,200000,3,1,3,200,Dna,dna,withoutrc,1382.015000,177.890000 558 | 1000,200000,4,0,4,200,Dna,dna,withoutrc,1117.395000,87.775000 559 | 1000,200000,4,1,4,200,Dna,dna,withoutrc,1379.325000,183.265000 560 | 1000,200000,5,0,5,200,Dna,dna,withoutrc,1141.215000,98.605000 561 | 1000,200000,5,1,5,200,Dna,dna,withoutrc,1433.450000,195.520000 562 | -------------------------------------------------------------------------------- /benchmarks/src/crispr_bench/crispr_config.toml: -------------------------------------------------------------------------------- 1 | sassy_path = "/home/rickb/sassy/sassy-server/target/release/sassy" 2 | swofinder_path = "/home/rickb/sassy/SWOFINDER/SWOffinder" 3 | chopoff_path = "/home/rickb/sassy/CHOPOFF/CHOPOFF.jl/build/bin/CHOPOFF" 4 | target_file = "/home/rickb/sassy/GCF_000001405.40_GRCh38.p14_genomic_sample.fa" 5 | out_dir = "/home/rickb/sassy/sassy-server/bench_results" 6 | dists = [4] 7 | threads = 1 8 | chopoff_db_path = "db_path" 9 | guides_file = "/home/rickb/sassy/sassy-bench/benchmarks/src/crispr_bench/test_guide.txt" 10 | 
/// Renders a duration as `"S.mmm s"` (under one minute) or `"M m S.mmm s"`.
///
/// The duration is first rounded to whole milliseconds and only then split
/// into minutes / seconds. Rounding the floating-point seconds separately (as
/// the previous implementation did) could print impossible values such as
/// `"60.000 s"` or `"1 m 60.000 s"` for inputs just below a minute boundary.
fn format_duration(d: Duration) -> String {
    // Round half-up to the nearest millisecond using integer arithmetic.
    let total_ms = (d.as_nanos() + 500_000) / 1_000_000;
    let minutes = total_ms / 60_000;
    let rem_ms = total_ms % 60_000;
    let secs = rem_ms / 1_000;
    let millis = rem_ms % 1_000;

    if minutes == 0 {
        format!("{secs}.{millis:03} s")
    } else {
        format!("{minutes} m {secs}.{millis:03} s")
    }
}
Some(BenchmarkResult { 130 | tool_name: tool.name().into(), 131 | operation: "run".into(), 132 | duration: dur, 133 | distance: cfg.dists[0], 134 | threads: cfg.threads, 135 | guide_seq: cfg.guides_file.clone(), 136 | }), 137 | Err(e) => { 138 | eprintln!("{} error: {}", tool.name(), e); 139 | None 140 | } 141 | } 142 | } 143 | 144 | fn run_benchmark(tools: &[ToolVariant], cfg: &Config) -> Vec { 145 | ensure_directory_exists(&cfg.out_dir).expect("Can't create out dir"); 146 | let mut results = Vec::new(); 147 | 148 | for tool in tools { 149 | match tool { 150 | ToolVariant::Chop(chopoff) => { 151 | if let Some(r) = build_chopoff_db(chopoff, cfg) { 152 | results.push(r); 153 | } 154 | //for seq in guide_seqs { 155 | if let Some(r) = run_chopoff_search(chopoff, cfg) { 156 | results.push(r); 157 | } 158 | //} 159 | } 160 | 161 | ToolVariant::Sassy(sassy) => { 162 | if let Some(r) = run_other_tool(sassy, cfg) { 163 | results.push(r); 164 | } 165 | } 166 | 167 | ToolVariant::Swo(swo) => { 168 | if let Some(r) = run_other_tool(swo, cfg) { 169 | results.push(r); 170 | } 171 | } 172 | } 173 | } 174 | results 175 | } 176 | 177 | fn print_results(results: &[BenchmarkResult]) { 178 | println!("\n=== Results ==="); 179 | println!("Tool Op Dist Th Seq Time"); 180 | for r in results { 181 | println!( 182 | "{}\t{}\t{}\t{}\t{}\t{}", 183 | r.tool_name, 184 | r.operation, 185 | r.distance, 186 | r.threads, 187 | r.guide_seq, 188 | format_duration(r.duration) 189 | ); 190 | } 191 | } 192 | 193 | pub fn run(config_path: &str) { 194 | let cfg = load_config(config_path); 195 | let tools = vec![ 196 | ToolVariant::Sassy(SassyTool::new(&cfg.sassy_path)), 197 | ToolVariant::Swo(Swofinder::new(&cfg.swofinder_path)), 198 | ToolVariant::Chop(Chopoff::new(&cfg.chopoff_path)), 199 | ]; 200 | let mut all_results = Vec::new(); 201 | for &dist in &cfg.dists { 202 | // Generate a unique DB path for each distance 203 | let db_path = format!( 204 | "{}_dist{}.db", 205 | 
cfg.chopoff_db_path.as_deref().unwrap_or("chopoff_db"), 206 | dist 207 | ); 208 | let bench_cfg = Config { 209 | target_file: cfg.target_file.clone(), 210 | out_dir: cfg.out_dir.clone(), 211 | dists: vec![dist], 212 | threads: cfg.threads, 213 | chopoff_db_path: Some(db_path), 214 | sassy_path: cfg.sassy_path.clone(), 215 | swofinder_path: cfg.swofinder_path.clone(), 216 | chopoff_path: cfg.chopoff_path.clone(), 217 | guides_file: cfg.guides_file.clone(), 218 | }; 219 | let results = run_benchmark(&tools, &bench_cfg); 220 | all_results.extend(results); 221 | } 222 | print_results(&all_results); 223 | } 224 | -------------------------------------------------------------------------------- /benchmarks/src/crispr_bench/tools.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] 2 | use std::fs::File; 3 | use std::io::{BufRead, BufReader, Write}; 4 | use std::process::{Command, Stdio}; 5 | use std::time::{Duration, Instant}; 6 | 7 | // Enum to hold all tool types 8 | pub enum ToolVariant { 9 | Sassy(SassyTool), 10 | Swo(Swofinder), 11 | Chop(Chopoff), 12 | } 13 | 14 | pub enum Strand { 15 | Fwd, 16 | Rc, 17 | } 18 | pub struct Match { 19 | start: usize, 20 | end: usize, 21 | strand: Strand, 22 | } 23 | 24 | // Object safe-trait 25 | pub trait Tool { 26 | fn name(&self) -> &str; 27 | 28 | /// Runs the tool with whatever arguments are needed. 
29 | /// Returns the duration of just the command execution 30 | fn run( 31 | &self, 32 | dist: usize, 33 | guide_seq: &str, 34 | target_file_path: &str, 35 | out_path: &str, 36 | threads: usize, 37 | ) -> Result; 38 | 39 | /// Parses the output by returning a vector of Match objects 40 | fn parse_output(&self, out_path: &str) -> Result, String>; 41 | } 42 | 43 | impl Tool for ToolVariant { 44 | fn name(&self) -> &str { 45 | match self { 46 | ToolVariant::Sassy(t) => t.name(), 47 | ToolVariant::Swo(t) => t.name(), 48 | ToolVariant::Chop(t) => t.name(), 49 | } 50 | } 51 | 52 | fn run( 53 | &self, 54 | dist: usize, 55 | guide_seq: &str, 56 | target_file_path: &str, 57 | out_path: &str, 58 | threads: usize, 59 | ) -> Result { 60 | match self { 61 | ToolVariant::Sassy(t) => t.run(dist, guide_seq, target_file_path, out_path, threads), 62 | ToolVariant::Swo(t) => t.run(dist, guide_seq, target_file_path, out_path, threads), 63 | ToolVariant::Chop(t) => t.run(dist, guide_seq, target_file_path, out_path, threads), 64 | } 65 | } 66 | 67 | fn parse_output(&self, out_path: &str) -> Result, String> { 68 | match self { 69 | ToolVariant::Sassy(t) => t.parse_output(out_path), 70 | ToolVariant::Swo(t) => t.parse_output(out_path), 71 | ToolVariant::Chop(t) => t.parse_output(out_path), 72 | } 73 | } 74 | } 75 | 76 | /* 77 | SassyTool - just to not be confused with "sassy" code 78 | */ 79 | 80 | pub struct SassyTool { 81 | exec_path: String, 82 | } 83 | 84 | impl SassyTool { 85 | pub fn new(exec_path: &str) -> Self { 86 | Self { 87 | exec_path: exec_path.to_string(), 88 | } 89 | } 90 | } 91 | 92 | impl Tool for SassyTool { 93 | fn name(&self) -> &str { 94 | "SassyTool" 95 | } 96 | 97 | fn run( 98 | &self, 99 | dist: usize, 100 | guide_file_path: &str, 101 | target_file_path: &str, 102 | out_path: &str, 103 | threads: usize, 104 | ) -> Result { 105 | let args = vec![ 106 | "crispr".to_string(), 107 | "-g".to_string(), 108 | guide_file_path.to_string(), 109 | "-k".to_string(), 110 | 
dist.to_string(), 111 | "-t".to_string(), 112 | target_file_path.to_string(), 113 | "--rc".to_string(), 114 | "-o".to_string(), 115 | out_path.to_string(), 116 | "--exact-suffix".to_string(), 117 | "3".to_string(), 118 | "-j".to_string(), 119 | threads.to_string(), 120 | "-n".to_string(), 121 | "0.0".to_string(), 122 | ]; 123 | 124 | // Execute the command and time just the execution 125 | let start = Instant::now(); 126 | let mut child = Command::new(&self.exec_path) 127 | .args(&args) 128 | .stdout(Stdio::piped()) // Stream output 129 | .stderr(Stdio::piped()) // Stream errors 130 | .spawn() 131 | .map_err(|e| format!("Failed to spawn SassyTool process: {}", e))?; 132 | 133 | // Process stdout and stderr in real-time if needed 134 | if let Some(stdout) = child.stdout.take() { 135 | let stdout_reader = BufReader::new(stdout); 136 | for line in stdout_reader.lines() { 137 | if let Ok(line) = line { 138 | println!("SassyTool stdout: {}", line); 139 | } 140 | } 141 | } 142 | 143 | if let Some(stderr) = child.stderr.take() { 144 | let stderr_reader = BufReader::new(stderr); 145 | for line in stderr_reader.lines() { 146 | if let Ok(line) = line { 147 | eprintln!("SassyTool stderr: {}", line); 148 | } 149 | } 150 | } 151 | 152 | let status = child 153 | .wait() 154 | .map_err(|e| format!("Failed to wait on SassyTool process: {}", e))?; 155 | let duration = start.elapsed(); 156 | 157 | if status.success() { 158 | Ok(duration) 159 | } else { 160 | Err(format!( 161 | "SassyTool command failed with exit code: {:?}", 162 | status.code() 163 | )) 164 | } 165 | } 166 | 167 | fn parse_output(&self, _out_path: &str) -> Result, String> { 168 | Ok(vec![]) 169 | } 170 | } 171 | 172 | /* 173 | Swofinder 174 | */ 175 | 176 | pub struct Swofinder { 177 | exec_path: String, 178 | } 179 | 180 | impl Swofinder { 181 | pub fn new(exec_path: &str) -> Self { 182 | Self { 183 | exec_path: exec_path.to_string(), 184 | } 185 | } 186 | } 187 | 188 | impl Tool for Swofinder { 189 | fn 
name(&self) -> &str { 190 | "Swofinder" 191 | } 192 | 193 | fn run( 194 | &self, 195 | dist: usize, 196 | guide_file_path: &str, 197 | target_file_path: &str, 198 | out_path: &str, 199 | threads: usize, 200 | ) -> Result { 201 | // Use the directory containing the Java classes as the working directory 202 | let working_dir = std::path::PathBuf::from(self.exec_path.clone()); 203 | 204 | // Copy the guide file to sgRNAs.txt in the working directory 205 | let dest_sg_rnas_path = working_dir.join("sgRNAs.txt"); 206 | std::fs::copy(guide_file_path, &dest_sg_rnas_path) 207 | .map_err(|e| format!("Failed to copy guide file: {}", e))?; 208 | 209 | let args = vec![ 210 | "-cp".to_string(), 211 | "bin".to_string(), 212 | "SmithWatermanOffTarget.SmithWatermanOffTargetSearchAlign".to_string(), 213 | target_file_path.to_string(), 214 | "sgRNAs.txt".to_string(), 215 | out_path.to_string(), 216 | dist.to_string(), 217 | dist.to_string(), 218 | dist.to_string(), 219 | dist.to_string(), 220 | dist.to_string(), 221 | "false".to_string(), 222 | "0".to_string(), 223 | "NGG".to_string(), 224 | "false".to_string(), 225 | ]; 226 | 227 | // Execute the command and time just the execution 228 | let start = Instant::now(); 229 | let mut child = Command::new("java") 230 | .args(&args) 231 | .current_dir(working_dir) 232 | .stdout(Stdio::piped()) // Changed from null to piped 233 | .stderr(Stdio::piped()) // Changed from null to piped 234 | .spawn() 235 | .map_err(|e| format!("Failed to spawn Swofinder process: {}", e))?; 236 | 237 | // Stream stdout 238 | if let Some(stdout) = child.stdout.take() { 239 | let reader = BufReader::new(stdout); 240 | for line in reader.lines() { 241 | if let Ok(line) = line { 242 | println!("Swofinder stdout: {}", line); 243 | } 244 | } 245 | } 246 | 247 | // Stream stderr 248 | if let Some(stderr) = child.stderr.take() { 249 | let reader = BufReader::new(stderr); 250 | for line in reader.lines() { 251 | if let Ok(line) = line { 252 | eprintln!("Swofinder 
/// Handle to an external CHOPOFF executable.
pub struct Chopoff {
    /// Path passed to `Command::new` when invoking the tool.
    exec_path: String,
}

impl Chopoff {
    /// Creates a handle for the CHOPOFF binary at `exec_path`.
    pub fn new(exec_path: &str) -> Self {
        Self {
            exec_path: exec_path.to_string(),
        }
    }

    /// Runs `<exec_path> build ... prefixHashDB` and returns the wall-clock
    /// time of the child process (spawn until exit).
    ///
    /// * `target_file_path` is passed as `--genome`.
    /// * `out_dir` is passed as `-o`.
    /// * `distance` is passed as `--distance`.
    /// * `db_name` is prefixed with `Cas9_` and passed as `--name`.
    /// * `threads` is exported to the child as `JULIA_NUM_THREADS`.
    ///
    /// The variable is set on the child's environment only (`Command::env`)
    /// rather than with the process-global, `unsafe` `std::env::set_var`.
    /// stderr is drained on a helper thread while stdout is drained here, so a
    /// child that writes a lot to stderr cannot deadlock against us.
    ///
    /// # Errors
    /// Returns a message if the process cannot be spawned or waited on, or if
    /// it exits unsuccessfully.
    pub fn build(
        &self,
        target_file_path: &str,
        out_dir: &str,
        distance: usize,
        db_name: &str,
        threads: usize,
    ) -> Result<Duration, String> {
        let args = vec![
            "build".to_string(),
            "--name".to_string(),
            format!("Cas9_{}", db_name),
            "--genome".to_string(),
            target_file_path.to_string(),
            "-o".to_string(),
            out_dir.to_string(),
            "--distance".to_string(),
            distance.to_string(),
            "--motif".to_string(),
            "Cas9".to_string(),
            "prefixHashDB".to_string(),
        ];

        // Time only the execution of the external command.
        let start = Instant::now();
        let mut child = Command::new(&self.exec_path)
            .args(&args)
            .env("JULIA_NUM_THREADS", threads.to_string())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .spawn()
            .map_err(|e| format!("Failed to spawn Chopoff build process: {}", e))?;

        // Drain stderr concurrently so the child can never block on a full pipe.
        let stderr_drain = child.stderr.take().map(|stderr| {
            std::thread::spawn(move || {
                for line in BufReader::new(stderr).lines() {
                    if let Ok(line) = line {
                        eprintln!("Chopoff build stderr: {}", line);
                    }
                }
            })
        });

        if let Some(stdout) = child.stdout.take() {
            for line in BufReader::new(stdout).lines() {
                if let Ok(line) = line {
                    println!("Chopoff build stdout: {}", line);
                }
            }
        }

        // A panic in the forwarding thread only loses log lines; ignore it.
        if let Some(handle) = stderr_drain {
            let _ = handle.join();
        }

        let status = child
            .wait()
            .map_err(|e| format!("Failed to wait on Chopoff build process: {}", e))?;
        let duration = start.elapsed();

        if status.success() {
            Ok(duration)
        } else {
            Err(format!(
                "Chopoff build command failed with exit code: {:?}",
                status.code()
            ))
        }
    }
}
guides file: {}", e))?; 400 | } 401 | } 402 | 403 | // Set JULIA_NUM_THREADS environment variable 404 | unsafe { std::env::set_var("JULIA_NUM_THREADS", threads.to_string()) }; 405 | 406 | let args = vec![ 407 | "search".to_string(), 408 | "--database".to_string(), 409 | target_file_path.to_string(), 410 | "--guides".to_string(), 411 | tmp_guides_path.to_string(), 412 | "--output".to_string(), 413 | out_path.to_string(), 414 | "--distance".to_string(), 415 | dist.to_string(), 416 | "prefixHashDB".to_string(), 417 | ]; 418 | 419 | // Execute the command and stream output 420 | let start = Instant::now(); 421 | let mut child = Command::new(&self.exec_path) 422 | .args(&args) 423 | .stdout(Stdio::piped()) // Pipe stdout 424 | .stderr(Stdio::piped()) // Pipe stderr 425 | .spawn() 426 | .map_err(|e| format!("Failed to spawn Chopoff search process: {}", e))?; 427 | 428 | // Stream stdout 429 | if let Some(stdout) = child.stdout.take() { 430 | let reader = BufReader::new(stdout); 431 | for line in reader.lines() { 432 | if let Ok(line) = line { 433 | println!("Chopoff stdout: {}", line); 434 | } 435 | } 436 | } 437 | 438 | // Stream stderr 439 | if let Some(stderr) = child.stderr.take() { 440 | let reader = BufReader::new(stderr); 441 | for line in reader.lines() { 442 | if let Ok(line) = line { 443 | eprintln!("Chopoff stderr: {}", line); 444 | } 445 | } 446 | } 447 | 448 | let status = child 449 | .wait() 450 | .map_err(|e| format!("Failed to wait on Chopoff search process: {}", e))?; 451 | let duration = start.elapsed(); 452 | 453 | // Clean up temporary file 454 | if let Err(e) = std::fs::remove_file(tmp_guides_path) { 455 | eprintln!( 456 | "Warning: Failed to remove temporary file {}: {}", 457 | tmp_guides_path, e 458 | ); 459 | } 460 | 461 | if status.success() { 462 | Ok(duration) 463 | } else { 464 | Err(format!( 465 | "Chopoff search command failed with exit code: {:?}", 466 | status.code() 467 | )) 468 | } 469 | } 470 | 471 | fn parse_output(&self, out_path: 
&str) -> Result, String> { 472 | // Simplified for this example 473 | Ok(vec![]) 474 | } 475 | } 476 | -------------------------------------------------------------------------------- /benchmarks/src/edlib_bench/edlib.rs: -------------------------------------------------------------------------------- 1 | use crate::edlib_bench::sim_data::Alphabet; 2 | use ::std::os::raw::c_char; 3 | use edlib_rs::edlib_sys::*; 4 | use edlib_rs::*; 5 | use once_cell::sync::Lazy; 6 | use sassy::profiles::*; 7 | 8 | static EQUALITY_PAIRS: Lazy> = Lazy::new(build_equality_pairs); 9 | 10 | pub fn get_edlib_config(k: i32, alphabet: &Alphabet) -> EdlibAlignConfigRs<'static> { 11 | let mut config = EdlibAlignConfigRs::default(); 12 | config.mode = EdlibAlignModeRs::EDLIB_MODE_HW; 13 | if alphabet == &Alphabet::Iupac { 14 | println!("[EDLIB] Added iupac alphabet"); 15 | config.additionalequalities = &EQUALITY_PAIRS; 16 | } 17 | config.k = k; 18 | config.task = EdlibAlignTaskRs::EDLIB_TASK_PATH; 19 | config 20 | } 21 | 22 | fn build_equality_pairs() -> Vec { 23 | let codes = b"ACGTURYSWKMBDHVNX"; 24 | let mut pairs = Vec::new(); 25 | for &a in codes.iter() { 26 | for &b in codes.iter() { 27 | if Iupac::is_match(a, b) { 28 | // both upper 29 | pairs.push(EdlibEqualityPairRs { 30 | first: a as c_char, 31 | second: b as c_char, 32 | }); 33 | // both lower 34 | pairs.push(EdlibEqualityPairRs { 35 | first: a.to_ascii_lowercase() as c_char, 36 | second: b.to_ascii_lowercase() as c_char, 37 | }); 38 | // first upper, second lower 39 | pairs.push(EdlibEqualityPairRs { 40 | first: a.to_ascii_lowercase() as c_char, 41 | second: b as c_char, 42 | }); 43 | // first lower, second upper 44 | pairs.push(EdlibEqualityPairRs { 45 | first: a as c_char, 46 | second: b.to_ascii_lowercase() as c_char, 47 | }); 48 | } 49 | } 50 | } 51 | pairs 52 | } 53 | 54 | pub fn run_edlib( 55 | query: &[u8], 56 | target: &[u8], 57 | edlib_config: &EdlibAlignConfigRs, 58 | ) -> EdlibAlignResultRs { 59 | let edlib_result = 
edlibAlignRs(query, target, edlib_config); 60 | assert_eq!(edlib_result.status, EDLIB_STATUS_OK); 61 | edlib_result 62 | } 63 | 64 | #[cfg(test)] 65 | mod tests { 66 | 67 | use super::*; 68 | 69 | #[test] 70 | fn test_edlib_wrapper() { 71 | let query = b"ATG"; 72 | let target = b"CCCATGCCC"; 73 | let config = get_edlib_config(1, &Alphabet::Dna); 74 | let r = run_edlib(query, target, &config); 75 | assert_eq!(r.editDistance, 0); 76 | } 77 | 78 | #[test] 79 | fn test_edlib_iupac() { 80 | let query = b"NTG"; 81 | let target = b"CCCATGCCC"; 82 | let dna_config = get_edlib_config(1, &Alphabet::Dna); 83 | let iupac_config = get_edlib_config(1, &Alphabet::Iupac); 84 | let dna_res = run_edlib(query, target, &dna_config); 85 | let iupac_res = run_edlib(query, target, &iupac_config); 86 | assert_eq!(dna_res.editDistance, 1); 87 | assert_eq!(iupac_res.editDistance, 0); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /benchmarks/src/edlib_bench/grid.rs: -------------------------------------------------------------------------------- 1 | use crate::edlib_bench::sim_data::Alphabet; 2 | use serde::Deserialize; 3 | use std::fs; 4 | 5 | #[derive(Debug, Deserialize)] 6 | pub struct GridConfig { 7 | pub query_lengths: Vec, 8 | pub text_lengths: Vec, 9 | pub k: Vec, 10 | pub match_fraction: Vec, 11 | pub bench_iter: Vec, 12 | pub alphabet: Vec, 13 | pub profile: Vec, 14 | pub rc: Vec, 15 | pub edlib: bool, 16 | pub verbose: bool, 17 | } 18 | 19 | #[derive(Clone, Debug)] 20 | pub struct ParamSet<'a> { 21 | pub query_length: usize, 22 | pub text_length: usize, 23 | pub k: usize, 24 | pub match_fraction: f64, 25 | pub max_edits: usize, 26 | pub bench_iter: usize, 27 | pub alphabet: Alphabet, 28 | pub profile: &'a str, 29 | pub rc: &'a str, 30 | pub edlib: bool, 31 | pub verbose: bool, 32 | } 33 | 34 | impl GridConfig { 35 | /// Returns an iterator over all parameter combinations as tuples. 
36 | pub fn all_combinations<'a>(&'a self) -> impl Iterator> + 'a { 37 | self.query_lengths.iter().flat_map(move |&ql| { 38 | self.text_lengths.iter().flat_map(move |&tl| { 39 | self.k.iter().flat_map(move |&k| { 40 | self.match_fraction.iter().flat_map(move |&mf| { 41 | self.bench_iter.iter().flat_map(move |&bi| { 42 | self.alphabet.iter().flat_map(move |&a| { 43 | self.profile 44 | .iter() 45 | .flat_map(move |p| { 46 | let k = if k < 1.0 { 47 | (k * ql as f32).round() as usize 48 | } else { 49 | k as usize 50 | }; 51 | // Only allow matching profile/alphabet pairs 52 | self.rc.iter().map(move |rc| ParamSet { 53 | query_length: ql, 54 | text_length: tl, 55 | k, 56 | match_fraction: mf, 57 | max_edits: k, 58 | bench_iter: bi, 59 | alphabet: a, 60 | profile: p.as_str(), 61 | rc: rc.as_str(), 62 | edlib: self.edlib, 63 | verbose: self.verbose, 64 | }) 65 | }) 66 | .filter(|param| { 67 | (param.alphabet == Alphabet::Dna && param.profile == "dna") 68 | || (param.alphabet == Alphabet::Dna 69 | && param.profile == "iupac") 70 | || (param.alphabet == Alphabet::Iupac 71 | && param.profile == "iupac") 72 | || (param.alphabet == Alphabet::Ascii 73 | && param.profile == "ascii") 74 | }) 75 | }) 76 | }) 77 | }) 78 | }) 79 | }) 80 | }) 81 | } 82 | } 83 | 84 | pub fn read_grid(path: &str) -> Result> { 85 | let toml_str = fs::read_to_string(path)?; 86 | let config: GridConfig = toml::from_str(&toml_str)?; 87 | Ok(config) 88 | } 89 | -------------------------------------------------------------------------------- /benchmarks/src/edlib_bench/grid_config.toml: -------------------------------------------------------------------------------- 1 | query_lengths = [20, 40, 80, 100, 200, 500, 1000] 2 | text_lengths = [1000, 2000, 5000, 10_000, 20_000, 50_000, 100_000, 200_000] 3 | k = [1, 2, 3, 4, 5, 10, 15] 4 | match_fraction = [0, 1] 5 | bench_iter = [200] 6 | alphabet = ["dna"] 7 | profile = ["dna"] 8 | rc = ["withoutrc"] 9 | verbose = false 10 | edlib = true 11 | 
/// Evaluates `$expr` `$iters` times and reports the mean run time.
///
/// Expands to a tuple `(last_result, mean_micros)`, where `mean_micros` is
/// the mean wall-clock time of one evaluation in microseconds (`f64`). The
/// mean is also printed to stderr in milliseconds, prefixed with `$label`.
///
/// `$iters` is evaluated exactly once. Each sample keeps sub-microsecond
/// precision instead of being truncated to whole microseconds.
///
/// # Panics
/// Panics if `$iters` is zero, because there is then no result to return.
macro_rules! time_it {
    ($label:expr, $expr:expr, $iters:expr) => {{
        let label = $label;
        // Bind once: `$iters` may be an arbitrary expression with side effects.
        let iters: usize = $iters;
        let mut times: Vec<f64> = Vec::with_capacity(iters);
        let mut result = None;
        for _ in 0..iters {
            let start = std::time::Instant::now();
            // black_box keeps the optimizer from eliding the measured work.
            // https://doc.rust-lang.org/std/hint/fn.black_box.html
            let r = std::hint::black_box($expr);
            let elapsed = start.elapsed();
            times.push(elapsed.as_secs_f64() * 1e6);
            result = Some(r);
        }
        let mean = times.iter().sum::<f64>() / times.len() as f64;
        eprintln!("{label:>10} : {:.3}ms", mean / 1000.0);
        (
            result.expect("time_it! requires at least one iteration"),
            mean,
        )
    }};
}
println!("Param set: {:?}", param_set); 49 | 50 | // Aboslute number of matches 51 | // let num_matches = (param_set.match_fraction * param_set.text_length as f64) as usize; 52 | let num_matches = param_set.match_fraction as usize; 53 | 54 | // Generating random data 55 | let (q, t, _locs) = generate_query_and_text_with_matches( 56 | param_set.query_length, 57 | param_set.text_length, 58 | num_matches, 59 | param_set.max_edits, 60 | param_set.max_edits, 61 | ¶m_set.alphabet, 62 | ); 63 | 64 | // Number of bench iterations; 65 | let bench_iter = param_set.bench_iter; 66 | 67 | // K always at 10% of query length 68 | // let k = (param_set.query_length as f64 * 0.1) as usize; 69 | let k = param_set.max_edits; 70 | // Running Edlib 71 | let (edlib_matches, edlib_mean_ms) = if param_set.edlib { 72 | let edlib_config = get_edlib_config(k as i32, ¶m_set.alphabet); 73 | let (r, ms) = time_it!("edlib", run_edlib(&q, &t, &edlib_config), bench_iter); 74 | let edlib_matches = r.startLocations.unwrap_or(vec![]); 75 | (edlib_matches, ms) 76 | } else { 77 | (vec![], 0.0) 78 | }; 79 | 80 | // Get the correct search function (not timed) 81 | let mut search_fn = get_search_fn(¶m_set); 82 | 83 | // Now time the search 84 | let (sassy_matches, sassy_mean_ms) = time_it!("sassy", search_fn(&q, &t, k), bench_iter); 85 | 86 | if param_set.edlib { 87 | println!("Edlib matches: {:?}", edlib_matches.len()); 88 | } 89 | println!("Sassy matches: {:?}", sassy_matches.len()); 90 | 91 | if param_set.verbose { 92 | println!("Edlib matches"); 93 | for loc in edlib_matches { 94 | println!("{}", loc); 95 | } 96 | println!("Sassy matches"); 97 | for loc in sassy_matches { 98 | println!("{:?}", loc); 99 | } 100 | } 101 | 102 | // Write row to CSV 103 | writeln!( 104 | writer, 105 | "{},{},{},{},{},{},{},{},{},{:.6},{:.6}", 106 | param_set.query_length, 107 | param_set.text_length, 108 | param_set.k, 109 | param_set.match_fraction, 110 | param_set.max_edits, 111 | param_set.bench_iter, 112 | 
format!("{:?}", param_set.alphabet), 113 | param_set.profile, 114 | param_set.rc, 115 | edlib_mean_ms, 116 | sassy_mean_ms 117 | ) 118 | .unwrap(); 119 | } 120 | // Ensure all data is written 121 | writer.flush().unwrap(); 122 | } 123 | 124 | type SearchFn = Box Vec>; 125 | 126 | fn get_search_fn(param_set: &ParamSet) -> SearchFn { 127 | let rc = match param_set.rc { 128 | "withrc" => true, 129 | "withoutrc" => false, 130 | x => panic!("Unsupported rc config: {x}"), 131 | }; 132 | match param_set.profile { 133 | // IUPAC profile 134 | "iupac" => { 135 | let mut searcher = Searcher::::new(rc); 136 | Box::new(move |q, t, k| searcher.search(&q, &t, k)) 137 | } 138 | 139 | // DNA profile 140 | "dna" => { 141 | let mut searcher = Searcher::::new(rc); 142 | Box::new(move |q, t, k| searcher.search(&q, &t, k)) 143 | } 144 | 145 | // ASCII profile 146 | "ascii" => { 147 | let mut searcher = Searcher::::new(rc); 148 | Box::new(move |q, t, k| searcher.search(&q, &t, k)) 149 | } 150 | 151 | _ => panic!( 152 | "Unsupported combination: {:?} {:?} {:?}", 153 | param_set.profile, param_set.rc, param_set.alphabet 154 | ), 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /benchmarks/src/edlib_bench/sim_data.rs: -------------------------------------------------------------------------------- 1 | use rand::Rng; 2 | use serde::{Deserialize, Serialize}; 3 | 4 | #[derive(Copy, Clone, Debug, PartialEq, Deserialize, Serialize)] 5 | #[serde(rename_all = "lowercase")] 6 | pub enum Alphabet { 7 | Dna, 8 | Iupac, 9 | Ascii, 10 | } 11 | 12 | /// Generate random data with inserted target matches 13 | pub fn generate_query_and_text_with_matches( 14 | ql: usize, 15 | tl: usize, 16 | num: usize, 17 | min_edits: usize, 18 | max_edits: usize, 19 | alphabet: &Alphabet, 20 | ) -> (Vec, Vec, Vec<(usize, usize)>) { 21 | let mut rng = rand::rng(); 22 | let query = generate_random_sequence(ql, alphabet); 23 | let mut text = generate_random_sequence(tl, 
alphabet); 24 | let mut locs = Vec::new(); 25 | for _ in 0..num { 26 | let m = mutate_sequence(&query, min_edits, max_edits); 27 | if m.len() > text.len() { 28 | continue; 29 | } 30 | let max_start = text.len() - m.len(); 31 | for _ in 0..10 { 32 | let start = rng.random_range(0..=max_start); 33 | let end = start + m.len(); 34 | if locs.iter().all(|&(s, e)| end <= s || start >= e) { 35 | text.splice(start..end, m.iter().cloned()); 36 | locs.push((start, end)); 37 | break; 38 | } 39 | } 40 | } 41 | (query, text, locs) 42 | } 43 | 44 | /// Generate random dna sequence of length "length" with alphabet 45 | fn generate_random_sequence(length: usize, alphabet: &Alphabet) -> Vec { 46 | let mut rng = rand::rng(); 47 | match alphabet { 48 | Alphabet::Dna => (0..length) 49 | .map(|_| b"ACGT"[rng.random_range(0..4)]) 50 | .collect(), 51 | Alphabet::Iupac => (0..length) 52 | .map(|_| b"ACGTURYSWKMBDHVNX"[rng.random_range(0..16)]) 53 | .collect(), 54 | Alphabet::Ascii => (0..length) 55 | .map(|_| rng.random_range(0..256) as u8) 56 | .collect(), 57 | } 58 | } 59 | 60 | /// Mutate sequence with at most max_edits 61 | fn mutate_sequence(sequence: &[u8], min_edits: usize, max_edits: usize) -> Vec { 62 | let mut rng = rand::rng(); 63 | let mut seq = sequence.to_vec(); 64 | for _ in 0..rng.random_range(min_edits..=max_edits) { 65 | let idx = rng.random_range(0..seq.len()); 66 | match rng.random_range(0..3) { 67 | 0 => { 68 | let current = seq[idx]; 69 | let mut new_char; 70 | // Keep trying until we get a different character 71 | loop { 72 | new_char = b"ACGT"[rng.random_range(0..4)]; 73 | if new_char != current { 74 | break; 75 | } 76 | } 77 | seq[idx] = new_char; 78 | } 79 | 1 if seq.len() > 1 => { 80 | seq.remove(idx); 81 | } 82 | 2 => seq.insert(idx, b"ACGT"[rng.random_range(0..4)]), 83 | _ => {} 84 | } 85 | } 86 | seq 87 | } 88 | 89 | #[cfg(test)] 90 | mod test { 91 | 92 | use super::*; 93 | 94 | fn naive_edit_dist(q: &[u8], t: &[u8]) -> usize { 95 | let m = q.len(); 96 | let n 
= t.len(); 97 | 98 | // Create a matrix to store the edit distances 99 | let mut dp = vec![vec![0; n + 1]; m + 1]; 100 | 101 | // Initialize the first row and column 102 | for i in 0..=m { 103 | dp[i][0] = i; 104 | } 105 | for j in 0..=n { 106 | dp[0][j] = j; 107 | } 108 | 109 | // Fill the dp matrix 110 | for i in 1..=m { 111 | for j in 1..=n { 112 | let cost = if q[i - 1] == t[j - 1] { 0 } else { 1 }; 113 | dp[i][j] = (dp[i - 1][j] + 1) // deletion 114 | .min(dp[i][j - 1] + 1) // insertion 115 | .min(dp[i - 1][j - 1] + cost); // substitution 116 | } 117 | } 118 | 119 | // Return the edit distance 120 | dp[m][n] 121 | } 122 | 123 | #[test] 124 | fn test_random_data_single_match_no_edits() { 125 | let (q, t, locs) = generate_query_and_text_with_matches(10, 100, 1, 0, 0, &Alphabet::Dna); 126 | let (s, e) = locs[0]; 127 | assert_eq!(q, t[s..e]); 128 | } 129 | 130 | #[test] 131 | fn test_random_data_single_match_1_edit() { 132 | let (q, t, locs) = generate_query_and_text_with_matches(10, 100, 1, 1, 1, &Alphabet::Dna); 133 | let (s, e) = locs[0]; 134 | assert_ne!(q, t[s..e]); 135 | // Get actual edits using edlib wrapper 136 | let e = naive_edit_dist(&q, &t[s..e]); 137 | assert_eq!(e, 1); 138 | } 139 | 140 | #[test] 141 | fn test_random_two_matches() { 142 | let (q, t, locs) = generate_query_and_text_with_matches(10, 100, 2, 1, 1, &Alphabet::Dna); 143 | assert_eq!(locs.len(), 2); 144 | for loc in locs { 145 | let (s, e) = loc; 146 | let e = naive_edit_dist(&q, &t[s..e]); 147 | assert_eq!(e, 1); 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /benchmarks/src/main.rs: -------------------------------------------------------------------------------- 1 | use clap::{Parser, Subcommand}; 2 | 3 | mod edlib_bench; 4 | use edlib_bench::runner as edlib_runner; 5 | 6 | mod crispr_bench; 7 | use crispr_bench::runner as crispr_runner; 8 | 9 | #[derive(Parser)] 10 | #[command(author, version, about)] 11 | struct Args { 12 
| #[command(subcommand)] 13 | command: Commands, 14 | } 15 | 16 | #[derive(Subcommand)] 17 | enum Commands { 18 | /// Run the edlib grid benchmark 19 | Edlib { 20 | /// Path to the grid config TOML file 21 | #[arg(long)] 22 | config: String, 23 | }, 24 | /// Run the CRISPR benchmark 25 | Crispr { 26 | /// Path to the CRISPR config TOML file 27 | #[arg(long)] 28 | config: String, 29 | }, 30 | } 31 | 32 | fn main() { 33 | let args = Args::parse(); 34 | match args.command { 35 | Commands::Edlib { config } => { 36 | println!("Running edlib grid"); 37 | edlib_runner::run(&config); 38 | } 39 | Commands::Crispr { config } => { 40 | println!("Running CRISPR benchmark"); 41 | crispr_runner::run(&config); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /bin/crispr.rs: -------------------------------------------------------------------------------- 1 | use sassy::search::Match; 2 | use sassy::{ 3 | profiles::Iupac, profiles::Profile, search::Searcher, search::StaticText, search::Strand, 4 | }; 5 | use pa_types::CigarOp; 6 | use std::fs::File; 7 | use std::io::{BufRead, BufReader}; 8 | use std::sync::Arc; 9 | use std::sync::atomic::{AtomicUsize, Ordering}; 10 | use std::time::Instant; 11 | use std::{ 12 | io::{BufWriter, Write}, 13 | path::PathBuf, 14 | sync::Mutex, 15 | }; 16 | 17 | #[derive(clap::Parser)] 18 | pub struct CrisprArgs { 19 | /// Path to file with guide sequences (including PAM) 20 | #[arg(long, short = 'g')] 21 | guide: String, 22 | 23 | /// Report matches up to (and including) this distance threshold (excluding PAM). 24 | #[arg(long, short = 'k')] 25 | k: usize, 26 | 27 | /// Fasta file to search. May be gzipped. 28 | #[arg(long, short = 't')] 29 | target: PathBuf, 30 | 31 | /// Require the first N bases of the guide to be exact matches. 32 | #[arg(long, short = 'p')] 33 | exact_prefix: Option, 34 | 35 | /// Require the last N bases of the guide to be exact matches. 
36 | #[arg(long, short = 's')] 37 | exact_suffix: Option, 38 | 39 | /// Whether to include matches of the reverse-complement string. 40 | #[arg(long, short = 'r')] 41 | rc: bool, 42 | 43 | /// Number of threads to use. All CPUs by default. 44 | #[arg(short = 'j', long)] 45 | threads: Option, 46 | 47 | /// Allow at most max_n_frac of N characters in the target sequence. 48 | #[arg(long, short = 'n')] 49 | max_n_frac: Option, 50 | 51 | /// Output file path. 52 | #[arg(short = 'o', long)] 53 | output: PathBuf, 54 | } 55 | 56 | fn check_edit_free(m: &Match, target: isize) -> bool { 57 | // We assume PAMs are always at the ends so we can either check if the first X are 58 | // Match or the last X 59 | let is_negative = target < 0; 60 | let to_check: &pa_types::CigarElem = match is_negative { 61 | true => m.cigar.ops.last().unwrap(), 62 | false => m.cigar.ops.first().unwrap(), 63 | }; 64 | to_check.op == CigarOp::Match && to_check.cnt >= target.abs() as i32 65 | } 66 | 67 | fn check_n_frac(max_n_frac: f32, match_slice: &[u8]) -> bool { 68 | let n_count = match_slice 69 | .iter() 70 | .filter(|c| (**c & 0xDF) == b'N') // Convert to uppercase check against N 71 | .count() as f32; 72 | let n_frac = n_count / match_slice.len() as f32; 73 | n_frac <= max_n_frac 74 | } 75 | 76 | fn print_and_check_params(args: &CrisprArgs, guide_sequence: &[u8]) -> (Option, f32) { 77 | // Only allow one of prefix/suffix 78 | if let (Some(_), Some(_)) = (args.exact_prefix, args.exact_suffix) { 79 | eprintln!("[crispr] Error: cannot specify both exact prefix and suffix"); 80 | std::process::exit(1); 81 | } 82 | 83 | let edit_free: Option = match (args.exact_prefix, args.exact_suffix) { 84 | (Some(prefix), None) => Some(prefix as isize), 85 | (None, Some(suffix)) => Some(-(suffix as isize)), 86 | (None, None) => None, 87 | _ => None, 88 | }; 89 | 90 | let max_n_frac = args.max_n_frac.unwrap_or(100.0); // Allow all to be N by default 91 | 92 | // Print info 93 | if let Some(v) = edit_free { 94 | 
let guide_str = String::from_utf8_lossy(guide_sequence); 95 | if v < 0 { 96 | let pam_start = guide_sequence.len() - (-v) as usize; 97 | let prefix = &guide_str[..pam_start]; 98 | let pam = &guide_str[pam_start..]; 99 | println!("[PAM] Edit-free region in brackets: {}[{}]", prefix, pam); 100 | } else { 101 | let pam = &guide_str[..v as usize]; 102 | let suffix = &guide_str[v as usize..]; 103 | println!("[PAM] Edit-free region in brackets: [{}]{}", pam, suffix); 104 | } 105 | } else { 106 | println!("[PAM] Edits are allowed"); 107 | } 108 | 109 | if args.max_n_frac.is_some() { 110 | println!( 111 | "[N-chars] Allowing up to {}% N characters", 112 | max_n_frac * 100.0 113 | ); 114 | } else { 115 | println!("[N-chars] No N-character filtering"); 116 | } 117 | 118 | (edit_free, max_n_frac) 119 | } 120 | 121 | fn pass( 122 | m: &Match, 123 | edit_free: Option, 124 | edit_free_value: isize, 125 | max_n_frac: f32, 126 | match_slice: &[u8], 127 | ) -> bool { 128 | let pam_ok = if edit_free.is_some() { 129 | check_edit_free(m, edit_free_value) 130 | } else { 131 | true 132 | }; 133 | let n_ok = if max_n_frac < 100.0 { 134 | check_n_frac(max_n_frac, match_slice) 135 | } else { 136 | true 137 | }; 138 | pam_ok && n_ok 139 | } 140 | 141 | pub fn read_guide_sequences(path: &str) -> Vec> { 142 | let file = File::open(path).expect("Failed to open guide file"); 143 | let reader = BufReader::new(file); 144 | reader 145 | .lines() 146 | .map(|l| l.unwrap().as_bytes().to_vec()) 147 | .collect::>() 148 | .into_iter() 149 | .filter(|seq| !seq.is_empty()) 150 | .collect() 151 | } 152 | 153 | pub fn crispr(args: CrisprArgs) { 154 | let guide_sequences = read_guide_sequences(&args.guide); 155 | println!("[GUIDES] Found {} guides", guide_sequences.len()); 156 | 157 | if !guide_sequences.is_empty() { 158 | // Read the first record from the FASTA file for benchmarking 159 | println!("Creating output file with path: {}", args.output.display()); 160 | let file = 
File::create(&args.output).expect("Failed to create output file"); 161 | let writer = Mutex::new(BufWriter::new(file)); 162 | let reader = Mutex::new(needletail::parse_fastx_file(args.target.clone()).unwrap()); 163 | 164 | let (edit_free, max_n_frac) = print_and_check_params(&args, &guide_sequences[0]); 165 | let edit_free_value = edit_free.unwrap_or(0); 166 | 167 | let total_found = Arc::new(AtomicUsize::new(0)); 168 | let edits_in_pam = Arc::new(AtomicUsize::new(0)); 169 | 170 | let num_threads = args.threads.unwrap_or_else(num_cpus::get); 171 | println!("[Threads] Using {} threads", num_threads); 172 | 173 | let start = Instant::now(); 174 | std::thread::scope(|scope| { 175 | for _ in 0..num_threads { 176 | scope.spawn(|| { 177 | while let Ok(mut guard) = reader.lock() 178 | && let Some(record) = guard.next() 179 | { 180 | // Get fasta record 181 | let record = record.unwrap(); 182 | let id = String::from_utf8(record.id().to_vec()).unwrap(); 183 | let text = &record.seq().into_owned(); 184 | 185 | // Create static text by precomputing reverse 186 | let static_text = StaticText::new(text); 187 | 188 | // Searcher, IUPAC and always reverse complement 189 | let mut searcher = Searcher::::new_rc(); 190 | 191 | // Search for each guide sequence 192 | guide_sequences.iter().for_each(|guide_sequence| { 193 | let matches = searcher.search_all(guide_sequence, &static_text, args.k); 194 | 195 | total_found.fetch_add(matches.len(), Ordering::Relaxed); 196 | 197 | let mut writer_guard = writer.lock().unwrap(); 198 | 199 | for m in matches { 200 | // We have to adjust the start and end based on reverse complement 201 | // as we reverse the text these should be adjusted based on text length 202 | let (start, end) = if m.strand == Strand::Rc { 203 | ( 204 | text.len() - m.end.1 as usize, 205 | text.len() - m.start.1 as usize, 206 | ) 207 | } else { 208 | (m.start.1 as usize, m.end.1 as usize) 209 | }; 210 | 211 | let slice = &text[start..end]; 212 | 213 | // If reverse 
complement, also take reverse complmeent of the slice 214 | let rc_vec = if m.strand == Strand::Rc { 215 | ::reverse_complement(slice) 216 | } else { 217 | Vec::new() 218 | }; 219 | 220 | let slice = if m.strand == Strand::Rc { 221 | &rc_vec 222 | } else { 223 | slice 224 | }; 225 | 226 | if pass(&m, edit_free, edit_free_value, max_n_frac, slice) { 227 | let cost = m.cost; 228 | let slice_str = String::from_utf8_lossy(slice); 229 | let cigar = m.cigar.to_string(); 230 | let strand = match m.strand { 231 | Strand::Fwd => "+", 232 | Strand::Rc => "-", 233 | }; 234 | writeln!( 235 | writer_guard, 236 | "{id}\t{cost}\t{strand}\t{start}\t{end}\t{slice_str}\t{cigar}" 237 | ) 238 | .unwrap(); 239 | } else { 240 | edits_in_pam.fetch_add(1, Ordering::Relaxed); 241 | } 242 | } 243 | }); 244 | } 245 | }); 246 | } 247 | }); 248 | 249 | println!("\nSummary"); 250 | println!( 251 | " Total targets found: {}", 252 | total_found.load(Ordering::Relaxed) 253 | ); 254 | println!( 255 | " Discarded (edits + N's): {}", 256 | edits_in_pam.load(Ordering::Relaxed) 257 | ); 258 | println!( 259 | " Total targets passed: {}", 260 | total_found.load(Ordering::Relaxed) - edits_in_pam.load(Ordering::Relaxed) 261 | ); 262 | println!(" Time taken: {:?}", start.elapsed()); 263 | } 264 | } 265 | -------------------------------------------------------------------------------- /bin/main.rs: -------------------------------------------------------------------------------- 1 | mod crispr; 2 | mod search; 3 | 4 | use clap::Parser; 5 | use { 6 | crispr::{CrisprArgs, crispr}, 7 | search::{SearchArgs, search}, 8 | }; 9 | 10 | #[derive(clap::Parser)] 11 | #[command(author, version, about)] 12 | enum Args { 13 | /// Default search behavior 14 | Search(SearchArgs), 15 | /// CRISPR-specific search with PAM and edit-free region 16 | Crispr(CrisprArgs), 17 | } 18 | 19 | fn main() { 20 | let args = Args::parse(); 21 | env_logger::init(); 22 | 23 | match args { 24 | Args::Search(search_args) => search(search_args), 
25 | Args::Crispr(crispr_args) => crispr(crispr_args), 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /bin/search.rs: -------------------------------------------------------------------------------- 1 | use sassy::{ 2 | profiles::{Ascii, Dna, Iupac}, 3 | search::{Searcher, Strand}, 4 | }; 5 | use std::{path::PathBuf, sync::Mutex}; 6 | 7 | //FIXME: either adjust CLI to match cirispr (named arguments) or other way around 8 | 9 | #[derive(clap::Parser, Clone)] 10 | pub struct SearchArgs { 11 | /// Pattern to search for. 12 | query: String, 13 | /// Report matches up to (and including) this distance threshold. 14 | k: usize, 15 | /// Fasta file to search. May be gzipped. 16 | path: PathBuf, 17 | 18 | /// The alphabet to use. DNA=ACTG, or IUPAC=ACTG+NYR... 19 | #[arg(long, value_enum)] 20 | alphabet: Alphabet, 21 | 22 | /// Disable reverse complement search 23 | #[arg(long)] 24 | no_rc: bool, 25 | 26 | /// Number of threads to use. All CPUs by default. 
27 | #[arg(short = 'j', long)] 28 | threads: Option, 29 | } 30 | 31 | #[derive(clap::ValueEnum, Default, Clone)] 32 | pub enum Alphabet { 33 | Ascii, 34 | #[default] 35 | Dna, 36 | Iupac, 37 | } 38 | 39 | pub fn search(args: SearchArgs) { 40 | let query = args.query.as_bytes(); 41 | let reader = Mutex::new(needletail::parse_fastx_file(args.path).unwrap()); 42 | let write_lock = Mutex::new(()); 43 | 44 | let num_threads = args.threads.unwrap_or_else(num_cpus::get); 45 | std::thread::scope(|scope| { 46 | for _ in 0..num_threads { 47 | scope.spawn(|| { 48 | while let Ok(mut guard) = reader.lock() 49 | && let Some(record) = guard.next() 50 | { 51 | let record = record.unwrap(); 52 | let id = String::from_utf8(record.id().to_vec()).unwrap(); 53 | let text = &record.seq().into_owned(); 54 | 55 | let matches = match args.alphabet { 56 | Alphabet::Ascii => { 57 | Searcher::::new_fwd().search(query, &text, args.k) 58 | } 59 | Alphabet::Dna => { 60 | Searcher::::new(!args.no_rc).search(query, &text, args.k) 61 | } 62 | Alphabet::Iupac => { 63 | Searcher::::new(!args.no_rc).search(query, &text, args.k) 64 | } 65 | }; 66 | 67 | let _write_lock = write_lock.lock().unwrap(); 68 | for m in matches { 69 | let cost = m.cost; 70 | let start = m.start.1 as usize; 71 | let end = m.end.1 as usize; 72 | let slice = &text[start..end]; 73 | let slice_str = String::from_utf8_lossy(slice); 74 | let cigar = m.cigar.to_string(); 75 | let strand = match m.strand { 76 | Strand::Fwd => "+", 77 | Strand::Rc => "-", 78 | }; 79 | println!("{id}\t{cost}\t{strand}\t{start}\t{end}\t{slice_str}\t{cigar}"); 80 | } 81 | } 82 | }); 83 | } 84 | }); 85 | } 86 | 87 | mod test { 88 | 89 | use super::*; 90 | use rand::Rng; 91 | use std::io::Write; 92 | 93 | fn random_dna_string(len: usize) -> Vec { 94 | let mut rng = rand::rng(); 95 | (0..len).map(|_| b"ACGT"[rng.random_range(0..4)]).collect() 96 | } 97 | 98 | fn rc(dna: &[u8]) -> Vec { 99 | dna.iter() 100 | .rev() 101 | .map(|c| match c { 102 | b'A' => b'T', 
103 | b'C' => b'G', 104 | b'G' => b'C', 105 | b'T' => b'A', 106 | _ => panic!("Invalid DNA character"), 107 | }) 108 | .collect() 109 | } 110 | 111 | #[test] 112 | fn end_to_end_search() { 113 | eprintln!("WARNING: Run this test with -- --nocapture to see the output"); 114 | // Create file at data/test.fasta, with two fwd, one reverse complement match 115 | // insert at 10, 50, and 100 116 | let dna = random_dna_string(1000); 117 | let mut text = dna.clone(); 118 | let query = b"TAGCTAGAC"; 119 | text.splice(10..10, query.iter().copied()); 120 | text.splice(50..50, query.iter().copied()); 121 | text.splice(100..100, rc(query).iter().copied()); 122 | // Write to file 123 | let mut file = std::fs::File::create("data/test.fasta").unwrap(); 124 | writeln!(file, ">test").unwrap(); 125 | writeln!(file, "{}", String::from_utf8_lossy(&text)).unwrap(); 126 | 127 | let mut args: SearchArgs = SearchArgs { 128 | query: String::from_utf8(query.to_vec()).unwrap(), 129 | k: 0, 130 | path: PathBuf::from("data/test.fasta"), 131 | alphabet: Alphabet::Dna, 132 | no_rc: true, 133 | threads: Some(1), 134 | }; 135 | 136 | // FIXME: capture output and assert or write to file for easy check 137 | // anyway for now run with -- --nocapture and check for 10,50,100 138 | println!("Search without RC"); 139 | search(args.clone()); 140 | 141 | println!("Search with RC"); 142 | args.no_rc = false; 143 | search(args); 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /justfile: -------------------------------------------------------------------------------- 1 | bench bench='' *args='': 2 | cargo criterion --offline --plotting-backend disabled --bench bench -- "{{bench}}" {{args}} 3 | 4 | build: 5 | cargo build -r --bench bench 6 | 7 | stat bench='' *args='': build 8 | perf stat cargo bench --bench bench -- --profile-time 5 "{{bench}}" {{args}} 9 | 10 | flame bench='' *args='': 11 | cargo flamegraph --release --bench bench -- --profile-time 5 
"{{bench}}" 12 | 13 | record bench='' *args='': build 14 | perf record -g cargo bench --bench bench -- --profile-time 2 "{{bench}}" {{args}} 15 | perf report -n 16 | 17 | cache bench='' *args='': build 18 | perf record -e cache-misses -g cargo bench --bench bench -- --profile-time 2 "{{bench}}" {{args}} 19 | perf report -n 20 | 21 | report: 22 | perf report -n 23 | 24 | cpufreq: 25 | sudo cpupower frequency-set --governor powersave -d 2.6GHz -u 2.6GHz 26 | cpufreq-high: 27 | sudo cpupower frequency-set --governor powersave -d 5.0GHz -u 5.0GHz 28 | 29 | heaptrack bench='' *args='': build 30 | heaptrack cargo bench --bench bench -- --profile-time 2 "{{bench}}" {{args}} 31 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | edition = "2024" 2 | style_edition = "2024" 3 | -------------------------------------------------------------------------------- /src/bitpacking.rs: -------------------------------------------------------------------------------- 1 | //! The basic bitpacking algorithm from Myers'99. 2 | use std::simd::{LaneCount, Simd, SupportedLaneCount}; 3 | 4 | use crate::{ 5 | delta_encoding::{HEncoding, VEncoding}, 6 | profiles::Profile, 7 | }; 8 | 9 | /// Implements Myers '99 bitpacking based algorithm. Terminology is as in the 10 | /// paper. The code is a translation from the implementation in Edlib. 11 | /// 12 | /// Modifies `h0` (horizontal difference at top) and `v` (vertical differences 13 | /// along the left) in place. 14 | /// 15 | /// Given the scores below: 16 | /// 17 | /// A0 - B0 18 | /// | | 19 | /// A1 - B1 20 | /// ... 21 | /// AW - BW 22 | /// 23 | /// h0 = B0 - A0 24 | /// v[i] = A(i+1) - Ai 25 | /// 26 | /// H and V are wrapper types to encode the horizontal and vertical differences 27 | /// using a + and - indicator bit. 28 | /// 29 | /// 20 operations, excluding `eq`. 
30 | #[inline(always)] 31 | #[allow(unused)] // TODO: Drop this 32 | pub fn compute_block, V: VEncoding>( 33 | h0: &mut H, 34 | v: &mut V, 35 | ca: &P::A, 36 | cb: &P::B, 37 | ) { 38 | let eq = P::eq(ca, cb); // this one is not counted as an operation 39 | let (vp, vm) = v.pm(); 40 | let vx = eq | vm; 41 | // NOTE: This is not in Myers' original code because he assumes the input delta can never be -1. 42 | let eq = eq | h0.m(); 43 | // The add here contains the 'folding' magic that makes this algorithm 44 | // 'non-local' and prevents simple SIMDification. See Myers'99 for details. 45 | let hx = (((eq & vp).wrapping_add(vp)) ^ vp) | eq; 46 | let hp = vm | !(hx | vp); 47 | let hm = vp & hx; 48 | // Extract `hw` from `ph` and `mh`. 49 | // TODO: Use carry-bit from shit-left operation. 50 | // - The problem with carry bits is that they block pipelining, hence 51 | // incurring a bit performance hit. 52 | // TODO: Could we save ops with 63-bit vectors? 53 | 54 | // Push `hw` out of `ph` and `mh` and shift in `h0`. 55 | // NOTE: overflowing_add uses the carry bit, but is slow because reading the 56 | // carry bit right after this instruction interrupts pipelining. 57 | // NOTE: overflowing_shl returns whether the shift is too large, not the shifted out bit. 58 | let hpw = hp >> (u64::BITS - 1); 59 | let hmw = hm >> (u64::BITS - 1); 60 | let hp = (hp << 1) | h0.p(); 61 | let hm = (hm << 1) | h0.m(); 62 | 63 | *h0 = H::from(hpw as u64, hmw); 64 | *v = V::from(hm | !(vx | hp), hp & vx); 65 | } 66 | 67 | /// Simd version of `compute_block`. 68 | /// 69 | /// This assumes HEncoding of `(u64,u64)`. 
70 | #[inline(always)] 71 | pub fn compute_block_simd( 72 | hp0: &mut Simd, 73 | hm0: &mut Simd, 74 | vp: &mut Simd, 75 | vm: &mut Simd, 76 | eq: Simd, 77 | ) where 78 | LaneCount: SupportedLaneCount, 79 | { 80 | let vx = eq | *vm; 81 | let eq = eq | *hm0; 82 | // The add here contains the 'folding' magic that makes this algorithm 83 | // 'non-local' and prevents simple SIMDification. See Myers'99 for details. 84 | let hx = (((eq & *vp) + *vp) ^ *vp) | eq; 85 | let hp = *vm | !(hx | *vp); 86 | let hm = *vp & hx; 87 | // Extract `hw` from `ph` and `mh`. 88 | let right_shift = u64::BITS as u64 - 1; 89 | let hpw = hp >> right_shift; 90 | let hmw = hm >> right_shift; 91 | 92 | // Push `hw` out of `ph` and `mh` and shift in `h0`. 93 | let left_shift = Simd::splat(1); 94 | let hp = (hp << left_shift) | *hp0; 95 | let hm = (hm << left_shift) | *hm0; 96 | 97 | *hp0 = hpw; 98 | *hm0 = hmw; 99 | *vp = hm | !(vx | hp); 100 | *vm = hp & vx; 101 | } 102 | -------------------------------------------------------------------------------- /src/delta_encoding.rs: -------------------------------------------------------------------------------- 1 | #![allow(unused)] // TODO: Drop this 2 | 3 | use pa_types::{Cost, I}; 4 | 5 | pub trait VEncoding { 6 | fn zero() -> Self; 7 | fn one() -> Self; 8 | fn from(p: Base, m: Base) -> Self; 9 | fn value(&self) -> Cost; 10 | fn p(&self) -> Base; 11 | fn m(&self) -> Base; 12 | fn pm(&self) -> (Base, Base) { 13 | (self.p(), self.m()) 14 | } 15 | fn value_of_prefix(&self, j: I) -> Cost; 16 | fn value_of_suffix(&self, j: I) -> Cost; 17 | fn value_to(v: &Vec, j: I) -> Cost 18 | where 19 | Self: Sized; 20 | fn value_from(v: &Vec, j: I) -> Cost 21 | where 22 | Self: Sized; 23 | } 24 | 25 | #[derive(Clone, Default, Copy, PartialEq, Eq, Debug)] 26 | pub struct V(pub Base, pub Base); 27 | 28 | macro_rules! 
impl_vencoding { 29 | ($($t:ty),+) => { 30 | $( 31 | impl VEncoding<$t> for V<$t> { 32 | #[inline(always)] 33 | fn zero() -> Self { 34 | V(0, 0) 35 | } 36 | #[inline(always)] 37 | fn one() -> Self { 38 | V(<$t>::MAX, 0) 39 | } 40 | #[inline(always)] 41 | fn from(p: $t, m: $t) -> Self { 42 | V(p, m) 43 | } 44 | #[inline(always)] 45 | fn value(&self) -> Cost { 46 | self.0.count_ones() as Cost - self.1.count_ones() as Cost 47 | } 48 | /// Value of the first `j` bits. 49 | /// NOTE: Requires `0 <= j < $t::BITS`. 50 | #[inline(always)] 51 | fn value_of_prefix(&self, j: I) -> Cost { 52 | debug_assert!(0 <= j && j < <$t>::BITS as I); 53 | let mask = (1 << j) - 1; 54 | (self.0 & mask).count_ones() as Cost - (self.1 & mask).count_ones() as Cost 55 | } 56 | /// Value of the last `j` bits. 57 | /// NOTE: Requires `j > 0`. 58 | #[inline(always)] 59 | fn value_of_suffix(&self, j: I) -> Cost { 60 | debug_assert!(0 < j && j <= <$t>::BITS as I); 61 | let mask = !(((1 as $t) << (<$t>::BITS as I - j)).wrapping_sub(1)); 62 | (self.0 & mask).count_ones() as Cost - (self.1 & mask).count_ones() as Cost 63 | } 64 | #[inline(always)] 65 | fn pm(&self) -> ($t, $t) { 66 | (self.0, self.1) 67 | } 68 | #[inline(always)] 69 | fn p(&self) -> $t { 70 | self.0 71 | } 72 | #[inline(always)] 73 | fn m(&self) -> $t { 74 | self.1 75 | } 76 | fn value_to(v: &Vec, j: I) -> Cost { 77 | let mut s = 0; 78 | for vj in &v[0..j as usize / 64] { 79 | s += vj.value(); 80 | } 81 | if j % 64 != 0 { 82 | s += v[j as usize / 64].value_of_prefix(j % 64); 83 | } 84 | s 85 | } 86 | fn value_from(v: &Vec, j: I) -> Cost { 87 | let mut s = 0; 88 | if j % 64 != 0 { 89 | s += v[j as usize / 64].value_of_suffix(64 - j % 64); 90 | } 91 | for vj in &v[j.div_ceil(64) as usize..] 
{ 92 | s += vj.value(); 93 | } 94 | s 95 | } 96 | } 97 | )+ 98 | } 99 | } 100 | impl_vencoding!(u8, u16, u32, u64); 101 | 102 | pub trait HEncoding: Copy { 103 | fn zero() -> Self; 104 | fn one() -> Self; 105 | fn from(p: Base, m: Base) -> Self; 106 | fn value(&self) -> Cost; 107 | fn p(&self) -> Base; 108 | fn m(&self) -> Base; 109 | #[inline(always)] 110 | fn pm(&self) -> (Base, Base) { 111 | (self.p(), self.m()) 112 | } 113 | } 114 | 115 | impl HEncoding for i8 { 116 | #[inline(always)] 117 | fn zero() -> Self { 118 | 0 119 | } 120 | #[inline(always)] 121 | fn one() -> Self { 122 | 1 123 | } 124 | #[inline(always)] 125 | fn from(p: u8, m: u8) -> Self { 126 | p as i8 - m as i8 127 | } 128 | #[inline(always)] 129 | fn value(&self) -> Cost { 130 | *self as Cost 131 | } 132 | #[inline(always)] 133 | fn p(&self) -> u8 { 134 | (*self > 0) as u8 135 | } 136 | #[inline(always)] 137 | fn m(&self) -> u8 { 138 | (*self < 0) as u8 139 | } 140 | } 141 | 142 | // implement HEncoding for all unsigned types. 143 | macro_rules! 
impl_unsigned { 144 | ($($t:ty),+) => { 145 | $( 146 | impl HEncoding<$t> for ($t, $t) { 147 | #[inline(always)] 148 | fn zero() -> Self { 149 | (0, 0) 150 | } 151 | #[inline(always)] 152 | fn one() -> Self { 153 | (1, 0) 154 | } 155 | #[inline(always)] 156 | fn from(p: $t, m: $t) -> Self { 157 | (p as $t, m as $t) 158 | } 159 | #[inline(always)] 160 | fn value(&self) -> Cost { 161 | self.0 as Cost - self.1 as Cost 162 | } 163 | #[inline(always)] 164 | fn p(&self) -> $t { 165 | self.0 as $t 166 | } 167 | #[inline(always)] 168 | fn m(&self) -> $t { 169 | self.1 as $t 170 | } 171 | } 172 | )+ 173 | } 174 | } 175 | impl_unsigned!(u8, u16, u32, u64); 176 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![feature(portable_simd, int_roundings, let_chains)] 2 | 3 | use std::simd::Simd; 4 | mod bitpacking; 5 | mod delta_encoding; 6 | 7 | pub mod profiles { 8 | mod ascii; 9 | mod dna; 10 | mod iupac; 11 | mod profile; 12 | 13 | pub use ascii::{Ascii, CaseInsensitiveAscii, CaseSensitiveAscii}; 14 | pub use dna::Dna; 15 | pub use iupac::Iupac; 16 | pub use profile::Profile; 17 | } 18 | 19 | mod minima; 20 | pub mod search; 21 | mod trace; 22 | 23 | #[doc(hidden)] 24 | pub mod private { 25 | pub use crate::minima::{prefix_min, prefix_min_k, prefix_min_k_simd}; 26 | } 27 | 28 | #[cfg(feature = "avx512")] 29 | const LANES: usize = 8; 30 | #[cfg(not(feature = "avx512"))] 31 | const LANES: usize = 4; 32 | type S = Simd; 33 | -------------------------------------------------------------------------------- /src/minima.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | delta_encoding::{V, VEncoding}, 3 | search::Deltas, 4 | }; 5 | use pa_types::Cost; 6 | use std::{ 7 | arch::x86_64::_pext_u64, 8 | cmp::min, 9 | simd::{Simd, num::SimdUint}, 10 | }; 11 | 12 | // Note: also reports minima at the end of 
the range. 13 | #[allow(unused)] // only for testing 14 | pub fn find_local_minima_slow(query: &[u8], deltas: &[V], k: Cost) -> Vec<(usize, Cost)> { 15 | let mut valleys = Vec::new(); 16 | let mut is_decreasing = false; 17 | let mut prev_cost = query.len() as Cost; 18 | let mut cur_cost = query.len() as Cost; 19 | 20 | for (word_idx, v) in deltas.iter().enumerate() { 21 | let (p, m) = v.pm(); 22 | 23 | for bit in 0..64 { 24 | // Calculate cost 25 | let p_bit = (p >> bit) & 1; 26 | let m_bit = (m >> bit) & 1; 27 | 28 | cur_cost += (p_bit as Cost) - (m_bit as Cost); 29 | if cur_cost > prev_cost && is_decreasing { 30 | if prev_cost <= k { 31 | // Going up, but we were going down 32 | let value = (word_idx * 64 + bit - 1, prev_cost); 33 | // debug!("Push {value:?}"); 34 | valleys.push(value); // relative prev pos 35 | } 36 | is_decreasing = false; 37 | } else if cur_cost < prev_cost { 38 | is_decreasing = true; 39 | } 40 | prev_cost = cur_cost; 41 | } 42 | } 43 | 44 | // If we ended while decreasing, add the final position 45 | if cur_cost <= k && is_decreasing { 46 | valleys.push((deltas.len() * 64 - 1, cur_cost)); 47 | } 48 | valleys 49 | } 50 | 51 | pub fn find_local_minima( 52 | query: &[u8], 53 | deltas: &mut Deltas, 54 | k: Cost, 55 | text_len: usize, 56 | ) -> Vec<(usize, Cost)> { 57 | let mut prev_cost = query.len() as Cost; 58 | let mut cur_cost = query.len() as Cost; 59 | let mut all_valleys = Vec::new(); 60 | let mut is_decreasing = false; 61 | 62 | // Handle overhang bits (bits beyond text_len) 63 | // We set all overhang bits to increasing edits to avoid false valleys 64 | let overhang = deltas.len() * 64 - text_len; 65 | if overhang > 0 { 66 | let mut remaining = overhang; 67 | for delta in deltas.iter_mut().rev() { 68 | if remaining >= 64 { 69 | delta.0 = Cost::MAX; 70 | delta.1 = V(u64::MAX, 0); 71 | remaining -= 64; 72 | if remaining == 0 { 73 | break; 74 | } 75 | } else { 76 | // partial overhang 77 | let mask = (u64::MAX) << (64 - remaining); 78 | 
let (mut p, mut m) = delta.1.pm(); 79 | p |= mask; 80 | m &= !mask; 81 | delta.1 = V(p, m); 82 | break; 83 | } 84 | } 85 | } 86 | 87 | for (word_idx, v) in deltas.iter().enumerate() { 88 | if v.0 == Cost::MAX { 89 | continue; 90 | } 91 | cur_cost = v.0; 92 | let (min, delta) = prefix_min(v.1.0, v.1.1); 93 | if cur_cost + (min as Cost) <= k { 94 | // FIXME? 95 | let (p, m) = v.1.pm(); 96 | // Get positions where cost changes occur 97 | let mut changes = p | m; 98 | while changes != 0 { 99 | let pos = changes.trailing_zeros() as usize; 100 | let p_bit = (p >> pos) & 1; 101 | let m_bit = (m >> pos) & 1; 102 | cur_cost += (p_bit as Cost) - (m_bit as Cost); 103 | if cur_cost > prev_cost && is_decreasing { 104 | if prev_cost <= k { 105 | all_valleys.push((word_idx * 64 + pos, prev_cost)); 106 | } 107 | is_decreasing = false; 108 | } else if cur_cost < prev_cost { 109 | is_decreasing = true; 110 | } 111 | prev_cost = cur_cost; 112 | // Clear the processed bit 113 | changes &= changes - 1; 114 | } 115 | } else { 116 | cur_cost += delta as Cost; 117 | prev_cost = cur_cost; 118 | } 119 | } 120 | 121 | // Add valley at right end if still decreasing 122 | if cur_cost <= k && is_decreasing { 123 | all_valleys.push((deltas.len() * 64, cur_cost)); 124 | } 125 | 126 | all_valleys 127 | } 128 | 129 | /// Baseline implementation. 130 | #[allow(unused)] 131 | fn find_below_threshold( 132 | query: &[u8], 133 | threshold: Cost, 134 | deltas: &Deltas, 135 | positions: &mut Vec, 136 | costs: &mut Vec, 137 | ) { 138 | let mut cur_cost = query.len() as Cost; 139 | for (i, v) in deltas.iter().enumerate() { 140 | let (min, delta) = prefix_min(v.1.0, v.1.1); 141 | if cur_cost + (min as Cost) <= threshold { 142 | positions.push(i * 64); 143 | // Cost at start of block 144 | costs.push(cur_cost as Cost); 145 | } 146 | cur_cost += delta as Cost; 147 | } 148 | } 149 | 150 | /// For each byte: (min_cost, end_cost) 151 | /// Each 1 in a byte indicates -1. 152 | /// Each 0 in a byte indicates +1. 
153 | const TABLE: [(i8, i8); 256] = { 154 | let mut table = [(0, 0); 256]; 155 | 156 | let mut i = 0; 157 | while i < 256 { 158 | let mut min = 0; 159 | let mut cur = 0; 160 | let mut j = 0; 161 | while j < 8 { 162 | let bit = (i >> j) & 1; 163 | let delta = if bit == 1 { -1 } else { 1 }; 164 | cur += delta; 165 | if cur < min { 166 | min = cur; 167 | } 168 | j += 1; 169 | } 170 | table[i] = (min, cur); 171 | i += 1; 172 | } 173 | 174 | table 175 | }; 176 | 177 | pub fn find_all_minima( 178 | query: &[u8], 179 | deltas: &mut Deltas, 180 | k: Cost, 181 | text_len: usize, 182 | ) -> Vec<(usize, Cost)> { 183 | let mut cost = query.len() as Cost; 184 | let mut all_valleys = Vec::new(); 185 | 186 | // Iterate through each block of 64 positions 187 | for (word_idx, v) in deltas.iter().enumerate() { 188 | if v.0 == Cost::MAX { 189 | continue; 190 | } 191 | 192 | // Reset cost at start of block 193 | cost = v.0; 194 | let (p, m) = v.1.pm(); 195 | let base = word_idx * 64; 196 | 197 | // Step through every bit position 198 | for bit in 0..64 { 199 | let pos = base + bit; 200 | if pos > text_len { 201 | break; 202 | } 203 | 204 | // Check valley before applying change 205 | if cost <= k { 206 | all_valleys.push((pos, cost)); 207 | } 208 | 209 | // Update cost if there's a change at this bit 210 | let p_bit = ((p >> bit) & 1) as Cost; 211 | let m_bit = ((m >> bit) & 1) as Cost; 212 | cost += p_bit; 213 | cost -= m_bit; 214 | } 215 | } 216 | 217 | // Check end-of-text position 218 | if cost <= k && text_len > 0 { 219 | all_valleys.push((text_len, cost)); 220 | } 221 | 222 | all_valleys 223 | } 224 | 225 | /// Compute any prefix min <= k over 8 bytes via SIMD vectorized DP approach. 
226 | #[inline(always)] 227 | pub fn prefix_min(p: u64, m: u64) -> (i8, i8) { 228 | // extract only the relevant chars 229 | let delta = p | m; 230 | let num_p = p.count_ones(); 231 | let num_m = m.count_ones(); 232 | let deltas = unsafe { _pext_u64(m, delta) }; 233 | let mut min = 0; 234 | let mut cur = 0; 235 | for i in 0..8 { 236 | let byte = (deltas >> (i * 8)) as u8 as usize; 237 | let (min_cost, end_cost) = TABLE[byte]; 238 | min = min.min(cur + min_cost); 239 | cur += end_cost; 240 | } 241 | (min, num_p as i8 - num_m as i8) 242 | } 243 | 244 | // split TABLE into two flat arrays for SIMD gather test 245 | const TABLE_MIN: [Cost; 256] = { 246 | let mut a = [0 as Cost; 256]; 247 | let mut i = 0; 248 | while i < 256 { 249 | a[i] = TABLE[i].0 as Cost; 250 | i += 1; 251 | } 252 | a 253 | }; 254 | 255 | const TABLE_END: [Cost; 256] = { 256 | let mut a = [0 as Cost; 256]; 257 | let mut i = 0; 258 | while i < 256 { 259 | a[i] = TABLE[i].1 as Cost; 260 | i += 1; 261 | } 262 | a 263 | }; 264 | 265 | // Precompute for each m pattern whether it can reach <= k 266 | const M_PATTERN_CAN_REACH_K: [bool; 256] = { 267 | let mut table = [false; 256]; 268 | let mut pattern = 0; 269 | while pattern < 256 { 270 | // For k=1, we only care if the number of set bits is <= 1 271 | // This is because each set bit contributes -1 to the cost 272 | table[pattern] = (pattern as u8).count_ones() <= 1; 273 | pattern += 1; 274 | } 275 | table 276 | }; 277 | 278 | #[inline(always)] 279 | pub fn prefix_min_k(start_cost: Cost, p: u64, m: u64, k: i32) -> (Cost, i8) { 280 | let delta = p | m; 281 | let num_p = p.count_ones() as i8; 282 | let num_m = m.count_ones() as i8; 283 | let compressed_m = unsafe { _pext_u64(m, delta) }; 284 | 285 | let mut min_cost = start_cost; 286 | let mut cur_cost = start_cost; 287 | let mut remaining_m = num_m as i32; 288 | 289 | for i in 0..8 { 290 | let byte = (compressed_m >> (i * 8)) as u8 as usize; 291 | 292 | // Quick check if this m pattern can ever reach <= k 
293 | if !M_PATTERN_CAN_REACH_K[byte] { 294 | remaining_m -= (byte as u8).count_ones() as i32; 295 | if min_cost > k && cur_cost - remaining_m > k { 296 | break; 297 | } 298 | continue; 299 | } 300 | 301 | let (tbl_min, tbl_end) = TABLE[byte]; 302 | min_cost = min(min_cost, cur_cost + tbl_min as Cost); 303 | cur_cost += tbl_end as Cost; 304 | remaining_m -= (byte as u8).count_ones() as i32; 305 | 306 | if min_cost > k && cur_cost - remaining_m > k { 307 | break; 308 | } 309 | } 310 | 311 | (min_cost, num_p - num_m) 312 | } 313 | 314 | #[inline(always)] 315 | pub fn prefix_min_k_simd(start_cost: Cost, p: u64, m: u64, k: i32) -> (Cost, i8) { 316 | let delta = p | m; 317 | let num_p = p.count_ones() as i8; 318 | let num_m = m.count_ones() as i8; 319 | let compressed_m = unsafe { _pext_u64(m, delta) }; 320 | 321 | let byte_vec: Simd = Simd::from_array([ 322 | compressed_m as u8, 323 | (compressed_m >> 8) as u8, 324 | (compressed_m >> 16) as u8, 325 | (compressed_m >> 24) as u8, 326 | (compressed_m >> 32) as u8, 327 | (compressed_m >> 40) as u8, 328 | (compressed_m >> 48) as u8, 329 | (compressed_m >> 56) as u8, 330 | ]); 331 | let byte_pop: [u8; 8] = byte_vec.count_ones().to_array(); 332 | 333 | // Fixme: we could pass as mut so we dont have to realloc 334 | let mut rem_pop = [0i32; 9]; 335 | for i in (0..8).rev() { 336 | rem_pop[i] = rem_pop[i + 1] + (byte_pop[i] as i32); 337 | } 338 | 339 | let idxs: Simd = byte_vec.cast(); // lanes = the raw bytes 340 | let mins_vec = Simd::gather_or_default(&TABLE_MIN, idxs); 341 | let ends_vec = Simd::gather_or_default(&TABLE_END, idxs); 342 | 343 | let mins_arr = mins_vec.to_array(); 344 | let ends_arr = ends_vec.to_array(); 345 | 346 | // We have to do this loop sequentially 347 | // (or simd scan prefix idea but that's also a lot of operations and no earlye exit) 348 | let mut min_cost = start_cost; 349 | let mut cur_cost = start_cost; 350 | 351 | for i in 0..8 { 352 | let tbl_min = mins_arr[i]; 353 | let tbl_end = ends_arr[i]; 
354 | 355 | min_cost = min(min_cost, cur_cost + tbl_min); 356 | cur_cost += tbl_end; 357 | 358 | let best_possible = cur_cost - rem_pop[i + 1]; 359 | if min_cost > k && best_possible > k { 360 | break; 361 | } 362 | } 363 | 364 | (min_cost, num_p - num_m) 365 | } 366 | 367 | #[cfg(test)] 368 | mod test { 369 | use super::*; 370 | 371 | /// Create Vencoding from (position, delta) vec 372 | fn make_pattern(changes: &[(usize, i8)]) -> V { 373 | let mut p = 0u64; 374 | let mut m = 0u64; 375 | for &(pos, delta) in changes { 376 | assert!(pos < 64, "Position must be < 64"); 377 | if delta > 0 { 378 | p |= 1u64 << pos; 379 | } else if delta < 0 { 380 | m |= 1u64 << pos; 381 | } 382 | } 383 | V(p, m) 384 | } 385 | 386 | #[test] 387 | fn test_multiple_valleys() { 388 | let v1 = make_pattern(&[ 389 | (0, -1), 390 | (1, -1), 391 | (2, 1), 392 | (3, 1), 393 | (4, -1), 394 | (5, -1), 395 | (6, 1), 396 | (7, 1), 397 | ]); 398 | let mut deltas = vec![(0, v1)]; 399 | let minima = find_local_minima(b"ATG", &mut deltas, 100, 64); 400 | println!("Minima: {:?}", minima); 401 | assert_eq!(minima, vec![(2, 1), (6, 1)]); 402 | } 403 | 404 | #[test] 405 | fn test_valley_with_plateau() { 406 | let v1 = make_pattern(&[(10, -1), (11, -1), (15, 1), (16, 1)]); 407 | let v2 = V(0, 0); 408 | let v3 = V(0, 0); 409 | let mut deltas = vec![(0, v1), (0, v2), (0, v3)]; 410 | let minima = find_local_minima(b"ATG", &mut deltas, 100, 64 * 3); 411 | assert_eq!(minima, vec![(15, 1)]); // valley at end of plateau 412 | } 413 | 414 | #[test] 415 | fn test_long_cross_word_valley() { 416 | let v1 = make_pattern(&[(62, -1), (63, -1)]); 417 | let v2 = V(0, 0); 418 | let v3 = make_pattern(&[(0, 1), (1, 1)]); 419 | let mut deltas = vec![(0, v1), (-2, v2), (0, v3)]; 420 | let minima = find_local_minima(b"ATG", &mut deltas, 100, 64 * 3); 421 | assert_eq!(minima, vec![(64 * 2, 1)]); // valley at end of second word 422 | } 423 | 424 | #[test] 425 | fn test_cost_calculation_simple() { 426 | let v1 = make_pattern(&[(0, 
-1), (1, -1), (2, 1)]); 427 | let v2 = V(0, 0); 428 | let v3 = V(0, 0); 429 | let mut deltas = vec![(0, v1), (-1, v2), (-1, v3)]; 430 | let minima = find_local_minima(b"ATG", &mut deltas, 100, 64 * 3); 431 | assert_eq!(minima, vec![(2, 1)]); // valley at position 1 with cost 8 432 | } 433 | 434 | #[test] 435 | fn test_cost_calculation_complex() { 436 | let v1 = make_pattern(&[(62, -1), (63, -1)]); 437 | let v2 = V(0, 0); 438 | let v3 = make_pattern(&[(0, 1), (1, 1)]); 439 | let mut deltas = vec![(0, v1), (-2, v2), (-2, v3)]; 440 | let minima = find_local_minima(&[b'A'; 20], &mut deltas, 100, 64 * 3); 441 | assert_eq!(minima, vec![(64 * 2, 18)]); // valley at end of second word with cost 18 442 | } 443 | 444 | #[test] 445 | fn test_at_right_end() { 446 | let v1 = make_pattern(&[(0, -1), (1, -1)]); 447 | let mut deltas = vec![(0, v1)]; 448 | let minima = find_local_minima(b"ATG", &mut deltas, 100, 64); 449 | assert_eq!(minima, vec![(64, 1)]); // We end with a valley, right end true, so still report 450 | } 451 | } 452 | -------------------------------------------------------------------------------- /src/profiles/ascii.rs: -------------------------------------------------------------------------------- 1 | use crate::profiles::Profile; 2 | use std::{ 3 | mem::transmute, 4 | simd::{ 5 | cmp::{SimdPartialEq, SimdPartialOrd}, 6 | u8x32, 7 | }, 8 | }; 9 | 10 | #[derive(Clone, Debug)] 11 | pub struct Ascii { 12 | bases: Vec, 13 | } 14 | 15 | pub type CaseSensitiveAscii = Ascii; 16 | pub type CaseInsensitiveAscii = Ascii; 17 | 18 | impl Profile for Ascii { 19 | type A = usize; 20 | type B = [u64; 256]; // Maximum number of ASCII characters 21 | 22 | fn encode_query(a: &[u8]) -> (Self, Vec) { 23 | let mut bases = Vec::new(); 24 | let mut query_profile = Vec::with_capacity(a.len()); 25 | for &c in a { 26 | if !bases.contains(&c) { 27 | bases.push(c); 28 | } 29 | query_profile.push(bases.iter().position(|&x| x == c).unwrap()); 30 | } 31 | (Ascii { bases }, query_profile) 32 | 
} 33 | 34 | #[inline(always)] 35 | fn encode_ref(&self, b: &[u8; 64], out: &mut Self::B) { 36 | if CASE_SENSITIVE { 37 | ascii_u64_search(b, &self.bases, out); 38 | } else { 39 | ascii_u64_search_case_insensitive(b, &self.bases, out); 40 | } 41 | } 42 | 43 | #[inline(always)] 44 | fn eq(ca: &usize, cb: &[u64; 256]) -> u64 { 45 | unsafe { *cb.get_unchecked(*ca) } 46 | } 47 | 48 | #[inline(always)] 49 | fn is_match(char1: u8, char2: u8) -> bool { 50 | if CASE_SENSITIVE { 51 | char1.eq(&char2) 52 | } else { 53 | // Safe rust version to handle cases only in Az range 54 | char1.eq_ignore_ascii_case(&char2) 55 | } 56 | } 57 | 58 | #[inline(always)] 59 | fn alloc_out() -> Self::B { 60 | [0; 256] 61 | } 62 | 63 | #[inline(always)] 64 | fn n_bases(&self) -> usize { 65 | self.bases.len() 66 | } 67 | 68 | #[inline(always)] 69 | fn valid_seq(&self, _seq: &[u8]) -> bool { 70 | true // assuming every u8 is valid ascii 71 | } 72 | } 73 | 74 | #[inline(always)] 75 | pub fn ascii_u64_search(seq: &[u8; 64], bases: &[u8], out: &mut [u64]) { 76 | unsafe { 77 | let chunk0 = u8x32::from_array(seq[0..32].try_into().unwrap()); 78 | let chunk1 = u8x32::from_array(seq[32..64].try_into().unwrap()); 79 | 80 | for (i, &base) in bases.iter().enumerate() { 81 | let m = u8x32::splat(base); 82 | let eq0 = chunk0.simd_eq(m); 83 | let eq1 = chunk1.simd_eq(m); 84 | let low = eq0.to_bitmask(); 85 | let high = eq1.to_bitmask(); 86 | *out.get_unchecked_mut(i) = (high << 32) | low; 87 | } 88 | } 89 | } 90 | 91 | // FIXME: Tests 92 | #[inline(always)] 93 | fn ascii_u64_search_case_insensitive(seq: &[u8; 64], bases: &[u8], out: &mut [u64]) { 94 | unsafe { 95 | let chunk0 = u8x32::from_array(seq[0..32].try_into().unwrap()); 96 | let chunk1 = u8x32::from_array(seq[32..64].try_into().unwrap()); 97 | 98 | const A: u8 = b'A'; 99 | const Z: u8 = b'Z'; 100 | let to_lowercase = b'a' - b'A'; 101 | let is_char0 = chunk0.simd_ge(u8x32::splat(A)) & chunk0.simd_le(u8x32::splat(Z)); 102 | let is_char1 = 
chunk1.simd_ge(u8x32::splat(A)) & chunk1.simd_le(u8x32::splat(Z)); 103 | // Transmute from i8x32 to u8x32 104 | let lower0 = 105 | chunk0 | (u8x32::splat(to_lowercase) & transmute::<_, u8x32>(is_char0.to_int())); 106 | let lower1 = 107 | chunk1 | (u8x32::splat(to_lowercase) & transmute::<_, u8x32>(is_char1.to_int())); 108 | 109 | for (i, &base) in bases.iter().enumerate() { 110 | let m = u8x32::splat(base | 0x20); 111 | let eq0 = lower0.simd_eq(m); 112 | let eq1 = lower1.simd_eq(m); 113 | let low = eq0.to_bitmask(); 114 | let high = eq1.to_bitmask(); 115 | *out.get_unchecked_mut(i) = (high << 32) | low; 116 | } 117 | } 118 | } 119 | 120 | #[cfg(test)] 121 | mod test { 122 | use super::*; 123 | 124 | fn get_match_positions(out: &[u64]) -> Vec> { 125 | let mut positions = vec![vec![]; out.len()]; 126 | for (i, _) in out.iter().enumerate() { 127 | let bits = out[i]; 128 | for j in 0..64 { 129 | if (bits & (1u64 << j)) != 0 { 130 | positions[i].push(j); 131 | } 132 | } 133 | } 134 | positions 135 | } 136 | 137 | const HELLO_TEST_SEQ: [u8; 64] = { 138 | let mut seq = [b'H'; 64]; 139 | seq[0] = b'E'; 140 | seq[1] = b'l'; 141 | seq[2] = b'L'; 142 | seq[3] = b'o'; 143 | seq 144 | }; 145 | 146 | const HELLO_TEST_BASES: [u8; 3] = [b'H', b'l', b'o']; 147 | 148 | #[test] 149 | fn test_ascii_is_match() { 150 | // Case sensitive 151 | assert!(Ascii::::is_match(b'H', b'H')); 152 | assert!(!Ascii::::is_match(b'l', b'L')); // Should not match 153 | // Case insensitive 154 | assert!(Ascii::::is_match(b'H', b'H')); 155 | assert!(Ascii::::is_match(b'l', b'L')); // Should match now 156 | } 157 | 158 | #[test] 159 | fn test_ascii_u64_search() { 160 | let mut out = vec![0u64; 3]; 161 | ascii_u64_search(&HELLO_TEST_SEQ, &HELLO_TEST_BASES, &mut out); 162 | let positions = get_match_positions(&out); 163 | assert_eq!(positions[0], (4..64).collect::>()); 164 | assert_eq!(positions[1], vec![1]); 165 | assert_eq!(positions[2], vec![3]); 166 | } 167 | 168 | #[test] 169 | fn 
test_ascii_u64_search_case_insensitive() { 170 | let mut out = vec![0u64; 3]; 171 | ascii_u64_search_case_insensitive(&HELLO_TEST_SEQ, &HELLO_TEST_BASES, &mut out); 172 | let positions = get_match_positions(&out); 173 | assert_eq!(positions[1], vec![1, 2]); // l and L 174 | } 175 | 176 | #[test] 177 | fn test_ascii_u64_search_case_sensitive() { 178 | let mut out = vec![0u64; 3]; 179 | ascii_u64_search(&HELLO_TEST_SEQ, &HELLO_TEST_BASES, &mut out); 180 | let positions = get_match_positions(&out); 181 | assert_eq!(positions[1], vec![1]); // only l 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/profiles/dna.rs: -------------------------------------------------------------------------------- 1 | use crate::profiles::Profile; 2 | use std::{ 3 | simd::cmp::SimdPartialEq, 4 | simd::{Simd, u8x32}, 5 | }; 6 | 7 | #[derive(Clone, Debug)] 8 | pub struct Dna { 9 | bases: Vec, 10 | } 11 | 12 | impl Profile for Dna { 13 | type A = u8; 14 | type B = [u64; 4]; 15 | 16 | fn encode_query(a: &[u8]) -> (Self, Vec) { 17 | let bases = vec![b'A', b'C', b'T', b'G']; 18 | let query_profile = a.iter().map(|c| (c >> 1) & 3).collect(); 19 | (Dna { bases }, query_profile) 20 | } 21 | 22 | #[inline(always)] 23 | fn encode_ref(&self, b: &[u8; 64], out: &mut Self::B) { 24 | unsafe { 25 | let chunk0 = u8x32::from_array(b[0..32].try_into().unwrap()); 26 | let chunk1 = u8x32::from_array(b[32..64].try_into().unwrap()); 27 | let chunk0_shifted = chunk0 >> 1; 28 | let chunk1_shifted = chunk1 >> 1; 29 | let masked0 = chunk0_shifted & u8x32::splat(0x03); 30 | let masked1 = chunk1_shifted & u8x32::splat(0x03); 31 | for (i, code) in CODES.iter().enumerate() { 32 | let eq0 = masked0.simd_eq(*code); 33 | let eq1 = masked1.simd_eq(*code); 34 | let low = eq0.to_bitmask(); 35 | let high = eq1.to_bitmask(); 36 | *out.get_unchecked_mut(i) = (high << 32) | low; 37 | } 38 | }; 39 | } 40 | 41 | #[inline(always)] 42 | fn eq(ca: &u8, cb: &[u64; 4]) -> u64 { 
43 | unsafe { *cb.get_unchecked(*ca as usize) } 44 | } 45 | 46 | #[inline(always)] 47 | fn is_match(char1: u8, char2: u8) -> bool { 48 | (char1 | 0x20) == (char2 | 0x20) 49 | } 50 | 51 | #[inline(always)] 52 | fn alloc_out() -> Self::B { 53 | [0; 4] 54 | } 55 | 56 | #[inline(always)] 57 | fn n_bases(&self) -> usize { 58 | self.bases.len() 59 | } 60 | 61 | #[inline(always)] 62 | fn valid_seq(&self, seq: &[u8]) -> bool { 63 | // we’ll do 32-byte chunks 64 | const LANES: usize = 32; 65 | type V = Simd; 66 | 67 | let len = seq.len(); 68 | let mut i = 0; 69 | 70 | // Split in 32-byte chunks (u8 * 32) 71 | while i + LANES <= len { 72 | let chunk = V::from_slice(&seq[i..i + LANES]); 73 | // lowercase, setting 5th bit, might transform some ascii to 74 | // other ascii but that's fine 75 | let lowered = chunk | V::splat(0x20); 76 | let is_a = lowered.simd_eq(V::splat(b'a')); 77 | let is_c = lowered.simd_eq(V::splat(b'c')); 78 | let is_g = lowered.simd_eq(V::splat(b'g')); 79 | let is_t = lowered.simd_eq(V::splat(b't')); 80 | let ok = is_a | is_c | is_g | is_t; 81 | if !ok.all() { 82 | return false; 83 | } 84 | 85 | i += LANES; 86 | } 87 | 88 | // Whatever non 32 tail is left 89 | while i < len { 90 | println!("Tail check"); 91 | let c = seq[i] | 0x20; // lowercase 92 | if c != b'a' && c != b'c' && c != b'g' && c != b't' { 93 | return false; 94 | } 95 | i += 1; 96 | } 97 | 98 | true 99 | } 100 | 101 | fn reverse_complement(query: &[u8]) -> Vec { 102 | query.iter().rev().map(|&c| RC[c as usize]).collect() 103 | } 104 | 105 | fn complement(query: &[u8]) -> Vec { 106 | query.iter().map(|&c| RC[c as usize]).collect() 107 | } 108 | } 109 | 110 | // Same order as iupac 111 | const CODES: [u8x32; 4] = [ 112 | u8x32::splat(0u8), // A 113 | u8x32::splat(1u8), // C 114 | u8x32::splat(2u8), // T 115 | u8x32::splat(3u8), // G 116 | ]; 117 | 118 | const RC: [u8; 256] = { 119 | let mut rc = [0; 256]; 120 | let mut i = 0; 121 | while i < 256 { 122 | rc[i] = i as u8; 123 | i += 1; 124 | } 
/// Complement lookup table for DNA bases.
///
/// `A<->T` and `C<->G` are swapped in both upper and lower case, preserving
/// case. Every other byte maps to itself.
const RC: [u8; 256] = {
    let mut rc = [0; 256];
    let mut i = 0;
    while i < 256 {
        rc[i] = i as u8;
        i += 1;
    }
    rc[b'A' as usize] = b'T';
    rc[b'C' as usize] = b'G';
    rc[b'T' as usize] = b'A';
    rc[b'G' as usize] = b'C';
    // Lowercase bases are accepted by the rest of the profile, so complement them too.
    rc[b'a' as usize] = b't';
    rc[b'c' as usize] = b'g';
    rc[b't' as usize] = b'a';
    rc[b'g' as usize] = b'c';
    rc
};
get_match_positions(&out); 191 | assert_eq!(positions[0], vec![0, 1]); 192 | assert_eq!(positions[1], vec![63]); 193 | assert_eq!(positions[2], Vec::::new()); 194 | assert_eq!(positions[3], (2..63).collect::>()); 195 | } 196 | 197 | #[test] 198 | fn test_dna_u64_case_insensitive() { 199 | let mut seq = [b'G'; 64]; 200 | seq[0] = b'a'; 201 | seq[1] = b'A'; 202 | let mut out = [0u64; 4]; 203 | { 204 | let seq: &[u8; 64] = &seq; 205 | let out: &mut [u64; 4] = &mut out; 206 | unsafe { 207 | let chunk0 = u8x32::from_array(seq[0..32].try_into().unwrap()); 208 | let chunk1 = u8x32::from_array(seq[32..64].try_into().unwrap()); 209 | let chunk0_shifted = chunk0 >> 1; 210 | let chunk1_shifted = chunk1 >> 1; 211 | let masked0 = chunk0_shifted & u8x32::splat(0x03); 212 | let masked1 = chunk1_shifted & u8x32::splat(0x03); 213 | for (i, code) in CODES.iter().enumerate() { 214 | let eq0 = masked0.simd_eq(*code); 215 | let eq1 = masked1.simd_eq(*code); 216 | let low = eq0.to_bitmask(); 217 | let high = eq1.to_bitmask(); 218 | *out.get_unchecked_mut(i) = (high << 32) | low; 219 | } 220 | } 221 | }; 222 | let positions = get_match_positions(&out); 223 | assert_eq!(positions[0], vec![0, 1]); 224 | } 225 | 226 | fn non_actg_bytes(n: isize) -> Vec { 227 | // Create a vector of all bytes that are not DNA bases 228 | let non_dna_chars = (0u8..=255) 229 | .filter(|&b| !matches!(b.to_ascii_uppercase(), b'A' | b'C' | b'G' | b'T')) 230 | .collect::>(); 231 | 232 | if n == -1 { 233 | // return all (unqiue) non dna bytes 234 | non_dna_chars 235 | } else { 236 | let mut seq = vec![0u8; n as usize]; 237 | for i in 0..n as usize { 238 | seq[i] = non_dna_chars[rand::random_range(0..non_dna_chars.len())]; 239 | } 240 | seq 241 | } 242 | } 243 | 244 | #[test] 245 | fn test_dna_valid_seq_empty() { 246 | let dna = Dna::encode_query(b"ACGT").0; 247 | assert!(dna.valid_seq(b"")); // Not sure if this should be valid or not 248 | } 249 | 250 | #[test] 251 | fn test_dna_valid_seq() { 252 | // scalar, dna 
(as <32); valid 253 | let dna = Dna::encode_query(b"ACGT").0; 254 | assert!(dna.valid_seq(b"ACGTactg")); 255 | 256 | // scalar, non-dna; invalid 257 | // -1 is all ascii which are not dna 258 | let non_actg = non_actg_bytes(-1); 259 | assert!(!dna.valid_seq(&non_actg)); 260 | 261 | // 32-byte chunks, dna; valid 262 | let seq = [b'A', b'C', b'T', b'G', b'a', b'c', b't', b'g'].repeat(32); 263 | assert!(dna.valid_seq(&seq)); 264 | 265 | // 32-byte chunks, non-dna; invalid 266 | let seq = non_actg_bytes(256); 267 | assert!(!dna.valid_seq(&seq)); 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /src/profiles/iupac.rs: -------------------------------------------------------------------------------- 1 | use crate::profiles::Profile; 2 | use std::{ 3 | arch::x86_64::*, 4 | mem::transmute, 5 | simd::{cmp::SimdPartialOrd, u8x32}, 6 | }; 7 | 8 | #[derive(Clone, Debug)] 9 | pub struct Iupac { 10 | bases: Vec, 11 | } 12 | 13 | impl Profile for Iupac { 14 | type A = usize; 15 | type B = [u64; 16]; 16 | 17 | fn encode_query(a: &[u8]) -> (Self, Vec) { 18 | let mut bases = vec![b'A', b'C', b'T', b'G']; 19 | let mut query_profile = Vec::with_capacity(a.len()); 20 | for &c in a { 21 | if !bases.contains(&c) { 22 | bases.push(c); 23 | } 24 | query_profile.push(bases.iter().position(|&x| x == c).unwrap()); 25 | } 26 | (Iupac { bases }, query_profile) 27 | } 28 | 29 | /// NOTE: `out` should be initialized using `self.alloc_out()`. 
30 | #[inline(always)] 31 | fn encode_ref(&self, b: &[u8; 64], out: &mut Self::B) { 32 | assert!(self.bases.len() <= out.len()); 33 | let extra_bases: &[u8] = &self.bases[4..]; 34 | unsafe { 35 | let zero = u8x32::splat(0); 36 | let mask4 = u8x32::splat(0x0F); 37 | let tbl256 = u8x32::from_array(transmute([PACKED_NIBBLES, PACKED_NIBBLES])); 38 | 39 | let chunk0 = u8x32::from_array(b[0..32].try_into().unwrap()); 40 | let chunk1 = u8x32::from_array(b[32..64].try_into().unwrap()); 41 | 42 | let idx5_0 = chunk0 & u8x32::splat(0x1F); 43 | let idx5_1 = chunk1 & u8x32::splat(0x1F); 44 | let low4_0 = idx5_0 & mask4; 45 | let low4_1 = idx5_1 & mask4; 46 | 47 | let is_hi_0 = idx5_0.simd_ge(u8x32::splat(15)); 48 | let is_hi_1 = idx5_1.simd_ge(u8x32::splat(15)); 49 | 50 | let shuffled0: u8x32 = 51 | transmute(_mm256_shuffle_epi8(transmute(tbl256), transmute(low4_0))); 52 | let shuffled1: u8x32 = 53 | transmute(_mm256_shuffle_epi8(transmute(tbl256), transmute(low4_1))); 54 | 55 | let lo_nib0 = shuffled0 & mask4; 56 | let lo_nib1 = shuffled1 & mask4; 57 | 58 | let hi_nib0 = shuffled0 >> 4; 59 | let hi_nib1 = shuffled1 >> 4; 60 | 61 | let nib0 = is_hi_0.select(hi_nib0, lo_nib0); 62 | let nib1 = is_hi_1.select(hi_nib1, lo_nib1); 63 | 64 | for (i, &base) in [b'A', b'C', b'T', b'G'].iter().enumerate() { 65 | let m = u8x32::splat(get_encoded(base)); 66 | 67 | let match0 = (nib0 & m).simd_gt(zero); 68 | let match1 = (nib1 & m).simd_gt(zero); 69 | 70 | let low = match0.to_bitmask() as u64; 71 | let high = match1.to_bitmask() as u64; 72 | 73 | *out.get_unchecked_mut(i) = (high << 32) | low; 74 | } 75 | 76 | for (i, &base) in extra_bases.iter().enumerate() { 77 | let m = u8x32::splat(get_encoded(base)); 78 | 79 | let match0 = (nib0 & m).simd_gt(zero); 80 | let match1 = (nib1 & m).simd_gt(zero); 81 | 82 | let low = match0.to_bitmask() as u64; 83 | let high = match1.to_bitmask() as u64; 84 | 85 | *out.get_unchecked_mut(i + 4) = (high << 32) | low; 86 | } 87 | } 88 | } 89 | 90 | 
#[inline(always)] 91 | fn eq(ca: &usize, cb: &[u64; 16]) -> u64 { 92 | unsafe { *cb.get_unchecked(*ca) } 93 | } 94 | 95 | #[inline(always)] 96 | fn is_match(char1: u8, char2: u8) -> bool { 97 | (get_encoded(char1) & get_encoded(char2)) > 0 98 | } 99 | 100 | #[inline(always)] 101 | fn alloc_out() -> Self::B { 102 | [0; 16] //FIME: is this always valid? 103 | } 104 | 105 | #[inline(always)] 106 | fn n_bases(&self) -> usize { 107 | self.bases.len() 108 | } 109 | 110 | #[inline(always)] 111 | fn valid_seq(&self, seq: &[u8]) -> bool { 112 | const LANES: usize = 32; 113 | type V = u8x32; 114 | let len = seq.len(); 115 | let mut i = 0; 116 | unsafe { 117 | let mask4 = V::splat(0x0F); 118 | let tbl256 = V::from_array(transmute([ 119 | PACKED_NIBBLES_INDICATOR, 120 | PACKED_NIBBLES_INDICATOR, 121 | ])); 122 | while i + LANES <= len { 123 | let chunk = V::from_slice(&seq[i..i + LANES]); 124 | let upper = chunk & V::splat(!0x20); 125 | 126 | // Check if >= '@' (64) (=b'A'-1) and < 128. 127 | let in_range = upper.simd_ge(V::splat(64)) & upper.simd_lt(V::splat(128)); 128 | if !in_range.all() { 129 | return false; 130 | } 131 | 132 | let idx5 = upper & V::splat(0x1F); 133 | let low4 = idx5 & mask4; 134 | let is_hi = idx5.simd_ge(V::splat(16)); 135 | let shuffled: V = 136 | transmute(_mm256_shuffle_epi8(transmute(tbl256), transmute(low4))); 137 | let lo_nib = shuffled & mask4; 138 | let hi_nib = shuffled >> 4; 139 | let nib = is_hi.select(hi_nib, lo_nib); 140 | 141 | if !nib.simd_gt(V::splat(0)).all() { 142 | return false; 143 | } 144 | 145 | i += LANES; 146 | } 147 | } 148 | 149 | // Scalar for rest 150 | while i < len { 151 | let c = seq[i] & !0x20; 152 | if c <= b'@' || c >= b'Z' || IUPAC_CODE[(c & 0x1F) as usize] == 255 { 153 | return false; 154 | } 155 | i += 1; 156 | } 157 | 158 | true 159 | } 160 | 161 | #[inline(always)] 162 | fn reverse_complement(seq: &[u8]) -> Vec { 163 | seq.iter().rev().map(|&c| RC[c as usize]).collect() 164 | } 165 | 166 | // TODO: Implement this 
// using SIMD
/// Returns the complement of `seq` without reversing it; bytes without an
/// entry in `RC` are copied unchanged.
#[inline(always)]
fn complement(seq: &[u8]) -> Vec<u8> {
    seq.iter().map(|&c| RC[c as usize]).collect()
}
} // Closes the enclosing `impl` block, whose header lies before this excerpt.

/// Byte-to-complement lookup table.
///
/// Every byte maps to itself unless it is an IUPAC nucleotide letter, in which
/// case it maps to the letter for the complementary base set, keeping its case.
const RC: [u8; 256] = {
    // (letter, complement) in upper case; the lower-case rows are derived below.
    const PAIRS: [(u8, u8); 17] = [
        // Standard bases
        (b'A', b'T'),
        (b'C', b'G'),
        (b'T', b'A'),
        (b'G', b'C'),
        // `IUPAC_CODE` encodes U exactly like T, so it must complement to A as
        // well; leaving it unchanged would alter the encoded pattern.
        (b'U', b'A'),
        // IUPAC ambiguity codes
        (b'R', b'Y'), // A|G -> T|C
        (b'Y', b'R'), // C|T -> G|A
        (b'S', b'S'), // G|C -> C|G
        (b'W', b'W'), // A|T -> T|A
        (b'K', b'M'), // G|T -> C|A
        (b'M', b'K'), // A|C -> T|G
        (b'B', b'V'), // C|G|T -> G|C|A
        (b'D', b'H'), // A|G|T -> T|C|A
        (b'H', b'D'), // A|C|T -> T|G|A
        (b'V', b'B'), // A|C|G -> T|G|C
        (b'N', b'N'), // A|C|G|T -> T|G|C|A
        (b'X', b'X'),
    ];

    // Start from the identity mapping.
    let mut rc = [0u8; 256];
    let mut i = 0;
    while i < 256 {
        rc[i] = i as u8;
        i += 1;
    }

    // Fill in both cases of every pair.
    let mut p = 0;
    while p < PAIRS.len() {
        let (from, to) = PAIRS[p];
        rc[from as usize] = to;
        rc[from.to_ascii_lowercase() as usize] = to.to_ascii_lowercase();
        p += 1;
    }
    rc
};

/// Base-set code for each letter, indexed by the low five bits of its ASCII
/// value: one bit per base (A, C, T, G), 0 for `X`, 255 for non-IUPAC letters.
#[rustfmt::skip]
const IUPAC_CODE: [u8; 32] = {
    let mut t = [255u8; 32];
    // Standard bases
    // Map ACGT -> [0,1,3,2], like packed_seq does.
    const A: u8 = 1 << 0;
    const C: u8 = 1 << 1;
    const T: u8 = 1 << 2;
    const G: u8 = 1 << 3;

    // Map common chars.
    // Lower case has the same last 5 bits as upper case.
    // (Thanks ASCII :)
    t[b'A' as usize & 0x1F] = A;
    t[b'C' as usize & 0x1F] = C;
    t[b'T' as usize & 0x1F] = T;
    t[b'U' as usize & 0x1F] = T;
    t[b'G' as usize & 0x1F] = G;
    t[b'N' as usize & 0x1F] = A|C|T|G;

    // IUPAC ambiguity codes
    // https://www.bioinformatics.org/sms/iupac.html
    t[b'R' as usize & 0x1F] = A|G;
    t[b'Y' as usize & 0x1F] = C|T;
    t[b'S' as usize & 0x1F] = G|C;
    t[b'W' as usize & 0x1F] = A|T;
    t[b'K' as usize & 0x1F] = G|T;
    t[b'M' as usize & 0x1F] = A|C;
    t[b'B' as usize & 0x1F] = C|G|T;
    t[b'D' as usize & 0x1F] = A|G|T;
    t[b'H' as usize & 0x1F] = A|C|T;
    t[b'V' as usize & 0x1F] = A|C|G;

    // Gap/unknown: an empty base set, so it never matches anything.
    t[b'X' as usize & 0x1F] = 0;

    t
};

/// Looks up the `IUPAC_CODE` entry of `c`, using only its low five bits.
#[inline(always)]
pub fn get_encoded(c: u8) -> u8 {
    IUPAC_CODE[(c & 0x1F) as usize]
}

/// `IUPAC_CODE` squeezed into 16 bytes so a single byte shuffle can index it:
/// entry `i` holds the code of index `i` in its low nibble and the code of
/// index `i + 16` in its high nibble.
const PACKED_NIBBLES: [u8; 16] = {
    let mut p = [0u8; 16];
    let mut i = 0;
    while i < 16 {
        // Non-IUPAC entries (255) are truncated to 0xF here.
        let lo = IUPAC_CODE[i] & 0x0F;
        let hi = IUPAC_CODE[i + 16] & 0x0F;
        // packed 8 bit of low nibbles(0-3) and high nibbles(4-7)
        p[i] = (hi << 4) | lo;
        i += 1;
    }
    p
};

/// Nibbles are 1111 for IUPAC chars, and 0000 for non-IUPAC chars.
276 | const PACKED_NIBBLES_INDICATOR: [u8; 16] = { 277 | let mut p = [0u8; 16]; 278 | let mut i = 0; 279 | while i < 16 { 280 | let lo = if IUPAC_CODE[i] < 255 { 0b1111 } else { 0 }; 281 | let hi = if IUPAC_CODE[i + 16] < 255 { 0b1111 } else { 0 }; 282 | // packed 8 bit of low nibbles(0-3) and high nibbles(4-7) 283 | p[i] = (hi << 4) | lo; 284 | i += 1; 285 | } 286 | p 287 | }; 288 | 289 | #[cfg(test)] 290 | mod test { 291 | use super::*; 292 | 293 | #[test] 294 | fn test_iupac_is_match() { 295 | assert!(Iupac::is_match(b'a', b'A')); 296 | assert!(Iupac::is_match(b'C', b'C')); 297 | assert!(Iupac::is_match(b'T', b't')); 298 | assert!(Iupac::is_match(b'G', b'G')); 299 | assert!(Iupac::is_match(b'y', b'Y')); 300 | assert!(Iupac::is_match(b'A', b'N')); 301 | assert!(Iupac::is_match(b'C', b'Y')); 302 | } 303 | 304 | fn get_match_positions_u64(result: &[u64]) -> Vec> { 305 | result 306 | .iter() 307 | .filter_map(|&base_result| { 308 | if base_result == 0 { 309 | None 310 | } else { 311 | let positions: Vec = (0..64) 312 | .filter(|&pos| (base_result & (1 << pos)) != 0) 313 | .collect(); 314 | Some(positions) 315 | } 316 | }) 317 | .collect() 318 | } 319 | 320 | #[test] 321 | fn test_just_atgc() { 322 | let mut seq = [b'g'; 64]; 323 | seq[0] = b'a'; 324 | seq[1] = b'y'; // C or T 325 | let profiler = Iupac::encode_query(b"").0; 326 | let mut result = Iupac::alloc_out(); 327 | profiler.encode_ref(&seq, &mut result); 328 | let positions = get_match_positions_u64(&result); 329 | let a_positions = positions[0].clone(); 330 | let c_positions = positions[1].clone(); 331 | let t_positions = positions[2].clone(); 332 | let g_positions = positions[3].clone(); 333 | assert_eq!(a_positions, vec![0]); 334 | assert_eq!(t_positions, vec![1]); 335 | assert_eq!(g_positions, (2..64).collect::>()); 336 | assert_eq!(c_positions, vec![1]); 337 | } 338 | 339 | #[test] 340 | fn test_extra_bases_ny() { 341 | let mut seq = [b'g'; 64]; 342 | seq[0] = b'a'; // Does not match Y 343 | seq[1] = 
b'y'; // Matches Y 344 | seq[2] = b'C'; // Matches Y 345 | let profiler = Iupac::encode_query(b"NY").0; 346 | let mut result = Iupac::alloc_out(); 347 | profiler.encode_ref(&seq, &mut result); 348 | let positions = get_match_positions_u64(&result); 349 | let n_positions = positions[4].clone(); 350 | let y_positions = positions[5].clone(); 351 | // N matches all positions 352 | assert_eq!(n_positions, (0..64).collect::>()); 353 | // Y matches 1,2 354 | assert_eq!(y_positions, vec![1, 2]); 355 | } 356 | 357 | #[test] 358 | fn test_just_atgc_64() { 359 | let mut seq = [b'g'; 64]; 360 | seq[0] = b'a'; 361 | seq[1] = b'y'; // C or T 362 | seq[34] = b'y'; // C or T 363 | let profiler = Iupac::encode_query(b"").0; 364 | let mut result = Iupac::alloc_out(); 365 | profiler.encode_ref(&seq, &mut result); 366 | let positions = get_match_positions_u64(&result); 367 | let a_positions = positions[0].clone(); 368 | let c_positions = positions[1].clone(); 369 | let t_positions = positions[2].clone(); 370 | let g_positions = positions[3].clone(); 371 | assert_eq!(a_positions, vec![0]); 372 | assert_eq!(t_positions, vec![1, 34]); 373 | assert_eq!( 374 | g_positions, 375 | [ 376 | &(2..34).collect::>()[..], // 34 not inclusive 377 | &(35..64).collect::>()[..] 
378 | ] 379 | .concat() 380 | ); 381 | assert_eq!(c_positions, vec![1, 34]); 382 | } 383 | 384 | #[test] 385 | fn test_extra_bases_ny_64() { 386 | let mut seq = [b'g'; 64]; 387 | seq[0] = b'a'; // Does not match Y 388 | seq[1] = b'y'; // Matches Y 389 | seq[2] = b'C'; // Matches Y 390 | seq[50] = b'y'; // Matches Y 391 | seq[63] = b'y'; // Matches Y 392 | let profiler = Iupac::encode_query(b"NY").0; 393 | let mut result = Iupac::alloc_out(); 394 | profiler.encode_ref(&seq, &mut result); 395 | let positions = get_match_positions_u64(&result); 396 | let n_positions = positions[4].clone(); 397 | let y_positions = positions[5].clone(); 398 | // N matches all positions 399 | assert_eq!(n_positions, (0..64).collect::>()); 400 | assert_eq!(y_positions, vec![1, 2, 50, 63]); 401 | } 402 | 403 | #[test] 404 | fn test_iupac_u64_case_insensitive() { 405 | let mut seq = [b'G'; 64]; 406 | seq[0] = b'a'; 407 | seq[1] = b'A'; 408 | seq[3] = b'r'; 409 | seq[4] = b'W'; 410 | let profiler = Iupac::encode_query(b"").0; 411 | let mut result = Iupac::alloc_out(); 412 | profiler.encode_ref(&seq, &mut result); 413 | let positions = get_match_positions_u64(&result); 414 | assert_eq!(positions[0], vec![0, 1, 3, 4]); 415 | } 416 | 417 | #[test] 418 | fn test_iupac_valid_seq_all() { 419 | let iupac = Iupac::encode_query(b"ACGT").0; 420 | let all_codes = b"ACTUGNRYSWKMBDHVX"; 421 | for &c in all_codes { 422 | assert!(iupac.valid_seq(&[c])); 423 | assert!(iupac.valid_seq(&[c.to_ascii_lowercase()])); 424 | } 425 | // Mixed case should also be valid 426 | assert!(iupac.valid_seq(b"AaCcTtUuGgNnRrYySsWwKkMmBbDdHhVvXx")); 427 | } 428 | 429 | #[test] 430 | fn test_iupac_different_lengths() { 431 | let iupac = Iupac::encode_query(b"ACGT").0; 432 | let valid_codes = b"ACTUGNRYSWKMBDHVX"; 433 | for len in [1, 31, 32, 33, 63, 64, 65, 127, 128, 129] { 434 | let seq = valid_codes 435 | .iter() 436 | .cycle() 437 | .take(len) 438 | .copied() 439 | .collect::>(); 440 | assert!(iupac.valid_seq(&seq), "Failed 
at length {}", len); 441 | } 442 | } 443 | 444 | #[test] 445 | fn test_iupac_valid_seq_empty() { 446 | let iupac = Iupac::encode_query(b"ACGT").0; 447 | assert!(iupac.valid_seq(b"")); // Not sure if this should be valid or not 448 | } 449 | 450 | #[test] 451 | fn test_invalid_iupac_codes() { 452 | let iupac = Iupac::encode_query(b"ACGT").0; 453 | // Test invalid characters 454 | let invalid_cases = [ 455 | // Below 'A' 456 | b"@CGT", b"?CGT", b"1CGT", b" CGT", // Above 'X' 457 | b"ACGZ", b"ACG[", b"ACG{", b"ACG~", 458 | // Control characters, \n, \t, \r, etc 459 | b"ACG\n", b"ACG\t", b"ACG\r", b"\0CGT", 460 | ]; 461 | 462 | for case in invalid_cases { 463 | assert!(!iupac.valid_seq(case)); 464 | } 465 | } 466 | 467 | #[test] 468 | fn test_iupac_boundary_chars() { 469 | let iupac = Iupac::encode_query(b"ACGT").0; 470 | 471 | // Test exact boundaries 472 | assert!(!iupac.valid_seq(b"@")); // 64 - invalid 473 | assert!(iupac.valid_seq(b"A")); // 65 - valid 474 | assert!(iupac.valid_seq(b"X")); // 88 - valid 475 | assert!(iupac.valid_seq(b"Y")); // 89 - valid 476 | assert!(!iupac.valid_seq(b"Z")); // 90 - invalid 477 | 478 | // Same but in 32 bytes to trigger SIMD 479 | let mut seq = b"ACGT".repeat(8); // 32 bytes 480 | seq[31] = b'Y'; 481 | assert!(iupac.valid_seq(&seq)); 482 | seq[31] = b'Z'; 483 | assert!(!iupac.valid_seq(&seq)); 484 | } 485 | } 486 | -------------------------------------------------------------------------------- /src/profiles/profile.rs: -------------------------------------------------------------------------------- 1 | use std::ops::{Index, IndexMut}; 2 | 3 | pub trait Profile: Clone + std::fmt::Debug { 4 | /// Encoding for a single character in `a`. 5 | type A; 6 | /// Encoding for 64 characters in `b`. 
7 | type B: Index + IndexMut + Copy; 8 | fn encode_query(a: &[u8]) -> (Self, Vec); 9 | fn encode_ref(&self, b: &[u8; 64], out: &mut Self::B); 10 | /// Given the encoding of an `a` and the encoding for 64 `b`s, 11 | /// return a bitmask of which characters of `b` equal the corresponding character of `a`. 12 | fn eq(ca: &Self::A, cb: &Self::B) -> u64; 13 | /// Allocate a buffer of at most n_bases in search (and reuse) 14 | fn alloc_out() -> Self::B; 15 | fn n_bases(&self) -> usize; 16 | /// Verify whether a sequence consists only of characters valid for the profile 17 | fn valid_seq(&self, seq: &[u8]) -> bool; 18 | /// Return true if the two characters are a match according to the profile 19 | fn is_match(char1: u8, char2: u8) -> bool; 20 | /// Reverse-complement the input string. 21 | fn reverse_complement(_query: &[u8]) -> Vec { 22 | unimplemented!( 23 | "Profile::reverse_complement not implemented for {:?}", 24 | std::any::type_name::() 25 | ); 26 | } 27 | fn complement(_query: &[u8]) -> Vec { 28 | unimplemented!( 29 | "Profile::complement not implemented for {:?}", 30 | std::any::type_name::() 31 | ); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/trace.rs: -------------------------------------------------------------------------------- 1 | use crate::bitpacking::compute_block; 2 | use crate::delta_encoding::V; 3 | use crate::delta_encoding::VEncoding; 4 | use crate::profiles::Profile; 5 | use crate::search::init_deltas_for_overshoot_all_lanes; 6 | use crate::search::init_deltas_for_overshoot_scalar; 7 | use pa_types::Cigar; 8 | use pa_types::Cost; 9 | use pa_types::I; 10 | use pa_types::Pos; 11 | 12 | use crate::LANES; 13 | use crate::S; 14 | use crate::bitpacking::compute_block_simd; 15 | use crate::search::{Match, Strand}; 16 | use std::array::from_fn; 17 | 18 | #[derive(Debug, Clone, Default)] 19 | pub struct CostMatrix { 20 | /// Query length. 
21 | q: usize, 22 | deltas: Vec>, 23 | pub(crate) alpha: Option, 24 | } 25 | 26 | impl CostMatrix { 27 | /// i: text idx 28 | /// j: query idx 29 | pub fn get(&self, i: usize, j: usize) -> Cost { 30 | let mut s = if let Some(alpha) = self.alpha { 31 | (j as f32 * alpha).floor() as Cost 32 | } else { 33 | j as Cost 34 | }; 35 | for idx in (j..j + i / 64 * (self.q + 1)).step_by(self.q + 1) { 36 | s += self.deltas[idx].value(); 37 | } 38 | if i % 64 != 0 { 39 | s += self.deltas[j + i / 64 * (self.q + 1)].value_of_prefix(i as I % 64); 40 | } 41 | s 42 | } 43 | } 44 | 45 | /// Compute the full n*m matrix corresponding to the query * text alignment. 46 | /// TODO: SIMD variant that takes 1 query, and LANES text slices of the same length. 47 | #[allow(unused)] // FIXME 48 | pub fn fill( 49 | query: &[u8], 50 | text: &[u8], 51 | len: usize, 52 | m: &mut CostMatrix, 53 | alpha: Option, 54 | ) { 55 | m.q = query.len(); 56 | m.deltas.clear(); 57 | m.deltas.reserve((m.q + 1) * len.div_ceil(64)); 58 | let (profiler, query_profile) = P::encode_query(query); 59 | let mut h = vec![(1, 0); query.len()]; 60 | 61 | init_deltas_for_overshoot_scalar(&mut h, alpha); 62 | 63 | let mut text_profile = P::alloc_out(); 64 | 65 | let num_chunks = len.div_ceil(64); 66 | 67 | // Process chunks of 64 chars, that end exactly at the end of the text. 
68 | for i in 0..num_chunks { 69 | let mut slice: [u8; 64] = [b'N'; 64]; 70 | let block = text.get(64 * i..).unwrap_or_default(); 71 | let block = block.get(..64).unwrap_or(block); 72 | slice[..block.len()].copy_from_slice(block); 73 | profiler.encode_ref(&slice, &mut text_profile); 74 | 75 | let mut v = V::::zero(); 76 | 77 | m.deltas.push(v); 78 | for j in 0..query.len() { 79 | compute_block::(&mut h[j], &mut v, &query_profile[j], &text_profile); 80 | m.deltas.push(v); 81 | } 82 | } 83 | } 84 | 85 | pub fn simd_fill( 86 | query: &[u8], 87 | texts: &[&[u8]], 88 | max_len: usize, 89 | m: &mut [CostMatrix; LANES], 90 | alpha: Option, 91 | ) { 92 | assert!(texts.len() <= LANES); 93 | let lanes = texts.len(); 94 | 95 | let (profiler, query_profile) = P::encode_query(query); 96 | let num_chunks = max_len.div_ceil(64); 97 | 98 | for m in &mut *m { 99 | m.q = query.len(); 100 | m.deltas.clear(); 101 | m.deltas.reserve((m.q + 1) * num_chunks); 102 | } 103 | 104 | type Base = u64; 105 | type VV = V; 106 | 107 | let mut hp: Vec = Vec::with_capacity(query.len()); 108 | let mut hm: Vec = Vec::with_capacity(query.len()); 109 | hp.resize(query.len(), S::splat(1)); 110 | hm.resize(query.len(), S::splat(0)); 111 | 112 | // NOTE: It's OK to always fill the left with 010101, even if it's not 113 | // actually the left of the text, because in that case the left column can't 114 | // be included in the alignment anyway. (The text has length q+k in that case.) 
115 | init_deltas_for_overshoot_all_lanes(&mut hp, alpha); 116 | 117 | let mut text_profile: [_; LANES] = from_fn(|_| P::alloc_out()); 118 | 119 | for i in 0..num_chunks { 120 | for lane in 0..lanes { 121 | let mut slice = [b'N'; 64]; 122 | let block = texts[lane].get(64 * i..).unwrap_or_default(); 123 | let block = block.get(..64).unwrap_or(block); 124 | slice[..block.len()].copy_from_slice(block); 125 | profiler.encode_ref(&slice, &mut text_profile[lane]); 126 | } 127 | let mut vp = S::splat(0); 128 | let mut vm = S::splat(0); 129 | for lane in 0..lanes { 130 | let v = >::from(vp[lane], vm[lane]); 131 | m[lane].deltas.push(v); 132 | } 133 | // FIXME: for large queries, use the SIMD within this single block, rather than spreading it thin over LANES 'matches' when there is only a single candidate match. 134 | for j in 0..query.len() { 135 | let eq = from_fn(|lane| P::eq(&query_profile[j], &text_profile[lane])).into(); 136 | compute_block_simd(&mut hp[j], &mut hm[j], &mut vp, &mut vm, eq); 137 | for lane in 0..lanes { 138 | let v = >::from(vp[lane], vm[lane]); 139 | m[lane].deltas.push(v); 140 | } 141 | } 142 | } 143 | 144 | for lane in 0..lanes { 145 | assert_eq!(m[lane].deltas.len(), num_chunks * (m[lane].q + 1)); 146 | } 147 | } 148 | 149 | pub fn get_trace( 150 | query: &[u8], 151 | text_offset: usize, 152 | end_pos: usize, 153 | text: &[u8], 154 | m: &CostMatrix, 155 | alpha: Option, 156 | ) -> Match { 157 | let mut trace = Vec::new(); 158 | let mut j = query.len(); 159 | let mut i = end_pos - text_offset; 160 | 161 | let cost = |j: usize, i: usize| -> Cost { m.get(i, j) }; 162 | 163 | // remaining dist to (i,j) 164 | let mut g = cost(j, i); 165 | let mut total_cost = g; 166 | 167 | let mut cigar = Cigar::default(); 168 | 169 | // Overshoot at end. 
170 | if i > text.len() { 171 | let overshoot = i - text.len(); 172 | let overshoot_cost = (overshoot as f32 * alpha.unwrap()).floor() as Cost; 173 | 174 | total_cost += overshoot_cost; 175 | i -= overshoot; 176 | j -= overshoot; 177 | } 178 | 179 | loop { 180 | // eprintln!("({i}, {j}) {g}"); 181 | trace.push((j, text_offset + i)); 182 | 183 | if j == 0 { 184 | break; 185 | } 186 | 187 | if i == 0 188 | && let Some(alpha) = alpha 189 | { 190 | // Overshoot at start. 191 | let overshoot_cost = (j as f32 * alpha).floor() as Cost; 192 | g -= overshoot_cost; 193 | break; 194 | } 195 | 196 | // Match 197 | if i > 0 && cost(j - 1, i - 1) == g && P::is_match(query[j - 1], text[i - 1]) { 198 | cigar.push(pa_types::CigarOp::Match); 199 | j -= 1; 200 | i -= 1; 201 | continue; 202 | } 203 | // We make some kind of mutation. 204 | g -= 1; 205 | 206 | // Insert text char. 207 | if i > 0 && cost(j, i - 1) == g { 208 | cigar.push(pa_types::CigarOp::Ins); 209 | i -= 1; 210 | continue; 211 | } 212 | // Mismatch. 213 | if i > 0 && cost(j - 1, i - 1) == g { 214 | cigar.push(pa_types::CigarOp::Sub); 215 | j -= 1; 216 | i -= 1; 217 | continue; 218 | } 219 | // Delete query char. 220 | if cost(j - 1, i) == g { 221 | cigar.push(pa_types::CigarOp::Del); 222 | j -= 1; 223 | continue; 224 | } 225 | panic!( 226 | "Trace failed! 
No ancestor found of {j} {i} at distance {}", 227 | g + 1 228 | ); 229 | } 230 | 231 | assert_eq!(g, 0, "Remaining cost after the trace must be 0."); 232 | 233 | cigar.reverse(); 234 | 235 | Match { 236 | cost: total_cost, 237 | start: Pos(0, (text_offset + i) as I), 238 | end: Pos(query.len() as I, (text_offset + text.len()) as I), 239 | strand: Strand::Fwd, 240 | cigar, 241 | } 242 | } 243 | 244 | #[cfg(test)] 245 | mod tests { 246 | use super::*; 247 | use crate::profiles::Dna; 248 | 249 | #[test] 250 | fn test_traceback() { 251 | let query = b"ATTTTCCCGGGGATTTT".as_slice(); 252 | let text2: &[u8] = b"ATTTTGGGGATTTT".as_slice(); 253 | 254 | let mut cost_matrix = Default::default(); 255 | fill::(query, text2, text2.len(), &mut cost_matrix, None); 256 | 257 | let trace = get_trace::(query, 0, text2.len(), text2, &cost_matrix, None); 258 | println!("Trace: {:?}", trace); 259 | } 260 | 261 | #[test] 262 | fn test_traceback_simd() { 263 | let query = b"ATTTTCCCGGGGATTTT".as_slice(); 264 | let text1 = b"ATTTTCCCGGGGATTTT".as_slice(); 265 | let text2 = b"ATTTTGGGGATTTT".as_slice(); 266 | let text3 = b"TGGGGATTTT".as_slice(); 267 | let text4 = b"TTTTTTTTTTATTTTGGGGATTTT".as_slice(); 268 | 269 | let mut cost_matrix = Default::default(); 270 | simd_fill::( 271 | &query, 272 | &[&text1, &text2, &text3, &text4], 273 | text4.len(), 274 | &mut cost_matrix, 275 | None, 276 | ); 277 | let _trace = get_trace::(query, 0, text1.len(), text1, &cost_matrix[0], None); 278 | let _trace = get_trace::(query, 0, text2.len(), text2, &cost_matrix[1], None); 279 | let _trace = get_trace::(query, 0, text3.len(), text3, &cost_matrix[2], None); 280 | let trace = get_trace::(query, 0, text4.len(), text4, &cost_matrix[3], None); 281 | println!("Trace: {:?}", trace); 282 | } 283 | } 284 | // let text1 = b"ATCGACTAGC".as_slice(); 285 | 286 | // let text3 = b"CTAGC".as_slice(); 287 | // let text4 = b"TGGC".as_slice(); 288 | 289 | // let col_costs = fill(query, text2); 290 | // for c in col_costs { 
291 | // println!("col: {:?}", c); 292 | // let (p, m) = c.deltas[0].pm(); 293 | // println!("p: {:064b} \nm: {:064b}\n\n", p, m); 294 | // } 295 | 296 | // let col_costs = simd_fill::(&query, [&text2, &text2, &text2, &text2]); 297 | // let c = &col_costs[0]; 298 | // for col in c { 299 | // let (p, m) = col.deltas[0].pm(); 300 | // // println!("(p, m): {:?}", (p, m)); 301 | // // print the binary 0100101 302 | // println!("p: {:064b} \nm: {:064b}\n\n", p, m); 303 | // } 304 | 305 | // // Simd 306 | // let col_costs = simd_fill::(&query, [&text2, &text2, &text2, &text2]); 307 | 308 | // for lane in 0..LANES { 309 | // //println!("\nCol costs for lane {}\n{:?}", lane, col_costs[lane]); 310 | // let trace = get_trace(&col_costs[lane]); 311 | // println!("Trace {}: {:?}", lane, trace); 312 | // } 313 | 314 | // #[test] 315 | // fn test_and_block_boundary() { 316 | // let query = b"ACGTGGA"; 317 | // let mut text = [b'G'; 128]; 318 | // text[64 - 3..64 + 4].copy_from_slice(query); 319 | // let col_costs = fill(query, &text[..64 + 4]); 320 | // let trace = get_trace(&col_costs); 321 | // println!("Trace 1: {:?}", trace); // FIXME: This is wrong when crossing block boundary 322 | // } 323 | /* 324 | query: ATTTTCCCGGGGATTTT 325 | text2: ...GGATTTTCCGGATTTT 326 | */ 327 | --------------------------------------------------------------------------------