├── .envrc ├── .github └── workflows │ ├── ci.yml │ └── codspeed.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches └── nthash.rs ├── flake.lock ├── flake.nix ├── rust-toolchain.toml ├── src ├── error.rs └── lib.rs └── tests └── nthash.rs /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust checks 2 | 3 | on: 4 | push: 5 | branches: [latest] 6 | pull_request: 7 | schedule: 8 | - cron: '0 5 * * *' 9 | 10 | jobs: 11 | check: 12 | name: Check 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout sources 16 | uses: actions/checkout@v2 17 | 18 | - name: Install stable toolchain 19 | uses: actions-rs/toolchain@v1 20 | with: 21 | profile: minimal 22 | toolchain: stable 23 | override: true 24 | 25 | - name: Run cargo check 26 | uses: actions-rs/cargo@v1 27 | with: 28 | command: check 29 | 30 | test: 31 | runs-on: ${{ matrix.os }} 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | build: [beta, stable, windows, macos] 36 | include: 37 | - build: macos 38 | os: macos-latest 39 | rust: stable 40 | - build: windows 41 | os: windows-latest 42 | rust: stable 43 | - build: beta 44 | os: ubuntu-latest 45 | rust: beta 46 | - build: stable 47 | os: ubuntu-latest 48 | rust: stable 49 | steps: 50 | - uses: actions/checkout@v2 51 | 52 | - uses: actions-rs/toolchain@v1 53 | with: 54 | toolchain: ${{ matrix.rust }} 55 | override: true 56 | 57 | - name: Run tests 58 | uses: actions-rs/cargo@v1 59 | with: 60 | command: test 61 | args: --no-fail-fast 62 | 63 | cross_testing: 64 | runs-on: ubuntu-latest 65 | strategy: 66 | fail-fast: false 67 | matrix: 68 | target: [ 69 | aarch64-unknown-linux-gnu, 70 | ] 71 | steps: 72 | - uses: actions/checkout@v2 73 | 74 | - uses: 
actions-rs/toolchain@v1 75 | with: 76 | toolchain: stable 77 | target: ${{ matrix.target }} 78 | override: true 79 | 80 | - name: Run tests 81 | uses: actions-rs/cargo@v1 82 | with: 83 | use-cross: true 84 | command: test 85 | args: --target ${{ matrix.target }} --no-fail-fast -- 86 | 87 | cross_build: 88 | runs-on: ubuntu-latest 89 | strategy: 90 | fail-fast: false 91 | matrix: 92 | target: [ 93 | powerpc64le-unknown-linux-gnu, 94 | s390x-unknown-linux-gnu, 95 | ] 96 | steps: 97 | - uses: actions/checkout@v2 98 | 99 | - uses: actions-rs/toolchain@v1 100 | with: 101 | toolchain: stable 102 | target: ${{ matrix.target }} 103 | override: true 104 | 105 | - name: Run build 106 | uses: actions-rs/cargo@v1 107 | with: 108 | use-cross: true 109 | command: build 110 | 111 | coverage: 112 | runs-on: ubuntu-latest 113 | steps: 114 | - uses: actions/checkout@v4 115 | 116 | - uses: actions-rs/toolchain@v1 117 | with: 118 | toolchain: stable 119 | override: true 120 | 121 | - name: Install cargo-binstall 122 | run: | 123 | wget https://github.com/ryankurte/cargo-binstall/releases/latest/download/cargo-binstall-x86_64-unknown-linux-gnu.tgz 124 | tar xf cargo-binstall-x86_64-unknown-linux-gnu.tgz 125 | chmod +x cargo-binstall 126 | mv cargo-binstall "$HOME/.cargo/bin" 127 | 128 | - name: Install cargo-tarpaulin 129 | run: cargo binstall --no-confirm cargo-tarpaulin 130 | 131 | - name: Run cargo-tarpaulin 132 | run: cargo tarpaulin --timeout 600 --out Xml -- --test-threads 1 133 | 134 | - name: Upload coverage to codecov 135 | uses: codecov/codecov-action@v3 136 | 137 | lints: 138 | name: Lints 139 | runs-on: ubuntu-latest 140 | steps: 141 | - name: Checkout sources 142 | uses: actions/checkout@v4 143 | 144 | - name: Install stable toolchain 145 | uses: actions-rs/toolchain@v1 146 | with: 147 | profile: minimal 148 | toolchain: stable 149 | override: true 150 | components: rustfmt, clippy 151 | 152 | - name: Run cargo fmt 153 | uses: actions-rs/cargo@v1 154 | with: 155 | command: 
fmt 156 | args: --all -- --check 157 | 158 | - name: Run cargo clippy 159 | uses: actions-rs/cargo@v1 160 | with: 161 | command: clippy 162 | args: -- -D warnings 163 | 164 | minimum_rust_version: 165 | runs-on: ubuntu-latest 166 | steps: 167 | - uses: actions/checkout@v2 168 | 169 | - name: check if README matches MSRV defined here 170 | run: grep '1.63.0' README.md 171 | 172 | - uses: actions-rs/toolchain@v1 173 | with: 174 | toolchain: 1.63.0 175 | override: true 176 | 177 | - name: Check if it builds 178 | uses: actions-rs/cargo@v1 179 | with: 180 | command: build 181 | 182 | publish: 183 | name: Publish (dry-run) 184 | runs-on: ubuntu-latest 185 | steps: 186 | - name: Checkout sources 187 | uses: actions/checkout@v2 188 | 189 | - name: Install stable toolchain 190 | uses: actions-rs/toolchain@v1 191 | with: 192 | profile: minimal 193 | toolchain: stable 194 | override: true 195 | 196 | - name: Make sure we can publish 197 | uses: actions-rs/cargo@v1 198 | with: 199 | command: publish 200 | args: --dry-run 201 | -------------------------------------------------------------------------------- /.github/workflows/codspeed.yml: -------------------------------------------------------------------------------- 1 | name: codspeed-benchmarks 2 | 3 | on: 4 | # Run on pushes to the latest branch 5 | push: 6 | branches: 7 | - "latest" 8 | # Run on pull requests 9 | pull_request: 10 | # `workflow_dispatch` allows CodSpeed to trigger backtest 11 | # performance analysis in order to generate initial data. 
12 | workflow_dispatch: 13 | 14 | jobs: 15 | benchmarks-rust: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - name: Setup rust toolchain, cache and cargo-codspeed binary 21 | uses: moonrepo/setup-rust@v0 22 | with: 23 | channel: stable 24 | cache-target: release 25 | bins: cargo-codspeed 26 | 27 | - name: Build the benchmark target(s) 28 | run: cargo codspeed build 29 | 30 | - name: Run the benchmarks 31 | uses: CodSpeedHQ/action@v2 32 | with: 33 | run: cargo codspeed run 34 | token: ${{ secrets.CODSPEED_TOKEN }} 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | /result 5 | /result-lib 6 | .direnv 7 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "aho-corasick" 7 | version = "0.7.20" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" 10 | dependencies = [ 11 | "memchr", 12 | ] 13 | 14 | [[package]] 15 | name = "anes" 16 | version = "0.1.6" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" 19 | 20 | [[package]] 21 | name = "anstyle" 22 | version = "1.0.7" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" 25 | 26 | [[package]] 27 | name = "autocfg" 28 | version = "1.3.0" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" 31 | 32 | [[package]] 33 | name = "bumpalo" 34 | version = "3.15.4" 35 | source = "registry+https://github.com/rust-lang/crates.io-index" 36 | checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa" 37 | 38 | [[package]] 39 | name = "cast" 40 | version = "0.3.0" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" 43 | 44 | [[package]] 45 | name = "cfg-if" 46 | version = "1.0.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 49 | 50 | [[package]] 51 | name = "ciborium" 52 | version = "0.2.2" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" 55 | dependencies = [ 56 | "ciborium-io", 57 | "ciborium-ll", 58 | "serde", 59 | ] 60 | 61 | [[package]] 62 | name = "ciborium-io" 63 | version = "0.2.2" 64 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 65 | checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" 66 | 67 | [[package]] 68 | name = "ciborium-ll" 69 | version = "0.2.2" 70 | source = "registry+https://github.com/rust-lang/crates.io-index" 71 | checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" 72 | dependencies = [ 73 | "ciborium-io", 74 | "half", 75 | ] 76 | 77 | [[package]] 78 | name = "clap" 79 | version = "4.5.9" 80 | source = "registry+https://github.com/rust-lang/crates.io-index" 81 | checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462" 82 | dependencies = [ 83 | "clap_builder", 84 | ] 85 | 86 | [[package]] 87 | name = "clap_builder" 88 | version = "4.5.9" 89 | source = "registry+https://github.com/rust-lang/crates.io-index" 90 | checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942" 91 | dependencies = [ 92 | "anstyle", 93 | "clap_lex", 94 | ] 95 | 96 | [[package]] 97 | name = "clap_lex" 98 | version = "0.7.1" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" 101 | 102 | [[package]] 103 | name = "codspeed" 104 | version = "2.6.0" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "3a104ac948e0188b921eb3fcbdd55dcf62e542df4c7ab7e660623f6288302089" 107 | dependencies = [ 108 | "colored", 109 | "libc", 110 | "serde_json", 111 | ] 112 | 113 | [[package]] 114 | name = "codspeed-criterion-compat" 115 | version = "2.6.0" 116 | source = "registry+https://github.com/rust-lang/crates.io-index" 117 | checksum = "722c36bdc62d9436d027256ce2627af81ac7a596dfc7d13d849d0d212448d7fe" 118 | dependencies = [ 119 | "codspeed", 120 | "colored", 121 | "criterion", 122 | ] 123 | 124 | [[package]] 125 | name = "colored" 126 | version = "2.1.0" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = 
"cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" 129 | dependencies = [ 130 | "lazy_static", 131 | "windows-sys 0.48.0", 132 | ] 133 | 134 | [[package]] 135 | name = "criterion" 136 | version = "0.5.1" 137 | source = "registry+https://github.com/rust-lang/crates.io-index" 138 | checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" 139 | dependencies = [ 140 | "anes", 141 | "cast", 142 | "ciborium", 143 | "clap", 144 | "criterion-plot", 145 | "is-terminal", 146 | "itertools", 147 | "num-traits", 148 | "once_cell", 149 | "oorandom", 150 | "plotters", 151 | "rayon", 152 | "regex", 153 | "serde", 154 | "serde_derive", 155 | "serde_json", 156 | "tinytemplate", 157 | "walkdir", 158 | ] 159 | 160 | [[package]] 161 | name = "criterion-plot" 162 | version = "0.5.0" 163 | source = "registry+https://github.com/rust-lang/crates.io-index" 164 | checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" 165 | dependencies = [ 166 | "cast", 167 | "itertools", 168 | ] 169 | 170 | [[package]] 171 | name = "crossbeam-deque" 172 | version = "0.8.5" 173 | source = "registry+https://github.com/rust-lang/crates.io-index" 174 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" 175 | dependencies = [ 176 | "crossbeam-epoch", 177 | "crossbeam-utils", 178 | ] 179 | 180 | [[package]] 181 | name = "crossbeam-epoch" 182 | version = "0.9.18" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" 185 | dependencies = [ 186 | "crossbeam-utils", 187 | ] 188 | 189 | [[package]] 190 | name = "crossbeam-utils" 191 | version = "0.8.20" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" 194 | 195 | [[package]] 196 | name = "crunchy" 197 | version = "0.2.2" 198 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 199 | checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" 200 | 201 | [[package]] 202 | name = "either" 203 | version = "1.13.0" 204 | source = "registry+https://github.com/rust-lang/crates.io-index" 205 | checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" 206 | 207 | [[package]] 208 | name = "env_logger" 209 | version = "0.8.4" 210 | source = "registry+https://github.com/rust-lang/crates.io-index" 211 | checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" 212 | dependencies = [ 213 | "log", 214 | "regex", 215 | ] 216 | 217 | [[package]] 218 | name = "getrandom" 219 | version = "0.2.15" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" 222 | dependencies = [ 223 | "cfg-if", 224 | "libc", 225 | "wasi", 226 | ] 227 | 228 | [[package]] 229 | name = "half" 230 | version = "2.4.1" 231 | source = "registry+https://github.com/rust-lang/crates.io-index" 232 | checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" 233 | dependencies = [ 234 | "cfg-if", 235 | "crunchy", 236 | ] 237 | 238 | [[package]] 239 | name = "hermit-abi" 240 | version = "0.3.9" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" 243 | 244 | [[package]] 245 | name = "is-terminal" 246 | version = "0.4.12" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" 249 | dependencies = [ 250 | "hermit-abi", 251 | "libc", 252 | "windows-sys 0.52.0", 253 | ] 254 | 255 | [[package]] 256 | name = "itertools" 257 | version = "0.10.5" 258 | source = "registry+https://github.com/rust-lang/crates.io-index" 259 | checksum = 
"b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 260 | dependencies = [ 261 | "either", 262 | ] 263 | 264 | [[package]] 265 | name = "itoa" 266 | version = "1.0.11" 267 | source = "registry+https://github.com/rust-lang/crates.io-index" 268 | checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" 269 | 270 | [[package]] 271 | name = "js-sys" 272 | version = "0.3.69" 273 | source = "registry+https://github.com/rust-lang/crates.io-index" 274 | checksum = "29c15563dc2726973df627357ce0c9ddddbea194836909d655df6a75d2cf296d" 275 | dependencies = [ 276 | "wasm-bindgen", 277 | ] 278 | 279 | [[package]] 280 | name = "lazy_static" 281 | version = "1.5.0" 282 | source = "registry+https://github.com/rust-lang/crates.io-index" 283 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 284 | 285 | [[package]] 286 | name = "libc" 287 | version = "0.2.155" 288 | source = "registry+https://github.com/rust-lang/crates.io-index" 289 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" 290 | 291 | [[package]] 292 | name = "log" 293 | version = "0.4.22" 294 | source = "registry+https://github.com/rust-lang/crates.io-index" 295 | checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" 296 | 297 | [[package]] 298 | name = "memchr" 299 | version = "2.5.0" 300 | source = "registry+https://github.com/rust-lang/crates.io-index" 301 | checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" 302 | 303 | [[package]] 304 | name = "nthash" 305 | version = "0.5.1" 306 | dependencies = [ 307 | "codspeed-criterion-compat", 308 | "criterion", 309 | "quickcheck", 310 | "rand", 311 | "thiserror", 312 | ] 313 | 314 | [[package]] 315 | name = "num-traits" 316 | version = "0.2.18" 317 | source = "registry+https://github.com/rust-lang/crates.io-index" 318 | checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" 319 | dependencies = [ 320 | "autocfg", 
321 | ] 322 | 323 | [[package]] 324 | name = "once_cell" 325 | version = "1.17.2" 326 | source = "registry+https://github.com/rust-lang/crates.io-index" 327 | checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b" 328 | 329 | [[package]] 330 | name = "oorandom" 331 | version = "11.1.4" 332 | source = "registry+https://github.com/rust-lang/crates.io-index" 333 | checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" 334 | 335 | [[package]] 336 | name = "plotters" 337 | version = "0.3.4" 338 | source = "registry+https://github.com/rust-lang/crates.io-index" 339 | checksum = "2538b639e642295546c50fcd545198c9d64ee2a38620a628724a3b266d5fbf97" 340 | dependencies = [ 341 | "num-traits", 342 | "plotters-backend", 343 | "plotters-svg", 344 | "wasm-bindgen", 345 | "web-sys", 346 | ] 347 | 348 | [[package]] 349 | name = "plotters-backend" 350 | version = "0.3.6" 351 | source = "registry+https://github.com/rust-lang/crates.io-index" 352 | checksum = "414cec62c6634ae900ea1c56128dfe87cf63e7caece0852ec76aba307cebadb7" 353 | 354 | [[package]] 355 | name = "plotters-svg" 356 | version = "0.3.6" 357 | source = "registry+https://github.com/rust-lang/crates.io-index" 358 | checksum = "81b30686a7d9c3e010b84284bdd26a29f2138574f52f5eb6f794fc0ad924e705" 359 | dependencies = [ 360 | "plotters-backend", 361 | ] 362 | 363 | [[package]] 364 | name = "ppv-lite86" 365 | version = "0.2.17" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" 368 | 369 | [[package]] 370 | name = "proc-macro2" 371 | version = "1.0.86" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" 374 | dependencies = [ 375 | "unicode-ident", 376 | ] 377 | 378 | [[package]] 379 | name = "quickcheck" 380 | version = "1.0.3" 381 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 382 | checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6" 383 | dependencies = [ 384 | "env_logger", 385 | "log", 386 | "rand", 387 | ] 388 | 389 | [[package]] 390 | name = "quote" 391 | version = "1.0.36" 392 | source = "registry+https://github.com/rust-lang/crates.io-index" 393 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" 394 | dependencies = [ 395 | "proc-macro2", 396 | ] 397 | 398 | [[package]] 399 | name = "rand" 400 | version = "0.8.5" 401 | source = "registry+https://github.com/rust-lang/crates.io-index" 402 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 403 | dependencies = [ 404 | "libc", 405 | "rand_chacha", 406 | "rand_core", 407 | ] 408 | 409 | [[package]] 410 | name = "rand_chacha" 411 | version = "0.3.1" 412 | source = "registry+https://github.com/rust-lang/crates.io-index" 413 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 414 | dependencies = [ 415 | "ppv-lite86", 416 | "rand_core", 417 | ] 418 | 419 | [[package]] 420 | name = "rand_core" 421 | version = "0.6.4" 422 | source = "registry+https://github.com/rust-lang/crates.io-index" 423 | checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" 424 | dependencies = [ 425 | "getrandom", 426 | ] 427 | 428 | [[package]] 429 | name = "rayon" 430 | version = "1.8.0" 431 | source = "registry+https://github.com/rust-lang/crates.io-index" 432 | checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1" 433 | dependencies = [ 434 | "either", 435 | "rayon-core", 436 | ] 437 | 438 | [[package]] 439 | name = "rayon-core" 440 | version = "1.12.0" 441 | source = "registry+https://github.com/rust-lang/crates.io-index" 442 | checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed" 443 | dependencies = [ 444 | "crossbeam-deque", 445 | "crossbeam-utils", 446 | ] 447 | 448 | 
[[package]] 449 | name = "regex" 450 | version = "1.7.3" 451 | source = "registry+https://github.com/rust-lang/crates.io-index" 452 | checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" 453 | dependencies = [ 454 | "aho-corasick", 455 | "memchr", 456 | "regex-syntax", 457 | ] 458 | 459 | [[package]] 460 | name = "regex-syntax" 461 | version = "0.6.29" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" 464 | 465 | [[package]] 466 | name = "ryu" 467 | version = "1.0.18" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" 470 | 471 | [[package]] 472 | name = "same-file" 473 | version = "1.0.6" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 476 | dependencies = [ 477 | "winapi-util", 478 | ] 479 | 480 | [[package]] 481 | name = "serde" 482 | version = "1.0.204" 483 | source = "registry+https://github.com/rust-lang/crates.io-index" 484 | checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" 485 | dependencies = [ 486 | "serde_derive", 487 | ] 488 | 489 | [[package]] 490 | name = "serde_derive" 491 | version = "1.0.204" 492 | source = "registry+https://github.com/rust-lang/crates.io-index" 493 | checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" 494 | dependencies = [ 495 | "proc-macro2", 496 | "quote", 497 | "syn", 498 | ] 499 | 500 | [[package]] 501 | name = "serde_json" 502 | version = "1.0.120" 503 | source = "registry+https://github.com/rust-lang/crates.io-index" 504 | checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" 505 | dependencies = [ 506 | "itoa", 507 | "ryu", 508 | "serde", 509 | ] 510 | 511 | [[package]] 512 | name = "syn" 513 | version = 
"2.0.56" 514 | source = "registry+https://github.com/rust-lang/crates.io-index" 515 | checksum = "6e2415488199887523e74fd9a5f7be804dfd42d868ae0eca382e3917094d210e" 516 | dependencies = [ 517 | "proc-macro2", 518 | "quote", 519 | "unicode-ident", 520 | ] 521 | 522 | [[package]] 523 | name = "thiserror" 524 | version = "1.0.61" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" 527 | dependencies = [ 528 | "thiserror-impl", 529 | ] 530 | 531 | [[package]] 532 | name = "thiserror-impl" 533 | version = "1.0.61" 534 | source = "registry+https://github.com/rust-lang/crates.io-index" 535 | checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" 536 | dependencies = [ 537 | "proc-macro2", 538 | "quote", 539 | "syn", 540 | ] 541 | 542 | [[package]] 543 | name = "tinytemplate" 544 | version = "1.2.1" 545 | source = "registry+https://github.com/rust-lang/crates.io-index" 546 | checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" 547 | dependencies = [ 548 | "serde", 549 | "serde_json", 550 | ] 551 | 552 | [[package]] 553 | name = "unicode-ident" 554 | version = "1.0.12" 555 | source = "registry+https://github.com/rust-lang/crates.io-index" 556 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" 557 | 558 | [[package]] 559 | name = "walkdir" 560 | version = "2.5.0" 561 | source = "registry+https://github.com/rust-lang/crates.io-index" 562 | checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" 563 | dependencies = [ 564 | "same-file", 565 | "winapi-util", 566 | ] 567 | 568 | [[package]] 569 | name = "wasi" 570 | version = "0.11.0+wasi-snapshot-preview1" 571 | source = "registry+https://github.com/rust-lang/crates.io-index" 572 | checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" 573 | 574 | [[package]] 575 | name = "wasm-bindgen" 576 | version = 
"0.2.92" 577 | source = "registry+https://github.com/rust-lang/crates.io-index" 578 | checksum = "4be2531df63900aeb2bca0daaaddec08491ee64ceecbee5076636a3b026795a8" 579 | dependencies = [ 580 | "cfg-if", 581 | "wasm-bindgen-macro", 582 | ] 583 | 584 | [[package]] 585 | name = "wasm-bindgen-backend" 586 | version = "0.2.92" 587 | source = "registry+https://github.com/rust-lang/crates.io-index" 588 | checksum = "614d787b966d3989fa7bb98a654e369c762374fd3213d212cfc0251257e747da" 589 | dependencies = [ 590 | "bumpalo", 591 | "log", 592 | "once_cell", 593 | "proc-macro2", 594 | "quote", 595 | "syn", 596 | "wasm-bindgen-shared", 597 | ] 598 | 599 | [[package]] 600 | name = "wasm-bindgen-macro" 601 | version = "0.2.92" 602 | source = "registry+https://github.com/rust-lang/crates.io-index" 603 | checksum = "a1f8823de937b71b9460c0c34e25f3da88250760bec0ebac694b49997550d726" 604 | dependencies = [ 605 | "quote", 606 | "wasm-bindgen-macro-support", 607 | ] 608 | 609 | [[package]] 610 | name = "wasm-bindgen-macro-support" 611 | version = "0.2.92" 612 | source = "registry+https://github.com/rust-lang/crates.io-index" 613 | checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" 614 | dependencies = [ 615 | "proc-macro2", 616 | "quote", 617 | "syn", 618 | "wasm-bindgen-backend", 619 | "wasm-bindgen-shared", 620 | ] 621 | 622 | [[package]] 623 | name = "wasm-bindgen-shared" 624 | version = "0.2.92" 625 | source = "registry+https://github.com/rust-lang/crates.io-index" 626 | checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" 627 | 628 | [[package]] 629 | name = "web-sys" 630 | version = "0.3.69" 631 | source = "registry+https://github.com/rust-lang/crates.io-index" 632 | checksum = "77afa9a11836342370f4817622a2f0f418b134426d91a82dfb48f532d2ec13ef" 633 | dependencies = [ 634 | "js-sys", 635 | "wasm-bindgen", 636 | ] 637 | 638 | [[package]] 639 | name = "winapi-util" 640 | version = "0.1.8" 641 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 642 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 643 | dependencies = [ 644 | "windows-sys 0.52.0", 645 | ] 646 | 647 | [[package]] 648 | name = "windows-sys" 649 | version = "0.48.0" 650 | source = "registry+https://github.com/rust-lang/crates.io-index" 651 | checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" 652 | dependencies = [ 653 | "windows-targets 0.48.5", 654 | ] 655 | 656 | [[package]] 657 | name = "windows-sys" 658 | version = "0.52.0" 659 | source = "registry+https://github.com/rust-lang/crates.io-index" 660 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 661 | dependencies = [ 662 | "windows-targets 0.52.6", 663 | ] 664 | 665 | [[package]] 666 | name = "windows-targets" 667 | version = "0.48.5" 668 | source = "registry+https://github.com/rust-lang/crates.io-index" 669 | checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" 670 | dependencies = [ 671 | "windows_aarch64_gnullvm 0.48.5", 672 | "windows_aarch64_msvc 0.48.5", 673 | "windows_i686_gnu 0.48.5", 674 | "windows_i686_msvc 0.48.5", 675 | "windows_x86_64_gnu 0.48.5", 676 | "windows_x86_64_gnullvm 0.48.5", 677 | "windows_x86_64_msvc 0.48.5", 678 | ] 679 | 680 | [[package]] 681 | name = "windows-targets" 682 | version = "0.52.6" 683 | source = "registry+https://github.com/rust-lang/crates.io-index" 684 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 685 | dependencies = [ 686 | "windows_aarch64_gnullvm 0.52.6", 687 | "windows_aarch64_msvc 0.52.6", 688 | "windows_i686_gnu 0.52.6", 689 | "windows_i686_gnullvm", 690 | "windows_i686_msvc 0.52.6", 691 | "windows_x86_64_gnu 0.52.6", 692 | "windows_x86_64_gnullvm 0.52.6", 693 | "windows_x86_64_msvc 0.52.6", 694 | ] 695 | 696 | [[package]] 697 | name = "windows_aarch64_gnullvm" 698 | version = "0.48.5" 699 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 700 | checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" 701 | 702 | [[package]] 703 | name = "windows_aarch64_gnullvm" 704 | version = "0.52.6" 705 | source = "registry+https://github.com/rust-lang/crates.io-index" 706 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 707 | 708 | [[package]] 709 | name = "windows_aarch64_msvc" 710 | version = "0.48.5" 711 | source = "registry+https://github.com/rust-lang/crates.io-index" 712 | checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" 713 | 714 | [[package]] 715 | name = "windows_aarch64_msvc" 716 | version = "0.52.6" 717 | source = "registry+https://github.com/rust-lang/crates.io-index" 718 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 719 | 720 | [[package]] 721 | name = "windows_i686_gnu" 722 | version = "0.48.5" 723 | source = "registry+https://github.com/rust-lang/crates.io-index" 724 | checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" 725 | 726 | [[package]] 727 | name = "windows_i686_gnu" 728 | version = "0.52.6" 729 | source = "registry+https://github.com/rust-lang/crates.io-index" 730 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 731 | 732 | [[package]] 733 | name = "windows_i686_gnullvm" 734 | version = "0.52.6" 735 | source = "registry+https://github.com/rust-lang/crates.io-index" 736 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 737 | 738 | [[package]] 739 | name = "windows_i686_msvc" 740 | version = "0.48.5" 741 | source = "registry+https://github.com/rust-lang/crates.io-index" 742 | checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" 743 | 744 | [[package]] 745 | name = "windows_i686_msvc" 746 | version = "0.52.6" 747 | source = "registry+https://github.com/rust-lang/crates.io-index" 748 | checksum = 
"240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 749 | 750 | [[package]] 751 | name = "windows_x86_64_gnu" 752 | version = "0.48.5" 753 | source = "registry+https://github.com/rust-lang/crates.io-index" 754 | checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" 755 | 756 | [[package]] 757 | name = "windows_x86_64_gnu" 758 | version = "0.52.6" 759 | source = "registry+https://github.com/rust-lang/crates.io-index" 760 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 761 | 762 | [[package]] 763 | name = "windows_x86_64_gnullvm" 764 | version = "0.48.5" 765 | source = "registry+https://github.com/rust-lang/crates.io-index" 766 | checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" 767 | 768 | [[package]] 769 | name = "windows_x86_64_gnullvm" 770 | version = "0.52.6" 771 | source = "registry+https://github.com/rust-lang/crates.io-index" 772 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 773 | 774 | [[package]] 775 | name = "windows_x86_64_msvc" 776 | version = "0.48.5" 777 | source = "registry+https://github.com/rust-lang/crates.io-index" 778 | checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" 779 | 780 | [[package]] 781 | name = "windows_x86_64_msvc" 782 | version = "0.52.6" 783 | source = "registry+https://github.com/rust-lang/crates.io-index" 784 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 785 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nthash" 3 | version = "0.5.1" 4 | authors = ["Luiz Irber "] 5 | description = "ntHash is a rolling hash function for hashing all possible k-mers in a DNA sequence." 
6 | repository = "https://github.com/luizirber/nthash" 7 | documentation = "https://docs.rs/nthash" 8 | keywords = ["bioinformatics"] 9 | categories = ["science", "algorithms"] 10 | license = "MIT OR Apache-2.0" 11 | readme = 'README.md' 12 | edition = "2021" 13 | rust-version = "1.63.0" 14 | 15 | [dependencies] 16 | thiserror = "1.0" 17 | 18 | [dev-dependencies] 19 | codspeed-criterion-compat = "2.6.0" 20 | criterion = "0.5.1" 21 | quickcheck = "1.0.3" 22 | rand = "0.8.5" 23 | 24 | [[bench]] 25 | name = "nthash" 26 | harness = false 27 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | https://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2018 Luiz Irber 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | https://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Luiz Irber 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # nthash 4 | 5 | **[ntHash](https://github.com/bcgsc/ntHash) implementation in Rust** 6 | 7 | [![Build Status](https://github.com/luizirber/nthash/actions/workflows/ci.yml/badge.svg)](https://github.com/luizirber/nthash/actions/workflows/ci.yml) 8 | [![Crates.io](https://img.shields.io/crates/v/nthash.svg)](https://crates.io/crates/nthash) 9 | [![Documentation](https://docs.rs/nthash/badge.svg)](https://docs.rs/nthash/) 10 | 11 | ntHash (version [1.0.4](https://github.com/bcgsc/ntHash/releases/tag/v1.0.4)) implementation in Rust. 12 | 13 | ## Minimum supported Rust version 14 | 15 | Currently the minimum supported Rust version is 1.63.0. 16 | 17 | ## License 18 | 19 | Licensed under either of these: 20 | 21 | * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or https://www.apache.org/licenses/LICENSE-2.0) 22 | * MIT License ([LICENSE-MIT](LICENSE-MIT) or https://opensource.org/licenses/MIT) 23 | 24 | ### Contributing 25 | 26 | Unless you explicitly state otherwise, any contribution you intentionally submit for inclusion in the work, as defined 27 | in the Apache-2.0 license, shall be dual-licensed as above, without any additional terms or conditions. 
28 | -------------------------------------------------------------------------------- /benches/nthash.rs: --------------------------------------------------------------------------------
use codspeed_criterion_compat::{criterion_group, criterion_main, Criterion};
use rand::distributions::{Distribution, Uniform};

use nthash::{nthash, NtHashIterator};

/// Benchmarks the rolling `NtHashIterator` against the windowed `nthash`
/// helper on one randomly generated 10,000-base sequence with k = 5.
fn nthash_bench(c: &mut Criterion) {
    let range = Uniform::from(0..4);
    let mut rng = rand::thread_rng();
    // Draw one base per position; the range only yields 0..=3, so the `_` arm
    // exists purely to make the match exhaustive.
    let seq = (0..10000)
        .map(|_| match range.sample(&mut rng) {
            0 => 'A',
            1 => 'C',
            2 => 'G',
            3 => 'T',
            _ => 'N',
        })
        .collect::<String>();

    let mut group = c.benchmark_group("nthash");

    group.bench_function("nthash_iterator", |b| {
        b.iter(|| {
            let iter = NtHashIterator::new(seq.as_bytes(), 5).unwrap();
            //  iter.for_each(drop);
            let _res = iter.collect::<Vec<u64>>();
        })
    });

    group.bench_function("nthash_simple", |b| {
        b.iter(|| {
            nthash(seq.as_bytes(), 5);
        })
    });
}

criterion_group!(benches, nthash_bench);
criterion_main!(benches);
38 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "crane": { 4 | "inputs": { 5 | "nixpkgs": [ 6 | "rust-flake", 7 | "nixpkgs" 8 | ] 9 | }, 10 | "locked": { 11 | "lastModified": 1718474113, 12 | "narHash": "sha256-UKrfy/46YF2TRnxTtKCYzqf2f5ZPRRWwKCCJb7O5X8U=", 13 | "owner": "ipetkov", 14 | "repo": "crane", 15 | "rev": "0095fd8ea00ae0a9e6014f39c375e40c2fbd3386", 16 | "type": "github" 17 | }, 18 | "original": { 19 | "owner": "ipetkov", 20 | "repo": "crane", 21 | "type": "github" 22 | } 23 | }, 24 | "flake-parts": { 25 | "inputs": { 26 | "nixpkgs-lib": "nixpkgs-lib" 27 | }, 28 | "locked": { 29 | "lastModified": 1719994518, 30 | "narHash": "sha256-pQMhCCHyQGRzdfAkdJ4cIWiw+JNuWsTX7f0ZYSyz0VY=", 31
| "owner": "hercules-ci", 32 | "repo": "flake-parts", 33 | "rev": "9227223f6d922fee3c7b190b2cc238a99527bbb7", 34 | "type": "github" 35 | }, 36 | "original": { 37 | "owner": "hercules-ci", 38 | "repo": "flake-parts", 39 | "type": "github" 40 | } 41 | }, 42 | "flake-utils": { 43 | "inputs": { 44 | "systems": "systems" 45 | }, 46 | "locked": { 47 | "lastModified": 1705309234, 48 | "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", 49 | "owner": "numtide", 50 | "repo": "flake-utils", 51 | "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", 52 | "type": "github" 53 | }, 54 | "original": { 55 | "owner": "numtide", 56 | "repo": "flake-utils", 57 | "type": "github" 58 | } 59 | }, 60 | "nixpkgs": { 61 | "locked": { 62 | "lastModified": 1720529433, 63 | "narHash": "sha256-cIExDRdCblQ8gDOdw7KiV5VpK90wVGXjKZr/JGoS8tc=", 64 | "owner": "nixos", 65 | "repo": "nixpkgs", 66 | "rev": "f571ea25a6d088f0416d4a9323d9dd9a6edd0088", 67 | "type": "github" 68 | }, 69 | "original": { 70 | "owner": "nixos", 71 | "ref": "nixpkgs-unstable", 72 | "repo": "nixpkgs", 73 | "type": "github" 74 | } 75 | }, 76 | "nixpkgs-lib": { 77 | "locked": { 78 | "lastModified": 1719876945, 79 | "narHash": "sha256-Fm2rDDs86sHy0/1jxTOKB1118Q0O3Uc7EC0iXvXKpbI=", 80 | "type": "tarball", 81 | "url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz" 82 | }, 83 | "original": { 84 | "type": "tarball", 85 | "url": "https://github.com/NixOS/nixpkgs/archive/5daf0514482af3f97abaefc78a6606365c9108e2.tar.gz" 86 | } 87 | }, 88 | "nixpkgs_2": { 89 | "locked": { 90 | "lastModified": 1706487304, 91 | "narHash": "sha256-LE8lVX28MV2jWJsidW13D2qrHU/RUUONendL2Q/WlJg=", 92 | "owner": "NixOS", 93 | "repo": "nixpkgs", 94 | "rev": "90f456026d284c22b3e3497be980b2e47d0b28ac", 95 | "type": "github" 96 | }, 97 | "original": { 98 | "owner": "NixOS", 99 | "ref": "nixpkgs-unstable", 100 | "repo": "nixpkgs", 101 | "type": "github" 102 | } 103 | }, 104 | "nixpkgs_3": { 105 | "locked": { 106 | 
"lastModified": 1719690277, 107 | "narHash": "sha256-0xSej1g7eP2kaUF+JQp8jdyNmpmCJKRpO12mKl/36Kc=", 108 | "owner": "nixos", 109 | "repo": "nixpkgs", 110 | "rev": "2741b4b489b55df32afac57bc4bfd220e8bf617e", 111 | "type": "github" 112 | }, 113 | "original": { 114 | "owner": "nixos", 115 | "ref": "nixos-unstable", 116 | "repo": "nixpkgs", 117 | "type": "github" 118 | } 119 | }, 120 | "root": { 121 | "inputs": { 122 | "flake-parts": "flake-parts", 123 | "nixpkgs": "nixpkgs", 124 | "rust-flake": "rust-flake", 125 | "systems": "systems_2", 126 | "treefmt-nix": "treefmt-nix" 127 | } 128 | }, 129 | "rust-flake": { 130 | "inputs": { 131 | "crane": "crane", 132 | "nixpkgs": [ 133 | "nixpkgs" 134 | ], 135 | "rust-overlay": "rust-overlay" 136 | }, 137 | "locked": { 138 | "lastModified": 1718723475, 139 | "narHash": "sha256-sTNWxStNuvjLFIR1P412Ovk+sEKJsAP31ARxraCf7iI=", 140 | "owner": "juspay", 141 | "repo": "rust-flake", 142 | "rev": "f2333101b401e82013b2c6966a3727bdd23c86e0", 143 | "type": "github" 144 | }, 145 | "original": { 146 | "owner": "juspay", 147 | "repo": "rust-flake", 148 | "type": "github" 149 | } 150 | }, 151 | "rust-overlay": { 152 | "inputs": { 153 | "flake-utils": "flake-utils", 154 | "nixpkgs": "nixpkgs_2" 155 | }, 156 | "locked": { 157 | "lastModified": 1715480255, 158 | "narHash": "sha256-gEZl8nYidQwqJhOigJ91JDjoBFoPEWVsd82AKnaE7Go=", 159 | "owner": "oxalica", 160 | "repo": "rust-overlay", 161 | "rev": "d690205a4f01ec0930303c4204e5063958e51255", 162 | "type": "github" 163 | }, 164 | "original": { 165 | "owner": "oxalica", 166 | "repo": "rust-overlay", 167 | "type": "github" 168 | } 169 | }, 170 | "systems": { 171 | "locked": { 172 | "lastModified": 1681028828, 173 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 174 | "owner": "nix-systems", 175 | "repo": "default", 176 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 177 | "type": "github" 178 | }, 179 | "original": { 180 | "owner": "nix-systems", 181 | "repo": "default", 182 | 
"type": "github" 183 | } 184 | }, 185 | "systems_2": { 186 | "locked": { 187 | "lastModified": 1681028828, 188 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 189 | "owner": "nix-systems", 190 | "repo": "default", 191 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 192 | "type": "github" 193 | }, 194 | "original": { 195 | "owner": "nix-systems", 196 | "repo": "default", 197 | "type": "github" 198 | } 199 | }, 200 | "treefmt-nix": { 201 | "inputs": { 202 | "nixpkgs": "nixpkgs_3" 203 | }, 204 | "locked": { 205 | "lastModified": 1720507012, 206 | "narHash": "sha256-QIeZ43t9IVB4dLsFaWh2f4C7JSRfK7p+Y1U9dULsLXU=", 207 | "owner": "numtide", 208 | "repo": "treefmt-nix", 209 | "rev": "8b63fe8cf7892c59b3df27cbcab4d5644035d72f", 210 | "type": "github" 211 | }, 212 | "original": { 213 | "owner": "numtide", 214 | "repo": "treefmt-nix", 215 | "type": "github" 216 | } 217 | } 218 | }, 219 | "root": "root", 220 | "version": 7 221 | } 222 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | inputs = { 3 | nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; 4 | flake-parts.url = "github:hercules-ci/flake-parts"; 5 | systems.url = "github:nix-systems/default"; 6 | rust-flake.url = "github:juspay/rust-flake"; 7 | rust-flake.inputs.nixpkgs.follows = "nixpkgs"; 8 | 9 | # Dev tools 10 | treefmt-nix.url = "github:numtide/treefmt-nix"; 11 | }; 12 | 13 | outputs = inputs: 14 | inputs.flake-parts.lib.mkFlake { inherit inputs; } { 15 | systems = import inputs.systems; 16 | imports = [ 17 | inputs.treefmt-nix.flakeModule 18 | inputs.rust-flake.flakeModules.default 19 | inputs.rust-flake.flakeModules.nixpkgs 20 | ]; 21 | perSystem = { config, self', pkgs, lib, ... 
}: { 22 | rust-project.crane.args = { 23 | buildInputs = lib.optionals pkgs.stdenv.isDarwin ( 24 | with pkgs.darwin.apple_sdk.frameworks; [ 25 | IOKit 26 | ] 27 | ); 28 | }; 29 | rust-project.toolchain = (pkgs.rust-bin.fromRustupToolchainFile (./rust-toolchain.toml)); 30 | 31 | # Add your auto-formatters here. 32 | # cf. https://nixos.asia/en/treefmt 33 | treefmt.config = { 34 | projectRootFile = "flake.nix"; 35 | programs = { 36 | nixpkgs-fmt.enable = true; 37 | rustfmt.enable = true; 38 | }; 39 | }; 40 | 41 | devShells.default = pkgs.mkShell { 42 | inputsFrom = [ self'.devShells.nthash ]; 43 | packages = [ 44 | pkgs.cargo-watch 45 | pkgs.cargo-codspeed 46 | pkgs.cargo-criterion 47 | ]; 48 | }; 49 | packages.default = self'.packages.nthash; 50 | }; 51 | }; 52 | } 53 | -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "stable" 3 | components = ["rust-src", "rust-analyzer", "clippy"] 4 | #channel = "1.63.0" 5 | #components = ["rust-src", "clippy"] 6 | -------------------------------------------------------------------------------- /src/error.rs: --------------------------------------------------------------------------------
use super::MAXIMUM_K_SIZE;

/// Errors returned when a k-mer hash iterator cannot be constructed.
#[derive(thiserror::Error, Debug)]
pub enum Error {
    /// The requested k-mer size is larger than the sequence it should slide over.
    #[error("K size {ksize} is out of range for the given sequence size {seq_size}")]
    KSizeOutOfRange { ksize: usize, seq_size: usize },
    /// The requested k-mer size is larger than `MAXIMUM_K_SIZE`.
    #[error("K size {0} cannot exceed the size of a u32 {MAXIMUM_K_SIZE}")]
    KSizeTooBig(usize),
}

/// Crate-wide result alias whose error type is fixed to [`Error`].
pub type Result<T> = std::result::Result<T, Error>;
12 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! ntHash is a hash function tuned for genomic data. 2 | //!
It performs best when calculating hash values for adjacent k-mers in
//! an input sequence, operating an order of magnitude faster than the best
//! performing alternatives in typical use cases.
//!
//! [Scientific article with more details](https://doi.org/10.1093/bioinformatics/btw397)
//!
//! [Original implementation in C++](https://github.com/bcgsc/ntHash/)
//!
//! This crate is based on ntHash [1.0.4](https://github.com/bcgsc/ntHash/releases/tag/v1.0.4).
//!

mod error;

pub use crate::error::{Error, Result};

pub(crate) const MAXIMUM_K_SIZE: usize = u32::MAX as usize;

// 64-bit seeds, one per base. The forward table stores them as listed; the
// reverse table stores the same four values with A<->T and C<->G exchanged.
const SEED_A: u64 = 0x3c8b_fbb3_95c6_0474;
const SEED_C: u64 = 0x3193_c185_62a0_2b4c;
const SEED_G: u64 = 0x2032_3ed0_8257_2324;
const SEED_T: u64 = 0x2955_49f5_4be2_4456;

// Marker stored for every byte that has no seed of its own.
const INVALID_BASE: u64 = 1;

/// Builds a byte-indexed table holding the given seeds for `A`, `C`, `G`
/// and `T`, zero for `N`, and `INVALID_BASE` everywhere else.
const fn seed_table(a: u64, c: u64, g: u64, t: u64) -> [u64; 256] {
    let mut table = [INVALID_BASE; 256];
    table[b'A' as usize] = a;
    table[b'C' as usize] = c;
    table[b'G' as usize] = g;
    table[b'T' as usize] = t;
    table[b'N' as usize] = 0;
    table
}

const H_LOOKUP: [u64; 256] = seed_table(SEED_A, SEED_C, SEED_G, SEED_T);

const RC_LOOKUP: [u64; 256] = seed_table(SEED_T, SEED_G, SEED_C, SEED_A);

/// Returns the forward-strand seed for base `c`.
///
/// Panics when `c` is not one of `A`, `C`, `G`, `T` or `N`.
#[inline(always)]
fn h(c: u8) -> u64 {
    match H_LOOKUP[usize::from(c)] {
        INVALID_BASE => panic!("Non-ACGTN nucleotide encountered! {}", c as char),
        seed => seed,
    }
}

/// Returns the reverse-strand seed for base `nt`.
///
/// Panics when `nt` is not one of `A`, `C`, `G`, `T` or `N`.
#[inline(always)]
fn rc(nt: u8) -> u64 {
    match RC_LOOKUP[usize::from(nt)] {
        INVALID_BASE => panic!("Non-ACGTN nucleotide encountered! {}", nt as char),
        seed => seed,
    }
}

/// Calculate the hash for a k-mer in the forward strand of a sequence.
///
/// This is a low level function, more useful for debugging than for direct use.
60 | /// 61 | /// ``` 62 | /// use nthash::ntf64; 63 | /// let fh = ntf64(b"TGCAG", 0, 5); 64 | /// assert_eq!(fh, 0xbafa6728fc6dabf); 65 | /// ``` 66 | pub fn ntf64(s: &[u8], i: usize, k: usize) -> u64 { 67 | let mut out = h(s[i + k - 1]); 68 | for (idx, v) in s.iter().skip(i).take(k - 1).enumerate() { 69 | out ^= h(*v).rotate_left((k - idx - 1) as u32); 70 | } 71 | out 72 | } 73 | 74 | /// Calculate the hash for a k-mer in the reverse strand of a sequence. 75 | /// 76 | /// This is a low level function, more useful for debugging than for direct use. 77 | /// 78 | /// ``` 79 | /// use nthash::ntr64; 80 | /// let rh = ntr64(b"TGCAG", 0, 5); 81 | /// assert_eq!(rh, 0x8cf2d4072cca480e); 82 | /// ``` 83 | pub fn ntr64(s: &[u8], i: usize, k: usize) -> u64 { 84 | let mut out = rc(s[i]); 85 | for (idx, v) in s.iter().skip(i + 1).take(k - 1).enumerate() { 86 | out ^= rc(*v).rotate_left(idx as u32 + 1); 87 | } 88 | out 89 | } 90 | 91 | /// Calculate the canonical hash (minimum hash value between the forward 92 | /// and reverse strands in a sequence). 93 | /// 94 | /// This is a low level function, more useful for debugging than for direct use. 95 | /// 96 | /// ``` 97 | /// use nthash::ntc64; 98 | /// let hash = ntc64(b"TGCAG", 0, 5); 99 | /// assert_eq!(hash, 0xbafa6728fc6dabf); 100 | /// ``` 101 | pub fn ntc64(s: &[u8], i: usize, ksize: usize) -> u64 { 102 | u64::min(ntr64(s, i, ksize), ntf64(s, i, ksize)) 103 | } 104 | 105 | /// Takes a sequence and ksize and returns the canonical hashes for each k-mer 106 | /// in a Vec. This doesn't benefit from the rolling hash properties of ntHash, 107 | /// serving more for correctness check for the NtHashIterator. 108 | pub fn nthash(seq: &[u8], ksize: usize) -> Vec { 109 | seq.windows(ksize).map(|x| ntc64(x, 0, ksize)).collect() 110 | } 111 | 112 | /// An efficient iterator for calculating hashes for genomic sequences. 
113 | /// 114 | /// Since it implements the `Iterator` trait it also 115 | /// exposes many other useful methods. In this example we use `collect` to 116 | /// generate all hashes and put them in a `Vec`. 117 | /// ``` 118 | /// # use nthash::Result; 119 | /// use nthash::NtHashIterator; 120 | /// 121 | /// # fn main() -> Result<()> { 122 | /// let seq = b"ACTGC"; 123 | /// let iter = NtHashIterator::new(seq, 3)?; 124 | /// let hashes: Vec = iter.collect(); 125 | /// assert_eq!(hashes, 126 | /// vec![0x9b1eda9a185413ce, 0x9f6acfa2235b86fc, 0xd4a29bf149877c5c]); 127 | /// # Ok(()) 128 | /// # } 129 | /// ``` 130 | /// or, in one line: 131 | /// ``` 132 | /// # use nthash::Result; 133 | /// use nthash::NtHashIterator; 134 | /// 135 | /// # fn main() -> Result<()> { 136 | /// assert_eq!(NtHashIterator::new(b"ACTGC", 3)?.collect::>(), 137 | /// vec![0x9b1eda9a185413ce, 0x9f6acfa2235b86fc, 0xd4a29bf149877c5c]); 138 | /// # Ok(()) 139 | /// # } 140 | /// ``` 141 | #[derive(Debug)] 142 | pub struct NtHashIterator<'a> { 143 | seq: &'a [u8], 144 | k: usize, 145 | fh: u64, 146 | rh: u64, 147 | current_idx: usize, 148 | max_idx: usize, 149 | } 150 | 151 | impl<'a> NtHashIterator<'a> { 152 | /// Creates a new NtHashIterator with internal state properly initialized. 
153 | pub fn new(seq: &'a [u8], k: usize) -> Result> { 154 | if k > seq.len() { 155 | return Err(Error::KSizeOutOfRange { 156 | ksize: k, 157 | seq_size: seq.len(), 158 | }); 159 | } 160 | if k > MAXIMUM_K_SIZE { 161 | return Err(Error::KSizeTooBig(k)); 162 | } 163 | let mut fh = 0; 164 | for (i, v) in seq[0..k].iter().enumerate() { 165 | fh ^= h(*v).rotate_left((k - i - 1) as u32); 166 | } 167 | 168 | let mut rh = 0; 169 | for (i, v) in seq[0..k].iter().rev().enumerate() { 170 | rh ^= rc(*v).rotate_left((k - i - 1) as u32); 171 | } 172 | 173 | Ok(NtHashIterator { 174 | seq, 175 | k, 176 | fh, 177 | rh, 178 | current_idx: 0, 179 | max_idx: seq.len() - k + 1, 180 | }) 181 | } 182 | } 183 | 184 | impl<'a> Iterator for NtHashIterator<'a> { 185 | type Item = u64; 186 | 187 | #[inline(always)] 188 | fn next(&mut self) -> Option { 189 | if self.current_idx == self.max_idx { 190 | return None; 191 | }; 192 | 193 | if self.current_idx != 0 { 194 | let i = self.current_idx - 1; 195 | let seqi = self.seq[i]; 196 | let seqk = self.seq[i + self.k]; 197 | 198 | self.fh = self.fh.rotate_left(1) ^ h(seqi).rotate_left(self.k as u32) ^ h(seqk); 199 | 200 | self.rh = self.rh.rotate_right(1) 201 | ^ rc(seqi).rotate_right(1) 202 | ^ rc(seqk).rotate_left(self.k as u32 - 1); 203 | } 204 | 205 | self.current_idx += 1; 206 | Some(u64::min(self.rh, self.fh)) 207 | } 208 | 209 | fn size_hint(&self) -> (usize, Option) { 210 | (self.max_idx, Some(self.max_idx)) 211 | } 212 | } 213 | 214 | impl<'a> ExactSizeIterator for NtHashIterator<'a> {} 215 | 216 | /// An efficient iterator for calculating hashes for genomic sequences. This 217 | /// returns the forward hashes, not the canonical hashes. 218 | /// 219 | /// Since it implements the `Iterator` trait it also 220 | /// exposes many other useful methods. In this example we use `collect` to 221 | /// generate all hashes and put them in a `Vec`. 
222 | /// ``` 223 | /// # use nthash::Result; 224 | /// use nthash::NtHashForwardIterator; 225 | /// 226 | /// # fn main() -> Result<()> { 227 | /// let seq = b"ACTGC"; 228 | /// let iter = NtHashForwardIterator::new(seq, 3)?; 229 | /// let hashes: Vec = iter.collect(); 230 | /// assert_eq!(hashes, [0xb85d2431d9ba031e, 0xb4d7ab2f9f1306b8, 0xd4a29bf149877c5c]); 231 | /// # Ok(()) 232 | /// # } 233 | /// ``` 234 | /// or, in one line: 235 | /// ``` 236 | /// # use nthash::Result; 237 | /// use nthash::NtHashForwardIterator; 238 | /// 239 | /// # fn main() -> Result<()> { 240 | /// assert_eq!(NtHashForwardIterator::new(b"ACTGC", 3)?.collect::>(), 241 | /// [0xb85d2431d9ba031e, 0xb4d7ab2f9f1306b8, 0xd4a29bf149877c5c]); 242 | /// # Ok(()) 243 | /// # } 244 | /// ``` 245 | #[derive(Debug)] 246 | pub struct NtHashForwardIterator<'a> { 247 | seq: &'a [u8], 248 | k: usize, 249 | fh: u64, 250 | current_idx: usize, 251 | max_idx: usize, 252 | } 253 | 254 | impl<'a> NtHashForwardIterator<'a> { 255 | /// Creates a new NtHashForwardIterator with internal state properly initialized. 
256 | pub fn new(seq: &'a [u8], k: usize) -> Result> { 257 | if k > seq.len() { 258 | return Err(Error::KSizeOutOfRange { 259 | ksize: k, 260 | seq_size: seq.len(), 261 | }); 262 | } 263 | if k > MAXIMUM_K_SIZE { 264 | return Err(Error::KSizeTooBig(k)); 265 | } 266 | 267 | let mut fh = 0; 268 | for (i, v) in seq[0..k].iter().enumerate() { 269 | fh ^= h(*v).rotate_left((k - i - 1) as u32); 270 | } 271 | 272 | Ok(NtHashForwardIterator { 273 | seq, 274 | k, 275 | fh, 276 | current_idx: 0, 277 | max_idx: seq.len() - k + 1, 278 | }) 279 | } 280 | } 281 | 282 | impl<'a> Iterator for NtHashForwardIterator<'a> { 283 | type Item = u64; 284 | 285 | #[inline(always)] 286 | fn next(&mut self) -> Option { 287 | if self.current_idx == self.max_idx { 288 | return None; 289 | }; 290 | 291 | if self.current_idx != 0 { 292 | let i = self.current_idx - 1; 293 | let seqi = self.seq[i]; 294 | let seqk = self.seq[i + self.k]; 295 | 296 | self.fh = self.fh.rotate_left(1) ^ h(seqi).rotate_left(self.k as u32) ^ h(seqk); 297 | } 298 | 299 | self.current_idx += 1; 300 | Some(self.fh) 301 | } 302 | 303 | fn size_hint(&self) -> (usize, Option) { 304 | (self.max_idx, Some(self.max_idx)) 305 | } 306 | } 307 | 308 | impl<'a> ExactSizeIterator for NtHashForwardIterator<'a> {} 309 | -------------------------------------------------------------------------------- /tests/nthash.rs: -------------------------------------------------------------------------------- 1 | use quickcheck::{quickcheck, Arbitrary, Gen}; 2 | 3 | use nthash::{nthash, NtHashIterator}; 4 | 5 | #[test] 6 | fn oracle_cmp() { 7 | assert_eq!(nthash(b"TGCAG", 5), vec![0x0baf_a672_8fc6_dabf]); 8 | assert_eq!(nthash(b"ACGTC", 5), vec![0x4802_02d5_4e8e_becd]); 9 | assert_eq!( 10 | nthash(b"ACGTCGTCAGTCGATGCAGT", 5), 11 | vec![ 12 | 0x4802_02d5_4e8e_becd, 13 | 0xa997_bdc6_28b4_c98e, 14 | 0x8c6d_7ab2_0911_b216, 15 | 0x5ddc_b093_90aa_feef, 16 | 0x25ff_3ac4_bc92_382f, 17 | 0x9bda_9a5c_3560_3946, 18 | 0x82d4_49e5_b371_0ccd, 19 | 
0x1e92_6ce7_780a_b812, 20 | 0x2f6e_d7b2_2647_3a86, 21 | 0xd186_5edf_eb55_b037, 22 | 0x38b5_7494_189a_8afe, 23 | 0x1b23_5fc5_ecac_f386, 24 | 0x1eab_5d82_920f_da13, 25 | 0x02c8_d157_4673_bdcd, 26 | 0x0baf_a672_8fc6_dabf, 27 | 0x14a3_3bb9_2827_7bed, 28 | ] 29 | ); 30 | assert_eq!( 31 | nthash(b"ACGTCGANNGTA", 5), 32 | vec![ 33 | 0x4802_02d5_4e8e_becd, 34 | 0xa997_bdc6_28b4_c98e, 35 | 0xd186_5edf_eb55_b037, 36 | 0xe015_9f5a_89f5_9b7b, 37 | 0xe640_9a0f_689e_64e4, 38 | 0x7a05_4a39_df66_1723, 39 | 0x6d74_fee7_0283_5974, 40 | 0xb744_44dd_9a94_cbf3, 41 | ] 42 | ); 43 | } 44 | 45 | #[test] 46 | fn iter_cmp() { 47 | let ksize = 5; 48 | for s in &vec!["TGCAG", "ACGTC", "ACGTCGTCAGTCGATGCAGT", "ACGTCGANNGTA"] { 49 | let seq = s.as_bytes(); 50 | let iter = NtHashIterator::new(seq, ksize).unwrap(); 51 | println!("{:?}", s); 52 | assert_eq!(nthash(seq, ksize), iter.collect::>()); 53 | } 54 | } 55 | 56 | #[should_panic(expected = "Non-ACGTN nucleotide encountered! E")] 57 | #[test] 58 | fn panic_non_acgtn() { 59 | let ksize: usize = 2; 60 | let sequences = "TGCAGNE"; 61 | let iter = NtHashIterator::new(sequences.as_bytes(), ksize).unwrap(); 62 | let _: Vec = iter.collect(); 63 | } 64 | 65 | #[test] 66 | fn out_of_range_ksize_wont_panic() { 67 | let ksize: usize = 10; 68 | let sequences = "TGCAG"; 69 | let err = NtHashIterator::new(sequences.as_bytes(), ksize).unwrap_err(); 70 | assert_eq!( 71 | err.to_string(), 72 | "K size 10 is out of range for the given sequence size 5" 73 | ); 74 | } 75 | 76 | #[cfg(target_pointer_width = "64")] 77 | #[test] 78 | #[ignore] 79 | fn big_ksize_wont_panic() { 80 | let ksize: usize = (u64::from(u32::max_value()) + 1) as usize; 81 | let repetitions: usize = ((f64::from(u32::max_value()) + 1.0) / 5.0).ceil() as usize; 82 | let sequences = "TGCAG".repeat(repetitions); 83 | let err = NtHashIterator::new(sequences.as_bytes(), ksize).unwrap_err(); 84 | assert_eq!( 85 | err.to_string(), 86 | "K size 4294967296 cannot exceed the size of a u32 4294967295" 87 
| ); 88 | } 89 | 90 | #[derive(Clone, Debug)] 91 | struct Seq(String); 92 | 93 | impl Arbitrary for Seq { 94 | fn arbitrary(g: &mut Gen) -> Seq { 95 | let choices = ['A', 'C', 'G', 'T', 'N']; 96 | let size = g.size(); 97 | let mut s = String::with_capacity(size); 98 | for _ in 0..size { 99 | s.push(*g.choose(&choices).expect("Not a valid nucleotide")); 100 | } 101 | Seq(s) 102 | } 103 | } 104 | 105 | quickcheck! { 106 | fn oracle_quickcheck(s: Seq) -> bool { 107 | let seq = s.0.as_bytes(); 108 | (1..(seq.len())).all(|ksize| { 109 | let iter = NtHashIterator::new(seq, ksize).unwrap(); 110 | nthash(seq, ksize) == iter.collect::>() 111 | }) 112 | } 113 | } 114 | --------------------------------------------------------------------------------