├── .gitignore ├── src ├── lib.rs ├── config.rs ├── scatter.rs ├── equiv_classes.rs ├── mappability.rs ├── utils.rs ├── bin │ └── pseudoaligner.rs ├── build_index.rs └── pseudoaligner.rs ├── .github ├── dependabot.yml └── workflows │ ├── approve_dependabot.yml │ └── test.yml ├── .travis.yml ├── LICENSE ├── test └── README ├── Cargo.toml ├── README.md ├── deny.toml └── Cargo.lock /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 2 | 3 | pub mod build_index; 4 | mod config; 5 | 6 | mod equiv_classes; 7 | pub mod mappability; 8 | pub mod pseudoaligner; 9 | mod scatter; 10 | pub mod utils; 11 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - package-ecosystem: cargo 5 | directory: "/" 6 | schedule: 7 | interval: weekly 8 | open-pull-requests-limit: 10 9 | reviewers: 10 | - pmarks 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | interval: weekly 15 | reviewers: 16 | - pmarks 17 | -------------------------------------------------------------------------------- /.github/workflows/approve_dependabot.yml: -------------------------------------------------------------------------------- 1 | name: Auto approve dependabot PRs 2 | 3 | on: 4 | pull_request_target: 5 | 6 | permissions: 7 | pull-requests: write 8 | 9 | jobs: 10 | autoapprove: 11 | name: Auto-Approve a PR by dependabot 12 | runs-on: ubuntu-latest 13 | if: github.actor == 'dependabot[bot]' || github.actor == 'dependabot-preview[bot]' 14 | steps: 15 | - name: Auto approve 16 | uses: 
cognitedata/auto-approve-dependabot-action@v3.0.1 17 | with: 18 | github-token: ${{ secrets.GITHUB_TOKEN }} 19 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | 3 | # Cache cargo symbols for faster build 4 | cache: cargo 5 | 6 | env: 7 | global: 8 | - RUST_BACKTRACE=1 9 | 10 | rust: 11 | - stable 12 | 13 | before_script: 14 | - export PATH=$HOME/.cargo/bin:$PATH 15 | - cargo install cargo-update || echo "cargo-update already installed" 16 | - cargo install cargo-travis || echo "cargo-travis already installed" 17 | - cargo install-update -a # update outdated cached binaries 18 | 19 | 20 | # the main build 21 | script: 22 | - cargo build 23 | - cargo test 24 | - cargo doc --no-deps 25 | 26 | after_success: 27 | # upload documentation to github.io (gh-pages branch) 28 | - cargo doc-upload 29 | -------------------------------------------------------------------------------- /src/config.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 
2 | 3 | // transcriptome fasta header formats 4 | pub enum FastaFormat { 5 | Unknown, 6 | Gencode, 7 | Ensembl, 8 | Gffread, 9 | } 10 | 11 | // main configs 12 | pub const MEM_SIZE: usize = 1; 13 | pub const MIN_KMERS: usize = 1; 14 | pub const STRANDED: bool = true; 15 | pub const REPORT_ALL_KMER: bool = false; 16 | pub const READ_COVERAGE_THRESHOLD: usize = 32; 17 | pub const LEFT_EXTEND_FRACTION: f64 = 0.2; 18 | pub const DEFAULT_ALLOWED_MISMATCHES: usize = 2; 19 | 20 | pub const U32_MAX: usize = u32::max_value() as usize; 21 | 22 | // Transcriptome mappability 23 | pub const MAPPABILITY_COUNTS_LEN: usize = 11; 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014-2018 10x Genomics, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /test/README: -------------------------------------------------------------------------------- 1 | Download test data from: 2 | ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_28/gencode.v28.transcripts.fa.gz 3 | 4 | Small test transcriptome created by: 5 | head -n 50000 gencode.v28.transcripts.fa > gencode_small.fa 6 | 7 | Small test fastq created from the above by sampling every 15th line, so long as it isn't a sequence name. 8 | We create two more sets of sequences by 1) swapping a random base to A [or A -> C] 9 | and 2) reversing (no complement) 10 | 11 | ``` 12 | QUAL="IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 13 | 14 | cat gencode_small.fa | \ 15 | awk '{ 16 | if(!($0~/^>/) && length($0) == 60 && NR % 15 == 0) { 17 | print "@gencode_small_line"NR; 18 | print $0; 19 | print "+"; 20 | print "'$QUAL'"; 21 | idx = 1 + int((rand()*60) % 60); 22 | if(substr($0, idx, 1) == "A") { 23 | err = "C"; 24 | } else { 25 | err = "A"; 26 | } 27 | print "@gencode_small_line"NR"_err"idx; 28 | print substr($0, 1, idx-1)""err""substr($0, idx+1); 29 | print "+"; 30 | print "'$QUAL'"; 31 | }}' > tmp1 32 | 33 | cat gencode_small.fa | \ 34 | rev | 35 | awk '{ 36 | if(!($0~/>$/) && length($0) == 60 && NR % 15 == 0) { 37 | print "@gencode_small_line"NR"_rev"; 38 | print $0; 39 | print "+"; 40 | print "'$QUAL'"; 41 | }}' > tmp2 42 | 43 | cat tmp1 tmp2 > small.fq 44 | ``` 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "debruijn_mapping" 3 | version = "0.6.0" 4 | authors = ["Avi Srivastava ", "Patrick Marks ", "Joey Arthur "] 5 | edition = "2021" 6 | license = "MIT" 7 | repository = "https://github.com/10XGenomics/rust-pseudoaligner" 8 | include = ["src/**/*", "LICENSE", "README.md"] 9 | 10 | 
[dependencies] 11 | bio = ">=0.35" 12 | crossbeam-utils = ">=0.7" 13 | debruijn = { git = "https://github.com/10XGenomics/rust-debruijn" } 14 | docopt = "1.0" 15 | anyhow = "1" 16 | flate2 = "1.0" 17 | itertools = "0.11" 18 | lazy_static = "1.4" 19 | log = "0.4" 20 | rayon = "1.8" 21 | serde = "1.0" 22 | boomphf = { version = "^0.6", features = ["serde"] } 23 | dashmap = "5" 24 | bincode = "1.2" 25 | pretty_env_logger = "0.5" 26 | 27 | [dev-dependencies.proptest] 28 | version = "1" 29 | default-features = false 30 | # Enable all default features not known to break code coverage builds 31 | features = ["default-code-coverage"] 32 | 33 | [profile.release] 34 | debug = 1 35 | 36 | [features] 37 | default = [] 38 | slow_tests = [] 39 | 40 | [lints.rust] 41 | future_incompatible = "warn" 42 | nonstandard_style = "warn" 43 | unused_must_use = "deny" 44 | 45 | [lints.clippy] 46 | perf = "deny" 47 | style = "deny" 48 | wildcard_imports = "deny" 49 | unused_io_amount = "forbid" 50 | redundant_closure_for_method_calls = "deny" 51 | comparison_chain = "allow" 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rust Pseudoaligner 2 | 3 | A work-in-progress tool for pseudo-alignment of RNA-seq reads to transcriptome references. This project aims to create a very high performance pseudo-alignment tool suitable for single-cell RNA-seq data, and easily usable as a component of larger pipelines. We build on crates including [debruijn](https://github.com/10XGenomics/rust-debruijn), [boomphf](https://github.com/10XGenomics/rust-boomphf), and [bio](https://github.com/rust-bio/rust-bio). 4 | 5 | This tool implements existing algorithms from the literature including: 6 | 7 | Bray, Nicolas L., et al. "Near-optimal probabilistic RNA-seq quantification." Nature biotechnology 34.5 (2016): 525. 
8 | https://arxiv.org/ftp/arxiv/papers/1505/1505.02710.pdf 9 | 10 | Srivastava, Avi, et al. "RapMap: a rapid, sensitive and accurate tool for mapping RNA-seq reads to transcriptomes." Bioinformatics 32.12 (2016): i192-i200. 11 | https://www.biorxiv.org/content/biorxiv/early/2015/10/22/029652.full.pdf 12 | 13 | Ntranos, Vasilis, et al. "Fast and accurate single-cell RNA-seq analysis by clustering of transcript-compatibility counts." Genome biology 17.1 (2016): 112. 14 | https://www.biorxiv.org/content/biorxiv/early/2016/03/04/036863.full.pdf 15 | 16 | Srivastava, Avi, et al. "Alevin: An integrated method for dscRNA-seq quantification." bioRxiv (2018): 335000 17 | https://www.biorxiv.org/content/biorxiv/early/2018/06/01/335000.full.pdf 18 | 19 | Limasset, Antoine, et al. "Fast and scalable minimal perfect hashing for massive key sets." arXiv preprint arXiv:1702.03154 (2017). 20 | https://arxiv.org/pdf/1702.03154.pdf 21 | 22 | Orenstein, Yaron, et al. "Designing small universal k-mer hitting sets for improved analysis of high-throughput sequencing." PLoS computational biology 13.10 (2017): e1005777. 23 | https://journals.plos.org/ploscompbiol/article/file?id=10.1371/journal.pcbi.1005777&type=printable 24 | 25 | Li, Yang. "MSPKmerCounter: a fast and memory efficient approach for k-mer counting." arXiv preprint arXiv:1505.06550 (2015). 26 | https://arxiv.org/pdf/1505.06550.pdf 27 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - master 8 | 9 | permissions: 10 | contents: read 11 | # Write permissions are required in order to produce annotations. 
12 | checks: write 13 | 14 | jobs: 15 | rust: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: dtolnay/rust-toolchain@master 19 | with: 20 | toolchain: 1.75.0 21 | components: rustfmt, clippy 22 | - name: Checkout git repository 23 | uses: actions/checkout@master 24 | - name: Check Rust formatting 25 | run: cargo fmt -- --check 26 | - name: Install cargo-deny 27 | run: | 28 | wget https://github.com/EmbarkStudios/cargo-deny/releases/download/0.14.3/cargo-deny-0.14.3-x86_64-unknown-linux-musl.tar.gz 29 | tar -xvf cargo-deny-0.14.3-x86_64-unknown-linux-musl.tar.gz 30 | mkdir -p ~/bin/ 31 | cp cargo-deny-0.14.3-x86_64-unknown-linux-musl/cargo-deny ~/bin/ 32 | rm -r cargo-deny-* 33 | echo "$HOME/bin" >> $GITHUB_PATH 34 | - name: Cache dependencies 35 | uses: Swatinem/rust-cache@v2 36 | - name: Deny duplicate dependencies and check licenses 37 | run: cargo deny --locked check 38 | - uses: 10XGenomics/clippy-check@main 39 | with: 40 | # TODO: re-enable some of the -A lints below once we're clean on other 41 | # more important ones. 42 | args: | 43 | --all-features --all-targets --locked --release -- 44 | -D clippy::perf 45 | -D clippy::style 46 | -D clippy::wildcard_imports 47 | -W future_incompatible 48 | -W nonstandard_style 49 | -W rust_2018_compatibility 50 | -W rust_2021_compatibility 51 | -F clippy::unused_io_amount 52 | -D clippy::redundant_closure_for_method_calls 53 | -A clippy::comparison_chain 54 | - name: Build tests 55 | run: cargo test --no-run --frozen --release 56 | - name: Run Rust tests 57 | run: cargo test --frozen --release -- --nocapture 58 | -------------------------------------------------------------------------------- /src/scatter.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Mutex; 2 | 3 | /// Ingest pairs of (index, value) from multiple threads and set 4 | /// vec[index] = value efficiently. 
5 | pub struct ScatterToVec<'a, T> { 6 | slices: Vec>, 7 | chunk_bit_size: usize, 8 | max_buf_size: usize, 9 | } 10 | 11 | const CHUNK_BITS: usize = 20; 12 | const BUF_ELEMENTS: usize = 16; 13 | 14 | impl<'a, T> ScatterToVec<'a, T> { 15 | /// Create a new scatterer that permits efficiently writing (index, value) 16 | /// tuples into `data` from multiple threads. 17 | pub fn new(data: &'a mut [T]) -> ScatterToVec<'a, T> { 18 | let mut slices = Vec::new(); 19 | let sz = 1 << CHUNK_BITS; 20 | 21 | let mut rest = data; 22 | 23 | while rest.len() > sz { 24 | let (l, r) = rest.split_at_mut(sz); 25 | slices.push(Mutex::new(l)); 26 | rest = r; 27 | } 28 | 29 | slices.push(Mutex::new(rest)); 30 | 31 | ScatterToVec { 32 | slices, 33 | chunk_bit_size: CHUNK_BITS, 34 | max_buf_size: BUF_ELEMENTS, 35 | } 36 | } 37 | 38 | /// Create a writer handle. Each thread that produces values 39 | /// should be given its own handle to write values with. 40 | pub fn handle(&'a self) -> ScatterHandle<'a, T> { 41 | let mut bufs = Vec::with_capacity(self.slices.len()); 42 | for _ in 0..self.slices.len() { 43 | bufs.push(vec![]); 44 | } 45 | 46 | ScatterHandle { 47 | max_buf_size: self.max_buf_size, 48 | scatter: self, 49 | bufs, 50 | } 51 | } 52 | } 53 | 54 | /// A handle to write (index, value) pairs 55 | /// into the target slice. 56 | pub struct ScatterHandle<'a, T> { 57 | max_buf_size: usize, 58 | scatter: &'a ScatterToVec<'a, T>, 59 | bufs: Vec>, 60 | } 61 | 62 | impl<'a, T> ScatterHandle<'a, T> { 63 | /// Set data[index] = value in the data slice. 
64 | pub fn write(&mut self, index: usize, value: T) { 65 | let chunk = index >> self.scatter.chunk_bit_size; 66 | let buf = &mut self.bufs[chunk]; 67 | buf.push((index, value)); 68 | 69 | // If we've filled this buffer, write out the values 70 | if buf.len() == self.max_buf_size { 71 | self.flush_chunk(chunk); 72 | } 73 | } 74 | 75 | fn flush_chunk(&mut self, chunk: usize) { 76 | let buf = &mut self.bufs[chunk]; 77 | let mut slice_to_write = self.scatter.slices[chunk].lock().unwrap(); 78 | 79 | for (index, value) in buf.drain(..) { 80 | let mask = (1 << self.scatter.chunk_bit_size) - 1; 81 | let slice_pos = mask & index; 82 | slice_to_write[slice_pos] = value; 83 | } 84 | } 85 | } 86 | 87 | impl<'a, T> Drop for ScatterHandle<'a, T> { 88 | fn drop(&mut self) { 89 | for i in 0..self.bufs.len() { 90 | self.flush_chunk(i); 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/equiv_classes.rs: -------------------------------------------------------------------------------- 1 | // Copyright 2017 10x Genomics 2 | 3 | //! 
Generate equivalence classes for pseudoaligner 4 | use std::fmt::Debug; 5 | use std::hash::Hash; 6 | use std::ops::Deref; 7 | use std::sync::atomic::{AtomicUsize, Ordering}; 8 | 9 | use dashmap::DashMap; 10 | 11 | use debruijn::filter::KmerSummarizer; 12 | use debruijn::Exts; 13 | 14 | //Equivalence class based implementation 15 | pub type EqClassIdType = u32; 16 | pub struct CountFilterEqClass { 17 | min_kmer_obs: usize, 18 | eq_classes: DashMap, EqClassIdType>, 19 | num_eq_classes: AtomicUsize, 20 | } 21 | 22 | impl CountFilterEqClass { 23 | pub fn new(min_kmer_obs: usize) -> CountFilterEqClass { 24 | CountFilterEqClass { 25 | min_kmer_obs, 26 | eq_classes: DashMap::, EqClassIdType>::with_shard_amount(4), 27 | num_eq_classes: AtomicUsize::new(0), 28 | } 29 | } 30 | 31 | pub fn get_eq_classes(&self) -> Vec> { 32 | let mut eq_class_vec = vec![Vec::new(); self.get_number_of_eq_classes()]; 33 | 34 | let mut eq_ids = Vec::with_capacity(eq_class_vec.len()); 35 | 36 | for item in &self.eq_classes { 37 | eq_class_vec[*item.value() as usize] = item.key().clone(); 38 | eq_ids.push(*item.value() as usize) 39 | } 40 | 41 | // consistency property: the equivalence classes must be assigned 42 | // unique ids from 0 to N, with no gaps. This could be violated 43 | // if there is a race condition when assigning equivalence class 44 | // ids in CountFilterEqClass::summarize below. Panic if this 45 | // property doesn't hold. 
46 | eq_ids.sort_unstable(); 47 | for (i, id) in eq_ids.into_iter().enumerate() { 48 | assert_eq!(id, i); 49 | } 50 | 51 | eq_class_vec 52 | } 53 | 54 | pub fn get_number_of_eq_classes(&self) -> usize { 55 | self.num_eq_classes.load(Ordering::SeqCst) 56 | } 57 | } 58 | 59 | impl KmerSummarizer 60 | for CountFilterEqClass 61 | { 62 | fn summarize>( 63 | &self, 64 | items: F, 65 | ) -> (bool, Exts, EqClassIdType) { 66 | let mut all_exts = Exts::empty(); 67 | 68 | // the ids of the sequences in the equivalence class 69 | let mut eq_class = Vec::new(); 70 | 71 | let mut nobs = 0; 72 | for (_, exts, d) in items { 73 | eq_class.push(d); 74 | all_exts = all_exts.add(exts); 75 | nobs += 1; 76 | } 77 | 78 | eq_class.sort(); 79 | eq_class.dedup(); 80 | 81 | // register the equivalence class and assign it a unique id. 82 | // IDs must be sequential from 0 to N. This must be an atomic operation. 83 | // The correctness of the eqclass_ids is checked above in get_eq_classes. 84 | let eq_ref = self 85 | .eq_classes 86 | .entry(eq_class) 87 | .or_insert_with(|| self.num_eq_classes.fetch_add(1, Ordering::SeqCst) as u32); 88 | 89 | let eq_id = *eq_ref.deref(); 90 | (nobs as usize >= self.min_kmer_obs, all_exts, eq_id) 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /deny.toml: -------------------------------------------------------------------------------- 1 | # Only the specified targets will be checked when running `cargo deny check`. 
2 | targets = [ 3 | # The triple can be any string, but only the target triples built in to 4 | # rustc (as of 1.40) can be checked against actual config expressions 5 | { triple = "x86_64-unknown-linux-gnu" }, 6 | ] 7 | 8 | # This section is considered when running `cargo deny check advisories` 9 | # More documentation for the advisories section can be found here: 10 | # https://embarkstudios.github.io/cargo-deny/checks/advisories/cfg.html 11 | [advisories] 12 | # The path where the advisory database is cloned/fetched into 13 | # db-path = "~/.cargo/advisory-db" 14 | # The url(s) of the advisory databases to use 15 | db-urls = ["https://github.com/rustsec/advisory-db"] 16 | vulnerability = "deny" 17 | unmaintained = "warn" 18 | yanked = "warn" 19 | notice = "warn" 20 | # A list of advisory IDs to ignore. Note that ignored advisories will still 21 | # output a note when they are encountered. 22 | ignore = [ 23 | ] 24 | 25 | # This section is considered when running `cargo deny check licenses` 26 | # More documentation for the licenses section can be found here: 27 | # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html 28 | [licenses] 29 | unlicensed = "deny" 30 | # Allow us to keep a consistent list across projects without needing 31 | # to customize deny.toml based on what's actually present. 32 | unused-allowed-license = "allow" 33 | # List of explicitly allowed licenses 34 | # See https://spdx.org/licenses/ for list of possible licenses 35 | # [possible values: any SPDX 3.11 short identifier (+ optional exception)]. 36 | allow = [ 37 | "0BSD", 38 | "Apache-2.0", 39 | "Apache-2.0 WITH LLVM-exception", 40 | "BSD-2-Clause", 41 | "BSD-3-Clause", 42 | "ISC", 43 | "MIT", 44 | "MPL-2.0", 45 | "OpenSSL", 46 | "WTFPL", 47 | ] 48 | copyleft = "deny" 49 | allow-osi-fsf-free = "neither" 50 | default = "deny" 51 | # The confidence threshold for detecting a license from license text. 
52 | # The higher the value, the more closely the license text must be to the 53 | # canonical license text of a valid SPDX license file. 54 | # [possible values: any between 0.0 and 1.0]. 55 | confidence-threshold = 0.6 56 | 57 | [licenses.private] 58 | # If true, ignores workspace crates that aren't published, or are only 59 | # published to private registries 60 | ignore = true 61 | # One or more private registries that you might publish crates to, if a crate 62 | # is only published to private registries, and ignore is true, the crate will 63 | # not have its license(s) checked 64 | registries = [ 65 | #"https://sekretz.com/registry 66 | ] 67 | 68 | # This section is considered when running `cargo deny check bans`. 69 | # More documentation about the 'bans' section can be found here: 70 | # https://embarkstudios.github.io/cargo-deny/checks/bans/cfg.html 71 | [bans] 72 | multiple-versions = "deny" 73 | wildcards = "allow" 74 | highlight = "all" 75 | 76 | # This section is considered when running `cargo deny check sources`. 77 | # More documentation about the 'sources' section can be found here: 78 | # https://embarkstudios.github.io/cargo-deny/checks/sources/cfg.html 79 | [sources] 80 | unknown-registry = "deny" 81 | unknown-git = "deny" 82 | allow-registry = ["https://github.com/rust-lang/crates.io-index"] 83 | allow-git = [] 84 | 85 | [sources.allow-org] 86 | github = ["10XGenomics"] 87 | -------------------------------------------------------------------------------- /src/mappability.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 2 | 3 | use anyhow::Error; 4 | use debruijn::Kmer; 5 | use itertools::Itertools; 6 | use std::io::Write; 7 | use std::path::Path; 8 | 9 | use crate::config::MAPPABILITY_COUNTS_LEN; 10 | use crate::pseudoaligner::Pseudoaligner; 11 | use crate::utils::open_file; 12 | 13 | // 1. 
Given graph, build a data structure of transcripts 14 | // - tx: tx_name, gene_name, 15 | // 2. For each de Bruijn graph node 16 | // - count = number of kmers (L - K + 1) 17 | // - transcript multiplicity = # of colors (size of equiv class) 18 | // - gene multiplicity = # of distinct genes 19 | // - add count, transcript multiplicity to tx_mappability 20 | // - add count, gene multiplicity to gene_mappability 21 | // 3. Output results to tx_mappability.tsv and gene_mappability.tsv 22 | // - tx_mappability: 23 | // tx_name gene_name length kmer_count fraction_unique_tx fraction_unique_gene 24 | // MappabilityRecord: tx_name, gene_name, tx_multiplicity: [usize], gene_multiplicity: [usize] 25 | // 26 | // fn update_counts(Vec, kmer_count, ids) 27 | // fn update_counts(self, kmer_count, ids), Option(Gene_tx_map)) 28 | // - (if gene we'll need to make a gene vector instead of color) 29 | // fn fraction_unique(self) -> f64 30 | const MAPPABILITY_HEADER_STRING: &str = 31 | "tx_name\tgene_name\ttx_kmer_count\tfrac_kmer_unique_tx\tfrac_kmer_unique_gene\n"; 32 | 33 | #[derive(Debug)] 34 | pub struct MappabilityRecord<'a> { 35 | pub tx_name: &'a str, 36 | pub gene_name: &'a str, 37 | tx_multiplicity: [usize; MAPPABILITY_COUNTS_LEN], 38 | gene_multiplicity: [usize; MAPPABILITY_COUNTS_LEN], 39 | } 40 | 41 | impl MappabilityRecord<'_> { 42 | fn new<'a>(tx_name: &'a str, gene_name: &'a str) -> MappabilityRecord<'a> { 43 | MappabilityRecord { 44 | tx_name, 45 | gene_name, 46 | // tx_multiplicity[j] = # of kmers in this tx shared by j other transcripts 47 | tx_multiplicity: [0; MAPPABILITY_COUNTS_LEN], 48 | // gene_multiplicity[j] = # of kmers in the tx shared by j other genes 49 | gene_multiplicity: [0; MAPPABILITY_COUNTS_LEN], 50 | } 51 | } 52 | 53 | fn total_kmer_count(&self) -> usize { 54 | self.tx_multiplicity.iter().sum() 55 | } 56 | 57 | fn add_tx_count(&mut self, count: usize, multiplicity: usize) { 58 | if multiplicity > MAPPABILITY_COUNTS_LEN { 59 | 
self.tx_multiplicity[MAPPABILITY_COUNTS_LEN - 1] += count 60 | } else { 61 | self.tx_multiplicity[multiplicity - 1] += count 62 | } 63 | } 64 | 65 | fn add_gene_count(&mut self, count: usize, multiplicity: usize) { 66 | if multiplicity > MAPPABILITY_COUNTS_LEN { 67 | self.gene_multiplicity[MAPPABILITY_COUNTS_LEN - 1] += count 68 | } else { 69 | self.gene_multiplicity[multiplicity - 1] += count 70 | } 71 | } 72 | 73 | fn fraction_unique_tx(&self) -> f64 { 74 | self.tx_multiplicity[0] as f64 / self.total_kmer_count() as f64 75 | } 76 | 77 | fn fraction_unique_gene(&self) -> f64 { 78 | self.gene_multiplicity[0] as f64 / self.total_kmer_count() as f64 79 | } 80 | 81 | fn to_tsv(&self) -> String { 82 | format!( 83 | "{}\t{}\t{}\t{}\t{}", 84 | self.tx_name, 85 | self.gene_name, 86 | self.total_kmer_count(), 87 | self.fraction_unique_tx(), 88 | self.fraction_unique_gene() 89 | ) 90 | } 91 | } 92 | 93 | pub fn write_mappability_tsv>( 94 | records: Vec, 95 | outdir: P, 96 | ) -> Result<(), Error> { 97 | let mut outfile = open_file("tx_mappability.tsv", outdir)?; 98 | 99 | outfile.write_all(MAPPABILITY_HEADER_STRING.as_bytes())?; 100 | 101 | for record in records { 102 | writeln!(outfile, "{}", record.to_tsv())?; 103 | } 104 | 105 | Ok(()) 106 | } 107 | 108 | // pub fn update_counts(records: &mut Vec, 109 | // kmer_count: usize, 110 | // ids: Vec) { 111 | // let num_ids = ids.len(); 112 | // for id in ids { 113 | // // add to total # kmers 114 | // records[id].add_count(kmer_count, 0); 115 | // // add to counts according to the size of this equiv class 116 | // records[id].add_count(kmer_count, num_ids) 117 | // } 118 | // } 119 | 120 | pub fn analyze_graph( 121 | index: &Pseudoaligner, 122 | ) -> Result>, Error> { 123 | // Make records 124 | let mut records = index 125 | .tx_names 126 | .iter() 127 | .map(|tx_name| MappabilityRecord::new(tx_name, index.tx_gene_mapping.get(tx_name).unwrap())) 128 | .collect::>(); 129 | 130 | // Iterate through graph 131 | for node in 
index.dbg.iter_nodes() { 132 | let num_kmer = node.len() - K::k() + 1; 133 | 134 | let eq_class_idx = *node.data() as usize; 135 | let eq_class = &index.eq_classes[eq_class_idx]; 136 | 137 | let num_tx = eq_class.len(); 138 | 139 | let num_genes = eq_class 140 | .iter() 141 | .map(|&tx_id| { 142 | let tx_name = &index.tx_names[tx_id as usize]; 143 | index.tx_gene_mapping.get(tx_name) 144 | }) 145 | .unique() 146 | .count(); 147 | 148 | for &tx_id in eq_class { 149 | let record = &mut records[tx_id as usize]; 150 | record.add_tx_count(num_kmer, num_tx); 151 | record.add_gene_count(num_kmer, num_genes); 152 | } 153 | } 154 | 155 | Ok(records) 156 | } 157 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 2 | 3 | //! Utility methods. 4 | use std::collections::HashMap; 5 | use std::fmt::Debug; 6 | use std::fs::File; 7 | use std::io::{self, BufRead, BufReader, BufWriter}; 8 | use std::path::Path; 9 | use std::sync::{Arc, Mutex}; 10 | 11 | use anyhow::{self, Error}; 12 | use bincode::{self, deserialize_from, serialize_into}; 13 | use flate2::read::MultiGzDecoder; 14 | use serde::{de::DeserializeOwned, Serialize}; 15 | 16 | use bio::io::{fasta, fastq}; 17 | use debruijn::dna_string::DnaString; 18 | use log::info; 19 | 20 | use crate::config::FastaFormat; 21 | 22 | pub fn write_obj + Debug>( 23 | g: &T, 24 | filename: P, 25 | ) -> Result<(), bincode::Error> { 26 | let f = match File::create(&filename) { 27 | Err(err) => panic!("couldn't create file {:?}: {}", filename, err), 28 | Ok(f) => f, 29 | }; 30 | let mut writer = BufWriter::new(f); 31 | serialize_into(&mut writer, &g) 32 | } 33 | 34 | pub fn read_obj + Debug>( 35 | filename: P, 36 | ) -> Result { 37 | let f = match File::open(&filename) { 38 | Err(err) => panic!("couldn't open file {:?}: {}", filename, err), 39 | Ok(f) => f, 
40 | }; 41 | let mut reader = BufReader::new(f); 42 | deserialize_from(&mut reader) 43 | } 44 | 45 | /// Open a (possibly gzipped) file into a BufReader. 46 | fn _open_with_gz>(p: P) -> Result, Error> { 47 | let r = File::open(p.as_ref())?; 48 | 49 | if p.as_ref().extension().unwrap() == "gz" { 50 | let gz = MultiGzDecoder::new(r); 51 | let buf_reader = BufReader::with_capacity(32 * 1024, gz); 52 | Ok(Box::new(buf_reader)) 53 | } else { 54 | let buf_reader = BufReader::with_capacity(32 * 1024, r); 55 | Ok(Box::new(buf_reader)) 56 | } 57 | } 58 | 59 | type ReadTranscriptResults = (Vec, Vec, HashMap); 60 | 61 | pub fn read_transcripts( 62 | reader: fasta::Reader>, 63 | ) -> Result { 64 | let mut seqs = Vec::new(); 65 | let mut transcript_counter = 0; 66 | let mut tx_ids = Vec::new(); 67 | let mut tx_to_gene_map = HashMap::new(); 68 | let mut fasta_format = FastaFormat::Unknown; 69 | 70 | info!("Reading transcripts from Fasta file"); 71 | for result in reader.records() { 72 | // obtain record or fail with error 73 | let record = result?; 74 | 75 | // Sequence 76 | let dna_string = DnaString::from_acgt_bytes_hashn(record.seq(), record.id().as_bytes()); 77 | seqs.push(dna_string); 78 | 79 | if let FastaFormat::Unknown = fasta_format { 80 | fasta_format = detect_fasta_format(&record)?; 81 | } 82 | 83 | let (tx_id, gene_id) = extract_tx_gene_id(&record, &fasta_format); 84 | 85 | tx_ids.push(tx_id.to_string()); 86 | tx_to_gene_map.insert(tx_id.to_string(), gene_id.to_string()); 87 | 88 | transcript_counter += 1; 89 | } 90 | 91 | info!( 92 | "Done reading the Fasta file; Found {} sequences", 93 | transcript_counter 94 | ); 95 | 96 | Ok((seqs, tx_ids, tx_to_gene_map)) 97 | } 98 | 99 | fn detect_fasta_format(record: &fasta::Record) -> Result { 100 | let id_tokens = record.id().split('|'); 101 | if id_tokens.count() == 9 { 102 | return Ok(FastaFormat::Gencode); 103 | } 104 | 105 | let mut desc_tokens = record.desc().unwrap().split(' '); 106 | if let Some(desc_token) = 
desc_tokens.next() { 107 | let mut gene_tokens = desc_token.split('='); 108 | if let Some(gene_token) = gene_tokens.next() { 109 | if gene_token == "gene" && gene_tokens.count() == 1 { 110 | return Ok(FastaFormat::Gffread); 111 | } 112 | } else if desc_tokens.count() == 4 { 113 | return Ok(FastaFormat::Ensembl); 114 | } 115 | } 116 | anyhow::bail!("Failed to detect FASTA header format.") 117 | } 118 | 119 | fn extract_tx_gene_id<'a>( 120 | record: &'a fasta::Record, 121 | fasta_format: &FastaFormat, 122 | ) -> (&'a str, &'a str) { 123 | match *fasta_format { 124 | FastaFormat::Gencode => { 125 | let mut id_tokens = record.id().split('|'); 126 | let tx_id = id_tokens.next().unwrap(); 127 | let gene_id = id_tokens.next().unwrap(); 128 | // (human readable name) 129 | // let gene_name = id_tokens[5].to_string(); 130 | (tx_id, gene_id) 131 | } 132 | FastaFormat::Ensembl => { 133 | let tx_id = record.id(); 134 | let mut desc_tokens = record.desc().unwrap().split(' '); 135 | let gene_id = desc_tokens.nth(2).unwrap().split(':').nth(1).unwrap(); 136 | (tx_id, gene_id) 137 | } 138 | FastaFormat::Gffread => { 139 | let mut id_tokens = record.id().split(' '); 140 | let tx_id = id_tokens.next().unwrap(); 141 | let mut desc_tokens = record.desc().unwrap().split(' '); 142 | let mut gene_tokens = desc_tokens.next().unwrap().split('='); 143 | let gene_id = gene_tokens.nth(1).unwrap(); 144 | (tx_id, gene_id) 145 | } 146 | FastaFormat::Unknown => { 147 | panic!("fasta_format was uninitialized"); 148 | } 149 | } 150 | } 151 | 152 | pub(crate) fn get_next_record( 153 | reader: &Arc>>, 154 | ) -> Option> { 155 | let mut lock = reader.lock().unwrap(); 156 | lock.next() 157 | } 158 | 159 | pub(crate) fn open_file>(filename: &str, outdir: P) -> Result { 160 | let out_fn = outdir.as_ref().join(filename); 161 | let outfile = File::create(out_fn)?; 162 | Ok(outfile) 163 | } 164 | -------------------------------------------------------------------------------- /src/bin/pseudoaligner.rs: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 2 | 3 | use debruijn::kmer; 4 | use log::info; 5 | use serde::Deserialize; 6 | 7 | use anyhow::Error; 8 | use bio::io::{fasta, fastq}; 9 | use docopt::Docopt; 10 | use std::{env, fs}; 11 | use std::{path::PathBuf, str}; 12 | 13 | use debruijn_mapping::{ 14 | build_index::build_index, 15 | mappability::{analyze_graph, write_mappability_tsv}, 16 | pseudoaligner, 17 | pseudoaligner::process_reads, 18 | utils, 19 | }; 20 | 21 | const PKG_NAME: &str = env!("CARGO_PKG_NAME"); 22 | const PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); 23 | const USAGE: &str = " 24 | De-bruijn-mapping 25 | 26 | Usage: 27 | pseudoaligner index [--kmer-size=] [--num-threads=] -i 28 | pseudoaligner map [--kmer-size=] [--num-threads=] -i 29 | pseudoaligner mappability [-o ] [--kmer-size=] -i 30 | pseudoaligner idxstats [--kmer-size=] -i 31 | pseudoaligner inspect [--kmer-size=] -i -c ... 32 | pseudoaligner -h | --help | -v | --version 33 | 34 | Options: 35 | -k --kmer-size K Kmer size to use - only 20 or 64 currently supported [default: 20]. 36 | -n --num-threads N Number of worker threads [default: 2] 37 | -o --outdir DIR Output directory 38 | -h --help Show this screen. 39 | -v --version Show version. 
40 | "; 41 | 42 | #[derive(Clone, Debug, Deserialize)] 43 | struct Args { 44 | arg_ref_fasta: String, 45 | arg_index: String, 46 | arg_reads_fastq: String, 47 | flag_outdir: Option, 48 | flag_num_threads: usize, 49 | flag_kmer_size: usize, 50 | 51 | cmd_index: bool, 52 | 53 | cmd_map: bool, 54 | cmd_mappability: bool, 55 | cmd_idxstats: bool, 56 | 57 | flag_version: bool, 58 | flag_v: bool, 59 | } 60 | 61 | enum KmerSetting { 62 | K20, 63 | K64, 64 | } 65 | 66 | fn main() -> Result<(), Error> { 67 | let args: Args = Docopt::new(USAGE) 68 | .and_then(|d| d.deserialize()) 69 | .unwrap_or_else(|e| e.exit()); 70 | 71 | if args.flag_version || args.flag_v { 72 | println! {"{} {}", PKG_NAME, PKG_VERSION}; 73 | return Ok(()); 74 | } 75 | 76 | // initialize logger 77 | pretty_env_logger::init_timed(); 78 | info!("Command line args:\n{:?}", args); 79 | 80 | let outdir = match &args.flag_outdir { 81 | Some(dir) => PathBuf::from(dir), 82 | None => env::current_dir()?, 83 | }; 84 | fs::create_dir_all(&outdir)?; 85 | 86 | let km = match args.flag_kmer_size { 87 | 20 => KmerSetting::K20, 88 | 64 => KmerSetting::K64, 89 | v => { 90 | println!( 91 | "Kmer size = {} is not supported. 
Set kmer size to 20 or 64", 92 | v 93 | ); 94 | return Ok(()); 95 | } 96 | }; 97 | 98 | if args.cmd_index { 99 | info!("Building index from fasta"); 100 | let fasta = fasta::Reader::from_file(args.arg_ref_fasta)?; 101 | let (seqs, tx_names, tx_gene_map) = utils::read_transcripts(fasta)?; 102 | 103 | match km { 104 | KmerSetting::K20 => { 105 | let index = build_index::( 106 | &seqs, 107 | &tx_names, 108 | &tx_gene_map, 109 | args.flag_num_threads, 110 | )?; 111 | info!("Finished building index!"); 112 | 113 | info!("Writing index to disk"); 114 | utils::write_obj(&index, args.arg_index)?; 115 | info!("Finished writing index!"); 116 | } 117 | KmerSetting::K64 => { 118 | let index = build_index::( 119 | &seqs, 120 | &tx_names, 121 | &tx_gene_map, 122 | args.flag_num_threads, 123 | )?; 124 | info!("Finished building index!"); 125 | 126 | info!("Writing index to disk"); 127 | utils::write_obj(&index, args.arg_index)?; 128 | info!("Finished writing index!"); 129 | } 130 | } 131 | } else if args.cmd_map { 132 | match km { 133 | KmerSetting::K20 => { 134 | info!("Reading index from disk"); 135 | let index = utils::read_obj(args.arg_index)?; 136 | info!("Finished reading index!"); 137 | 138 | info!("Mapping reads from fastq"); 139 | let reads = fastq::Reader::from_file(args.arg_reads_fastq)?; 140 | process_reads::(reads, &index, outdir, args.flag_num_threads)?; 141 | } 142 | KmerSetting::K64 => { 143 | info!("Reading index from disk"); 144 | let index = utils::read_obj(args.arg_index)?; 145 | info!("Finished reading index!"); 146 | 147 | info!("Mapping reads from fastq"); 148 | let reads = fastq::Reader::from_file(args.arg_reads_fastq)?; 149 | process_reads::(reads, &index, outdir, args.flag_num_threads)?; 150 | } 151 | } 152 | } else if args.cmd_mappability { 153 | match km { 154 | KmerSetting::K20 => { 155 | info!("Reading index from disk"); 156 | let index = debruijn_mapping::utils::read_obj(args.arg_index)?; 157 | info!("Finished reading index!"); 158 | 
info!("Analyzing de Bruijn graph"); 159 | let records = analyze_graph::(&index)?; 160 | info!("Finished analyzing!"); 161 | info!("{} transcripts total", records.len()); 162 | write_mappability_tsv(records, outdir)?; 163 | } 164 | KmerSetting::K64 => { 165 | info!("Reading index from disk"); 166 | let index = debruijn_mapping::utils::read_obj(args.arg_index)?; 167 | info!("Finished reading index!"); 168 | info!("Analyzing de Bruijn graph"); 169 | let records = analyze_graph::(&index)?; 170 | info!("Finished analyzing!"); 171 | info!("{} transcripts total", records.len()); 172 | write_mappability_tsv(records, outdir)?; 173 | } 174 | } 175 | } else if args.cmd_idxstats { 176 | match km { 177 | KmerSetting::K20 => { 178 | let index: pseudoaligner::Pseudoaligner = 179 | utils::read_obj(args.arg_index)?; 180 | 181 | use debruijn::Mer; 182 | 183 | for e in index.dbg.iter_nodes() { 184 | let eqid = e.data(); 185 | let eq = &index.eq_classes[*eqid as usize]; 186 | println!("{}\t{}\t{}", e.node_id, e.sequence().len(), eq.len()); 187 | } 188 | } 189 | KmerSetting::K64 => { 190 | let index: pseudoaligner::Pseudoaligner = 191 | utils::read_obj(args.arg_index)?; 192 | 193 | use debruijn::Mer; 194 | 195 | for e in index.dbg.iter_nodes() { 196 | let eqid = e.data(); 197 | let eq = &index.eq_classes[*eqid as usize]; 198 | println!("{}\t{}\t{}", e.node_id, e.sequence().len(), eq.len()); 199 | } 200 | } 201 | } 202 | } 203 | 204 | info!("Done!"); 205 | Ok(()) 206 | } 207 | -------------------------------------------------------------------------------- /src/build_index.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 
2 | 3 | use lazy_static::lazy_static; 4 | use std::collections::HashMap; 5 | use std::sync::Arc; 6 | 7 | use crate::config::{MEM_SIZE, REPORT_ALL_KMER, STRANDED}; 8 | use boomphf::hashmap::{BoomHashMap2, NoKeyBoomHashMap}; 9 | use debruijn; 10 | use debruijn::compression::{compress_graph, compress_kmers_with_hash, ScmapCompress}; 11 | use debruijn::dna_string::{DnaString, DnaStringSlice}; 12 | use debruijn::filter::filter_kmers; 13 | use debruijn::graph::{BaseGraph, DebruijnGraph}; 14 | use debruijn::{Exts, Kmer}; 15 | 16 | use crate::config::{MIN_KMERS, U32_MAX}; 17 | use crate::equiv_classes::{CountFilterEqClass, EqClassIdType}; 18 | use crate::pseudoaligner::Pseudoaligner; 19 | use anyhow::Error; 20 | use boomphf; 21 | use log::info; 22 | use rayon::prelude::*; 23 | use rayon::{self, ThreadPool}; 24 | 25 | const MIN_SHARD_SEQUENCES: usize = 2000; 26 | 27 | pub fn build_index( 28 | seqs: &[DnaString], 29 | tx_names: &[String], 30 | tx_gene_map: &HashMap, 31 | num_threads: usize, 32 | ) -> Result, Error> { 33 | // Thread pool Configuration for calling BOOMphf 34 | let pool = rayon::ThreadPoolBuilder::new() 35 | .num_threads(num_threads) 36 | .build()?; 37 | 38 | if seqs.len() >= U32_MAX { 39 | panic!("Too many ({}) sequences to handle.", seqs.len()); 40 | } 41 | 42 | info!("Sharding sequences..."); 43 | 44 | let mut buckets: Vec<_> = seqs 45 | .iter() 46 | .enumerate() 47 | .flat_map(|(id, seq)| partition_contigs::(seq, id as u32)) 48 | .collect(); 49 | 50 | pool.install(|| { 51 | buckets.par_sort_unstable_by_key(|x| x.0); 52 | }); 53 | info!("Got {} sequence chunks", buckets.len()); 54 | 55 | let summarizer = Arc::new(CountFilterEqClass::new(MIN_KMERS)); 56 | let sequence_shards = group_by_slices(&buckets, |x| x.0, MIN_SHARD_SEQUENCES); 57 | 58 | info!("Assembling {} shards...", sequence_shards.len()); 59 | 60 | let shard_dbgs = pool.install(|| { 61 | let mut shard_dbgs = Vec::with_capacity(sequence_shards.len()); 62 | sequence_shards 63 | .into_par_iter() 64 | 
.into_par_iter() 65 | .map_with(summarizer.clone(), |s, strings| { 66 | assemble_shard::(strings, s) 67 | }) 68 | .collect_into_vec(&mut shard_dbgs); 69 | 70 | shard_dbgs 71 | }); 72 | 73 | info!("Done dBG construction of shards"); 74 | info!("Starting merging disjoint graphs"); 75 | 76 | let dbg = merge_shard_dbgs(shard_dbgs); 77 | info!("Graph merge complete"); 78 | 79 | let eq_classes = summarizer.get_eq_classes(); 80 | 81 | info!("Indexing de Bruijn graph"); 82 | let dbg_index = make_dbg_index(&dbg, &pool, num_threads); 83 | 84 | Ok(Pseudoaligner::new( 85 | dbg, 86 | eq_classes, 87 | dbg_index, 88 | tx_names.to_owned(), 89 | tx_gene_map.clone(), 90 | )) 91 | } 92 | 93 | type PmerType = debruijn::kmer::Kmer6; 94 | 95 | lazy_static! { 96 | static ref PERM: Vec = { 97 | let maxp = 1 << (2 * PmerType::k()); 98 | let mut sorted_kmers: Vec = Vec::with_capacity(maxp); 99 | for i in 0..maxp { 100 | let kmer = Kmer::from_u64(i as u64); 101 | sorted_kmers.push(kmer); 102 | } 103 | sorted_kmers.sort_by_key(count_a_t_bases); 104 | 105 | let mut permutation = vec![0; maxp]; 106 | 107 | for (sort_pos, kmer) in sorted_kmers.into_iter().enumerate() { 108 | permutation[kmer.to_u64() as usize] = sort_pos; 109 | } 110 | 111 | permutation 112 | }; 113 | } 114 | 115 | /// Count the number of A/T bases in a kmer 116 | fn count_a_t_bases(kmer: &K) -> usize { 117 | let mut count = 0; 118 | for i in 0..K::k() { 119 | let v = kmer.get(i); 120 | if v == b'A' || v == b'T' { 121 | count += 1; 122 | } 123 | } 124 | count 125 | } 126 | 127 | fn partition_contigs( 128 | contig: &DnaString, 129 | contig_id: u32, 130 | ) -> Vec<(u16, u32, DnaStringSlice<'_>, Exts)> { 131 | // One FASTA entry possibly broken into multiple contigs 132 | // based on the location of `N` int he sequence. 133 | 134 | if contig.len() >= K::k() { 135 | // It is safe to always set rc to true when calling simple_scan. 
See 136 | // https://github.com/10XGenomics/rust-debruijn/issues/10 137 | // However, we set it to !STRANDED so stranded assays use more buckets. 138 | #[allow(deprecated)] 139 | let msps = debruijn::msp::simple_scan::<_, PmerType>(K::k(), contig, &PERM, !STRANDED); 140 | msps.into_iter() 141 | .map(|msp| { 142 | let bucket_id = msp.bucket(); 143 | let slice = contig.slice(msp.start(), msp.end()); 144 | let exts = Exts::from_dna_string(contig, msp.start(), msp.len()); 145 | (bucket_id, contig_id, slice, exts) 146 | }) 147 | .collect() 148 | } else { 149 | Vec::new() 150 | } 151 | } 152 | 153 | fn assemble_shard( 154 | shard_data: &[(u16, u32, DnaStringSlice, Exts)], 155 | summarizer: &Arc>, 156 | ) -> BaseGraph { 157 | let filter_input: Vec<_> = shard_data 158 | .iter() 159 | .cloned() 160 | .map(|(_, seqid, string, exts)| (string, exts, seqid)) 161 | .collect(); 162 | 163 | let (phf, _): (BoomHashMap2, _) = filter_kmers( 164 | &filter_input, 165 | summarizer, 166 | STRANDED, 167 | REPORT_ALL_KMER, 168 | MEM_SIZE, 169 | ); 170 | 171 | compress_kmers_with_hash(STRANDED, &ScmapCompress::new(), &phf) 172 | } 173 | 174 | fn merge_shard_dbgs( 175 | uncompressed_dbgs: Vec>, 176 | ) -> DebruijnGraph { 177 | let combined_graph = BaseGraph::combine(uncompressed_dbgs.into_iter()).finish(); 178 | compress_graph(STRANDED, &ScmapCompress::new(), combined_graph, None) 179 | } 180 | 181 | #[inline(never)] 182 | fn make_dbg_index( 183 | dbg: &DebruijnGraph, 184 | pool: &ThreadPool, 185 | num_threads: usize, 186 | ) -> NoKeyBoomHashMap { 187 | let mut total_kmers = 0; 188 | let kmer_length = K::k(); 189 | for node in dbg.iter_nodes() { 190 | total_kmers += node.len() - kmer_length + 1; 191 | } 192 | 193 | println!("Total {:?} kmers to process in dbg", total_kmers); 194 | println!("Making mphf of kmers"); 195 | let mphf = 196 | //boomphf::Mphf::from_chunked_iterator(1.7, dbg, total_kmers); 197 | boomphf::Mphf::from_chunked_iterator_parallel(1.7, dbg, None, total_kmers as u64, 
num_threads); 198 | 199 | println!("Assigning offsets to kmers"); 200 | let mut node_and_offsets = Vec::with_capacity(total_kmers); 201 | node_and_offsets.resize(total_kmers, (U32_MAX as u32, U32_MAX as u32)); 202 | 203 | let scatter = crate::scatter::ScatterToVec::new(&mut node_and_offsets[..]); 204 | 205 | pool.install(|| { 206 | (0..dbg.len()).into_par_iter().for_each_init( 207 | // Each thread gets a scatter handle to write values from 208 | || scatter.handle(), 209 | |handle, node_id| { 210 | let node = dbg.get_node_kmer(node_id); 211 | 212 | for (offset, kmer) in node.into_iter().enumerate() { 213 | let index = mphf.try_hash(&kmer).expect("can't find kmer in DBG graph!"); 214 | handle.write(index as usize, (node_id as u32, offset as u32)); 215 | } 216 | }, 217 | ); 218 | }); 219 | 220 | boomphf::hashmap::NoKeyBoomHashMap::new_with_mphf(mphf, node_and_offsets) 221 | } 222 | 223 | /// Split the slice `data` into subslices of size at least 224 | /// `min_size`, while ensuring that consecutive runs of 225 | /// items with the same key as defined by the key function `f` are 226 | /// in the same subslice. 
227 | fn group_by_slices K>( 228 | data: &[T], 229 | f: F, 230 | min_size: usize, 231 | ) -> Vec<&[T]> { 232 | let mut slice_start = 0; 233 | let mut result = Vec::new(); 234 | for ((i, d1), d2) in data.iter().enumerate().skip(1).zip(data.iter()) { 235 | if (i - slice_start) > min_size && f(d1) != f(d2) { 236 | result.push(&data[slice_start..i]); 237 | slice_start = i; 238 | } 239 | } 240 | if slice_start < data.len() { 241 | result.push(&data[slice_start..]); 242 | } 243 | result 244 | } 245 | 246 | #[cfg(test)] 247 | mod test { 248 | use super::*; 249 | use crate::utils; 250 | use anyhow::Context; 251 | use bio::io::fasta; 252 | use debruijn::kmer; 253 | use debruijn::Vmer; 254 | use proptest::collection::vec; 255 | use proptest::prelude::*; 256 | use proptest::proptest; 257 | use std::collections::HashSet; 258 | 259 | // Manually compute the equivalence class of each kmer, and make sure 260 | // it matches that equivalence class for that kmer inside the DBG. 261 | #[inline(never)] 262 | fn validate_dbg(seqs: &[DnaString], al: &Pseudoaligner) { 263 | let mut eqclasses = HashMap::>::new(); 264 | 265 | // compute the equivalence class of each kmer 266 | for (i, s) in seqs.iter().enumerate() { 267 | for k in s.iter_kmers::() { 268 | let eq = eqclasses.entry(k).or_default(); 269 | eq.push(i as u32) 270 | } 271 | } 272 | 273 | // check that the equivalence class of the kmer inside the graph matches the naive version 274 | for (k, mut test_eqclass) in eqclasses { 275 | test_eqclass.dedup(); 276 | 277 | if test_eqclass.len() > 5000 { 278 | println!("kmer: {:?}, eqclass.len(): {}", k, test_eqclass.len()); 279 | } 280 | 281 | let (node_id, _) = al.dbg_index.get(&k).unwrap(); 282 | 283 | let eq_class = al.dbg.get_node(*node_id as usize).data(); 284 | let dbg_eqclass = &al.eq_classes[*eq_class as usize]; 285 | 286 | let mut dbg_eq_clone = dbg_eqclass.clone(); 287 | dbg_eq_clone.dedup(); 288 | 289 | if &dbg_eq_clone != dbg_eqclass { 290 | println!( 291 | "dbg eq class not 
unique: eqclass_id: {}, node: {}", 292 | eq_class, node_id 293 | ); 294 | assert_eq!(&dbg_eq_clone, dbg_eqclass); 295 | } 296 | 297 | assert_eq!(&test_eqclass, dbg_eqclass); 298 | } 299 | 300 | // check that each read sequence aligns cleanly 301 | for (i, s) in seqs.iter().enumerate() { 302 | let i = i as u32; 303 | 304 | // transcripts shorter than k can't be mapped 305 | if s.len() < K::k() { 306 | continue; 307 | } 308 | 309 | let (eqclass, bases_aligned) = al.map_read(s).unwrap(); 310 | assert_eq!(s.len(), bases_aligned); 311 | 312 | if eqclass.len() > 1 { 313 | assert!(eqclass.contains(&i)); 314 | 315 | // identical strings 316 | if eqclass.len() == 2 && seqs[eqclass[0] as usize] == seqs[eqclass[1] as usize] { 317 | continue; 318 | } 319 | 320 | // if the sequences aren't identical, the current string must be shortest, or 321 | // the set of nodes visited by the input string must be a subset of the other sequences 322 | // in the equivalence class. 323 | let shortest = eqclass 324 | .iter() 325 | .map(|x| seqs[*x as usize].len()) 326 | .min() 327 | .unwrap(); 328 | 329 | if s.len() != shortest { 330 | let mut path_buf: Vec = Vec::new(); 331 | 332 | al.map_read_to_nodes(s, &mut path_buf).unwrap(); 333 | let my_nodes: HashSet = HashSet::from_iter(path_buf.iter().cloned()); 334 | 335 | println!("eqclass: {:?}", eqclass); 336 | for i in &eqclass { 337 | println!( 338 | "{}: {}, len:{}", 339 | i, 340 | al.tx_names[*i as usize], 341 | seqs[*i as usize].len() 342 | ); 343 | println!("{:?}", seqs[*i as usize]); 344 | 345 | let r = al 346 | .map_read_to_nodes(&seqs[*i as usize], &mut path_buf) 347 | .unwrap(); 348 | let other_nodes = HashSet::from_iter(path_buf.iter().cloned()); 349 | 350 | println!("r: {:?}", r); 351 | println!("{:?}", path_buf); 352 | 353 | assert!(my_nodes.is_subset(&other_nodes)); 354 | println!("---"); 355 | } 356 | } 357 | 358 | // debugging 359 | // println!("--- dup on {}", i); 360 | // for e in &eqclass { 361 | // println!("{}: {}", e, 
al.tx_names[*e as usize]); 362 | // println!("{:?}", seqs[*e as usize]); 363 | // } 364 | } else { 365 | assert_eq!(eqclass, vec![i]); 366 | } 367 | } 368 | } 369 | 370 | proptest! { 371 | #![proptest_config(ProptestConfig { cases: 2000, .. ProptestConfig::default()})] 372 | #[test] 373 | fn group_by_slices_test( 374 | //v: Vec, 375 | v in vec(0..100usize, 0..5000usize), 376 | min_sz in 1..200usize 377 | ) { 378 | let res = group_by_slices(&v, |v| *v, min_sz); 379 | let total_len: usize = res.iter().map(|x| x.len()).sum(); 380 | prop_assert_eq!(v.len(), total_len); 381 | 382 | for i in 1 .. res.len() { 383 | prop_assert!(res[i-1].len() >= min_sz); 384 | } 385 | 386 | for i in 1 .. res.len() { 387 | let prev = res[i-1]; 388 | let next = res[i]; 389 | prop_assert!(prev[prev.len() - 1] != next[0]); 390 | } 391 | } 392 | } 393 | 394 | #[test] 395 | fn test_gencode_small_build_20() -> Result<(), Error> { 396 | let fasta = fasta::Reader::from_file("test/gencode_small.fa")?; 397 | let (seqs, tx_names, tx_gene_map) = utils::read_transcripts(fasta)?; 398 | let index = build_index::(&seqs, &tx_names, &tx_gene_map, 2)?; 399 | validate_dbg(&seqs, &index); 400 | Ok(()) 401 | } 402 | 403 | #[test] 404 | fn test_gencode_small_build_64() -> Result<(), Error> { 405 | let fasta = fasta::Reader::from_file("test/gencode_small.fa")?; 406 | let (seqs, tx_names, tx_gene_map) = utils::read_transcripts(fasta)?; 407 | let index = build_index::(&seqs, &tx_names, &tx_gene_map, 2)?; 408 | validate_dbg(&seqs, &index); 409 | Ok(()) 410 | } 411 | 412 | #[allow(dead_code)] 413 | #[cfg_attr(feature = "slow_tests", test)] 414 | fn test_gencode_full_build_20() -> Result<(), Error> { 415 | let msg = "For full txome indexing test, download from ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_28/gencode.v28.transcripts.fa.gz, un-gzip and place in test/gencode.v28.transcripts.fa"; 416 | let fasta = fasta::Reader::from_file("test/gencode.v28.transcripts.fa").context(msg)?; 417 | let (seqs, 
tx_names, tx_gene_map) = utils::read_transcripts(fasta)?; 418 | let index = build_index::(&seqs, &tx_names, &tx_gene_map, 2)?; 419 | validate_dbg(&seqs, &index); 420 | Ok(()) 421 | } 422 |

    // Map a read taken from the small test transcriptome, then the same read
    // with one and with two substitutions; all three are expected to land in
    // the same equivalence class with every base aligned.
    // Not registered as a test (no `#[test]` attribute), hence `dead_code`.
    #[allow(dead_code)]
    fn test_alignment() -> Result<(), Error> {
        let fasta = fasta::Reader::from_file("test/gencode_small.fa")?;
        let (seqs, tx_names, tx_gene_map) = utils::read_transcripts(fasta)?;
        // The reads are 60 bases long, so only the 20-mer index can map them.
        let index = build_index::<kmer::Kmer20>(&seqs, &tx_names, &tx_gene_map, 2)?;

        let ex1 = DnaString::from_dna_string(
            "GGCTGTCAACCAGTCCATAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAGT",
        );
        let (eq_class, nbases) = index.map_read(&ex1).unwrap();
        assert_eq!(eq_class, [1, 30]);
        assert_eq!(nbases, ex1.len());

        let single_snp = DnaString::from_dna_string(
            "GGCTGTCAACCAGTCCATAGGCGGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAGT",
        );
        let (eq_class, nbases) = index.map_read(&single_snp).unwrap();
        assert_eq!(eq_class, [1, 30]);
        assert_eq!(nbases, single_snp.len());

        let two_snps = DnaString::from_dna_string(
            "GGCTGTCAACCAGTCCATAGGCGGGGCCATCAGGCACCAAAGGGATTCTGCCAGCGTAGT",
        );
        // Map the two-SNP read itself, not `single_snp` a second time.
        let (eq_class, nbases) = index.map_read(&two_snps).unwrap();
        assert_eq!(eq_class, [1, 30]);
        assert_eq!(nbases, two_snps.len());

        Ok(())
    }

    // Full-transcriptome build with 64-mers; only compiled as a test when the
    // `slow_tests` feature is enabled, and needs the reference file named in
    // `msg` to be present.
    #[allow(dead_code)]
    #[cfg_attr(feature = "slow_tests", test)]
    fn test_gencode_full_build_64() -> Result<(), Error> {
        let msg = "For full txome indexing test, download from ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_28/gencode.v28.transcripts.fa.gz, un-gzip and place in test/gencode.v28.transcripts.fa";
        let fasta = fasta::Reader::from_file("test/gencode.v28.transcripts.fa").context(msg)?;
        let (seqs, tx_names, tx_gene_map) = utils::read_transcripts(fasta)?;
        let index = build_index::<kmer::Kmer64>(&seqs, &tx_names, &tx_gene_map, 2)?;
        validate_dbg(&seqs, &index);
        Ok(())
    }
} 464 | -------------------------------------------------------------------------------- /src/pseudoaligner.rs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2018 10x Genomics, Inc. All rights reserved. 2 | 3 | use std::collections::HashMap; 4 | use std::fmt::Debug; 5 | use std::io::{self, Write}; 6 | use std::path::Path; 7 | use std::sync::{mpsc, Arc, Mutex}; 8 | use std::{self, fs::File, str}; 9 | 10 | use bio::io::fastq; 11 | use boomphf::hashmap::NoKeyBoomHashMap; 12 | use crossbeam_utils::thread::scope; 13 | use debruijn::dna_string::DnaString; 14 | 15 | use anyhow::Error; 16 | use debruijn::graph::DebruijnGraph; 17 | use debruijn::{Dir, Kmer, Mer, Vmer}; 18 | use log::info; 19 | 20 | use serde::{Deserialize, Serialize}; 21 | 22 | use crate::config::{DEFAULT_ALLOWED_MISMATCHES, LEFT_EXTEND_FRACTION, READ_COVERAGE_THRESHOLD}; 23 | use crate::equiv_classes::EqClassIdType; 24 | use crate::utils; 25 | 26 | #[derive(Serialize, Deserialize)] 27 | pub struct Pseudoaligner { 28 | pub dbg: DebruijnGraph, 29 | pub eq_classes: Vec>, 30 | pub dbg_index: NoKeyBoomHashMap, 31 | pub tx_names: Vec, 32 | pub tx_gene_mapping: HashMap, 33 | } 34 | 35 | impl Pseudoaligner { 36 | pub(crate) fn new( 37 | dbg: DebruijnGraph, 38 | eq_classes: Vec>, 39 | dbg_index: NoKeyBoomHashMap, 40 | tx_names: Vec, 41 | tx_gene_mapping: HashMap, 42 | ) -> Pseudoaligner { 43 | Pseudoaligner { 44 | dbg, 45 | eq_classes, 46 | dbg_index, 47 | tx_names, 48 | tx_gene_mapping, 49 | } 50 | } 51 | 52 | /// Pseudo-align `read_seq` and return a list of nodes that the read was aligned to, with mismatch = 2 53 | #[cfg(test)] 54 | pub(crate) fn map_read_to_nodes( 55 | &self, 56 | read_seq: &DnaString, 57 | nodes: &mut Vec, 58 | ) -> Option { 59 | self.map_read_to_nodes_with_mismatch(read_seq, nodes, DEFAULT_ALLOWED_MISMATCHES) 60 | .map(|(read_coverage, _mismatches)| read_coverage) 61 | } 62 | 63 | /// Pseudo-align `read_seq` and return a list of nodes 
that the read was aligned to, with configurable # of allowed mismatches 64 | fn map_read_to_nodes_with_mismatch( 65 | &self, 66 | read_seq: &DnaString, 67 | nodes: &mut Vec, 68 | allowed_mismatches: usize, 69 | ) -> Option<(usize, usize)> { 70 | let read_length = read_seq.len(); 71 | let mut read_coverage: usize = 0; 72 | let mut mismatch_count: usize = 0; 73 | 74 | // We're filling out nodes 75 | nodes.clear(); 76 | 77 | let left_extend_threshold = (LEFT_EXTEND_FRACTION * read_length as f64) as usize; 78 | 79 | let mut kmer_pos: usize = 0; 80 | let kmer_length = K::k(); 81 | 82 | if read_seq.len() < kmer_length { 83 | return None; 84 | } 85 | 86 | let last_kmer_pos = read_length - kmer_length; 87 | let mut kmer_lookups = 0; 88 | 89 | { 90 | // Scan the read for the first kmer that exists in the reference 91 | let mut find_kmer_match = |kmer_pos: &mut usize| -> Option<(usize, usize)> { 92 | while *kmer_pos <= last_kmer_pos { 93 | let read_kmer = read_seq.get_kmer(*kmer_pos); 94 | 95 | kmer_lookups += 1; 96 | match self.dbg_index.get(&read_kmer) { 97 | None => (), 98 | Some((nid, offset)) => { 99 | // Verify that the kmer actually matches -- the MPHF can have false 100 | // positives. 
101 | let node = self.dbg.get_node(*nid as usize); 102 | let ref_seq_slice = node.sequence(); 103 | let ref_kmer: K = ref_seq_slice.get_kmer(*offset as usize); 104 | 105 | if read_kmer == ref_kmer { 106 | return Some((*nid as usize, *offset as usize)); 107 | } 108 | } 109 | }; 110 | *kmer_pos += 3; 111 | } 112 | 113 | None 114 | }; 115 | 116 | // extract the first exact matching position of a kmer 117 | // from the read in the DBG 118 | let (mut node_id, mut kmer_offset) = match find_kmer_match(&mut kmer_pos) { 119 | None => (None, None), 120 | Some((nid, offset)) => (Some(nid), Some(offset)), 121 | }; 122 | 123 | // check if we can extend back if there were SNP in every kmer query 124 | if let Some(node_id) = node_id { 125 | let kmer_offset = kmer_offset.unwrap(); 126 | if kmer_pos >= left_extend_threshold { 127 | let mut last_pos = kmer_pos - 1; 128 | let mut prev_node_id = node_id; 129 | let mut prev_kmer_offset = if kmer_offset > 0 { kmer_offset - 1 } else { 0 }; 130 | 131 | loop { 132 | let node = self.dbg.get_node(prev_node_id); 133 | //println!("{:?}, {:?}, {:?}, {:?}, {:?}", 134 | // node, node.sequence(), 135 | // &eq_classes[ *node.data() as usize], 136 | // prev_kmer_offset, last_pos); 137 | 138 | // length of remaining read before kmer match 139 | let skipped_read = last_pos + 1; 140 | 141 | // length of the skipped node sequence before kmer match 142 | let skipped_ref = prev_kmer_offset + 1; 143 | 144 | // find maximum extention possbile before fork or eof read 145 | let max_matchable_pos = std::cmp::min(skipped_read, skipped_ref); 146 | 147 | let ref_seq_slice = node.sequence(); 148 | let mut premature_break = false; 149 | let mut matched_bases = 0; 150 | let mut seen_snp = 0; 151 | for idx in 0..max_matchable_pos { 152 | let ref_pos = prev_kmer_offset - idx; 153 | let read_offset = last_pos - idx; 154 | 155 | // compare base by base 156 | if ref_seq_slice.get(ref_pos) != read_seq.get(read_offset) { 157 | // Record mismatch 158 | mismatch_count += 1; 
159 | 160 | // Allowing num_mismatch-SNP 161 | seen_snp += 1; 162 | if seen_snp > allowed_mismatches { 163 | premature_break = true; 164 | break; 165 | } 166 | } 167 | 168 | matched_bases += 1; 169 | read_coverage += 1; 170 | } 171 | 172 | //break the loop if end of read reached or a premature mismatch 173 | if last_pos + 1 - matched_bases == 0 || premature_break { 174 | break; 175 | } 176 | 177 | // adjust last position 178 | last_pos -= matched_bases; 179 | 180 | // If reached here then a fork is found in the reference. 181 | let exts = node.exts(); 182 | let next_base = read_seq.get(last_pos); 183 | if exts.has_ext(Dir::Left, next_base) { 184 | // found a left extention. 185 | let index = exts 186 | .get(Dir::Left) 187 | .iter() 188 | .position(|&x| x == next_base) 189 | .unwrap(); 190 | 191 | let edge = node.l_edges()[index]; 192 | 193 | //update the previous node's id 194 | prev_node_id = edge.0; 195 | let prev_node = self.dbg.get_node(prev_node_id); 196 | prev_kmer_offset = prev_node.sequence().len() - kmer_length; 197 | 198 | // extract colors 199 | nodes.push(prev_node.node_id); 200 | } else { 201 | break; 202 | } 203 | } // end-loop 204 | } 205 | } //end-if 206 | 207 | // forward search 208 | if kmer_pos <= last_kmer_pos { 209 | loop { 210 | let node = self.dbg.get_node(node_id.unwrap()); 211 | //println!("{:?}, {:?}, {:?}, {:?}", 212 | // node, node.sequence(), 213 | // &eq_classes[ *node.data() as usize], 214 | // kmer_offset); 215 | kmer_pos += kmer_length; 216 | read_coverage += kmer_length; 217 | 218 | // extract colors 219 | nodes.push(node.node_id); 220 | 221 | // length of remaining read after kmer match 222 | let remaining_read = read_length - kmer_pos; 223 | 224 | // length of the remaining node sequence after kmer match 225 | let ref_seq_slice = node.sequence(); 226 | let ref_length = ref_seq_slice.len(); 227 | let ref_offset = kmer_offset.unwrap() + kmer_length; 228 | let informative_ref = ref_length - ref_offset; 229 | 230 | // find maximum 
extention possbile before fork or eof read 231 | let max_matchable_pos = std::cmp::min(remaining_read, informative_ref); 232 | 233 | let mut premature_break = false; 234 | let mut matched_bases = 0; 235 | let mut seen_snp = 0; 236 | for idx in 0..max_matchable_pos { 237 | let ref_pos = ref_offset + idx; 238 | let read_offset = kmer_pos + idx; 239 | 240 | // compare base by base 241 | if ref_seq_slice.get(ref_pos) != read_seq.get(read_offset) { 242 | // Record mismatch 243 | mismatch_count += 1; 244 | 245 | // Allowing num_mismatch-SNP 246 | seen_snp += 1; 247 | if seen_snp > allowed_mismatches { 248 | premature_break = true; 249 | break; 250 | } 251 | } 252 | 253 | matched_bases += 1; 254 | read_coverage += 1; 255 | } 256 | 257 | kmer_pos += matched_bases; 258 | //break the loop if end of read reached or a premature mismatch 259 | if kmer_pos >= read_length { 260 | break; 261 | } 262 | 263 | // If reached here then a fork is found in the reference. 264 | let exts = node.exts(); 265 | let next_base = read_seq.get(kmer_pos); 266 | 267 | if !premature_break && exts.has_ext(Dir::Right, next_base) { 268 | // found a right extention. 
269 | let index = exts 270 | .get(Dir::Right) 271 | .iter() 272 | .position(|&x| x == next_base) 273 | .unwrap(); 274 | 275 | let edge = node.r_edges()[index]; 276 | 277 | //update the next node's id 278 | node_id = Some(edge.0); 279 | kmer_offset = Some(0); 280 | 281 | //adjust for kmer_position 282 | kmer_pos -= kmer_length - 1; 283 | read_coverage -= kmer_length - 1; 284 | } else { 285 | // can't extend node in dbg extract read using mphf 286 | // TODO: might have to check some cases 287 | if kmer_pos > last_kmer_pos { 288 | // can't search in mphf if no full kmer can be made 289 | break; 290 | } 291 | 292 | // get the match through mphf 293 | match find_kmer_match(&mut kmer_pos) { 294 | None => break, 295 | Some((nid, offset)) => { 296 | node_id = Some(nid); 297 | kmer_offset = Some(offset); 298 | } 299 | }; 300 | } 301 | } // end-loop 302 | } //end-if 303 | } 304 | 305 | if nodes.is_empty() { 306 | if read_coverage != 0 { 307 | panic!( 308 | "Different read coverage {:?} than num of eqclasses {:?}", 309 | nodes.len(), 310 | read_coverage 311 | ); 312 | } 313 | //println!("lookups: {} -- no hit", kmer_lookups); 314 | None 315 | } else { 316 | //println!("lookups: {} -- cov: {}", kmer_lookups, read_coverage); 317 | Some((read_coverage, mismatch_count)) 318 | } 319 | } 320 |

    /// Convert a list of nodes contacted by a read into an equivalence class.
    /// Supply node list in `nodes`. Equivalence class will be written to `eq_class`.
    ///
    /// `nodes` is reordered in place (smallest equivalence class first) and
    /// `eq_class` is overwritten with the intersection of the classes of all
    /// nodes; it is left empty when `nodes` is empty.
    fn nodes_to_eq_class(&self, nodes: &mut Vec<usize>, eq_class: &mut Vec<u32>) {
        eq_class.clear();

        // Id of the equivalence class attached to a graph node.
        let class_id = |node_id: usize| *self.dbg.get_node(node_id).data() as usize;

        // Start from the shortest class so the running intersection stays
        // small. The key needs a graph lookup, so compute it once per node.
        nodes.sort_by_cached_key(|n| self.eq_classes[class_id(*n)].len());

        // Intersect the equivalence classes
        let mut remaining = nodes.iter();
        if let Some(first_node) = remaining.next() {
            eq_class.extend(&self.eq_classes[class_id(*first_node)]);

            for node in remaining {
                intersect(eq_class, &self.eq_classes[class_id(*node)]);
            }
        }
    }

    /// Pseudoalign the `read_seq` to the graph. Returns a tuple of the
    /// equivalence class, the number of bases aligned and the number of
    /// mismatched bases on success, or None if no alignment could be found.
    fn map_read_with_mismatch(
        &self,
        read_seq: &DnaString,
        allowed_mismatches: usize,
    ) -> Option<(Vec<u32>, usize, usize)> {
        let mut nodes = Vec::new();

        self.map_read_to_nodes_with_mismatch(read_seq, &mut nodes, allowed_mismatches)
            .map(|(read_coverage, mismatches)| {
                let mut eq_class = Vec::new();
                self.nodes_to_eq_class(&mut nodes, &mut eq_class);
                (eq_class, read_coverage, mismatches)
            })
    }

    /// Pseudoalign the `read_seq` to the graph, allowing
    /// `DEFAULT_ALLOWED_MISMATCHES` mismatches. Returns a tuple of the
    /// equivalence class and the number of bases aligned on success,
    /// or None if no alignment could be found.
    pub(crate) fn map_read(&self, read_seq: &DnaString) -> Option<(Vec<u32>, usize)> {
        self.map_read_with_mismatch(read_seq, DEFAULT_ALLOWED_MISMATCHES)
            .map(|(eq_class, read_coverage, _mismatches)| (eq_class, read_coverage))
    }
}

/// Compute the intersection of v1 and v2 inplace on top of v1
/// v1 and v2 must be sorted and deduplicated.
///
/// Each element of `v1` is looked up by binary search in the part of `v2`
/// that has not been passed yet; elements found are moved to the front of
/// `v1` and the rest is truncated away.
fn intersect<T: Eq + Ord>(v1: &mut Vec<T>, v2: &[T]) {
    if v1.is_empty() {
        return;
    }

    if v2.is_empty() {
        v1.clear();
        return;
    }

    let mut fill_idx1 = 0;
    let mut idx2 = 0;

    for idx1 in 0..v1.len() {
        if idx2 >= v2.len() {
            break;
        }

        // `binary_search` reports positions relative to the subslice, so the
        // cursor into `v2` has to be advanced, not overwritten.
        match v2[idx2..].binary_search(&v1[idx1]) {
            Ok(pos) => {
                v1.swap(fill_idx1, idx1);
                fill_idx1 += 1;
                idx2 += pos + 1;
            }
            Err(pos) => {
                idx2 += pos;
            }
        }
    }
    v1.truncate(fill_idx1);
}
419 | 420 | pub fn process_reads + Debug>( 421 | reader: fastq::Reader>, 422 | index: &Pseudoaligner, 423 | outdir: P, 424 | num_threads: usize, 425 | ) -> Result<(), Error> { 426 | info!("Done Reading index"); 427 | info!("Starting Multi-threaded Mapping"); 428 | info!("Output directory: {:?}", outdir); 429 | 430 | let (tx, rx) = mpsc::sync_channel(num_threads); 431 | let atomic_reader = Arc::new(Mutex::new(reader.records())); 432 | 433 | info!("Spawning {} threads for Mapping.\n", num_threads); 434 | scope(|scope| { 435 | for _ in 0..num_threads { 436 | let tx = tx.clone(); 437 | let reader = Arc::clone(&atomic_reader); 438 | 439 | scope.spawn(move |_| { 440 | loop { 441 | // If work is available, do that work. 
442 | match utils::get_next_record(&reader) { 443 | Some(result_record) => { 444 | let record = match result_record { 445 | Ok(record) => record, 446 | Err(err) => panic!("Error {:?} in reading fastq", err), 447 | }; 448 | 449 | let dna_string = str::from_utf8(record.seq()).unwrap(); 450 | let seq = DnaString::from_dna_string(dna_string); 451 | let read_data = index.map_read(&seq); 452 | 453 | let wrapped_read_data = match read_data { 454 | Some((eq_class, coverage)) => { 455 | if coverage >= READ_COVERAGE_THRESHOLD && eq_class.is_empty() { 456 | Some((true, record.id().to_owned(), eq_class, coverage)) 457 | } else { 458 | Some((false, record.id().to_owned(), eq_class, coverage)) 459 | } 460 | } 461 | None => Some((false, record.id().to_owned(), Vec::new(), 0)), 462 | }; 463 | 464 | tx.send(wrapped_read_data).expect("Could not send data!"); 465 | } 466 | None => { 467 | // send None to tell receiver that the queue ended 468 | tx.send(None).expect("Could not send data!"); 469 | break; 470 | } 471 | }; //end-match 472 | } // end loop 473 | }); //end-scope 474 | } // end-for 475 | 476 | let mut read_counter: usize = 0; 477 | let mut mapped_read_counter: usize = 0; 478 | let mut dead_thread_count = 0; 479 | 480 | for eq_class in rx.iter() { 481 | match eq_class { 482 | None => { 483 | dead_thread_count += 1; 484 | if dead_thread_count == num_threads { 485 | drop(tx); 486 | break; 487 | } 488 | } 489 | Some(read_data) => { 490 | println!("{:?}", read_data); 491 | 492 | if read_data.0 { 493 | mapped_read_counter += 1; 494 | } 495 | 496 | read_counter += 1; 497 | if read_counter % 1_000_000 == 0 { 498 | let frac_mapped = mapped_read_counter as f32 * 100.0 / read_counter as f32; 499 | eprint!( 500 | "\rDone Mapping {} reads w/ Rate: {}", 501 | read_counter, frac_mapped 502 | ); 503 | io::stderr().flush().expect("Could not flush stdout"); 504 | } 505 | } // end-Some 506 | } // end-match 507 | } // end-for 508 | }) 509 | .unwrap(); //end crossbeam 510 | 511 | eprintln!(); 
512 | info!("Done Mapping Reads"); 513 | Ok(()) 514 | } 515 | 516 | #[cfg(test)] 517 | mod test { 518 | use super::*; 519 | use proptest::collection::vec; 520 | use proptest::prelude::*; 521 | use proptest::proptest; 522 | use std::collections::HashSet; 523 | use std::hash::Hash; 524 | use std::iter::FromIterator; 525 | 526 | fn test_intersect(v1: &[T], v2: &[T]) { 527 | let mut c1 = v1.to_owned(); 528 | let c2 = v2; 529 | 530 | let s1: HashSet = HashSet::from_iter(c1.iter().cloned()); 531 | let s2: HashSet = HashSet::from_iter(c2.iter().cloned()); 532 | let intersection = s1.intersection(&s2); 533 | 534 | let mut int1: Vec = intersection.cloned().collect(); 535 | int1.sort(); 536 | 537 | intersect(&mut c1, c2); 538 | 539 | assert_eq!(c1, int1); 540 | } 541 | 542 | #[test] 543 | fn intersect_test() { 544 | let v1 = vec![1, 2, 3, 4, 5, 6, 7, 8, 9]; 545 | let v2 = vec![1, 2, 3]; 546 | let v3 = vec![1, 4, 5]; 547 | let v4 = vec![7, 8, 9]; 548 | let v5 = vec![9]; 549 | let v6: Vec = vec![]; 550 | let v7 = vec![1, 2, 3, 6, 7, 8, 9]; 551 | let v8 = vec![1, 7, 8, 9, 10]; 552 | let v9 = vec![10, 15, 20]; 553 | let v10 = vec![21, 22, 23]; 554 | let v11 = vec![0]; 555 | let v12 = vec![0, 1000, 5000]; 556 | let v13 = vec![0, 1000, 1000001]; 557 | let v14 = vec![5]; 558 | let v15 = vec![100000000]; 559 | let v16 = vec![1, 23, 45, 1000001, 100000000]; 560 | 561 | let vecs = vec![ 562 | v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, 563 | ]; 564 | 565 | for v1 in vecs.iter() { 566 | for v2 in vecs.iter() { 567 | test_intersect(v1, v2); 568 | test_intersect(v2, v1); 569 | } 570 | } 571 | } 572 | 573 | proptest! { 574 | #![proptest_config(ProptestConfig { cases: 1000, .. 
ProptestConfig::default()})] 575 | #[test] 576 | fn intersect_prop_test( 577 | mut v1 in vec(0..100usize, 0..5000usize), 578 | mut v2 in vec(0..100usize, 0..5000usize), 579 | ) { 580 | 581 | v1.sort_unstable(); v1.dedup(); 582 | v2.sort_unstable(); v2.dedup(); 583 | test_intersect(&v1, &v2); 584 | test_intersect(&v2, &v1); 585 | } 586 | } 587 | } 588 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "adler" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 10 | 11 | [[package]] 12 | name = "aho-corasick" 13 | version = "1.1.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" 16 | dependencies = [ 17 | "memchr", 18 | ] 19 | 20 | [[package]] 21 | name = "anyhow" 22 | version = "1.0.79" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" 25 | 26 | [[package]] 27 | name = "approx" 28 | version = "0.5.1" 29 | source = "registry+https://github.com/rust-lang/crates.io-index" 30 | checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" 31 | dependencies = [ 32 | "num-traits", 33 | ] 34 | 35 | [[package]] 36 | name = "autocfg" 37 | version = "1.1.0" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" 40 | 41 | [[package]] 42 | name = "bincode" 43 | version = "1.3.3" 44 | source = "registry+https://github.com/rust-lang/crates.io-index" 45 | checksum = 
"b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" 46 | dependencies = [ 47 | "serde", 48 | ] 49 | 50 | [[package]] 51 | name = "bio" 52 | version = "1.5.0" 53 | source = "registry+https://github.com/rust-lang/crates.io-index" 54 | checksum = "25dccfc5babf5a4f505ab5bdda0e18d4b5fc1600c222677c54992203632cbdf5" 55 | dependencies = [ 56 | "anyhow", 57 | "approx", 58 | "bio-types", 59 | "bit-set", 60 | "bv", 61 | "bytecount", 62 | "csv", 63 | "custom_derive", 64 | "editdistancek", 65 | "enum-map", 66 | "fxhash", 67 | "getset", 68 | "itertools", 69 | "itertools-num", 70 | "lazy_static", 71 | "multimap", 72 | "ndarray", 73 | "newtype_derive", 74 | "num-integer", 75 | "num-traits", 76 | "ordered-float", 77 | "petgraph", 78 | "rand", 79 | "regex", 80 | "serde", 81 | "serde_derive", 82 | "statrs", 83 | "strum", 84 | "strum_macros", 85 | "thiserror", 86 | "triple_accel", 87 | "vec_map", 88 | ] 89 | 90 | [[package]] 91 | name = "bio-types" 92 | version = "1.0.1" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "9d45749b87f21808051025e9bf714d14ff4627f9d8ca967eade6946ea769aa4a" 95 | dependencies = [ 96 | "derive-new", 97 | "lazy_static", 98 | "regex", 99 | "strum_macros", 100 | "thiserror", 101 | ] 102 | 103 | [[package]] 104 | name = "bit-set" 105 | version = "0.5.2" 106 | source = "registry+https://github.com/rust-lang/crates.io-index" 107 | checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" 108 | dependencies = [ 109 | "bit-vec", 110 | ] 111 | 112 | [[package]] 113 | name = "bit-vec" 114 | version = "0.6.3" 115 | source = "registry+https://github.com/rust-lang/crates.io-index" 116 | checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" 117 | 118 | [[package]] 119 | name = "bitflags" 120 | version = "1.3.2" 121 | source = "registry+https://github.com/rust-lang/crates.io-index" 122 | checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" 123 | 124 | 
[[package]] 125 | name = "bitflags" 126 | version = "2.4.2" 127 | source = "registry+https://github.com/rust-lang/crates.io-index" 128 | checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf" 129 | 130 | [[package]] 131 | name = "boomphf" 132 | version = "0.6.0" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "617e2d952880a00583ddb9237ac3965732e8df6a92a8e7bcc054100ec467ec3b" 135 | dependencies = [ 136 | "crossbeam-utils", 137 | "log", 138 | "rayon", 139 | "serde", 140 | "wyhash", 141 | ] 142 | 143 | [[package]] 144 | name = "bv" 145 | version = "0.11.1" 146 | source = "registry+https://github.com/rust-lang/crates.io-index" 147 | checksum = "8834bb1d8ee5dc048ee3124f2c7c1afcc6bc9aed03f11e9dfd8c69470a5db340" 148 | dependencies = [ 149 | "feature-probe", 150 | "serde", 151 | ] 152 | 153 | [[package]] 154 | name = "bytecount" 155 | version = "0.6.3" 156 | source = "registry+https://github.com/rust-lang/crates.io-index" 157 | checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" 158 | 159 | [[package]] 160 | name = "bytemuck" 161 | version = "1.14.0" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" 164 | 165 | [[package]] 166 | name = "byteorder" 167 | version = "1.4.3" 168 | source = "registry+https://github.com/rust-lang/crates.io-index" 169 | checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" 170 | 171 | [[package]] 172 | name = "cfg-if" 173 | version = "1.0.0" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 176 | 177 | [[package]] 178 | name = "crc32fast" 179 | version = "1.3.2" 180 | source = "registry+https://github.com/rust-lang/crates.io-index" 181 | checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" 182 | 
dependencies = [ 183 | "cfg-if", 184 | ] 185 | 186 | [[package]] 187 | name = "crossbeam-deque" 188 | version = "0.8.1" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" 191 | dependencies = [ 192 | "cfg-if", 193 | "crossbeam-epoch", 194 | "crossbeam-utils", 195 | ] 196 | 197 | [[package]] 198 | name = "crossbeam-epoch" 199 | version = "0.9.8" 200 | source = "registry+https://github.com/rust-lang/crates.io-index" 201 | checksum = "1145cf131a2c6ba0615079ab6a638f7e1973ac9c2634fcbeaaad6114246efe8c" 202 | dependencies = [ 203 | "autocfg", 204 | "cfg-if", 205 | "crossbeam-utils", 206 | "lazy_static", 207 | "memoffset", 208 | "scopeguard", 209 | ] 210 | 211 | [[package]] 212 | name = "crossbeam-utils" 213 | version = "0.8.19" 214 | source = "registry+https://github.com/rust-lang/crates.io-index" 215 | checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" 216 | 217 | [[package]] 218 | name = "csv" 219 | version = "1.3.0" 220 | source = "registry+https://github.com/rust-lang/crates.io-index" 221 | checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" 222 | dependencies = [ 223 | "csv-core", 224 | "itoa", 225 | "ryu", 226 | "serde", 227 | ] 228 | 229 | [[package]] 230 | name = "csv-core" 231 | version = "0.1.11" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" 234 | dependencies = [ 235 | "memchr", 236 | ] 237 | 238 | [[package]] 239 | name = "custom_derive" 240 | version = "0.1.7" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" 243 | 244 | [[package]] 245 | name = "dashmap" 246 | version = "5.5.3" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = 
"978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" 249 | dependencies = [ 250 | "cfg-if", 251 | "hashbrown", 252 | "lock_api", 253 | "once_cell", 254 | "parking_lot_core", 255 | ] 256 | 257 | [[package]] 258 | name = "debruijn" 259 | version = "0.3.4" 260 | source = "git+https://github.com/10XGenomics/rust-debruijn#8d9a5c525965dcf5bddb319fd94362e5bf41303a" 261 | dependencies = [ 262 | "bit-set", 263 | "boomphf", 264 | "itertools", 265 | "log", 266 | "num-traits", 267 | "serde", 268 | "serde_derive", 269 | "serde_json", 270 | "smallvec", 271 | ] 272 | 273 | [[package]] 274 | name = "debruijn_mapping" 275 | version = "0.6.0" 276 | dependencies = [ 277 | "anyhow", 278 | "bincode", 279 | "bio", 280 | "boomphf", 281 | "crossbeam-utils", 282 | "dashmap", 283 | "debruijn", 284 | "docopt", 285 | "flate2", 286 | "itertools", 287 | "lazy_static", 288 | "log", 289 | "pretty_env_logger", 290 | "proptest", 291 | "rayon", 292 | "serde", 293 | ] 294 | 295 | [[package]] 296 | name = "derive-new" 297 | version = "0.5.9" 298 | source = "registry+https://github.com/rust-lang/crates.io-index" 299 | checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" 300 | dependencies = [ 301 | "proc-macro2", 302 | "quote", 303 | "syn", 304 | ] 305 | 306 | [[package]] 307 | name = "docopt" 308 | version = "1.1.1" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "7f3f119846c823f9eafcf953a8f6ffb6ed69bf6240883261a7f13b634579a51f" 311 | dependencies = [ 312 | "lazy_static", 313 | "regex", 314 | "serde", 315 | "strsim", 316 | ] 317 | 318 | [[package]] 319 | name = "editdistancek" 320 | version = "1.0.2" 321 | source = "registry+https://github.com/rust-lang/crates.io-index" 322 | checksum = "3e02df23d5b1c6f9e69fa603b890378123b93073df998a21e6e33b9db0a32613" 323 | 324 | [[package]] 325 | name = "either" 326 | version = "1.6.1" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = 
"e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" 329 | 330 | [[package]] 331 | name = "enum-map" 332 | version = "1.1.1" 333 | source = "registry+https://github.com/rust-lang/crates.io-index" 334 | checksum = "e893a7ba6116821058dec84a6fb14fb2a97cd8ce5fd0f85d5a4e760ecd7329d9" 335 | dependencies = [ 336 | "enum-map-derive", 337 | ] 338 | 339 | [[package]] 340 | name = "enum-map-derive" 341 | version = "0.6.0" 342 | source = "registry+https://github.com/rust-lang/crates.io-index" 343 | checksum = "84278eae0af6e34ff6c1db44c11634a694aafac559ff3080e4db4e4ac35907aa" 344 | dependencies = [ 345 | "proc-macro2", 346 | "quote", 347 | "syn", 348 | ] 349 | 350 | [[package]] 351 | name = "env_logger" 352 | version = "0.10.2" 353 | source = "registry+https://github.com/rust-lang/crates.io-index" 354 | checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" 355 | dependencies = [ 356 | "humantime", 357 | "is-terminal", 358 | "log", 359 | "regex", 360 | "termcolor", 361 | ] 362 | 363 | [[package]] 364 | name = "equivalent" 365 | version = "1.0.1" 366 | source = "registry+https://github.com/rust-lang/crates.io-index" 367 | checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" 368 | 369 | [[package]] 370 | name = "errno" 371 | version = "0.3.8" 372 | source = "registry+https://github.com/rust-lang/crates.io-index" 373 | checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" 374 | dependencies = [ 375 | "libc", 376 | "windows-sys", 377 | ] 378 | 379 | [[package]] 380 | name = "fastrand" 381 | version = "2.0.1" 382 | source = "registry+https://github.com/rust-lang/crates.io-index" 383 | checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" 384 | 385 | [[package]] 386 | name = "feature-probe" 387 | version = "0.1.1" 388 | source = "registry+https://github.com/rust-lang/crates.io-index" 389 | checksum = "835a3dc7d1ec9e75e2b5fb4ba75396837112d2060b03f7d43bc1897c7f7211da" 390 | 391 
| [[package]] 392 | name = "fixedbitset" 393 | version = "0.4.2" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" 396 | 397 | [[package]] 398 | name = "flate2" 399 | version = "1.0.28" 400 | source = "registry+https://github.com/rust-lang/crates.io-index" 401 | checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" 402 | dependencies = [ 403 | "crc32fast", 404 | "miniz_oxide", 405 | ] 406 | 407 | [[package]] 408 | name = "fnv" 409 | version = "1.0.7" 410 | source = "registry+https://github.com/rust-lang/crates.io-index" 411 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 412 | 413 | [[package]] 414 | name = "fxhash" 415 | version = "0.2.1" 416 | source = "registry+https://github.com/rust-lang/crates.io-index" 417 | checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" 418 | dependencies = [ 419 | "byteorder", 420 | ] 421 | 422 | [[package]] 423 | name = "getrandom" 424 | version = "0.2.6" 425 | source = "registry+https://github.com/rust-lang/crates.io-index" 426 | checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" 427 | dependencies = [ 428 | "cfg-if", 429 | "libc", 430 | "wasi", 431 | ] 432 | 433 | [[package]] 434 | name = "getset" 435 | version = "0.0.9" 436 | source = "registry+https://github.com/rust-lang/crates.io-index" 437 | checksum = "5bb3f5b7d8d70c9bd23cf29b2b38094661418fb0ea79f1b0cc2019a11d6f5429" 438 | dependencies = [ 439 | "proc-macro2", 440 | "quote", 441 | "syn", 442 | ] 443 | 444 | [[package]] 445 | name = "hashbrown" 446 | version = "0.14.3" 447 | source = "registry+https://github.com/rust-lang/crates.io-index" 448 | checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" 449 | 450 | [[package]] 451 | name = "heck" 452 | version = "0.3.3" 453 | source = "registry+https://github.com/rust-lang/crates.io-index" 454 | 
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" 455 | dependencies = [ 456 | "unicode-segmentation", 457 | ] 458 | 459 | [[package]] 460 | name = "hermit-abi" 461 | version = "0.3.1" 462 | source = "registry+https://github.com/rust-lang/crates.io-index" 463 | checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" 464 | 465 | [[package]] 466 | name = "humantime" 467 | version = "2.1.0" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 470 | 471 | [[package]] 472 | name = "indexmap" 473 | version = "2.2.2" 474 | source = "registry+https://github.com/rust-lang/crates.io-index" 475 | checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" 476 | dependencies = [ 477 | "equivalent", 478 | "hashbrown", 479 | ] 480 | 481 | [[package]] 482 | name = "is-terminal" 483 | version = "0.4.10" 484 | source = "registry+https://github.com/rust-lang/crates.io-index" 485 | checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455" 486 | dependencies = [ 487 | "hermit-abi", 488 | "rustix", 489 | "windows-sys", 490 | ] 491 | 492 | [[package]] 493 | name = "itertools" 494 | version = "0.11.0" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" 497 | dependencies = [ 498 | "either", 499 | ] 500 | 501 | [[package]] 502 | name = "itertools-num" 503 | version = "0.1.3" 504 | source = "registry+https://github.com/rust-lang/crates.io-index" 505 | checksum = "a872a22f9e6f7521ca557660adb96dd830e54f0f490fa115bb55dd69d38b27e7" 506 | dependencies = [ 507 | "num-traits", 508 | ] 509 | 510 | [[package]] 511 | name = "itoa" 512 | version = "1.0.10" 513 | source = "registry+https://github.com/rust-lang/crates.io-index" 514 | checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" 515 
| 516 | [[package]] 517 | name = "lazy_static" 518 | version = "1.4.0" 519 | source = "registry+https://github.com/rust-lang/crates.io-index" 520 | checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 521 | 522 | [[package]] 523 | name = "libc" 524 | version = "0.2.152" 525 | source = "registry+https://github.com/rust-lang/crates.io-index" 526 | checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" 527 | 528 | [[package]] 529 | name = "libm" 530 | version = "0.2.2" 531 | source = "registry+https://github.com/rust-lang/crates.io-index" 532 | checksum = "33a33a362ce288760ec6a508b94caaec573ae7d3bbbd91b87aa0bad4456839db" 533 | 534 | [[package]] 535 | name = "linux-raw-sys" 536 | version = "0.4.13" 537 | source = "registry+https://github.com/rust-lang/crates.io-index" 538 | checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" 539 | 540 | [[package]] 541 | name = "lock_api" 542 | version = "0.4.11" 543 | source = "registry+https://github.com/rust-lang/crates.io-index" 544 | checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" 545 | dependencies = [ 546 | "autocfg", 547 | "scopeguard", 548 | ] 549 | 550 | [[package]] 551 | name = "log" 552 | version = "0.4.20" 553 | source = "registry+https://github.com/rust-lang/crates.io-index" 554 | checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" 555 | 556 | [[package]] 557 | name = "matrixmultiply" 558 | version = "0.3.2" 559 | source = "registry+https://github.com/rust-lang/crates.io-index" 560 | checksum = "add85d4dd35074e6fedc608f8c8f513a3548619a9024b751949ef0e8e45a4d84" 561 | dependencies = [ 562 | "rawpointer", 563 | ] 564 | 565 | [[package]] 566 | name = "memchr" 567 | version = "2.7.1" 568 | source = "registry+https://github.com/rust-lang/crates.io-index" 569 | checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" 570 | 571 | [[package]] 572 | name = "memoffset" 573 | version = 
"0.6.5" 574 | source = "registry+https://github.com/rust-lang/crates.io-index" 575 | checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" 576 | dependencies = [ 577 | "autocfg", 578 | ] 579 | 580 | [[package]] 581 | name = "miniz_oxide" 582 | version = "0.7.1" 583 | source = "registry+https://github.com/rust-lang/crates.io-index" 584 | checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" 585 | dependencies = [ 586 | "adler", 587 | ] 588 | 589 | [[package]] 590 | name = "multimap" 591 | version = "0.8.3" 592 | source = "registry+https://github.com/rust-lang/crates.io-index" 593 | checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" 594 | dependencies = [ 595 | "serde", 596 | ] 597 | 598 | [[package]] 599 | name = "nalgebra" 600 | version = "0.29.0" 601 | source = "registry+https://github.com/rust-lang/crates.io-index" 602 | checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" 603 | dependencies = [ 604 | "approx", 605 | "matrixmultiply", 606 | "nalgebra-macros", 607 | "num-complex", 608 | "num-rational", 609 | "num-traits", 610 | "rand", 611 | "rand_distr", 612 | "simba", 613 | "typenum", 614 | ] 615 | 616 | [[package]] 617 | name = "nalgebra-macros" 618 | version = "0.1.0" 619 | source = "registry+https://github.com/rust-lang/crates.io-index" 620 | checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" 621 | dependencies = [ 622 | "proc-macro2", 623 | "quote", 624 | "syn", 625 | ] 626 | 627 | [[package]] 628 | name = "ndarray" 629 | version = "0.15.4" 630 | source = "registry+https://github.com/rust-lang/crates.io-index" 631 | checksum = "dec23e6762830658d2b3d385a75aa212af2f67a4586d4442907144f3bb6a1ca8" 632 | dependencies = [ 633 | "matrixmultiply", 634 | "num-complex", 635 | "num-integer", 636 | "num-traits", 637 | "rawpointer", 638 | ] 639 | 640 | [[package]] 641 | name = "newtype_derive" 642 | version = "0.1.6" 643 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 644 | checksum = "ac8cd24d9f185bb7223958d8c1ff7a961b74b1953fd05dba7cc568a63b3861ec" 645 | dependencies = [ 646 | "rustc_version", 647 | ] 648 | 649 | [[package]] 650 | name = "num-complex" 651 | version = "0.4.1" 652 | source = "registry+https://github.com/rust-lang/crates.io-index" 653 | checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" 654 | dependencies = [ 655 | "num-traits", 656 | ] 657 | 658 | [[package]] 659 | name = "num-integer" 660 | version = "0.1.45" 661 | source = "registry+https://github.com/rust-lang/crates.io-index" 662 | checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" 663 | dependencies = [ 664 | "autocfg", 665 | "num-traits", 666 | ] 667 | 668 | [[package]] 669 | name = "num-rational" 670 | version = "0.4.0" 671 | source = "registry+https://github.com/rust-lang/crates.io-index" 672 | checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" 673 | dependencies = [ 674 | "autocfg", 675 | "num-integer", 676 | "num-traits", 677 | ] 678 | 679 | [[package]] 680 | name = "num-traits" 681 | version = "0.2.15" 682 | source = "registry+https://github.com/rust-lang/crates.io-index" 683 | checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" 684 | dependencies = [ 685 | "autocfg", 686 | "libm", 687 | ] 688 | 689 | [[package]] 690 | name = "once_cell" 691 | version = "1.19.0" 692 | source = "registry+https://github.com/rust-lang/crates.io-index" 693 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" 694 | 695 | [[package]] 696 | name = "ordered-float" 697 | version = "3.7.0" 698 | source = "registry+https://github.com/rust-lang/crates.io-index" 699 | checksum = "2fc2dbde8f8a79f2102cc474ceb0ad68e3b80b85289ea62389b60e66777e4213" 700 | dependencies = [ 701 | "num-traits", 702 | ] 703 | 704 | [[package]] 705 | name = "parking_lot_core" 706 | version = "0.9.9" 707 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 708 | checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" 709 | dependencies = [ 710 | "cfg-if", 711 | "libc", 712 | "redox_syscall", 713 | "smallvec", 714 | "windows-targets 0.48.0", 715 | ] 716 | 717 | [[package]] 718 | name = "paste" 719 | version = "1.0.7" 720 | source = "registry+https://github.com/rust-lang/crates.io-index" 721 | checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" 722 | 723 | [[package]] 724 | name = "petgraph" 725 | version = "0.6.4" 726 | source = "registry+https://github.com/rust-lang/crates.io-index" 727 | checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" 728 | dependencies = [ 729 | "fixedbitset", 730 | "indexmap", 731 | ] 732 | 733 | [[package]] 734 | name = "ppv-lite86" 735 | version = "0.2.16" 736 | source = "registry+https://github.com/rust-lang/crates.io-index" 737 | checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" 738 | 739 | [[package]] 740 | name = "pretty_env_logger" 741 | version = "0.5.0" 742 | source = "registry+https://github.com/rust-lang/crates.io-index" 743 | checksum = "865724d4dbe39d9f3dd3b52b88d859d66bcb2d6a0acfd5ea68a65fb66d4bdc1c" 744 | dependencies = [ 745 | "env_logger", 746 | "log", 747 | ] 748 | 749 | [[package]] 750 | name = "proc-macro2" 751 | version = "1.0.39" 752 | source = "registry+https://github.com/rust-lang/crates.io-index" 753 | checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" 754 | dependencies = [ 755 | "unicode-ident", 756 | ] 757 | 758 | [[package]] 759 | name = "proptest" 760 | version = "1.4.0" 761 | source = "registry+https://github.com/rust-lang/crates.io-index" 762 | checksum = "31b476131c3c86cb68032fdc5cb6d5a1045e3e42d96b69fa599fd77701e1f5bf" 763 | dependencies = [ 764 | "bit-set", 765 | "bit-vec", 766 | "bitflags 2.4.2", 767 | "lazy_static", 768 | "num-traits", 769 | "rand", 770 | "rand_chacha", 771 | 
"rand_xorshift", 772 | "regex-syntax", 773 | "rusty-fork", 774 | "tempfile", 775 | "unarray", 776 | ] 777 | 778 | [[package]] 779 | name = "quick-error" 780 | version = "1.2.3" 781 | source = "registry+https://github.com/rust-lang/crates.io-index" 782 | checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" 783 | 784 | [[package]] 785 | name = "quote" 786 | version = "1.0.18" 787 | source = "registry+https://github.com/rust-lang/crates.io-index" 788 | checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" 789 | dependencies = [ 790 | "proc-macro2", 791 | ] 792 | 793 | [[package]] 794 | name = "rand" 795 | version = "0.8.5" 796 | source = "registry+https://github.com/rust-lang/crates.io-index" 797 | checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" 798 | dependencies = [ 799 | "libc", 800 | "rand_chacha", 801 | "rand_core", 802 | ] 803 | 804 | [[package]] 805 | name = "rand_chacha" 806 | version = "0.3.1" 807 | source = "registry+https://github.com/rust-lang/crates.io-index" 808 | checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 809 | dependencies = [ 810 | "ppv-lite86", 811 | "rand_core", 812 | ] 813 | 814 | [[package]] 815 | name = "rand_core" 816 | version = "0.6.3" 817 | source = "registry+https://github.com/rust-lang/crates.io-index" 818 | checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" 819 | dependencies = [ 820 | "getrandom", 821 | ] 822 | 823 | [[package]] 824 | name = "rand_distr" 825 | version = "0.4.3" 826 | source = "registry+https://github.com/rust-lang/crates.io-index" 827 | checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" 828 | dependencies = [ 829 | "num-traits", 830 | "rand", 831 | ] 832 | 833 | [[package]] 834 | name = "rand_xorshift" 835 | version = "0.3.0" 836 | source = "registry+https://github.com/rust-lang/crates.io-index" 837 | checksum = 
"d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" 838 | dependencies = [ 839 | "rand_core", 840 | ] 841 | 842 | [[package]] 843 | name = "rawpointer" 844 | version = "0.2.1" 845 | source = "registry+https://github.com/rust-lang/crates.io-index" 846 | checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" 847 | 848 | [[package]] 849 | name = "rayon" 850 | version = "1.8.1" 851 | source = "registry+https://github.com/rust-lang/crates.io-index" 852 | checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051" 853 | dependencies = [ 854 | "either", 855 | "rayon-core", 856 | ] 857 | 858 | [[package]] 859 | name = "rayon-core" 860 | version = "1.12.1" 861 | source = "registry+https://github.com/rust-lang/crates.io-index" 862 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" 863 | dependencies = [ 864 | "crossbeam-deque", 865 | "crossbeam-utils", 866 | ] 867 | 868 | [[package]] 869 | name = "redox_syscall" 870 | version = "0.4.1" 871 | source = "registry+https://github.com/rust-lang/crates.io-index" 872 | checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" 873 | dependencies = [ 874 | "bitflags 1.3.2", 875 | ] 876 | 877 | [[package]] 878 | name = "regex" 879 | version = "1.10.3" 880 | source = "registry+https://github.com/rust-lang/crates.io-index" 881 | checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" 882 | dependencies = [ 883 | "aho-corasick", 884 | "memchr", 885 | "regex-automata", 886 | "regex-syntax", 887 | ] 888 | 889 | [[package]] 890 | name = "regex-automata" 891 | version = "0.4.5" 892 | source = "registry+https://github.com/rust-lang/crates.io-index" 893 | checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" 894 | dependencies = [ 895 | "aho-corasick", 896 | "memchr", 897 | "regex-syntax", 898 | ] 899 | 900 | [[package]] 901 | name = "regex-syntax" 902 | version = "0.8.2" 903 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 904 | checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" 905 | 906 | [[package]] 907 | name = "rustc_version" 908 | version = "0.1.7" 909 | source = "registry+https://github.com/rust-lang/crates.io-index" 910 | checksum = "c5f5376ea5e30ce23c03eb77cbe4962b988deead10910c372b226388b594c084" 911 | dependencies = [ 912 | "semver", 913 | ] 914 | 915 | [[package]] 916 | name = "rustix" 917 | version = "0.38.31" 918 | source = "registry+https://github.com/rust-lang/crates.io-index" 919 | checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949" 920 | dependencies = [ 921 | "bitflags 2.4.2", 922 | "errno", 923 | "libc", 924 | "linux-raw-sys", 925 | "windows-sys", 926 | ] 927 | 928 | [[package]] 929 | name = "rustversion" 930 | version = "1.0.6" 931 | source = "registry+https://github.com/rust-lang/crates.io-index" 932 | checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" 933 | 934 | [[package]] 935 | name = "rusty-fork" 936 | version = "0.3.0" 937 | source = "registry+https://github.com/rust-lang/crates.io-index" 938 | checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" 939 | dependencies = [ 940 | "fnv", 941 | "quick-error", 942 | "tempfile", 943 | "wait-timeout", 944 | ] 945 | 946 | [[package]] 947 | name = "ryu" 948 | version = "1.0.10" 949 | source = "registry+https://github.com/rust-lang/crates.io-index" 950 | checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" 951 | 952 | [[package]] 953 | name = "safe_arch" 954 | version = "0.7.1" 955 | source = "registry+https://github.com/rust-lang/crates.io-index" 956 | checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354" 957 | dependencies = [ 958 | "bytemuck", 959 | ] 960 | 961 | [[package]] 962 | name = "scopeguard" 963 | version = "1.1.0" 964 | source = "registry+https://github.com/rust-lang/crates.io-index" 965 | checksum = 
"d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" 966 | 967 | [[package]] 968 | name = "semver" 969 | version = "0.1.20" 970 | source = "registry+https://github.com/rust-lang/crates.io-index" 971 | checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" 972 | 973 | [[package]] 974 | name = "serde" 975 | version = "1.0.147" 976 | source = "registry+https://github.com/rust-lang/crates.io-index" 977 | checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" 978 | dependencies = [ 979 | "serde_derive", 980 | ] 981 | 982 | [[package]] 983 | name = "serde_derive" 984 | version = "1.0.147" 985 | source = "registry+https://github.com/rust-lang/crates.io-index" 986 | checksum = "4f1d362ca8fc9c3e3a7484440752472d68a6caa98f1ab81d99b5dfe517cec852" 987 | dependencies = [ 988 | "proc-macro2", 989 | "quote", 990 | "syn", 991 | ] 992 | 993 | [[package]] 994 | name = "serde_json" 995 | version = "1.0.99" 996 | source = "registry+https://github.com/rust-lang/crates.io-index" 997 | checksum = "46266871c240a00b8f503b877622fe33430b3c7d963bdc0f2adc511e54a1eae3" 998 | dependencies = [ 999 | "itoa", 1000 | "ryu", 1001 | "serde", 1002 | ] 1003 | 1004 | [[package]] 1005 | name = "simba" 1006 | version = "0.6.0" 1007 | source = "registry+https://github.com/rust-lang/crates.io-index" 1008 | checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" 1009 | dependencies = [ 1010 | "approx", 1011 | "num-complex", 1012 | "num-traits", 1013 | "paste", 1014 | "wide", 1015 | ] 1016 | 1017 | [[package]] 1018 | name = "smallvec" 1019 | version = "1.10.0" 1020 | source = "registry+https://github.com/rust-lang/crates.io-index" 1021 | checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" 1022 | 1023 | [[package]] 1024 | name = "statrs" 1025 | version = "0.16.0" 1026 | source = "registry+https://github.com/rust-lang/crates.io-index" 1027 | checksum = 
"2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e" 1028 | dependencies = [ 1029 | "approx", 1030 | "lazy_static", 1031 | "nalgebra", 1032 | "num-traits", 1033 | "rand", 1034 | ] 1035 | 1036 | [[package]] 1037 | name = "strsim" 1038 | version = "0.10.0" 1039 | source = "registry+https://github.com/rust-lang/crates.io-index" 1040 | checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" 1041 | 1042 | [[package]] 1043 | name = "strum" 1044 | version = "0.23.0" 1045 | source = "registry+https://github.com/rust-lang/crates.io-index" 1046 | checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" 1047 | 1048 | [[package]] 1049 | name = "strum_macros" 1050 | version = "0.23.1" 1051 | source = "registry+https://github.com/rust-lang/crates.io-index" 1052 | checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" 1053 | dependencies = [ 1054 | "heck", 1055 | "proc-macro2", 1056 | "quote", 1057 | "rustversion", 1058 | "syn", 1059 | ] 1060 | 1061 | [[package]] 1062 | name = "syn" 1063 | version = "1.0.96" 1064 | source = "registry+https://github.com/rust-lang/crates.io-index" 1065 | checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf" 1066 | dependencies = [ 1067 | "proc-macro2", 1068 | "quote", 1069 | "unicode-ident", 1070 | ] 1071 | 1072 | [[package]] 1073 | name = "tempfile" 1074 | version = "3.10.0" 1075 | source = "registry+https://github.com/rust-lang/crates.io-index" 1076 | checksum = "a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" 1077 | dependencies = [ 1078 | "cfg-if", 1079 | "fastrand", 1080 | "rustix", 1081 | "windows-sys", 1082 | ] 1083 | 1084 | [[package]] 1085 | name = "termcolor" 1086 | version = "1.1.3" 1087 | source = "registry+https://github.com/rust-lang/crates.io-index" 1088 | checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" 1089 | dependencies = [ 1090 | "winapi-util", 1091 | ] 1092 | 1093 | [[package]] 
1094 | name = "thiserror" 1095 | version = "1.0.31" 1096 | source = "registry+https://github.com/rust-lang/crates.io-index" 1097 | checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" 1098 | dependencies = [ 1099 | "thiserror-impl", 1100 | ] 1101 | 1102 | [[package]] 1103 | name = "thiserror-impl" 1104 | version = "1.0.31" 1105 | source = "registry+https://github.com/rust-lang/crates.io-index" 1106 | checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" 1107 | dependencies = [ 1108 | "proc-macro2", 1109 | "quote", 1110 | "syn", 1111 | ] 1112 | 1113 | [[package]] 1114 | name = "triple_accel" 1115 | version = "0.4.0" 1116 | source = "registry+https://github.com/rust-lang/crates.io-index" 1117 | checksum = "22048bc95dfb2ffd05b1ff9a756290a009224b60b2f0e7525faeee7603851e63" 1118 | 1119 | [[package]] 1120 | name = "typenum" 1121 | version = "1.15.0" 1122 | source = "registry+https://github.com/rust-lang/crates.io-index" 1123 | checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" 1124 | 1125 | [[package]] 1126 | name = "unarray" 1127 | version = "0.1.4" 1128 | source = "registry+https://github.com/rust-lang/crates.io-index" 1129 | checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" 1130 | 1131 | [[package]] 1132 | name = "unicode-ident" 1133 | version = "1.0.0" 1134 | source = "registry+https://github.com/rust-lang/crates.io-index" 1135 | checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" 1136 | 1137 | [[package]] 1138 | name = "unicode-segmentation" 1139 | version = "1.9.0" 1140 | source = "registry+https://github.com/rust-lang/crates.io-index" 1141 | checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" 1142 | 1143 | [[package]] 1144 | name = "vec_map" 1145 | version = "0.8.2" 1146 | source = "registry+https://github.com/rust-lang/crates.io-index" 1147 | checksum = 
"f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" 1148 | dependencies = [ 1149 | "serde", 1150 | ] 1151 | 1152 | [[package]] 1153 | name = "wait-timeout" 1154 | version = "0.2.0" 1155 | source = "registry+https://github.com/rust-lang/crates.io-index" 1156 | checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" 1157 | dependencies = [ 1158 | "libc", 1159 | ] 1160 | 1161 | [[package]] 1162 | name = "wasi" 1163 | version = "0.10.2+wasi-snapshot-preview1" 1164 | source = "registry+https://github.com/rust-lang/crates.io-index" 1165 | checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" 1166 | 1167 | [[package]] 1168 | name = "wide" 1169 | version = "0.7.13" 1170 | source = "registry+https://github.com/rust-lang/crates.io-index" 1171 | checksum = "c68938b57b33da363195412cfc5fc37c9ed49aa9cfe2156fde64b8d2c9498242" 1172 | dependencies = [ 1173 | "bytemuck", 1174 | "safe_arch", 1175 | ] 1176 | 1177 | [[package]] 1178 | name = "winapi" 1179 | version = "0.3.9" 1180 | source = "registry+https://github.com/rust-lang/crates.io-index" 1181 | checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" 1182 | dependencies = [ 1183 | "winapi-i686-pc-windows-gnu", 1184 | "winapi-x86_64-pc-windows-gnu", 1185 | ] 1186 | 1187 | [[package]] 1188 | name = "winapi-i686-pc-windows-gnu" 1189 | version = "0.4.0" 1190 | source = "registry+https://github.com/rust-lang/crates.io-index" 1191 | checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 1192 | 1193 | [[package]] 1194 | name = "winapi-util" 1195 | version = "0.1.5" 1196 | source = "registry+https://github.com/rust-lang/crates.io-index" 1197 | checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 1198 | dependencies = [ 1199 | "winapi", 1200 | ] 1201 | 1202 | [[package]] 1203 | name = "winapi-x86_64-pc-windows-gnu" 1204 | version = "0.4.0" 1205 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 1206 | checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" 1207 | 1208 | [[package]] 1209 | name = "windows-sys" 1210 | version = "0.52.0" 1211 | source = "registry+https://github.com/rust-lang/crates.io-index" 1212 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 1213 | dependencies = [ 1214 | "windows-targets 0.52.0", 1215 | ] 1216 | 1217 | [[package]] 1218 | name = "windows-targets" 1219 | version = "0.48.0" 1220 | source = "registry+https://github.com/rust-lang/crates.io-index" 1221 | checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" 1222 | dependencies = [ 1223 | "windows_aarch64_gnullvm 0.48.0", 1224 | "windows_aarch64_msvc 0.48.0", 1225 | "windows_i686_gnu 0.48.0", 1226 | "windows_i686_msvc 0.48.0", 1227 | "windows_x86_64_gnu 0.48.0", 1228 | "windows_x86_64_gnullvm 0.48.0", 1229 | "windows_x86_64_msvc 0.48.0", 1230 | ] 1231 | 1232 | [[package]] 1233 | name = "windows-targets" 1234 | version = "0.52.0" 1235 | source = "registry+https://github.com/rust-lang/crates.io-index" 1236 | checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" 1237 | dependencies = [ 1238 | "windows_aarch64_gnullvm 0.52.0", 1239 | "windows_aarch64_msvc 0.52.0", 1240 | "windows_i686_gnu 0.52.0", 1241 | "windows_i686_msvc 0.52.0", 1242 | "windows_x86_64_gnu 0.52.0", 1243 | "windows_x86_64_gnullvm 0.52.0", 1244 | "windows_x86_64_msvc 0.52.0", 1245 | ] 1246 | 1247 | [[package]] 1248 | name = "windows_aarch64_gnullvm" 1249 | version = "0.48.0" 1250 | source = "registry+https://github.com/rust-lang/crates.io-index" 1251 | checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" 1252 | 1253 | [[package]] 1254 | name = "windows_aarch64_gnullvm" 1255 | version = "0.52.0" 1256 | source = "registry+https://github.com/rust-lang/crates.io-index" 1257 | checksum = 
"cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" 1258 | 1259 | [[package]] 1260 | name = "windows_aarch64_msvc" 1261 | version = "0.48.0" 1262 | source = "registry+https://github.com/rust-lang/crates.io-index" 1263 | checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" 1264 | 1265 | [[package]] 1266 | name = "windows_aarch64_msvc" 1267 | version = "0.52.0" 1268 | source = "registry+https://github.com/rust-lang/crates.io-index" 1269 | checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" 1270 | 1271 | [[package]] 1272 | name = "windows_i686_gnu" 1273 | version = "0.48.0" 1274 | source = "registry+https://github.com/rust-lang/crates.io-index" 1275 | checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" 1276 | 1277 | [[package]] 1278 | name = "windows_i686_gnu" 1279 | version = "0.52.0" 1280 | source = "registry+https://github.com/rust-lang/crates.io-index" 1281 | checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" 1282 | 1283 | [[package]] 1284 | name = "windows_i686_msvc" 1285 | version = "0.48.0" 1286 | source = "registry+https://github.com/rust-lang/crates.io-index" 1287 | checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" 1288 | 1289 | [[package]] 1290 | name = "windows_i686_msvc" 1291 | version = "0.52.0" 1292 | source = "registry+https://github.com/rust-lang/crates.io-index" 1293 | checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" 1294 | 1295 | [[package]] 1296 | name = "windows_x86_64_gnu" 1297 | version = "0.48.0" 1298 | source = "registry+https://github.com/rust-lang/crates.io-index" 1299 | checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" 1300 | 1301 | [[package]] 1302 | name = "windows_x86_64_gnu" 1303 | version = "0.52.0" 1304 | source = "registry+https://github.com/rust-lang/crates.io-index" 1305 | checksum = 
"3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" 1306 | 1307 | [[package]] 1308 | name = "windows_x86_64_gnullvm" 1309 | version = "0.48.0" 1310 | source = "registry+https://github.com/rust-lang/crates.io-index" 1311 | checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" 1312 | 1313 | [[package]] 1314 | name = "windows_x86_64_gnullvm" 1315 | version = "0.52.0" 1316 | source = "registry+https://github.com/rust-lang/crates.io-index" 1317 | checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" 1318 | 1319 | [[package]] 1320 | name = "windows_x86_64_msvc" 1321 | version = "0.48.0" 1322 | source = "registry+https://github.com/rust-lang/crates.io-index" 1323 | checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" 1324 | 1325 | [[package]] 1326 | name = "windows_x86_64_msvc" 1327 | version = "0.52.0" 1328 | source = "registry+https://github.com/rust-lang/crates.io-index" 1329 | checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" 1330 | 1331 | [[package]] 1332 | name = "wyhash" 1333 | version = "0.5.0" 1334 | source = "registry+https://github.com/rust-lang/crates.io-index" 1335 | checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" 1336 | dependencies = [ 1337 | "rand_core", 1338 | ] 1339 | --------------------------------------------------------------------------------