├── HOXD55.mtx ├── HOXD70.mtx ├── .gitignore ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── LICENSE ├── src ├── bitfield_path.rs ├── lib.rs ├── sequences.rs ├── score_matrix.rs ├── args_parser.rs ├── api.rs ├── gap_local_poa.rs ├── utils.rs ├── graph.rs ├── main.rs ├── pathwise_alignment_semiglobal.rs ├── local_poa.rs ├── pathwise_graph.rs ├── pathwise_alignment_output.rs ├── pathwise_alignment.rs └── pathwise_alignment_gap_semi.rs ├── README.md ├── benches └── recgraph_benchmark.rs └── example ├── reads.fa └── graph.gfa /HOXD55.mtx: -------------------------------------------------------------------------------- 1 | A C G T N 2 | A 91 -90 -25 -100 0 3 | C -90 100 -100 -25 0 4 | G -25 -100 100 -90 0 5 | T -100 -25 -90 91 0 6 | N 0 0 0 0 0 -------------------------------------------------------------------------------- /HOXD70.mtx: -------------------------------------------------------------------------------- 1 | A C G T N 2 | A 91 -114 -31 -123 0 3 | C -114 100 -125 -31 0 4 | G -31 -125 100 -114 0 5 | T -123 -31 -144 91 0 6 | N 0 0 0 0 0 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 5 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 6 | Cargo.lock 7 | 8 | # These are backup files generated by rustfmt 9 | **/*.rs.bk 10 | *alignment.txt 11 | odgi 12 | *.og 13 | *valgrind* 14 | *massif* 15 | *.gaf 16 | /tests/ 17 | perf.data* -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 
8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Install latest nightly 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: nightly 23 | override: true 24 | - name: Build 25 | uses: actions-rs/cargo@v1 26 | with: 27 | command: build 28 | - name: Run tests 29 | uses: actions-rs/cargo@v1 30 | with: 31 | command: test 32 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "recgraph" 3 | version = "1.0.0" 4 | authors = ["Davide Monti "] 5 | description = "An exact sequence-to-graph aligner with controlled recombinations" 6 | license = "MIT" 7 | edition = "2021" 8 | 9 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 10 | [dependencies] 11 | clap = { version = "3.1.5", features = ["derive"] } 12 | project-root = "0.2.2" 13 | gfa = "0.8.0" 14 | handlegraph = "0.5.0" 15 | bit-vec = "0.6" 16 | bitvec = "1" 17 | pbr = "*" 18 | 19 | [target.'cfg(target_os="linux")'.dependencies] 20 | tikv-jemallocator = "0.5" 21 | 22 | [dev-dependencies] 23 | criterion = "0.3" 24 | 25 | [[bench]] 26 | name = "recgraph_benchmark" 27 | harness = false 28 | 29 | [profile.release] 30 | lto = "thin" 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 BIAS Lab - Bioinformatics and Experimental Algorithms lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
/// Translates a traceback direction symbol into its numeric code.
///
/// The code of a symbol is its position in the table below, so
/// `'O'` is 0, `'D'` is 1, `'d'` is 2, `'L'` is 3, `'U'` is 4, `'X'` is 5,
/// `'Y'` is 6 and `'M'` is 7.
///
/// # Panics
/// Panics with `impossible direction char` for any other character.
fn dir_u16_from_char(c: char) -> u16 {
    // Index in this table == encoded value; keep the order in sync with the decoder.
    static DIRECTIONS: [char; 8] = ['O', 'D', 'd', 'L', 'U', 'X', 'Y', 'M'];

    match DIRECTIONS.iter().position(|&symbol| symbol == c) {
        Some(code) => code as u16,
        None => panic!("impossible direction char"),
    }
}
{"impossible direction bitslice"}, 29 | } 30 | } 31 | pub fn pred_from_bitvec(bv: &BitVec) -> usize { 32 | let pred: u16 = bv[..16].load_be(); 33 | pred as usize 34 | } 35 | 36 | pub fn dir_from_bitvec(bv: &BitVec) -> char { 37 | char_from_bitslice(&bv[16..]) 38 | } 39 | pub fn set_path_cell(pred: usize, dir: char) -> BitVec { 40 | let mut bv = bitvec![u16, Msb0; 0; 32]; 41 | bv[..16].store::(pred as u16); 42 | bv[16..].store::(dir_u16_from_char(dir)); 43 | bv 44 | } 45 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! RecGraph is an exact sequence to variation graph aligner that allows controlled recombinations. 2 | //! More information at [rsPOA](https://github.com/AlgoLab/recgraph) 3 | 4 | /// Application program interface of recgraph 5 | pub mod api; 6 | /// Command Line Interface 7 | pub mod args_parser; 8 | /// Interface for path managment 9 | pub mod bitfield_path; 10 | /// .gaf file creation 11 | pub mod gaf_output; 12 | /// adaptive banded POA with gap opening and penalty 13 | pub mod gap_global_abpoa; 14 | /// adaptive banded local POA with gap opening and penalty 15 | pub mod gap_local_poa; 16 | /// adaptive banded POA, with avx2 instructions 17 | pub mod global_abpoa; 18 | /// LnzGraph creation 19 | pub mod graph; 20 | /// adaptive banded local POA, with avx2 instructions 21 | pub mod local_poa; 22 | /// DEMO 23 | pub mod pathwise_alignment; 24 | pub mod pathwise_alignment_gap; 25 | pub mod pathwise_alignment_gap_semi; 26 | pub mod pathwise_alignment_output; 27 | pub mod pathwise_alignment_recombination; 28 | pub mod pathwise_alignment_semiglobal; 29 | /// Pathwise graph creation 30 | pub mod pathwise_graph; 31 | pub mod recombination_output; 32 | /// Score matrix for each alignment type 33 | pub mod score_matrix; 34 | /// Read preparation for POA 35 | pub mod sequences; 36 | /// Various and miscellaneous 37 | pub mod 
use std::fs::File;
use std::io::{prelude::*, BufReader};

/// Normalises one input character for the aligners: a gap symbol `-` becomes
/// `N`, every other character is ASCII-uppercased.
fn normalize_base(c: char) -> char {
    if c == '-' {
        'N'
    } else {
        c.to_ascii_uppercase()
    }
}

/// Moves the bases collected in `current` into `reads`, prefixed with the `$`
/// sentinel. Does nothing when no base has been collected; `current` is left empty.
fn flush_read(reads: &mut Vec<Vec<char>>, current: &mut Vec<char>) {
    if !current.is_empty() {
        let mut read = std::mem::take(current);
        read.insert(0, '$');
        reads.push(read);
    }
}

/// Returns `(reads, read_names)` parsed from a .fasta file, ready for the alignment.
///
/// Every read starts with the `$` sentinel, followed by the normalised bases of
/// all the lines of its record. Names are the header lines without the leading `>`.
///
/// # Panics
/// Panics if the file cannot be opened or read (previously read errors were
/// silently skipped, which could loop forever on a reader that keeps failing),
/// and with `wrong fasta file format` when the number of non-empty reads
/// differs from the number of headers.
pub fn get_sequences(file_path: String) -> (Vec<Vec<char>>, Vec<String>) {
    let file = File::open(&file_path)
        .unwrap_or_else(|err| panic!("unable to open {}: {}", file_path, err));
    let reader = BufReader::new(file);

    let mut sequences: Vec<Vec<char>> = Vec::new();
    let mut sequences_name: Vec<String> = Vec::new();
    let mut sequence: Vec<char> = Vec::new();

    for line in reader.lines() {
        let line = line.unwrap_or_else(|err| panic!("unable to read {}: {}", file_path, err));
        if let Some(name) = line.strip_prefix('>') {
            // A header closes the previous record (if any) and opens a new one.
            sequences_name.push(name.to_string());
            flush_read(&mut sequences, &mut sequence);
        } else {
            // Empty lines contribute nothing; a record may span several lines.
            sequence.extend(line.chars().map(normalize_base));
        }
    }
    flush_read(&mut sequences, &mut sequence);

    if sequences.len() != sequences_name.len() {
        panic!("wrong fasta file format");
    }
    (sequences, sequences_name)
}

/// Prepares a string for the alignment algorithms: `$` sentinel followed by the
/// normalised characters of `line`.
pub fn build_align_string(line: &str) -> Vec<char> {
    let mut seq: Vec<char> = Vec::with_capacity(line.len() + 1);
    seq.push('$');
    seq.extend(line.chars().map(normalize_base));
    seq
}

/// Returns the reverse complement of a read.
///
/// When ambiguous strand mode is enabled the alignment is also attempted on the
/// reverse complement of the input read. The first element of `seq` (the `$`
/// sentinel) is dropped and a fresh `$` is put in front of the result; an empty
/// input yields just the sentinel.
///
/// # Panics
/// Panics if a base is not one of `A`, `C`, `G`, `T`, `N`.
pub fn rev_and_compl(seq: &[char]) -> Vec<char> {
    let bases = seq.get(1..).unwrap_or(&[]);
    let mut rev_seq: Vec<char> = Vec::with_capacity(bases.len() + 1);
    rev_seq.push('$');
    rev_seq.extend(bases.iter().rev().map(|c| match *c {
        'A' => 'T',
        'C' => 'G',
        'G' => 'C',
        'T' => 'A',
        'N' => 'N',
        _ => {
            panic!("wrong char: {}, unable to rev&compl", c)
        }
    }));
    rev_seq
}

#[cfg(test)]
mod tests {
    #[test]
    fn rev_and_compl_of_seq_correct() {
        let s1 = ['$', 'A', 'A', 'T'];
        let s1_rc = super::rev_and_compl(&s1);
        for i in 0..s1_rc.len() {
            assert_eq!(['$', 'A', 'T', 'T'][i], s1_rc[i]);
        }
    }
    #[test]
    fn rev_and_compl_with_every_symbol() {
        let s1 = ['$', 'A', 'T', 'C', 'G', 'N'];
        let s1_rc = super::rev_and_compl(&s1);
        for i in 0..s1_rc.len() {
            assert_eq!(['$', 'N', 'C', 'G', 'A', 'T'][i], s1_rc[i]);
        }
    }
}
3 | 4 | ## Installation 5 | We support 4 different ways to obtain and use RecGraph: 6 | * [downloading static binaries](#static-binaries) 7 | * [building via cargo](#compilation) 8 | * [installing via conda](#installation-from-conda) 9 | * [getting a docker image](#docker-image) 10 | 11 | #### Static binaries 12 | For user convenience, we provide static binaries for x86_64 linux and windows systems (see [Releases](https://github.com/AlgoLab/RecGraph/releases)). 13 | 14 | #### Compilation 15 | Install [`rust`](https://doc.rust-lang.org/cargo/getting-started/installation.html), then clone and install RecGraph: 16 | ``` 17 | git clone https://github.com/AlgoLab/RecGraph.git 18 | cd RecGraph 19 | cargo build --release 20 | ``` 21 | 22 | #### Installation from conda 23 | RecGraph is available on bioconda: 24 | ``` 25 | conda create -n recgraph -c conda-forge -c bioconda recgraph 26 | ``` 27 | 28 | #### Docker image 29 | We provide a docker image, hosted on [docker.hub](https://hub.docker.com/r/algolab/recgraph): 30 | ``` 31 | docker pull algolab/recgraph 32 | docker run algolab/recgraph --help 33 | ``` 34 | 35 | ## Usage 36 | RecGraph requires as input a variation graph in `.gfa` format and a set of sequences (reads) in `.fasta` format and computes the alignment in `.gaf` format. 
To run RecGraph, run: 37 | ``` 38 | cargo run --release <reads.fa> <graph.gfa> > <alignments.gaf> 39 | ``` 40 | 41 | #### Example 42 | ``` 43 | # if you built with cargo, from the root of this repo 44 | cargo run --release -- -m 1 example/reads.fa example/graph.gfa > align.gaf 45 | 46 | # if you have the precompiled binary 47 | ./recgraph_linux_x86-64 -m 1 example/reads.fa example/graph.gfa > align.gaf 48 | 49 | # if you have the conda version, within the correct environment 50 | recgraph -m 1 example/reads.fa example/graph.gfa > align.gaf 51 | 52 | # if you use docker, please bind the volume into the container (-v) 53 | docker run -v $(pwd)/example:/data algolab/recgraph -m1 reads.fa graph.gfa > align.gaf 54 | ``` 55 | 56 | ### Alignment modes 57 | RecGraph can be run in several different modes (`-m` flag): 58 | * `-m [0,1,2,3]` performs the classical POA (global, local, affine gap, and local gap) 59 | * `-m [4,5]` performs global/semiglobal alignment in pathwise mode (i.e., following the paths of the graph) 60 | * `-m [8,9]` performs global/semiglobal alignment in recombination mode (i.e., allowing weighted recombinations) 61 | 62 | `-m 6` and `-m 7` are experimental and are not fully tested yet. They perform global/semiglobal alignment with affine gap in pathwise mode. 63 | 64 | ### Other parameters 65 | RecGraph also lets you set multiple parameters to tweak the dynamic programming alignment procedure.
Here the list of parameters (please check also `--help`): 66 | ``` 67 | -M, --match Match score [default: 2] 68 | -X, --mismatch Mismatch penalty [default: 4] 69 | -O, --gap-open Gap opening penalty [default: 4] 70 | -E, --gap-ext Gap extension penalty [default: 2] 71 | -R, --base-rec-cost Recombination cost, 72 | determined with -r as R + r*(displacement_length) [default: 4] 73 | -r, --multi-rec-cost Displacement multiplier [default: 0.1] 74 | -B, --rec-band-width Recombination band width [default: 1] 75 | -b, --extra-b First adaptive banding par, 76 | set < 0 to disable adaptive banded [default: 1] 77 | -f, --extra-f Second adaptive banding par, number of basis added to both side of 78 | the band = b+f*L, l = length of the sequence [default: 0.01] 79 | -t, --matrix Scoring matrix file, if '-t' is used, '-M' and '-X' are not used 80 | and you should set gap penalties in this case [default: none] 81 | ``` 82 | 83 | ### Library 84 | RecGraph can also be used as a library for your project. To do so, add these lines to your `Cargo.toml`: 85 | ``` 86 | [dependencies] 87 | RecGraph = { git = "https://github.com/AlgoLab/RecGraph" } 88 | ``` 89 | You can use the functions defined in the [`api.rs`](https://github.com/AlgoLab/RecGraph/blob/1b513973c1145015ed626abc975e276970d2a60e/src/api.rs) file (e.g., by adding `use RecGraph::api::*` to your file). All the functions require just a read (as a string) and the graph (as an HashGraph). Other parameters are optional. 90 | -------------------------------------------------------------------------------- /src/score_matrix.rs: -------------------------------------------------------------------------------- 1 | use crate::args_parser; 2 | use std::{ 3 | collections::HashMap, 4 | fs::File, 5 | io::{prelude::*, BufReader}, 6 | }; 7 | 8 | /// Returns the same matrix of create_score_matrix with every score transformed into an f32 9 | /// this is done in order to works with f32 values needed in alignment with simd instruction. 
/// Builds an integer score matrix over `A`, `C`, `G`, `T`, `N` and the gap symbol `-`.
///
/// * identical symbols score `m`, except `('N', 'N')` which scores `x`;
/// * any pair involving exactly one `-` scores `x * 2`;
/// * every other pair scores `x`;
/// * the `('-', '-')` pair is not part of the matrix.
pub fn create_score_matrix_match_mis(m: i32, x: i32) -> HashMap<(char, char), i32> {
    static SYMBOLS: [char; 6] = ['A', 'C', 'G', 'T', 'N', '-'];

    let mut score_matrix: HashMap<(char, char), i32> = SYMBOLS
        .iter()
        .flat_map(|&row| SYMBOLS.iter().map(move |&col| (row, col)))
        .map(|pair| {
            let score = match pair {
                (a, b) if a == b => m,
                ('-', _) | (_, '-') => x * 2,
                _ => x,
            };
            (pair, score)
        })
        .collect();

    // Two cells deviate from the general rule above.
    score_matrix.insert(('N', 'N'), x);
    score_matrix.remove(&('-', '-'));
    score_matrix
}
score_matrix.insert((*i, *j), m); 58 | } else { 59 | score_matrix.insert((*i, *j), x); 60 | } 61 | } 62 | } 63 | score_matrix.insert(('N', 'N'), x); 64 | score_matrix.remove(&('-', '-')); 65 | score_matrix 66 | } 67 | pub fn create_score_matrix_from_matrix_file(matrix_file: &str) -> HashMap<(char, char), i32> { 68 | let mut matrix: Vec> = Vec::new(); 69 | let file_path = project_root::get_project_root().unwrap().join(matrix_file); 70 | 71 | let file = File::open(file_path).unwrap(); 72 | let reader = BufReader::new(file); 73 | 74 | for line in reader.lines().flatten() { 75 | let mut splitted_line: Vec = Vec::new(); 76 | for elem in line.split(' ') { 77 | splitted_line.push(String::from(elem)); 78 | } 79 | splitted_line.retain(|x| !x.is_empty()); 80 | matrix.push(splitted_line); 81 | } 82 | matrix[0].insert(0, String::from("X")); 83 | 84 | let mut matrix_score: HashMap<(char, char), i32> = HashMap::new(); 85 | for i in 1..matrix.len() { 86 | for j in 1..matrix[0].len() { 87 | let c1 = matrix[i][0] 88 | .chars() 89 | .next() 90 | .expect("failed to create HOXD70"); 91 | let c2 = matrix[0][j] 92 | .chars() 93 | .next() 94 | .expect("failed to create HOXD70"); 95 | 96 | matrix_score.insert((c1, c2), matrix[i][j].parse().unwrap()); 97 | } 98 | } 99 | for ch in ['A', 'C', 'G', 'T', 'N'].iter() { 100 | matrix_score.insert((*ch, '-'), -200); 101 | matrix_score.insert(('-', *ch), -200); 102 | } 103 | matrix_score.remove(&('-', '-')); 104 | matrix_score 105 | } 106 | 107 | #[cfg(test)] 108 | mod tests { 109 | #[test] 110 | fn match_miss_matrix_correct() { 111 | let score_matrix = super::create_score_matrix_match_mis(10, -10); 112 | assert_eq!(*score_matrix.get(&('A', 'A')).unwrap(), 10); 113 | assert_eq!(*score_matrix.get(&('A', 'C')).unwrap(), -10); 114 | assert_eq!(*score_matrix.get(&('N', 'N')).unwrap(), -10); 115 | assert_eq!(score_matrix.get(&('-', '-')), None); 116 | } 117 | #[test] 118 | fn hoxd_correct() { 119 | let score_matrix_d70 = 
// Command line arguments of recgraph, parsed with clap's derive API.
// Plain `//` comments are used on purpose: `///` doc comments would be picked
// up by the derive macro as help text.
// NOTE(review): the `author` string ends with a trailing space; an e-mail
// address in angle brackets may have been lost — confirm against the repository.
#[derive(Parser, Debug)]
#[clap(author = "Davide Monti ", version, about = "RecGraph", long_about = None)]
struct Args {
    // Positional: path of the reads file.
    #[clap(
        help_heading = "I/O",
        help = "Input sequences (in .fasta format)",
        required = true
    )]
    sequence_path: String,
    // Positional: path of the graph file.
    #[clap(
        help_heading = "I/O",
        help = "Input graph (in .gfa format)",
        required = true
    )]
    graph_path: String,

    // Output file; the default is the literal string "standard output".
    #[clap(
        help_heading = "I/O",
        short = 'o',
        long = "out_file",
        default_value = "standard output",
        help = "Output alignment file"
    )]
    out_file: String,

    // Alignment mode, an integer selector documented in the help text below.
    #[clap(
        help_heading = "Alignment",
        short = 'm',
        long = "aln-mode",
        default_value_t = 0,
        help = "0: global POA, 1: local POA, 2: affine gap POA, 3: local gap POA,\n4: global pathwise alignment, 5: semiglobal pathwise alignment,\n6: global pathwise alignment with affine gap (EXPERIMENTAL),\n7: semiglobal pathwise alignment with affine gap (EXPERIMENTAL),\n8: global recombination alignment, 9: semiglobal recombination alignment"
    )]
    alignment_mode: i32,

    // Match score, returned unchanged by `get_match_mismatch`.
    #[clap(
        help_heading = "Alignment",
        short = 'M',
        long = "match",
        default_value_t = 2,
        help = "Match score"
    )]
    match_score: i32,

    // Mismatch penalty; `get_match_mismatch` returns it negated.
    #[clap(
        help_heading = "Alignment",
        short = 'X',
        long = "mismatch",
        default_value_t = 4,
        help = "Mismatch penalty"
    )]
    mismatch_score: i32,

    // Scoring matrix name; "none" means the match/mismatch scores are used.
    #[clap(
        help_heading = "Alignment",
        short = 't',
        long = "matrix",
        default_value = "none",
        help = "Scoring matrix file, if '-t' is used, '-M' and '-X' are not used and you should set gap penalties in this case"
    )]
    matrix: String,

    // Gap opening penalty; `get_gap_open_gap_ext` returns it negated.
    #[clap(
        help_heading = "Alignment",
        short = 'O',
        long = "gap-open",
        default_value_t = 4,
        help = "Gap opening penalty"
    )]
    gap_open: i32,

    // Gap extension penalty; `get_gap_open_gap_ext` returns it negated.
    #[clap(
        help_heading = "Alignment",
        short = 'E',
        long = "gap-ext",
        default_value_t = 2,
        help = "Gap extension penalty"
    )]
    gap_extension: i32,

    // Recombination cost multiplier (the `r` in `R + r*(displacement_length)`).
    #[clap(
        help_heading = "Alignment",
        short = 'r',
        long = "multi-rec-cost",
        default_value_t = 0.1,
        help = "Displacement multiplier"
    )]
    multi_rec_cost: f32,

    // Base recombination cost (the `R` in `R + r*(displacement_length)`).
    #[clap(
        help_heading = "Alignment",
        short = 'R',
        long = "base-rec-cost",
        default_value_t = 4,
        help = "Recombination cost, determined with -r as R + r*(displacement_length)"
    )]
    base_rec_cost: i32,

    // Recombination band width.
    #[clap(
        help_heading = "Alignment",
        short = 'B',
        long = "rec-band-width",
        default_value_t = 1.0,
        help = "Recombination band width"
    )]
    rec_band_width: f32,

    // Ambiguous strand mode, kept as the string "true"/"false";
    // `get_amb_strand_mode` converts it to a bool.
    #[clap(
        help_heading = "Alignment",
        possible_values = &["true", "false"],
        default_value = "false",
        short = 's',
        long = "amb-strand",
        help = "Ambigous strand mode (experimental): try reverse complement if alignment score is too low"
    )]
    amb_strand: String,

    // Banding parameter b; together with f it sets the number of extra bases
    // added to the band (b+f*L). `get_b_f` returns it cast to f32.
    #[clap(
        help_heading = "Adaptive banded",
        default_value_t = 1,
        short = 'b',
        long = "extra-b",
        help = "First adaptive banding par, set < 0 to disable adaptive banded"
    )]
    extra_b: i32,

    // Banding parameter f, the multiplier of the sequence length in b+f*L.
    #[clap(
        help_heading = "Adaptive banded",
        default_value_t = 0.01,
        short = 'f',
        long = "extra-f",
        help = "Second adaptive banding par, number of basis added to both side of the band = b+f*L, l = length of the sequence"
    )]
    extra_f: f32,
}
Args::parse(); 196 | args.out_file 197 | } 198 | 199 | pub fn get_recombination_band_width() -> f32 { 200 | let args = Args::parse(); 201 | args.rec_band_width 202 | } 203 | -------------------------------------------------------------------------------- /src/api.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use crate::{ 4 | gaf_output::GAFStruct, gap_global_abpoa, gap_local_poa, global_abpoa, graph, local_poa, 5 | score_matrix, sequences, utils, 6 | }; 7 | use handlegraph::hashgraph::HashGraph; 8 | 9 | /// Global alignment with adaptive band and simd instructions, score matrix can be set with create_score_matrix_f32. 10 | /// Only required parameters are a read as a &String and a graph as a &HandleGraph. 11 | pub fn align_global_no_gap( 12 | read: &String, 13 | graph: &HashGraph, 14 | sequence_name: Option<(&str, usize)>, 15 | score_matrix: Option>, 16 | bases_to_add: Option, 17 | ) -> GAFStruct { 18 | let read_for_alignment = sequences::build_align_string(read); 19 | let lnz_graph = graph::create_graph_struct(graph, false); 20 | let score_matrix_f32 = 21 | score_matrix.unwrap_or(score_matrix::create_score_matrix_match_mis_f32(2f32, -4f32)); 22 | let bases_to_add = (read.len() as f32 * bases_to_add.unwrap_or(0.1)) as usize; 23 | 24 | let r_values = utils::set_r_values(&lnz_graph.nwp, &lnz_graph.pred_hash, lnz_graph.lnz.len()); 25 | let hofp = utils::handle_pos_in_lnz_from_hashgraph(&lnz_graph.nwp, &graph, false); 26 | 27 | unsafe { 28 | let alignment = global_abpoa::exec_simd( 29 | &read_for_alignment, 30 | sequence_name.unwrap_or(("no_name", 1)), 31 | &lnz_graph, 32 | &score_matrix_f32, 33 | bases_to_add, 34 | false, 35 | &hofp, 36 | &r_values, 37 | ); 38 | alignment.1.unwrap() 39 | } 40 | } 41 | /// Global alignment with adaptive band,score matrix can be set with create_score_matrix_i32. 42 | /// Only required parameters are a read as a &String and a graph as a &HandleGraph. 
43 | pub fn align_global_gap( 44 | read: &String, 45 | graph: &HashGraph, 46 | sequence_name: Option<(&str, usize)>, 47 | score_matrix: Option>, 48 | bases_to_add: Option, 49 | o: Option, 50 | e: Option, 51 | ) -> GAFStruct { 52 | let read_for_alignment = sequences::build_align_string(read); 53 | let lnz_graph = graph::create_graph_struct(graph, false); 54 | let score_matrix_i32 = 55 | score_matrix.unwrap_or(score_matrix::create_score_matrix_match_mis(2, -4)); 56 | let bases_to_add = (read.len() as f32 * bases_to_add.unwrap_or(0.1)) as usize; 57 | 58 | let hofp = utils::handle_pos_in_lnz_from_hashgraph(&lnz_graph.nwp, &graph, false); 59 | 60 | let alignment = gap_global_abpoa::exec( 61 | &read_for_alignment, 62 | sequence_name.unwrap_or(("no_name", 1)), 63 | &lnz_graph, 64 | &score_matrix_i32, 65 | o.unwrap_or(-10), 66 | e.unwrap_or(-6), 67 | bases_to_add, 68 | false, 69 | &hofp, 70 | ); 71 | alignment.1.unwrap() 72 | } 73 | /// Local alignment with simd instruction, score matrix can be set with create_score_matrix_f32. 74 | /// Only required parameters are a read as a &String and a graph as a &HandleGraph. 
75 | /// Returns a GAFStruct 76 | pub fn align_local_no_gap( 77 | read: &String, 78 | graph: &HashGraph, 79 | sequence_name: Option<(&str, usize)>, 80 | score_matrix: Option>, 81 | ) -> GAFStruct { 82 | let read_for_alignment = sequences::build_align_string(read); 83 | let lnz_graph = graph::create_graph_struct(graph, false); 84 | let score_matrix_f32 = 85 | score_matrix.unwrap_or(score_matrix::create_score_matrix_match_mis_f32(2f32, -4f32)); 86 | let hofp = utils::handle_pos_in_lnz_from_hashgraph(&lnz_graph.nwp, &graph, false); 87 | 88 | unsafe { 89 | let alignment = local_poa::exec_simd( 90 | &read_for_alignment, 91 | sequence_name.unwrap_or(("no_name", 1)), 92 | &lnz_graph, 93 | &score_matrix_f32, 94 | false, 95 | &hofp, 96 | ); 97 | alignment.1.unwrap() 98 | } 99 | } 100 | /// Local gap alignment with adaptive band, score matrix can be set with create_score_matrix_i32. 101 | /// Only required parameters are a read as a &String and a graph as a &HandleGraph. 102 | pub fn align_local_gap( 103 | read: &String, 104 | graph: &HashGraph, 105 | sequence_name: Option<(&str, usize)>, 106 | score_matrix: Option>, 107 | o: Option, 108 | e: Option, 109 | ) -> GAFStruct { 110 | let read_for_alignment = sequences::build_align_string(read); 111 | let lnz_graph = graph::create_graph_struct(graph, false); 112 | let score_matrix_i32 = 113 | score_matrix.unwrap_or(score_matrix::create_score_matrix_match_mis(2, -4)); 114 | 115 | let hofp = utils::handle_pos_in_lnz_from_hashgraph(&lnz_graph.nwp, &graph, false); 116 | 117 | let alignment = gap_local_poa::exec( 118 | &read_for_alignment, 119 | sequence_name.unwrap_or(("no_name", 1)), 120 | &lnz_graph, 121 | &score_matrix_i32, 122 | o.unwrap_or(-10), 123 | e.unwrap_or(-6), 124 | false, 125 | &hofp, 126 | ); 127 | alignment.1.unwrap() 128 | } 129 | /// Returns a score matrix for gap alignments, can be set with match/mismatch score 130 | /// or by parsing a .mtx file 131 | pub fn create_score_matrix_i32( 132 | match_score: Option, 133 | 
mismatch_score: Option, 134 | matrix_file_path: Option<&str>, 135 | ) -> HashMap<(char, char), i32> { 136 | let score_matrix_i32; 137 | match matrix_file_path { 138 | Some(matrix_from_file) => { 139 | score_matrix_i32 = score_matrix::create_score_matrix_from_matrix_file(matrix_from_file); 140 | } 141 | _ => { 142 | score_matrix_i32 = score_matrix::create_score_matrix_match_mis( 143 | match_score.unwrap(), 144 | mismatch_score.unwrap(), 145 | ); 146 | } 147 | } 148 | score_matrix_i32 149 | } 150 | 151 | /// Returns a score matrix for non gap alignment that use simd instruction, can be set with match/mismatch score 152 | /// or by parsing a .mtx file 153 | pub fn create_score_matrix_f32( 154 | match_score: Option, 155 | mismatch_score: Option, 156 | matrix_type: Option<&str>, 157 | ) -> HashMap<(char, char), f32> { 158 | let score_matrix_i32 = create_score_matrix_i32(match_score, mismatch_score, matrix_type); 159 | let mut score_matrix_f32: HashMap<(char, char), f32> = HashMap::new(); 160 | for (k, v) in score_matrix_i32.iter() { 161 | score_matrix_f32.insert(*k, *v as f32); 162 | } 163 | score_matrix_f32 164 | } 165 | -------------------------------------------------------------------------------- /src/gap_local_poa.rs: -------------------------------------------------------------------------------- 1 | use std::{cmp::Ordering, collections::HashMap}; 2 | 3 | use crate::gaf_output::GAFStruct; 4 | use crate::{bitfield_path as bf, utils}; 5 | use crate::{gaf_output, graph::LnzGraph}; 6 | use bitvec::prelude::*; 7 | 8 | pub fn exec( 9 | sequence: &[char], 10 | seq_name: (&str, usize), 11 | graph: &LnzGraph, 12 | scores_matrix: &HashMap<(char, char), i32>, 13 | o: i32, 14 | e: i32, 15 | amb_mode: bool, 16 | hofp: &HashMap, 17 | ) -> (i32, Option) { 18 | let lnz = &graph.lnz; 19 | let nodes_with_pred = &graph.nwp; 20 | let pred_hash = &graph.pred_hash; 21 | 22 | let mut m = vec![vec![0; sequence.len()]; lnz.len()]; 23 | let mut x = vec![vec![0; sequence.len()]; 
lnz.len()]; 24 | let mut y = vec![vec![0; sequence.len()]; lnz.len()]; 25 | 26 | let mut path = vec![vec![bitvec![u16, Msb0; 0; 32]; sequence.len()]; lnz.len()]; 27 | let mut path_x = vec![vec![bitvec![u16, Msb0; 0; 32]; sequence.len()]; lnz.len()]; 28 | let mut path_y = vec![vec![bitvec![u16, Msb0; 0; 32]; sequence.len()]; lnz.len()]; 29 | 30 | let (mut best_row, mut best_col) = (0, 0); 31 | for i in 0..lnz.len() - 1 { 32 | for j in 0..sequence.len() { 33 | match (i, j) { 34 | (0, _) | (_, 0) => { 35 | path[i][j] = bf::set_path_cell(0, 'O'); 36 | path_x[i][j] = bf::set_path_cell(0, 'O'); 37 | path_y[i][j] = bf::set_path_cell(0, 'O'); 38 | } 39 | _ => { 40 | // set x 41 | let l_x = x[i][j - 1] + e; 42 | let l_m = m[i][j - 1] + o + e; 43 | let l_idx = i; 44 | let l = match l_x.cmp(&l_m) { 45 | Ordering::Greater => { 46 | path_x[i][j] = bf::set_path_cell(i, 'X'); 47 | l_x 48 | } 49 | _ => { 50 | path_x[i][j] = bf::set_path_cell(i, 'M'); 51 | l_m 52 | } 53 | }; 54 | x[i][j] = l; 55 | 56 | //set y and get d 57 | let mut d; 58 | let d_idx; 59 | 60 | let mut u; 61 | let u_idx; 62 | if !nodes_with_pred[i] { 63 | d = m[i - 1][j - 1] + scores_matrix.get(&(sequence[j], lnz[i])).unwrap(); 64 | d_idx = i - 1; 65 | 66 | let u_y = y[i - 1][j] + e; 67 | let u_m = m[i - 1][j] + o + e; 68 | u_idx = i - 1; 69 | 70 | u = match u_y.cmp(&u_m) { 71 | Ordering::Greater => { 72 | path_y[i][j] = bf::set_path_cell(u_idx, 'Y'); 73 | u_y 74 | } 75 | _ => { 76 | path_y[i][j] = bf::set_path_cell(u_idx, 'M'); 77 | u_m 78 | } 79 | }; 80 | y[i][j] = u; 81 | } else { 82 | let from_m; 83 | (d, d_idx) = get_best_d(&m, pred_hash.get(&i).unwrap(), j); 84 | (u, u_idx, from_m) = get_best_u(&m, &y, pred_hash.get(&i).unwrap(), j, o); 85 | d += scores_matrix.get(&(sequence[j], lnz[i])).unwrap(); 86 | u += e; 87 | y[i][j] = u; 88 | if from_m { 89 | path_y[i][j] = bf::set_path_cell(u_idx, 'M'); 90 | } else { 91 | path_y[i][j] = bf::set_path_cell(u_idx, 'Y'); 92 | } 93 | } 94 | 95 | // set m 96 | if d < 0 && l 
< 0 && u < 0 { 97 | m[i][j] = 0; 98 | path[i][j] = bf::set_path_cell(0, 'O'); 99 | } else { 100 | let (best_val, mut dir) = utils::get_max_d_u_l(d, u, l); 101 | if dir == 'D' && lnz[i] != sequence[j] { 102 | dir = 'd' 103 | } 104 | m[i][j] = best_val; 105 | path[i][j] = match dir { 106 | 'D' | 'd' => bf::set_path_cell(d_idx, dir), 107 | 'U' => bf::set_path_cell(u_idx, dir), 108 | _ => bf::set_path_cell(l_idx, dir), 109 | } 110 | } 111 | } 112 | } 113 | 114 | if m[i][j] > m[best_row][best_col] { 115 | best_row = i; 116 | best_col = j; 117 | } 118 | } 119 | } 120 | 121 | if seq_name.1 != 0 { 122 | let gaf_struct = gaf_output::gaf_of_gap_local_poa( 123 | &path, &path_x, &path_y, sequence, seq_name, best_row, best_col, amb_mode, hofp, 124 | ); 125 | (m[best_row][best_col], Some(gaf_struct)) 126 | } else { 127 | (m[best_row][best_col], None) 128 | } 129 | } 130 | 131 | fn get_best_d(m: &[Vec], p_arr: &[usize], j: usize) -> (i32, usize) { 132 | let mut d = 0; 133 | let mut d_idx = 0; 134 | let mut first = false; 135 | for p in p_arr { 136 | let current_d = m[*p][j - 1]; 137 | if first { 138 | first = false; 139 | d = current_d; 140 | d_idx = *p; 141 | } 142 | if current_d > d { 143 | d = current_d; 144 | d_idx = *p; 145 | } 146 | } 147 | (d, d_idx) 148 | } 149 | 150 | fn get_best_u( 151 | m: &[Vec], 152 | y: &[Vec], 153 | p_arr: &[usize], 154 | j: usize, 155 | o: i32, 156 | ) -> (i32, usize, bool) { 157 | let mut u_m = 0; 158 | let mut u_y = 0; 159 | let mut u_m_idx = 0; 160 | let mut u_y_idx = 0; 161 | let mut first = false; 162 | for p in p_arr { 163 | let current_u_m = m[*p][j] + o; 164 | let current_u_y = y[*p][j]; 165 | if first { 166 | first = false; 167 | u_m = current_u_m; 168 | u_y = current_u_y; 169 | u_m_idx = *p; 170 | u_y_idx = *p; 171 | } 172 | if current_u_m > u_m { 173 | u_m = current_u_m; 174 | u_m_idx = *p; 175 | } 176 | if current_u_y > u_y { 177 | u_y = current_u_y; 178 | u_y_idx = *p; 179 | } 180 | } 181 | 182 | if u_m > u_y { 183 | (u_m, u_m_idx, 
true) 184 | } else { 185 | (u_y, u_y_idx, false) 186 | } 187 | } 188 | 189 | #[cfg(test)] 190 | mod tests { 191 | use std::collections::HashMap; 192 | 193 | use bit_vec::BitVec; 194 | 195 | use crate::graph::LnzGraph; 196 | 197 | #[test] 198 | fn test_gap_local_poa_consider_substrings() { 199 | let s = vec!['$', 'A', 'A', 'C', 'C', 'C', 'A', 'A']; 200 | 201 | let lnz = vec!['$', 'G', 'G', 'C', 'C', 'C', 'G', 'G', 'F']; 202 | let mut nwp = BitVec::from_elem(lnz.len(), false); 203 | 204 | nwp.set(1, true); 205 | nwp.set(8, true); 206 | let mut pred_hash = HashMap::new(); 207 | pred_hash.insert(1, vec![0]); 208 | pred_hash.insert(8, vec![7]); 209 | let graph_struct = LnzGraph { 210 | lnz, 211 | nwp, 212 | pred_hash, 213 | }; 214 | let mut score_matrix = HashMap::new(); 215 | for c1 in ['A', 'C', 'G'] { 216 | for c2 in ['A', 'C', 'G'] { 217 | if c1 == c2 { 218 | score_matrix.insert((c1, c2), 1); 219 | } else { 220 | score_matrix.insert((c1, c2), -1); 221 | } 222 | } 223 | } 224 | let align_score = super::exec( 225 | &s, 226 | ("test", 0), 227 | &graph_struct, 228 | &score_matrix, 229 | -4, 230 | -2, 231 | false, 232 | &HashMap::new(), 233 | ); 234 | assert_eq!(align_score.0, 3); 235 | } 236 | 237 | #[test] 238 | fn gap_local_poa_consider_best_predecessor() { 239 | let s = vec!['$', 'A', 'A', 'C', 'C', 'C', 'A', 'A']; 240 | 241 | let lnz = vec!['$', 'G', 'G', 'G', 'C', 'C', 'C', 'G', 'G', 'F']; 242 | let mut nwp = BitVec::from_elem(lnz.len(), false); 243 | 244 | nwp.set(1, true); 245 | nwp.set(6, true); 246 | nwp.set(9, true); 247 | let mut pred_hash = HashMap::new(); 248 | pred_hash.insert(1, vec![0]); 249 | pred_hash.insert(6, vec![3]); 250 | pred_hash.insert(9, vec![8, 5]); 251 | let graph_struct = LnzGraph { 252 | lnz, 253 | nwp, 254 | pred_hash, 255 | }; 256 | let mut score_matrix = HashMap::new(); 257 | for c1 in ['A', 'C', 'G'] { 258 | for c2 in ['A', 'C', 'G'] { 259 | if c1 == c2 { 260 | score_matrix.insert((c1, c2), 1); 261 | } else { 262 | 
score_matrix.insert((c1, c2), -1); 263 | } 264 | } 265 | } 266 | let align_score = super::exec( 267 | &s, 268 | ("test", 0), 269 | &graph_struct, 270 | &score_matrix, 271 | -4, 272 | -2, 273 | false, 274 | &HashMap::new(), 275 | ); 276 | assert_eq!(align_score.0, 2); 277 | } 278 | } 279 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | cmp::{self, Ordering}, 3 | collections::HashMap, 4 | fs::{File, OpenOptions}, 5 | }; 6 | 7 | use bit_vec::BitVec; 8 | use handlegraph::{handle::Handle, handlegraph::HandleGraph, hashgraph::HashGraph}; 9 | 10 | use crate::args_parser; 11 | use std::io::{prelude::*, BufWriter}; 12 | use std::path::Path; 13 | 14 | #[inline] 15 | /// Needed for adaptive band settings, set the leftmost and rightmost position for each row of the dp matrix 16 | /// The algorithm used is the same as abPOA 17 | pub fn set_ampl_for_row( 18 | i: usize, 19 | p_arr: &[usize], 20 | r_val: usize, 21 | best_scoring_pos: &[usize], 22 | seq_len: usize, 23 | bta: usize, 24 | simd_version: bool, 25 | ) -> (usize, usize) { 26 | let ms; 27 | let me; 28 | if i == 0 { 29 | ms = 0; 30 | me = 0; 31 | } else if p_arr.is_empty() { 32 | let pl = best_scoring_pos[i - 1]; 33 | ms = pl + 1; 34 | me = pl + 1; 35 | } else { 36 | let mut pl = 0; 37 | let mut pr = 0; 38 | let mut first = true; 39 | for p in p_arr.iter() { 40 | let current_best = best_scoring_pos[*p]; 41 | if first { 42 | pl = current_best; 43 | pr = current_best; 44 | first = false; 45 | } 46 | if current_best < pl { 47 | pl = current_best; 48 | } 49 | if current_best > pr { 50 | pr = current_best; 51 | } 52 | } 53 | ms = pl + 1; 54 | me = pr + 1; 55 | } 56 | let tmp_bs = cmp::min(ms as i32, (seq_len as i32 - r_val as i32) - bta as i32); 57 | let band_start = if tmp_bs < 0 { 58 | 0 59 | } else { 60 | cmp::max(0, tmp_bs as usize) 61 | }; 62 | let band_end = if seq_len > 
r_val { 63 | cmp::min(seq_len, cmp::max(me, seq_len - r_val) + bta) 64 | } else { 65 | cmp::min(seq_len, me + bta) 66 | }; 67 | if simd_version { 68 | set_left_right_x64(band_start, band_end, seq_len) 69 | } else { 70 | (band_start, band_end) 71 | } 72 | } 73 | 74 | fn set_left_right_x64(left: usize, right: usize, seq_len: usize) -> (usize, usize) { 75 | let mut new_right = right; 76 | let mut new_left = left; 77 | while (new_right - new_left) % 8 != 0 { 78 | if (new_right - new_left) % 2 == 0 && new_right < seq_len { 79 | new_right += 1; 80 | } else if new_left > 0 { 81 | new_left -= 1; 82 | } else { 83 | break; 84 | } 85 | } 86 | if new_left == 0 { 87 | while (new_right - 1) % 8 != 0 && new_right < seq_len { 88 | new_right += 1; 89 | } 90 | } 91 | if new_right == seq_len { 92 | while (new_right - new_left) % 8 != 0 && new_left > 1 { 93 | new_left -= 1 94 | } 95 | } 96 | 97 | (new_left, new_right) 98 | } 99 | 100 | /// Set R score for each node of the graph, this is done before the dp algorithm. 
101 | /// R represent the most likely distance of each node to the last node of the graph and is used 102 | /// in order to compute the band size for this node in the DP matrix 103 | pub fn set_r_values( 104 | nwp: &bit_vec::BitVec, 105 | pred_hash: &HashMap>, 106 | lnz_len: usize, 107 | ) -> Vec { 108 | let mut r_values: Vec = vec![-1; lnz_len]; 109 | r_values[lnz_len - 1] = 0; 110 | for p in pred_hash.get(&(lnz_len - 1)).unwrap() { 111 | r_values[*p] = 0; 112 | } 113 | for i in (1..lnz_len - 1).rev() { 114 | if r_values[i] == -1 || r_values[i] > r_values[i + 1] + 1 { 115 | r_values[i] = r_values[i + 1] + 1; 116 | } 117 | if nwp[i] { 118 | for p in pred_hash.get(&i).unwrap() { 119 | if r_values[*p] == -1 || r_values[*p] > r_values[i] + 1 { 120 | r_values[*p] = r_values[i] + 1; 121 | } 122 | } 123 | } 124 | } 125 | r_values.iter().map(|x| *x as usize).collect() 126 | } 127 | 128 | #[inline] 129 | pub fn get_max_d_u_l(d: i32, u: i32, l: i32) -> (i32, char) { 130 | match d.cmp(&u) { 131 | Ordering::Less => match u.cmp(&l) { 132 | Ordering::Less => (l, 'L'), 133 | _ => (u, 'U'), 134 | }, 135 | _ => match d.cmp(&l) { 136 | Ordering::Less => (l, 'L'), 137 | _ => (d, 'D'), 138 | }, 139 | } 140 | } 141 | 142 | /// Set for each node of the LnzGraph the handle id in the .gfa file, 143 | /// this enable the creation of the gaf output with same nodes as the original .gfa file 144 | pub fn create_handle_pos_in_lnz( 145 | nwp: &BitVec, 146 | file_path: &str, 147 | amb_mode: bool, 148 | ) -> HashMap { 149 | let sorted_handles = crate::graph::get_sorted_handles(file_path, amb_mode); 150 | let mut curr_handle_idx = 0; 151 | let mut handle_of_lnz_pos = HashMap::new(); 152 | for i in 1..nwp.len() - 1 { 153 | if nwp[i] { 154 | curr_handle_idx += 1; 155 | } 156 | handle_of_lnz_pos.insert( 157 | i, 158 | sorted_handles[(curr_handle_idx - 1) as usize] 159 | .id() 160 | .to_string(), 161 | ); 162 | } 163 | handle_of_lnz_pos.insert(0, String::from("-1")); 164 | handle_of_lnz_pos 165 | } 
166 | 167 | /// Same as create_handle_pos_in_lnz, but works with an HashGraph and a LnzGraph instead of 168 | /// a .gfa file 169 | pub fn handle_pos_in_lnz_from_hashgraph( 170 | nwp: &BitVec, 171 | graph: &HashGraph, 172 | amb_mode: bool, 173 | ) -> HashMap { 174 | let mut sorted_handles: Vec = graph.handles_iter().collect(); 175 | sorted_handles.sort(); 176 | if amb_mode { 177 | sorted_handles.reverse(); 178 | sorted_handles = sorted_handles 179 | .iter() 180 | .map(|h| h.flip()) 181 | .collect::>(); 182 | } 183 | let mut curr_handle_idx = 0; 184 | let mut handle_of_lnz_pos = HashMap::new(); 185 | for i in 1..nwp.len() - 1 { 186 | if nwp[i] { 187 | curr_handle_idx += 1; 188 | } 189 | handle_of_lnz_pos.insert( 190 | i, 191 | sorted_handles[(curr_handle_idx - 1) as usize] 192 | .id() 193 | .to_string(), 194 | ); 195 | } 196 | handle_of_lnz_pos.insert(0, String::from("-1")); 197 | handle_of_lnz_pos 198 | } 199 | 200 | pub fn write_gaf(gaf_out: &str, number: usize) { 201 | let out_file = args_parser::get_out_file(); 202 | if out_file == "standard output" { 203 | println!("{}", gaf_out) 204 | } else { 205 | let file_name = Path::new(&out_file); 206 | let file = if file_name.exists() && number != 1 { 207 | OpenOptions::new() 208 | .write(true) 209 | .append(true) 210 | .open(file_name) 211 | .unwrap() 212 | } else { 213 | File::create(file_name).expect("unable to create file") 214 | }; 215 | 216 | let f = &mut BufWriter::new(&file); 217 | writeln!(f, "{}", gaf_out).expect("error in writing"); 218 | } 219 | } 220 | 221 | pub fn get_path_len_start_end( 222 | handles_nodes_id: &Vec, 223 | start: usize, 224 | end: usize, 225 | path_len: usize, 226 | ) -> (usize, usize, usize) { 227 | let mut path_start = 0; 228 | if start > 0 { 229 | let first_node_id = handles_nodes_id[start]; 230 | let mut counter = start - 1; 231 | while counter > 0 && handles_nodes_id[counter] == first_node_id { 232 | counter -= 1; 233 | path_start += 1; 234 | } 235 | } 236 | let path_end = if path_len 
> 0 { 237 | path_start + path_len - 1 238 | } else { 239 | 0 240 | }; 241 | 242 | let mut end_offset = 0; 243 | if end > 0 { 244 | let last_node_id = handles_nodes_id[end]; 245 | let mut counter = end + 1; 246 | while counter < handles_nodes_id.len() - 1 && handles_nodes_id[counter] == last_node_id { 247 | counter += 1; 248 | end_offset += 1; 249 | } 250 | } 251 | 252 | let path_len = path_end + end_offset + 1; 253 | (path_len, path_start, path_end) 254 | } 255 | 256 | pub fn get_rec_path_len_start_end( 257 | handles_nodes_id: &Vec, 258 | fen: usize, 259 | rsn: usize, 260 | start: usize, 261 | end: usize, 262 | forw_path_length: usize, 263 | rev_path_length: usize, 264 | ) -> (usize, usize, usize) { 265 | //forward path info 266 | let mut path_start = 0; 267 | if start > 0 { 268 | let first_node_id = handles_nodes_id[start]; 269 | let mut counter = start - 1; 270 | while counter > 0 && handles_nodes_id[counter] == first_node_id { 271 | counter -= 1; 272 | path_start += 1; 273 | } 274 | } 275 | 276 | let forw_path_end = if forw_path_length > 0 { 277 | path_start + forw_path_length - 1 278 | } else { 279 | 0 280 | }; 281 | 282 | let mut forw_end_offset = 0; 283 | if fen > 0 { 284 | let last_node_id = handles_nodes_id[fen]; 285 | let mut counter = fen + 1; 286 | while counter < handles_nodes_id.len() - 1 && handles_nodes_id[counter] == last_node_id { 287 | counter += 1; 288 | forw_end_offset += 1; 289 | } 290 | } 291 | let forw_path_len = forw_path_end + forw_end_offset + 1; 292 | 293 | //reverse path info 294 | let mut rev_path_start = 0; 295 | if rsn > 0 { 296 | let first_node_id = handles_nodes_id[rsn]; 297 | let mut counter = rsn - 1; 298 | while counter > 0 && handles_nodes_id[counter] == first_node_id { 299 | counter -= 1; 300 | rev_path_start += 1; 301 | } 302 | } 303 | 304 | let rev_path_end = if rev_path_length > 0 { 305 | rev_path_start + rev_path_length - 1 306 | } else { 307 | 0 308 | }; 309 | let path_end = forw_path_len + rev_path_end; 310 | let mut 
end_offset = 0; 311 | if end > 0 { 312 | let last_node_id = handles_nodes_id[end]; 313 | let mut counter = end + 1; 314 | while counter < handles_nodes_id.len() - 1 && handles_nodes_id[counter] == last_node_id { 315 | counter += 1; 316 | end_offset += 1; 317 | } 318 | } 319 | let rev_path_len = rev_path_end + end_offset + 1; 320 | let path_len = forw_path_len + rev_path_len; 321 | 322 | (path_len, path_start, path_end) 323 | } 324 | -------------------------------------------------------------------------------- /src/graph.rs: -------------------------------------------------------------------------------- 1 | use bit_vec::BitVec; 2 | use gfa::{gfa::*, parser::GFAParser}; 3 | use handlegraph::{ 4 | handle::{Direction, Handle, NodeId}, 5 | handlegraph::HandleGraph, 6 | hashgraph::HashGraph, 7 | }; 8 | use std::collections::HashMap; 9 | /// Create a LnzGraph from a .gfa file. 10 | /// Important: nodes must be in topological order 11 | pub fn read_graph(file_path: &str, amb_mode: bool) -> LnzGraph { 12 | let parser = GFAParser::new(); 13 | let gfa: GFA = parser.parse_file(file_path).unwrap(); 14 | 15 | let graph: HashGraph = HashGraph::from_gfa(&gfa); 16 | create_graph_struct(&graph, amb_mode) 17 | } 18 | 19 | /// Representation of a HandleGraph inside POA. 20 | /// lnz represents the label of each node 21 | /// nwp(i) is true if node i has multiple predecessor 22 | /// pred_hash contains the predecessor of each node with more than one predecessor 23 | pub struct LnzGraph { 24 | pub lnz: Vec, 25 | pub nwp: BitVec, 26 | pub pred_hash: HashMap>, 27 | } 28 | 29 | /// Transform an &HashGraph into a LnzGraph, amb_mode returns the rev and compl of the graph, this is needed only 30 | /// for showing a better output. 
31 | pub fn create_graph_struct(graph: &HashGraph, amb_mode: bool) -> LnzGraph { 32 | let mut sorted_handles: Vec = graph.handles_iter().collect(); 33 | sorted_handles.sort(); 34 | if amb_mode { 35 | sorted_handles.reverse(); 36 | sorted_handles = sorted_handles 37 | .iter() 38 | .map(|h| h.flip()) 39 | .collect::>(); 40 | } 41 | let mut last_index = 1; 42 | let mut visited_node: HashMap = HashMap::new(); 43 | let mut last_nodes: HashMap = HashMap::new(); 44 | let mut handles_id_position = HashMap::new(); 45 | let mut linearization: Vec = vec!['$']; 46 | // concateno tutte le sequenze 47 | for handle in &sorted_handles { 48 | let start_pos = last_index; 49 | for c in graph.sequence(*handle) { 50 | linearization.push(c as char); 51 | last_index += 1; 52 | } 53 | let last_pos = last_index - 1; // last position included in position of current handle in lnz 54 | handles_id_position.insert(handle.id(), (start_pos, last_pos)); 55 | visited_node.insert(handle.id(), last_index - 1); 56 | last_nodes.insert(handle.id(), last_index - 1); 57 | } 58 | let mut nodes_with_predecessor = BitVec::from_elem(linearization.len() + 1, false); 59 | let mut predecessor_hash: HashMap> = HashMap::new(); 60 | 61 | // per ogni sequenza guardo i predecessori e aggiorno valore corrispondente 62 | for handle in &sorted_handles { 63 | if visited_node.get(&handle.id()).is_some() { 64 | if graph 65 | .handle_edges_iter(*handle, Direction::Left) 66 | .into_iter() 67 | .count() 68 | == 0 69 | { 70 | let h_last_idx = get_idx(&visited_node, handle.id()); 71 | let handle_start_pos = h_last_idx as usize - graph.sequence(*handle).len() + 1; 72 | nodes_with_predecessor.set(handle_start_pos, true); 73 | update_hash(&mut predecessor_hash, handle_start_pos, 0); 74 | } 75 | for predecessor in graph.handle_edges_iter(*handle, Direction::Left) { 76 | let pred_last_idx = get_idx(&visited_node, predecessor.id()); 77 | let h_last_idx = get_idx(&visited_node, handle.id()); 78 | let handle_start_pos = h_last_idx as 
usize - graph.sequence(*handle).len() + 1; 79 | last_nodes.remove(&predecessor.id()); 80 | nodes_with_predecessor.set(handle_start_pos, true); 81 | update_hash( 82 | &mut predecessor_hash, 83 | handle_start_pos, 84 | pred_last_idx as usize, 85 | ); 86 | } 87 | } 88 | } 89 | 90 | set_last_node( 91 | &mut linearization, 92 | &mut nodes_with_predecessor, 93 | &mut predecessor_hash, 94 | &last_nodes, 95 | ); 96 | 97 | LnzGraph { 98 | lnz: linearization, 99 | nwp: nodes_with_predecessor, 100 | pred_hash: predecessor_hash, 101 | } 102 | } 103 | 104 | fn update_hash(hashmap: &mut HashMap>, k: usize, val: usize) { 105 | if let Some(arr) = hashmap.get_mut(&k) { 106 | arr.push(val); 107 | } else { 108 | hashmap.insert(k, vec![val]); 109 | } 110 | } 111 | 112 | fn set_last_node( 113 | linearization: &mut Vec, 114 | nodes_with_predecessor: &mut BitVec, 115 | predecessor_hash: &mut HashMap>, 116 | last_nodes: &HashMap, 117 | ) { 118 | linearization.insert(linearization.len(), 'F'); 119 | nodes_with_predecessor.set(linearization.len() - 1, true); 120 | for (_, idx) in last_nodes.iter() { 121 | update_hash(predecessor_hash, linearization.len() - 1, *idx as usize); 122 | } 123 | } 124 | 125 | fn get_idx(visited_node: &HashMap, pred_id: NodeId) -> i32 { 126 | *visited_node.get(&pred_id).unwrap() 127 | } 128 | pub fn get_sorted_handles(file_path: &str, amb_mode: bool) -> Vec { 129 | let parser = GFAParser::new(); 130 | let gfa: GFA = parser.parse_file(file_path).unwrap(); 131 | 132 | let graph: HashGraph = HashGraph::from_gfa(&gfa); 133 | let mut sorted_handles: Vec = graph.handles_iter().collect(); 134 | sorted_handles.sort(); 135 | if amb_mode { 136 | sorted_handles.reverse(); 137 | sorted_handles = sorted_handles 138 | .iter() 139 | .map(|h| h.flip()) 140 | .collect::>(); 141 | } 142 | sorted_handles 143 | } 144 | /// DEMO, used by pathwise_alignment 145 | /// Returns, for each node, the paths the node belong to. 
146 | pub fn create_nodes_paths(file_path: &str) -> Vec> { 147 | let parser = GFAParser::new(); 148 | let gfa: GFA = parser.parse_file(file_path).unwrap(); 149 | 150 | let graph: HashGraph = HashGraph::from_gfa(&gfa); 151 | let paths = &graph.paths; 152 | let mut every_path = vec![]; 153 | for path in paths.keys() { 154 | every_path.push(*path as usize) 155 | } 156 | 157 | let mut sorted_handles: Vec = graph.handles_iter().collect(); 158 | sorted_handles.sort(); 159 | let mut paths_node: Vec> = Vec::new(); 160 | let mut current_position = 0; 161 | paths_node.insert(0, every_path); 162 | current_position += 1; 163 | for handle in sorted_handles.iter() { 164 | let handle_length = &graph.sequence(*handle).len(); 165 | for i in current_position..current_position + handle_length { 166 | paths_node.insert(i, vec![]); 167 | for (path_id, path) in paths.iter() { 168 | if path.nodes.contains(handle) { 169 | paths_node[i].push(*path_id as usize); 170 | } 171 | } 172 | } 173 | current_position += handle_length; 174 | } 175 | let mut every_path = vec![]; 176 | for path in paths.keys() { 177 | every_path.push(*path as usize) 178 | } 179 | paths_node.insert(current_position, every_path); 180 | for p in paths_node.iter_mut() { 181 | p.sort_unstable() 182 | } 183 | paths_node 184 | } 185 | 186 | #[cfg(test)] 187 | mod tests { 188 | use std::collections::HashMap; 189 | 190 | use handlegraph::{handle::Edge, hashgraph::HashGraph, mutablehandlegraph::MutableHandleGraph}; 191 | 192 | #[test] 193 | fn graph_struct_correctly_created() { 194 | let mut graph: HashGraph = HashGraph::new(); 195 | let h1 = graph.append_handle("A".as_bytes()); 196 | let h2 = graph.append_handle("T".as_bytes()); 197 | let h3 = graph.append_handle("C".as_bytes()); 198 | let h4 = graph.append_handle("G".as_bytes()); 199 | 200 | graph.create_edge(&Edge(h1, h2)); 201 | graph.create_edge(&Edge(h2, h3)); 202 | graph.create_edge(&Edge(h3, h4)); 203 | 204 | let graph_struct = super::create_graph_struct(&graph, false); 
205 | assert!(graph_struct.nwp[1]); 206 | assert!(graph_struct.nwp[5]); 207 | assert_eq!(graph_struct.pred_hash.get(&1).unwrap()[0], 0); 208 | assert_eq!(graph_struct.pred_hash.get(&5).unwrap()[0], 4); 209 | assert_eq!(graph_struct.lnz, ['$', 'A', 'T', 'C', 'G', 'F']); 210 | } 211 | #[test] 212 | fn rev_graph_struct_correctly_created() { 213 | let mut graph: HashGraph = HashGraph::new(); 214 | let h1 = graph.append_handle("A".as_bytes()); 215 | let h2 = graph.append_handle("T".as_bytes()); 216 | let h3 = graph.append_handle("C".as_bytes()); 217 | let h4 = graph.append_handle("G".as_bytes()); 218 | 219 | graph.create_edge(&Edge(h1, h2)); 220 | graph.create_edge(&Edge(h2, h3)); 221 | graph.create_edge(&Edge(h3, h4)); 222 | 223 | let graph_struct = super::create_graph_struct(&graph, true); 224 | assert!(graph_struct.nwp[1]); 225 | assert!(graph_struct.nwp[5]); 226 | assert_eq!(graph_struct.pred_hash.get(&1).unwrap()[0], 0); 227 | assert_eq!(graph_struct.pred_hash.get(&5).unwrap()[0], 4); 228 | assert_eq!(graph_struct.lnz, ['$', 'C', 'G', 'A', 'T', 'F']); 229 | } 230 | #[test] 231 | fn handle_id_from_lnz_pos_and_sorted_handles() { 232 | let mut graph: HashGraph = HashGraph::new(); 233 | let h1 = graph.append_handle("A".as_bytes()); 234 | let h2 = graph.append_handle("TA".as_bytes()); 235 | let h3 = graph.append_handle("CGG".as_bytes()); 236 | let h4 = graph.append_handle("G".as_bytes()); 237 | let h5 = graph.append_handle("TCCCC".as_bytes()); 238 | 239 | graph.create_edge(&Edge(h1, h2)); 240 | graph.create_edge(&Edge(h1, h3)); 241 | graph.create_edge(&Edge(h3, h4)); 242 | graph.create_edge(&Edge(h3, h5)); 243 | 244 | let gs = super::create_graph_struct(&graph, false); 245 | let mut curr_hand = -1; 246 | let mut handle_pos = HashMap::new(); 247 | for i in 1..gs.lnz.len() - 1 { 248 | if gs.nwp[i] { 249 | curr_hand += 1; 250 | } 251 | handle_pos.insert(i, curr_hand); 252 | } 253 | assert_eq!(handle_pos.get(&1).unwrap(), &0); 254 | assert_eq!(handle_pos.get(&2).unwrap(), 
&1); 255 | assert_eq!(handle_pos.get(&4).unwrap(), &2); 256 | assert_eq!(handle_pos.get(&6).unwrap(), &2); 257 | assert_eq!(handle_pos.get(&7).unwrap(), &3); 258 | assert_eq!(handle_pos.get(&12).unwrap(), &4); 259 | } 260 | } 261 | -------------------------------------------------------------------------------- /benches/recgraph_benchmark.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; 4 | use recgraph::{global_abpoa, graph, local_poa, score_matrix, utils}; 5 | 6 | fn bench_local_simd_no_simd(c: &mut Criterion) { 7 | let seq = "TGATATAAAGAAATGAGATTTATTGCCTTGTGGGGGGAAGGGATGTGGTTGTGATAGGCAGGCCACTCTGGGATCCCTGGGATGCAAGCCCAGGGACAGCAGAGTCCCCAGGTGGGAAATCTACACACACACCCCAGGGATGTCCCAGAGACTTCTACCCTAAGAGGAGATCCTGGGCAGGATGTGAGAAATCTGAGCATCCTCTGTTTGGATGGCCGAAGCTGCTGGCATCAAACTCTGGTCTGGAAGAATCAGTCTGGGGGAGAGACAGGGATGGAGGAAAGGCATCAGGGGATCCATCCTCCTCCTCCTTCTCCTCCTCCTCCTCCCCCACAAAGGCCTTGCTCGCCCTGCCTGCACCACACCCTGCAGAAGTTGATCTCTCCTTGTTCCCAAATCATCTCCAAGCACCCTTCCTACAGCACCCCATGATTCCTTTTTTCACTCAAAGCAATTCTTGTGACCCATAACTGTGTGTGTGTAACTGGGTCCCCAACTGGGAAGATGTGCCCCCATGGTGCTGGATACAGGCCCCCACACCCAAGGGCCTGAGGATCGCTATATGTCCCCCCATGCCACAAAATAATCCTGACACATGCACGCATGCACCACTGTATCTGGCTCCCACAGGCTCACCCGCCCCCTCCAGATGACATACCACCTGAGCAAGGCTTCCGGAAGTAGATGATGAGAACAATGCCCACGATGATGCCCAGCACACCCAGGCCAAAGGCCACGCCACACAGCACATTCTCCAGCAGATCTGAGGGCAGTGCGTTCCGGGGTACTGGAGGAAATGAGTGGCTCAGCCTGGGGACCTAGTTAGGGAGCCTCCCACCCAGGGAAATGACGTGGGTGTCTGGGATGACATGGGAGACTGGGATGGGCTTAGGGTAGGAATGGACTAAACAAGGTACCAGTGGAGAAAGAAGCCTCCTCCCATGGATCTATCCCTTTTTGCCCCCAAAAGGACCAGAATTCCAGGGAGAAAGCCTCACCCCAATAGGCAATTGCTGTGTAGCGGTCAATTTCGTGAGTCACAATGCAGGAGAAAATGTCAGAAGGTTCTGGTGTGAAGTTTAAGTAAGAAAAGGCCTGGAAGCTGAGTCCATCGACAGCTGAGACAAAAGTAGGCCCAAATCCTTCCACAGGGACGGAATGATGCTGCCAGTTCACTGTCAGCATGGGTGGGAAGAGATTACTGACAAAACAGACCAAAGTGTTGGGCTTGCCAAACTCCAGGGGCTTCAGCGTGAACACTTCAGCGATAGGAAACCCTGGTGGGGGGATTGAAGTGTAGG
GGGAAAAAGAGACTAGTTTAGATGGTATCTCTGTGTTTGGAGGGGCCATGGCATATGGAGGGGAGGGCAGAGAAGAACACAGTGGGTCAGGCTTTGGGAGACAGAGATGAGCGAGGAGCTGGGCTCTGAAGGGAGGTCTTCTTCCAGGCAAGGACTGCAGCTAGACATAGAAGCAGAGCCAGATCCAGGCTACTCTGGACCCCTCCACCATGACTTCCTTCAGCACTTCCTGTCTAGAGCTCACATTGATGTCTAACCATGCACTGTCTTCTCACTAAGACATAGTCACGTCATCAGATATTTCCACTCTTCCCATCCATCTTGCTGGGCATAGTAGCACAAGTGTTAATATTCAGTAGGTATCAGTTGGTACCTGTTGAATTCATCACATTCAATACATAGTTCTGAATGCCTACTACATGCTAGGTACTTCGGCCCACCAAAAGAACACAGGGTGCAGACCAAGGCTGGTGGAAAAATTAAGGTGATGAAGAGAACCAGAAAGTATTTGAGATGGGGAGCTGGTATCAAGGGGAATTATTCAGTGTACAGATCAATGAGGTTAATGCAGCCCTCCTCCCTTCACTCCCCAGAAAACTCCTGACCTCTGGACACCGGGATTTTCCCATCAAGTTTTGGCCCTATTTGCTGGATCATCCACTCGCAGAACTCTTTGTCAAATAAAATGGCAGGAGCATCTCCCTGTTCCTGAGCCCAGTCAGCAAATTCGGGCAGGCGAGGCACCCGAGTGTTCTGGGAAAAGTCGAAGAAGAAAAGCTGGTCCTCGTCGTAGGCCTCAGAGAGTCCCACACTGGGACTCCCATCCTGGCAGTACACTGTGTGCAGGAATGTGTGGTTTTGCAGGTCATCTGGCCACATTGGAGTAGGAGCTGCAAAGGACACAGGGTGAGGTTCAGGGAGGTGGGAGCCTTCTCCTCCAACTTAAAAAACAGCAAGGTGGGGCTAGGCGCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGTGGATCATGAGGTCAGGAGTTTGAGACCAGCCTGGCCAGCATGGTGAAACTCCGTCTCTACTAAAAATACAAAAAAGTAGCTGGGCATGTTGGCATGCGCCTGTAGCTACTCGGGAGGCTGAGGGAGGAGAATTGCTTGAACCAGGGAGGCAGAGGTTGCCGGGAGCTAAGATTAAGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTCTGTCTCAAAACAAAACAACAAAAACAAGCAAGGCCTGCTTAAGGAGCGTGGGCTGAGGTGAGACCCTTTCCTGTGTCTGTTATTTAGACTCCCCCTCCCAAAGGGGGTGAAGAACAAATTATGGCATCTCTCCAAGCTTCCCCTGCCTATAAAAAGGCCAGTTGGCAAAAGTAAAGAGTTCTACTTTCTAAAGTGACAGATTCAGGCCAGGCATGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGCAGATTGCTTGAGCCCAGGAGTTCAAGACCAACCTGGGCAACACAGCGAGACCCTGTCTCTACAAAAAATACAAAAACTTAGCCAGGTGTGGTGGCAAACACCTGTGGTCTCAGCTACTCTGGAGGCTGAGGCAGGAGGATTGCTTGTGCCTAGGAAGTTGGGGCTGCAGTGAGCCATGATTGTGCCACTGGACTCCAGCCCAGGTGACAGAATGAGCCCGTCTCAAAAAATATATATATAAAGGCCGGGCGCGGTGGCTCAAGCTTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCAAACATGATGAAACCCCATCTCTACTAAAAATACAAAAATCAGCTGGGTGTGGTGGCATGCGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAGTCTCTTGAACCCCAGAGGCAGGGGTTGCAGGGAGCCGAGATCACGTCACTGCACTCTACCCTGGGTGACAGA
GCGAGATGCCGTGTCAAAAAAAATAAATTAAATCAAATAAAAAATTTAAAAATGTATATATATAAAATAAAGTGACAGATTCAGAGTCACTGTTCATTGTGTGTTTGGGGGCTGCACAAAGACACCTAGCCAAAGAAGCAAGTGAAAGCCTGCATTCTGCTCACCATGCCATACATCCTGGCATAGGGCTGTATCCTCCCAAAGGGGATTCCTTTGTCTAATTCATACCAGGCCACTGTATTGACTAGAGAAGGCCATGGATGGGTTTCTCACTCTTAGAAGGGAAAGAGGAGGAATGGCTACAGCCTCCCCAAGCCATAGATGGGACTGCCTCCCACTATCCCCAGACACAAATGGTAAATTGGAAAACCTGTATCCAGACATTTCTTCAGCCACTTCATTGGCACCAAGCGTCTCTCAAAATGTCTTCTGTTCCTTAACCTACCAGGCCTCCCAAAGACAGCAATGGGAGAAGTGACCCCATAACTGCATAAAATAATCCCTCTTCTTTGAAGCTCTTGGCAGGAATCGCTCAGCCAGCAGGAAACCTTTAACCCAATACCCAGAAAAACAGACATTTGGAGGAAGAGGGATCTTCCAGATTATTCTTCCATTCTGCCCCATCCTCTACAGAGAAGGAAACTAAGACACTTTTCAAGAATCACAAGATAAGTTAATGATAGAAAGCAGAGTAGAATCTTGAGTGGAGGAGTGAAAATAACATTCACTTTGTTCAAATCCCAGCTCTACCACTTTCCAATGGTGTGAACTTGCACAAATAACTCTGAGTCTCATTTTCTTCATTTGTAAAATGGAGAGAACAATCTCCGCTTCAAGAGATTGTCTTAAATGGAACATGCAAAGCATCACTGATATCGTTTACCAACCACACATAGCAGCTGTCTTTCCCCACTCCCCTGTTGTTTCCACTGCCTCATAAGACTTCCCACCACTCACAAAGCACAGCGCTTTTCCTCACAAAGCTGAGTGGGCTCCCTAGGTTCAGGATGGAAGTAAATAGGAGTACCATCTTACCTTCAGGGACGGCCCAGGAGTGGGGTAGCAGCCACAGAAGTGGTAACATCTGTAGCAGCGCAGCTCCTTGGTTCTGTTCATGACCCATACCTTCTTGCCACACAGTAGGTAGGAGCTACCAACCCAGCCAACCCAGCTTCCCCAACTCCCTCCCCGAGAGGGTGGCCTTAGAT"; 8 | let mut sequence = seq.chars().collect::>(); 9 | sequence.insert(0, '$'); 10 | 11 | let graph_struct = graph::read_graph( 12 | &"tests/DMA-3108.fa.353ea42.34ee7b1.1576367.smooth.fix.gfa", 13 | false, 14 | ); 15 | 16 | let score_matrix = score_matrix::create_score_matrix_match_mis(2, -4); 17 | let hofp = HashMap::new(); 18 | let mut scores_f32 = HashMap::new(); 19 | for (k, v) in score_matrix.iter() { 20 | scores_f32.insert(*k, *v as f32); 21 | } 22 | let mut group = c.benchmark_group("Local POA"); 23 | for i in [1, 2, 3] { 24 | unsafe { 25 | group.bench_with_input(BenchmarkId::new(" Simd", i), &i, |b, _i| { 26 | b.iter(|| { 27 | local_poa::exec_simd( 28 | black_box(&sequence), 29 | black_box(("name", 0)), 30 | black_box(&graph_struct), 31 | black_box(&scores_f32), 32 | 
false, 33 | &hofp, 34 | ) 35 | }) 36 | }); 37 | 38 | group.bench_with_input(BenchmarkId::new("No Simd", i), &i, |b, _i| { 39 | b.iter(|| { 40 | local_poa::exec( 41 | black_box(&sequence), 42 | black_box(("name", 0)), 43 | black_box(&graph_struct), 44 | black_box(&score_matrix), 45 | false, 46 | &hofp, 47 | ) 48 | }) 49 | }); 50 | } 51 | } 52 | } 53 | 54 | fn bench_global_simd_no_simd(c: &mut Criterion) { 55 | let seq = "TGATATAAAGAAATGAGATTTATTGCCTTGTGGGGGGAAGGGATGTGGTTGTGATAGGCAGGCCACTCTGGGATCCCTGGGATGCAAGCCCAGGGACAGCAGAGTCCCCAGGTGGGAAATCTACACACACACCCCAGGGATGTCCCAGAGACTTCTACCCTAAGAGGAGATCCTGGGCAGGATGTGAGAAATCTGAGCATCCTCTGTTTGGATGGCCGAAGCTGCTGGCATCAAACTCTGGTCTGGAAGAATCAGTCTGGGGGAGAGACAGGGATGGAGGAAAGGCATCAGGGGATCCATCCTCCTCCTCCTTCTCCTCCTCCTCCTCCCCCACAAAGGCCTTGCTCGCCCTGCCTGCACCACACCCTGCAGAAGTTGATCTCTCCTTGTTCCCAAATCATCTCCAAGCACCCTTCCTACAGCACCCCATGATTCCTTTTTTCACTCAAAGCAATTCTTGTGACCCATAACTGTGTGTGTGTAACTGGGTCCCCAACTGGGAAGATGTGCCCCCATGGTGCTGGATACAGGCCCCCACACCCAAGGGCCTGAGGATCGCTATATGTCCCCCCATGCCACAAAATAATCCTGACACATGCACGCATGCACCACTGTATCTGGCTCCCACAGGCTCACCCGCCCCCTCCAGATGACATACCACCTGAGCAAGGCTTCCGGAAGTAGATGATGAGAACAATGCCCACGATGATGCCCAGCACACCCAGGCCAAAGGCCACGCCACACAGCACATTCTCCAGCAGATCTGAGGGCAGTGCGTTCCGGGGTACTGGAGGAAATGAGTGGCTCAGCCTGGGGACCTAGTTAGGGAGCCTCCCACCCAGGGAAATGACGTGGGTGTCTGGGATGACATGGGAGACTGGGATGGGCTTAGGGTAGGAATGGACTAAACAAGGTACCAGTGGAGAAAGAAGCCTCCTCCCATGGATCTATCCCTTTTTGCCCCCAAAAGGACCAGAATTCCAGGGAGAAAGCCTCACCCCAATAGGCAATTGCTGTGTAGCGGTCAATTTCGTGAGTCACAATGCAGGAGAAAATGTCAGAAGGTTCTGGTGTGAAGTTTAAGTAAGAAAAGGCCTGGAAGCTGAGTCCATCGACAGCTGAGACAAAAGTAGGCCCAAATCCTTCCACAGGGACGGAATGATGCTGCCAGTTCACTGTCAGCATGGGTGGGAAGAGATTACTGACAAAACAGACCAAAGTGTTGGGCTTGCCAAACTCCAGGGGCTTCAGCGTGAACACTTCAGCGATAGGAAACCCTGGTGGGGGGATTGAAGTGTAGGGGGAAAAAGAGACTAGTTTAGATGGTATCTCTGTGTTTGGAGGGGCCATGGCATATGGAGGGGAGGGCAGAGAAGAACACAGTGGGTCAGGCTTTGGGAGACAGAGATGAGCGAGGAGCTGGGCTCTGAAGGGAGGTCTTCTTCCAGGCAAGGACTGCAGCTAGACATAGAAGCAGAGCCAGATCCAGGCTACTCTGGACCCCTCCACCATGACTTCCTTCAGCACTTCCTGTCTAGAGCTCACATTGATGTCTAACCATGCACTGTCTTCTCAC
TAAGACATAGTCACGTCATCAGATATTTCCACTCTTCCCATCCATCTTGCTGGGCATAGTAGCACAAGTGTTAATATTCAGTAGGTATCAGTTGGTACCTGTTGAATTCATCACATTCAATACATAGTTCTGAATGCCTACTACATGCTAGGTACTTCGGCCCACCAAAAGAACACAGGGTGCAGACCAAGGCTGGTGGAAAAATTAAGGTGATGAAGAGAACCAGAAAGTATTTGAGATGGGGAGCTGGTATCAAGGGGAATTATTCAGTGTACAGATCAATGAGGTTAATGCAGCCCTCCTCCCTTCACTCCCCAGAAAACTCCTGACCTCTGGACACCGGGATTTTCCCATCAAGTTTTGGCCCTATTTGCTGGATCATCCACTCGCAGAACTCTTTGTCAAATAAAATGGCAGGAGCATCTCCCTGTTCCTGAGCCCAGTCAGCAAATTCGGGCAGGCGAGGCACCCGAGTGTTCTGGGAAAAGTCGAAGAAGAAAAGCTGGTCCTCGTCGTAGGCCTCAGAGAGTCCCACACTGGGACTCCCATCCTGGCAGTACACTGTGTGCAGGAATGTGTGGTTTTGCAGGTCATCTGGCCACATTGGAGTAGGAGCTGCAAAGGACACAGGGTGAGGTTCAGGGAGGTGGGAGCCTTCTCCTCCAACTTAAAAAACAGCAAGGTGGGGCTAGGCGCAGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGGTGGATCATGAGGTCAGGAGTTTGAGACCAGCCTGGCCAGCATGGTGAAACTCCGTCTCTACTAAAAATACAAAAAAGTAGCTGGGCATGTTGGCATGCGCCTGTAGCTACTCGGGAGGCTGAGGGAGGAGAATTGCTTGAACCAGGGAGGCAGAGGTTGCCGGGAGCTAAGATTAAGCCACTGCACTCCAGCCTGGGTGACAGAGTGAGACTCTGTCTCAAAACAAAACAACAAAAACAAGCAAGGCCTGCTTAAGGAGCGTGGGCTGAGGTGAGACCCTTTCCTGTGTCTGTTATTTAGACTCCCCCTCCCAAAGGGGGTGAAGAACAAATTATGGCATCTCTCCAAGCTTCCCCTGCCTATAAAAAGGCCAGTTGGCAAAAGTAAAGAGTTCTACTTTCTAAAGTGACAGATTCAGGCCAGGCATGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGCAGATTGCTTGAGCCCAGGAGTTCAAGACCAACCTGGGCAACACAGCGAGACCCTGTCTCTACAAAAAATACAAAAACTTAGCCAGGTGTGGTGGCAAACACCTGTGGTCTCAGCTACTCTGGAGGCTGAGGCAGGAGGATTGCTTGTGCCTAGGAAGTTGGGGCTGCAGTGAGCCATGATTGTGCCACTGGACTCCAGCCCAGGTGACAGAATGAGCCCGTCTCAAAAAATATATATATAAAGGCCGGGCGCGGTGGCTCAAGCTTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGTGGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCAAACATGATGAAACCCCATCTCTACTAAAAATACAAAAATCAGCTGGGTGTGGTGGCATGCGCCTGTAATCCCAGCTACTTGGGAGGCTGAGGCAGGAGAGTCTCTTGAACCCCAGAGGCAGGGGTTGCAGGGAGCCGAGATCACGTCACTGCACTCTACCCTGGGTGACAGAGCGAGATGCCGTGTCAAAAAAAATAAATTAAATCAAATAAAAAATTTAAAAATGTATATATATAAAATAAAGTGACAGATTCAGAGTCACTGTTCATTGTGTGTTTGGGGGCTGCACAAAGACACCTAGCCAAAGAAGCAAGTGAAAGCCTGCATTCTGCTCACCATGCCATACATCCTGGCATAGGGCTGTATCCTCCCAAAGGGGATTCCTTTGTCTAATTCATACCAGGCCACTGTATTGACTAGAGAAGGCCATGGATGGGTTTCTCACTC
TTAGAAGGGAAAGAGGAGGAATGGCTACAGCCTCCCCAAGCCATAGATGGGACTGCCTCCCACTATCCCCAGACACAAATGGTAAATTGGAAAACCTGTATCCAGACATTTCTTCAGCCACTTCATTGGCACCAAGCGTCTCTCAAAATGTCTTCTGTTCCTTAACCTACCAGGCCTCCCAAAGACAGCAATGGGAGAAGTGACCCCATAACTGCATAAAATAATCCCTCTTCTTTGAAGCTCTTGGCAGGAATCGCTCAGCCAGCAGGAAACCTTTAACCCAATACCCAGAAAAACAGACATTTGGAGGAAGAGGGATCTTCCAGATTATTCTTCCATTCTGCCCCATCCTCTACAGAGAAGGAAACTAAGACACTTTTCAAGAATCACAAGATAAGTTAATGATAGAAAGCAGAGTAGAATCTTGAGTGGAGGAGTGAAAATAACATTCACTTTGTTCAAATCCCAGCTCTACCACTTTCCAATGGTGTGAACTTGCACAAATAACTCTGAGTCTCATTTTCTTCATTTGTAAAATGGAGAGAACAATCTCCGCTTCAAGAGATTGTCTTAAATGGAACATGCAAAGCATCACTGATATCGTTTACCAACCACACATAGCAGCTGTCTTTCCCCACTCCCCTGTTGTTTCCACTGCCTCATAAGACTTCCCACCACTCACAAAGCACAGCGCTTTTCCTCACAAAGCTGAGTGGGCTCCCTAGGTTCAGGATGGAAGTAAATAGGAGTACCATCTTACCTTCAGGGACGGCCCAGGAGTGGGGTAGCAGCCACAGAAGTGGTAACATCTGTAGCAGCGCAGCTCCTTGGTTCTGTTCATGACCCATACCTTCTTGCCACACAGTAGGTAGGAGCTACCAACCCAGCCAACCCAGCTTCCCCAACTCCCTCCCCGAGAGGGTGGCCTTAGAT"; 56 | let mut sequence = seq.chars().collect::<Vec<char>>(); 57 | sequence.insert(0, '$'); 58 | 59 | let graph_struct = graph::read_graph( 60 | &"tests/DMA-3108.fa.353ea42.34ee7b1.1576367.smooth.fix.gfa", 61 | false, 62 | ); 63 | 64 | let score_matrix = score_matrix::create_score_matrix_match_mis(2, -4); 65 | let hofp = HashMap::new(); 66 | let mut scores_f32 = HashMap::new(); 67 | for (k, v) in score_matrix.iter() { 68 | scores_f32.insert(*k, *v as f32); 69 | } 70 | let r_values = utils::set_r_values( 71 | &graph_struct.nwp, 72 | &graph_struct.pred_hash, 73 | graph_struct.lnz.len(), 74 | ); 75 | let mut group = c.benchmark_group("Global abPOA"); 76 | for i in [1, 2, 3] { 77 | unsafe { 78 | group.bench_with_input(BenchmarkId::new("Simd", i), &i, |b, _i| { 79 | b.iter(|| { 80 | global_abpoa::exec_simd( 81 | black_box(&sequence), 82 | black_box(("name", 0)), 83 | black_box(&graph_struct), 84 | black_box(&scores_f32), 85 | 300, 86 | false, 87 | &hofp, 88 | &r_values, 89 | ) 90 | }) 91 | }); 92 | 93 | group.bench_with_input(BenchmarkId::new("No Simd", i), &i,
|b, _i| { 94 | b.iter(|| { 95 | global_abpoa::exec( 96 | black_box(&sequence), 97 | black_box(("name", 0)), 98 | black_box(&graph_struct), 99 | black_box(&score_matrix), 100 | 300, 101 | false, 102 | &hofp, 103 | ) 104 | }) 105 | }); 106 | } 107 | } 108 | } 109 | 110 | criterion_group!(benches, bench_global_simd_no_simd, bench_local_simd_no_simd,); 111 | criterion_main!(benches); 112 | -------------------------------------------------------------------------------- /example/reads.fa: -------------------------------------------------------------------------------- 1 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_67_77_0_1_0_0_1:0:0_2:0:0_0/1 2 | CTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGCCGTC 3 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_10_19_0_1_0_0_3:0:0_1:0:0_1/1 4 | TTTCCACGCCATAAAGATGTCGCGAACAGTCGCCTGAAGATGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGATGGCAGCGATTCCGTTCTGGTATCTGATGACC 5 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_206_214_0_1_0_0_3:0:0_1:0:0_2/1 6 | GTATCGGCGTCTATCTTTGTCATCGGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCCCATGGCGCTGCCGACCAATGACTTGCAGTGGGTTGCCGCCGGGTTTGTGA 7 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_343_353_0_1_0_0_1:0:0_1:0:0_3/1 8 | GCCGGGTTTGTGATTTTCCGTATTCTGGATATGTGGAAACCGTGTCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATCATGATAGATGATATTGTCGCCGGGGTGATCTCCGCTGGCATCCTGTACTTTATTGGTCAT 9 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_274_285_0_1_0_0_2:0:0_1:0:0_4/1 10 | TGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGGATTCTGGATATGTGGAAGCCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGC 11 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_313_336_0_1_0_0_2:0:0_2:0:0_5/1 12 | 
CTGCCGTCCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATCATGATAGATCATATTGTCGCCGGGGTGATC 13 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_157_180_0_1_0_0_4:0:0_3:0:0_6/1 14 | ACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCAGTATCGGCGTCTATCTTTCTCATCAGACCGCAAAAGATATGGGGGTGCACGATCACGGCAGCATTGTCTGTGACGAATTTATTGGTATGTGGATCACGCTC 15 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_79_82_0_1_0_0_1:0:0_2:0:0_7/1 16 | GGATTCGGAAGTGGATTAAGTCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCAT 17 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_116_131_0_1_0_0_2:0:0_1:0:0_8/1 18 | CGATGGGCTCGCTGGCAGCGATTCCGTTGTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTAATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCA 19 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_283_290_0_1_0_0_2:0:0_2:0:0_9/1 20 | TTTATTGGTAAGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTGGATATCTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATC 21 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_205_212_0_1_0_0_1:0:0_3:0:0_a/1 22 | TGTATCGGCGTCTATCTTTGTCATCAGACGGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTG 23 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_153_165_0_1_0_0_3:0:0_1:0:0_b/1 24 | GATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGCCGTCTATCTTTGTCAGCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTTGGACGAATTTATTGGTATGTGGATCAC 25 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_92_100_0_1_0_0_1:0:0_0:0:0_c/1 26 | GATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGCCCGCAAAAG 27 | 
>NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_281_326_0_1_0_0_1:0:0_1:0:0_d/1 28 | AATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGAGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGA 29 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_138_174_0_1_0_0_1:0:0_3:0:0_e/1 30 | TCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGCACGAATTTAT 31 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_15_25_0_1_0_0_2:0:0_3:0:0_f/1 32 | ACGCCATAAAGATGTCGCGAAAAGTCGCCTGAAGATGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCATCGATTCCGCTCTGGTATCTGATGACCTTTTT 33 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_208_214_0_1_0_0_3:0:0_1:0:0_10/1 34 | ATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATCGGTATGTGGATTACGCTCATGGCGCTGCCGAGCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATT 35 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_21_24_0_1_0_0_0:0:0_3:0:0_11/1 36 | TAAAGATGTCGCGAAAAGTCGCCTGAAGATGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTG 37 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_41_63_0_1_0_0_2:0:0_1:0:0_12/1 38 | GCCTGAAGATGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTTAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTGCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGG 39 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_197_214_0_1_0_0_1:0:0_1:0:0_13/1 40 | TGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGGAGTGGGTTGCCGCCG 41 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_358_365_0_1_0_0_1:0:0_1:0:0_14/1 42 | 
TTCCGTATTCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTTATCGCAATGTGCATGGCGGCATGGGGATCATGATAGATGATATTGTCGCCGGGGTGATCTCCGCTGGCATCCTGTACTTTATTGGTCATCACTGGCCGCTGGGC 43 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_41_51_0_1_0_0_1:0:0_0:0:0_15/1 44 | GCCTGAAGATGAATAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGG 45 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_158_173_0_1_0_0_1:0:0_2:0:0_16/1 46 | CCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATAAGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCA 47 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_72_93_0_1_0_0_2:0:0_1:0:0_17/1 48 | TGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTACCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGACGTCTATCT 49 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_254_256_0_1_0_0_2:0:0_1:0:0_18/1 50 | ACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCATGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTGGTTATGTGGAAACCGTGGCCGATCCGCTGGTTTG 51 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_167_177_0_1_0_0_4:0:0_3:0:0_19/1 52 | CCTGGCAGCTATACTCGCTTGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAACGTTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGC 53 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_67_77_0_1_0_0_1:0:0_2:0:0_0/2 54 | TCGGATTCGGAAGTGGATTGACCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTC 55 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_10_19_0_1_0_0_3:0:0_1:0:0_1/2 56 | CATAAAGATGTCGCGAAAAGTCGCCTGAAGATGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCTGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCC 57 | 
>NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_206_214_0_1_0_0_3:0:0_1:0:0_2/2 58 | GTCTATCTTTGTCATCAGACCGCAAAAGATATTGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGT 59 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_343_353_0_1_0_0_1:0:0_1:0:0_3/2 60 | TGATTTTCCGTATTCTGGATATGTGGAAACCGCGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATCATGATAGATGATATTGTCGCCGGGGTGATCTCCGCTGGCATCCTGTACTTTATTGGTCATCACTGGCCGC 61 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_274_285_0_1_0_0_2:0:0_1:0:0_4/2 62 | TATTGGTATGTGGATCACGCTCATGGTGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATCAT 63 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_313_336_0_1_0_0_2:0:0_2:0:0_5/2 64 | GGTTGCCGCCGGGTTTGTGATTTTCCGTATCCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGAGATCATGATAGATGATATTGTCGCCGGGGTGATCTCCGCTGGCATCCTGTACTTTAT 65 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_157_180_0_1_0_0_4:0:0_3:0:0_6/2 66 | CTCGCTGGTGGTGATGCTGGGGATCTGTATCGGAGTCTATCTTCGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGAACAATGACTG 67 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_79_82_0_1_0_0_1:0:0_2:0:0_7/2 68 | TTCGGAAGTGGATTAAGCCCGATCGTCCCTGGGATGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAG 69 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_116_131_0_1_0_0_2:0:0_1:0:0_8/2 70 | CAGCGATTCCGTTCTGGTATCTGATGAACTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACG 71 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_283_290_0_1_0_0_2:0:0_2:0:0_9/2 72 | 
GTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATGCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATTGCAATGTGCATGGCGGCATGGGGATCATGATAG 73 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_205_212_0_1_0_0_1:0:0_3:0:0_a/2 74 | GCGTCTACCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCAATGTCTGGGACGAATTTATTGGTAGGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCC 75 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_153_165_0_1_0_0_3:0:0_1:0:0_b/2 76 | GCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCCAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCT 77 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_92_100_0_1_0_0_1:0:0_0:0:0_c/2 78 | CCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGC 79 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_281_326_0_1_0_0_1:0:0_1:0:0_d/2 80 | ACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTAGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATCATGATAGATGATATTGTCGCCGGGGTGATCTCCGCTGGCATCC 81 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_138_174_0_1_0_0_1:0:0_3:0:0_e/2 82 | GCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTTTATTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCTCTGCCGACCAA 83 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_15_25_0_1_0_0_2:0:0_3:0:0_f/2 84 | GATGTCGCGAAAAGTCGCCTGAACCTGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCAGGCAG 85 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_208_214_0_1_0_0_3:0:0_1:0:0_10/2 86 | GTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGGGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGT 87 | 
>NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_21_24_0_1_0_0_0:0:0_3:0:0_11/2 88 | AGATGTCGCGAAAAGTCGCCTGAAGATGAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGAATTAAGCCCGATCCTTCCTGGGACGATGGGCTCGCTGGTAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCA 89 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_41_63_0_1_0_0_2:0:0_1:0:0_12/2 90 | GCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTTGGGATCTGTATCGG 91 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_197_214_0_1_0_0_1:0:0_1:0:0_13/2 92 | GTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGCTTGCCGCCGGGTTTGTGATTTTCCGT 93 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_358_365_0_1_0_0_1:0:0_1:0:0_14/2 94 | TTCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGATCGCAATGTGCATGGCGGCATGGGGATCATGATAGATGATATTGTCGCCGGGGTGATCTCCGTTGGCATCCTGTACTTTATTGGTCATCACTGGCCGCTGGGCATTCTGT 95 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_41_51_0_1_0_0_1:0:0_0:0:0_15/2 96 | GAGTAATCCGTGGCATCTACTTGCTGTCGGATTCGGAAGTGGATTAAGCCCGATCGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGG 97 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_158_173_0_1_0_0_1:0:0_2:0:0_16/2 98 | AGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGATATGGGCGTGCACGATCACGGCTGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCACGGCGCTGCCGACCA 99 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_72_93_0_1_0_0_2:0:0_1:0:0_17/2 100 | ATTAAGCCCGATGGTTCCTGGGACGATGGGCTCGCTGGCAGCGATTCCGTTCTGGTATCTGATGACCTTTTTGCCCTGGCAGCTCTACTCGCTGGTGGTGATGCTGGGGATCTGTATCGGCGTCTATCTTTGTCATCAGACCGCAAAAGA 101 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_254_256_0_1_0_0_2:0:0_1:0:0_18/2 102 | 
GATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGACTGGCAGTGGGTTGCCGCCGGGTTTGTGATTTTCCGTATTCTGGATATGTGGAAACCGTGGCCGATCCGCTGGTTTGGT 103 | >NZ_CP013029-AKK22_RS05085-1-phosphatidylglycerophosphatase_A_167_177_0_1_0_0_4:0:0_3:0:0_19/2 104 | CTACTCGCTGGTGGTGATGCTCGGGATCTGTATCGACGTCTATCTTTGTCATCAGACCGCAAAAGGTATGGGCGTGCACGATCACGGCAGCATTGTCTGGGACGAATTTATTGGTATGTGGATCACGCTCATGGCGCTGCCGACCAATGA 105 | -------------------------------------------------------------------------------- /example/graph.gfa: -------------------------------------------------------------------------------- 1 | H VN:Z:1.0 2 | S 1 ATGAC 3 | L 1 + 3 + 0M 4 | S 2 ATGAT 5 | L 2 + 3 + 0M 6 | S 3 CATTTTGCCACGCCATAAAGA 7 | L 3 + 4 + 0M 8 | L 3 + 5 + 0M 9 | S 4 T 10 | L 4 + 6 + 0M 11 | S 5 A 12 | L 5 + 6 + 0M 13 | S 6 GTCGCGAAAAGTCGCCT 14 | L 6 + 7 + 0M 15 | L 6 + 8 + 0M 16 | L 6 + 9 + 0M 17 | L 6 + 10 + 0M 18 | S 7 GAAGA 19 | L 7 + 11 + 0M 20 | S 8 GAAAA 21 | L 8 + 11 + 0M 22 | S 9 TAAGC 23 | L 9 + 11 + 0M 24 | S 10 CAAGA 25 | L 10 + 11 + 0M 26 | S 11 TGAGTAATCCGTGGCATCT 27 | L 11 + 12 + 0M 28 | L 11 + 13 + 0M 29 | S 12 G 30 | L 12 + 14 + 0M 31 | S 13 A 32 | L 13 + 14 + 0M 33 | S 14 CTTGCTGT 34 | L 14 + 20 + 0M 35 | L 14 + 15 + 0M 36 | L 14 + 16 + 0M 37 | S 15 C 38 | L 15 + 17 + 0M 39 | S 16 T 40 | L 16 + 17 + 0M 41 | S 17 GGATTCGG 42 | L 17 + 18 + 0M 43 | L 17 + 19 + 0M 44 | S 18 A 45 | L 18 + 21 + 0M 46 | S 19 C 47 | L 19 + 21 + 0M 48 | S 20 TGGATTTGGT 49 | L 20 + 21 + 0M 50 | S 21 AGTGGATTAAGC 51 | L 21 + 22 + 0M 52 | L 21 + 23 + 0M 53 | L 21 + 24 + 0M 54 | L 21 + 28 + 0M 55 | L 21 + 29 + 0M 56 | L 21 + 33 + 0M 57 | L 21 + 34 + 0M 58 | S 22 CCA 59 | L 22 + 25 + 0M 60 | S 23 CCG 61 | L 23 + 25 + 0M 62 | S 24 TCA 63 | L 24 + 25 + 0M 64 | S 25 ATCGTTCCTGG 65 | L 25 + 26 + 0M 66 | L 25 + 27 + 0M 67 | S 26 C 68 | L 26 + 38 + 0M 69 | S 27 G 70 | L 27 + 38 + 0M 71 | S 28 CCG 72 | L 28 + 30 + 0M 73 | S 29 CCA 74 | L 29 + 30 + 0M 75 | S 30 ATCATTCCTGG 76 | L 30 + 31 + 0M 77 | L 30 + 32 + 0M 78 | 
S 31 G 79 | L 31 + 38 + 0M 80 | S 32 C 81 | L 32 + 38 + 0M 82 | S 33 CCG 83 | L 33 + 35 + 0M 84 | S 34 CCA 85 | L 34 + 35 + 0M 86 | S 35 ATTGTTCC 87 | L 35 + 36 + 0M 88 | L 35 + 37 + 0M 89 | S 36 TGGG 90 | L 36 + 38 + 0M 91 | S 37 AGGC 92 | L 37 + 38 + 0M 93 | S 38 ACGATGGG 94 | L 38 + 39 + 0M 95 | L 38 + 40 + 0M 96 | L 38 + 41 + 0M 97 | L 38 + 42 + 0M 98 | S 39 A 99 | L 39 + 43 + 0M 100 | S 40 C 101 | L 40 + 43 + 0M 102 | S 41 G 103 | L 41 + 43 + 0M 104 | S 42 T 105 | L 42 + 43 + 0M 106 | S 43 TCGCTGGC 107 | L 43 + 50 + 0M 108 | L 43 + 44 + 0M 109 | L 43 + 45 + 0M 110 | S 44 A 111 | L 44 + 46 + 0M 112 | S 45 G 113 | L 45 + 46 + 0M 114 | S 46 GCGATTCC 115 | L 46 + 47 + 0M 116 | L 46 + 48 + 0M 117 | L 46 + 49 + 0M 118 | S 47 G 119 | L 47 + 51 + 0M 120 | S 48 C 121 | L 48 + 51 + 0M 122 | S 49 A 123 | L 49 + 51 + 0M 124 | S 50 GGCGATCCCT 125 | L 50 + 51 + 0M 126 | S 51 TTCTGGTATCT 127 | L 51 + 52 + 0M 128 | L 51 + 53 + 0M 129 | L 51 + 54 + 0M 130 | L 51 + 55 + 0M 131 | L 51 + 56 + 0M 132 | S 52 GATGACCTTTTTA 133 | L 52 + 57 + 0M 134 | S 53 GATGACCTTTTTG 135 | L 53 + 57 + 0M 136 | S 54 GATGACTTTTTTG 137 | L 54 + 57 + 0M 138 | S 55 AATGACCTTTTTG 139 | L 55 + 57 + 0M 140 | S 56 AATGACCTTTTTA 141 | L 56 + 57 + 0M 142 | S 57 CCCTGGCA 143 | L 57 + 58 + 0M 144 | L 57 + 59 + 0M 145 | L 57 + 60 + 0M 146 | S 58 GCTCTAC 147 | L 58 + 61 + 0M 148 | S 59 GCTCTAT 149 | L 59 + 61 + 0M 150 | S 60 ACTCTAC 151 | L 60 + 61 + 0M 152 | S 61 TCGCTGGT 153 | L 61 + 62 + 0M 154 | L 61 + 63 + 0M 155 | L 61 + 64 + 0M 156 | L 61 + 65 + 0M 157 | L 61 + 66 + 0M 158 | S 62 GGTGATGTTGG 159 | L 62 + 67 + 0M 160 | S 63 GGTGATGCTGG 161 | L 63 + 67 + 0M 162 | S 64 GGTAATGCTGG 163 | L 64 + 67 + 0M 164 | S 65 AGTCATGCTGG 165 | L 65 + 67 + 0M 166 | S 66 GGTGATGCTGA 167 | L 66 + 67 + 0M 168 | S 67 GGATCTGTATCGG 169 | L 67 + 68 + 0M 170 | L 67 + 69 + 0M 171 | L 67 + 70 + 0M 172 | S 68 CGTC 173 | L 68 + 71 + 0M 174 | S 69 CGTG 175 | L 69 + 71 + 0M 176 | S 70 TGTG 177 | L 70 + 71 + 0M 178 | S 71 TATCTTTGTCATCA 
179 | L 71 + 75 + 0M 180 | L 71 + 72 + 0M 181 | L 71 + 73 + 0M 182 | S 72 AACGGCG 183 | L 72 + 74 + 0M 184 | S 73 GACCGCA 185 | L 73 + 74 + 0M 186 | S 74 AAAGATATGGGC 187 | L 74 + 79 + 0M 188 | S 75 AACGGCGAAAGACATGGG 189 | L 75 + 76 + 0M 190 | L 75 + 77 + 0M 191 | L 75 + 78 + 0M 192 | S 76 T 193 | L 76 + 79 + 0M 194 | S 77 C 195 | L 77 + 79 + 0M 196 | S 78 G 197 | L 78 + 79 + 0M 198 | S 79 GTGCACGATCA 199 | L 79 + 80 + 0M 200 | L 79 + 81 + 0M 201 | L 79 + 82 + 0M 202 | L 79 + 83 + 0M 203 | L 79 + 84 + 0M 204 | L 79 + 85 + 0M 205 | L 79 + 86 + 0M 206 | L 79 + 87 + 0M 207 | S 80 CGGCAGCATTGTCTGGGACGAATTTATTGGT 208 | L 80 + 88 + 0M 209 | S 81 TGGCAGCATTGTCTGGGACGAATTTATTGGT 210 | L 81 + 88 + 0M 211 | S 82 CGGCAGCATTGTCTGGGACGAATTTATCGGC 212 | L 82 + 88 + 0M 213 | S 83 TGGCAGCATTGTCTGGGACGAATTTATCGGT 214 | L 83 + 88 + 0M 215 | S 84 CGGCAGCATTGTCTGGGACGAATTTATCGGT 216 | L 84 + 88 + 0M 217 | S 85 CGGCAGTATTGTCTGGGACGAATTTATTGGT 218 | L 85 + 88 + 0M 219 | S 86 CGGTAGCATCGTATGGGATGAGTTTATTGGC 220 | L 86 + 88 + 0M 221 | S 87 CGGTAGCATCGTCTGGGATGAGTTTATTGGC 222 | L 87 + 88 + 0M 223 | S 88 ATGTGGAT 224 | L 88 + 89 + 0M 225 | L 88 + 90 + 0M 226 | L 88 + 91 + 0M 227 | L 88 + 92 + 0M 228 | L 88 + 93 + 0M 229 | S 89 CACGCTCATGGCGC 230 | L 89 + 94 + 0M 231 | S 90 CACGCTCATGGCAT 232 | L 90 + 94 + 0M 233 | S 91 TACGCTCATGGCGC 234 | L 91 + 94 + 0M 235 | S 92 CACCCTGATGGCGC 236 | L 92 + 94 + 0M 237 | S 93 CACGTTCATGGCGC 238 | L 93 + 94 + 0M 239 | S 94 TGCCGACCA 240 | L 94 + 95 + 0M 241 | L 94 + 96 + 0M 242 | S 95 A 243 | L 95 + 97 + 0M 244 | S 96 T 245 | L 96 + 97 + 0M 246 | S 97 TGACTGGC 247 | L 97 + 106 + 0M 248 | L 97 + 110 + 0M 249 | L 97 + 98 + 0M 250 | L 97 + 99 + 0M 251 | L 97 + 100 + 0M 252 | L 97 + 104 + 0M 253 | S 98 AGTGGGTTGCC 254 | L 98 + 105 + 0M 255 | S 99 A 256 | L 99 + 101 + 0M 257 | S 100 C 258 | L 100 + 101 + 0M 259 | S 101 GTGGGTCGC 260 | L 101 + 102 + 0M 261 | L 101 + 103 + 0M 262 | S 102 C 263 | L 102 + 105 + 0M 264 | S 103 A 265 | L 103 + 105 + 0M 266 | S 104 
AGTGGGTCACT 267 | L 104 + 105 + 0M 268 | S 105 GCCGGGTTTGTG 269 | L 105 + 114 + 0M 270 | S 106 AGTGGGT 271 | L 106 + 107 + 0M 272 | L 106 + 108 + 0M 273 | S 107 TA 274 | L 107 + 109 + 0M 275 | S 108 CG 276 | L 108 + 109 + 0M 277 | S 109 CCGCCGGATTTGTG 278 | L 109 + 114 + 0M 279 | S 110 AGTGGGTC 280 | L 110 + 111 + 0M 281 | L 110 + 112 + 0M 282 | S 111 G 283 | L 111 + 113 + 0M 284 | S 112 A 285 | L 112 + 113 + 0M 286 | S 113 CTGCCGGATTCGTC 287 | L 113 + 114 + 0M 288 | S 114 ATTTTCCGTAT 289 | L 114 + 115 + 0M 290 | L 114 + 116 + 0M 291 | L 114 + 117 + 0M 292 | S 115 TCTG 293 | L 115 + 118 + 0M 294 | S 116 CCTG 295 | L 116 + 118 + 0M 296 | S 117 CCTC 297 | L 117 + 118 + 0M 298 | S 118 GATATGTGGAA 299 | L 118 + 119 + 0M 300 | L 118 + 120 + 0M 301 | S 119 G 302 | L 119 + 121 + 0M 303 | S 120 A 304 | L 120 + 121 + 0M 305 | S 121 CCGTGGCCGATCCG 306 | L 121 + 122 + 0M 307 | L 121 + 123 + 0M 308 | S 122 C 309 | L 122 + 124 + 0M 310 | S 123 T 311 | L 123 + 124 + 0M 312 | S 124 TGGTTTGA 313 | L 124 + 125 + 0M 314 | L 124 + 126 + 0M 315 | L 124 + 127 + 0M 316 | L 124 + 128 + 0M 317 | L 124 + 129 + 0M 318 | L 124 + 130 + 0M 319 | L 124 + 131 + 0M 320 | L 124 + 132 + 0M 321 | L 124 + 133 + 0M 322 | S 125 TCGCAATGTACATGGCGGCATGGGG 323 | L 125 + 134 + 0M 324 | S 126 TCGCAATGTGCATGGCGGCATGGGG 325 | L 126 + 134 + 0M 326 | S 127 CCGCAATGTACATGGCGGCATGGGG 327 | L 127 + 134 + 0M 328 | S 128 CCGCAATGTACATGGCGGCATGGGA 329 | L 128 + 134 + 0M 330 | S 129 TCGCAATGTGCATGGAGGCATGGGG 331 | L 129 + 134 + 0M 332 | S 130 TCGCAATGTGCATGGCGGTATGGGG 333 | L 130 + 134 + 0M 334 | S 131 CCGCAATGTGCATGGCGGCATGGGG 335 | L 131 + 134 + 0M 336 | S 132 TCACAATGTACATGGCGGCATGGGG 337 | L 132 + 134 + 0M 338 | S 133 TCGCAATGTACATGGTGGCATGGGG 339 | L 133 + 134 + 0M 340 | S 134 ATCATGAT 341 | L 134 + 135 + 0M 342 | L 134 + 136 + 0M 343 | L 134 + 137 + 0M 344 | L 134 + 138 + 0M 345 | S 135 CGAC 346 | L 135 + 139 + 0M 347 | S 136 AGAT 348 | L 136 + 139 + 0M 349 | S 137 TGAC 350 | L 137 + 139 + 0M 351 | S 138 CGAT 
352 | L 138 + 139 + 0M 353 | S 139 GATATTGTCGC 354 | L 139 + 148 + 0M 355 | S 148 CGGGGTGATCTCCGCAGGCATCCTGTACTTTATTGGACATCACTGGCCGCTGGGTATTCTGTCG 356 | P NC_017626-EC042_RS02380-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,16+,17+,18+,21+,22+,25+,26+,38+,39+,43+,45+,46+,47+,51+,53+,57+,59+,61+,63+,67+,68+,71+,75+,77+,79+,80+,88+,89+,94+,96+,97+,99+,101+,102+,105+,114+,116+,118+,120+,121+,122+,124+,127+,134+,136+,139+,148+ * 357 | P NC_017906-CDCO157_RS02440-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,22+,25+,26+,38+,39+,43+,45+,46+,47+,51+,53+,57+,58+,61+,62+,67+,68+,71+,75+,78+,79+,86+,88+,92+,94+,95+,97+,99+,101+,102+,105+,114+,115+,118+,119+,121+,122+,124+,125+,134+,138+,139+,148+ * 358 | P NZ_CP006636-PCN061_RS02380-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,33+,35+,36+,38+,40+,43+,44+,46+,47+,51+,53+,57+,58+,61+,63+,67+,68+,71+,73+,74+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,120+,121+,122+,124+,126+,134+,136+,139+,148+ * 359 | P NZ_CP007136-ECRM12581_RS02120-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,10+,11+,12+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,53+,57+,59+,61+,63+,67+,68+,71+,75+,77+,79+,80+,88+,89+,94+,95+,97+,106+,108+,109+,114+,116+,118+,120+,121+,122+,124+,133+,134+,138+,139+,148+ * 360 | P NZ_CP007592-CH99_RS03970-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,23+,25+,27+,38+,42+,43+,44+,46+,47+,51+,56+,57+,59+,61+,63+,67+,68+,71+,75+,77+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,122+,124+,125+,134+,138+,139+,148+ * 361 | P NZ_CP015228-GJ12_RS03690-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,19+,21+,23+,25+,26+,38+,39+,43+,50+,51+,52+,57+,58+,61+,62+,67+,68+,71+,72+,74+,79+,82+,88+,90+,94+,95+,97+,110+,112+,113+,114+,115+,118+,119+,121+,122+,124+,125+,134+,138+,139+,148+ * 362 | P NZ_CP015229-GJ11_RS03665-1-phosphatidylglycerophosphatase_A 
1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,29+,30+,32+,38+,39+,43+,45+,46+,47+,51+,53+,57+,59+,61+,63+,67+,70+,71+,75+,77+,79+,80+,88+,89+,94+,95+,97+,100+,101+,102+,105+,114+,116+,118+,120+,121+,122+,124+,125+,134+,138+,139+,148+ * 363 | P NZ_CP019944-Eco28_RS20320-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,16+,17+,18+,21+,22+,25+,26+,38+,39+,43+,45+,46+,47+,51+,53+,57+,58+,61+,62+,67+,68+,71+,75+,76+,79+,87+,88+,92+,94+,95+,97+,99+,101+,102+,105+,114+,115+,118+,119+,121+,122+,124+,125+,134+,138+,139+,148+ * 364 | P NZ_CP020107-EC767_RS20440-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,23+,25+,26+,38+,39+,43+,50+,51+,52+,57+,58+,61+,62+,67+,68+,71+,72+,74+,79+,82+,88+,90+,94+,95+,97+,104+,105+,114+,115+,118+,119+,121+,122+,124+,125+,134+,135+,139+,148+ * 365 | P NZ_CP021683-AM408_RS17370-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,13+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,55+,57+,58+,61+,63+,67+,68+,71+,75+,76+,79+,81+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,122+,124+,126+,134+,135+,139+,148+ * 366 | P NZ_CP023377-CNQ50_RS19335-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,53+,57+,58+,61+,63+,67+,68+,71+,73+,74+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,120+,121+,122+,124+,129+,134+,136+,139+,148+ * 367 | P NZ_CP024127-CR534_RS20755-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,28+,30+,31+,38+,39+,43+,44+,46+,47+,51+,53+,57+,59+,61+,63+,67+,68+,71+,75+,76+,79+,81+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,122+,124+,126+,134+,135+,139+,148+ * 368 | P NZ_CP024232-CNZ63_RS04595-1-phosphatidylglycerophosphatase_A 1+,3+,5+,6+,7+,11+,12+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,53+,57+,58+,61+,63+,67+,68+,71+,73+,74+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,120+,121+,122+,124+,126+,134+,136+,139+,148+ * 369 | P 
NZ_CP024273-A5T63_RS02065-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,13+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,53+,57+,58+,61+,63+,67+,68+,71+,73+,74+,79+,80+,88+,93+,94+,95+,97+,98+,105+,114+,115+,118+,120+,121+,122+,124+,126+,134+,138+,139+,148+ * 370 | P NZ_CP027380-AYN27_RS21020-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,24+,25+,26+,38+,39+,43+,44+,46+,47+,51+,52+,57+,58+,61+,62+,67+,68+,71+,72+,74+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,122+,124+,125+,134+,135+,139+,148+ * 371 | P NZ_CP027388-C6P70_RS13525-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,22+,25+,26+,38+,39+,43+,44+,46+,47+,51+,52+,57+,60+,61+,62+,67+,68+,71+,72+,74+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,122+,124+,125+,134+,135+,139+,148+ * 372 | P NZ_CP027440-C6W69_RS21755-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,8+,11+,12+,14+,15+,17+,18+,21+,23+,25+,26+,38+,41+,43+,45+,46+,48+,51+,54+,57+,59+,61+,64+,67+,68+,71+,75+,77+,79+,85+,88+,91+,94+,95+,97+,106+,107+,109+,114+,115+,118+,119+,121+,122+,124+,132+,134+,137+,139+,148+ * 373 | P NZ_CP027449-C6W73_RS04390-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,22+,25+,26+,38+,39+,43+,50+,51+,53+,57+,58+,61+,62+,67+,68+,71+,72+,74+,79+,82+,88+,90+,94+,95+,97+,110+,111+,113+,114+,115+,118+,119+,121+,122+,124+,125+,134+,135+,139+,148+ * 374 | P NZ_CP027579-C6996_RS11655-1-phosphatidylglycerophosphatase_A 2+,3+,4+,6+,9+,11+,12+,14+,20+,21+,34+,35+,37+,38+,41+,43+,45+,46+,49+,51+,53+,57+,59+,61+,65+,67+,68+,71+,75+,77+,79+,80+,88+,89+,94+,95+,97+,99+,101+,102+,105+,114+,116+,118+,120+,121+,122+,124+,131+,134+,136+,139+,148+ * 375 | P NZ_CP028578-DBQ99_RS19495-1-phosphatidylglycerophosphatase_A 
1+,3+,4+,6+,7+,11+,13+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,53+,57+,58+,61+,66+,67+,68+,71+,75+,76+,79+,81+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,122+,124+,126+,134+,135+,139+,148+ * 376 | P NZ_CP031215-DV870_RS11270-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,8+,11+,12+,14+,15+,17+,18+,21+,23+,25+,26+,38+,41+,43+,45+,46+,48+,51+,54+,57+,59+,61+,64+,67+,68+,71+,75+,77+,79+,83+,88+,89+,94+,95+,97+,99+,101+,103+,105+,114+,117+,118+,120+,121+,122+,124+,128+,134+,136+,139+,148+ * 377 | P NZ_CP033635-CQP61_RS20775-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,12+,14+,15+,17+,18+,21+,22+,25+,26+,38+,39+,43+,44+,46+,47+,51+,54+,57+,59+,61+,63+,67+,69+,71+,75+,76+,79+,84+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,119+,121+,123+,124+,125+,134+,137+,139+,148+ * 378 | P NZ_LS483297-DQM98_RS18140-1-phosphatidylglycerophosphatase_A 1+,3+,4+,6+,7+,11+,13+,14+,15+,17+,18+,21+,23+,25+,27+,38+,40+,43+,44+,46+,47+,51+,53+,57+,58+,61+,63+,67+,68+,71+,73+,74+,79+,80+,88+,89+,94+,95+,97+,98+,105+,114+,115+,118+,120+,121+,122+,124+,130+,134+,136+,139+,148+ * 379 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use recgraph::args_parser; 2 | use recgraph::gap_global_abpoa; 3 | use recgraph::gap_local_poa; 4 | use recgraph::global_abpoa; 5 | use recgraph::graph; 6 | use recgraph::local_poa; 7 | use recgraph::pathwise_alignment; 8 | use recgraph::pathwise_alignment_gap; 9 | use recgraph::pathwise_alignment_gap_semi; 10 | use recgraph::pathwise_alignment_recombination; 11 | use recgraph::pathwise_alignment_semiglobal; 12 | use recgraph::pathwise_graph; 13 | use recgraph::pathwise_graph::nodes_displacement_matrix; 14 | use recgraph::score_matrix; 15 | use recgraph::sequences; 16 | use recgraph::utils; 17 | use std::collections::HashMap; 18 | 19 | use std::time::SystemTime; 20 | 21 | #[cfg(target_os = "linux")] 
22 | #[global_allocator] 23 | static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; 24 | 25 | fn main() { 26 | let now = SystemTime::now(); 27 | 28 | // get sequence 29 | let (sequences, seq_names) = sequences::get_sequences(args_parser::get_sequence_path()); 30 | 31 | //get graph 32 | let graph_path = args_parser::get_graph_path(); 33 | let graph_struct = graph::read_graph(&graph_path, false); 34 | 35 | //get score matrix 36 | let score_matrix = score_matrix::create_score_matrix(); 37 | let scores_f32 = score_matrix::create_f32_scores_matrix(); 38 | 39 | //get alignment option 40 | let align_mode = args_parser::get_align_mode(); 41 | let amb_strand = args_parser::get_amb_strand_mode(); 42 | let (b, f) = args_parser::get_b_f(); 43 | 44 | //get handle position for output 45 | let hofp_forward = utils::create_handle_pos_in_lnz(&graph_struct.nwp, &graph_path, false); 46 | let mut hofp_reverse = HashMap::new(); 47 | 48 | match align_mode { 49 | //global alignment 50 | 0 => { 51 | let r_values = utils::set_r_values( 52 | &graph_struct.nwp, 53 | &graph_struct.pred_hash, 54 | graph_struct.lnz.len(), 55 | ); 56 | for (i, seq) in sequences.iter().enumerate() { 57 | let bases_to_add = (b + f * seq.len() as f32) as usize; 58 | let alignment = if is_x86_feature_detected!("avx2") { 59 | unsafe { 60 | global_abpoa::exec_simd( 61 | seq, 62 | (&seq_names[i], i + 1), 63 | &graph_struct, 64 | &scores_f32, 65 | bases_to_add, 66 | false, 67 | &hofp_forward, 68 | &r_values, 69 | ) 70 | } 71 | } else { 72 | global_abpoa::exec( 73 | seq, 74 | (&seq_names[i], i + 1), 75 | &graph_struct, 76 | &score_matrix, 77 | bases_to_add, 78 | false, 79 | &hofp_forward, 80 | ) 81 | }; 82 | if amb_strand && alignment.0 < 0 { 83 | if hofp_reverse.is_empty() { 84 | hofp_reverse = 85 | utils::create_handle_pos_in_lnz(&graph_struct.nwp, &graph_path, true); 86 | } 87 | let rev_seq = sequences::rev_and_compl(seq); 88 | let rev_alignment = global_abpoa::exec( 89 | &rev_seq, 90 | 
(&seq_names[i], i + 1), 91 | &graph_struct, 92 | &score_matrix, 93 | bases_to_add, 94 | true, 95 | &hofp_reverse, 96 | ); 97 | if rev_alignment.0 > alignment.0 { 98 | utils::write_gaf(&rev_alignment.1.unwrap().to_string(), i + 1); 99 | } else { 100 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 101 | } 102 | } else { 103 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 104 | } 105 | } 106 | } 107 | //local alignment 108 | 1 => { 109 | for (i, seq) in sequences.iter().enumerate() { 110 | let alignment = if is_x86_feature_detected!("avx2") { 111 | unsafe { 112 | let temp_score = local_poa::exec_simd( 113 | seq, 114 | (&seq_names[i], i + 1), 115 | &graph_struct, 116 | &scores_f32, 117 | false, 118 | &hofp_forward, 119 | ); 120 | (temp_score.0 as i32, temp_score.1) 121 | } 122 | } else { 123 | local_poa::exec( 124 | seq, 125 | (&seq_names[i], i + 1), 126 | &graph_struct, 127 | &score_matrix, 128 | false, 129 | &hofp_forward, 130 | ) 131 | }; 132 | if amb_strand { 133 | if hofp_reverse.is_empty() { 134 | hofp_reverse = 135 | utils::create_handle_pos_in_lnz(&graph_struct.nwp, &graph_path, true); 136 | } 137 | let rev_seq = sequences::rev_and_compl(seq); 138 | let alignment_rev = if is_x86_feature_detected!("avx2") { 139 | unsafe { 140 | let temp_alignment = local_poa::exec_simd( 141 | &rev_seq, 142 | (&seq_names[i], i + 1), 143 | &graph_struct, 144 | &scores_f32, 145 | true, 146 | &hofp_reverse, 147 | ); 148 | (temp_alignment.0 as i32, temp_alignment.1) 149 | } 150 | } else { 151 | local_poa::exec( 152 | &rev_seq, 153 | (&seq_names[i], i + 1), 154 | &graph_struct, 155 | &score_matrix, 156 | true, 157 | &hofp_reverse, 158 | ) 159 | }; 160 | if alignment.0 < alignment_rev.0 { 161 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 162 | } else { 163 | utils::write_gaf(&alignment_rev.1.unwrap().to_string(), i + 1); 164 | } 165 | } else { 166 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1) 167 | } 168 | } 169 | } 170 | 
//affine gap global alignment 171 | 2 => { 172 | let (g_open, g_ext) = args_parser::get_gap_open_gap_ext(); 173 | 174 | for (i, seq) in sequences.iter().enumerate() { 175 | let bases_to_add = (b + f * seq.len() as f32) as usize; 176 | let alignment = gap_global_abpoa::exec( 177 | seq, 178 | (&seq_names[i], i + 1), 179 | &graph_struct, 180 | &score_matrix, 181 | g_open, 182 | g_ext, 183 | bases_to_add, 184 | false, 185 | &hofp_forward, 186 | ); 187 | 188 | if amb_strand && alignment.0 < 0 { 189 | if hofp_reverse.is_empty() { 190 | hofp_reverse = 191 | utils::create_handle_pos_in_lnz(&graph_struct.nwp, &graph_path, true); 192 | } 193 | let rev_seq = sequences::rev_and_compl(seq); 194 | let rev_alignment = gap_global_abpoa::exec( 195 | &rev_seq, 196 | (&seq_names[i], i + 1), 197 | &graph_struct, 198 | &score_matrix, 199 | g_open, 200 | g_ext, 201 | bases_to_add, 202 | true, 203 | &hofp_reverse, 204 | ); 205 | if rev_alignment.0 > alignment.0 { 206 | utils::write_gaf(&rev_alignment.1.unwrap().to_string(), i + 1); 207 | } else { 208 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 209 | } 210 | } else { 211 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 212 | } 213 | } 214 | } 215 | //affine gap local alignment 216 | 3 => { 217 | let (g_open, g_ext) = args_parser::get_gap_open_gap_ext(); 218 | for (i, seq) in sequences.iter().enumerate() { 219 | let alignment = gap_local_poa::exec( 220 | seq, 221 | (&seq_names[i], i + 1), 222 | &graph_struct, 223 | &score_matrix, 224 | g_open, 225 | g_ext, 226 | false, 227 | &hofp_forward, 228 | ); 229 | if amb_strand { 230 | if hofp_reverse.is_empty() { 231 | hofp_reverse = 232 | utils::create_handle_pos_in_lnz(&graph_struct.nwp, &graph_path, true); 233 | } 234 | let rev_seq = sequences::rev_and_compl(seq); 235 | let rev_alignment = gap_local_poa::exec( 236 | &rev_seq, 237 | (&seq_names[i], i + 1), 238 | &graph_struct, 239 | &score_matrix, 240 | g_open, 241 | g_ext, 242 | false, 243 | &hofp_reverse, 244 | ); 
245 | if rev_alignment.0 > alignment.0 { 246 | utils::write_gaf(&rev_alignment.1.unwrap().to_string(), i + 1); 247 | } else { 248 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 249 | } 250 | } else { 251 | utils::write_gaf(&alignment.1.unwrap().to_string(), i + 1); 252 | } 253 | } 254 | } 255 | 4 => { 256 | let graph = pathwise_graph::read_graph_w_path(&graph_path, false); 257 | for (i, seq) in sequences.iter().enumerate() { 258 | let mut gaf = pathwise_alignment::exec(seq, &graph, &score_matrix); 259 | gaf.query_name = seq_names[i].clone(); 260 | utils::write_gaf(&gaf.to_string(), i); 261 | } 262 | } 263 | 5 => { 264 | let graph = pathwise_graph::read_graph_w_path(&graph_path, false); 265 | for (i, seq) in sequences.iter().enumerate() { 266 | let mut gaf = pathwise_alignment_semiglobal::exec(seq, &graph, &score_matrix); 267 | gaf.query_name = seq_names[i].clone(); 268 | utils::write_gaf(&gaf.to_string(), i); 269 | } 270 | } 271 | 6 => { 272 | let graph = pathwise_graph::read_graph_w_path(&graph_path, false); 273 | let (g_open, g_ext) = args_parser::get_gap_open_gap_ext(); 274 | for (i, seq) in sequences.iter().enumerate() { 275 | let best_path = 276 | pathwise_alignment_gap::exec(seq, &graph, &score_matrix, g_open, g_ext); 277 | println!("Best path sequence {i}: {best_path}"); 278 | } 279 | } 280 | 7 => { 281 | let graph = pathwise_graph::read_graph_w_path(&graph_path, false); 282 | let (g_open, g_ext) = args_parser::get_gap_open_gap_ext(); 283 | for (i, seq) in sequences.iter().enumerate() { 284 | let best_path = 285 | pathwise_alignment_gap_semi::exec(seq, &graph, &score_matrix, g_open, g_ext); 286 | println!("Best path sequence {i}: {best_path}"); 287 | } 288 | } 289 | 8 | 9 => { 290 | let graph = pathwise_graph::read_graph_w_path(&graph_path, false); 291 | let rev_graph = pathwise_graph::create_reverse_path_graph(&graph); 292 | let displ_matrix = nodes_displacement_matrix(&graph, &rev_graph); 293 | 294 | let (base_rec_cost, multi_rec_cost) = 
args_parser::get_base_multi_recombination_cost(); 295 | let rbw = args_parser::get_recombination_band_width(); 296 | 297 | for (i, seq) in sequences.iter().enumerate() { 298 | let mut gaf = pathwise_alignment_recombination::exec( 299 | align_mode, 300 | seq, 301 | &graph, 302 | &rev_graph, 303 | &score_matrix, 304 | base_rec_cost, 305 | multi_rec_cost, 306 | &displ_matrix, 307 | rbw, 308 | ); 309 | gaf.query_name = seq_names[i].clone(); 310 | 311 | utils::write_gaf(&gaf.to_string(), i); 312 | } 313 | } 314 | 315 | _ => { 316 | panic!("Alignment mode must be in [0..9]"); 317 | } 318 | } 319 | match now.elapsed() { 320 | Ok(elapsed) => { 321 | // it prints '2' 322 | eprintln!("Done in {}.", elapsed.as_secs()); 323 | } 324 | Err(e) => { 325 | // an error occurred! 326 | eprintln!("Error: {e:?}"); 327 | } 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /src/pathwise_alignment_semiglobal.rs: -------------------------------------------------------------------------------- 1 | use crate::gaf_output::GAFStruct; 2 | use crate::pathwise_alignment_output::build_alignment; 3 | use crate::pathwise_graph::PathGraph; 4 | use std::collections::HashMap; 5 | 6 | pub fn exec( 7 | sequence: &[char], 8 | graph: &PathGraph, 9 | score_matrix: &HashMap<(char, char), i32>, 10 | ) -> GAFStruct { 11 | let lnz = &graph.lnz; 12 | let nodes_with_pred = &graph.nwp; 13 | let pred_hash = &graph.pred_hash; 14 | let path_number = graph.paths_number; 15 | let path_node = &graph.paths_nodes; 16 | 17 | let mut dpm = vec![vec![vec![0; path_number]; sequence.len()]; lnz.len()]; 18 | let alphas = &graph.alphas; 19 | for i in 0..lnz.len() - 1 { 20 | for j in 0..sequence.len() { 21 | match (i, j) { 22 | (0, 0) => { 23 | dpm[i][j] = vec![0; path_number]; 24 | } 25 | (_, 0) => dpm[i][j] = vec![0; path_number], 26 | (0, _) => { 27 | dpm[i][j][alphas[0]] = 28 | dpm[i][j - 1][alphas[0]] + score_matrix.get(&(sequence[j], '-')).unwrap(); 29 | for k in 
alphas[0] + 1..path_number { 30 | dpm[i][j][k] = dpm[i][j - 1][k]; 31 | } 32 | } 33 | _ => { 34 | if !nodes_with_pred[i] { 35 | let mut common_paths = path_node[i].clone(); 36 | common_paths.and(&path_node[i - 1]); 37 | 38 | if common_paths[alphas[i - 1]] { 39 | let u = dpm[i - 1][j][alphas[i - 1]] 40 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 41 | let d = dpm[i - 1][j - 1][alphas[i - 1]] 42 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 43 | let l = dpm[i][j - 1][alphas[i]] 44 | + score_matrix.get(&(sequence[j], '-')).unwrap(); 45 | 46 | dpm[i][j][alphas[i]] = *[d, u, l].iter().max().unwrap(); 47 | 48 | for (path, is_in) in common_paths.iter().enumerate() { 49 | if is_in { 50 | if path != alphas[i] { 51 | if dpm[i][j][alphas[i]] == d { 52 | dpm[i][j][path] = dpm[i - 1][j - 1][path]; 53 | } else if dpm[i][j][alphas[i]] == u { 54 | dpm[i][j][path] = dpm[i - 1][j][path]; 55 | } else { 56 | dpm[i][j][path] = dpm[i][j - 1][path]; 57 | } 58 | } 59 | } 60 | } 61 | } else { 62 | let u = dpm[i - 1][j][alphas[i - 1]] 63 | + dpm[i - 1][j][alphas[i]] 64 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 65 | let d = dpm[i - 1][j - 1][alphas[i - 1]] 66 | + dpm[i - 1][j - 1][alphas[i]] 67 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 68 | let l = dpm[i][j - 1][alphas[i]] 69 | + score_matrix.get(&(sequence[j], '-')).unwrap(); 70 | dpm[i][j][alphas[i]] = *[d, u, l].iter().max().unwrap(); 71 | 72 | for (path, is_in) in common_paths.iter().enumerate() { 73 | if is_in { 74 | if path != alphas[i] { 75 | if dpm[i][j][alphas[i]] == d { 76 | dpm[i][j][path] = dpm[i - 1][j - 1][path] 77 | - dpm[i - 1][j - 1][alphas[i]]; 78 | } else if dpm[i][j][alphas[i]] == u { 79 | dpm[i][j][path] = 80 | dpm[i - 1][j][path] - dpm[i - 1][j][alphas[i]]; 81 | } else { 82 | dpm[i][j][path] = dpm[i][j - 1][path]; 83 | } 84 | } 85 | } 86 | } 87 | } 88 | } else { 89 | // multiple alphas possible 90 | let mut alphas_deltas = HashMap::new(); 91 | for (p, p_paths) in 
pred_hash.get_preds_and_paths(i) { 92 | let mut common_paths = path_node[i].clone(); 93 | common_paths.and(&p_paths); 94 | 95 | if common_paths[alphas[p]] { 96 | let paths = common_paths 97 | .iter() 98 | .enumerate() 99 | .filter_map(|(path_id, is_in)| match is_in { 100 | true => Some(path_id), 101 | false => None, 102 | }) 103 | .collect::>(); 104 | alphas_deltas.insert(alphas[p], paths); 105 | 106 | let u = dpm[p][j][alphas[p]] 107 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 108 | let d = dpm[p][j - 1][alphas[p]] 109 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 110 | let l = if alphas[i] == alphas[p] { 111 | dpm[i][j - 1][alphas[p]] 112 | + score_matrix.get(&(sequence[j], '-')).unwrap() 113 | } else { 114 | dpm[i][j - 1][alphas[p]] 115 | + dpm[i][j - 1][alphas[i]] 116 | + score_matrix.get(&(sequence[j], '-')).unwrap() 117 | }; 118 | dpm[i][j][alphas[p]] = *[d, u, l].iter().max().unwrap(); 119 | 120 | for (path, is_in) in common_paths.iter().enumerate() { 121 | if is_in { 122 | if path != alphas[p] { 123 | if dpm[i][j][alphas[p]] == d { 124 | dpm[i][j][path] = dpm[p][j - 1][path]; 125 | } else if dpm[i][j][alphas[p]] == u { 126 | dpm[i][j][path] = dpm[p][j][path]; 127 | } else { 128 | /* 129 | if alphas[p] == alphas[i] { 130 | dpm[i][j][path] = dpm[i][j - 1][path]; 131 | } else { 132 | dpm[i][j][path] = dpm[i][j - 1][alphas[p]] 133 | - dpm[i][j - 1][path]; 134 | } 135 | */ 136 | if alphas[p] == alphas[i] { 137 | dpm[i][j][path] = dpm[i][j - 1][path]; 138 | } else { 139 | dpm[i][j][path] = dpm[i][j - 1][path] 140 | - dpm[i][j - 1][alphas[p]]; 141 | } 142 | } 143 | } 144 | } 145 | } 146 | } else { 147 | //set new alpha 148 | let temp_alpha = if common_paths[alphas[i]] { 149 | alphas[i] 150 | } else { 151 | common_paths.iter().position(|is_in| is_in).unwrap() 152 | }; 153 | let paths = common_paths 154 | .iter() 155 | .enumerate() 156 | .filter_map(|(path_id, is_in)| match is_in { 157 | true => Some(path_id), 158 | false => None, 159 | }) 160 | 
.collect::>(); 161 | alphas_deltas.insert(temp_alpha, paths); 162 | 163 | let u = dpm[p][j][alphas[p]] 164 | + dpm[p][j][temp_alpha] 165 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 166 | let d = dpm[p][j - 1][alphas[p]] 167 | + dpm[p][j - 1][temp_alpha] 168 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 169 | let l = if alphas[i] == temp_alpha { 170 | dpm[i][j - 1][temp_alpha] 171 | + score_matrix.get(&(sequence[j], '-')).unwrap() 172 | } else { 173 | dpm[i][j - 1][temp_alpha] 174 | + dpm[i][j - 1][alphas[i]] 175 | + score_matrix.get(&(sequence[j], '-')).unwrap() 176 | }; 177 | dpm[i][j][temp_alpha] = *[d, u, l].iter().max().unwrap(); 178 | 179 | for (path, is_in) in common_paths.iter().enumerate() { 180 | if path != temp_alpha { 181 | if is_in { 182 | if dpm[i][j][temp_alpha] == d { 183 | dpm[i][j][path] = 184 | dpm[p][j - 1][path] - dpm[p][j - 1][temp_alpha]; 185 | } else if dpm[i][j][temp_alpha] == u { 186 | dpm[i][j][path] = 187 | dpm[p][j][path] - dpm[p][j][temp_alpha]; 188 | } else { 189 | /* 190 | if temp_alpha == alphas[i] { 191 | dpm[i][j][path] = dpm[i][j - 1][path]; 192 | } else { 193 | dpm[i][j][path] = dpm[i][j - 1][temp_alpha] 194 | - dpm[i][j - 1][path]; 195 | } 196 | */ 197 | if temp_alpha == alphas[i] { 198 | dpm[i][j][path] = dpm[i][j - 1][path]; 199 | } else { 200 | dpm[i][j][path] = dpm[i][j - 1][path] 201 | - dpm[i][j - 1][temp_alpha]; 202 | } 203 | } 204 | } 205 | } 206 | } 207 | } 208 | } 209 | if alphas_deltas.keys().len() > 0 { 210 | for (a, delta) in alphas_deltas.iter() { 211 | if *a != alphas[i] { 212 | dpm[i][j][*a] -= dpm[i][j][alphas[i]]; 213 | for path in delta.iter() { 214 | if path != a { 215 | dpm[i][j][*path] += dpm[i][j][*a]; 216 | } 217 | } 218 | } 219 | } 220 | } 221 | } 222 | } 223 | } 224 | } 225 | } 226 | let (final_node, best_path) = best_ending_node(&dpm, graph); 227 | 228 | let gaf = build_alignment( 229 | &dpm, 230 | lnz, 231 | sequence, 232 | score_matrix, 233 | &alphas, 234 | best_path, 235 | &pred_hash, 
236 | &nodes_with_pred, 237 | &graph.nodes_id_pos, 238 | final_node, 239 | false, 240 | ); 241 | gaf 242 | } 243 | 244 | fn best_ending_node(dpm: &Vec>>, graph: &PathGraph) -> (usize, usize) { 245 | let mut max: Option = None; 246 | let mut ending_node: usize = 0; 247 | let mut chosen_path: usize = 0; 248 | for i in 1..dpm.len() - 1 { 249 | let paths = graph.paths_nodes[i].clone(); 250 | let mut absolute_scores = dpm[i][dpm[i].len() - 1].clone(); 251 | 252 | for (path, is_in) in paths.iter().enumerate() { 253 | if is_in { 254 | if path != graph.alphas[i] { 255 | absolute_scores[path] = 256 | absolute_scores[path] + absolute_scores[graph.alphas[i]]; 257 | } 258 | } 259 | } 260 | 261 | let mut best_path: Option<(&i32, usize)> = None; 262 | for (path, score) in absolute_scores.iter().enumerate() { 263 | if paths[path] && (best_path.is_none() || best_path.unwrap().0 < score) { 264 | //*score <= (dpm[0].len()*2) as i32 && 265 | best_path = Some((&score, path)); 266 | } 267 | } 268 | 269 | if max.is_none() || best_path.unwrap().0 > &max.unwrap() { 270 | max = Some(*best_path.unwrap().0); 271 | ending_node = i; 272 | chosen_path = best_path.unwrap().1; 273 | } 274 | } 275 | 276 | (ending_node, chosen_path) 277 | } 278 | -------------------------------------------------------------------------------- /src/local_poa.rs: -------------------------------------------------------------------------------- 1 | use crate::gaf_output::GAFStruct; 2 | use crate::{bitfield_path as bf, utils}; 3 | use crate::{gaf_output, graph::LnzGraph}; 4 | use bitvec::prelude::*; 5 | use std::arch::x86_64::*; 6 | use std::cmp; 7 | use std::collections::HashMap; 8 | 9 | #[target_feature(enable = "avx2")] 10 | pub unsafe fn exec_simd( 11 | // comments on simd instructions are in global_abpoa::exec_simd() 12 | read: &[char], 13 | seq_name: (&str, usize), 14 | graph: &LnzGraph, 15 | scores_matrix: &HashMap<(char, char), f32>, 16 | amb_mode: bool, 17 | hofp: &HashMap, 18 | ) -> (f32, Option) { 19 | let 
mut m: Vec> = vec![vec![0f32; read.len()]; graph.lnz.len()]; 20 | let mut path: Vec> = vec![vec![0f32; read.len()]; graph.lnz.len()]; 21 | 22 | let max_multiple = if read.len() % 8 != 0 { 23 | (read.len() / 8) * 8 24 | } else { 25 | read.len() - 8 26 | }; 27 | 28 | let d_move_simd = _mm256_set1_ps(0.1); 29 | let u_move_simd = _mm256_set1_ps(0.2); 30 | let mut best_row = 0; 31 | let mut best_col = 0; 32 | for i in 1..graph.lnz.len() - 1 { 33 | let us_update = _mm256_set1_ps(*scores_matrix.get(&(graph.lnz[i], '-')).unwrap()); 34 | for j in (1..max_multiple + 1).step_by(8) { 35 | let ds_update = _mm256_set_ps( 36 | *scores_matrix.get(&(graph.lnz[i], read[j + 7])).unwrap(), 37 | *scores_matrix.get(&(graph.lnz[i], read[j + 6])).unwrap(), 38 | *scores_matrix.get(&(graph.lnz[i], read[j + 5])).unwrap(), 39 | *scores_matrix.get(&(graph.lnz[i], read[j + 4])).unwrap(), 40 | *scores_matrix.get(&(graph.lnz[i], read[j + 3])).unwrap(), 41 | *scores_matrix.get(&(graph.lnz[i], read[j + 2])).unwrap(), 42 | *scores_matrix.get(&(graph.lnz[i], read[j + 1])).unwrap(), 43 | *scores_matrix.get(&(graph.lnz[i], read[j])).unwrap(), 44 | ); 45 | if !graph.nwp[i] { 46 | let us = _mm256_add_ps(_mm256_loadu_ps(m[i - 1].get_unchecked(j)), us_update); 47 | 48 | let ds = _mm256_add_ps(_mm256_loadu_ps(m[i - 1].get_unchecked(j - 1)), ds_update); 49 | 50 | let best_choice = _mm256_cmp_ps(ds, us, _CMP_GT_OS); 51 | let result = _mm256_blendv_ps(us, ds, best_choice); 52 | 53 | _mm256_storeu_ps(m[i].get_unchecked_mut(j), result); 54 | 55 | let dir_result = _mm256_blendv_ps(u_move_simd, d_move_simd, best_choice); 56 | let path_update = _mm256_add_ps(_mm256_set1_ps((i - 1) as f32), dir_result); 57 | _mm256_storeu_ps(path[i].get_unchecked_mut(j), path_update); 58 | } else { 59 | let preds = graph.pred_hash.get(&i).unwrap(); 60 | let mut best_us = _mm256_loadu_ps(m[preds[0]].get_unchecked(j)); 61 | let mut best_ds = _mm256_loadu_ps(m[preds[0]].get_unchecked(j - 1)); 62 | let mut pred_best_us = 
_mm256_set1_ps(preds[0] as f32); 63 | let mut pred_best_ds = _mm256_set1_ps(preds[0] as f32); 64 | for p in preds[1..].iter() { 65 | let us = _mm256_loadu_ps(m[*p].get_unchecked(j)); 66 | let ds = _mm256_loadu_ps(m[*p].get_unchecked(j - 1)); 67 | let pred_simd = _mm256_set1_ps(*p as f32); 68 | 69 | let best_us_choices = _mm256_cmp_ps(us, best_us, _CMP_GT_OS); 70 | best_us = _mm256_blendv_ps(best_us, us, best_us_choices); 71 | pred_best_us = _mm256_blendv_ps(pred_best_us, pred_simd, best_us_choices); 72 | 73 | let best_ds_choices = _mm256_cmp_ps(ds, best_ds, _CMP_GT_OS); 74 | best_ds = _mm256_blendv_ps(best_ds, ds, best_ds_choices); 75 | pred_best_ds = _mm256_blendv_ps(pred_best_ds, pred_simd, best_ds_choices); 76 | } 77 | best_us = _mm256_add_ps(best_us, us_update); 78 | 79 | best_ds = _mm256_add_ps(best_ds, ds_update); 80 | 81 | let best_choice = _mm256_cmp_ps(best_ds, best_us, _CMP_GT_OS); 82 | let result = _mm256_blendv_ps(best_us, best_ds, best_choice); 83 | 84 | _mm256_storeu_ps(m[i].get_unchecked_mut(j), result); 85 | 86 | pred_best_ds = _mm256_add_ps(pred_best_ds, d_move_simd); 87 | pred_best_us = _mm256_add_ps(pred_best_us, u_move_simd); 88 | 89 | let dir_result = _mm256_blendv_ps(pred_best_us, pred_best_ds, best_choice); 90 | _mm256_storeu_ps(path[i].get_unchecked_mut(j), dir_result); 91 | } 92 | 93 | // update with l for each one 94 | for idx in j..cmp::min(j + 8, read.len()) { 95 | let l = m[i][idx - 1] + scores_matrix.get(&(read[j], '-')).unwrap(); 96 | if l > m[i][idx] { 97 | m[i][idx] = l; 98 | path[i][idx] = i as f32 + 0.3; 99 | } 100 | if m[i][idx] <= 0.0 { 101 | m[i][idx] = 0.0; 102 | path[i][idx] = 0.0; 103 | } 104 | if m[i][idx] >= m[best_row][best_col] { 105 | best_row = i; 106 | best_col = idx; 107 | } 108 | } 109 | } 110 | for j in max_multiple + 1..read.len() { 111 | if !graph.nwp[i] { 112 | let l = m[i][j - 1] + scores_matrix.get(&(read[j], '-')).unwrap(); 113 | let u = m[i - 1][j] + scores_matrix.get(&(graph.lnz[i], '-')).unwrap(); 114 | 
let d = m[i - 1][j - 1] + scores_matrix.get(&(graph.lnz[i], read[j])).unwrap(); 115 | m[i][j] = [l, u, d].into_iter().reduce(f32::max).unwrap(); 116 | if m[i][j] < 0.0 { 117 | m[i][j] = 0.0; 118 | path[i][j] = 0.0; 119 | } else if m[i][j] == d { 120 | path[i][j] = (i - 1) as f32 + 0.1; 121 | } else if m[i][j] == u { 122 | path[i][j] = (i - 1) as f32 + 0.2; 123 | } else { 124 | path[i][j] = i as f32 + 0.3; 125 | } 126 | } else { 127 | let mut u = 0f32; 128 | let mut u_pred = 0; 129 | let mut d = 0f32; 130 | let mut d_pred = 0; 131 | let mut first = true; 132 | for p in graph.pred_hash.get(&i).unwrap() { 133 | if first { 134 | u = m[*p][j]; 135 | d = m[*p][j - 1]; 136 | u_pred = *p; 137 | d_pred = *p; 138 | first = false 139 | } 140 | if m[*p][j] > u { 141 | u = m[*p][j]; 142 | u_pred = *p; 143 | } 144 | if m[*p][j - 1] > d { 145 | d = m[*p][j - 1]; 146 | d_pred = *p; 147 | } 148 | } 149 | u += scores_matrix.get(&(graph.lnz[i], '-')).unwrap(); 150 | d += scores_matrix.get(&(read[j], graph.lnz[i])).unwrap(); 151 | 152 | let l = m[i][j - 1] + scores_matrix.get(&(read[j], '-')).unwrap(); 153 | 154 | m[i][j] = [l, u, d].into_iter().reduce(f32::max).unwrap(); 155 | 156 | if m[i][j] == d { 157 | path[i][j] = d_pred as f32 + 0.1; 158 | } else if m[i][j] == u { 159 | path[i][j] = u_pred as f32 + 0.2; 160 | } else { 161 | path[i][j] = i as f32 + 0.3; 162 | } 163 | } 164 | if m[i][j] >= m[best_row][best_col] { 165 | best_row = i; 166 | best_col = j; 167 | } 168 | } 169 | } 170 | 171 | if seq_name.1 != 0 { 172 | let gaf_struct = gaf_output::gaf_of_local_poa_simd( 173 | &path, read, seq_name, best_row, best_col, amb_mode, hofp, 174 | ); 175 | (m[best_row][best_col], Some(gaf_struct)) 176 | } else { 177 | (m[best_row][best_col], None) 178 | } 179 | } 180 | 181 | pub fn exec( 182 | sequence: &[char], 183 | seq_name: (&str, usize), 184 | graph: &LnzGraph, 185 | scores_matrix: &HashMap<(char, char), i32>, 186 | amb_mode: bool, 187 | hofp: &HashMap, 188 | ) -> (i32, Option) { 189 | 
let lnz = &graph.lnz; 190 | let nodes_with_pred = &graph.nwp; 191 | let pred_hash = &graph.pred_hash; 192 | 193 | let mut m = vec![vec![0; sequence.len()]; lnz.len()]; 194 | let mut path = vec![vec![bitvec![u16, Msb0; 0; 32]; sequence.len()]; lnz.len()]; 195 | let (mut best_row, mut best_col) = (0, 0); 196 | 197 | for i in 0..lnz.len() - 1 { 198 | for j in 0..sequence.len() { 199 | match (i, j) { 200 | (0, _) | (_, 0) => path[i][j] = bf::set_path_cell(0, 'O'), 201 | _ => { 202 | let l = m[i][j - 1] + scores_matrix.get(&(sequence[j], '-')).unwrap(); 203 | let l_idx = i; 204 | 205 | let mut d; 206 | let d_idx; 207 | 208 | let mut u; 209 | let u_idx; 210 | if !nodes_with_pred[i] { 211 | d = m[i - 1][j - 1] + scores_matrix.get(&(sequence[j], lnz[i])).unwrap(); 212 | d_idx = i - 1; 213 | 214 | u = m[i - 1][j] + scores_matrix.get(&('-', lnz[i])).unwrap(); 215 | u_idx = i - 1; 216 | } else { 217 | (d, d_idx) = get_best_d(&m, pred_hash.get(&i).unwrap(), j); 218 | (u, u_idx) = get_best_u(&m, pred_hash.get(&i).unwrap(), j); 219 | d += scores_matrix.get(&(sequence[j], lnz[i])).unwrap(); 220 | u += scores_matrix.get(&('-', lnz[i])).unwrap(); 221 | } 222 | if d < 0 && l < 0 && u < 0 { 223 | m[i][j] = 0; 224 | path[i][j] = bf::set_path_cell(0, 'O'); 225 | } else { 226 | let (best_val, mut dir) = utils::get_max_d_u_l(d, u, l); 227 | if dir == 'D' && lnz[i] != sequence[j] { 228 | dir = 'd' 229 | } 230 | m[i][j] = best_val; 231 | path[i][j] = match dir { 232 | 'D' | 'd' => bf::set_path_cell(d_idx, dir), 233 | 'U' => bf::set_path_cell(u_idx, dir), 234 | _ => bf::set_path_cell(l_idx, dir), 235 | } 236 | } 237 | } 238 | } 239 | 240 | if m[i][j] > m[best_row][best_col] { 241 | best_row = i; 242 | best_col = j; 243 | } 244 | } 245 | } 246 | 247 | if seq_name.1 != 0 { 248 | let gaf_output = gaf_output::gaf_of_local_poa( 249 | &path, sequence, seq_name, best_row, best_col, amb_mode, hofp, 250 | ); 251 | (m[best_row][best_col], Some(gaf_output)) 252 | } else { 253 | (m[best_row][best_col], 
/// Best diagonal predecessor of a cell in column `j`.
///
/// Looks at column `j - 1` of every row listed in `p_arr` and returns the
/// highest score together with the row it comes from. The first predecessor
/// wins ties. Returns `(0, 0)` when `p_arr` is empty.
///
/// `j` must be at least 1.
fn get_best_d(m: &[Vec<i32>], p_arr: &[usize], j: usize) -> (i32, usize) {
    best_pred_in_col(m, p_arr, j - 1)
}

/// Best vertical ("up") predecessor of a cell in column `j`.
///
/// Looks at column `j` of every row listed in `p_arr` and returns the
/// highest score together with the row it comes from. The first predecessor
/// wins ties. Returns `(0, 0)` when `p_arr` is empty.
fn get_best_u(m: &[Vec<i32>], p_arr: &[usize], j: usize) -> (i32, usize) {
    best_pred_in_col(m, p_arr, j)
}

/// Returns `(score, row)` of the highest `m[row][col]` among `rows`.
///
/// The search is seeded with the first row instead of `(0, 0)`, so the
/// returned index is always one of `rows` (unless `rows` is empty) and
/// negative scores are reported correctly.
fn best_pred_in_col(m: &[Vec<i32>], rows: &[usize], col: usize) -> (i32, usize) {
    let mut best: Option<(i32, usize)> = None;
    for &row in rows {
        let score = m[row][col];
        // Strict comparison keeps the earliest predecessor on ties.
        if best.map_or(true, |(best_score, _)| score > best_score) {
            best = Some((score, row));
        }
    }
    best.unwrap_or((0, 0))
}
local_poa_consider_best_predecessor() { 342 | let s = vec!['$', 'A', 'A', 'C', 'C', 'C', 'A', 'A']; 343 | 344 | let lnz = vec!['$', 'G', 'G', 'G', 'C', 'C', 'C', 'G', 'G', 'F']; 345 | let mut nwp = BitVec::from_elem(lnz.len(), false); 346 | nwp.set(1, true); 347 | nwp.set(6, true); 348 | nwp.set(9, true); 349 | let mut pred_hash = HashMap::new(); 350 | pred_hash.insert(1, vec![0]); 351 | pred_hash.insert(6, vec![3]); 352 | pred_hash.insert(9, vec![8, 5]); 353 | let graph_struct = LnzGraph { 354 | lnz, 355 | nwp, 356 | pred_hash, 357 | }; 358 | let mut score_matrix = HashMap::new(); 359 | for c1 in ['A', 'C', 'G', '-'] { 360 | for c2 in ['A', 'C', 'G', '-'] { 361 | if c1 == c2 { 362 | score_matrix.insert((c1, c2), 1); 363 | } else { 364 | score_matrix.insert((c1, c2), -1); 365 | } 366 | } 367 | } 368 | let align_score = super::exec( 369 | &s, 370 | ("seq", 0), 371 | &graph_struct, 372 | &score_matrix, 373 | false, 374 | &HashMap::new(), 375 | ); 376 | assert_eq!(align_score.0, 2); 377 | } 378 | } 379 | -------------------------------------------------------------------------------- /src/pathwise_graph.rs: -------------------------------------------------------------------------------- 1 | use bit_vec::BitVec; 2 | use gfa::{gfa::*, parser::GFAParser}; 3 | use handlegraph::{ 4 | handle::{Handle, NodeId}, 5 | handlegraph::HandleGraph, 6 | hashgraph::HashGraph, 7 | }; 8 | use std::collections::HashMap; 9 | //TODO: check path versus, only working with every path on + or - 10 | pub struct PathGraph { 11 | pub lnz: Vec, 12 | pub nwp: BitVec, 13 | pub pred_hash: PredHash, 14 | pub paths_nodes: Vec, 15 | pub alphas: Vec, 16 | pub paths_number: usize, 17 | pub nodes_id_pos: Vec, 18 | } 19 | 20 | impl PathGraph { 21 | pub fn new() -> PathGraph { 22 | PathGraph { 23 | lnz: vec![], 24 | nwp: BitVec::new(), 25 | pred_hash: PredHash::new(), 26 | paths_nodes: vec![], 27 | alphas: vec![], 28 | paths_number: 0, 29 | nodes_id_pos: vec![], 30 | } 31 | } 32 | 33 | pub fn build( 34 | 
lnz: Vec, 35 | nwp: BitVec, 36 | pred_hash: PredHash, 37 | paths_nodes: Vec, 38 | alphas: Vec, 39 | paths_number: usize, 40 | nodes_id_pos: Vec, 41 | ) -> PathGraph { 42 | PathGraph { 43 | lnz, 44 | nwp, 45 | pred_hash, 46 | paths_nodes, 47 | alphas, 48 | paths_number, 49 | nodes_id_pos, 50 | } 51 | } 52 | 53 | pub fn to_string(self) { 54 | println!("Linearization:"); 55 | println!("{:?}", self.lnz); 56 | println!(); 57 | 58 | println!("Nodes with preds:"); 59 | println!("{:?}", self.nwp); 60 | println!(); 61 | 62 | println!("Preds hash:"); 63 | println!("{:?}", self.pred_hash); 64 | println!(); 65 | 66 | println!("Paths of nodes:"); 67 | println!("{:?}", self.paths_nodes); 68 | println!(); 69 | 70 | println!("Number of paths: {}", self.paths_number); 71 | } 72 | } 73 | 74 | #[derive(Debug)] 75 | pub struct PredHash { 76 | predecessor: HashMap>, 77 | } 78 | 79 | impl PredHash { 80 | pub fn new() -> PredHash { 81 | PredHash { 82 | predecessor: HashMap::new(), 83 | } 84 | } 85 | 86 | pub fn get_preds_and_paths(&self, curr_node: usize) -> Vec<(usize, BitVec)> { 87 | let preds = self.predecessor.get(&curr_node).unwrap(); 88 | let mut preds_paths = Vec::new(); 89 | for (pred_pos, pred_paths) in preds.iter() { 90 | preds_paths.push((*pred_pos, pred_paths.clone())); 91 | } 92 | preds_paths 93 | } 94 | 95 | pub fn set_preds_and_paths( 96 | &mut self, 97 | curr_node: usize, 98 | pred_pos: usize, 99 | path_id: usize, 100 | paths_number: usize, 101 | ) { 102 | if self.predecessor.get(&curr_node).is_none() { 103 | self.predecessor.insert(curr_node, HashMap::new()); 104 | } 105 | 106 | if self 107 | .predecessor 108 | .get(&curr_node) 109 | .unwrap() 110 | .get(&pred_pos) 111 | .is_none() 112 | { 113 | self.predecessor 114 | .get_mut(&curr_node) 115 | .unwrap() 116 | .insert(pred_pos, BitVec::from_elem(paths_number, false)); 117 | } 118 | self.predecessor 119 | .get_mut(&curr_node) 120 | .unwrap() 121 | .get_mut(&pred_pos) 122 | .unwrap() 123 | .set(path_id, true); 124 | } 125 
| } 126 | 127 | pub fn read_graph_w_path(file_path: &str, is_reversed: bool) -> PathGraph { 128 | let parser = GFAParser::new(); 129 | let gfa: GFA = parser.parse_file(file_path).unwrap(); 130 | 131 | let graph: HashGraph = HashGraph::from_gfa(&gfa); 132 | create_path_graph(&graph, is_reversed) 133 | } 134 | 135 | pub fn create_path_graph(graph: &HashGraph, is_reversed: bool) -> PathGraph { 136 | let mut sorted_handles = graph.handles_iter().collect::>(); 137 | sorted_handles.sort(); 138 | 139 | if is_reversed { 140 | sorted_handles.reverse(); 141 | sorted_handles = sorted_handles 142 | .iter() 143 | .map(|h| h.flip()) 144 | .collect::>(); 145 | } 146 | //create graph linearization 147 | let mut last_index = 1; 148 | let mut visited_node: HashMap = HashMap::new(); 149 | let mut linearization: Vec = vec!['$']; 150 | let mut handles_id_position = HashMap::new(); 151 | let mut nodes_id_pos = Vec::new(); 152 | nodes_id_pos.push(0); 153 | for handle in sorted_handles.iter() { 154 | let start_position = last_index; 155 | for ch in graph.sequence(*handle) { 156 | linearization.push(ch as char); 157 | nodes_id_pos.push(handle.id().into()); 158 | last_index += 1; 159 | } 160 | let end_position = last_index - 1; 161 | visited_node.insert(handle.id(), end_position); 162 | handles_id_position.insert(handle.id(), (start_position, end_position)); 163 | } 164 | linearization.push('F'); 165 | nodes_id_pos.push(0); 166 | 167 | //create nwp, pred_hash,nodes paths and 168 | let mut nodes_with_pred = BitVec::from_elem(linearization.len(), false); 169 | let mut pred_hash_struct = PredHash::new(); 170 | 171 | let paths_set = &graph.paths; 172 | let mut paths = Vec::new(); 173 | for (_id, path) in paths_set.iter() { 174 | paths.push(path) 175 | } 176 | for (id, path) in paths_set.iter() { 177 | paths[*id as usize] = path 178 | } 179 | 180 | //let paths = &graph.paths; 181 | let paths_number = paths_set.keys().len(); 182 | let mut alphas = vec![paths_number + 1; linearization.len()]; 183 
| let mut paths_nodes = vec![BitVec::from_elem(paths_number, false); linearization.len()]; 184 | 185 | paths_nodes[0] = BitVec::from_elem(paths_number, true); 186 | alphas[0] = 0; 187 | alphas[linearization.len() - 1] = 0; 188 | for (path_id, path) in paths.iter().enumerate() { 189 | let path_nodes = if is_reversed { 190 | path.nodes.iter().rev().collect::>() 191 | } else { 192 | path.nodes.iter().collect::>() 193 | }; 194 | 195 | for (pos, handle) in path_nodes.iter().enumerate() { 196 | let (handle_start, handle_end) = handles_id_position.get(&handle.id()).unwrap(); 197 | let handle_start = *handle_start as usize; 198 | let handle_end = *handle_end as usize; 199 | 200 | for idx in handle_start..=handle_end { 201 | paths_nodes[idx].set(path_id as usize, true); 202 | if alphas[idx] == paths_number + 1 { 203 | alphas[idx] = path_id; 204 | } 205 | } 206 | 207 | if !nodes_with_pred[handle_start] { 208 | nodes_with_pred.set(handle_start, true); 209 | } 210 | 211 | if pos == 0 { 212 | pred_hash_struct.set_preds_and_paths(handle_start, 0, path_id, paths_number) 213 | } else { 214 | //ricava handle id pos prima, ricava suo handle end e aggiorna hash 215 | let pred = path_nodes[pos - 1]; 216 | let pred_end = handles_id_position.get(&pred.id()).unwrap().1; 217 | pred_hash_struct.set_preds_and_paths( 218 | handle_start, 219 | pred_end as usize, 220 | path_id, 221 | paths_number, 222 | ); 223 | 224 | // se ultimo nodo path aggiorna anche F 225 | if pos == path_nodes.iter().len() - 1 { 226 | pred_hash_struct.set_preds_and_paths( 227 | linearization.len() - 1, 228 | handle_end, 229 | path_id, 230 | paths_number, 231 | ); 232 | } 233 | } 234 | } 235 | } 236 | nodes_with_pred.set(linearization.len() - 1, true); 237 | paths_nodes[linearization.len() - 1] = BitVec::from_elem(paths_number, true); 238 | 239 | PathGraph::build( 240 | linearization, 241 | nodes_with_pred, 242 | pred_hash_struct, 243 | paths_nodes, 244 | alphas, 245 | paths_number, 246 | nodes_id_pos, 247 | ) 248 | } 
249 | 250 | pub fn create_reverse_path_graph(forward_graph: &PathGraph) -> PathGraph { 251 | // create reverse predecessor 252 | let mut nodes_with_pred_rev = BitVec::from_elem(forward_graph.lnz.len(), false); 253 | let mut pred_hash_struct_rev = PredHash::new(); 254 | 255 | for (node, predecessors) in forward_graph.pred_hash.predecessor.iter() { 256 | for (pred, paths) in predecessors.iter() { 257 | if !nodes_with_pred_rev[*pred] { 258 | nodes_with_pred_rev.set(*pred, true); 259 | } 260 | for (path_id, path) in paths.iter().enumerate() { 261 | if path { 262 | pred_hash_struct_rev.set_preds_and_paths( 263 | *pred, 264 | *node, 265 | path_id, 266 | forward_graph.paths_number, 267 | ); 268 | } 269 | } 270 | } 271 | } 272 | 273 | PathGraph::build( 274 | forward_graph.lnz.clone(), 275 | nodes_with_pred_rev, 276 | pred_hash_struct_rev, 277 | forward_graph.paths_nodes.clone(), 278 | forward_graph.alphas.clone(), 279 | forward_graph.paths_number, 280 | forward_graph.nodes_id_pos.clone(), 281 | ) 282 | } 283 | 284 | pub fn nodes_displacement_matrix(graph: &PathGraph, rev_graph: &PathGraph) -> Vec> { 285 | let paths = &graph.paths_nodes; 286 | 287 | let dfe = get_distance_from_end(graph); 288 | 289 | let dfs = get_distance_from_start(rev_graph); 290 | 291 | let mut ndm = vec![vec![0; paths.len()]; paths.len()]; 292 | for i in 0..paths.len() { 293 | for j in 0..paths.len() { 294 | if i == j { 295 | ndm[i][j] = 0; 296 | } else { 297 | let distance = (dfs[i] - dfs[j]).abs() + (dfe[i] - dfe[j]).abs(); 298 | let displacement = distance as i32; 299 | ndm[i][j] = displacement; 300 | } 301 | } 302 | } 303 | 304 | ndm 305 | } 306 | fn get_distance_from_start(graph: &PathGraph) -> Vec { 307 | let nwp = &graph.nwp; 308 | let pred_hash = &graph.pred_hash; 309 | let lnz_len = graph.lnz.len(); 310 | let mut r_values: Vec = vec![-1; lnz_len]; 311 | r_values[0] = 0; 312 | for (p, _) in pred_hash.get_preds_and_paths(0) { 313 | r_values[p] = 1; 314 | } 315 | 316 | for i in 1..lnz_len - 1 { 
317 | if r_values[i] == -1 || r_values[i] > r_values[i - 1] + 1 { 318 | r_values[i] = r_values[i - 1] + 1; 319 | } 320 | if nwp[i] { 321 | for (p, _) in pred_hash.get_preds_and_paths(i) { 322 | if r_values[p] == -1 || r_values[p] > r_values[i] + 1 { 323 | r_values[p] = r_values[i] + 1; 324 | } 325 | } 326 | } 327 | } 328 | r_values 329 | } 330 | fn get_distance_from_end(graph: &PathGraph) -> Vec { 331 | let nwp = &graph.nwp; 332 | let pred_hash = &graph.pred_hash; 333 | let lnz_len = graph.lnz.len(); 334 | let mut r_values: Vec = vec![-1; lnz_len]; 335 | r_values[lnz_len - 1] = 0; 336 | 337 | for (p, _) in pred_hash.get_preds_and_paths(lnz_len - 1) { 338 | r_values[p] = 1; 339 | } 340 | 341 | for i in (1..lnz_len - 1).rev() { 342 | if r_values[i] == -1 || r_values[i] > r_values[i + 1] + 1 { 343 | r_values[i] = r_values[i + 1] + 1; 344 | } 345 | if nwp[i] { 346 | for (p, _) in pred_hash.get_preds_and_paths(i) { 347 | if r_values[p] == -1 || r_values[p] > r_values[i] + 1 { 348 | r_values[p] = r_values[i] + 1; 349 | } 350 | } 351 | } 352 | } 353 | r_values 354 | } 355 | 356 | #[cfg(test)] 357 | mod tests { 358 | use handlegraph::{ 359 | handle::Edge, hashgraph::HashGraph, mutablehandlegraph::MutableHandleGraph, 360 | pathgraph::PathHandleGraph, 361 | }; 362 | 363 | #[test] 364 | fn pathwise_graph_correctly_created() { 365 | let mut graph: HashGraph = HashGraph::new(); 366 | let h1 = graph.append_handle("A".as_bytes()); 367 | let h2 = graph.append_handle("T".as_bytes()); 368 | let h3 = graph.append_handle("C".as_bytes()); 369 | let h4 = graph.append_handle("G".as_bytes()); 370 | 371 | graph.create_edge(&Edge(h1, h2)); 372 | graph.create_edge(&Edge(h1, h3)); 373 | graph.create_edge(&Edge(h2, h4)); 374 | graph.create_edge(&Edge(h3, h4)); 375 | 376 | let p1 = graph.create_path_handle(&['1' as u8], false); 377 | let p2 = graph.create_path_handle(&['2' as u8], false); 378 | 379 | graph.append_step(&p1, h1); 380 | graph.append_step(&p1, h2); 381 | graph.append_step(&p1, h4); 
382 | graph.append_step(&p2, h1); 383 | graph.append_step(&p2, h3); 384 | graph.append_step(&p2, h4); 385 | 386 | let graph_struct = super::create_path_graph(&graph, false); 387 | 388 | assert_eq!(graph_struct.paths_number, 2); 389 | 390 | assert_eq!(graph_struct.lnz, ['$', 'A', 'T', 'C', 'G', 'F']); 391 | assert_eq!(graph_struct.nwp[2], true); 392 | 393 | let paths_h2 = &graph_struct.paths_nodes[2]; 394 | assert_eq!(paths_h2[0], true); 395 | assert_eq!(paths_h2[1], false); 396 | 397 | let paths_start = &graph_struct.paths_nodes[0]; 398 | let paths_end = &graph_struct.paths_nodes[5]; 399 | 400 | assert!(paths_start[0]); 401 | assert!(paths_start[1]); 402 | assert!(paths_end[0]); 403 | assert!(paths_end[1]); 404 | } 405 | #[test] 406 | pub fn multiple_starts_and_ends_pathwise() { 407 | let mut graph: HashGraph = HashGraph::new(); 408 | let h1 = graph.append_handle("A".as_bytes()); 409 | let h1_bis = graph.append_handle("B".as_bytes()); 410 | 411 | let h2 = graph.append_handle("T".as_bytes()); 412 | let h3 = graph.append_handle("C".as_bytes()); 413 | let h4 = graph.append_handle("G".as_bytes()); 414 | let h4_bis = graph.append_handle("H".as_bytes()); 415 | 416 | graph.create_edge(&Edge(h1, h2)); 417 | graph.create_edge(&Edge(h1, h3)); 418 | graph.create_edge(&Edge(h2, h4)); 419 | graph.create_edge(&Edge(h3, h4)); 420 | graph.create_edge(&Edge(h1_bis, h4_bis)); 421 | 422 | let p1 = graph.create_path_handle(&['1' as u8], false); 423 | let p2 = graph.create_path_handle(&['2' as u8], false); 424 | let p3 = graph.create_path_handle(&['3' as u8], false); 425 | 426 | graph.append_step(&p1, h1); 427 | graph.append_step(&p1, h2); 428 | graph.append_step(&p1, h4); 429 | graph.append_step(&p2, h1); 430 | graph.append_step(&p2, h3); 431 | graph.append_step(&p2, h4); 432 | graph.append_step(&p3, h1_bis); 433 | graph.append_step(&p3, h4_bis); 434 | 435 | let graph_struct = super::create_path_graph(&graph, false); 436 | 437 | assert_eq!(graph_struct.paths_number, 3); 438 | let 
paths_h2 = &graph_struct.paths_nodes[3]; 439 | assert_eq!(paths_h2[0], true); 440 | assert_eq!(paths_h2[1], false); 441 | 442 | let paths_start = &graph_struct.paths_nodes[0]; 443 | let paths_end = &graph_struct.paths_nodes[7]; 444 | 445 | assert!(paths_start[0]); 446 | assert!(paths_start[1]); 447 | assert!(paths_end[0]); 448 | assert!(paths_end[1]); 449 | } 450 | 451 | #[test] 452 | fn reverse_pathwise_graph_correctly_created() { 453 | let mut graph: HashGraph = HashGraph::new(); 454 | let h1 = graph.append_handle("A".as_bytes()); 455 | let h2 = graph.append_handle("T".as_bytes()); 456 | let h3 = graph.append_handle("C".as_bytes()); 457 | let h4 = graph.append_handle("G".as_bytes()); 458 | 459 | graph.create_edge(&Edge(h1, h2)); 460 | graph.create_edge(&Edge(h1, h3)); 461 | graph.create_edge(&Edge(h2, h4)); 462 | graph.create_edge(&Edge(h3, h4)); 463 | 464 | let p1 = graph.create_path_handle(&['1' as u8], false); 465 | let p2 = graph.create_path_handle(&['2' as u8], false); 466 | 467 | graph.append_step(&p1, h1); 468 | graph.append_step(&p1, h2); 469 | graph.append_step(&p1, h4); 470 | graph.append_step(&p2, h1); 471 | graph.append_step(&p2, h3); 472 | graph.append_step(&p2, h4); 473 | 474 | let graph_struct = super::create_path_graph(&graph, true); 475 | 476 | assert_eq!(graph_struct.paths_number, 2); 477 | 478 | assert_eq!(graph_struct.lnz, ['$', 'C', 'G', 'A', 'T', 'F']); 479 | assert_eq!(graph_struct.nwp[2], true); 480 | 481 | let paths_h2 = &graph_struct.paths_nodes[2]; 482 | assert_eq!(paths_h2[0], false); 483 | assert_eq!(paths_h2[1], true); 484 | let paths_h3 = &graph_struct.paths_nodes[3]; 485 | assert_eq!(paths_h3[0], true); 486 | assert_eq!(paths_h3[1], false); 487 | 488 | let paths_start = &graph_struct.paths_nodes[0]; 489 | let paths_end = &graph_struct.paths_nodes[5]; 490 | 491 | assert!(paths_start[0]); 492 | assert!(paths_start[1]); 493 | assert!(paths_end[0]); 494 | assert!(paths_end[1]); 495 | } 496 | 497 | #[test] 498 | fn 
test_pred_hash_struct() { 499 | let mut graph: HashGraph = HashGraph::new(); 500 | let h1 = graph.append_handle("A".as_bytes()); 501 | let h1_bis = graph.append_handle("B".as_bytes()); 502 | 503 | let h2 = graph.append_handle("T".as_bytes()); 504 | let h3 = graph.append_handle("C".as_bytes()); 505 | let h4 = graph.append_handle("G".as_bytes()); 506 | let h4_bis = graph.append_handle("H".as_bytes()); 507 | 508 | graph.create_edge(&Edge(h1, h2)); 509 | graph.create_edge(&Edge(h1, h3)); 510 | graph.create_edge(&Edge(h2, h4)); 511 | graph.create_edge(&Edge(h3, h4)); 512 | graph.create_edge(&Edge(h1_bis, h4_bis)); 513 | 514 | let p1 = graph.create_path_handle(&['1' as u8], false); 515 | let p2 = graph.create_path_handle(&['2' as u8], false); 516 | let p3 = graph.create_path_handle(&['3' as u8], false); 517 | 518 | graph.append_step(&p1, h1); 519 | graph.append_step(&p1, h2); 520 | graph.append_step(&p1, h4); 521 | graph.append_step(&p2, h1); 522 | graph.append_step(&p2, h3); 523 | graph.append_step(&p2, h4); 524 | graph.append_step(&p3, h1_bis); 525 | graph.append_step(&p3, h4_bis); 526 | 527 | let graph_struct = super::create_path_graph(&graph, false); 528 | 529 | let pred_h4 = &graph_struct.pred_hash.get_preds_and_paths(5); 530 | assert_eq!(pred_h4.len(), 2); 531 | for pred in pred_h4 { 532 | if pred.0 == 3usize { 533 | assert!(pred.1[0]); 534 | assert!(!pred.1[1]); 535 | assert!(!pred.1[2]); 536 | } else if pred.0 == 4usize { 537 | assert!(!pred.1[0]); 538 | assert!(pred.1[1]); 539 | assert!(!pred.1[2]); 540 | } else { 541 | panic!("{}", pred.0) 542 | } 543 | } 544 | } 545 | } 546 | -------------------------------------------------------------------------------- /src/pathwise_alignment_output.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use bit_vec::BitVec; 4 | 5 | use crate::{gaf_output::GAFStruct, pathwise_graph::PredHash, utils}; 6 | 7 | pub fn build_alignment( 8 | dpm: &Vec>>, 9 | 
lnz: &Vec, 10 | seq: &[char], 11 | scores: &HashMap<(char, char), i32>, 12 | alphas: &Vec, 13 | best_path: usize, 14 | pred_hash: &PredHash, 15 | nwp: &BitVec, 16 | handles_nodes_id: &Vec, 17 | ending_node: usize, 18 | global_align: bool, 19 | ) -> GAFStruct { 20 | let mut cigar = Vec::new(); 21 | let mut path_length: usize = 0; 22 | let mut i = ending_node; 23 | let mut j = dpm[i].len() - 1; 24 | let mut handle_id_alignment = Vec::new(); 25 | let mut path_sequence = Vec::new(); 26 | 27 | let score = if best_path == alphas[i] { 28 | dpm[i][j][best_path] 29 | } else { 30 | dpm[i][j][best_path] + dpm[i][j][alphas[i]] 31 | }; 32 | while i > 0 && j > 0 { 33 | let mut predecessor = None; 34 | let (d, u, l) = if !nwp[i] { 35 | ( 36 | if alphas[i - 1] == best_path { 37 | dpm[i - 1][j - 1][best_path] 38 | } else { 39 | dpm[i - 1][j - 1][best_path] + dpm[i - 1][j - 1][alphas[i - 1]] 40 | } + scores.get(&(lnz[i], seq[j])).unwrap(), 41 | if alphas[i - 1] == best_path { 42 | dpm[i - 1][j][best_path] 43 | } else { 44 | dpm[i - 1][j][best_path] + dpm[i - 1][j][alphas[i - 1]] 45 | } + scores.get(&(lnz[i], '-')).unwrap(), 46 | if alphas[i] == best_path { 47 | dpm[i][j - 1][best_path] 48 | } else { 49 | dpm[i][j - 1][best_path] + dpm[i][j - 1][alphas[i]] 50 | } + scores.get(&('-', seq[j])).unwrap(), 51 | ) 52 | } else { 53 | let preds = pred_hash.get_preds_and_paths(i); 54 | let (mut d, mut u, mut l) = (0, 0, 0); 55 | for (pred, paths) in preds.iter() { 56 | if paths[best_path] { 57 | predecessor = Some(*pred); 58 | if alphas[*pred] == best_path { 59 | d = dpm[*pred][j - 1][best_path] + scores.get(&(lnz[i], seq[j])).unwrap(); 60 | u = dpm[*pred][j][best_path] + scores.get(&(lnz[i], '-')).unwrap(); 61 | } else { 62 | d = dpm[*pred][j - 1][best_path] 63 | + dpm[*pred][j - 1][alphas[*pred]] 64 | + scores.get(&(lnz[i], seq[j])).unwrap(); 65 | u = dpm[*pred][j][best_path] 66 | + dpm[*pred][j][alphas[*pred]] 67 | + scores.get(&(lnz[i], '-')).unwrap(); 68 | } 69 | if alphas[i] == 
best_path { 70 | l = dpm[i][j - 1][best_path] + scores.get(&('-', seq[j])).unwrap(); 71 | } else { 72 | l = dpm[i][j - 1][best_path] 73 | + dpm[i][j - 1][alphas[i]] 74 | + scores.get(&('-', seq[j])).unwrap(); 75 | } 76 | } 77 | } 78 | (d, u, l) 79 | }; 80 | let max = *[d, u, l].iter().max().unwrap(); 81 | if max == d { 82 | if lnz[i] != seq[j] { 83 | cigar.push('d'); 84 | } else { 85 | cigar.push('D'); 86 | } 87 | handle_id_alignment.push(handles_nodes_id[i]); 88 | path_sequence.push(lnz[i]); 89 | i = if predecessor.is_none() { 90 | i - 1 91 | } else { 92 | predecessor.unwrap() 93 | }; 94 | j -= 1; 95 | path_length += 1; 96 | } else if max == u { 97 | cigar.push('U'); 98 | handle_id_alignment.push(handles_nodes_id[i]); 99 | path_sequence.push(lnz[i]); 100 | i = if predecessor.is_none() { 101 | i - 1 102 | } else { 103 | predecessor.unwrap() 104 | }; 105 | path_length += 1; 106 | } else { 107 | cigar.push('L'); 108 | j -= 1; 109 | } 110 | } 111 | while j > 0 { 112 | cigar.push('L'); 113 | j -= 1; 114 | } 115 | 116 | if global_align { 117 | while i > 0 { 118 | cigar.push('U'); 119 | handle_id_alignment.push(handles_nodes_id[i]); 120 | path_sequence.push(lnz[i]); 121 | path_length += 1; 122 | 123 | let predecessor = if !nwp[i] { 124 | i - 1 125 | } else { 126 | let preds = pred_hash.get_preds_and_paths(i); 127 | let mut p = 0; 128 | for (pred, paths) in preds.iter() { 129 | if paths[best_path] { 130 | p = *pred; 131 | } 132 | } 133 | p 134 | }; 135 | 136 | i = predecessor; 137 | } 138 | } 139 | 140 | cigar.reverse(); 141 | path_sequence.reverse(); 142 | let path_string_sequence: String = path_sequence.into_iter().collect(); 143 | 144 | let query_name = String::from("Temp"); 145 | let seq_length = dpm[0].len() - 1; 146 | let query_start = 0; 147 | let query_end = dpm[0].len() - 2; 148 | let strand = '+'; 149 | handle_id_alignment.dedup(); 150 | handle_id_alignment.reverse(); 151 | let path: Vec = handle_id_alignment.iter().map(|id| *id as usize).collect(); 152 | 153 | 
let (path_len, path_start, path_end) = utils::get_path_len_start_end( 154 | &handles_nodes_id, 155 | if i == 0 { i } else { i + 1 }, 156 | ending_node, 157 | path_length, 158 | ); 159 | let align_block_length = "*"; // to set 160 | let mapping_quality = "*"; // to set 161 | let comments = format!( 162 | "{}, best path: {}, score: {}\t{}", 163 | build_cigar(&cigar), 164 | best_path, 165 | score, 166 | path_string_sequence 167 | ); 168 | let gaf_output = GAFStruct::build_gaf_struct( 169 | query_name, 170 | seq_length, 171 | query_start, 172 | query_end, 173 | strand, 174 | path, 175 | path_len, 176 | path_start, 177 | path_end, 178 | 0, 179 | String::from(align_block_length), 180 | String::from(mapping_quality), 181 | comments, 182 | ); 183 | gaf_output 184 | } 185 | 186 | pub fn build_alignment_gap( 187 | dpm: &Vec>>, 188 | x: &Vec>>, 189 | y: &Vec>>, 190 | alphas: &Vec, 191 | best_path: usize, 192 | pred_hash: &PredHash, 193 | nwp: &BitVec, 194 | ) -> String { 195 | let mut cigar = Vec::new(); 196 | let mut i = 0; 197 | let ending_nodes = pred_hash.get_preds_and_paths(dpm.len() - 1); 198 | for (node, paths) in ending_nodes.iter() { 199 | if paths[best_path] { 200 | i = *node; 201 | } 202 | } 203 | let mut j = dpm[i].len() - 1; 204 | while i != 0 && j != 0 { 205 | let curr_score = if alphas[i] == best_path { 206 | dpm[i][j][best_path] 207 | } else { 208 | dpm[i][j][best_path] + dpm[i][j][alphas[i]] 209 | }; 210 | let mut predecessor = None; 211 | let (d, u, l) = if !nwp[i] { 212 | ( 213 | if alphas[i - 1] == best_path { 214 | dpm[i - 1][j - 1][best_path] 215 | } else { 216 | dpm[i - 1][j - 1][best_path] + dpm[i - 1][j - 1][alphas[i - 1]] 217 | }, 218 | if alphas[i - 1] == best_path { 219 | dpm[i - 1][j][best_path] 220 | } else { 221 | dpm[i - 1][j][best_path] + dpm[i - 1][j][alphas[i - 1]] 222 | }, 223 | if alphas[i] == best_path { 224 | dpm[i][j - 1][best_path] 225 | } else { 226 | dpm[i][j - 1][best_path] + dpm[i][j - 1][alphas[i]] 227 | }, 228 | ) 229 | } else { 
230 | let preds = pred_hash.get_preds_and_paths(i); 231 | let (mut d, mut u, mut l) = (0, 0, 0); 232 | for (pred, paths) in preds.iter() { 233 | if paths[best_path] { 234 | predecessor = Some(*pred); 235 | if alphas[*pred] == best_path { 236 | d = dpm[*pred][j - 1][best_path]; 237 | u = dpm[*pred][j][best_path]; 238 | } else { 239 | d = dpm[*pred][j - 1][best_path] + dpm[*pred][j - 1][alphas[*pred]]; 240 | u = dpm[*pred][j][best_path] + dpm[*pred][j][alphas[*pred]]; 241 | } 242 | if alphas[i] == best_path { 243 | l = dpm[i][j - 1][best_path]; 244 | } else { 245 | l = dpm[i][j - 1][best_path] + dpm[i][j - 1][alphas[i]]; 246 | } 247 | } 248 | } 249 | (d, u, l) 250 | }; 251 | let max = *[d, u, l].iter().max().unwrap(); 252 | if max == d { 253 | if curr_score < d { 254 | cigar.push('d'); 255 | } else { 256 | cigar.push('D'); 257 | } 258 | 259 | i = if predecessor.is_none() { 260 | i - 1 261 | } else { 262 | predecessor.unwrap() 263 | }; 264 | j -= 1; 265 | } else if max == u { 266 | cigar.push('U'); 267 | i = if predecessor.is_none() { 268 | i - 1 269 | } else { 270 | predecessor.unwrap() 271 | }; 272 | while dpm[i][j][best_path] < y[i][j][best_path] { 273 | cigar.push('U'); 274 | if nwp[i] { 275 | let preds = pred_hash.get_preds_and_paths(i); 276 | for (pred, paths) in preds.iter() { 277 | if paths[best_path] { 278 | predecessor = Some(*pred); 279 | } 280 | } 281 | } else { 282 | predecessor = Some(i - 1); 283 | } 284 | i = predecessor.unwrap(); 285 | } 286 | } else { 287 | cigar.push('L'); 288 | j -= 1; 289 | while dpm[i][j][best_path] < x[i][j][best_path] { 290 | cigar.push('L'); 291 | j -= 1; 292 | } 293 | } 294 | } 295 | while j > 0 { 296 | cigar.push('L'); 297 | j -= 1; 298 | } 299 | while i > 0 { 300 | cigar.push('U'); 301 | i -= 1; 302 | } 303 | cigar.reverse(); 304 | cigar.pop(); 305 | build_cigar(&cigar) 306 | } 307 | 308 | pub fn build_alignment_semiglobal_gap( 309 | dpm: &Vec>>, 310 | x: &Vec>>, 311 | y: &Vec>>, 312 | alphas: &Vec, 313 | best_path: usize, 
314 | pred_hash: &PredHash, 315 | nwp: &BitVec, 316 | ending_node: usize, 317 | ) -> String { 318 | let mut cigar = Vec::new(); 319 | let mut i = ending_node; 320 | let mut j = dpm[i].len() - 1; 321 | while i != 0 && j != 0 { 322 | let curr_score = if alphas[i] == best_path { 323 | dpm[i][j][best_path] 324 | } else { 325 | dpm[i][j][best_path] + dpm[i][j][alphas[i]] 326 | }; 327 | let mut predecessor = None; 328 | let (d, u, l) = if !nwp[i] { 329 | ( 330 | if alphas[i - 1] == best_path { 331 | dpm[i - 1][j - 1][best_path] 332 | } else { 333 | dpm[i - 1][j - 1][best_path] + dpm[i - 1][j - 1][alphas[i - 1]] 334 | }, 335 | if alphas[i - 1] == best_path { 336 | dpm[i - 1][j][best_path] 337 | } else { 338 | dpm[i - 1][j][best_path] + dpm[i - 1][j][alphas[i - 1]] 339 | }, 340 | if alphas[i] == best_path { 341 | dpm[i][j - 1][best_path] 342 | } else { 343 | dpm[i][j - 1][best_path] + dpm[i][j - 1][alphas[i]] 344 | }, 345 | ) 346 | } else { 347 | let preds = pred_hash.get_preds_and_paths(i); 348 | let (mut d, mut u, mut l) = (0, 0, 0); 349 | for (pred, paths) in preds.iter() { 350 | if paths[best_path] { 351 | predecessor = Some(*pred); 352 | if alphas[*pred] == best_path { 353 | d = dpm[*pred][j - 1][best_path]; 354 | u = dpm[*pred][j][best_path]; 355 | } else { 356 | d = dpm[*pred][j - 1][best_path] + dpm[*pred][j - 1][alphas[*pred]]; 357 | u = dpm[*pred][j][best_path] + dpm[*pred][j][alphas[*pred]]; 358 | } 359 | if alphas[i] == best_path { 360 | l = dpm[i][j - 1][best_path]; 361 | } else { 362 | l = dpm[i][j - 1][best_path] + dpm[i][j - 1][alphas[i]]; 363 | } 364 | } 365 | } 366 | (d, u, l) 367 | }; 368 | let max = *[d, u, l].iter().max().unwrap(); 369 | if max == d { 370 | if curr_score < d { 371 | cigar.push('d'); 372 | } else { 373 | cigar.push('D'); 374 | } 375 | 376 | i = if predecessor.is_none() { 377 | i - 1 378 | } else { 379 | predecessor.unwrap() 380 | }; 381 | j -= 1; 382 | } else if max == u { 383 | cigar.push('U'); 384 | i = if predecessor.is_none() { 385 
/// Collapses the three-dimensional pathwise DP matrix into the two-dimensional
/// score matrix of `best_path`.
///
/// For each node `i` and column `j` the result holds `dpm[i][j][best_path]` when
/// `alphas[i] == best_path`, and `dpm[i][j][best_path] + dpm[i][j][alphas[i]]`
/// otherwise. An empty `dpm` yields an empty matrix; each output row has the
/// length of the corresponding `dpm` row.
///
/// # Panics
/// Panics if `alphas` is shorter than `dpm`, or if `best_path` or an alpha is not
/// a valid index into a cell.
pub fn extract_best_path_matrix(
    best_path: usize,
    dpm: &Vec<Vec<Vec<i32>>>,
    alphas: &Vec<usize>,
) -> Vec<Vec<i32>> {
    dpm.iter()
        .enumerate()
        .map(|(node, row)| {
            let alpha = alphas[node];
            row.iter()
                .map(|cell| {
                    if alpha == best_path {
                        cell[best_path]
                    } else {
                        cell[best_path] + cell[alpha]
                    }
                })
                .collect()
        })
        .collect()
}

/// Run-length encodes a sequence of traceback steps into a CIGAR-like string.
///
/// Steps are translated as `'D'` -> `M`, `'U'` -> `I`, `'d'` -> `X` and any other
/// character -> `D`; every maximal run of equal operations is written as
/// `<count><op>`. An empty input yields an empty string.
pub fn build_cigar(cigar: &Vec<char>) -> String {
    fn op_code(step: char) -> char {
        match step {
            'D' => 'M',
            'U' => 'I',
            'd' => 'X',
            _ => 'D',
        }
    }

    let mut output_string = String::with_capacity(cigar.len());
    let mut ops = cigar.iter().map(|&step| op_code(step)).peekable();
    while let Some(op) = ops.next() {
        let mut run_length: usize = 1;
        while ops.peek() == Some(&op) {
            ops.next();
            run_length += 1;
        }
        // Append in place; the output is never rebuilt from scratch.
        output_string.push_str(&run_length.to_string());
        output_string.push(op);
    }
    output_string
}
1][j][alphas[i]] 42 | + dpm[i - 1][j][alphas[i - 1]] 43 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 44 | 45 | for (path, is_in) in common_paths.iter().enumerate() { 46 | if is_in && path != alphas[i] { 47 | dpm[i][j][path] = dpm[i - 1][j][path] - dpm[i - 1][j][alphas[i]] 48 | } 49 | } 50 | } 51 | } else { 52 | let mut alphas_deltas = HashMap::new(); 53 | for (p, p_paths) in pred_hash.get_preds_and_paths(i) { 54 | let mut common_paths = path_node[i].clone(); 55 | common_paths.and(&p_paths); 56 | 57 | if common_paths[alphas[p]] { 58 | let paths = common_paths 59 | .iter() 60 | .enumerate() 61 | .filter_map(|(path_id, is_in)| match is_in { 62 | true => Some(path_id), 63 | false => None, 64 | }) 65 | .collect::>(); 66 | alphas_deltas.insert(alphas[p], paths); 67 | 68 | dpm[i][j][alphas[p]] = dpm[p][j][alphas[p]] 69 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 70 | for (path, is_in) in common_paths.iter().enumerate() { 71 | if is_in && path != alphas[p] { 72 | dpm[i][j][path] = dpm[p][j][path]; 73 | } 74 | } 75 | } else { 76 | //set new alpha 77 | let temp_alpha = if common_paths[alphas[i]] { 78 | alphas[i] 79 | } else { 80 | common_paths.iter().position(|is_in| is_in).unwrap() 81 | }; 82 | let paths = common_paths 83 | .iter() 84 | .enumerate() 85 | .filter_map(|(path_id, is_in)| match is_in { 86 | true => Some(path_id), 87 | false => None, 88 | }) 89 | .collect::>(); 90 | alphas_deltas.insert(temp_alpha, paths); 91 | 92 | dpm[i][j][temp_alpha] = dpm[p][j][alphas[p]] 93 | + dpm[p][j][temp_alpha] 94 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 95 | 96 | for (path, is_in) in common_paths.iter().enumerate() { 97 | if is_in { 98 | if path != temp_alpha { 99 | dpm[i][j][path] = 100 | dpm[p][j][path] - dpm[p][j][temp_alpha]; 101 | } 102 | } 103 | } 104 | } 105 | } 106 | // remove multiple alpha 107 | if alphas_deltas.keys().len() > 0 { 108 | for (a, delta) in alphas_deltas.iter() { 109 | if *a != alphas[i] { 110 | dpm[i][j][*a] -= dpm[i][j][alphas[i]]; 111 | for path 
in delta.iter() { 112 | if path != a { 113 | dpm[i][j][*path] += dpm[i][j][*a]; 114 | } 115 | } 116 | } 117 | } 118 | } 119 | } 120 | } 121 | (0, _) => { 122 | dpm[i][j][alphas[0]] = 123 | dpm[i][j - 1][alphas[0]] + score_matrix.get(&(sequence[j], '-')).unwrap(); 124 | for k in alphas[0] + 1..path_number { 125 | dpm[i][j][k] = dpm[i][j - 1][k]; 126 | } 127 | } 128 | _ => { 129 | if !nodes_with_pred[i] { 130 | let mut common_paths = path_node[i].clone(); 131 | common_paths.and(&path_node[i - 1]); 132 | 133 | if common_paths[alphas[i - 1]] { 134 | let u = dpm[i - 1][j][alphas[i - 1]] 135 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 136 | let d = dpm[i - 1][j - 1][alphas[i - 1]] 137 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 138 | let l = dpm[i][j - 1][alphas[i]] 139 | + score_matrix.get(&(sequence[j], '-')).unwrap(); 140 | 141 | dpm[i][j][alphas[i]] = *[d, u, l].iter().max().unwrap(); 142 | 143 | for (path, is_in) in common_paths.iter().enumerate() { 144 | if is_in { 145 | if path != alphas[i] { 146 | if dpm[i][j][alphas[i]] == d { 147 | dpm[i][j][path] = dpm[i - 1][j - 1][path]; 148 | } else if dpm[i][j][alphas[i]] == u { 149 | dpm[i][j][path] = dpm[i - 1][j][path]; 150 | } else { 151 | dpm[i][j][path] = dpm[i][j - 1][path]; 152 | } 153 | } 154 | } 155 | } 156 | } else { 157 | let u = dpm[i - 1][j][alphas[i - 1]] 158 | + dpm[i - 1][j][alphas[i]] 159 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 160 | let d = dpm[i - 1][j - 1][alphas[i - 1]] 161 | + dpm[i - 1][j - 1][alphas[i]] 162 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 163 | let l = dpm[i][j - 1][alphas[i]] 164 | + score_matrix.get(&(sequence[j], '-')).unwrap(); 165 | dpm[i][j][alphas[i]] = *[d, u, l].iter().max().unwrap(); 166 | 167 | for (path, is_in) in common_paths.iter().enumerate() { 168 | if is_in { 169 | if path != alphas[i] { 170 | if dpm[i][j][alphas[i]] == d { 171 | dpm[i][j][path] = dpm[i - 1][j - 1][path] 172 | - dpm[i - 1][j - 1][alphas[i]]; 173 | } else if 
dpm[i][j][alphas[i]] == u { 174 | dpm[i][j][path] = 175 | dpm[i - 1][j][path] - dpm[i - 1][j][alphas[i]]; 176 | } else { 177 | dpm[i][j][path] = dpm[i][j - 1][path]; 178 | } 179 | } 180 | } 181 | } 182 | } 183 | } else { 184 | // multiple alphas possible 185 | let mut alphas_deltas = HashMap::new(); 186 | for (p, p_paths) in pred_hash.get_preds_and_paths(i) { 187 | let mut common_paths = path_node[i].clone(); 188 | common_paths.and(&p_paths); 189 | 190 | if common_paths[alphas[p]] { 191 | let paths = common_paths 192 | .iter() 193 | .enumerate() 194 | .filter_map(|(path_id, is_in)| match is_in { 195 | true => Some(path_id), 196 | false => None, 197 | }) 198 | .collect::>(); 199 | alphas_deltas.insert(alphas[p], paths); 200 | 201 | let u = dpm[p][j][alphas[p]] 202 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 203 | let d = dpm[p][j - 1][alphas[p]] 204 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 205 | let l = if alphas[i] == alphas[p] { 206 | dpm[i][j - 1][alphas[p]] 207 | + score_matrix.get(&(sequence[j], '-')).unwrap() 208 | } else { 209 | dpm[i][j - 1][alphas[p]] 210 | + dpm[i][j - 1][alphas[i]] 211 | + score_matrix.get(&(sequence[j], '-')).unwrap() 212 | }; 213 | dpm[i][j][alphas[p]] = *[d, u, l].iter().max().unwrap(); 214 | 215 | for (path, is_in) in common_paths.iter().enumerate() { 216 | if is_in { 217 | if path != alphas[p] { 218 | if dpm[i][j][alphas[p]] == d { 219 | dpm[i][j][path] = dpm[p][j - 1][path]; 220 | } else if dpm[i][j][alphas[p]] == u { 221 | dpm[i][j][path] = dpm[p][j][path]; 222 | } else { 223 | if alphas[p] == alphas[i] { 224 | dpm[i][j][path] = dpm[i][j - 1][path]; 225 | } else { 226 | dpm[i][j][path] = dpm[i][j - 1][path] 227 | - dpm[i][j - 1][alphas[p]]; 228 | } 229 | } 230 | } 231 | } 232 | } 233 | } else { 234 | //set new alpha 235 | let temp_alpha = if common_paths[alphas[i]] { 236 | alphas[i] 237 | } else { 238 | common_paths.iter().position(|is_in| is_in).unwrap() 239 | }; 240 | let paths = common_paths 241 | .iter() 242 | 
.enumerate() 243 | .filter_map(|(path_id, is_in)| match is_in { 244 | true => Some(path_id), 245 | false => None, 246 | }) 247 | .collect::>(); 248 | alphas_deltas.insert(temp_alpha, paths); 249 | 250 | let u = dpm[p][j][alphas[p]] 251 | + dpm[p][j][temp_alpha] 252 | + score_matrix.get(&(lnz[i], '-')).unwrap(); 253 | let d = dpm[p][j - 1][alphas[p]] 254 | + dpm[p][j - 1][temp_alpha] 255 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 256 | let l = if alphas[i] == temp_alpha { 257 | dpm[i][j - 1][temp_alpha] 258 | + score_matrix.get(&(sequence[j], '-')).unwrap() 259 | } else { 260 | dpm[i][j - 1][temp_alpha] 261 | + dpm[i][j - 1][alphas[i]] 262 | + score_matrix.get(&(sequence[j], '-')).unwrap() 263 | }; 264 | dpm[i][j][temp_alpha] = *[d, u, l].iter().max().unwrap(); 265 | 266 | for (path, is_in) in common_paths.iter().enumerate() { 267 | if path != temp_alpha { 268 | if is_in { 269 | if dpm[i][j][temp_alpha] == d { 270 | dpm[i][j][path] = 271 | dpm[p][j - 1][path] - dpm[p][j - 1][temp_alpha]; 272 | } else if dpm[i][j][temp_alpha] == u { 273 | dpm[i][j][path] = 274 | dpm[p][j][path] - dpm[p][j][temp_alpha]; 275 | } else { 276 | if temp_alpha == alphas[i] { 277 | dpm[i][j][path] = dpm[i][j - 1][path]; 278 | } else { 279 | dpm[i][j][path] = dpm[i][j - 1][path] 280 | - dpm[i][j - 1][temp_alpha]; 281 | } 282 | } 283 | } 284 | } 285 | } 286 | } 287 | } 288 | if alphas_deltas.keys().len() > 0 { 289 | for (a, delta) in alphas_deltas.iter() { 290 | if *a != alphas[i] { 291 | dpm[i][j][*a] -= dpm[i][j][alphas[i]]; 292 | for path in delta.iter() { 293 | if path != a { 294 | dpm[i][j][*path] += dpm[i][j][*a]; 295 | } 296 | } 297 | } 298 | } 299 | } 300 | } 301 | } 302 | } 303 | } 304 | } 305 | let mut ending_nodes = vec![0; path_number]; 306 | let mut results = vec![0; path_number]; 307 | for (pred, paths) in pred_hash.get_preds_and_paths(lnz.len() - 1) { 308 | for (path, is_in) in paths.iter().enumerate() { 309 | if is_in { 310 | if path == alphas[pred] { 311 | 
results[path] = dpm[pred][dpm[pred].len() - 1][path]; 312 | } else { 313 | results[path] = dpm[pred][dpm[pred].len() - 1][path] 314 | + dpm[pred][dpm[pred].len() - 1][alphas[pred]] 315 | } 316 | ending_nodes[path] = pred; 317 | } 318 | } 319 | } 320 | let best_path = results 321 | .iter() 322 | .enumerate() 323 | .map(|(path, score)| (score, path)) 324 | .max(); 325 | let final_node = ending_nodes[best_path.unwrap().1]; 326 | let gaf = build_alignment( 327 | &dpm, 328 | lnz, 329 | sequence, 330 | score_matrix, 331 | &alphas, 332 | best_path.unwrap().1, 333 | &pred_hash, 334 | &nodes_with_pred, 335 | &graph.nodes_id_pos, 336 | final_node, 337 | true, 338 | ); 339 | gaf 340 | } 341 | /* 342 | #[cfg(test)] 343 | mod tests { 344 | use crate::{pathwise_graph, score_matrix::create_score_matrix_match_mis, sequences}; 345 | use handlegraph::{ 346 | handle::Edge, hashgraph::HashGraph, mutablehandlegraph::MutableHandleGraph, 347 | pathgraph::PathHandleGraph, 348 | }; 349 | 350 | #[test] 351 | fn correct_score_simple_graph() { 352 | let mut graph: HashGraph = HashGraph::new(); 353 | let h1 = graph.append_handle("A".as_bytes()); 354 | let h2 = graph.append_handle("T".as_bytes()); 355 | let h3 = graph.append_handle("C".as_bytes()); 356 | let h4 = graph.append_handle("G".as_bytes()); 357 | 358 | graph.create_edge(&Edge(h1, h2)); 359 | graph.create_edge(&Edge(h1, h3)); 360 | graph.create_edge(&Edge(h2, h4)); 361 | graph.create_edge(&Edge(h3, h4)); 362 | 363 | let p1 = graph.create_path_handle(&['1' as u8], false); 364 | let p2 = graph.create_path_handle(&['2' as u8], false); 365 | 366 | graph.append_step(&p1, h1); 367 | graph.append_step(&p1, h2); 368 | graph.append_step(&p1, h4); 369 | graph.append_step(&p2, h1); 370 | graph.append_step(&p2, h3); 371 | graph.append_step(&p2, h4); 372 | 373 | let graph_struct = crate::pathwise_graph::create_path_graph(&graph, false); 374 | 375 | let sequence = ['$', 'A', 'T', 'G']; 376 | 377 | let score_matrix = create_score_matrix_match_mis(2, 
-4); 378 | 379 | let best_path = super::exec(&sequence, &graph_struct, &score_matrix); 380 | 381 | assert_eq!(best_path, 0); 382 | } 383 | 384 | #[test] 385 | fn correct_score_normal_graph() { 386 | let graph = pathwise_graph::read_graph_w_path(&"./prova.gfa", false); 387 | 388 | let sequences = sequences::get_sequences(String::from("./sequences.fa")); 389 | 390 | let score_matrix = create_score_matrix_match_mis(2, -4); 391 | 392 | let best_path = super::exec(&sequences.0[0], &graph, &score_matrix); 393 | 394 | assert_eq!(best_path, 0); 395 | } 396 | } 397 | */ 398 | -------------------------------------------------------------------------------- /src/pathwise_alignment_gap_semi.rs: -------------------------------------------------------------------------------- 1 | use crate::pathwise_alignment_output::build_alignment_semiglobal_gap; 2 | use crate::pathwise_graph::PathGraph; 3 | use std::collections::HashMap; 4 | 5 | pub fn exec( 6 | sequence: &[char], 7 | graph: &PathGraph, 8 | score_matrix: &HashMap<(char, char), i32>, 9 | o: i32, 10 | e: i32, 11 | ) -> usize { 12 | let lnz = &graph.lnz; 13 | let nodes_with_pred = &graph.nwp; 14 | let pred_hash = &graph.pred_hash; 15 | let path_number = graph.paths_number; 16 | let path_node = &graph.paths_nodes; 17 | 18 | let mut dpm = vec![vec![vec![0; path_number]; sequence.len()]; lnz.len()]; 19 | let mut x = vec![vec![vec![0; path_number]; sequence.len()]; lnz.len()]; 20 | let mut y = vec![vec![vec![0; path_number]; sequence.len()]; lnz.len()]; 21 | let alphas = &graph.alphas; 22 | for i in 0..lnz.len() - 1 { 23 | for j in 0..sequence.len() { 24 | match (i, j) { 25 | (0, 0) => { 26 | dpm[i][j] = vec![0; path_number]; 27 | } 28 | (_, 0) => dpm[0][0] = vec![0; path_number], 29 | (0, _) => { 30 | //set y 31 | y[i][j][alphas[0]] = o + e * j as i32; 32 | dpm[i][j][alphas[0]] = y[i][j][alphas[0]]; 33 | for k in alphas[0] + 1..path_number { 34 | y[i][j][k] = y[i][j - 1][k]; 35 | dpm[i][j][k] = y[i][j][k]; 36 | } 37 | } 38 | _ => { 
39 | if !nodes_with_pred[i] { 40 | let mut common_paths = path_node[i].clone(); 41 | common_paths.and(&path_node[i - 1]); 42 | 43 | if common_paths[alphas[i - 1]] { 44 | //set y 45 | let u_y = y[i - 1][j][alphas[i - 1]] + e; 46 | let u_dpm = dpm[i - 1][j][alphas[i - 1]] + o + e; 47 | 48 | y[i][j][alphas[i]] = if u_dpm >= u_y { 49 | for (path, is_in) in common_paths.iter().enumerate() { 50 | if is_in { 51 | if path != alphas[i] { 52 | y[i][j][path] = dpm[i - 1][j][path]; 53 | } 54 | } 55 | } 56 | u_dpm 57 | } else { 58 | for (path, is_in) in common_paths.iter().enumerate() { 59 | if is_in { 60 | if path != alphas[i] { 61 | y[i][j][path] = y[i - 1][j][path]; 62 | } 63 | } 64 | } 65 | u_y 66 | }; 67 | 68 | let u = y[i][j][alphas[i]]; 69 | 70 | //set x 71 | let l_x = x[i][j - 1][alphas[i]] + e; 72 | let l_dpm = dpm[i][j - 1][alphas[i]] + o + e; 73 | 74 | x[i][j][alphas[i]] = if l_dpm >= l_x { 75 | for (path, is_in) in common_paths.iter().enumerate() { 76 | if is_in { 77 | if path != alphas[i] { 78 | x[i][j][path] = dpm[i][j - 1][path]; 79 | } 80 | } 81 | } 82 | l_dpm 83 | } else { 84 | for (path, is_in) in common_paths.iter().enumerate() { 85 | if is_in { 86 | if path != alphas[i] { 87 | x[i][j][path] = x[i][j - 1][path]; 88 | } 89 | } 90 | } 91 | l_x 92 | }; 93 | 94 | let l = x[i][j][alphas[i]]; 95 | 96 | //set dpm 97 | let d = dpm[i - 1][j - 1][alphas[i - 1]] 98 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 99 | 100 | dpm[i][j][alphas[i]] = *[d, u, l].iter().max().unwrap(); 101 | 102 | for (path, is_in) in common_paths.iter().enumerate() { 103 | if is_in { 104 | if path != alphas[i] { 105 | if dpm[i][j][alphas[i]] == d { 106 | dpm[i][j][path] = dpm[i - 1][j - 1][path]; 107 | } else if dpm[i][j][alphas[i]] == u { 108 | dpm[i][j][path] = y[i][j][path]; 109 | } else { 110 | dpm[i][j][path] = x[i][j][path]; 111 | } 112 | } 113 | } 114 | } 115 | } else { 116 | //set y 117 | let u_y = y[i - 1][j][alphas[i - 1]] + y[i - 1][j][alphas[i]] + e; 118 | let u_dpm = 119 | 
dpm[i - 1][j][alphas[i - 1]] + dpm[i - 1][j][alphas[i]] + o + e; 120 | 121 | y[i][j][alphas[i]] = if u_dpm >= u_y { 122 | for (path, is_in) in common_paths.iter().enumerate() { 123 | if is_in { 124 | if path != alphas[i] { 125 | y[i][j][path] = 126 | dpm[i - 1][j][path] - dpm[i - 1][j][alphas[i]]; 127 | } 128 | } 129 | } 130 | u_dpm 131 | } else { 132 | for (path, is_in) in common_paths.iter().enumerate() { 133 | if is_in { 134 | if path != alphas[i] { 135 | y[i][j][path] = 136 | y[i - 1][j][path] - y[i - 1][j][alphas[i]]; 137 | } 138 | } 139 | } 140 | u_y 141 | }; 142 | 143 | let u = y[i][j][alphas[i]]; 144 | 145 | //set x 146 | let l_x = x[i][j - 1][alphas[i]] + e; 147 | let l_dpm = dpm[i][j - 1][alphas[i]] + o + e; 148 | 149 | x[i][j][alphas[i]] = if l_dpm >= l_x { 150 | for (path, is_in) in common_paths.iter().enumerate() { 151 | if is_in { 152 | if path != alphas[i] { 153 | x[i][j][path] = dpm[i][j - 1][path]; 154 | } 155 | } 156 | } 157 | l_dpm 158 | } else { 159 | for (path, is_in) in common_paths.iter().enumerate() { 160 | if is_in { 161 | if path != alphas[i] { 162 | x[i][j][path] = x[i][j - 1][path]; 163 | } 164 | } 165 | } 166 | l_x 167 | }; 168 | 169 | let l = x[i][j][alphas[i]]; 170 | 171 | let d = dpm[i - 1][j - 1][alphas[i - 1]] 172 | + dpm[i - 1][j - 1][alphas[i]] 173 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 174 | 175 | dpm[i][j][alphas[i]] = *[d, u, l].iter().max().unwrap(); 176 | 177 | for (path, is_in) in common_paths.iter().enumerate() { 178 | if is_in { 179 | if path != alphas[i] { 180 | if dpm[i][j][alphas[i]] == d { 181 | dpm[i][j][path] = dpm[i - 1][j - 1][path] 182 | - dpm[i - 1][j - 1][alphas[i]]; 183 | } else if dpm[i][j][alphas[i]] == u { 184 | dpm[i][j][path] = y[i][j][path] 185 | } else { 186 | dpm[i][j][path] = x[i][j][path]; 187 | } 188 | } 189 | } 190 | } 191 | } 192 | } else { 193 | // multiple alphas possible 194 | let mut alphas_deltas = HashMap::new(); 195 | for (p, p_paths) in pred_hash.get_preds_and_paths(i) { 
196 | let mut common_paths = path_node[i].clone(); 197 | common_paths.and(&p_paths); 198 | 199 | if common_paths[alphas[p]] { 200 | let paths = common_paths 201 | .iter() 202 | .enumerate() 203 | .filter_map(|(path_id, is_in)| match is_in { 204 | true => Some(path_id), 205 | false => None, 206 | }) 207 | .collect::>(); 208 | alphas_deltas.insert(alphas[p], paths); 209 | 210 | //set y 211 | let u_y = y[p][j][alphas[p]] + e; 212 | let u_dpm = dpm[p][j][alphas[p]] + o + e; 213 | 214 | y[i][j][alphas[p]] = if u_dpm >= u_y { 215 | for (path, is_in) in common_paths.iter().enumerate() { 216 | if is_in { 217 | if path != alphas[p] { 218 | y[i][j][path] = dpm[p][j][path]; 219 | } 220 | } 221 | } 222 | u_dpm 223 | } else { 224 | for (path, is_in) in common_paths.iter().enumerate() { 225 | if is_in { 226 | if path != alphas[i] { 227 | y[i][j][path] = y[p][j][path]; 228 | } 229 | } 230 | } 231 | u_y 232 | }; 233 | 234 | let u = y[i][j][alphas[p]]; 235 | 236 | //set x 237 | let l_x = if alphas[p] == alphas[i] { 238 | x[i][j - 1][alphas[p]] + e 239 | } else { 240 | x[i][j - 1][alphas[p]] + x[i][j - 1][alphas[i]] + e 241 | }; 242 | let l_dpm = if alphas[p] == alphas[i] { 243 | dpm[i][j - 1][alphas[p]] + o + e 244 | } else { 245 | dpm[i][j - 1][alphas[i]] + dpm[i][j - 1][alphas[p]] + o + e 246 | }; 247 | 248 | x[i][j][alphas[p]] = if l_dpm >= l_x { 249 | for (path, is_in) in common_paths.iter().enumerate() { 250 | if is_in { 251 | if path != alphas[p] { 252 | if alphas[p] == alphas[i] { 253 | x[i][j][path] = dpm[i][j - 1][path]; 254 | } else { 255 | x[i][j][path] = dpm[i][j - 1][path] 256 | - dpm[i][j - 1][alphas[p]]; 257 | } 258 | } 259 | } 260 | } 261 | l_dpm 262 | } else { 263 | for (path, is_in) in common_paths.iter().enumerate() { 264 | if is_in { 265 | if path != alphas[p] { 266 | if alphas[p] == alphas[i] { 267 | x[i][j][path] = x[i][j - 1][path]; 268 | } else { 269 | x[i][j][path] = 270 | x[i][j - 1][path] - x[i][j - 1][alphas[p]]; 271 | } 272 | } 273 | } 274 | } 275 | l_x 
276 | }; 277 | 278 | let l = x[i][j][alphas[p]]; 279 | 280 | //set dpm 281 | let d = dpm[p][j - 1][alphas[p]] 282 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 283 | 284 | dpm[i][j][alphas[p]] = *[d, u, l].iter().max().unwrap(); 285 | 286 | for (path, is_in) in common_paths.iter().enumerate() { 287 | if is_in { 288 | if path != alphas[p] { 289 | if dpm[i][j][alphas[p]] == d { 290 | dpm[i][j][path] = dpm[p][j - 1][path]; 291 | } else if dpm[i][j][alphas[p]] == u { 292 | dpm[i][j][path] = y[i][j][path]; 293 | } else { 294 | dpm[i][j][path] = x[i][j][path]; 295 | } 296 | } 297 | } 298 | } 299 | } else { 300 | //set new alpha 301 | let temp_alpha = if common_paths[alphas[i]] { 302 | alphas[i] 303 | } else { 304 | common_paths.iter().position(|is_in| is_in).unwrap() 305 | }; 306 | let paths = common_paths 307 | .iter() 308 | .enumerate() 309 | .filter_map(|(path_id, is_in)| match is_in { 310 | true => Some(path_id), 311 | false => None, 312 | }) 313 | .collect::>(); 314 | alphas_deltas.insert(temp_alpha, paths); 315 | 316 | //set y 317 | let u_y = y[p][j][alphas[p]] + y[p][j][temp_alpha] + e; 318 | let u_dpm = dpm[p][j][alphas[p]] + dpm[p][j][temp_alpha] + o + e; 319 | 320 | y[i][j][temp_alpha] = if u_dpm >= u_y { 321 | for (path, is_in) in common_paths.iter().enumerate() { 322 | if is_in { 323 | if path != temp_alpha { 324 | y[i][j][path] = 325 | dpm[p][j][path] - dpm[p][j][temp_alpha]; 326 | } 327 | } 328 | } 329 | u_dpm 330 | } else { 331 | for (path, is_in) in common_paths.iter().enumerate() { 332 | if is_in { 333 | if path != temp_alpha { 334 | y[i][j][path] = y[p][j][path] - y[p][j][temp_alpha]; 335 | } 336 | } 337 | } 338 | u_y 339 | }; 340 | 341 | let u = y[i][j][temp_alpha]; 342 | 343 | //set x 344 | let l_x = if alphas[i] == temp_alpha { 345 | x[i][j - 1][alphas[i]] + e 346 | } else { 347 | x[i][j - 1][alphas[i]] + x[i][j - 1][temp_alpha] + e 348 | }; 349 | let l_dpm = if alphas[i] == temp_alpha { 350 | dpm[i][j - 1][alphas[i]] + o + e 351 | } else { 
352 | dpm[i][j - 1][alphas[i]] + dpm[i][j - 1][temp_alpha] + o + e 353 | }; 354 | 355 | x[i][j][temp_alpha] = if l_dpm >= l_x { 356 | for (path, is_in) in common_paths.iter().enumerate() { 357 | if is_in { 358 | if path != temp_alpha { 359 | if temp_alpha == alphas[i] { 360 | x[i][j][path] = dpm[i][j - 1][path] 361 | } else { 362 | x[i][j][path] = dpm[i][j - 1][path] 363 | - dpm[i][j - 1][temp_alpha]; 364 | } 365 | } 366 | } 367 | } 368 | l_dpm 369 | } else { 370 | for (path, is_in) in common_paths.iter().enumerate() { 371 | if is_in { 372 | if path != temp_alpha { 373 | if temp_alpha == alphas[i] { 374 | x[i][j][path] = x[i][j - 1][path] 375 | } else { 376 | x[i][j][path] = 377 | x[i][j - 1][path] - x[i][j - 1][temp_alpha]; 378 | } 379 | } 380 | } 381 | } 382 | l_x 383 | }; 384 | 385 | let l = x[i][j][temp_alpha]; 386 | 387 | let d = dpm[p][j - 1][alphas[p]] 388 | + dpm[p][j - 1][temp_alpha] 389 | + score_matrix.get(&(lnz[i], sequence[j])).unwrap(); 390 | 391 | dpm[i][j][temp_alpha] = *[d, u, l].iter().max().unwrap(); 392 | 393 | for (path, is_in) in common_paths.iter().enumerate() { 394 | if path != temp_alpha { 395 | if is_in { 396 | if dpm[i][j][temp_alpha] == d { 397 | dpm[i][j][path] = 398 | dpm[p][j - 1][path] - dpm[p][j - 1][temp_alpha]; 399 | } else if dpm[i][j][temp_alpha] == u { 400 | dpm[i][j][path] = y[i][j][path]; 401 | } else { 402 | dpm[i][j][path] = x[i][j][path]; 403 | } 404 | } 405 | } 406 | } 407 | } 408 | } 409 | if alphas_deltas.keys().len() > 0 { 410 | for (a, delta) in alphas_deltas.iter() { 411 | if *a != alphas[i] { 412 | dpm[i][j][*a] -= dpm[i][j][alphas[i]]; 413 | x[i][j][*a] -= x[i][j][alphas[i]]; 414 | y[i][j][*a] -= y[i][j][alphas[i]]; 415 | 416 | for path in delta.iter() { 417 | if path != a { 418 | dpm[i][j][*path] += dpm[i][j][*a]; 419 | x[i][j][*path] += x[i][j][*a]; 420 | y[i][j][*path] += y[i][j][*a]; 421 | } 422 | } 423 | } 424 | } 425 | } 426 | } 427 | } 428 | } 429 | } 430 | } 431 | let (final_node, best_path) = 
best_ending_node(&dpm, graph); 432 | let cigar_output = build_alignment_semiglobal_gap( 433 | &dpm, 434 | &x, 435 | &y, 436 | &alphas, 437 | best_path, 438 | &pred_hash, 439 | &nodes_with_pred, 440 | final_node, 441 | ); 442 | println!("{}", cigar_output); 443 | best_path 444 | } 445 | 446 | fn best_ending_node(dpm: &Vec>>, graph: &PathGraph) -> (usize, usize) { 447 | let mut max: Option = None; 448 | let mut ending_node: usize = 0; 449 | let mut chosen_path: usize = 0; 450 | for i in 0..dpm.len() - 1 { 451 | let paths = graph.paths_nodes[i].clone(); 452 | let mut absolute_scores = dpm[i][dpm[i].len() - 1].clone(); 453 | for (path, is_in) in paths.iter().enumerate() { 454 | if is_in { 455 | if path != graph.alphas[i] { 456 | absolute_scores[path] = 457 | absolute_scores[path] + absolute_scores[graph.alphas[i]]; 458 | } 459 | } 460 | } 461 | let best_path = absolute_scores 462 | .iter() 463 | .enumerate() 464 | .map(|(path, score)| (score, path)) 465 | .max(); 466 | if max.is_none() || best_path.unwrap().0 > &max.unwrap() { 467 | max = Some(*best_path.unwrap().0); 468 | ending_node = i; 469 | chosen_path = best_path.unwrap().1; 470 | } 471 | } 472 | (ending_node, chosen_path) 473 | } 474 | --------------------------------------------------------------------------------