├── .Rbuildignore ├── src ├── Makevars ├── Makevars.win ├── computing_kernel.cpp └── RcppExports.cpp ├── .gitignore ├── R ├── doc-kernelForLeon.R ├── RcppExports.R └── computing_kernel.R ├── NAMESPACE ├── DESCRIPTION ├── man └── computing_kernel.Rd └── inst └── include └── kernelforleon ├── computing_kernel.h └── BLOSUM62_2.h /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX17 2 | PKG_CPPFLAGS = -I../inst/include 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX17 2 | PKG_CPPFLAGS = -I../inst/include/ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | *.Rproj 6 | *.o 7 | *.so 8 | src/kernelForLeon.dll 9 | src/main.cpp 10 | .idea/ 11 | cmake-build-debug/ 12 | CMakeLists.txt 13 | inst/doc 14 | -------------------------------------------------------------------------------- /R/doc-kernelForLeon.R: -------------------------------------------------------------------------------- 1 | #' @docType package 2 | #' 3 | #' @importFrom Rcpp sourceCpp 4 | #' @import tidysq 5 | #' @import checkmate 6 | #' @useDynLib "kernelForLeon", .registration = TRUE 7 | #' 8 | #' @name kernelForLeon-package 9 | #' @aliases kernelForLeon 10 | NULL 11 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | 
S3method(computing_kernel,character) 4 | S3method(computing_kernel,default) 5 | export(computing_kernel) 6 | import(checkmate) 7 | import(tidysq) 8 | importFrom(Rcpp,sourceCpp) 9 | useDynLib("kernelForLeon", .registration = TRUE) 10 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | CPP_computing_kernel <- function(x, max_kmer_length, exponential) { 5 | .Call(`_kernelForLeon_CPP_computing_kernel`, x, max_kmer_length, exponential) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /src/computing_kernel.cpp: -------------------------------------------------------------------------------- 1 | #include "Rcpp.h" 2 | #include "kernelforleon/computing_kernel.h" 3 | 4 | // [[Rcpp::export]] 5 | Rcpp::NumericMatrix CPP_computing_kernel(const std::vector& x, 6 | const unsigned long long& max_kmer_length, 7 | const double exponential) { 8 | return correlation_kernel_3(x, max_kmer_length, exponential); 9 | } 10 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: kernelForLeon 2 | Type: Package 3 | Title: Computes a Certain Similarity Matrix on Biological Sequences 4 | Version: 0.0.1 5 | Date: 2021-05-29 6 | Author: Laura 7 | Maintainer: Laura 8 | Description: Computes a similarity matrix for biological sequences without alignment, using a lot of k-mers instead. 
9 | License: GPL (>= 2) 10 | Imports: Rcpp (>= 1.0.6), 11 | checkmate 12 | LinkingTo: Rcpp 13 | RoxygenNote: 7.1.1 14 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | // CPP_computing_kernel 9 | Rcpp::NumericMatrix CPP_computing_kernel(const std::vector& x, const unsigned long long& max_kmer_length, const double exponential); 10 | RcppExport SEXP _kernelForLeon_CPP_computing_kernel(SEXP xSEXP, SEXP max_kmer_lengthSEXP, SEXP exponentialSEXP) { 11 | BEGIN_RCPP 12 | Rcpp::RObject rcpp_result_gen; 13 | Rcpp::RNGScope rcpp_rngScope_gen; 14 | Rcpp::traits::input_parameter< const std::vector& >::type x(xSEXP); 15 | Rcpp::traits::input_parameter< const unsigned long long& >::type max_kmer_length(max_kmer_lengthSEXP); 16 | Rcpp::traits::input_parameter< const double >::type exponential(exponentialSEXP); 17 | rcpp_result_gen = Rcpp::wrap(CPP_computing_kernel(x, max_kmer_length, exponential)); 18 | return rcpp_result_gen; 19 | END_RCPP 20 | } 21 | 22 | static const R_CallMethodDef CallEntries[] = { 23 | {"_kernelForLeon_CPP_computing_kernel", (DL_FUNC) &_kernelForLeon_CPP_computing_kernel, 3}, 24 | {NULL, NULL, 0} 25 | }; 26 | 27 | RcppExport void R_init_kernelForLeon(DllInfo *dll) { 28 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 29 | R_useDynamicSymbols(dll, FALSE); 30 | } 31 | -------------------------------------------------------------------------------- /man/computing_kernel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/computing_kernel.R 3 | \name{computing_kernel} 4 | \alias{computing_kernel} 5 | 
\alias{computing_kernel.character} 6 | \title{Compute sequence correlation matrix with kernel K_hat^3} 7 | \usage{ 8 | computing_kernel(x, max_kmer_length = 5, exponential = 0.125, ...) 9 | 10 | \method{computing_kernel}{character}(x, max_kmer_length = 5, exponential = 0.125, ...) 11 | } 12 | \arguments{ 13 | \item{x}{[\code{character}]\cr 14 | A vector of sequences this function is applied to.} 15 | 16 | \item{max_kmer_length}{[\code{integer(1)}]\cr 17 | Maximum k-mer length used for computations. Higher value results in longer 18 | computation time, but gives more accurate scores. Defaults to 5.} 19 | 20 | \item{exponential}{[\code{numeric(1)}]\cr 21 | A parameter used to control impact of differences between sequences. Higher 22 | value usually results in lower scores. Default value is 0.125, as suggested 23 | in the paper.} 24 | } 25 | \value{ 26 | A symmetric numeric matrix with dimensions n x n, where n is the number of 27 | sequences in supplied sequence vector. A value under index (i, j) is equal to the 28 | similarity of sequences i and j. 29 | } 30 | \description{ 31 | Uses computing kernel to compute sequence similarity. Similarity 32 | is expressed as a number from range [0, 1], where 1 means equality. 33 | } 34 | \details{ 35 | This algorithm is based on the paper called "Towards a Mathematical 36 | Foundation of Immunology and Amino Acid Chains" by Wen-Jun Shen, 37 | Hau-San Wong, Quan-Wu Xiao, Xin Guo and Stephen Smale (arXiv:1205.6031). 38 | 39 | This sequence correlation method does not employ any alignment methods in its 40 | computations. Instead, it bases heavily on k-mers, comparing analogous k-mers 41 | across sequences. 
42 | } 43 | \examples{ 44 | sq_ami <- c("PPAVMMFDILKKIQ", "PQEWYTWLPVMCTN", "PQKWLANMMAQ") 45 | result <- computing_kernel(sq_ami) 46 | 47 | # Results in a symmetric matrix 3x3: 48 | result 49 | 50 | # Accessing similarity between sequences number 3 and 1: 51 | result[3, 1] 52 | result[1, 3] # the same value as above 53 | 54 | # Using higher exponential results amplifies differences: 55 | computing_kernel(sq_ami, exponential = 0.5) 56 | 57 | # max_kmer_length can be higher than sequence length. 58 | # In that case longer k-mers are simply omitted: 59 | computing_kernel(sq_ami, max_kmer_length = 25) 60 | 61 | } 62 | -------------------------------------------------------------------------------- /R/computing_kernel.R: -------------------------------------------------------------------------------- 1 | #' Compute sequence correlation matrix with kernel K_hat^3 2 | #' 3 | #' @description Uses computing kernel to compute sequence similarity. Similarity 4 | #' is expressed as a number from range [0, 1], where 1 means equality. 5 | #' 6 | #' @param x [\code{character}]\cr 7 | #' A vector of sequences this function is applied to. 8 | #' @param max_kmer_length [\code{integer(1)}]\cr 9 | #' Maximum k-mer length used for computations. Higher value results in longer 10 | #' computation time, but gives more accurate scores. Defaults to 5. 11 | #' @param exponential [\code{numeric(1)}]\cr 12 | #' A parameter used to control impact of differences between sequences. Higher 13 | #' value usually results in lower scores. Default value is 0.125, as suggested 14 | #' in the paper. 15 | #' 16 | #' @return A symmetric numeric matrix with dimensions n x n, where n is the number of 17 | #' sequences in supplied sequence vector. A value under index (i, j) is equal to the 18 | #' similarity of sequences i and j. 
19 | #' 20 | #' @details 21 | #' This algorithm is based on the paper called "Towards a Mathematical 22 | #' Foundation of Immunology and Amino Acid Chains" by Wen-Jun Shen, 23 | #' Hau-San Wong, Quan-Wu Xiao, Xin Guo and Stephen Smale (arXiv:1205.6031). 24 | #' 25 | #' This sequence correlation method does not employ any alignment methods in its 26 | #' computations. Instead, it bases heavily on k-mers, comparing analogous k-mers 27 | #' across sequences. 28 | #' 29 | #' @examples 30 | #' sq_ami <- c("PPAVMMFDILKKIQ", "PQEWYTWLPVMCTN", "PQKWLANMMAQ") 31 | #' result <- computing_kernel(sq_ami) 32 | #' 33 | #' # Results in a symmetric matrix 3x3: 34 | #' result 35 | #' 36 | #' # Accessing similarity between sequences number 3 and 1: 37 | #' result[3, 1] 38 | #' result[1, 3] # the same value as above 39 | #' 40 | #' # Using higher exponential results amplifies differences: 41 | #' computing_kernel(sq_ami, exponential = 0.5) 42 | #' 43 | #' # max_kmer_length can be higher than sequence length. 44 | #' # In that case longer k-mers are simply omitted: 45 | #' computing_kernel(sq_ami, max_kmer_length = 25) 46 | #' 47 | #' @export 48 | computing_kernel <- function(x, max_kmer_length = 5, exponential = 0.125, ...) 49 | UseMethod("computing_kernel") 50 | 51 | #' @export 52 | computing_kernel.default <- function(x, max_kmer_length = 5, exponential = 0.125, ...) 53 | stop("'computing_kernel' isn't implemented for this type of object", call. = FALSE) 54 | 55 | #' @rdname computing_kernel 56 | #' @export 57 | computing_kernel.character <- function(x, max_kmer_length = 5, exponential = 0.125, ...) 
{ 58 | assert_count(max_kmer_length) 59 | assert_number(exponential, lower = 0) 60 | 61 | CPP_computing_kernel(x, max_kmer_length, exponential) 62 | } 63 | -------------------------------------------------------------------------------- /inst/include/kernelforleon/computing_kernel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "Rcpp.h" 5 | #include "BLOSUM62_2.h" 6 | 7 | inline double kernel_1(const char &codon_1, const char &codon_2, const double &exponential = 0.1) { 8 | return pow(internal::read_BLOSUM62_2(codon_1, codon_2), exponential); 9 | } 10 | 11 | inline double kernels_2_3(const std::string &sequence_1, 12 | const std::string &sequence_2, 13 | const unsigned long long &max_kmer_length, 14 | const double &exponential = 0.1) { 15 | typedef std::vector::size_type LenSq; 16 | 17 | const LenSq sequence_1_size = sequence_1.size(); 18 | const LenSq sequence_2_size = sequence_2.size(); 19 | 20 | // Constructs a matrix of kernel^2_1 scores, but rotated by 45 degrees. 21 | // The reason is that k-mers for k >= 2 can be built based on 1-mers, but the shift between sequences must be preserved for all k-mer elements. 22 | // Such matrix ensures that each vector has all possible k-mers for given shift. 23 | // E.g. one of the vectors in the middle contains the following pairs: [(1,1), (2,2), (3,3), (4,4), ...], 24 | // while the next contains: [(1,2), (2,3), (3,4), ...]. 
25 | std::vector> kernel_2_1(sequence_1_size + sequence_2_size - 1); 26 | 27 | // Index keeps track of which vector is being filled 28 | LenSq index = 0; 29 | // Filling the first half of the matrix 30 | for (; index < sequence_1_size; ++index) { 31 | const LenSq sequence_1_start_index = sequence_1_size - index - 1; 32 | const LenSq sequence_2_start_index = 0; 33 | kernel_2_1[index] = std::vector(std::min(index + 1, sequence_2_size)); 34 | 35 | for (LenSq j = 0; j < kernel_2_1[index].size(); ++j) { 36 | kernel_2_1[index][j] = kernel_1( 37 | sequence_1[sequence_1_start_index + j], 38 | sequence_2[sequence_2_start_index + j], 39 | exponential); 40 | } 41 | } 42 | 43 | // Filling the second half 44 | for (; index < kernel_2_1.size(); ++index) { 45 | const LenSq sequence_1_start_index = 0; 46 | // We start from (0, 1) here, because (0, 0) was already computed above 47 | const LenSq sequence_2_start_index = index - sequence_1_size + 1; 48 | kernel_2_1[index] = std::vector(std::min(sequence_1_size + sequence_2_size - index - 1, sequence_1_size)); 49 | 50 | for (LenSq j = 0; j < kernel_2_1[index].size(); ++j) { 51 | kernel_2_1[index][j] = kernel_1( 52 | sequence_1[sequence_1_start_index + j], 53 | sequence_2[sequence_2_start_index + j], 54 | exponential); 55 | } 56 | } 57 | 58 | double sum = 0.0; 59 | index = 0; 60 | for (; index < kernel_2_1.size(); ++index) { 61 | for (LenSq i = 0; i < kernel_2_1[index].size(); ++i) { 62 | double kmer_product = 1.0; 63 | for (LenSq j = 0; j < max_kmer_length; ++j) { 64 | if (i + j >= kernel_2_1[index].size()) { 65 | break; 66 | } 67 | kmer_product *= kernel_2_1[index][i + j]; 68 | sum += kmer_product; 69 | } 70 | } 71 | } 72 | 73 | return sum; 74 | } 75 | 76 | inline double correlation_kernel_3(const std::string &sequence_1, 77 | const std::string &sequence_2, 78 | const double &self_similarity_1, 79 | const double &self_similarity_2, 80 | const unsigned long long &max_kmer_length, 81 | const double &exponential = 0.1) { 82 | return 
kernels_2_3(sequence_1, sequence_2, max_kmer_length, exponential) / 83 | sqrt(self_similarity_1 * self_similarity_2); 84 | } 85 | 86 | inline Rcpp::NumericMatrix correlation_kernel_3(const std::vector &sq, 87 | const unsigned long long &max_kmer_length, 88 | const double &exponential = 0.1) { 89 | typedef std::vector::size_type LenSq; 90 | 91 | // Initializes kernel_3 scores for similarity of sequences to themselves so that they aren't computed for each standardization 92 | std::vector self_similarity(sq.size(), 0); 93 | for (LenSq i = 0; i < sq.size(); ++i) { 94 | self_similarity[i] = kernels_2_3(sq[i], sq[i], max_kmer_length, exponential); 95 | } 96 | 97 | // Initializes returned correlation matrix with 1s on diagonal, because correlation of a sequence to itself is equal to 1 98 | Rcpp::NumericMatrix ret = Rcpp::NumericMatrix::diag(sq.size(), 1); 99 | for (LenSq i = 0; i < sq.size(); ++i) { 100 | for (LenSq j = i + 1; j < sq.size(); ++j) { 101 | const double correlation_score = correlation_kernel_3( 102 | sq[i], sq[j], self_similarity[i], self_similarity[j], max_kmer_length, exponential); 103 | ret(i, j) = correlation_score; 104 | ret(j, i) = correlation_score; 105 | } 106 | } 107 | return ret; 108 | } 109 | -------------------------------------------------------------------------------- /inst/include/kernelforleon/BLOSUM62_2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace constants { 4 | template 5 | inline const double BLOSUM62_2 = BLOSUM62_2; 6 | 7 | template<> 8 | inline const double BLOSUM62_2<0u, 0u> = 3.90294070015052; 9 | 10 | template<> 11 | inline const double BLOSUM62_2<0u, 1u> = 0.867987663625351; 12 | 13 | template<> 14 | inline const double BLOSUM62_2<0u, 2u> = 0.544605274637711; 15 | 16 | template<> 17 | inline const double BLOSUM62_2<0u, 3u> = 0.741264113108527; 18 | 19 | template<> 20 | inline const double BLOSUM62_2<0u, 4u> = 0.464893827242731; 21 | 22 | template<> 23 | 
inline const double BLOSUM62_2<0u, 5u> = 1.05686960775367; 24 | 25 | template<> 26 | inline const double BLOSUM62_2<0u, 6u> = 0.569364849349247; 27 | 28 | template<> 29 | inline const double BLOSUM62_2<0u, 7u> = 0.632481034524687; 30 | 31 | template<> 32 | inline const double BLOSUM62_2<0u, 8u> = 0.775390239493712; 33 | 34 | template<> 35 | inline const double BLOSUM62_2<0u, 9u> = 0.601945974581018; 36 | 37 | template<> 38 | inline const double BLOSUM62_2<0u, 10u> = 0.723150342301527; 39 | 40 | template<> 41 | inline const double BLOSUM62_2<0u, 11u> = 0.58830764005965; 42 | 43 | template<> 44 | inline const double BLOSUM62_2<0u, 12u> = 0.754121369072143; 45 | 46 | template<> 47 | inline const double BLOSUM62_2<0u, 13u> = 0.756803942764855; 48 | 49 | template<> 50 | inline const double BLOSUM62_2<0u, 14u> = 0.612698600061261; 51 | 52 | template<> 53 | inline const double BLOSUM62_2<0u, 15u> = 1.47210398545007; 54 | 55 | template<> 56 | inline const double BLOSUM62_2<0u, 16u> = 0.984401955935144; 57 | 58 | template<> 59 | inline const double BLOSUM62_2<0u, 17u> = 0.936458396120263; 60 | 61 | template<> 62 | inline const double BLOSUM62_2<0u, 18u> = 0.416548781469996; 63 | 64 | template<> 65 | inline const double BLOSUM62_2<0u, 19u> = 0.542611868915233; 66 | 67 | template<> 68 | inline const double BLOSUM62_2<1u, 1u> = 19.5765856868537; 69 | 70 | template<> 71 | inline const double BLOSUM62_2<1u, 2u> = 0.301454344668722; 72 | 73 | template<> 74 | inline const double BLOSUM62_2<1u, 3u> = 0.285934574128461; 75 | 76 | template<> 77 | inline const double BLOSUM62_2<1u, 4u> = 0.438990117695358; 78 | 79 | template<> 80 | inline const double BLOSUM62_2<1u, 5u> = 0.420387869540714; 81 | 82 | template<> 83 | inline const double BLOSUM62_2<1u, 6u> = 0.355049504965318; 84 | 85 | template<> 86 | inline const double BLOSUM62_2<1u, 7u> = 0.653458800603899; 87 | 88 | template<> 89 | inline const double BLOSUM62_2<1u, 8u> = 0.349128464920366; 90 | 91 | template<> 92 | inline const 
double BLOSUM62_2<1u, 9u> = 0.642275633431893; 93 | 94 | template<> 95 | inline const double BLOSUM62_2<1u, 10u> = 0.61135434012195; 96 | 97 | template<> 98 | inline const double BLOSUM62_2<1u, 11u> = 0.397802620047207; 99 | 100 | template<> 101 | inline const double BLOSUM62_2<1u, 12u> = 0.379562691207627; 102 | 103 | template<> 104 | inline const double BLOSUM62_2<1u, 13u> = 0.3657815306054; 105 | 106 | template<> 107 | inline const double BLOSUM62_2<1u, 14u> = 0.308939296217125; 108 | 109 | template<> 110 | inline const double BLOSUM62_2<1u, 15u> = 0.738415701073377; 111 | 112 | template<> 113 | inline const double BLOSUM62_2<1u, 16u> = 0.740551692220627; 114 | 115 | template<> 116 | inline const double BLOSUM62_2<1u, 17u> = 0.75584405464154; 117 | 118 | template<> 119 | inline const double BLOSUM62_2<1u, 18u> = 0.449983902793404; 120 | 121 | template<> 122 | inline const double BLOSUM62_2<1u, 19u> = 0.434203398141003; 123 | 124 | template<> 125 | inline const double BLOSUM62_2<2u, 2u> = 7.39792738079911; 126 | 127 | template<> 128 | inline const double BLOSUM62_2<2u, 3u> = 1.68781074564091; 129 | 130 | template<> 131 | inline const double BLOSUM62_2<2u, 4u> = 0.298969081268819; 132 | 133 | template<> 134 | inline const double BLOSUM62_2<2u, 5u> = 0.634301018724725; 135 | 136 | template<> 137 | inline const double BLOSUM62_2<2u, 6u> = 0.678558838858087; 138 | 139 | template<> 140 | inline const double BLOSUM62_2<2u, 7u> = 0.339015407074848; 141 | 142 | template<> 143 | inline const double BLOSUM62_2<2u, 8u> = 0.784090405759266; 144 | 145 | template<> 146 | inline const double BLOSUM62_2<2u, 9u> = 0.286613045833504; 147 | 148 | template<> 149 | inline const double BLOSUM62_2<2u, 10u> = 0.346454633856285; 150 | 151 | template<> 152 | inline const double BLOSUM62_2<2u, 11u> = 1.55385280556386; 153 | 154 | template<> 155 | inline const double BLOSUM62_2<2u, 12u> = 0.598716825883862; 156 | 157 | template<> 158 | inline const double BLOSUM62_2<2u, 13u> = 
0.89708112869296; 159 | 160 | template<> 161 | inline const double BLOSUM62_2<2u, 14u> = 0.573200023500285; 162 | 163 | template<> 164 | inline const double BLOSUM62_2<2u, 15u> = 0.913504624354301; 165 | 166 | template<> 167 | inline const double BLOSUM62_2<2u, 16u> = 0.694789868062839; 168 | 169 | template<> 170 | inline const double BLOSUM62_2<2u, 17u> = 0.336500141524566; 171 | 172 | template<> 173 | inline const double BLOSUM62_2<2u, 18u> = 0.232102315145553; 174 | 175 | template<> 176 | inline const double BLOSUM62_2<2u, 19u> = 0.345683565218847; 177 | 178 | template<> 179 | inline const double BLOSUM62_2<3u, 3u> = 5.46952607963445; 180 | 181 | template<> 182 | inline const double BLOSUM62_2<3u, 4u> = 0.33074399059478; 183 | 184 | template<> 185 | inline const double BLOSUM62_2<3u, 5u> = 0.481267654658343; 186 | 187 | template<> 188 | inline const double BLOSUM62_2<3u, 6u> = 0.960040718354581; 189 | 190 | template<> 191 | inline const double BLOSUM62_2<3u, 7u> = 0.330522558376655; 192 | 193 | template<> 194 | inline const double BLOSUM62_2<3u, 8u> = 1.30827885329714; 195 | 196 | template<> 197 | inline const double BLOSUM62_2<3u, 9u> = 0.372873704285776; 198 | 199 | template<> 200 | inline const double BLOSUM62_2<3u, 10u> = 0.50034228947388; 201 | 202 | template<> 203 | inline const double BLOSUM62_2<3u, 11u> = 0.911298183018242; 204 | 205 | template<> 206 | inline const double BLOSUM62_2<3u, 12u> = 0.679202586642317; 207 | 208 | template<> 209 | inline const double BLOSUM62_2<3u, 13u> = 1.90173784203935; 210 | 211 | template<> 212 | inline const double BLOSUM62_2<3u, 14u> = 0.960797602466529; 213 | 214 | template<> 215 | inline const double BLOSUM62_2<3u, 15u> = 0.950357185031325; 216 | 217 | template<> 218 | inline const double BLOSUM62_2<3u, 16u> = 0.741425610477113; 219 | 220 | template<> 221 | inline const double BLOSUM62_2<3u, 17u> = 0.428943129877398; 222 | 223 | template<> 224 | inline const double BLOSUM62_2<3u, 18u> = 0.374300211820363; 225 | 226 | 
template<> 227 | inline const double BLOSUM62_2<3u, 19u> = 0.496467353893267; 228 | 229 | template<> 230 | inline const double BLOSUM62_2<4u, 4u> = 8.1287970162524; 231 | 232 | template<> 233 | inline const double BLOSUM62_2<4u, 5u> = 0.340640908478402; 234 | 235 | template<> 236 | inline const double BLOSUM62_2<4u, 6u> = 0.651990520809943; 237 | 238 | template<> 239 | inline const double BLOSUM62_2<4u, 7u> = 0.945769882931625; 240 | 241 | template<> 242 | inline const double BLOSUM62_2<4u, 8u> = 0.344043118911871; 243 | 244 | template<> 245 | inline const double BLOSUM62_2<4u, 9u> = 1.15459749441297; 246 | 247 | template<> 248 | inline const double BLOSUM62_2<4u, 10u> = 1.00437163122058; 249 | 250 | template<> 251 | inline const double BLOSUM62_2<4u, 11u> = 0.354288952229033; 252 | 253 | template<> 254 | inline const double BLOSUM62_2<4u, 12u> = 0.287444757613266; 255 | 256 | template<> 257 | inline const double BLOSUM62_2<4u, 13u> = 0.333972401843889; 258 | 259 | template<> 260 | inline const double BLOSUM62_2<4u, 14u> = 0.380726330360237; 261 | 262 | template<> 263 | inline const double BLOSUM62_2<4u, 15u> = 0.439973596778216; 264 | 265 | template<> 266 | inline const double BLOSUM62_2<4u, 16u> = 0.481693682890345; 267 | 268 | template<> 269 | inline const double BLOSUM62_2<4u, 17u> = 0.745089737790822; 270 | 271 | template<> 272 | inline const double BLOSUM62_2<4u, 18u> = 1.37437942379832; 273 | 274 | template<> 275 | inline const double BLOSUM62_2<4u, 19u> = 2.76938062915766; 276 | 277 | template<> 278 | inline const double BLOSUM62_2<5u, 5u> = 6.87630690865387; 279 | 280 | template<> 281 | inline const double BLOSUM62_2<5u, 6u> = 0.492966575788069; 282 | 283 | template<> 284 | inline const double BLOSUM62_2<5u, 7u> = 0.275009721763455; 285 | 286 | template<> 287 | inline const double BLOSUM62_2<5u, 8u> = 0.588871736039716; 288 | 289 | template<> 290 | inline const double BLOSUM62_2<5u, 9u> = 0.284504011912594; 291 | 292 | template<> 293 | inline const double 
BLOSUM62_2<5u, 10u> = 0.395486600257494; 294 | 295 | template<> 296 | inline const double BLOSUM62_2<5u, 11u> = 0.86371140576969; 297 | 298 | template<> 299 | inline const double BLOSUM62_2<5u, 12u> = 0.477385507184256; 300 | 301 | template<> 302 | inline const double BLOSUM62_2<5u, 13u> = 0.538649627426744; 303 | 304 | template<> 305 | inline const double BLOSUM62_2<5u, 14u> = 0.449983999048674; 306 | 307 | template<> 308 | inline const double BLOSUM62_2<5u, 15u> = 0.90359652515418; 309 | 310 | template<> 311 | inline const double BLOSUM62_2<5u, 16u> = 0.579271581711225; 312 | 313 | template<> 314 | inline const double BLOSUM62_2<5u, 17u> = 0.3369549123791; 315 | 316 | template<> 317 | inline const double BLOSUM62_2<5u, 18u> = 0.421690355206204; 318 | 319 | template<> 320 | inline const double BLOSUM62_2<5u, 19u> = 0.348714366361603; 321 | 322 | template<> 323 | inline const double BLOSUM62_2<6u, 6u> = 13.5059996886779; 324 | 325 | template<> 326 | inline const double BLOSUM62_2<6u, 7u> = 0.326288124625136; 327 | 328 | template<> 329 | inline const double BLOSUM62_2<6u, 8u> = 0.778887489609194; 330 | 331 | template<> 332 | inline const double BLOSUM62_2<6u, 9u> = 0.380675485808673; 333 | 334 | template<> 335 | inline const double BLOSUM62_2<6u, 10u> = 0.584132623334439; 336 | 337 | template<> 338 | inline const double BLOSUM62_2<6u, 11u> = 1.22200066958752; 339 | 340 | template<> 341 | inline const double BLOSUM62_2<6u, 12u> = 0.472879830723747; 342 | 343 | template<> 344 | inline const double BLOSUM62_2<6u, 13u> = 1.16798103533111; 345 | 346 | template<> 347 | inline const double BLOSUM62_2<6u, 14u> = 0.917048020652714; 348 | 349 | template<> 350 | inline const double BLOSUM62_2<6u, 15u> = 0.736731739892316; 351 | 352 | template<> 353 | inline const double BLOSUM62_2<6u, 16u> = 0.55750325361248; 354 | 355 | template<> 356 | inline const double BLOSUM62_2<6u, 17u> = 0.339447441760233; 357 | 358 | template<> 359 | inline const double BLOSUM62_2<6u, 18u> = 
0.44408895489718; 360 | 361 | template<> 362 | inline const double BLOSUM62_2<6u, 19u> = 1.79790413031311; 363 | 364 | template<> 365 | inline const double BLOSUM62_2<7u, 7u> = 3.997929939961; 366 | 367 | template<> 368 | inline const double BLOSUM62_2<7u, 8u> = 0.396372934422445; 369 | 370 | template<> 371 | inline const double BLOSUM62_2<7u, 9u> = 1.69443475437089; 372 | 373 | template<> 374 | inline const double BLOSUM62_2<7u, 10u> = 1.4777445015865; 375 | 376 | template<> 377 | inline const double BLOSUM62_2<7u, 11u> = 0.327934751806163; 378 | 379 | template<> 380 | inline const double BLOSUM62_2<7u, 12u> = 0.384662859733384; 381 | 382 | template<> 383 | inline const double BLOSUM62_2<7u, 13u> = 0.382937802207239; 384 | 385 | template<> 386 | inline const double BLOSUM62_2<7u, 14u> = 0.354751311390641; 387 | 388 | template<> 389 | inline const double BLOSUM62_2<7u, 15u> = 0.443163582314639; 390 | 391 | template<> 392 | inline const double BLOSUM62_2<7u, 16u> = 0.779816109586742; 393 | 394 | template<> 395 | inline const double BLOSUM62_2<7u, 17u> = 2.41751209060932; 396 | 397 | template<> 398 | inline const double BLOSUM62_2<7u, 18u> = 0.408874390481926; 399 | 400 | template<> 401 | inline const double BLOSUM62_2<7u, 19u> = 0.630388930627921; 402 | 403 | template<> 404 | inline const double BLOSUM62_2<8u, 8u> = 4.76433717338922; 405 | 406 | template<> 407 | inline const double BLOSUM62_2<8u, 9u> = 0.428270363066123; 408 | 409 | template<> 410 | inline const double BLOSUM62_2<8u, 10u> = 0.625302816237689; 411 | 412 | template<> 413 | inline const double BLOSUM62_2<8u, 11u> = 0.939841128716106; 414 | 415 | template<> 416 | inline const double BLOSUM62_2<8u, 12u> = 0.703774478956202; 417 | 418 | template<> 419 | inline const double BLOSUM62_2<8u, 13u> = 1.5543230772441; 420 | 421 | template<> 422 | inline const double BLOSUM62_2<8u, 14u> = 2.07680866910689; 423 | 424 | template<> 425 | inline const double BLOSUM62_2<8u, 15u> = 0.931919140710646; 426 | 427 | 
template<> 428 | inline const double BLOSUM62_2<8u, 16u> = 0.792905802702891; 429 | 430 | template<> 431 | inline const double BLOSUM62_2<8u, 17u> = 0.456542719723346; 432 | 433 | template<> 434 | inline const double BLOSUM62_2<8u, 18u> = 0.358930070683472; 435 | 436 | template<> 437 | inline const double BLOSUM62_2<8u, 19u> = 0.532179332619096; 438 | 439 | template<> 440 | inline const double BLOSUM62_2<9u, 9u> = 3.79662136919197; 441 | 442 | template<> 443 | inline const double BLOSUM62_2<9u, 10u> = 1.99429556770288; 444 | 445 | template<> 446 | inline const double BLOSUM62_2<9u, 11u> = 0.310043275665557; 447 | 448 | template<> 449 | inline const double BLOSUM62_2<9u, 12u> = 0.37112172360338; 450 | 451 | template<> 452 | inline const double BLOSUM62_2<9u, 13u> = 0.477325586336375; 453 | 454 | template<> 455 | inline const double BLOSUM62_2<9u, 14u> = 0.473919278116426; 456 | 457 | template<> 458 | inline const double BLOSUM62_2<9u, 15u> = 0.428893742551906; 459 | 460 | template<> 461 | inline const double BLOSUM62_2<9u, 16u> = 0.660328974513946; 462 | 463 | template<> 464 | inline const double BLOSUM62_2<9u, 17u> = 1.31423572845207; 465 | 466 | template<> 467 | inline const double BLOSUM62_2<9u, 18u> = 0.568037074030476; 468 | 469 | template<> 470 | inline const double BLOSUM62_2<9u, 19u> = 0.692059423023677; 471 | 472 | template<> 473 | inline const double BLOSUM62_2<10u, 10u> = 6.48145120779836; 474 | 475 | template<> 476 | inline const double BLOSUM62_2<10u, 11u> = 0.474529654685916; 477 | 478 | template<> 479 | inline const double BLOSUM62_2<10u, 12u> = 0.423898023856973; 480 | 481 | template<> 482 | inline const double BLOSUM62_2<10u, 13u> = 0.864250292645428; 483 | 484 | template<> 485 | inline const double BLOSUM62_2<10u, 14u> = 0.622623369170503; 486 | 487 | template<> 488 | inline const double BLOSUM62_2<10u, 15u> = 0.598558924100088; 489 | 490 | template<> 491 | inline const double BLOSUM62_2<10u, 16u> = 0.793801615982561; 492 | 493 | template<> 494 | 
inline const double BLOSUM62_2<10u, 17u> = 1.26893679116311;  // row 10 ('M'), continued
template<> inline const double BLOSUM62_2<10u, 18u> = 0.61029621403986;
template<> inline const double BLOSUM62_2<10u, 19u> = 0.708364627674993;

// Index-to-letter mapping used in the row comments below follows the switch
// in internal::read_BLOSUM62_2 (10='M', 11='N', ..., 19='Y').
// Only pairs with first index <= second index are specialized in this span.

// Row 11 ('N')
template<> inline const double BLOSUM62_2<11u, 11u> = 7.09409487818815;
template<> inline const double BLOSUM62_2<11u, 12u> = 0.499932835964896;
template<> inline const double BLOSUM62_2<11u, 13u> = 1.00058441852805;
template<> inline const double BLOSUM62_2<11u, 14u> = 0.858630477662975;
template<> inline const double BLOSUM62_2<11u, 15u> = 1.23152924484831;
template<> inline const double BLOSUM62_2<11u, 16u> = 0.984152634507759;
template<> inline const double BLOSUM62_2<11u, 17u> = 0.369033853043791;
template<> inline const double BLOSUM62_2<11u, 18u> = 0.277782895565922;
template<> inline const double BLOSUM62_2<11u, 19u> = 0.486030805784542;

// Row 12 ('P')
template<> inline const double BLOSUM62_2<12u, 12u> = 12.8375437364914;
template<> inline const double BLOSUM62_2<12u, 13u> = 0.641280588751714;
template<> inline const double BLOSUM62_2<12u, 14u> = 0.48153490494176;
template<> inline const double BLOSUM62_2<12u, 15u> = 0.755503259406695;
template<> inline const double BLOSUM62_2<12u, 16u> = 0.688897122172827;
template<> inline const double BLOSUM62_2<12u, 17u> = 0.443082983953355;
template<> inline const double BLOSUM62_2<12u, 18u> = 0.281833163504864;
template<> inline const double BLOSUM62_2<12u, 19u> = 0.363521118919126;

// Row 13 ('Q')
template<> inline const double BLOSUM62_2<13u, 13u> = 6.24442175356205;
template<> inline const double BLOSUM62_2<13u, 14u> = 1.40579606250098;
template<> inline const double BLOSUM62_2<13u, 15u> = 0.96555522798098;
template<> inline const double BLOSUM62_2<13u, 16u> = 0.79132074056634;
template<> inline const double BLOSUM62_2<13u, 17u> = 0.466777931405709;
template<> inline const double BLOSUM62_2<13u, 18u> = 0.509360271558287;
template<> inline const double BLOSUM62_2<13u, 19u> = 0.611094097106686;

// Row 14 ('R')
template<> inline const double BLOSUM62_2<14u, 14u> = 6.66557706993898;
template<> inline const double BLOSUM62_2<14u, 15u> = 0.767165632779031;
template<> inline const double BLOSUM62_2<14u, 16u> = 0.677754679194329;
template<> inline const double BLOSUM62_2<14u, 17u> = 0.42007231624752;
template<> inline const double BLOSUM62_2<14u, 18u> = 0.395102105602751;
template<> inline const double BLOSUM62_2<14u, 19u> = 0.555965424563688;

// Row 15 ('S')
template<> inline const double BLOSUM62_2<15u, 15u> = 3.8428474099213;
template<> inline const double BLOSUM62_2<15u, 16u> = 1.61392097340711;
template<> inline const double BLOSUM62_2<15u, 17u> = 0.565223766047939;
template<> inline const double BLOSUM62_2<15u, 18u> = 0.385303034789668;
template<> inline const double BLOSUM62_2<15u, 19u> = 0.557520051020311;

// Row 16 ('T')
template<> inline const double BLOSUM62_2<16u, 16u> = 4.83210516236962;
template<> inline const double BLOSUM62_2<16u, 17u> = 0.980943004996173;
template<> inline const double BLOSUM62_2<16u, 18u> = 0.430934143757138;
template<> inline const double BLOSUM62_2<16u, 19u> = 0.573156574120723;

// Row 17 ('V'); the <17u, 19u> entry is completed by the text that follows.
template<> inline const double BLOSUM62_2<17u, 17u> = 3.69215640428348;
template<> inline const double BLOSUM62_2<17u, 18u> = 0.374456331815776;
template<> inline const
double BLOSUM62_2<17u, 19u> = 0.658038692870502; 627 | 628 | template<> 629 | inline const double BLOSUM62_2<18u, 18u> = 38.1077832575802; 630 | 631 | template<> 632 | inline const double BLOSUM62_2<18u, 19u> = 2.10980811550359; 633 | 634 | template<> 635 | inline const double BLOSUM62_2<19u, 19u> = 9.83220341258545; 636 | } 637 | 638 | namespace internal { 639 | 640 | #define SECOND_CODON(CODON_1) \ 641 | switch (value_2) { \ 642 | case 'A': return constants::BLOSUM62_2; \ 643 | case 'C': return constants::BLOSUM62_2; \ 644 | case 'D': return constants::BLOSUM62_2; \ 645 | case 'E': return constants::BLOSUM62_2; \ 646 | case 'F': return constants::BLOSUM62_2; \ 647 | case 'G': return constants::BLOSUM62_2; \ 648 | case 'H': return constants::BLOSUM62_2; \ 649 | case 'I': return constants::BLOSUM62_2; \ 650 | case 'K': return constants::BLOSUM62_2; \ 651 | case 'L': return constants::BLOSUM62_2; \ 652 | case 'M': return constants::BLOSUM62_2; \ 653 | case 'N': return constants::BLOSUM62_2; \ 654 | case 'P': return constants::BLOSUM62_2; \ 655 | case 'Q': return constants::BLOSUM62_2; \ 656 | case 'R': return constants::BLOSUM62_2; \ 657 | case 'S': return constants::BLOSUM62_2; \ 658 | case 'T': return constants::BLOSUM62_2; \ 659 | case 'V': return constants::BLOSUM62_2; \ 660 | case 'W': return constants::BLOSUM62_2; \ 661 | case 'Y': return constants::BLOSUM62_2; \ 662 | default: throw std::invalid_argument("translation must be made with twenty standard amino acid letters only"); \ 663 | } 664 | 665 | constexpr double read_BLOSUM62_2(char value_1, char value_2) { 666 | switch (value_1) { 667 | case 'A': SECOND_CODON(0) 668 | case 'C': SECOND_CODON(1) 669 | case 'D': SECOND_CODON(2) 670 | case 'E': SECOND_CODON(3) 671 | case 'F': SECOND_CODON(4) 672 | case 'G': SECOND_CODON(5) 673 | case 'H': SECOND_CODON(6) 674 | case 'I': SECOND_CODON(7) 675 | case 'K': SECOND_CODON(8) 676 | case 'L': SECOND_CODON(9) 677 | case 'M': SECOND_CODON(10) 678 | case 'N': 
SECOND_CODON(11) 679 | case 'P': SECOND_CODON(12) 680 | case 'Q': SECOND_CODON(13) 681 | case 'R': SECOND_CODON(14) 682 | case 'S': SECOND_CODON(15) 683 | case 'T': SECOND_CODON(16) 684 | case 'V': SECOND_CODON(17) 685 | case 'W': SECOND_CODON(18) 686 | case 'Y': SECOND_CODON(19) 687 | default: throw std::invalid_argument("translation must be made with twenty standard amino acid letters only"); 688 | } 689 | } 690 | 691 | #undef SECOND_CODON 692 | } 693 | --------------------------------------------------------------------------------