├── lib ├── umdhmm-v1.02 │ ├── VERSION │ ├── test.seq │ ├── esthmm │ ├── genseq │ ├── testfor │ ├── testvit │ ├── hmmtut.pdf │ ├── t2.50.seq │ ├── t3.hmm │ ├── t2.hmm │ ├── test.hmm │ ├── t2.100.seq │ ├── THANKS │ ├── TODO │ ├── hmmrand.c │ ├── Makefile │ ├── nrutil.h │ ├── CHANGES │ ├── testfor.c │ ├── backward.c │ ├── forward.c │ ├── testvit.c │ ├── hmm.h │ ├── t2.1500.seq │ ├── genseq.c │ ├── sequence.c │ ├── README │ ├── viterbi.c │ ├── hmmutils.c │ ├── baum.c │ ├── nrutil.c │ ├── esthmm.c │ └── COPYING └── jellyfish-2.2.6.tar.gz ├── LICENSE.docx ├── .gitmodules ├── include ├── EstimateCoverage.h ├── DataStruct.h ├── GenerateKmer.h ├── GrabJellyfishKmer.h ├── GetCnvSignal.h └── CallHmm.h ├── Dockerfile ├── main.cpp ├── Makefile ├── src ├── GenerateKmer.cpp ├── GrabJellyfishKmer.cpp ├── EstimateCoverage.cpp ├── CallHmm.cpp └── GetCnvSignal.cpp ├── README.md ├── LICENSE └── JaxCNVMerge.R /lib/umdhmm-v1.02/VERSION: -------------------------------------------------------------------------------- 1 | 1.02 2 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/test.seq: -------------------------------------------------------------------------------- 1 | T= 10 2 | 1 1 1 1 2 1 2 2 2 2 3 | -------------------------------------------------------------------------------- /LICENSE.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/LICENSE.docx -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/esthmm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/lib/umdhmm-v1.02/esthmm -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/genseq: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/lib/umdhmm-v1.02/genseq -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/testfor: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/lib/umdhmm-v1.02/testfor -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/testvit: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/lib/umdhmm-v1.02/testvit -------------------------------------------------------------------------------- /lib/jellyfish-2.2.6.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/lib/jellyfish-2.2.6.tar.gz -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/hmmtut.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheJacksonLaboratory/JAX-CNV/HEAD/lib/umdhmm-v1.02/hmmtut.pdf -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/t2.50.seq: -------------------------------------------------------------------------------- 1 | T= 50 2 | 2 1 2 1 2 2 1 2 1 2 1 1 1 2 1 1 2 1 2 1 2 1 1 2 2 2 2 1 2 1 1 2 1 1 2 1 2 1 2 2 1 1 1 1 2 1 2 2 1 1 3 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/t3.hmm: -------------------------------------------------------------------------------- 1 | M= 2 2 | N= 3 3 | A: 4 | 0.5 0.2 0.2 5 | 0.2 0.4 0.4 6 | 0.1 0.45 0.45 7 | B: 8 | 0.5 0.5 9 | 0.75 0.25 10 | 0.25 0.75 11 | pi: 12 | 0.333 0.333 0.333 13 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/t2.hmm: -------------------------------------------------------------------------------- 1 | M= 2 2 | N= 3 3 | A: 4 | 0.9 0.05 0.05 5 | 0.45 0.1 0.45 6 | 0.45 0.45 0.1 7 | B: 8 | 0.5 0.5 9 | 0.75 0.25 10 | 0.25 0.75 11 | pi: 12 | 0.333 0.333 0.333 13 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/fastaq"] 2 | path = lib/fastaq 3 | url = git@github.com:wanpinglee/fastaq.git 4 | [submodule "lib/htslib"] 5 | path = lib/htslib 6 | url = https://github.com/samtools/htslib 7 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/test.hmm: -------------------------------------------------------------------------------- 1 | M= 2 2 | N= 3 3 | A: 4 | 0.333 0.333 0.333 5 | 0.333 0.333 0.333 6 | 0.333 0.333 0.333 7 | B: 8 | 0.5 0.5 9 | 0.75 0.25 10 | 0.25 0.75 11 | pi: 12 | 0.333 0.333 0.333 13 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/t2.100.seq: -------------------------------------------------------------------------------- 1 | R= 1 2 | T= 100 3 | 2 2 2 2 1 1 2 1 1 1 1 2 1 1 2 1 2 1 2 1 1 1 2 1 2 1 1 2 2 1 1 2 2 2 2 1 1 1 1 2 1 2 1 2 1 1 1 1 2 1 1 1 1 2 2 2 2 2 1 2 1 1 2 2 2 2 1 1 1 1 1 2 1 1 1 1 2 1 2 2 2 1 1 2 1 2 2 1 2 1 2 1 2 1 1 1 2 2 1 1 4 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/THANKS: -------------------------------------------------------------------------------- 1 | 2 | Many people have given helped me understand HMMs, have found 3 | bugs, debugged code, suggested changes, ported to other platforms. 4 | Thanks to all of them. 5 | 6 | 7 | S. Krishnan 8 | A. Kornai 9 | P. Resnik 10 | J. Adibi 11 | D. Dementhon 12 | A. Quinn 13 | 14 | 15 | .. and my OCR class with 52 students!! 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /include/EstimateCoverage.h: -------------------------------------------------------------------------------- 1 | #ifndef _ESTIMATECOVERAGE_H_ 2 | #define _ESTIMATECOVERAGE_H_ 3 | 4 | namespace EstimateCoverage { 5 | 6 | int EstimateCoverage(std::vector & coverages, bool & female, bool & male, 7 | const char * bam_filename, const char * kmer_table); // kmer_table is in FASTA format. 8 | 9 | namespace Human { 10 | static const int HumanAutosomeSize = 22; 11 | static const char* HumanAutosome[HumanAutosomeSize] = {"1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22"}; 12 | static const int HumanAllosomeSize = 2; 13 | static const char* HumanAllosome[HumanAllosomeSize] = {"X","Y"}; 14 | } //namespace Human 15 | } // namespace EstimateCoverage 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:16.04 2 | 3 | MAINTAINER Wan-Ping Lee 4 | 5 | # Packaged dependencies 6 | RUN apt-get update && apt-get install -y \ 7 | git \ 8 | make \ 9 | gcc \ 10 | build-essential g++ \ 11 | libcurl4-openssl-dev \ 12 | libbz2-dev \ 13 | liblzma-dev \ 14 | libz-dev \ 15 | libssl1.0.0 \ 16 | libssl-dev \ 17 | automake \ 18 | autoconf \ 19 | wget \ 20 | r-base 21 | 22 | # Make a folder for tools 23 | RUN cd / && mkdir -p tools && cd /tools 24 | 25 | # Git clone JAX-CNV 26 | RUN git clone --recursive https://github.com/TheJacksonLaboratory/JAX-CNV.git 27 | 28 | # Build JAX-CNV 29 | RUN cd JAX-CNV \ 30 | && make 31 | 32 | # Define default command. 33 | CMD ["/tools/JAX-CNV/bin/JAX-CNV"] 34 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/TODO: -------------------------------------------------------------------------------- 1 | $Id: TODO,v 1.4 1998/02/23 09:57:48 kanungo Exp kanungo $ 2 | 3 | -- Documentation: Tutorial with theory, description of software, 4 | and applications. 5 | +added some 6 | 7 | -- Estimation from multiple observation sequences of unequal length. 8 | 9 | -- Continuous density estimation. 10 | 11 | -- Allow the user to specify random number seed. 12 | +done. -TK 13 | 14 | -- Write Usage() functions, read command line arguments using getopts() 15 | +done. -TK 16 | 17 | -- Platforms: check it compiles on Solaris, sunos, HP, Linux, MSVC. 18 | + checked on Dec Alpha, Linux, and Solaris. -TK 19 | + tested on NT with Cygnus GNU package and gcc. -TK 20 | 21 | -- If someone can check that it compiles and works with gcc on PC/NT, 22 | it will be great. 23 | + tested on NT with Cygnus GNU package and gcc. -TK 24 | 25 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/hmmrand.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** File: hmmrand.c 4 | ** Date: 4 May 1999 5 | ** Purpose: To separate out the random number generator 6 | ** functions so that the rest of the code can be 7 | ** platform independent. 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | 14 | /* 15 | ** hmmgetseed() generates an arbitary seed for the random number generator. 16 | */ 17 | int hmmgetseed(void) 18 | { 19 | return ((int) getpid()); 20 | } 21 | 22 | /* 23 | ** hmmsetseed() sets the seed of the random number generator to a 24 | ** specific value. 25 | */ 26 | void hmmsetseed(int seed) 27 | { 28 | srand(seed); 29 | } 30 | 31 | /* 32 | ** hmmgetrand() returns a (double) pseudo random number in the 33 | ** interval [0,1). 34 | */ 35 | 36 | double hmmgetrand(void) 37 | { 38 | return (double) rand()/RAND_MAX; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /include/DataStruct.h: -------------------------------------------------------------------------------- 1 | #ifndef _DATASTRUCT_H_ 2 | #define _DATASTRUCT_H_ 3 | 4 | 5 | struct SReadDepth { 6 | SReadDepth(const int & i_pos, const unsigned int & i_count) : pos(i_pos), count(i_count){} 7 | int pos = 0; 8 | unsigned int count = 0; 9 | unsigned int n_count = 0; // how many N's in a region of reference genome. 10 | double low_mq_alignments = 0.0; // the % of low mapping quality alignments in the region. 11 | }; 12 | 13 | struct SHmmStats { 14 | SHmmStats(){} 15 | SHmmStats(const std::string & a, const unsigned int b, const unsigned int c, const unsigned int d): chr(a), pos(b), stats(c), length(d){} 16 | SHmmStats(const unsigned int a, const unsigned int b, const unsigned int c): pos(a), stats(b), length(c){} 17 | std::string chr; 18 | unsigned int pos = 0; 19 | unsigned int stats = 3; 20 | unsigned int length = 0; 21 | }; 22 | 23 | struct SHmmStatsHeap { 24 | SHmmStatsHeap(){} 25 | SHmmStatsHeap(const SHmmStats & a, const unsigned int & b) : hmm_stats(a), id(b){} 26 | SHmmStats hmm_stats; 27 | unsigned int id = 0; 28 | bool merged = false; 29 | }; 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # 3 | # Make file for compiling HMM code in this directory. 4 | # Author: Tapas Kanungo 5 | # Date: 23 February 1998 6 | # $Id: Makefile,v 1.3 1998/02/23 08:12:35 kanungo Exp kanungo $ 7 | # 8 | # 9 | CFLAGS= -O3 10 | INCS= 11 | # use the following line to "Purify" the code 12 | #CC=purify gcc 13 | CC=gcc 14 | SRCS=baum.c viterbi.c forward.c backward.c hmmutils.c sequence.c \ 15 | genseq.c nrutil.c testvit.c esthmm.c hmmrand.c testfor.c 16 | 17 | all : genseq testvit testfor esthmm 18 | 19 | genseq: genseq.o sequence.o nrutil.o hmmutils.o hmmrand.o 20 | $(CC) -o genseq genseq.o sequence.o nrutil.o \ 21 | hmmrand.o hmmutils.o -lm 22 | testvit: testvit.o viterbi.o nrutil.o hmmutils.o sequence.o 23 | $(CC) -o testvit testvit.o viterbi.o nrutil.o sequence.o \ 24 | hmmutils.o hmmrand.o -lm 25 | testfor: testfor.o forward.o nrutil.o hmmutils.o sequence.o hmmrand.o 26 | $(CC) -o testfor testfor.o forward.o nrutil.o sequence.o \ 27 | hmmutils.o hmmrand.o -lm 28 | esthmm: esthmm.o baum.o nrutil.o hmmutils.o sequence.o \ 29 | forward.o backward.o hmmrand.o 30 | $(CC) -o esthmm esthmm.o baum.o nrutil.o sequence.o hmmutils.o \ 31 | forward.o backward.o hmmrand.o -lm 32 | clean: 33 | rm *.o a.out 34 | # DO NOT DELETE THIS LINE -- make depend depends on it. 35 | 36 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/nrutil.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** File: nrutil.h 3 | ** Purpose: Memory allocation routines borrowed from the 4 | ** book "Numerical Recipes" by Press, Flannery, Teukolsky, 5 | ** and Vetterling. 6 | ** state sequence and probablity of observing a sequence 7 | ** given the model. 8 | ** Organization: University of Maryland 9 | ** 10 | ** $Id: nrutil.h,v 1.2 1998/02/19 16:32:42 kanungo Exp kanungo $ 11 | ** 12 | ** Wan-Ping Lee at the Jackson Lab modified the code on 2017-11-02. 13 | */ 14 | 15 | float *vector(int nl, int nh); 16 | float **matrix(int nrl, int nrh, int ncl, int nch); 17 | float **convert_matrix(float *a, int nrl, int nrh, int ncl, int nch); 18 | double *dvector(int nl, int nh); 19 | double **dmatrix(int nrl, int nrh, int ncl, int nch); 20 | int *ivector(int nl, int nh); 21 | int **imatrix(int nrl, int nrh, int ncl, int nch); 22 | float **submatrix(float **a, int oldrl, int oldrh, int oldcl, int oldch, int newrl, int newcl); 23 | void free_vector(float *v, int nl, int nh); 24 | void free_dvector(double *v, int nl, int nh); 25 | void free_ivector(int *v, int nl, int nh); 26 | void free_matrix(float **m, int nrl, int nrh, int ncl, int nch); 27 | void free_dmatrix(double **m, int nrl, int nrh, int ncl, int nch); 28 | void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch); 29 | void free_submatrix(float **b, int nrl, int nrh, int ncl, int nch); 30 | void free_convert_matrix(float *a, int nrl, int nrh, int ncl, int nch); 31 | void nrerror(const char *error_text); 32 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/CHANGES: -------------------------------------------------------------------------------- 1 | 2 | -------------------------- 3 | Version 1.01 4 | Date: 4 May, 1999 5 | -------------------------- 6 | 7 | 1. included math.h in hmm.h. Not including it was creating problems 8 | in forward.c. 9 | 10 | 2. BaumWelch(): Convergence criterion has been changed: now stops if 11 | log Prob(Obser|model_n) - log Prob(Obser|model_{n-1}) < DELTA 12 | where DELTA = 0.001 in baumwelch.c. 13 | 14 | 3. esthmm: Allow user to specify the initial model by using a -I flag and 15 | and then giving the hmm model file name. 16 | 17 | So, say you generate a sequence O using a model lambda_0. Now if 18 | you estimate the model parameters lambda_1 from the observation using 19 | lambda_0 as the intial parameter values: 20 | log Prob(O|lambda_0) and log Prob(O| lambda_1) should be close. 21 | 22 | This is a sanity check for BaumWelch code. 23 | 24 | 4. genseq: now generates random sequences everytime you run it. It also 25 | has a way of specifying the random seed in the command line so that 26 | you can replicate pervious experiments. 27 | 28 | 5. Added a new executable: testfor. This allows you to compute 29 | log Prob(O| model) using Forward() and ForwardWithScale(). 30 | 31 | 6. Added postscript and PDF versions of a tutorial I gave on HMMs. 32 | 33 | 7. Tested on: 34 | Sun Ultra 2 running UNIX SUN Solaris 5.5 and GNU gcc 2.7.2 35 | DEC Alpha running UNIX OSF version 4.0 and GNU gcc 2.8.1 36 | Dell i686 running Redhat Linux 2.0.36 and GNU gcc 2.7.2.3 37 | 38 | -------------------------- 39 | Version 1.02 40 | Date: 5 May, 1999 41 | -------------------------- 42 | 43 | 1. Test on: 44 | Gateway Pentium 400 MHz machine running NT. The compiler 45 | was gcc from Cygnus (version B20) You can get it from: 46 | ftp://go.cygnus.com/pub/sourceware.cygnus.com/cygwin/latest/full.exe 47 | 48 | 2. Changed the README file, viterbi.c 49 | 50 | 3. Corrected few typos in the tutorial (hmmtut.ps and hmmtut.pdf). 51 | 52 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/testfor.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 4 May 1999 4 | ** File: testfor.c 5 | ** Purpose: driver for testing the Forward, ForwardWithScale code. 6 | ** Organization: University of Maryland 7 | ** 8 | ** $Id$ 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "nrutil.h" 16 | #include "hmm.h" 17 | static char rcsid[] = "$Id: testvit.c,v 1.3 1998/02/23 07:39:07 kanungo Exp kanungo $"; 18 | 19 | int main (int argc, char **argv) 20 | { 21 | int t, T; 22 | HMM hmm; 23 | int *O; /* observation sequence O[1..T] */ 24 | double **alpha; 25 | double *scale; 26 | double proba, logproba; 27 | FILE *fp; 28 | 29 | if (argc != 3) { 30 | printf("Usage error \n"); 31 | printf("Usage: testfor \n"); 32 | exit (1); 33 | } 34 | 35 | fp = fopen(argv[1], "r"); 36 | if (fp == NULL) { 37 | fprintf(stderr, "Error: File %s not found\n", argv[1]); 38 | exit (1); 39 | } 40 | ReadHMM(fp, &hmm); 41 | fclose(fp); 42 | 43 | fp = fopen(argv[2], "r"); 44 | if (fp == NULL) { 45 | fprintf(stderr, "Error: File %s not found\n", argv[2]); 46 | exit (1); 47 | } 48 | ReadSequence(fp, &T, &O); 49 | fclose(fp); 50 | 51 | 52 | alpha = dmatrix(1, T, 1, hmm.N); 53 | scale = dvector(1, T); 54 | 55 | printf("------------------------------------\n"); 56 | printf("Forward without scaling \n"); 57 | Forward(&hmm, T, O, alpha, &proba); 58 | fprintf(stdout, "log prob(O| model) = %E\n", log(proba)); 59 | 60 | printf("------------------------------------\n"); 61 | printf("Forward with scaling \n"); 62 | 63 | ForwardWithScale(&hmm, T, O, alpha, scale, &logproba); 64 | 65 | fprintf(stdout, "log prob(O| model) = %E\n", logproba); 66 | printf("------------------------------------\n"); 67 | printf("The two log probabilites should identical \n"); 68 | printf("(within numerical precision). When observation\n"); 69 | printf("sequence is very large, use scaling. \n"); 70 | 71 | free_ivector(O, 1, T); 72 | free_dmatrix(alpha, 1, T, 1, hmm.N); 73 | free_dvector(scale, 1, T); 74 | FreeHMM(&hmm); 75 | } 76 | 77 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | // Self include 7 | #include "GrabJellyfishKmer.h" 8 | #include "GetCnvSignal.h" 9 | //#include "GenerateKmer.h" 10 | 11 | struct SubCommand { 12 | const unsigned int no_sub_commands = 2; 13 | //const std::string sub_commands[3] = {"GrabJellyfishKmer", "GetCnvSignal", "GenerateKmer"}; 14 | const std::string sub_commands[2] = {"GrabJellyfishKmer", "GetCnvSignal"}; 15 | 16 | const std::string Help (const char* program) const { return 17 | std::string("\n") + 18 | std::string("USAGE: ") + program + std::string(" [options]\n\n") + 19 | std::string("Commands:\n") + 20 | std::string("\tGrabJellyfishKmer Report the count of kmer giving Jellyfish database and a FASTA.\n") + 21 | std::string("\tGetCnvSignal Report CNV signals such as read depth and kmer count.\n"); 22 | //std::string("\tGenerateKmer Generate a kmer table by giving a FASTA. The kmer is shown by ascii code of log2(kmer)+34.\n"); 23 | } 24 | }; 25 | 26 | int main (int argc, char** argv) { 27 | SubCommand cml_option; 28 | std::string command; 29 | if(argc < 2) { 30 | std::cerr << cml_option.Help(argv[0]) << std::endl; 31 | return 1; 32 | } else { 33 | for (unsigned int i = 0; i < cml_option.no_sub_commands; ++i) { 34 | if (strcmp(argv[1], cml_option.sub_commands[i].c_str()) == 0) { 35 | // Get the valid subcommand 36 | command = cml_option.sub_commands[i]; 37 | break; 38 | } 39 | } 40 | } 41 | 42 | if (command.empty()) { // The given command is not in the list 43 | std::cerr << "ERROR: The given command (" << argv[1] << ") is not valid." << std::endl; 44 | std::cerr << cml_option.Help(argv[0]) << std::endl; 45 | return 1; 46 | } else { // Get the valid subcommand 47 | if (command == "GrabJellyfishKmer") { 48 | GrabJellyfishKmer count_kmer(argc - 1, argv + 1); 49 | count_kmer.Run(); 50 | } else if (command == "GetCnvSignal") { 51 | GetCnvSignal get_cnv_signal(argc - 1, argv + 1); 52 | get_cnv_signal.Run(); 53 | } 54 | //else if (command == "GenerateKmer") { 55 | // GenerateKmer generate_kmer(argc - 1, argv + 1); 56 | // generate_kmer.Run(); 57 | //} 58 | } 59 | 60 | return 0; 61 | } 62 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/backward.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: backward.c 5 | ** Purpose: Backward algorithm for computing the probabilty 6 | ** of observing a sequence given a HMM model parameter. 7 | ** Organization: University of Maryland 8 | ** 9 | ** $Id: backward.c,v 1.3 1998/02/23 07:56:05 kanungo Exp kanungo $ 10 | */ 11 | 12 | #include 13 | #include "hmm.h" 14 | static char rcsid[] = "$Id: backward.c,v 1.3 1998/02/23 07:56:05 kanungo Exp kanungo $"; 15 | 16 | void Backward(HMM *phmm, int T, int *O, double **beta, double *pprob) 17 | { 18 | int i, j; /* state indices */ 19 | int t; /* time index */ 20 | double sum; 21 | 22 | 23 | /* 1. Initialization */ 24 | 25 | for (i = 1; i <= phmm->N; i++) 26 | beta[T][i] = 1.0; 27 | 28 | /* 2. Induction */ 29 | 30 | for (t = T - 1; t >= 1; t--) { 31 | for (i = 1; i <= phmm->N; i++) { 32 | sum = 0.0; 33 | for (j = 1; j <= phmm->N; j++) 34 | sum += phmm->A[i][j] * 35 | (phmm->B[j][O[t+1]])*beta[t+1][j]; 36 | beta[t][i] = sum; 37 | 38 | } 39 | } 40 | 41 | /* 3. Termination */ 42 | *pprob = 0.0; 43 | for (i = 1; i <= phmm->N; i++) 44 | *pprob += beta[1][i]; 45 | 46 | } 47 | 48 | void BackwardWithScale(HMM *phmm, int T, int *O, double **beta, 49 | double *scale, double *pprob) 50 | { 51 | int i, j; /* state indices */ 52 | int t; /* time index */ 53 | double sum; 54 | 55 | 56 | /* 1. Initialization */ 57 | 58 | for (i = 1; i <= phmm->N; i++) 59 | beta[T][i] = 1.0/scale[T]; 60 | 61 | /* 2. Induction */ 62 | 63 | for (t = T - 1; t >= 1; t--) { 64 | for (i = 1; i <= phmm->N; i++) { 65 | sum = 0.0; 66 | for (j = 1; j <= phmm->N; j++) 67 | sum += phmm->A[i][j] * 68 | (phmm->B[j][O[t+1]])*beta[t+1][j]; 69 | beta[t][i] = sum/scale[t]; 70 | 71 | } 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/forward.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: forward.c 5 | ** Purpose: Foward algorithm for computing the probabilty 6 | ** of observing a sequence given a HMM model parameter. 7 | ** Organization: University of Maryland 8 | ** 9 | ** $Id: forward.c,v 1.2 1998/02/19 12:42:31 kanungo Exp kanungo $ 10 | */ 11 | #include 12 | #include "hmm.h" 13 | static char rcsid[] = "$Id: forward.c,v 1.2 1998/02/19 12:42:31 kanungo Exp kanungo $"; 14 | 15 | void Forward(HMM *phmm, int T, int *O, double **alpha, double *pprob) 16 | { 17 | int i, j; /* state indices */ 18 | int t; /* time index */ 19 | 20 | double sum; /* partial sum */ 21 | 22 | /* 1. Initialization */ 23 | 24 | for (i = 1; i <= phmm->N; i++) 25 | alpha[1][i] = phmm->pi[i]* phmm->B[i][O[1]]; 26 | 27 | /* 2. Induction */ 28 | 29 | for (t = 1; t < T; t++) { 30 | for (j = 1; j <= phmm->N; j++) { 31 | sum = 0.0; 32 | for (i = 1; i <= phmm->N; i++) 33 | sum += alpha[t][i]* (phmm->A[i][j]); 34 | 35 | alpha[t+1][j] = sum*(phmm->B[j][O[t+1]]); 36 | } 37 | } 38 | 39 | /* 3. Termination */ 40 | *pprob = 0.0; 41 | for (i = 1; i <= phmm->N; i++) 42 | *pprob += alpha[T][i]; 43 | 44 | } 45 | 46 | void ForwardWithScale(HMM *phmm, int T, int *O, double **alpha, 47 | double *scale, double *pprob) 48 | /* pprob is the LOG probability */ 49 | { 50 | int i, j; /* state indices */ 51 | int t; /* time index */ 52 | 53 | double sum; /* partial sum */ 54 | 55 | /* 1. Initialization */ 56 | 57 | scale[1] = 0.0; 58 | for (i = 1; i <= phmm->N; i++) { 59 | alpha[1][i] = phmm->pi[i]* (phmm->B[i][O[1]]); 60 | scale[1] += alpha[1][i]; 61 | } 62 | for (i = 1; i <= phmm->N; i++) 63 | alpha[1][i] /= scale[1]; 64 | 65 | /* 2. Induction */ 66 | 67 | for (t = 1; t <= T - 1; t++) { 68 | scale[t+1] = 0.0; 69 | for (j = 1; j <= phmm->N; j++) { 70 | sum = 0.0; 71 | for (i = 1; i <= phmm->N; i++) 72 | sum += alpha[t][i]* (phmm->A[i][j]); 73 | 74 | alpha[t+1][j] = sum*(phmm->B[j][O[t+1]]); 75 | scale[t+1] += alpha[t+1][j]; 76 | } 77 | for (j = 1; j <= phmm->N; j++) 78 | alpha[t+1][j] /= scale[t+1]; 79 | } 80 | 81 | /* 3. Termination */ 82 | *pprob = 0.0; 83 | 84 | for (t = 1; t <= T; t++) 85 | *pprob += log(scale[t]); 86 | 87 | } 88 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | MASTER_DIR=$(shell pwd) 3 | OBJ_DIR=$(MASTER_DIR)/obj 4 | BIN_DIR=$(MASTER_DIR)/bin 5 | LIB=$(MASTER_DIR)/lib 6 | 7 | AUTOCONF = autoconf 8 | AUTOHEADER = autoheader 9 | 10 | CFLAGS:= 11 | ifeq ($(mode), debug) 12 | CFLAGS:=$(CFLAGS) -O0 -g -DDEBUG 13 | else 14 | CFLAGS:=$(CFLAGS) -O3 15 | endif 16 | 17 | CXXFLAGS:=-std=c++11 $(CFLAGS) 18 | export $(CFLAGS) 19 | export $(CXXFLAGS) 20 | 21 | SOURCES = main.cpp src/GrabJellyfishKmer.cpp src/GetCnvSignal.cpp src/GenerateKmer.cpp src/CallHmm.cpp src/EstimateCoverage.cpp 22 | 23 | PROGRAM=$(BIN_DIR)/JAX-CNV 24 | 25 | UMDHMM_SRC= $(LIB)/umdhmm-v1.02/backward.c \ 26 | $(LIB)/umdhmm-v1.02/baum.c \ 27 | $(LIB)/umdhmm-v1.02/forward.c \ 28 | $(LIB)/umdhmm-v1.02/hmmrand.c \ 29 | $(LIB)/umdhmm-v1.02/hmmutils.c \ 30 | $(LIB)/umdhmm-v1.02/nrutil.c \ 31 | $(LIB)/umdhmm-v1.02/sequence.c \ 32 | $(LIB)/umdhmm-v1.02/viterbi.c 33 | 34 | INCLUDE = -I lib/jellyfish-2.2.6/include -I lib/fastaq/include/ -I lib/ -I lib/htslib/ -I include/ 35 | LIBRARY = -lz -lcurl -lbz2 -lpthread -lssl -lcrypto $(LIB)/fastaq/obj/*.o $(LIB)/jellyfish-2.2.6/lib/*.o \ 36 | $(patsubst %.c, %.o, $(UMDHMM_SRC) ) 37 | 38 | JELLYFISH=$(LIB)/jellyfish-2.2.6/bin/jellyfish 39 | HTS_LIB=$(LIB)/htslib/libhts.a 40 | 41 | all: $(PROGRAM) 42 | .PHONY: all 43 | 44 | $(PROGRAM): fastaq umdhmm $(JELLYFISH) $(HTS_LIB) $(SOURCES) 45 | @mkdir -p $(BIN_DIR) 46 | @$(CXX) $(CXXFLAGS) -o $@ $(SOURCES) $(UMDHMM_SRC) $(INCLUDE) $(HTS_LIB) $(LIBRARY) 47 | 48 | .PHONY: all 49 | 50 | clean: 51 | @rm -rf $(OBJ_DIR) $(BIN_DIR) 52 | $(MAKE) clean -C $(LIB)/fastaq 53 | @rm -rf $(LIB)/jellyfish-2.2.6 54 | $(MAKE) clean -C $(LIB)/umdhmm-v1.02 55 | .PHONY: clean 56 | 57 | 58 | $(OBJ_DIR): 59 | @mkdir -p $(OBJ_DIR) 60 | $(BIN_DIR): 61 | @mkdir -p $(BIN_DIR) 62 | 63 | fastaq: 64 | @echo "- Building in fastaq" 65 | @$(MAKE) --no-print-directory --directory=$(LIB)/fastaq 66 | 67 | umdhmm: 68 | @echo "- Building in umdhmm" 69 | @$(MAKE) --no-print-directory --directory=$(LIB)/umdhmm-v1.02 70 | 71 | $(JELLYFISH): 72 | @echo "- Building in jellyfish" 73 | @cd $(LIB) && tar -zxvf $(LIB)/jellyfish-2.2.6.tar.gz 74 | @cd $(LIB)/jellyfish-2.2.6 && ./configure --prefix=$(LIB)/jellyfish-2.2.6 75 | $(MAKE) --no-print-directory --directory=$(LIB)/jellyfish-2.2.6 76 | @mkdir -p $(BIN_DIR) 77 | @cp $(JELLYFISH) $(BIN_DIR) 78 | 79 | $(HTS_LIB): 80 | @echo "- Building in htslib" 81 | @rm -f $(LIB)/htslib/configure 82 | @rm -rf $(LIB)/htslib/autom4te.cache 83 | @cd $(LIB)/htslib && $(AUTOHEADER) && $(AUTOCONF) && ./configure --disable-lzma --disable-lcurl 84 | $(MAKE) --no-print-directory -C $(LIB)/htslib 85 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/testvit.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: testvit.c 5 | ** Purpose: driver for testing the Viterbi code. 6 | ** Organization: University of Maryland 7 | ** 8 | ** Update: 9 | ** Author: Tapas Kanungo 10 | ** Purpose: run both viterbi with probabilities and 11 | ** viterbi with log, change output etc. 12 | ** $Id: testvit.c,v 1.3 1998/02/23 07:39:07 kanungo Exp kanungo $ 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "nrutil.h" 20 | #include "hmm.h" 21 | static char rcsid[] = "$Id: testvit.c,v 1.3 1998/02/23 07:39:07 kanungo Exp kanungo $"; 22 | 23 | int main (int argc, char **argv) 24 | { 25 | int t, T; 26 | HMM hmm; 27 | int *O; /* observation sequence O[1..T] */ 28 | int *q; /* state sequence q[1..T] */ 29 | double **delta; 30 | int **psi; 31 | double proba, logproba; 32 | FILE *fp; 33 | 34 | if (argc != 3) { 35 | printf("Usage error \n"); 36 | printf("Usage: testvit \n"); 37 | exit (1); 38 | } 39 | 40 | fp = fopen(argv[1], "r"); 41 | if (fp == NULL) { 42 | fprintf(stderr, "Error: File %s not found\n", argv[1]); 43 | exit (1); 44 | } 45 | ReadHMM(fp, &hmm); 46 | fclose(fp); 47 | 48 | fp = fopen(argv[2], "r"); 49 | if (fp == NULL) { 50 | fprintf(stderr, "Error: File %s not found\n", argv[2]); 51 | exit (1); 52 | } 53 | ReadSequence(fp, &T, &O); 54 | fclose(fp); 55 | 56 | q = ivector(1,T); 57 | 58 | delta = dmatrix(1, T, 1, hmm.N); 59 | psi = imatrix(1, T, 1, hmm.N); 60 | 61 | printf("------------------------------------\n"); 62 | printf("Viterbi using direct probabilities\n"); 63 | Viterbi(&hmm, T, O, delta, psi, q, &proba); 64 | fprintf(stdout, "Viterbi MLE log prob = %E\n", log(proba)); 65 | fprintf(stdout, "Optimal state sequence:\n"); 66 | PrintSequence(stdout, T, q); 67 | 68 | printf("------------------------------------\n"); 69 | printf("Viterbi using log probabilities\n"); 70 | /* note: ViterbiLog() returns back with log(A[i][j]) instead 71 | ** of leaving the A matrix alone. If you need the original A, 72 | ** you can make a copy of hmm by calling CopyHMM */ 73 | 74 | ViterbiLog(&hmm, T, O, delta, psi, q, &logproba); 75 | 76 | fprintf(stdout, "Viterbi MLE log prob = %E\n", logproba); 77 | fprintf(stdout, "Optimal state sequence:\n"); 78 | PrintSequence(stdout, T, q); 79 | printf("------------------------------------\n"); 80 | printf("The two log probabilites and optimal state sequences\n"); 81 | printf("should identical (within numerical precision). \n"); 82 | 83 | free_ivector(q, 1, T); 84 | free_ivector(O, 1, T); 85 | free_imatrix(psi, 1, T, 1, hmm.N); 86 | free_dmatrix(delta, 1, T, 1, hmm.N); 87 | FreeHMM(&hmm); 88 | } 89 | 90 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/hmm.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: hmm.h 5 | ** Purpose: datastructures used for HMM. 6 | ** Organization: University of Maryland 7 | ** 8 | ** Update: 9 | ** Author: Tapas Kanungo 10 | ** Purpose: include . Not including this was 11 | ** creating a problem with forward.c 12 | ** $Id: hmm.h,v 1.9 1999/05/02 18:38:11 kanungo Exp kanungo $ 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | typedef struct { 20 | int N; /* number of states; Q={1,2,...,N} */ 21 | int M; /* number of observation symbols; V={1,2,...,M}*/ 22 | double **A; /* A[1..N][1..N]. a[i][j] is the transition prob 23 | of going from state i at time t to state j 24 | at time t+1 */ 25 | double **B; /* B[1..N][1..M]. b[j][k] is the probability of 26 | of observing symbol k in state j */ 27 | double *pi; /* pi[1..N] pi[i] is the initial state distribution. */ 28 | } HMM; 29 | void ReadHMM(FILE *fp, HMM *phmm); 30 | void PrintHMM(FILE *fp, HMM *phmm); 31 | void InitHMM(HMM *phmm, int N, int M, int seed); 32 | void CopyHMM(HMM *phmm1, HMM *phmm2); 33 | void FreeHMM(HMM *phmm); 34 | 35 | void ReadSequence(FILE *fp, int *pT, int **pO); 36 | void PrintSequence(FILE *fp, int T, int *O); 37 | void GenSequenceArray(HMM *phmm, int seed, int T, int *O, int *q); 38 | int GenInitalState(HMM *phmm); 39 | int GenNextState(HMM *phmm, int q_t); 40 | int GenSymbol(HMM *phmm, int q_t); 41 | 42 | 43 | void Forward(HMM *phmm, int T, int *O, double **alpha, double *pprob); 44 | void ForwardWithScale(HMM *phmm, int T, int *O, double **alpha, 45 | double *scale, double *pprob); 46 | void Backward(HMM *phmm, int T, int *O, double **beta, double *pprob); 47 | void BackwardWithScale(HMM *phmm, int T, int *O, double **beta, 48 | double *scale, double *pprob); 49 | void BaumWelch(HMM *phmm, int T, int *O, double **alpha, double **beta, 50 | double **gamma, int *niter, 51 | double *plogprobinit, double *plogprobfinal); 52 | 53 | double *** AllocXi(int T, int N); 54 | void FreeXi(double *** xi, int T, int N); 55 | void ComputeGamma(HMM *phmm, int T, double **alpha, double **beta, 56 | double **gamma); 57 | void ComputeXi(HMM* phmm, int T, int *O, double **alpha, double **beta, 58 | double ***xi); 59 | void Viterbi(HMM *phmm, int T, int *O, double **delta, int **psi, 60 | int *q, double *pprob); 61 | void ViterbiLog(HMM *phmm, int T, int *O, double **delta, int **psi, 62 | int *q, double *pprob); 63 | 64 | /* random number generator related functions*/ 65 | 66 | int hmmgetseed(void); 67 | void hmmsetseed(int seed); 68 | double hmmgetrand(void); 69 | 70 | #define MAX(x,y) ((x) > (y) ? (x) : (y)) 71 | #define MIN(x,y) ((x) < (y) ? (x) : (y)) 72 | 73 | 74 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/t2.1500.seq: -------------------------------------------------------------------------------- 1 | T= 1500 2 | 2 2 2 2 2 2 1 2 1 1 2 1 2 2 2 2 1 1 1 1 1 2 1 1 2 2 2 1 2 2 2 1 1 2 1 2 2 1 1 2 1 1 1 1 2 1 2 2 1 1 1 1 1 1 1 2 2 2 2 1 2 1 2 1 1 1 1 1 2 2 1 1 1 1 1 1 2 2 2 1 2 1 1 1 2 2 2 1 1 2 1 1 2 1 2 1 1 2 1 1 1 1 1 1 2 2 2 1 1 1 1 2 1 2 1 2 2 2 2 1 1 2 1 2 1 1 1 1 2 1 1 2 1 2 2 1 1 2 1 1 1 1 2 1 1 1 2 2 2 2 1 1 1 1 1 2 2 1 1 2 1 2 2 1 2 1 2 2 1 1 2 1 1 2 2 1 1 1 1 1 1 1 2 2 2 1 2 1 2 1 2 2 2 2 1 2 2 2 1 1 1 2 2 2 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 1 2 1 1 2 2 2 1 2 2 2 2 1 1 1 1 1 2 1 1 2 1 1 2 2 2 1 1 2 2 1 2 2 2 2 1 1 1 2 2 2 2 1 1 2 1 1 2 1 1 2 1 2 2 1 2 1 1 2 2 1 2 2 2 1 1 1 1 2 1 1 1 2 2 2 2 1 2 1 2 1 1 1 2 1 2 1 2 1 1 2 1 1 1 2 1 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 2 1 2 2 2 2 1 1 1 1 1 1 2 1 2 1 2 1 2 1 1 2 2 2 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 2 1 2 2 1 2 2 1 1 1 2 1 1 2 1 2 1 2 2 2 2 1 1 2 2 1 2 1 1 2 2 1 1 1 1 2 1 2 1 2 2 2 2 2 2 1 1 2 2 2 2 2 2 2 1 2 1 1 2 1 2 1 1 2 2 2 1 2 2 1 1 1 1 2 2 2 1 1 2 1 1 1 1 1 2 2 1 1 2 2 1 1 2 1 2 1 2 1 1 2 1 1 2 1 1 1 2 2 1 2 1 2 2 1 2 1 2 2 1 2 2 2 1 1 1 1 2 2 1 1 1 1 1 1 2 1 2 1 1 1 1 1 2 2 2 1 1 1 1 2 2 1 1 2 1 2 1 1 1 1 1 1 2 2 2 2 1 2 2 2 2 2 1 1 2 1 1 1 2 2 2 2 1 1 1 1 2 2 2 2 2 1 1 1 1 1 2 1 2 1 1 2 2 1 2 2 1 2 2 2 1 2 1 1 1 2 2 1 2 1 1 2 2 1 1 2 2 2 1 2 2 1 1 1 2 2 1 2 2 1 1 2 2 1 1 2 2 2 2 2 1 1 1 1 1 1 1 2 1 1 1 1 2 2 1 1 1 1 1 2 1 1 2 2 2 2 2 2 2 1 2 2 1 2 1 2 1 2 1 1 1 1 2 1 1 2 1 2 2 1 2 2 2 2 1 1 1 1 1 2 2 2 2 2 2 2 1 2 1 2 1 1 2 2 1 1 2 1 1 1 1 2 1 2 1 1 2 1 1 2 2 1 2 2 2 2 1 2 1 2 1 1 1 2 2 2 1 2 2 2 2 2 2 2 2 1 1 1 2 1 2 1 2 2 2 1 1 2 1 1 2 1 1 1 1 1 2 2 1 1 2 2 1 1 1 1 2 2 1 1 1 1 1 1 2 1 2 2 1 1 1 1 1 2 1 1 2 1 1 1 1 1 2 1 2 1 2 1 1 2 2 2 1 1 1 1 2 1 1 1 2 2 2 2 2 1 2 1 1 1 2 1 1 1 2 1 1 1 1 2 2 2 2 1 1 1 1 2 1 2 2 2 2 1 2 2 1 1 2 1 2 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 2 2 2 2 1 2 1 2 2 1 2 1 1 2 2 1 1 2 1 2 2 1 1 1 2 1 2 1 2 2 1 1 1 2 2 2 2 2 2 1 2 1 2 2 1 2 2 1 2 1 1 1 2 1 1 2 2 1 1 1 1 1 2 1 2 2 2 1 2 2 2 1 2 2 1 1 1 2 1 2 1 1 2 1 2 2 1 2 1 2 2 2 1 2 2 1 1 2 1 1 2 2 1 1 2 2 2 1 2 1 1 2 1 1 1 2 1 1 2 1 1 1 2 2 1 2 1 2 2 1 1 2 1 1 1 2 2 1 2 2 2 1 1 1 1 1 2 1 1 2 1 2 1 2 1 1 2 2 1 1 2 1 2 1 1 1 1 1 2 1 1 1 2 2 2 1 2 2 1 2 1 2 2 2 1 1 1 2 2 2 2 2 2 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 2 1 2 1 2 2 2 2 2 2 2 2 2 1 1 2 1 1 1 2 2 2 2 1 2 1 1 1 1 1 2 1 2 1 2 2 1 2 2 1 1 1 2 1 2 1 2 1 2 2 1 2 1 2 1 1 2 1 1 1 1 1 2 1 1 2 1 1 2 2 1 1 1 2 2 2 2 1 1 2 1 1 2 1 2 2 2 2 1 2 2 1 2 2 1 1 2 1 1 1 2 1 2 2 2 1 2 1 2 1 1 1 2 2 1 2 1 1 2 2 1 1 1 2 2 1 1 1 1 2 2 1 2 2 1 2 2 2 1 2 2 2 1 2 2 2 1 1 1 1 2 2 1 1 1 2 1 1 2 1 1 2 1 1 1 2 2 2 1 2 2 2 2 2 1 2 1 2 1 2 2 1 2 1 1 1 1 1 1 2 1 2 1 1 1 2 2 1 1 2 1 2 2 1 1 1 2 1 1 1 1 2 1 2 1 1 2 2 1 1 1 1 1 1 1 1 1 2 2 1 2 1 2 2 2 1 2 1 2 2 1 2 2 2 1 2 2 2 1 1 1 2 2 2 2 2 2 1 2 2 1 2 2 1 1 2 1 1 2 1 2 2 2 1 1 2 1 1 1 2 2 2 1 1 1 1 1 2 1 2 2 2 1 2 2 2 1 1 1 1 1 2 1 2 1 2 1 2 2 1 1 2 2 1 1 1 2 2 1 2 2 1 2 1 1 1 2 1 2 1 1 1 1 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 2 1 1 1 1 1 2 2 1 2 1 2 1 2 1 1 1 2 1 1 2 1 2 2 2 2 1 2 1 1 2 2 2 2 1 1 1 2 1 2 1 1 2 1 1 1 1 1 1 2 1 1 2 2 2 2 2 1 1 2 1 1 2 1 1 1 1 1 1 1 2 1 1 2 2 1 3 | -------------------------------------------------------------------------------- /src/GenerateKmer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | // Self include 7 | #include "GenerateKmer.h" 8 | 9 | // FASTAQ include 10 | #include "fastaq/fasta.h" 11 | #include "fastaq/reference.h" 12 | 13 | namespace { 14 | char CeilLog2 (const uint64_t in) 15 | { 16 | uint64_t x = in; 17 | static const unsigned long long t[6] = { 18 | 0xFFFFFFFF00000000ull, 19 | 0x00000000FFFF0000ull, 20 | 0x000000000000FF00ull, 21 | 0x00000000000000F0ull, 22 | 0x000000000000000Cull, 23 | 0x0000000000000002ull 24 | }; 25 | 26 | //int y = (((x & (x - 1)) == 0) ? 0 : 1); 27 | int y = (x > 0) ? 1 : 0; 28 | 29 | for (int i = 0, j = 32; i < 6; i++) { 30 | int k = (((x & t[i]) == 0) ? 0 : j); 31 | y += k; 32 | x >>= k; 33 | j >>= 1; 34 | } 35 | 36 | // Use 125 as the max. 37 | return (y > 92) ? 125 : y + 33; 38 | } 39 | } 40 | 41 | int GenerateKmer::Run() const 42 | { 43 | if (!cmdline.CheckArg()) { 44 | std::cerr << cmdline.Help("GenerateKmer"); 45 | return 1; 46 | } 47 | 48 | std::vector ref_names; 49 | std::list > kmer; 50 | 51 | Fastaq::CountKmer(cmdline.fasta.c_str(), cmdline.kmer_size, ref_names, kmer); 52 | 53 | if (ref_names.size() != kmer.size()) { 54 | std::cerr << "ERROR: The # of ref_names does not match the # of kmer_table." << std::endl; 55 | return 1; 56 | } 57 | 58 | std::list >::const_iterator table_ite = kmer.begin(); 59 | 60 | 61 | // Re-direct cout 62 | std::ofstream ofs; 63 | std::streambuf * coutbuf = std::cout.rdbuf(); //save old buf 64 | if (!cmdline.output.empty()) { 65 | ofs.open(cmdline.output, std::ofstream::out); 66 | std::cout.rdbuf(ofs.rdbuf()); //redirect std::cout to file; 67 | } 68 | 69 | for (std::vector::const_iterator name_ite = ref_names.begin(); name_ite != ref_names.end(); ++name_ite) { 70 | unsigned int chr_no = 1; 71 | std::cout << ">" << *name_ite << std::endl; 72 | for (std::list::const_iterator kmer_ite = table_ite->begin(); kmer_ite != table_ite->end(); ++kmer_ite) { 73 | if (chr_no % 60 == 0) std::cout << std::endl; 74 | ++chr_no; 75 | std::cout << CeilLog2(*kmer_ite); 76 | } 77 | if (chr_no % 60 != 0) std::cout << std::endl; 78 | ++table_ite; 79 | } 80 | 81 | // Clean up 82 | if (!cmdline.output.empty()) { 83 | std::cout.rdbuf(coutbuf); //reset to standard output again 84 | ofs.close(); 85 | } 86 | 87 | return 0; 88 | } 89 | 90 | GenerateKmer::GenerateKmer(int argc, char** argv) 91 | : cmdline(argc, argv) 92 | { 93 | } 94 | 95 | GenerateKmer::GenerateKmer(const char * pInput_fasta, const char * pOutput, const int size) 96 | { 97 | SetParameters(pInput_fasta, pOutput, size); 98 | } 99 | 100 | void GenerateKmer::SetParameters(const SGenerateKmerCml & cml) { 101 | cmdline = cml; 102 | } 103 | 104 | void GenerateKmer::SetParameters(const char * pInput_fasta, const char * pOutput, const int size) 105 | { 106 | cmdline.fasta = pInput_fasta; 107 | if (pOutput) cmdline.output = pOutput; 108 | cmdline.kmer_size = size; 109 | } 110 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/genseq.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 22 February 1998 4 | ** File: genseq.c 5 | ** Purpose: driver for generating a sequence of observation symbols. 6 | ** Organization: University of Maryland 7 | ** 8 | ** Update: 9 | ** Author: Tapas Kanungo 10 | ** Purpose: randomize the seeds to generate random sequences 11 | ** everytime the program is run. 12 | ** 13 | ** $Id: genseq.c,v 1.4 1999/05/04 15:36:53 kanungo Exp kanungo $ 14 | */ 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include "nrutil.h" 21 | #include "hmm.h" 22 | #include 23 | #include 24 | 25 | static char rcsid[] = "$Id: genseq.c,v 1.4 1999/05/04 15:36:53 kanungo Exp kanungo $"; 26 | 27 | void Usage(char *name); 28 | 29 | int main (int argc, char **argv) 30 | { 31 | HMM hmm; /* the HMM */ 32 | int T = 100; /* length of observation sequence */ 33 | int *O; /* the observation sequence O[1..T]*/ 34 | int *q; /* the state sequence q[1..T] */ 35 | FILE *fp; /* HMM parameters in this file */ 36 | int sflg=0, tflg=0, errflg = 0; 37 | int seed; /* random number seed */ 38 | int c; 39 | extern char *optarg; 40 | extern int optind, opterr, optopt; 41 | 42 | 43 | 44 | while ((c= getopt(argc, argv, "S:T:")) != EOF) 45 | switch (c) { 46 | case 'S': 47 | /* set random number generator seed */ 48 | if (sflg) 49 | errflg++; 50 | else { 51 | sflg++; 52 | sscanf(optarg, "%d", &seed); 53 | } 54 | break; 55 | case 'T': 56 | /* set sequence length */ 57 | if (tflg) 58 | errflg++; 59 | else { 60 | tflg++; 61 | sscanf(optarg, "%d", &T); 62 | } 63 | break; 64 | case '?': 65 | errflg++; 66 | } 67 | 68 | if ((argc - optind) != 1) errflg++; /* number or arguments not 69 | okay */ 70 | 71 | if (errflg || !tflg ) { 72 | Usage(argv[0]); 73 | exit(1); 74 | } 75 | 76 | 77 | /* read HMM file */ 78 | fp = fopen(argv[optind],"r"); 79 | if (fp == NULL) { 80 | fprintf(stderr, "Error: File %s not found \n", argv[optind]); 81 | exit (1); 82 | } 83 | ReadHMM(fp, &hmm); 84 | fclose(fp); 85 | 86 | 87 | /* length of observation sequence, T */ 88 | 89 | O = ivector(1,T); /* alloc space for observation sequence O */ 90 | q = ivector(1,T); /* alloc space for state sequence q */ 91 | 92 | if (!sflg) seed = hmmgetseed(); 93 | 94 | fprintf(stderr, "RandomSeed: %d\n", seed); 95 | GenSequenceArray(&hmm, seed, T, O, q); 96 | 97 | PrintSequence(stdout, T, O); 98 | free_ivector(O, 1, T); 99 | free_ivector(q, 1, T); 100 | FreeHMM(&hmm); 101 | } 102 | 103 | void Usage(char *name) 104 | { 105 | printf("Usage error \n"); 106 | printf("Usage: %s -T \n", name); 107 | printf("Usage: %s -S -T \n", 108 | name); 109 | printf(" T = length of sequence\n"); 110 | printf(" S = random number seed \n"); 111 | printf(" mod.hmm is a file with HMM parameters\n"); 112 | } 113 | -------------------------------------------------------------------------------- /include/GenerateKmer.h: -------------------------------------------------------------------------------- 1 | #ifndef _GENERATEKMER_H_ 2 | #define _GENERATEKMER_H_ 3 | 4 | #include 5 | #include 6 | 7 | 8 | struct SGenerateKmerCml { 9 | SGenerateKmerCml(){} 10 | SGenerateKmerCml(const int argc, char** const argv){Parse(argc, argv);} 11 | 12 | bool help = false; 13 | 14 | // i/o 15 | std::string fasta; // -f --fasta 16 | std::string output; // -o --output 17 | 18 | // operation parameters 19 | int kmer_size = 15; // -s --size 20 | 21 | // command line 22 | std::string cmd; 23 | 24 | const char* short_option = "hf:o:s:"; 25 | 26 | // Help list 27 | const std::string Help (const char* program) const { return 28 | std::string("\n") + 29 | std::string("USAGE: ") + program + std::string(" -i -f \n\n") + 30 | std::string(" -h --help Print this help list.\n") + 31 | std::string("\n") + 32 | std::string("Input & Output:\n") + 33 | std::string(" -f --fasta FASTA for kmer lookup.\n") + 34 | std::string(" -o --output Output file.\n")+ 35 | std::string("operation:\n") + 36 | std::string(" -s --size Kmer size [15].\n"); 37 | } 38 | 39 | // Check the required arguments. 40 | bool CheckArg () const { 41 | bool ok = true; 42 | 43 | return ok && !fasta.empty(); 44 | } 45 | 46 | bool Parse (const int argc, char** const argv) { 47 | // Record the input command line. 48 | for (int i = 0; i < argc; i++) cmd += std::string(argv[i]) + " "; 49 | const struct option long_option[] = { 50 | {"help", required_argument, NULL, 'h'}, 51 | // i/o 52 | {"fasta", required_argument, NULL, 'f'}, 53 | {"output", required_argument, NULL, 'o'}, 54 | 55 | // operation parameters 56 | {"size", required_argument, NULL, 's'}, 57 | {0,0,0,0} 58 | }; 59 | int option_index = 0; 60 | int c = -1; 61 | while ((c = getopt_long(argc, argv, short_option, long_option, &option_index)) != -1) { 62 | switch (c) { 63 | case 'h': help = true; break; 64 | case 'f': fasta = optarg; break; 65 | case 'o': output = optarg; break; 66 | case 's': kmer_size = atoi(optarg); break; 67 | default: std::cerr << "WARNING: Unkonw parameter: " << long_option[option_index].name << std::endl; break; 68 | } 69 | } 70 | 71 | if (help) { 72 | Help(argv[0]); 73 | return false; 74 | } 75 | 76 | return CheckArg(); 77 | } 78 | }; // SCountKmerCml 79 | 80 | class GenerateKmer { 81 | public: 82 | // Constructors 83 | GenerateKmer(); 84 | GenerateKmer(int argc, char** argv); 85 | GenerateKmer(const char * pInput_fasta, const char * pOutput = NULL, const int size = 15); 86 | 87 | // The function will report kmer count according to the parameter setting. 88 | // Return: 0 is successful. 89 | int Run() const; 90 | 91 | // If files are not assinged when declaring the class, you may use the function to assign them. 92 | void SetParameters(const SGenerateKmerCml & cml); 93 | void SetParameters(const char * pInput_fasta, const char * pOutput = NULL, const int size = 15); 94 | private: 95 | SGenerateKmerCml cmdline; 96 | // Not allow to use copy and assign constructors. 97 | GenerateKmer(const GenerateKmer&); 98 | GenerateKmer& operator= (const GenerateKmer&); 99 | }; 100 | #endif 101 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/sequence.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 22 February 1998 4 | ** File: sequence.c 5 | ** Purpose: Routines for generating, reading and writing sequence of 6 | ** observation symbols. 7 | ** Organization: University of Maryland 8 | ** 9 | ** Update: 10 | ** Author: Tapas Kanungo 11 | ** Purpose: To make calls to generic random number generators 12 | ** and to change the seed everytime the software is executed. 13 | ** 14 | ** $Id: sequence.c,v 1.2 1998/02/23 06:19:41 kanungo Exp kanungo $ 15 | */ 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "nrutil.h" 22 | #include "hmm.h" 23 | 24 | static char rcsid[] = "$Id: sequence.c,v 1.2 1998/02/23 06:19:41 kanungo Exp kanungo $"; 25 | 26 | void GenSequenceArray(HMM *phmm, int seed, int T, int *O, int *q) 27 | { 28 | int t = 1; 29 | int q_t, o_t; 30 | 31 | hmmsetseed(seed); 32 | 33 | q[1] = GenInitalState(phmm); 34 | O[1] = GenSymbol(phmm, q[1]); 35 | 36 | for (t = 2; t <= T; t++) { 37 | q[t] = GenNextState(phmm, q[t-1]); 38 | O[t] = GenSymbol(phmm, q[t]); 39 | } 40 | } 41 | 42 | int GenInitalState(HMM *phmm) 43 | { 44 | double val, accum; 45 | int i, q_t; 46 | 47 | val = hmmgetrand(); 48 | accum = 0.0; 49 | q_t = phmm->N; 50 | for (i = 1; i <= phmm->N; i++) { 51 | if (val < phmm->pi[i] + accum) { 52 | q_t = i; 53 | break; 54 | } 55 | else { 56 | accum += phmm->pi[i]; 57 | } 58 | } 59 | 60 | return q_t; 61 | } 62 | 63 | int GenNextState(HMM *phmm, int q_t) 64 | { 65 | double val, accum; 66 | int j, q_next; 67 | 68 | val = hmmgetrand(); 69 | accum = 0.0; 70 | q_next = phmm->N; 71 | for (j = 1; j <= phmm->N; j++) { 72 | if ( val < phmm->A[q_t][j] + accum ) { 73 | q_next = j; 74 | break; 75 | } 76 | else 77 | accum += phmm->A[q_t][j]; 78 | } 79 | 80 | return q_next; 81 | } 82 | int GenSymbol(HMM *phmm, int q_t) 83 | { 84 | double val, accum; 85 | int j, o_t; 86 | 87 | val = hmmgetrand(); 88 | accum = 0.0; 89 | o_t = phmm->M; 90 | for (j = 1; j <= phmm->M; j++) { 91 | if ( val < phmm->B[q_t][j] + accum ) { 92 | o_t = j; 93 | break; 94 | } 95 | else 96 | accum += phmm->B[q_t][j]; 97 | } 98 | 99 | return o_t; 100 | } 101 | 102 | void ReadSequence(FILE *fp, int *pT, int **pO) 103 | { 104 | int *O; 105 | int i; 106 | 107 | fscanf(fp, "T= %d\n", pT); 108 | O = ivector(1,*pT); 109 | for (i=1; i <= *pT; i++) 110 | fscanf(fp,"%d", &O[i]); 111 | *pO = O; 112 | } 113 | 114 | void PrintSequence(FILE *fp, int T, int *O) 115 | { 116 | int i; 117 | 118 | fprintf(fp, "T= %d\n", T); 119 | for (i=1; i <= T; i++) 120 | fprintf(fp,"%d ", O[i]); 121 | printf("\n"); 122 | 123 | } 124 | 125 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # JAX-CNV: clinical-graded copy number variation detector 2 | 3 | ## Overview 4 | Here we present JAX-CNV, a newly developed NGS-based CNV algorithm and its performance on WGS data. We focused on deletions and duplications that are >50Kb. 5 | 6 | ## Dependencies 7 | To run the tool, the following dependencies are required 8 | * **Linux/Unix** 64-bit 9 | * **gcc** version 4.9 or higher 10 | * **zlib** 11 | * **autoconf** version 2.69 or higher 12 | 13 | JAX-CNV depends on the following tools, which are already included in JAX-CNV/lib/ 14 | * **fastaq** [Github](https://github.com/wanpinglee/fastaq/tree/990d69bffe24a2ea2adf823052bddcf25ea71017) 15 | * **jellyfish-2.2.6** [Github](https://github.com/gmarcais/Jellyfish/releases/tag/v2.2.6) 16 | * **htslib** [Github](https://github.com/samtools/htslib/tree/b8941e42e1962a026ff1f742df1a66c7edddf89c) 17 | 18 | ## Download and Installation 19 | ```Shell 20 | git clone --recursive https://github.com/TheJacksonLaboratory/genome_similarity.git 21 | cd genome_similarity 22 | make 23 | ``` 24 | 25 | ## Usages 26 | ### Kmer FASTA file preparation 27 | We employ jellyfish to check 25-mer counts and GrabJellyfishKmer to dump a kmer FASTA file. 28 | Please check [jellyfish](https://github.com/gmarcais/Jellyfish/releases/tag/v2.2.6) for more jellyfish options, such as --threads/-t (Number of threads) and --Files/-F (Number files open simultaneously). -s is for Initial hash size and please adjust it for your machine. 29 | ``` 30 | bin/jellyfish count -m 25 -s -o .jf [-t -F ] 31 | bin/JAX-CNV GrabJellyfishKmer --ascii -i .jf -f -o .kmer 32 | ``` 33 | 34 | ### Detect CNVs 35 | A sorted BAM, FASTA and Kmer are required. The results will be printed on stdout or use -o to specify an output file. 36 | ``` 37 | bin/JAX-CNV GetCnvSignal -f -k .kmer -b [-o ] 38 | Rscript --vanilla JaxCNVMerge.R -i .bed 39 | ``` 40 | .bed.merge.bed is the final result. JaxCNVMerge.R could be also applied for other tools'' bed files. 41 | 42 | The complete command line options are: 43 | ``` 44 | USAGE: GetCnvSignal -f -k -b 45 | 46 | -h --help Print this help list. 47 | 48 | Input & Output: 49 | -b --bam Input BAM; required. 50 | -k --kmer Kmer table. 51 | -f --fasta FASTA for kmer lookup. 52 | -o --output Output file. 53 | 54 | Operations: 55 | -c --coverage The expected coverage. 56 | -r --region chr:begin-end A target region. 57 | -q --aln_qual A mapping quality filter for alignments. [40] 58 | --bin Report a result for each # bp. [50] 59 | --log Log output. 60 | --unique_kmer Require percentage of unique kmer to report a CNV. [0.6] 61 | --kmer_score Score for log2(kmer count) = 2 positions. [0.1] 62 | ``` 63 | 64 | ## For Dcoker users 65 | [Dockerfile](Dockerfile) is provided. Please notice that sudo may be required for docker usages depending on your machine setting. 66 | ``` 67 | cd JAX-CNV 68 | docker build . 69 | ``` 70 | JAX-CNV wnd jellyfish will be built on /tools in docker. 71 | Or, Pull docker image from [wanpinglee/jax-cnv](https://cloud.docker.com/repository/docker/wanpinglee/jax-cnv). 72 | ``` 73 | docker pull wanpinglee/jax-cnv:latest 74 | ``` 75 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/README: -------------------------------------------------------------------------------- 1 | $Id: README,v 1.5 1998/03/16 08:21:26 kanungo Exp kanungo $ 2 | 3 | Package: UMDHMM version 1.02 4 | Author: Tapas Kanungo (kanungo@cfar.umd.edu) 5 | Organization: University of Maryland, Collge Park, MD 6 | Web: http://www.cfar.umd.edu/~kanungo 7 | Date: 19 February, 1998 8 | 9 | Updated on 5 May, 1999: see CHANGES file. 10 | 11 | Updated on 6 May, 1999: see CHANGES file. 12 | 13 | This software contains code for understanding the basics 14 | of hidden Markov models (HMM). The notation used is 15 | very similar to that used by Rabiner and Juang in: 16 | 17 | - Rabiner, L. R. and B. H. Juang, "Fundamentals of Speech Recognition," 18 | Prentice Hall, 1993. 19 | - Rabiner, L. R., "A Tutorial on Hidden Markov Models and Selected 20 | Applications in Speech Recognition, Prov. of IEEE, vol. 77, no. 2, 21 | pp. 257-286, 1989. 22 | - Rabiner, L. R., and B. H. Juang, "An Introduction to Hidden Markov Models," 23 | IEEE ASSP Magazine, vol. 3, no. 1, pp. 4-16, Jan. 1986. 24 | 25 | --------------------------------------------- 26 | Installation: 27 | --------------------------------------------- 28 | -------------------- 29 | UNIX: Dec, Sun Solaris, Linux (redhat): 30 | -------------------- 31 | 32 | Type "make all" at the unix prompt. It should 33 | compile the package. 34 | 35 | -------------------- 36 | Microsoft NT/95/98: 37 | -------------------- 38 | 39 | 1. Get the GNU package from: 40 | ftp://go.cygnus.com/pub/sourceware.cygnus.com/cygwin/latest/full.exe 41 | 42 | This package includes gcc and various commands and 43 | shells (sh, bash, etc.) that make the PC have a unix 44 | like environment. 45 | 46 | 2. Change to the UMDHMM directory and type "make all". 47 | 48 | 49 | --------------------------------------------- 50 | Executables: 51 | --------------------------------------------- 52 | genseq: Generates a symbol sequence using the specified model 53 | 54 | testvit: Generates the most like state sequence for a given symbol sequence, 55 | given the HMM, using Viterbi. 56 | 57 | esthmm: Estimates the HMM from a given symbol sequence using BaumWelch. 58 | 59 | testfor: Computes log Prob(observation|model) using the Forward algorithm. 60 | 61 | Note 1: The model test.hmm and sequence test.seq solve exercise 6.3 in 62 | the book by Rabiner and Juang (page 341). Just execute the command: 63 | prompt% testvit test.hmm test.seq 64 | and compare the output with the solution given in the book. 65 | 66 | --------------------------------------------- 67 | HMM file format: 68 | --------------------------------------------- 69 | M= 70 | N= 71 | A: 72 | a11 a12 ... a1N 73 | a21 a22 ... a2N 74 | . . . . 75 | . . . . 76 | . . . . 77 | aN1 aN2 ... aNN 78 | B: 79 | b11 b12 ... b1M 80 | b21 b22 ... b2M 81 | . . . . 82 | . . . . 83 | . . . . 84 | bN1 bN2 ... bNM 85 | pi: 86 | pi1 pi2 ... piN 87 | 88 | --------------------------------------------- 89 | Sample HMM file: 90 | --------------------------------------------- 91 | M= 2 92 | N= 3 93 | A: 94 | 0.333 0.333 0.333 95 | 0.333 0.333 0.333 96 | 0.333 0.333 0.333 97 | B: 98 | 0.5 0.5 99 | 0.75 0.25 100 | 0.25 0.75 101 | pi: 102 | 0.333 0.333 0.333 103 | --------------------------------------------- 104 | Sequence file format: 105 | --------------------------------------------- 106 | T= 107 | o1 o2 o3 . . . oT 108 | --------------------------------------------- 109 | Sample sequence file: 110 | --------------------------------------------- 111 | T= 10 112 | 1 1 1 1 2 1 2 2 2 2 113 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/viterbi.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: viterbi.c 5 | ** Purpose: Viterbi algorithm for computing the maximum likelihood 6 | ** state sequence and probablity of observing a sequence 7 | ** given the model. 8 | ** Organization: University of Maryland 9 | ** 10 | ** $Id: viterbi.c,v 1.1 1999/05/06 05:25:37 kanungo Exp kanungo $ 11 | */ 12 | 13 | #include 14 | #include "hmm.h" 15 | #include "nrutil.h" 16 | static char rcsid[] = "$Id: viterbi.c,v 1.1 1999/05/06 05:25:37 kanungo Exp kanungo $"; 17 | 18 | #define VITHUGE 100000000000.0 19 | 20 | void Viterbi(HMM *phmm, int T, int *O, double **delta, int **psi, 21 | int *q, double *pprob) 22 | { 23 | int i, j; /* state indices */ 24 | int t; /* time index */ 25 | 26 | int maxvalind; 27 | double maxval, val; 28 | 29 | /* 1. Initialization */ 30 | 31 | for (i = 1; i <= phmm->N; i++) { 32 | delta[1][i] = phmm->pi[i] * (phmm->B[i][O[1]]); 33 | psi[1][i] = 0; 34 | } 35 | 36 | /* 2. Recursion */ 37 | 38 | for (t = 2; t <= T; t++) { 39 | for (j = 1; j <= phmm->N; j++) { 40 | maxval = 0.0; 41 | maxvalind = 1; 42 | for (i = 1; i <= phmm->N; i++) { 43 | val = delta[t-1][i]*(phmm->A[i][j]); 44 | if (val > maxval) { 45 | maxval = val; 46 | maxvalind = i; 47 | } 48 | } 49 | 50 | delta[t][j] = maxval*(phmm->B[j][O[t]]); 51 | psi[t][j] = maxvalind; 52 | 53 | } 54 | } 55 | 56 | /* 3. Termination */ 57 | 58 | *pprob = 0.0; 59 | q[T] = 1; 60 | for (i = 1; i <= phmm->N; i++) { 61 | if (delta[T][i] > *pprob) { 62 | *pprob = delta[T][i]; 63 | q[T] = i; 64 | } 65 | } 66 | 67 | /* 4. Path (state sequence) backtracking */ 68 | 69 | for (t = T - 1; t >= 1; t--) 70 | q[t] = psi[t+1][q[t+1]]; 71 | 72 | } 73 | void ViterbiLog(HMM *phmm, int T, int *O, double **delta, int **psi, 74 | int *q, double *pprob) 75 | { 76 | int i, j; /* state indices */ 77 | int t; /* time index */ 78 | 79 | int maxvalind; 80 | double maxval, val; 81 | double **biot; 82 | 83 | /* 0. Preprocessing */ 84 | 85 | for (i = 1; i <= phmm->N; i++) 86 | phmm->pi[i] = log(phmm->pi[i]); 87 | for (i = 1; i <= phmm->N; i++) 88 | for (j = 1; j <= phmm->N; j++) { 89 | phmm->A[i][j] = log(phmm->A[i][j]); 90 | } 91 | 92 | biot = dmatrix(1, phmm->N, 1, T); 93 | for (i = 1; i <= phmm->N; i++) 94 | for (t = 1; t <= T; t++) { 95 | biot[i][t] = log(phmm->B[i][O[t]]); 96 | } 97 | 98 | /* 1. Initialization */ 99 | 100 | for (i = 1; i <= phmm->N; i++) { 101 | delta[1][i] = phmm->pi[i] + biot[i][1]; 102 | psi[1][i] = 0; 103 | } 104 | 105 | /* 2. Recursion */ 106 | 107 | for (t = 2; t <= T; t++) { 108 | for (j = 1; j <= phmm->N; j++) { 109 | maxval = -VITHUGE; 110 | maxvalind = 1; 111 | for (i = 1; i <= phmm->N; i++) { 112 | val = delta[t-1][i] + (phmm->A[i][j]); 113 | if (val > maxval) { 114 | maxval = val; 115 | maxvalind = i; 116 | } 117 | } 118 | 119 | delta[t][j] = maxval + biot[j][t]; 120 | psi[t][j] = maxvalind; 121 | } 122 | } 123 | 124 | /* 3. Termination */ 125 | 126 | *pprob = -VITHUGE; 127 | q[T] = 1; 128 | for (i = 1; i <= phmm->N; i++) { 129 | if (delta[T][i] > *pprob) { 130 | *pprob = delta[T][i]; 131 | q[T] = i; 132 | } 133 | } 134 | 135 | 136 | /* 4. Path (state sequence) backtracking */ 137 | 138 | for (t = T - 1; t >= 1; t--) 139 | q[t] = psi[t+1][q[t+1]]; 140 | 141 | free_dmatrix(biot, 1, phmm->N, 1, T); 142 | } 143 | 144 | 145 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/hmmutils.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: hmmutils.c 5 | ** Purpose: utilities for reading, writing HMM stuff. 6 | ** Organization: University of Maryland 7 | ** 8 | ** $Id: hmmutils.c,v 1.4 1998/02/23 07:51:26 kanungo Exp kanungo $ 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include "nrutil.h" 15 | #include "hmm.h" 16 | static char rcsid[] = "$Id: hmmutils.c,v 1.4 1998/02/23 07:51:26 kanungo Exp kanungo $"; 17 | 18 | void ReadHMM(FILE *fp, HMM *phmm) 19 | { 20 | int i, j, k; 21 | 22 | fscanf(fp, "M= %d\n", &(phmm->M)); 23 | 24 | fscanf(fp, "N= %d\n", &(phmm->N)); 25 | 26 | fscanf(fp, "A:\n"); 27 | phmm->A = (double **) dmatrix(1, phmm->N, 1, phmm->N); 28 | for (i = 1; i <= phmm->N; i++) { 29 | for (j = 1; j <= phmm->N; j++) { 30 | fscanf(fp, "%lf", &(phmm->A[i][j])); 31 | } 32 | fscanf(fp,"\n"); 33 | } 34 | 35 | fscanf(fp, "B:\n"); 36 | phmm->B = (double **) dmatrix(1, phmm->N, 1, phmm->M); 37 | for (j = 1; j <= phmm->N; j++) { 38 | for (k = 1; k <= phmm->M; k++) { 39 | fscanf(fp, "%lf", &(phmm->B[j][k])); 40 | } 41 | fscanf(fp,"\n"); 42 | } 43 | 44 | fscanf(fp, "pi:\n"); 45 | phmm->pi = (double *) dvector(1, phmm->N); 46 | for (i = 1; i <= phmm->N; i++) 47 | fscanf(fp, "%lf", &(phmm->pi[i])); 48 | 49 | } 50 | 51 | void FreeHMM(HMM *phmm) 52 | { 53 | free_dmatrix(phmm->A, 1, phmm->N, 1, phmm->N); 54 | free_dmatrix(phmm->B, 1, phmm->N, 1, phmm->M); 55 | free_dvector(phmm->pi, 1, phmm->N); 56 | } 57 | 58 | /* 59 | ** InitHMM() This function initializes matrices A, B and vector pi with 60 | ** random values. Not doing so can result in the BaumWelch behaving 61 | ** quite weirdly. 62 | */ 63 | 64 | void InitHMM(HMM *phmm, int N, int M, int seed) 65 | { 66 | int i, j, k; 67 | double sum; 68 | 69 | 70 | /* initialize random number generator */ 71 | 72 | 73 | hmmsetseed(seed); 74 | 75 | phmm->M = M; 76 | 77 | phmm->N = N; 78 | 79 | phmm->A = (double **) dmatrix(1, phmm->N, 1, phmm->N); 80 | 81 | for (i = 1; i <= phmm->N; i++) { 82 | sum = 0.0; 83 | for (j = 1; j <= phmm->N; j++) { 84 | phmm->A[i][j] = hmmgetrand(); 85 | sum += phmm->A[i][j]; 86 | } 87 | for (j = 1; j <= phmm->N; j++) 88 | phmm->A[i][j] /= sum; 89 | } 90 | 91 | phmm->B = (double **) dmatrix(1, phmm->N, 1, phmm->M); 92 | 93 | for (j = 1; j <= phmm->N; j++) { 94 | sum = 0.0; 95 | for (k = 1; k <= phmm->M; k++) { 96 | phmm->B[j][k] = hmmgetrand(); 97 | sum += phmm->B[j][k]; 98 | } 99 | for (k = 1; k <= phmm->M; k++) 100 | phmm->B[j][k] /= sum; 101 | } 102 | 103 | phmm->pi = (double *) dvector(1, phmm->N); 104 | sum = 0.0; 105 | for (i = 1; i <= phmm->N; i++) { 106 | phmm->pi[i] = hmmgetrand(); 107 | sum += phmm->pi[i]; 108 | } 109 | for (i = 1; i <= phmm->N; i++) 110 | phmm->pi[i] /= sum; 111 | } 112 | 113 | void CopyHMM(HMM *phmm1, HMM *phmm2) 114 | { 115 | int i, j, k; 116 | 117 | phmm2->M = phmm1->M; 118 | 119 | 120 | phmm2->N = phmm1->N; 121 | 122 | phmm2->A = (double **) dmatrix(1, phmm2->N, 1, phmm2->N); 123 | 124 | for (i = 1; i <= phmm2->N; i++) 125 | for (j = 1; j <= phmm2->N; j++) 126 | phmm2->A[i][j] = phmm1->A[i][j]; 127 | 128 | phmm2->B = (double **) dmatrix(1, phmm2->N, 1, phmm2->M); 129 | for (j = 1; j <= phmm2->N; j++) 130 | for (k = 1; k <= phmm2->M; k++) 131 | phmm2->B[j][k] = phmm1->B[j][k]; 132 | 133 | phmm2->pi = (double *) dvector(1, phmm2->N); 134 | for (i = 1; i <= phmm2->N; i++) 135 | phmm2->pi[i] = phmm1->pi[i]; 136 | 137 | } 138 | 139 | void PrintHMM(FILE *fp, HMM *phmm) 140 | { 141 | int i, j, k; 142 | 143 | fprintf(fp, "M= %d\n", phmm->M); 144 | fprintf(fp, "N= %d\n", phmm->N); 145 | 146 | fprintf(fp, "A:\n"); 147 | for (i = 1; i <= phmm->N; i++) { 148 | for (j = 1; j <= phmm->N; j++) { 149 | fprintf(fp, "%f ", phmm->A[i][j] ); 150 | } 151 | fprintf(fp, "\n"); 152 | } 153 | 154 | fprintf(fp, "B:\n"); 155 | for (j = 1; j <= phmm->N; j++) { 156 | for (k = 1; k <= phmm->M; k++){ 157 | fprintf(fp, "%f ", phmm->B[j][k]); 158 | } 159 | fprintf(fp, "\n"); 160 | } 161 | 162 | fprintf(fp, "pi:\n"); 163 | for (i = 1; i <= phmm->N; i++) { 164 | fprintf(fp, "%f ", phmm->pi[i]); 165 | } 166 | fprintf(fp, "\n\n"); 167 | } 168 | 169 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/baum.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 15 December 1997 4 | ** File: baumwelch.c 5 | ** Purpose: Baum-Welch algorithm for estimating the parameters 6 | ** of a HMM model, given an observation sequence. 7 | ** Organization: University of Maryland 8 | ** 9 | ** Update: 10 | ** Author: Tapas Kanungo 11 | ** Date: 19 April 1999 12 | ** Purpose: Changed the convergence criterion from ratio 13 | ** to absolute value. 14 | ** 15 | ** $Id: baumwelch.c,v 1.6 1999/04/24 15:58:43 kanungo Exp kanungo $ 16 | */ 17 | 18 | #include 19 | #include "nrutil.h" 20 | #include "hmm.h" 21 | #include 22 | 23 | static char rcsid[] = "$Id: baumwelch.c,v 1.6 1999/04/24 15:58:43 kanungo Exp kanungo $"; 24 | 25 | #define DELTA 0.001 26 | void BaumWelch(HMM *phmm, int T, int *O, double **alpha, double **beta, 27 | double **gamma, int *pniter, 28 | double *plogprobinit, double *plogprobfinal) 29 | { 30 | int i, j, k; 31 | int t, l = 0; 32 | 33 | double logprobf, logprobb, threshold; 34 | double numeratorA, denominatorA; 35 | double numeratorB, denominatorB; 36 | 37 | double ***xi, *scale; 38 | double delta, deltaprev, logprobprev; 39 | 40 | deltaprev = 10e-70; 41 | 42 | xi = AllocXi(T, phmm->N); 43 | scale = dvector(1, T); 44 | 45 | ForwardWithScale(phmm, T, O, alpha, scale, &logprobf); 46 | *plogprobinit = logprobf; /* log P(O |intial model) */ 47 | BackwardWithScale(phmm, T, O, beta, scale, &logprobb); 48 | ComputeGamma(phmm, T, alpha, beta, gamma); 49 | ComputeXi(phmm, T, O, alpha, beta, xi); 50 | logprobprev = logprobf; 51 | 52 | do { 53 | 54 | /* reestimate frequency of state i in time t=1 */ 55 | for (i = 1; i <= phmm->N; i++) 56 | phmm->pi[i] = .001 + .999*gamma[1][i]; 57 | 58 | /* reestimate transition matrix and symbol prob in 59 | each state */ 60 | for (i = 1; i <= phmm->N; i++) { 61 | denominatorA = 0.0; 62 | for (t = 1; t <= T - 1; t++) 63 | denominatorA += gamma[t][i]; 64 | 65 | for (j = 1; j <= phmm->N; j++) { 66 | numeratorA = 0.0; 67 | for (t = 1; t <= T - 1; t++) 68 | numeratorA += xi[t][i][j]; 69 | phmm->A[i][j] = .001 + 70 | .999*numeratorA/denominatorA; 71 | } 72 | 73 | denominatorB = denominatorA + gamma[T][i]; 74 | for (k = 1; k <= phmm->M; k++) { 75 | numeratorB = 0.0; 76 | for (t = 1; t <= T; t++) { 77 | if (O[t] == k) 78 | numeratorB += gamma[t][i]; 79 | } 80 | 81 | phmm->B[i][k] = .001 + 82 | .999*numeratorB/denominatorB; 83 | } 84 | } 85 | 86 | ForwardWithScale(phmm, T, O, alpha, scale, &logprobf); 87 | BackwardWithScale(phmm, T, O, beta, scale, &logprobb); 88 | ComputeGamma(phmm, T, alpha, beta, gamma); 89 | ComputeXi(phmm, T, O, alpha, beta, xi); 90 | 91 | /* compute difference between log probability of 92 | two iterations */ 93 | delta = logprobf - logprobprev; 94 | logprobprev = logprobf; 95 | l++; 96 | 97 | } 98 | while (delta > DELTA); /* if log probability does not 99 | change much, exit */ 100 | 101 | *pniter = l; 102 | *plogprobfinal = logprobf; /* log P(O|estimated model) */ 103 | FreeXi(xi, T, phmm->N); 104 | free_dvector(scale, 1, T); 105 | } 106 | 107 | void ComputeGamma(HMM *phmm, int T, double **alpha, double **beta, 108 | double **gamma) 109 | { 110 | 111 | int i, j; 112 | int t; 113 | double denominator; 114 | 115 | for (t = 1; t <= T; t++) { 116 | denominator = 0.0; 117 | for (j = 1; j <= phmm->N; j++) { 118 | gamma[t][j] = alpha[t][j]*beta[t][j]; 119 | denominator += gamma[t][j]; 120 | } 121 | 122 | for (i = 1; i <= phmm->N; i++) 123 | gamma[t][i] = gamma[t][i]/denominator; 124 | } 125 | } 126 | 127 | void ComputeXi(HMM* phmm, int T, int *O, double **alpha, double **beta, 128 | double ***xi) 129 | { 130 | int i, j; 131 | int t; 132 | double sum; 133 | 134 | for (t = 1; t <= T - 1; t++) { 135 | sum = 0.0; 136 | for (i = 1; i <= phmm->N; i++) 137 | for (j = 1; j <= phmm->N; j++) { 138 | xi[t][i][j] = alpha[t][i]*beta[t+1][j] 139 | *(phmm->A[i][j]) 140 | *(phmm->B[j][O[t+1]]); 141 | sum += xi[t][i][j]; 142 | } 143 | 144 | for (i = 1; i <= phmm->N; i++) 145 | for (j = 1; j <= phmm->N; j++) 146 | xi[t][i][j] /= sum; 147 | } 148 | } 149 | 150 | double *** AllocXi(int T, int N) 151 | { 152 | int t; 153 | double ***xi; 154 | 155 | xi = (double ***) malloc(T*sizeof(double **)); 156 | 157 | xi --; 158 | 159 | for (t = 1; t <= T; t++) 160 | xi[t] = dmatrix(1, N, 1, N); 161 | return xi; 162 | } 163 | 164 | void FreeXi(double *** xi, int T, int N) 165 | { 166 | int t; 167 | 168 | 169 | 170 | for (t = 1; t <= T; t++) 171 | free_dmatrix(xi[t], 1, N, 1, N); 172 | 173 | xi ++; 174 | free(xi); 175 | 176 | } 177 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/nrutil.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** File: nrutil.c 3 | ** Purpose: Memory allocation routines borrowed from the 4 | ** book "Numerical Recipes" by Press, Flannery, Teukolsky, 5 | ** and Vetterling. 6 | ** state sequence and probablity of observing a sequence 7 | ** given the model. 8 | ** Organization: University of Maryland 9 | ** 10 | ** $Id: nrutil.c,v 1.2 1998/02/19 16:31:35 kanungo Exp kanungo $ 11 | ** 12 | ** Wan-Ping Lee at the Jackson Lab modified the code on 2017-11-02. 13 | */ 14 | 15 | #include 16 | #include 17 | #include 18 | static char rcsid[] = "$Id: nrutil.c,v 1.2 1998/02/19 16:31:35 kanungo Exp kanungo $"; 19 | 20 | 21 | void nrerror(const char *error_text) 22 | { 23 | //void exit(int); 24 | 25 | fprintf(stderr,"Numerical Recipes run-time error...\n"); 26 | fprintf(stderr,"%s\n",error_text); 27 | fprintf(stderr,"...now exiting to system...\n"); 28 | exit(1); 29 | } 30 | 31 | 32 | 33 | float *vector(int nl, int nh) 34 | { 35 | float *v; 36 | 37 | v=(float *)calloc((unsigned) (nh-nl+1),sizeof(float)); 38 | if (!v) nrerror("allocation failure in vector()"); 39 | return v-nl; 40 | } 41 | 42 | int *ivector(int nl, int nh) 43 | { 44 | int *v; 45 | 46 | v=(int *)calloc((unsigned) (nh-nl+1),sizeof(int)); 47 | if (!v) nrerror("allocation failure in ivector()"); 48 | return v-nl; 49 | } 50 | 51 | double *dvector(int nl, int nh) 52 | { 53 | double *v; 54 | 55 | v=(double *)calloc((unsigned) (nh-nl+1),sizeof(double)); 56 | if (!v) nrerror("allocation failure in dvector()"); 57 | return v-nl; 58 | } 59 | 60 | 61 | 62 | float **matrix(int nrl, int nrh, int ncl, int nch) 63 | { 64 | int i; 65 | float **m; 66 | 67 | m=(float **) calloc((unsigned) (nrh-nrl+1),sizeof(float*)); 68 | if (!m) nrerror("allocation failure 1 in matrix()"); 69 | m -= nrl; 70 | 71 | for(i=nrl;i<=nrh;i++) { 72 | m[i]=(float *) calloc((unsigned) (nch-ncl+1),sizeof(float)); 73 | if (!m[i]) nrerror("allocation failure 2 in matrix()"); 74 | m[i] -= ncl; 75 | } 76 | return m; 77 | } 78 | 79 | double **dmatrix(int nrl, int nrh, int ncl, int nch) 80 | { 81 | int i; 82 | double **m; 83 | 84 | m=(double **) calloc((unsigned) (nrh-nrl+1),sizeof(double*)); 85 | if (!m) nrerror("allocation failure 1 in dmatrix()"); 86 | m -= nrl; 87 | 88 | for(i=nrl;i<=nrh;i++) { 89 | m[i]=(double *) calloc((unsigned) (nch-ncl+1),sizeof(double)); 90 | if (!m[i]) nrerror("allocation failure 2 in dmatrix()"); 91 | m[i] -= ncl; 92 | } 93 | return m; 94 | } 95 | 96 | int **imatrix(int nrl, int nrh, int ncl, int nch) 97 | { 98 | int i,**m; 99 | 100 | m=(int **)calloc((unsigned) (nrh-nrl+1),sizeof(int*)); 101 | if (!m) nrerror("allocation failure 1 in imatrix()"); 102 | m -= nrl; 103 | 104 | for(i=nrl;i<=nrh;i++) { 105 | m[i]=(int *)calloc((unsigned) (nch-ncl+1),sizeof(int)); 106 | if (!m[i]) nrerror("allocation failure 2 in imatrix()"); 107 | m[i] -= ncl; 108 | } 109 | return m; 110 | } 111 | 112 | 113 | 114 | float **submatrix(float **a, int oldrl, int oldrh, int oldcl, int oldch, int newrl, int newcl) 115 | { 116 | int i,j; 117 | float **m; 118 | 119 | m=(float **) calloc((unsigned) (oldrh-oldrl+1),sizeof(float*)); 120 | if (!m) nrerror("allocation failure in submatrix()"); 121 | m -= newrl; 122 | 123 | for(i=oldrl,j=newrl;i<=oldrh;i++,j++) m[j]=a[i]+oldcl-newcl; 124 | 125 | return m; 126 | } 127 | 128 | 129 | 130 | void free_vector(float *v, int nl, int nh) 131 | { 132 | free((char*) (v+nl)); 133 | } 134 | 135 | void free_ivector(int *v, int nl, int nh) 136 | { 137 | free((char*) (v+nl)); 138 | } 139 | 140 | void free_dvector(double *v, int nl, int nh) 141 | { 142 | free((char*) (v+nl)); 143 | } 144 | 145 | 146 | 147 | void free_matrix(float **m, int nrl, int nrh, int ncl, int nch) 148 | { 149 | int i; 150 | 151 | for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl)); 152 | free((char*) (m+nrl)); 153 | } 154 | 155 | void free_dmatrix(double **m, int nrl, int nrh, int ncl, int nch) 156 | { 157 | int i; 158 | 159 | for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl)); 160 | free((char*) (m+nrl)); 161 | } 162 | 163 | void free_imatrix(int **m, int nrl, int nrh, int ncl, int nch) 164 | { 165 | int i; 166 | 167 | for(i=nrh;i>=nrl;i--) free((char*) (m[i]+ncl)); 168 | free((char*) (m+nrl)); 169 | } 170 | 171 | 172 | 173 | void free_submatrix(float **b, int nrl, int nrh, int ncl, int nch) 174 | { 175 | free((char*) (b+nrl)); 176 | } 177 | 178 | 179 | 180 | float **convert_matrix(float *a, int nrl, int nrh, int ncl, int nch) 181 | { 182 | int i,j,nrow,ncol; 183 | float **m; 184 | 185 | nrow=nrh-nrl+1; 186 | ncol=nch-ncl+1; 187 | m = (float **) calloc((unsigned) (nrow),sizeof(float*)); 188 | if (!m) nrerror("allocation failure in convert_matrix()"); 189 | m -= nrl; 190 | for(i=0,j=nrl;i<=nrow-1;i++,j++) m[j]=a+ncol*i-ncl; 191 | return m; 192 | } 193 | 194 | 195 | 196 | void free_convert_matrix(float **b, int nrl, int nrh, int ncl, int nch) 197 | { 198 | free((char*) (b+nrl)); 199 | } 200 | -------------------------------------------------------------------------------- /include/GrabJellyfishKmer.h: -------------------------------------------------------------------------------- 1 | #ifndef _GrabJellyfishKmer_H_ 2 | #define _GrabJellyfishKmer_H_ 3 | 4 | #include 5 | #include 6 | 7 | 8 | struct SGrabJellyfishKmerCml { 9 | SGrabJellyfishKmerCml(){} 10 | SGrabJellyfishKmerCml(const int argc, char** const argv){Parse(argc, argv);} 11 | 12 | bool help = false; 13 | 14 | // i/o 15 | std::string input_jfdb; // -i --input 16 | std::string fasta; // -f --fasta 17 | std::string output; // -o --output 18 | 19 | // operation parameters 20 | std::string region; // -r --region 21 | int bin = 1; 22 | bool ascii = false; 23 | bool rle = false; // --rle // running_length_encoding 24 | bool contig = false; // --contig 25 | 26 | // command line 27 | std::string cmd; 28 | 29 | const char* short_option = "hi:f:o:r:"; 30 | 31 | // Help list 32 | const std::string Help (const char* program) const { return 33 | std::string("\n") + 34 | std::string("USAGE: ") + program + std::string(" -i -f \n\n") + 35 | std::string(" -h --help Print this help list.\n") + 36 | std::string("\n") + 37 | std::string("Input & Output:\n") + 38 | std::string(" -i --input Jellyfish created count database.\n") + 39 | std::string(" -f --fasta FASTA for kmer lookup.\n") + 40 | std::string(" -o --output Output file.\n") + 41 | std::string("\n") + 42 | std::string("Operations:\n") + 43 | std::string(" -r --region chr:begin-end Specify a target region.\n") + 44 | std::string(" --bin Report a result for each # bp. [1]\n") + 45 | std::string(" --ascii Report count in ASCII: (log2(#) + 1) + 33.\n") + 46 | std::string(" --rle Ouput by running length encoding. --ascii is on.\n") + 47 | std::string(" --contig Report an average count for each contig.\n"); 48 | } 49 | 50 | // Check the required arguments. 51 | bool CheckArg () const { 52 | bool ok = true; 53 | if (bin < 1) { 54 | std::cerr << "ERROR: --bin should not smaller than 1." << std::endl; 55 | ok = false; 56 | } 57 | if (bin > 1 && rle) { 58 | std::cerr << "ERROR: --rle only work for --bin 1." << std::endl; 59 | ok = false; 60 | } 61 | if (rle && contig) { 62 | std::cerr << "ERROR: --rle and --contig are mutually exclusive." << std::endl; 63 | ok = false; 64 | } 65 | 66 | return ok && !input_jfdb.empty() && !fasta.empty(); 67 | } 68 | 69 | bool Parse (const int argc, char** const argv) { 70 | // Record the input command line. 71 | for (int i = 0; i < argc; i++) cmd += std::string(argv[i]) + " "; 72 | const struct option long_option[] = { 73 | {"help", required_argument, NULL, 'h'}, 74 | // i/o 75 | {"input", required_argument, NULL, 'i'}, 76 | {"fasta", required_argument, NULL, 'f'}, 77 | {"output", required_argument, NULL, 'o'}, 78 | 79 | // operation parameters 80 | {"region", required_argument, NULL, 'r'}, 81 | {"bin", required_argument, NULL, 1}, 82 | {"ascii", no_argument, NULL, 2}, 83 | {"rle", no_argument, NULL, 3}, 84 | {"contig", no_argument, NULL, 4}, 85 | {0,0,0,0} 86 | }; 87 | int option_index = 0; 88 | int c = -1; 89 | while ((c = getopt_long(argc, argv, short_option, long_option, &option_index)) != -1) { 90 | switch (c) { 91 | case 'h': help = true; break; 92 | case 'i': input_jfdb = optarg; break; 93 | case 'f': fasta = optarg; break; 94 | case 'o': output = optarg; break; 95 | case 'r': region = optarg; break; 96 | case 1: bin = atoi(optarg); break; 97 | case 2: ascii = true; break; 98 | case 3: rle = true; ascii = true; break; 99 | case 4: contig = true; break; 100 | default: std::cerr << "WARNING: Unkonw parameter: " << long_option[option_index].name << std::endl; break; 101 | } 102 | } 103 | 104 | if (help) { 105 | Help(argv[0]); 106 | return false; 107 | } 108 | 109 | return CheckArg(); 110 | } 111 | }; // SCountKmerCml 112 | 113 | class GrabJellyfishKmer { 114 | public: 115 | // Constructors 116 | GrabJellyfishKmer(); 117 | GrabJellyfishKmer(int argc, char** argv); 118 | GrabJellyfishKmer(const char * pInput_jfdb, const char * pInput_fasta, const char * pOutput = NULL, 119 | const char * pRegion = NULL, const int input_bin = 1, const bool input_ascii = false, 120 | const bool input_rle = false, const bool input_contig = false); 121 | 122 | // The function will report kmer count according to the parameter setting. 123 | // Return: 0 is successful. 124 | int Run() const; 125 | 126 | // If files are not assinged when declaring the class, you may use the function to assign them. 127 | void SetParameters(const SGrabJellyfishKmerCml & cml); 128 | void SetParameters(const char * pInput_jfdb, const char * pInput_fasta, const char * pOutput = NULL, 129 | const char * pRegion = NULL, const int input_bin = 1, const bool input_ascii = false, 130 | const bool input_rle = false, const bool input_contig = false); 131 | private: 132 | SGrabJellyfishKmerCml cmdline; 133 | // Not allow to use copy and assign constructors. 134 | GrabJellyfishKmer(const GrabJellyfishKmer&); 135 | GrabJellyfishKmer& operator= (const GrabJellyfishKmer&); 136 | }; 137 | #endif 138 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | SOFTWARE LICENSE AGREEMENT 2 | FOR NON-COMMERCIAL USE 3 | 4 | This Software License Agreement (this “Agreement”) is made between you (“You,” “Your,” or “Licensee”) and The Jackson Laboratory (“Licensor”). This Agreement grants to You a license to the Licensed Software subject to Your acceptance of all the terms and conditions contained in this Agreement. Please read the terms and conditions carefully. Your access and use of the Licensed Software shall be deemed your acceptance of this Agreement and the terms and conditions contained herein. If You do not agree to these terms, Licensor is unwilling to grant you the license contained in this Agreement and You should not access or use the Licensed Software. 5 | 6 | 1. LICENSE 7 | 8 | 1.1 Grant. Subject to the terms and conditions of this Agreement, Licensor hereby grants to Licensee a worldwide, royalty-free, non-exclusive, non-transferable, non-sublicensable license to download, copy, display, and use the Licensed Software for Non-Commercial purposes only. “Licensed Software” means the current version of the software made available to You via any platform, including but not limited to Licensor’s website or a third party website such as Githib, and requiring your acceptance of the terms and conditions of this Agreement as a condition of use. “Non-Commercial” means not intended or directed toward commercial advantage or monetary compensation. 9 | 10 | 1.2 License Limitations. Nothing in this Agreement shall be construed to confer any rights upon Licensee except as expressly granted herein. Licensee may not use or exploit the Licensed Software other than expressly permitted by this Agreement. Licensee may not, nor may Licensee permit any third party, to modify, translate, reverse engineer, decompile, disassemble or create derivative works based on the Licensed Software or any portion thereof. Subject to Section 1.1, Licensee may distribute the Licensed Software to a third party, provided that the recipient agrees to use the Licensed Software on the terms and conditions of this Agreement. Licensee acknowledges that Licensor reserves the right to offer to Licensee or any third party a license for commercial use and distribution of the Licensed Software on terms and conditions different than those contained in this Agreement. If You are interested in commercial use of the Licensed Software, please contact the Licensor. 11 | 12 | 2. OWNERSHIP OF INTELLECTUAL PROPERTY 13 | 14 | 2.1 Ownership Rights. Except for the limited license rights expressly granted to Licensee under this Agreement, Licensee acknowledges that all right, title and interest in and to the Licensed Software and all intellectual property rights therein shall remain with Licensor or its licensors, as applicable. 15 | 16 | 3. DISCLAIMER OF WARRANTY AND LIMITATION OF LIABILITY 17 | 18 | 3.1 Disclaimer of Warranty. LICENSOR PROVIDES THE LICENSED SOFTWARE ON A NO-FEE BASIS “AS IS” WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. LICENSOR EXPRESSLY DISCLAIMS ALL WARRANTIES OR CONDITIONS OF ANY KIND, INCLUDING ANY WARRANTY OF MERCHANTABILITY, TITLE, SECURITY, ACCURACY, NON-INFRINGEMENT OR FITNESS FOR A PARTICULAR PURPOSE. 19 | 20 | 3.2 Limitation of Liability. LICENSEE ASSUMES FULL RESPONSIBILITY AND RISK FOR ANY LOSS RESULTING FROM LICENSEE’s DOWNLOADING AND USE OF THE LICENSED SOFTWARE. IN NO EVENT SHALL LICENSOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, ARISING FROM THE LICENSED SOFTWARE OR LICENSEE’S USE OF THE LICENSED SOFTWARE, REGARDLESS OF WHETHER LICENSOR IS ADVISED, OR HAS OTHER REASON TO KNOW, OR IN FACT KNOWS, OF THE POSSIBILITY OF THE FOREGOING. 21 | 22 | 3.3 Acknowledgement. Without limiting the generality of Sections 3.1 and 3.2, Licensee acknowledges that the Licensed Software is provided as an information resource only and should not be relied on for any diagnostic or treatment purposes. 23 | 24 | 4. TERM AND TERMINATION 25 | 26 | 4.1 Term. This Agreement commences on the date this Agreement is executed and will continue until terminated in accordance with Section 4.2. 27 | 28 | 4.2 Termination. If Licensee breaches any provision hereunder, or otherwise engages in any unauthorized use of the Licensed Software, Licensor may terminate this Agreement immediately. Licensee may terminate this Agreement at any time upon written notice to Licensor. Upon termination, the license granted hereunder will terminate and Licensee will immediately cease using the Licensed Software and destroy all copies of the Licensed Software in its possession. Licensee will certify in writing that it has complied with the foregoing obligation. 29 | 30 | 5. MISCELLANEOUS 31 | 32 | 5.1 Future Updates. Use of the Licensed Software under this Agreement is subject to the terms and conditions contained herein. New or updated software may require additional or revised terms of use. Licensor will provide notice of and make available to Licensee any such revised terms. 33 | 34 | 5.2 Entire Agreement. This Agreement, including any Attachments hereto, constitutes the sole and entire agreement between the parties as to the subject matter set forth herein and supersedes are previous license agreements, understandings, or arrangements between the parties relating to such subject matter. 35 | 36 | 5.2 Governing Law. This Agreement shall be construed, governed, interpreted and applied in accordance with the internal laws of the State of Maine, U.S.A., without regard to conflict of laws principles. The parties agree that any disputes between them may be heard only in the state or federal courts in the State of Maine, and the parties hereby consent to venue and jurisdiction in those courts. 37 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/esthmm.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** Author: Tapas Kanungo, kanungo@cfar.umd.edu 3 | ** Date: 22 February 1988 4 | ** File: esthmm.c 5 | ** Purpose: estimate HMM parameters from observation. 6 | ** Organization: University of Maryland 7 | ** 8 | ** $Id: esthmm.c,v 1.1 1998/02/23 07:49:45 kanungo Exp kanungo $ 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include "nrutil.h" 16 | #include "hmm.h" 17 | #include 18 | #include 19 | 20 | 21 | static char rcsid[] = "$Id: esthmm.c,v 1.1 1998/02/23 07:49:45 kanungo Exp kanungo $"; 22 | 23 | void Usage(char *name); 24 | 25 | int main (int argc, char **argv) 26 | { 27 | int T; 28 | HMM hmm; 29 | int N; 30 | int M; 31 | double **alpha; 32 | double **beta; 33 | double **gamma; 34 | int *O; 35 | int iflg=0, sflg=0, nflg=0, mflg=0, errflg =0, vflg=0; 36 | int c; 37 | int seed; /* seed for random number generator */ 38 | char *hmminitfile; 39 | int niter; 40 | double logprobinit, logprobfinal; 41 | FILE *fp; 42 | extern char *optarg; 43 | extern int optind, opterr, optopt; 44 | 45 | 46 | while ((c= getopt(argc, argv, "vhI:S:N:M:")) != EOF) 47 | switch (c) { 48 | case 'v': 49 | vflg++; 50 | break; 51 | case 'h': 52 | Usage(argv[0]); 53 | exit(1); 54 | break; 55 | case 'S': 56 | /* set random number generator seed */ 57 | if (sflg) 58 | errflg++; 59 | else { 60 | sflg++; 61 | sscanf(optarg, "%d", &seed); 62 | } 63 | break; 64 | case 'N': 65 | /* set random number generator seed */ 66 | if (nflg) 67 | errflg++; 68 | else { 69 | nflg++; 70 | sscanf(optarg, "%d", &N); 71 | } 72 | break; 73 | case 'M': 74 | /* set random number generator seed */ 75 | if (mflg) 76 | errflg++; 77 | else { 78 | mflg++; 79 | sscanf(optarg, "%d", &M); 80 | } 81 | break; 82 | case 'I': 83 | /* set random number generator seed */ 84 | if (iflg) 85 | errflg++; 86 | else { 87 | iflg++; 88 | hmminitfile = optarg; 89 | } 90 | break; 91 | case '?': 92 | errflg++; 93 | } 94 | 95 | /* you can initialize the hmm model three ways: 96 | i) with a model stored in a file, which also sets 97 | the number of states N and number of symbols M. 98 | ii) with a random model by just specifyin N and M 99 | on the command line. 100 | iii) with a specific random model by specifying N, M 101 | and seed on the command line. 102 | */ 103 | 104 | if (iflg) { 105 | /* model being read from a file */ 106 | if (((sflg || nflg) || mflg)) errflg++; 107 | } 108 | else if ((!nflg) || (!mflg)) { 109 | /* Model not being intialied from file */ 110 | /* both N and M should be specified */ 111 | errflg++; 112 | } 113 | 114 | 115 | if ((argc - optind) != 1) errflg++; /* number or arguments not okay */ 116 | 117 | 118 | 119 | if (errflg) { 120 | Usage(argv[0]); 121 | exit (1); 122 | } 123 | 124 | /* read the observed sequence */ 125 | fp = fopen(argv[optind], "r"); 126 | if (fp == NULL) { 127 | fprintf(stderr, "Error: File %s not found \n", argv[optind]); 128 | exit (1); 129 | } 130 | ReadSequence(fp, &T, &O); 131 | fclose(fp); 132 | 133 | 134 | /* initialize the hmm model */ 135 | if (iflg) { 136 | fp = fopen(hmminitfile, "r"); 137 | if (fp == NULL) { 138 | fprintf(stderr, "Error: File %s not found \n", 139 | hmminitfile); 140 | exit (1); 141 | } 142 | ReadHMM(fp, &hmm); 143 | fclose(fp); 144 | } 145 | else if (sflg) 146 | InitHMM(&hmm, N, M, seed); 147 | else { 148 | seed = hmmgetseed(); 149 | InitHMM(&hmm, N, M, seed); 150 | } 151 | 152 | /* allocate memory */ 153 | alpha = dmatrix(1, T, 1, hmm.N); 154 | beta = dmatrix(1, T, 1, hmm.N); 155 | gamma = dmatrix(1, T, 1, hmm.N); 156 | 157 | /* call Baum Welch */ 158 | BaumWelch(&hmm, T, O, alpha, beta, gamma, &niter, 159 | &logprobinit, &logprobfinal); 160 | 161 | if (vflg) { 162 | if (sflg) fprintf(stderr, "RandomSeed: %d\n", seed); 163 | fprintf(stderr, "Number of iterations: %d\n", niter); 164 | fprintf(stderr, "Log Prob(observation | init model): %E\n", 165 | logprobinit); 166 | fprintf(stderr, "Log Prob(observation | estimated model): %E\n", 167 | logprobfinal); 168 | } 169 | 170 | 171 | /* print the answer */ 172 | PrintHMM(stdout, &hmm); 173 | 174 | /* free memory */ 175 | free_ivector(O, 1, T); 176 | free_dmatrix(alpha, 1, T, 1, hmm.N); 177 | free_dmatrix(beta, 1, T, 1, hmm.N); 178 | free_dmatrix(gamma, 1, T, 1, hmm.N); 179 | FreeHMM(&hmm); 180 | } 181 | 182 | void Usage(char *name) 183 | { 184 | printf("Usage error. \n"); 185 | printf("Usage1: %s [-v] -N -M \n", 186 | name); 187 | printf("Usage2: %s [-v] -S -N -M \n", 188 | name); 189 | printf("Usage3: %s [-v] -I \n", 190 | name); 191 | printf(" N - number of states\n"); 192 | printf(" M - number of symbols\n"); 193 | printf(" S - seed for random number genrator\n"); 194 | printf(" I - mod.hmm is a file with the initial model parameters\n"); 195 | printf(" file.seq - file containing the obs. seqence\n"); 196 | printf(" v - prints out number of iterations and log prob\n"); 197 | } 198 | -------------------------------------------------------------------------------- /include/GetCnvSignal.h: -------------------------------------------------------------------------------- 1 | #ifndef _GETCNVSIGNAL_H_ 2 | #define _GETCNVSIGNAL_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | struct SGetCnvSignalCml { 11 | SGetCnvSignalCml(){} 12 | SGetCnvSignalCml(const int argc, char** const argv){Parse(argc, argv);} 13 | 14 | bool help = false; 15 | 16 | // i/o 17 | std::string kmer_table; // -k --kmer 18 | std::string fasta; // -f --fasta 19 | std::string bam; // -b --bam 20 | std::string output; // -o --output 21 | 22 | // operation parameters 23 | int coverage = 0; // -c --coverage 24 | std::string region; // -r --region 25 | uint8_t aln_qual = 40; // -q --aln_qual 26 | unsigned int minimum_report_size = 5000; // -m --minimum_report_size 27 | int bin = 50; // --bin 28 | std::string log; // --log 29 | float unique_kmer = 0.6; // --unique_kmer 30 | float kmer_score = 0.5; // --kmer_score 31 | 32 | // command line 33 | std::string cmd; 34 | 35 | const char* short_option = "hb:k:f:o:c:r:q:m:"; 36 | 37 | // Help list 38 | const std::string Help (const char* program) const { return 39 | std::string("USAGE: ") + program + std::string(" -f -k -b \n\n") + 40 | std::string(" -h --help Print this help list.\n") + 41 | std::string("\n") + 42 | std::string("Input & Output:\n") + 43 | std::string(" -b --bam Input BAM; required.\n") + 44 | std::string(" -k --kmer Kmer table.\n") + 45 | std::string(" -f --fasta FASTA for kmer lookup.\n") + 46 | std::string(" -o --output Output file.\n") + 47 | std::string("\n") + 48 | std::string("Operations:\n") + 49 | std::string(" -c --coverage The expected coverage.\n") + 50 | std::string(" -r --region chr:begin-end A target region.\n") + 51 | std::string(" -q --aln_qual A mapping quality filter for alignments. [40]\n") + 52 | std::string(" -m --minimum_report_size The minimum report SV size. [45K]\n") + 53 | std::string(" --bin Report a result for each # bp. [50]\n") + 54 | std::string(" --log Log output.\n" + 55 | std::string(" --unique_kmer Require percentage of unique kmer to report a CNV. [0.6]\n") + 56 | std::string(" --kmer_score Score for log2(kmer count) = 2 positions. [0.5]\n")); 57 | } 58 | 59 | // Check the required arguments. 60 | bool CheckArg () const { 61 | bool ok = true; 62 | if (bin < 1) { 63 | std::cerr << "ERROR: --bin should not smaller than 1." << std::endl; 64 | ok = false; 65 | } 66 | if (bam.empty()) { 67 | std::cerr << "ERROR: -b is required." << std::endl; 68 | ok = false; 69 | } else if (access(bam.c_str(), F_OK) == -1) { 70 | std::cerr << "ERROR: Cannot open " << bam << std::endl; 71 | ok = false; 72 | } 73 | 74 | if (kmer_table.empty()) { 75 | std::cerr << "ERROR: -k is required." << std::endl; 76 | ok = false; 77 | } else if (access(kmer_table.c_str(), F_OK) == -1) { 78 | std::cerr << "ERROR: Cannot open " << kmer_table << std::endl; 79 | ok = false; 80 | } 81 | 82 | if (fasta.empty()) { 83 | std::cerr << "ERROR: -f is required." << std::endl; 84 | ok = false; 85 | } else if (access(fasta.c_str(), F_OK) == -1) { 86 | std::cerr << "ERROR: Cannot open " << fasta << std::endl; 87 | ok = false; 88 | } 89 | 90 | if (unique_kmer > 1) { 91 | std::cerr << "ERROR: --unique_kmer should not larger than 1." << std::endl; 92 | ok = false; 93 | } 94 | if (kmer_score > 1) { 95 | std::cerr << "ERROR: --kmer_score should not larger than 1." << std::endl; 96 | ok = false; 97 | } 98 | 99 | return ok; 100 | } 101 | 102 | bool Parse (const int argc, char** const argv) { 103 | // Record the input command line. 104 | for (int i = 0; i < argc; i++) cmd += std::string(argv[i]) + " "; 105 | const struct option long_option[] = { 106 | {"help", required_argument, NULL, 'h'}, 107 | // i/o 108 | {"bam", required_argument, NULL, 'b'}, 109 | {"kmer", required_argument, NULL, 'k'}, 110 | {"fasta", required_argument, NULL, 'f'}, 111 | {"output", required_argument, NULL, 'o'}, 112 | 113 | // operation parameters 114 | {"coverage", required_argument, NULL, 'c'}, 115 | {"region", required_argument, NULL, 'r'}, 116 | {"aln_qual", required_argument, NULL, 'q'}, 117 | {"minimum_report_size", required_argument, NULL, 'm'}, 118 | {"bin", required_argument, NULL, 2}, 119 | {"log", required_argument, NULL, 3}, 120 | {"unique_kmer", required_argument, NULL, 4}, 121 | {"kmer_score", required_argument, NULL, 5}, 122 | {0,0,0,0} 123 | }; 124 | int option_index = 0; 125 | int c = -1; 126 | bool error = false; 127 | while ((c = getopt_long(argc, argv, short_option, long_option, &option_index)) != -1) { 128 | std::string tmp; 129 | switch (c) { 130 | case 'h': help = true; break; 131 | case 'k': kmer_table = optarg; break; 132 | case 'f': fasta = optarg; break; 133 | case 'b': bam = optarg; break; 134 | case 'o': output = optarg; break; 135 | case 'c': coverage = atoi(optarg); break; 136 | case 'r': region = optarg; break; 137 | case 'q': aln_qual = atoi(optarg); break; 138 | case 'm': 139 | tmp = optarg; 140 | if (std::all_of(tmp.begin(), tmp.end(), ::isdigit)) { // all digits 141 | minimum_report_size = atoi(optarg); 142 | } else { 143 | if ((tmp.back() == 'K' || tmp.back() == 'k') && (std::all_of(tmp.begin(), tmp.end() - 1, ::isdigit))) { 144 | minimum_report_size = atoi(tmp.substr(0, tmp.size()-1).c_str()) * 1000; 145 | } else if ((tmp.back() == 'M' || tmp.back() == 'm') && (std::all_of(tmp.begin(), tmp.end() - 1, ::isdigit))) { 146 | minimum_report_size = atoi(tmp.substr(0, tmp.size()-1).c_str()) * 1000000; 147 | } else { 148 | std::cerr << "Error: Cannot parse " << tmp << ". Please use all digits, or K or M as suffix." << std::endl; 149 | error = true; 150 | } 151 | } 152 | break; 153 | case 2: bin = atoi(optarg); break; 154 | case 3: log = optarg; break; 155 | case 4: unique_kmer = atof(optarg); break; 156 | case 5: kmer_score = atof(optarg); break; 157 | default: std::cerr << "WARNING: Unkonw parameter: " << long_option[option_index].name << std::endl; break; 158 | } 159 | } 160 | 161 | if (help) { 162 | Help(argv[0]); 163 | return false; 164 | } 165 | 166 | return error && CheckArg(); 167 | } 168 | }; // SGetCnvSignalCml 169 | 170 | class GetCnvSignal { 171 | public: 172 | // Constructors 173 | GetCnvSignal(); 174 | GetCnvSignal(int argc, char** argv); 175 | 176 | // The function will report kmer count according to the parameter setting. 177 | // Return: 0 is successful. 178 | int Run() const; 179 | 180 | // If files are not assinged when declaring the class, you may use the function to assign them. 181 | //void SetParameters(const SGetCnvSignalCml & cml ); 182 | 183 | private: 184 | SGetCnvSignalCml cmdline; 185 | GetCnvSignal(const GetCnvSignal&); 186 | GetCnvSignal& operator= (const GetCnvSignal&); 187 | }; 188 | #endif // _GETCNVSIGNAL_H_ 189 | -------------------------------------------------------------------------------- /src/GrabJellyfishKmer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | // Self include 7 | #include "GrabJellyfishKmer.h" 8 | 9 | // FASTAQ include 10 | #include "fastaq/fasta.h" 11 | #include "fastaq/reference.h" 12 | #include "fastaq/region.h" 13 | 14 | // Jellyfish include 15 | #include "jellyfish/file_header.hpp" 16 | #include "jellyfish/jellyfish.hpp" 17 | #include "jellyfish/mapped_file.hpp" 18 | 19 | namespace { 20 | 21 | // Calculate log2 score. 22 | char CeilLog2 (const uint64_t in) 23 | { 24 | uint64_t x = in; 25 | static const unsigned long long t[6] = { 26 | 0xFFFFFFFF00000000ull, 27 | 0x00000000FFFF0000ull, 28 | 0x000000000000FF00ull, 29 | 0x00000000000000F0ull, 30 | 0x000000000000000Cull, 31 | 0x0000000000000002ull 32 | }; 33 | 34 | //int y = (((x & (x - 1)) == 0) ? 0 : 1); 35 | int y = (x > 0) ? 1 : 0; 36 | 37 | for (int i = 0, j = 32; i < 6; i++) { 38 | int k = (((x & t[i]) == 0) ? 0 : j); 39 | y += k; 40 | x >>= k; 41 | j >>= 1; 42 | } 43 | 44 | // Use 125 as the max. 45 | return (y > 92) ? 125 : y + 33; 46 | } 47 | void GetKmerCount (const Fastaq::CReference & ref, const Fastaq::SRegion & region, const unsigned int & kmer_size, 48 | const jellyfish::file_header & header, const binary_query & bq, const bool running_length_encoding, 49 | const bool ascii, const int & bin, const bool contig) { 50 | 51 | 52 | std::vector ref_names; 53 | ref.GetReferenceNames(&ref_names); 54 | 55 | // If a region is given, there will be only one reference in the vector. 56 | for (unsigned int i = 0; i < ref_names.size(); i++) { 57 | const unsigned int target_end = (!region.chr.empty() && region.end > 0) // Use region.end when we set region 58 | ? std::min(ref.GetReferenceLength(ref_names[i].c_str()), region.end + kmer_size) 59 | : ref.GetReferenceLength(ref_names[i].c_str()); 60 | const unsigned int target_begin = !region.chr.empty() ? std::max(static_cast(0), region.begin) : 0; 61 | 62 | #ifdef DEBUG 63 | std::cerr << "GetKmerCount:" << std::endl; 64 | std::cerr << "\tProcessing region " << ref_names[i] << ":" << target_begin << "-" << target_end << std::endl; 65 | #endif 66 | // Cannot proceed when target_len < kmer_size 67 | if ((target_end - target_begin) < kmer_size) break; 68 | 69 | unsigned int score_count = 0; 70 | unsigned int score_sum = 0; 71 | char score = '\0'; 72 | std::cout << ">" << ref_names[i] << std::endl; 73 | for (unsigned int j = target_begin; j < target_end - kmer_size; ++j) { 74 | jellyfish::mer_dna m; 75 | // TODO: GetSubString is slow. 76 | m = ref.GetSubString(ref_names[i], j, kmer_size).c_str(); 77 | if (header.canonical()) m.canonicalize(); 78 | if (contig) { // Report an average count for each contig 79 | ++score_count; 80 | score_sum += bq.check(m); 81 | } else { 82 | if (running_length_encoding) { // running_length_encoding must use ascii for reporting. 83 | if (CeilLog2(bq.check(m)) == score) { 84 | ++score_count; 85 | } else { 86 | if (score != '\0') 87 | std::cout << score << "\t" << score_count; 88 | score_count = 1; 89 | score = CeilLog2(bq.check(m)); 90 | } 91 | } else { // not running_length_encoding 92 | score_sum += bq.check(m); 93 | ++score_count; 94 | if (((j-target_begin + 1) % bin) == 0) { 95 | if (ascii) 96 | std::cout << static_cast(CeilLog2((std::lround(score_sum / static_cast(score_count))))); 97 | else 98 | std::cout << score_sum / static_cast(score_count) << std::endl; 99 | score_sum = 0; 100 | score_count = 0; 101 | } 102 | } 103 | } // end of if contig 104 | } 105 | // Output the last score. Only running_length_encoding mode will use this. 106 | if (running_length_encoding) { 107 | if (score_count > 0) 108 | std::cout << score << "\t" << score_count; 109 | std::cout << std::endl; 110 | } else { // not running_length_encoding 111 | if (score_count > 0) { 112 | if (contig) // Report the chromosome name 113 | std::cout << ref_names[i] << "\t"; 114 | 115 | if (ascii) 116 | std::cout << static_cast(CeilLog2((std::lround(score_sum / static_cast(score_count))))); 117 | else 118 | std::cout << score_sum / static_cast(score_count) << std::endl; 119 | } 120 | if (ascii) 121 | std::cout << std::endl; 122 | } 123 | } // end of the chromosome 124 | } 125 | } // namespace 126 | 127 | int GrabJellyfishKmer::Run () const { 128 | if (!cmdline.CheckArg()) { 129 | std::cerr << cmdline.Help("GrabJellyfishKmer"); 130 | return 1; 131 | } 132 | 133 | // Read jellyfish database 134 | std::ifstream db(cmdline.input_jfdb, std::ios::in|std::ios::binary); 135 | jellyfish::file_header header(db); 136 | if(!db.good()) { // The jellyfish database is broken. 137 | std::cerr << "ERROR: Cannot open " << cmdline.input_jfdb << std::endl; 138 | return 1; 139 | } 140 | if (header.format() != binary_dumper::format) { 141 | std::cerr << "ERROR: Cannot process jellyfish database built by bloom filter." << std::endl; 142 | return 1; 143 | } 144 | 145 | // Read kmer size from the header of jellyfish database 146 | const unsigned int kmer_size = header.key_len() / 2; // The kmer size is key_len() / 2. 147 | jellyfish::mer_dna::k(kmer_size); 148 | if (kmer_size < 1) { // Cannot proceed if kmer size is not larger than zero. 149 | std::cerr << "ERROR: The kmer size (" << kmer_size << ") should be larger than 0." << std::endl; 150 | return 1; 151 | } 152 | 153 | // Load jellyfish database as query db. 154 | jellyfish::mapped_file binary_map(cmdline.input_jfdb.c_str()); 155 | if (!cmdline.region.empty()) // If there is no given region, load in memory for speedup. 156 | binary_map.load(); 157 | binary_query bq(binary_map.base() + header.offset(), header.key_len(), header.counter_len(), header.matrix(), 158 | header.size() - 1, binary_map.length() - header.offset()); 159 | 160 | // Parse region. 161 | Fastaq::SRegion region; 162 | if (!cmdline.region.empty()) { 163 | if (!region.Parse(cmdline.region)) { 164 | std::cerr << "ERROR: The given region is not valid." << std::endl; 165 | return 1; 166 | } 167 | } 168 | 169 | // Load reference from FASTA. 170 | Fastaq::CReference ref; // fastaq lib. 171 | if (!cmdline.region.empty()) { 172 | if (!Fastaq::FastaLoad(ref, cmdline.fasta.c_str(), true, region.chr.c_str())) { 173 | std::cerr << "ERROR: Cannot load chromosome " << region.chr << " from FASTA." << std::endl; 174 | return 1; 175 | } 176 | } else { 177 | if (!Fastaq::FastaLoad(ref, cmdline.fasta.c_str())) { 178 | std::cerr << "ERROR: Cannot load FASTA." << std::endl; 179 | return 1; 180 | } 181 | } 182 | 183 | // Open output if given. 184 | std::ofstream ofs; 185 | std::streambuf *coutbuf = std::cout.rdbuf(); //save old buf 186 | if (!cmdline.output.empty()) { 187 | ofs.open(cmdline.output, std::ofstream::out); 188 | std::cout.rdbuf(ofs.rdbuf()); //redirect std::cout to file; 189 | } 190 | 191 | GetKmerCount(ref, region, kmer_size, header, bq, cmdline.rle, cmdline.ascii, cmdline.bin, cmdline.contig); 192 | 193 | // Clean up 194 | if (!cmdline.output.empty()) { 195 | std::cout.rdbuf(coutbuf); //reset to standard output again 196 | ofs.close(); 197 | } 198 | 199 | db.close(); 200 | 201 | return 0; 202 | 203 | } 204 | 205 | GrabJellyfishKmer::GrabJellyfishKmer() 206 | { 207 | } 208 | 209 | GrabJellyfishKmer::GrabJellyfishKmer(int argc, char** argv) 210 | : cmdline(argc, argv) 211 | { 212 | } 213 | 214 | GrabJellyfishKmer::GrabJellyfishKmer( 215 | const char * pInput_jfdb, const char * pInput_fasta, const char * pOutput, 216 | const char * pRegion, const int input_bin, const bool input_ascii, const bool input_rle, const bool input_contig) 217 | { 218 | SetParameters(pInput_jfdb, pInput_fasta, pOutput, pRegion, input_bin, input_ascii, input_rle); 219 | } 220 | 221 | void GrabJellyfishKmer::SetParameters(const SGrabJellyfishKmerCml & cml) { 222 | cmdline = cml; 223 | } 224 | 225 | void GrabJellyfishKmer::SetParameters( 226 | const char * pInput_jfdb, const char * pInput_fasta, const char * pOutput, 227 | const char * pRegion, const int input_bin, const bool input_ascii, const bool input_rle, const bool input_contig) { 228 | 229 | cmdline.input_jfdb = pInput_jfdb; 230 | cmdline.fasta = pInput_fasta; 231 | if (pOutput) cmdline.output = pOutput; 232 | if (pRegion) cmdline.region = pRegion; 233 | cmdline.bin = input_bin; 234 | cmdline.ascii = input_ascii; 235 | cmdline.rle = input_rle; 236 | 237 | } 238 | -------------------------------------------------------------------------------- /src/EstimateCoverage.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | // htslib include 7 | #include "htslib/sam.h" 8 | #include "fastaq/fasta.h" 9 | #include "EstimateCoverage.h" 10 | 11 | namespace { 12 | bool LoadKmer(std::string & chr_name_prefix, std::string & kmer_seq, const char * kmer_table, const std::string & chr_name) { 13 | bool load_chr = false; 14 | const bool quiet = true; 15 | if (chr_name_prefix.empty()) { 16 | if (Fastaq::FastaLoad(kmer_seq, kmer_table, false, chr_name.c_str(), quiet)) { 17 | load_chr = true; 18 | } else if (Fastaq::FastaLoad(kmer_seq, kmer_table, false, ("chr" + chr_name).c_str()), quiet) { 19 | load_chr = true; 20 | chr_name_prefix = "chr"; 21 | } else { 22 | std::cerr << "WARNING: Cannot load kmer seqeunces " << chr_name << " from " << kmer_table << std::endl; 23 | load_chr = false; 24 | } 25 | } else { 26 | if (Fastaq::FastaLoad(kmer_seq, kmer_table, false, (chr_name_prefix + chr_name).c_str()), quiet) { 27 | load_chr = true; 28 | } else { 29 | std::cerr << "WARNING: Cannot load kmer seqeunces " << chr_name << " from " << kmer_table << std::endl; 30 | load_chr = false; 31 | } 32 | } 33 | 34 | return load_chr; 35 | } 36 | void CalculateChrCoverage(std::vector & coverages, std::string & chr_name_prefix, 37 | samFile * bam_reader, bam_hdr_t *header, hts_idx_t * idx, 38 | const char * kmer_table, const char * char_chr_name, const int & min_region_size) { 39 | const std::string chr_name = char_chr_name; 40 | std::string kmer_seq; 41 | const bool load_kmer = LoadKmer(chr_name_prefix, kmer_seq, kmer_table, chr_name); 42 | 43 | size_t pos = 0; 44 | 45 | coverages.clear(); 46 | 47 | while (pos != std::string::npos) { 48 | pos = kmer_seq.find('"', pos); 49 | if (pos != std::string::npos) { 50 | int length = 0; 51 | while(kmer_seq[pos] == '"') { 52 | ++pos; 53 | ++length; 54 | } 55 | 56 | if (length > min_region_size) { 57 | const std::string cat_region = chr_name_prefix + chr_name + ":" + std::to_string(pos - length) + '-' + std::to_string(pos); 58 | hts_itr_t * ite = sam_itr_querys(idx, header, cat_region.c_str()); 59 | bam1_t * aln = bam_init1(); 60 | unsigned int base_count = 0; 61 | while (ite && sam_itr_next(bam_reader, ite, aln) >= 0) { 62 | const bool bad_al = aln->core.flag & BAM_FUNMAP || aln->core.flag & BAM_FSECONDARY || aln->core.flag & BAM_FQCFAIL 63 | || aln->core.flag & BAM_FDUP || aln->core.flag & BAM_FSUPPLEMENTARY; 64 | if (!bad_al) { 65 | const uint32_t* pCigar = bam_get_cigar(aln); 66 | for (uint32_t i = 0; i < aln->core.n_cigar; ++i) { 67 | const uint32_t op = bam_cigar_op(*(pCigar + i)); 68 | if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) 69 | base_count += bam_cigar_oplen(*(pCigar + i)); 70 | } 71 | } 72 | } 73 | //std::cerr << chr_name_prefix + cat_region << "\t" << base_count / static_cast(length) << std::endl; 74 | hts_itr_destroy(ite); 75 | bam_destroy1(aln); 76 | coverages.push_back(base_count / static_cast(length)); 77 | } 78 | 79 | } 80 | } 81 | } 82 | 83 | bool CoverageSort(const std::pair & t1, const std::pair & t2) { 84 | return t1.second < t2.second; 85 | } 86 | 87 | void CalculateCoverage(int & all_chr_cov, std::vector & aneuploidies, 88 | const std::vector & coverages, unsigned int begin_chr_id, unsigned int end_chr_id) { 89 | 90 | if (end_chr_id > coverages.size() - 1) end_chr_id = coverages.size() - 1; 91 | if (begin_chr_id > end_chr_id) return; // Invalid ids. 92 | if (end_chr_id - begin_chr_id < 3) return; // Not enough element for IQR calculation. 93 | 94 | std::vector > covs; // id and coverages 95 | for (unsigned int i = begin_chr_id; i <= end_chr_id; ++i) { 96 | covs.push_back(std::pair(begin_chr_id + i, coverages[i])); 97 | } 98 | std::sort(covs.begin(), covs.end(), CoverageSort); 99 | 100 | const unsigned int Q2_id = (end_chr_id - begin_chr_id + 1) / 2; 101 | const unsigned int Q1_id = (end_chr_id - begin_chr_id + 1) / 4; 102 | const unsigned int Q3_id = Q2_id + (Q1_id == 0 ? 1 : Q1_id); 103 | const float IQR = covs[Q3_id].second - covs[Q1_id].second; 104 | 105 | // Based on IQR, we caluculate coverage. 106 | float total_coverage = 0.0; 107 | int total_coverage_count = 0; 108 | for (std::vector >::const_iterator ite = covs.begin(); ite != covs.end(); ++ite) { 109 | //std::cout << "chr:" << ite->first << "\t" << ite->second << std::endl; 110 | if (ite->second > covs[Q1_id].second && ite->second < covs[Q3_id].second) { 111 | total_coverage += ite->second; 112 | ++total_coverage_count; 113 | } 114 | } 115 | all_chr_cov = std::round(total_coverage / static_cast(total_coverage_count)); 116 | 117 | // Based on the coverage, we detect aneuploidies 118 | aneuploidies.clear(); 119 | for (std::vector >::const_iterator ite = covs.begin(); ite != covs.end(); ++ite) { 120 | if (ite->second < (all_chr_cov * 0.6) || ite->second > (all_chr_cov * 1.4)) 121 | aneuploidies.push_back(ite->first); 122 | } 123 | std::sort(aneuploidies.begin(), aneuploidies.end()); 124 | } 125 | 126 | void DetermineGender(const int & all_chr_cov, bool & female, bool & male, const float & cov_x, const float & cov_y) { 127 | female = false; 128 | if (all_chr_cov / cov_x > 1.75 && all_chr_cov / cov_x < 2.25) 129 | female = true; 130 | 131 | male = false; 132 | if (((all_chr_cov / 2) / cov_y > 1.75) && ((all_chr_cov / 2) / cov_x < 2.25)) 133 | male = true; 134 | 135 | } 136 | } 137 | 138 | namespace EstimateCoverage { 139 | int EstimateCoverage(std::vector & coverages, bool & female, bool & male, 140 | const char * bam_filename, const char * kmer_table) { 141 | 142 | std::string chr_name_prefix; 143 | 144 | samFile * bam_reader = sam_open(bam_filename, "r"); 145 | bam_hdr_t *header; 146 | header = sam_hdr_read(bam_reader); 147 | hts_idx_t * idx = sam_index_load(bam_reader, bam_filename); 148 | 149 | coverages.resize(Human::HumanAutosomeSize + Human::HumanAllosomeSize, 0); 150 | 151 | for (int i = 0; i < Human::HumanAutosomeSize; ++i) { 152 | std::vector chr_cov; 153 | int min_region_size = 20000; 154 | while (chr_cov.size() < 10 && min_region_size > 2000) { 155 | min_region_size = min_region_size >> 1; 156 | chr_cov.clear(); 157 | CalculateChrCoverage(chr_cov, chr_name_prefix, bam_reader, header, idx, kmer_table, Human::HumanAutosome[i], min_region_size); 158 | } 159 | float cov_chr_total = 0.0; 160 | for (std::vector::const_iterator cov_ite = chr_cov.begin(); cov_ite != chr_cov.end(); ++cov_ite) 161 | cov_chr_total += *cov_ite; 162 | coverages[i] = cov_chr_total / static_cast(chr_cov.size()); 163 | #ifdef DEBUG 164 | std::cerr << Human::HumanAutosome[i] << "\t" << coverages[i] << std::endl; 165 | #endif 166 | } 167 | 168 | for (int i = 0; i < Human::HumanAllosomeSize; ++i) { 169 | std::vector chr_cov; 170 | int min_region_size = 20000; 171 | while (chr_cov.size() < 10 && min_region_size > 2000) { 172 | min_region_size = min_region_size >> 1; 173 | chr_cov.clear(); 174 | CalculateChrCoverage(chr_cov, chr_name_prefix, bam_reader, header, idx, kmer_table, Human::HumanAllosome[i], min_region_size); 175 | } 176 | float cov_chr_total = 0.0; 177 | for (std::vector::const_iterator cov_ite = chr_cov.begin(); cov_ite != chr_cov.end(); ++cov_ite) 178 | cov_chr_total += *cov_ite; 179 | coverages[i + Human::HumanAutosomeSize] = cov_chr_total / static_cast(chr_cov.size()); 180 | #ifdef DEBUG 181 | std::cerr << Human::HumanAllosome[i] << "\t" << coverages[i + Human::HumanAutosomeSize] << std::endl; 182 | #endif 183 | } 184 | 185 | int all_chr_cov = 0; 186 | std::vector aneuploidies; 187 | // Calculate IQR and then coverage as well as detect aneuploidies. 188 | CalculateCoverage(all_chr_cov, aneuploidies, coverages, 0, Human::HumanAutosomeSize - 1); 189 | if (aneuploidies.size() > 0) std::cerr << "Aneuploidies:"; 190 | for (std::vector::const_iterator ite = aneuploidies.begin(); ite != aneuploidies.end(); ++ite) 191 | std::cerr << "\t" << Human::HumanAutosome[*ite]; 192 | if (aneuploidies.size() > 0) std::cerr << std::endl; 193 | 194 | DetermineGender(all_chr_cov, female, male, coverages[Human::HumanAutosomeSize], coverages[Human::HumanAutosomeSize + 1]); 195 | // Clean up 196 | bam_hdr_destroy(header); 197 | sam_close(bam_reader); 198 | 199 | //return std::round(all_chr_cov / static_cast(Human::HumanAutosomeSize)); 200 | return all_chr_cov; 201 | } 202 | } //namespace EstimateCoverage 203 | -------------------------------------------------------------------------------- /JaxCNVMerge.R: -------------------------------------------------------------------------------- 1 | ########################################################################################## 2 | ################################ function defination begin ############################### 3 | is.installed <- function(requirePackage){ 4 | is.element(requirePackage, installed.packages()[, 1]) 5 | } 6 | 7 | dbscanMerge <- function(oneTypeData){ 8 | if(nrow(oneTypeData) == 1){ 9 | return(oneTypeData) 10 | } 11 | if(nrow(oneTypeData) == 2){ 12 | return(twoRegionMerge(oneTypeData)) 13 | } 14 | dataNum <- nrow(oneTypeData) 15 | mergedResults <- data.frame(chr = "", start = 0, end = 0, type = "", subtype = "") 16 | theLengths <- oneTypeData$end - oneTypeData$start 17 | theLength <- mean(oneTypeData$end - oneTypeData$start) 18 | theMeanDensity <- (oneTypeData$end[dataNum] - oneTypeData$start[1]) / sum(theLengths) 19 | theDist <- matrix(theMeanDensity + 1, nrow = dataNum, ncol = dataNum) 20 | theDistNeigh <- oneTypeData$start[2 : dataNum] - oneTypeData$end[1 : (dataNum - 1)] 21 | for(i in 1 : (dataNum - 1)){ 22 | theDist[i, i] <- 0 23 | theDist[i, i + 1] <- (theLengths[i] + theLengths[i + 1] + theDistNeigh[i]) / (theLengths[i] + theLengths[i + 1]) 24 | theDist[i + 1, i] <- theDist[i, i + 1] 25 | } 26 | theDist[dataNum, dataNum] <- 0 27 | theDist[which(theDist > 3, 2)] <- theMeanDensity + 1 28 | theDistUsed <- as.dist(theDist) 29 | theRes <- dbscan(theDistUsed, minPts = 2, eps = theMeanDensity) 30 | 31 | oneCluData <- oneTypeData[which(theRes$cluster == 0), ] 32 | mergedResults <- rbind(mergedResults, oneCluData) 33 | theCluster <- unique(theRes$cluster[which(theRes$cluster != 0)]) 34 | if(length(theCluster) > 0){ 35 | for(clu in theCluster){ 36 | oneCluData <- oneTypeData[which(theRes$cluster == clu), ] 37 | thestart = min(oneCluData$start) 38 | theend = max(oneCluData$end) 39 | mergedResults <- rbind(mergedResults, data.frame(chr = oneTypeData$chr[1], start = thestart, end = theend, 40 | type = oneTypeData$type[1], subtype = oneTypeData$subtype[1], 41 | stringsAsFactors = F)) 42 | } 43 | } 44 | mergedResults <- mergedResults[-1, ] 45 | return(mergedResults) 46 | } 47 | 48 | bedRegionMerge <- function(oneTypeData){ 49 | # get the distance of each pair 50 | oneTypeData <- oneTypeData[order(oneTypeData$start), ] 51 | if(nrow(oneTypeData) == 1){ 52 | return(oneTypeData) 53 | } 54 | dataNum <- nrow(oneTypeData) 55 | mergedResults <- data.frame(chr = "", start = 0, end = 0, type = "", subtype = "") 56 | theDistNeigh <- oneTypeData$start[2 : dataNum] - oneTypeData$end[1 : (dataNum - 1)] 57 | 58 | ## seperate the data into different part by dist = DistCannotMerge, 59 | ## then in each part, using the dbscan to merge, threshold is meandensity 60 | theIndexLargerCannotMerge <- which(theDistNeigh > DistCannotMerge) 61 | if(length(theIndexLargerCannotMerge) == 0){ 62 | mergedResults <- rbind(mergedResults, dbscanMerge(oneTypeData)) 63 | mergedResults <- mergedResults[-1, ] 64 | return(mergedResults) 65 | } 66 | firstPartIndex <- 1 : theIndexLargerCannotMerge[1] 67 | firstPartData <- oneTypeData[firstPartIndex, ] 68 | mergedResults <- rbind(mergedResults, dbscanMerge(firstPartData)) 69 | for(j in 2 : length(theIndexLargerCannotMerge)){ 70 | if(j > length(theIndexLargerCannotMerge)){ 71 | break() 72 | } 73 | partIndex <- (theIndexLargerCannotMerge[j - 1] + 1) : theIndexLargerCannotMerge[j] 74 | partData <- oneTypeData[partIndex, ] 75 | mergedResults <- rbind(mergedResults, dbscanMerge(partData)) 76 | } 77 | lastPartIndex <- (theIndexLargerCannotMerge[length(theIndexLargerCannotMerge)] + 1) : dataNum 78 | lastPartData <- oneTypeData[lastPartIndex, ] 79 | mergedResults <- rbind(mergedResults, dbscanMerge(lastPartData)) 80 | mergedResults <- mergedResults[-1, ] 81 | return(mergedResults) 82 | } 83 | 84 | twoRegionMerge <- function(oneTypeData){ 85 | mergedResults <- data.frame(chr = "", start = 0, end = 0, type = "", subtype = "") 86 | theDist <- abs(oneTypeData$end[1] - oneTypeData$start[2]) 87 | if(theDist > DistCannotMerge){ 88 | return(oneTypeData) 89 | } 90 | theLength <- oneTypeData$end - oneTypeData$start 91 | MeanTheLength <- mean(theLength) 92 | theFold1 <- theDist / MeanTheLength 93 | theFold2 <- theDist / min(theLength) 94 | theFold3 <- theDist / max(theLength) 95 | if((theFold1 < 1 & theFold2 < 3) | theFold3 < 0.1){ 96 | start = min(oneTypeData$start[1], oneTypeData$start[2]) 97 | end = max(oneTypeData$end[1], oneTypeData$end[2]) 98 | mergedResults <- rbind(mergedResults, data.frame(chr = oneTypeData$chr[1], start = start, end = end, 99 | type = oneTypeData$type[1], subtype = oneTypeData$subtype[1], 100 | stringsAsFactors =F)) 101 | }else{ 102 | mergedResults <- rbind(mergedResults, oneTypeData) 103 | } 104 | mergedResults <- mergedResults[-1, ] 105 | return(mergedResults) 106 | } 107 | 108 | getTheMergeRes <- function(theDataUseToMerge){ 109 | if(nrow(theDataUseToMerge) == 1){ 110 | return(theDataUseToMerge) 111 | }else{ 112 | preNum <- nrow(theDataUseToMerge) 113 | oneMergedRes <- bedRegionMerge(theDataUseToMerge) 114 | oneMergedRes <- oneMergedRes[order(oneMergedRes$start), ] 115 | while(nrow(oneMergedRes) < preNum){ 116 | preNum <- nrow(oneMergedRes) 117 | oneMergedRes <- bedRegionMerge(oneMergedRes) 118 | oneMergedRes <- oneMergedRes[order(oneMergedRes$start), ] 119 | } 120 | return(oneMergedRes) 121 | } 122 | } 123 | ################################# function defination end ################################ 124 | ########################################################################################## 125 | 126 | ########################################################################################## 127 | ################# check if the required packages have been installed ##################### 128 | if(!is.installed("dbscan")){ install.packages("dbscan", repos="http://cran.us.r-project.org") } 129 | if(!is.installed("data.table")){ install.packages("data.table", repos="http://cran.us.r-project.org") } 130 | library(dbscan) 131 | library(data.table) 132 | ########################################################################################## 133 | 134 | ########################################################################################## 135 | ###################################### argument parsing ################################## 136 | DistCannotMerge <- 3000000 137 | oneBed <- "" 138 | theHelpMessge = 139 | paste(" The required packages are \"dbscan\" and \"data.table\" \n", 140 | " The usage of JaxCNVMerge is like: \n", 141 | " \"Rscript --vanilla JaxCNVMerge.R -md 3000000 -i filename\". The output file is filename.merge.bed\n", 142 | " Arguments: \n", 143 | " --max_distance or -md (option), numeric, distance threshold in merging, default s 3000000 \n", 144 | " --bed or -i (required), string, the bed file of the CNV fragments \n", 145 | " --help or -h, print the help messgae \n", sep = "") 146 | args <- commandArgs(TRUE) 147 | if(length(args) < 1) { 148 | args <- c("--help") 149 | } 150 | ## Help section 151 | if("--help" %in% args | "-h" %in% args) { 152 | cat(theHelpMessge) 153 | q(save="no") 154 | } 155 | if("--max_distance" %in% args){ 156 | argIndex <- which(args == "--max_distance") 157 | DistCannotMerge <- as.numeric(args[argIndex + 1]) 158 | }else if("-md" %in% args){ 159 | argIndex <- which(args == "-md") 160 | DistCannotMerge <- as.numeric(args[argIndex + 1]) 161 | } 162 | if("--bed " %in% args){ 163 | argIndex <- which(args == "--bed") 164 | oneBed <- args[argIndex + 1] 165 | }else if("-i" %in% args){ 166 | argIndex <- which(args == "-i") 167 | oneBed <- args[argIndex + 1] 168 | }else{ 169 | cat(theHelpMessge) 170 | q(save="no") 171 | } 172 | print(paste("the input file is :", oneBed, sep = '')) 173 | print(paste("the output file is :", oneBed, ".merge.bed", sep = "")) 174 | print(paste("the max_distance is: ", format(DistCannotMerge, scientific = F), sep = '')) 175 | ############################################################################################### 176 | 177 | ############################################################################################### 178 | ########################### merge CNVs in Bedfile bedfile ##################################### 179 | mergedResults <- data.frame(chr = "", start = 0, end = 0, type = "", subtype = "") 180 | theData <- fread(oneBed, header = F, sep = "\t") 181 | colnames(theData) <- c("chr", "start", "end", "type", "subtype") 182 | chrs <- unique(theData$chr) 183 | for(oneChr in chrs){ ## for CNVs in a chromosome 184 | print(paste("-------", oneChr, sep = "")) 185 | oneChrData <- theData[which(theData$chr == oneChr), ] 186 | oneChrData <- oneChrData[order(oneChrData$start), ] 187 | 188 | if(nrow(oneChrData) == 1){ 189 | mergedResults <- rbind(mergedResults, oneChrData) 190 | next() 191 | } 192 | 193 | currType <- oneChrData$type[1] 194 | currEnd <- oneChrData$end[1] 195 | theDataUseToMerge <- oneChrData[1, ] 196 | 197 | for(i in 2 : nrow(oneChrData)){ 198 | if(oneChrData$type[i] == currType & (oneChrData$start[i] - currEnd) <= DistCannotMerge){ 199 | theDataUseToMerge <- rbind(theDataUseToMerge, oneChrData[i, ]) 200 | }else{ 201 | mergedResults <- rbind(mergedResults, getTheMergeRes(theDataUseToMerge)) 202 | currType <- oneChrData$type[i] 203 | theDataUseToMerge <- oneChrData[i, ] 204 | } 205 | currEnd <- oneChrData$end[i] 206 | } 207 | if(nrow(theDataUseToMerge) > 0){ 208 | mergedResults <- rbind(mergedResults, getTheMergeRes(theDataUseToMerge)) 209 | } 210 | } 211 | mergedResults <- mergedResults[-1, ] 212 | lengths <- mergedResults$end - mergedResults$start 213 | mergedResults <- mergedResults[which(lengths > 46000), ] ## filter the lengths 214 | fwrite(mergedResults, file = paste(oneBed, ".merge.bed", sep = ''), quote = F, append = F, 215 | row.names = F, col.names = F, sep = '\t') 216 | ############################################################################################### 217 | 218 | 219 | -------------------------------------------------------------------------------- /src/CallHmm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "umdhmm-v1.02/nrutil.h" 9 | #include "umdhmm-v1.02/hmm.h" 10 | 11 | #include "DataStruct.h" 12 | #include "CallHmm.h" 13 | 14 | namespace { 15 | void PrintHmm (const HMM& hmm, const int& T, const int* O) { 16 | std::cerr << "DEBUG: CallHmm::HmmAndViterbi" << std::endl; 17 | std::cerr << "=====Start HMM table printing=====" << std::endl; 18 | std::cerr << "N, M: " << hmm.N << ", " << hmm.M << std::endl; 19 | std::cerr << "A" << std::endl; 20 | for (int i = 0; i < hmm.N; ++i) { 21 | std::cerr << "A" << i << std::endl; 22 | for (int j = 0; j < hmm.N; ++j) 23 | std::cerr << hmm.A[i][j] << " "; 24 | std::cerr << std::endl; 25 | } 26 | 27 | std::cerr << "B" << std::endl; 28 | for (int i = 0; i < hmm.N; ++i) { 29 | std::cerr << "B" << i << std::endl; 30 | for (int j = 0; j < hmm.M; ++j) 31 | std::cerr << hmm.B[i][j] << " "; 32 | std::cerr << std::endl; 33 | } 34 | 35 | std::cerr << "pi" << std::endl; 36 | for (int i = 0; i < hmm.N; ++i) { 37 | std::cerr << hmm.pi[i] << " "; 38 | } 39 | 40 | std::cerr << std::endl; 41 | std::cerr << "T: " << T << std::endl; 42 | std::cerr << "O" << std::endl; 43 | for (int i = 0; i < T; ++i) { 44 | std::cerr << O[i] << " "; 45 | } 46 | std::cerr << std::endl; 47 | std::cerr << "=====End HMM table printing=====" << std::endl; 48 | } 49 | 50 | inline bool SortByCoordinate(const SHmmStats & a, const SHmmStats & b) { 51 | if (a.chr != b.chr) return a.chr < b.chr; 52 | else return a.pos < b.pos; 53 | } 54 | 55 | inline bool SortByCoordinateHeap(const SHmmStatsHeap & a, const SHmmStatsHeap & b) { 56 | return a.hmm_stats.pos < b.hmm_stats.pos; 57 | } 58 | 59 | inline bool SortByLength(const SHmmStatsHeap & a, const SHmmStatsHeap & b) { 60 | return a.hmm_stats.length < b.hmm_stats.length; 61 | } 62 | 63 | inline bool CheckMerge(SHmmStats & pilot, const SHmmStats & target) { 64 | bool merged = false; 65 | // There should not any overlap between two. 66 | if ((pilot.pos + pilot.length <= target.pos) || (target.pos + target.length <= pilot.pos)) { 67 | const bool pilot_is_left_hand = (pilot.pos + pilot.length - 1 < target.pos); 68 | const unsigned int gap = pilot_is_left_hand ? target.pos - pilot.pos - pilot.length 69 | : pilot.pos - target.pos - target.length; 70 | 71 | // Merge 72 | if ((gap / static_cast(pilot.length)) < 0.1) { 73 | merged = true; 74 | if (pilot_is_left_hand) { 75 | const unsigned int end_pos = target.pos + target.length - 1; 76 | pilot.length = end_pos - pilot.pos + 1; 77 | } else { 78 | pilot.length = pilot.pos + pilot.length - 1 - target.pos + 1; 79 | pilot.pos = target.pos; 80 | } 81 | } 82 | } 83 | 84 | return merged; 85 | } 86 | 87 | void ConsolidateStats(std::vector & smooth_result, std::vector & heap) { 88 | std::sort(heap.begin(), heap.end(), SortByLength); 89 | for (std::vector ::reverse_iterator ite = heap.rbegin(); ite != heap.rend(); ++ite) { 90 | if (!smooth_result[ite->id].merged && ite->hmm_stats.stats != 3) { 91 | #ifdef DEBUG 92 | std::cerr << "[Merging Anchor] " << ite->hmm_stats.chr << "\t" << ite->hmm_stats.pos << "\t" 93 | << ite->hmm_stats.length << "\t" << ite->hmm_stats.stats << std::endl; 94 | #endif 95 | // Forward merging 96 | for (unsigned int i = ite->id + 1; i < smooth_result.size(); ++i) { 97 | #ifdef DEBUG 98 | std::cerr << "\t[Merging Target] " << smooth_result[i].hmm_stats.chr << "\t" << smooth_result[i].hmm_stats.pos << "\t" << smooth_result[i].hmm_stats.length << "\t" << smooth_result[i].hmm_stats.stats << std::endl; 99 | #endif 100 | if (smooth_result[i].merged) break; 101 | if (smooth_result[i].hmm_stats.stats == 3) continue; 102 | //const bool consistant_type = ite->hmm_stats.stats == smooth_result[i].hmm_stats.stats; 103 | //const bool consistant_type = ((ite->hmm_stats.stats == 1 || ite->hmm_stats.stats == 2) 104 | // && (smooth_result[i].hmm_stats.stats == 1 || smooth_result[i].hmm_stats.stats == 2)) 105 | // || ((ite->hmm_stats.stats == 4 || ite->hmm_stats.stats == 5) 106 | // && (smooth_result[i].hmm_stats.stats == 4 || smooth_result[i].hmm_stats.stats == 5)); 107 | const bool consistant_type = (ite->hmm_stats.stats == smooth_result[i].hmm_stats.stats) 108 | || ((ite->hmm_stats.stats == 4 || ite->hmm_stats.stats == 5) 109 | && (smooth_result[i].hmm_stats.stats == 4 || smooth_result[i].hmm_stats.stats == 5)); 110 | if (!consistant_type) { // Different stats 111 | break; 112 | } else { 113 | if (CheckMerge(ite->hmm_stats, smooth_result[i].hmm_stats)) { 114 | #ifdef DEBUG 115 | std::cerr << "\tMERGE SUCCESSFUL" << std::endl; 116 | #endif 117 | smooth_result[i].merged = true; 118 | } else { 119 | #ifdef DEBUG 120 | std::cerr << "\tMERGE FAIL" << std::endl; 121 | #endif 122 | break; 123 | } 124 | } 125 | } 126 | #ifdef DEBUG 127 | std::cerr << "Forward merging done." << std::endl; 128 | #endif 129 | // Backward merging 130 | for (unsigned int i = ite->id; i > 0; --i) { 131 | #ifdef DEBUG 132 | std::cerr << "\t[Merging Target] " << smooth_result[i - 1].hmm_stats.chr << "\t" << smooth_result[i - 1].hmm_stats.pos << "\t" << smooth_result[i - 1].hmm_stats.length << "\t" << smooth_result[i - 1].hmm_stats.stats << std::endl; 133 | #endif 134 | if (smooth_result[i - 1].merged) break; 135 | if (smooth_result[i - 1].hmm_stats.stats == 3) continue; 136 | //const bool consistant_type = ite->hmm_stats.stats == smooth_result[i - 1].hmm_stats.stats; 137 | //const bool consistant_type = ((ite->hmm_stats.stats == 1 || ite->hmm_stats.stats == 2) 138 | // && (smooth_result[i - 1].hmm_stats.stats == 1 || smooth_result[i - 1].hmm_stats.stats == 2)) 139 | // || ((ite->hmm_stats.stats == 4 || ite->hmm_stats.stats == 5) 140 | // && (smooth_result[i - 1].hmm_stats.stats == 4 || smooth_result[i - 1].hmm_stats.stats == 5)); 141 | const bool consistant_type = (ite->hmm_stats.stats == smooth_result[i - 1].hmm_stats.stats) 142 | || ((ite->hmm_stats.stats == 4 || ite->hmm_stats.stats == 5) 143 | && (smooth_result[i - 1].hmm_stats.stats == 4 || smooth_result[i - 1].hmm_stats.stats == 5)); 144 | if (!consistant_type) { // Different stats 145 | break; 146 | } else { 147 | if (CheckMerge(ite->hmm_stats, smooth_result[i - 1].hmm_stats)) { 148 | #ifdef DEBUG 149 | std::cerr << "\tMERGE SUCCESSFUL" << std::endl; 150 | #endif 151 | smooth_result[i - 1].merged = true; 152 | } else { 153 | #ifdef DEBUG 154 | std::cerr << "\tMERGE FAIL" << std::endl; 155 | #endif 156 | break; 157 | } 158 | } 159 | } 160 | } 161 | } 162 | } 163 | 164 | void MergeStats (std::vector & result, std::vector & smooth_result) { 165 | //std::vector smooth_result; 166 | unsigned int vector_id = 0, out_stats_length = 0, total_out_stats_length = 0, out_stats_count = 0; 167 | SHmmStatsHeap tmp_heap(result.front(), vector_id); 168 | smooth_result.push_back(tmp_heap); 169 | for (std::vector ::const_iterator ite = std::next(result.begin()); ite != result.end(); ++ite) { 170 | if (ite->length < 5000 || ite->stats == smooth_result.back().hmm_stats.stats) { 171 | smooth_result.back().hmm_stats.length += ite->length; 172 | if ((smooth_result.back().hmm_stats.stats == 3) 173 | // If the stats of smooth_result.back().hmm_stats.stats is 3, we absorb the new stats anyway. 174 | // We prefer normal stage. 175 | || (ite->stats == smooth_result.back().hmm_stats.stats && ite->length / static_cast(out_stats_length) > 0.3)) { 176 | // If the stats of smooth_result.back().hmm_stats.stats is NOT 3, we absorb when length of the current stats is larger than 30% of out_stats_length. 177 | out_stats_length = 0; 178 | out_stats_count = 0; 179 | } else { 180 | // Two stats are different. 181 | total_out_stats_length += ite->length; 182 | out_stats_length += ite->length; 183 | ++out_stats_count; 184 | } 185 | // Too many total_out_stats_length so far, we stop extend smooth_result.back(). 186 | if (total_out_stats_length / static_cast(smooth_result.back().hmm_stats.length) > 0.2) { 187 | smooth_result.back().hmm_stats.length -= out_stats_length; 188 | 189 | ite = std::prev(ite, out_stats_count - 1); 190 | tmp_heap.hmm_stats = *ite; 191 | tmp_heap.id = ++vector_id; 192 | smooth_result.push_back(tmp_heap); 193 | 194 | total_out_stats_length = 0; 195 | out_stats_length = 0; 196 | out_stats_count = 0; 197 | } 198 | } else { // !(ite->length < 5000 || ite->stats == smooth_result.back().hmm_stats.stats) 199 | // A new stats is created. 200 | tmp_heap.hmm_stats = *ite; 201 | tmp_heap.id = ++vector_id; 202 | smooth_result.push_back(tmp_heap); 203 | total_out_stats_length = 0; 204 | out_stats_length = 0; 205 | out_stats_count = 0; 206 | } 207 | } 208 | } 209 | 210 | void SmoothStats(std::vector & cnvs, const std::string & ref_name, 211 | const std::vector & read_depth, const int bin_size, const unsigned int minimum_report_size, 212 | const int* q, const int T) { 213 | if (read_depth.size() != T) { 214 | std::cerr << "ERROR: HMM read_depth's size does not match with the number of stats." << std::endl; 215 | return; 216 | } 217 | 218 | std::vector result; 219 | result.reserve(T); 220 | std::vector ::const_iterator rd_ite = read_depth.begin(); 221 | // Ccollapse stats. 222 | #ifdef DEBUG 223 | std::cerr << "POS\tn_count\tlow_mq_alignments\tOri_stats" << std::endl; 224 | #endif 225 | for (int i = 1; i <= T; ++i, ++rd_ite) { 226 | // If there are >50% N's in the region, the region won't be taken in account so we set the stats to NORMAL. 227 | #ifdef DEBUG 228 | std::cerr << rd_ite->pos << "\t" << rd_ite->n_count << "\t" << rd_ite->low_mq_alignments << "\t" << q[i] << std::endl; 229 | #endif 230 | int cur_stat = (rd_ite->n_count * 2) > bin_size ? 3 : q[i]; 231 | // If the cur_stat is 1 (homo DEL), we don't estimate low_mq_alignments due to we don't expect enough reads to do so. 232 | // If there are >50% low qual alignments in the region, the region won't be taken in account so we set the stats to NORMAL. 233 | if (cur_stat != 1) cur_stat = rd_ite->low_mq_alignments > 0.5 ? 3 : cur_stat; 234 | 235 | if (result.empty() || cur_stat != result.back().stats) { // Create the init hmm_stats. 236 | SHmmStats tmp(rd_ite->pos, cur_stat, 0); 237 | result.push_back(tmp); 238 | } 239 | result.back().length += bin_size; 240 | } 241 | 242 | #ifdef DEBUG 243 | std::cerr << "HMM before smoothing" << std::endl; 244 | for (std::vector ::const_iterator ite = result.begin(); ite != result.end(); ++ite) { 245 | std::cerr << ite->pos << "\t" << ite->stats << "\t" << ite->length << std::endl; 246 | } 247 | #endif 248 | std::vector smooth_result; 249 | MergeStats(result, smooth_result); 250 | result.clear(); // Save some memory 251 | 252 | #ifdef DEBUG 253 | std::cerr << "HMM after smoothing" << std::endl; 254 | for (std::vector ::const_iterator ite = smooth_result.begin(); ite != smooth_result.end(); ++ite) { 255 | std::cerr << ite->hmm_stats.pos << "\t" << ite->hmm_stats.stats << "\t" << ite->hmm_stats.length << std::endl; 256 | } 257 | #endif 258 | 259 | // Merge segments from the largest one 260 | std::vector heap = smooth_result; 261 | ConsolidateStats(smooth_result, heap); 262 | 263 | #ifdef DEBUG 264 | std::sort(heap.begin(), heap.end(), SortByCoordinateHeap); 265 | std::cerr << "HMM after consolidating" << std::endl; 266 | for (std::vector ::const_iterator ite = heap.begin(); ite != heap.end(); ++ite) { 267 | std::cerr << ite->hmm_stats.pos << "\t" << ite->hmm_stats.stats << "\t" << ite->hmm_stats.length << "\t" 268 | << (smooth_result[ite->id].merged ? "MERGED: T" : "MERGED: F") << std::endl; 269 | } 270 | #endif 271 | 272 | // Dump the final results 273 | for (std::vector ::const_iterator ite = heap.begin(); ite != heap.end(); ++ite) { 274 | if (!smooth_result[ite->id].merged && ite->hmm_stats.stats != 3 && ite->hmm_stats.length > minimum_report_size) { 275 | cnvs.push_back(ite->hmm_stats); 276 | cnvs.back().chr = ref_name; 277 | } 278 | std::sort(cnvs.begin(), cnvs.end(), SortByCoordinate); 279 | } 280 | 281 | } 282 | } // namespace 283 | 284 | namespace CallHmm { 285 | bool HmmAndViterbi (std::vector & cnvs, const std::string & ref_name, const std::vector & read_depth, 286 | const int bin_size, const unsigned int minimum_report_size, const int coverage) { 287 | if (read_depth.empty()) return false; 288 | 289 | // Init HMM 290 | int T = read_depth.size(); 291 | int* O = new int [T + 1]; // observation sequence O[1..T] 292 | memset(O, 0, sizeof(O)); 293 | for (std::vector ::const_iterator ite = read_depth.begin(); ite != read_depth.end(); ++ite) { 294 | //const int rd_diff = (ite->count - coverage) / static_cast(ite->kmer_score); 295 | //int tmp_o = round((coverage + rd_diff) / coverage * 50); 296 | //if (tmp_o < 0) tmp_o = 0; 297 | const int tmp_o = round(ite->count / static_cast(coverage) * 50); 298 | O[std::distance(read_depth.begin(), ite) + 1] = std::min(tmp_o, 180); 299 | } 300 | 301 | HMM hmm; 302 | hmm.N = hmm_N; 303 | hmm.M = hmm_M; 304 | hmm.A = new double* [hmm.N + 1]; 305 | hmm.B = new double* [hmm.N + 1]; 306 | hmm.pi = new double [hmm.N + 1]; 307 | 308 | for (int i = 1; i <= hmm.N; ++i) { 309 | hmm.A[i] = new double [hmm.N + 1]; 310 | hmm.B[i] = new double [hmm.M + 1]; 311 | std::memcpy(hmm.A[i] + 1, hmm_A[i - 1], sizeof(double)* hmm.N); 312 | std::memcpy(hmm.B[i] + 1, hmm_B[i - 1], sizeof(double) * hmm.M); 313 | } 314 | std::memcpy(hmm.pi + 1, hmm_pi, sizeof(double) * hmm.N); 315 | 316 | int* q = new int [T + 1]; // resultant states 317 | memset(q, 0, sizeof(q)); 318 | int** psi = new int* [T + 1]; 319 | double **delta = new double* [T + 1]; 320 | for (int i = 1; i <= T; ++i) { 321 | psi[i] = new int [hmm.N + 1]; 322 | memset(psi[i], 0, sizeof(psi[i])); 323 | delta[i] = new double [hmm.N + 1]; 324 | memset(delta[i], 0.0, sizeof(delta[i])); 325 | } 326 | double logproba = 0.0; 327 | 328 | // End of Init HMM 329 | 330 | //#ifdef DEBUG 331 | //PrintHmm(hmm, T, O); 332 | //#endif 333 | 334 | ViterbiLog(&hmm, T, O, delta, psi, q, &logproba); 335 | SmoothStats(cnvs, ref_name, read_depth, bin_size, minimum_report_size,q, T); 336 | 337 | // Clean up 338 | for (int i = 1; i <= hmm.N; ++i) { 339 | delete hmm.A[i]; 340 | delete hmm.B[i]; 341 | } 342 | 343 | for (int i = 1; i <= T; ++i) { 344 | delete psi[i]; 345 | delete delta[i]; 346 | } 347 | 348 | delete O; 349 | delete hmm.A; 350 | delete hmm.B; 351 | delete hmm.pi; 352 | delete q; 353 | delete psi; 354 | delete delta; 355 | return true; 356 | } 357 | } // namespace CallHmm 358 | -------------------------------------------------------------------------------- /lib/umdhmm-v1.02/COPYING: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc. 5 | 675 Mass Ave, Cambridge, MA 02139, USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Library General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | 294 | Copyright (C) 19yy 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License 307 | along with this program; if not, write to the Free Software 308 | Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) 19yy name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 331 | 332 | , 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Library General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /src/GetCnvSignal.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // Self include 10 | #include "GrabJellyfishKmer.h" 11 | #include "GetCnvSignal.h" 12 | #include "DataStruct.h" 13 | #include "CallHmm.h" 14 | #include "EstimateCoverage.h" 15 | 16 | // FASTAQ include 17 | #include "fastaq/fasta.h" 18 | #include "fastaq/reference.h" 19 | #include "fastaq/region.h" 20 | 21 | // Jellyfish include 22 | #include "jellyfish/file_header.hpp" 23 | #include "jellyfish/jellyfish.hpp" 24 | #include "jellyfish/mapped_file.hpp" 25 | 26 | // htslib include 27 | #include "htslib/sam.h" 28 | 29 | namespace { 30 | struct SBamData { 31 | unsigned int total_read = 0; 32 | unsigned int paired_reads = 0; 33 | unsigned int proper_pairs = 0; 34 | unsigned int inproper_pairs = 0; 35 | unsigned int mate_unmapped = 0; 36 | unsigned int low_mq_alignments = 0; 37 | std::vector isizes; 38 | std::vector softclips; 39 | std::vector mismatches; 40 | std::vector read_depth; 41 | 42 | void Clean() { 43 | total_read = 0; 44 | paired_reads = 0; 45 | proper_pairs = 0; 46 | inproper_pairs = 0; 47 | mate_unmapped = 0; 48 | low_mq_alignments = 0; 49 | isizes.clear(); 50 | softclips.clear(); 51 | mismatches.clear(); 52 | read_depth.clear(); 53 | } 54 | void Reserve(const unsigned int & size) { 55 | isizes.reserve(size); 56 | softclips.reserve(size); 57 | mismatches.reserve(size); 58 | read_depth.reserve(size); 59 | } 60 | }; 61 | 62 | // Func: Once alignments in a bin have been processed, then dump the info we collect for this bin. 63 | // Also, clean the info for the next bin. 64 | void PrintCleanBamData (SBamData & bam_data, std::vector & hmm_rd, std::stringstream & bam_signal_out, const bool output_bam_signal, const int & max_pos) { 65 | // If the max_pos means the last element. 66 | const int cur_pos = !bam_data.read_depth.empty() && max_pos == std::numeric_limits::max() 67 | ? bam_data.read_depth.back().pos : max_pos; 68 | 69 | if (output_bam_signal) { 70 | bam_signal_out << cur_pos << "\t"; 71 | 72 | if (bam_data.total_read == 0) { 73 | bam_signal_out << "0\t0\t0\t0\t0\t0\t0\t0\t"; 74 | } else { 75 | bam_signal_out << bam_data.total_read << "\t" << bam_data.paired_reads << "\t" // total reads and total paired-end reads 76 | << bam_data.proper_pairs << "\t" // proper pairs 77 | << bam_data.inproper_pairs << "\t" // inproper pairs 78 | << bam_data.mate_unmapped << "\t" // mate unmapped 79 | << ((bam_data.total_read < 4) ? 0 : bam_data.low_mq_alignments / static_cast(bam_data.total_read)) 80 | << "\t"; // ratio of low mq alignments 81 | // < 4, Not enough reads to tell the low-mapping quality. 82 | 83 | // Isize 84 | uint64_t sum = 0; 85 | for (std::vector::const_iterator ite = bam_data.isizes.begin(); ite != bam_data.isizes.end(); ++ite) 86 | sum += *ite; 87 | bam_signal_out << sum / static_cast(bam_data.total_read) << "\t"; 88 | 89 | // Softclip 90 | sum = 0; 91 | for (std::vector::const_iterator ite = bam_data.softclips.begin(); ite != bam_data.softclips.end(); ++ite) 92 | sum += *ite; 93 | bam_signal_out << sum / static_cast(bam_data.total_read) << "\t"; 94 | } 95 | } 96 | 97 | // Read depth 98 | uint64_t sum = 0; 99 | unsigned int pos_count = 0; 100 | while (!bam_data.read_depth.empty() && bam_data.read_depth.front().pos <= max_pos) { 101 | ++pos_count; 102 | sum += bam_data.read_depth.front().count; 103 | bam_data.read_depth.erase(bam_data.read_depth.begin());; 104 | } 105 | 106 | if (output_bam_signal) 107 | bam_signal_out << (pos_count == 0 ? 0 : sum / static_cast(pos_count)) << "\t"; 108 | 109 | SReadDepth rd_tmp(cur_pos, round(sum / static_cast(pos_count))); 110 | rd_tmp.low_mq_alignments = bam_data.low_mq_alignments / static_cast(bam_data.total_read); 111 | //rd_tmp.low_mq_alignments = (bam_data.total_read < 4) 112 | // ? 0 // Not enough reads to tell the low-mapping quality. 113 | // : bam_data.low_mq_alignments / static_cast(bam_data.total_read); 114 | hmm_rd.push_back(rd_tmp); 115 | 116 | // Clean 117 | // bam_data.read_depth has been cleaned in the while loop. 118 | bam_data.total_read = 0; 119 | bam_data.paired_reads = 0; 120 | bam_data.proper_pairs = 0; 121 | bam_data.inproper_pairs = 0; 122 | bam_data.mate_unmapped = 0; 123 | bam_data.low_mq_alignments = 0; 124 | bam_data.isizes.clear(); 125 | bam_data.softclips.clear(); 126 | bam_data.mismatches.clear(); 127 | }; 128 | 129 | // Func: Locate the SBamData in the list by using pos for read depth calculation. 130 | // If pos is not in SBamData::read_depth, then push new continues elements. 131 | // ProcessAlignment calls this function. 132 | // TODO: The performance of this function needs to be improved. 133 | inline std::vector::iterator GetRdListIte (std::vector & read_depth, const int & pos) { 134 | if (!read_depth.empty() && pos <= read_depth.back().pos) { 135 | for (std::vector::iterator ite = read_depth.begin(); 136 | ite != read_depth.end(); ++ite) { 137 | if (pos == ite->pos) 138 | return ite; // Get the match pos and return the iterator. 139 | } 140 | } else { 141 | 142 | // The pos is larger than read_depth.end() 143 | SReadDepth tmp_data((read_depth.empty() ? pos : read_depth.back().pos + 1), 0); 144 | while (tmp_data.pos <= pos) { 145 | read_depth.push_back(tmp_data); 146 | ++tmp_data.pos; 147 | } 148 | } 149 | 150 | // Return the last ite. 151 | std::vector::iterator ite = read_depth.begin(); 152 | std::advance(ite, read_depth.size() - 1); //ite is set to last element 153 | return ite; 154 | } 155 | 156 | // Func: Extract flag and read depth information and collect them in bam_data. 157 | // @bam_data: The info of the alignment will be kept in this data. 158 | // @aln: hts_lib bam alignment. 159 | void ProcessAlignment (SBamData & bam_data, const bam1_t * aln, const uint8_t aln_qual_filter) { 160 | // The alignment is not mapped. 161 | if (aln->core.flag & BAM_FUNMAP || aln->core.flag & BAM_FSECONDARY || aln->core.flag & BAM_FQCFAIL 162 | || aln->core.flag & BAM_FDUP || aln->core.flag & BAM_FSUPPLEMENTARY) 163 | return; 164 | 165 | ++bam_data.total_read; 166 | // Paired-end read 167 | if (aln->core.flag & BAM_FPAIRED) ++bam_data.paired_reads; 168 | // Proper pairs 169 | if (aln->core.flag & BAM_FPROPER_PAIR) ++bam_data.proper_pairs; 170 | else ++bam_data.inproper_pairs; 171 | // Is mate unmapped? 172 | // If it is, isize will be collected. 173 | if (aln->core.flag & BAM_FMUNMAP) ++bam_data.mate_unmapped; 174 | else bam_data.isizes.push_back(aln->core.isize < 0 ? -aln->core.isize : aln->core.isize); 175 | // MQ 176 | if (aln->core.qual < aln_qual_filter) ++bam_data.low_mq_alignments; 177 | // Softclip 178 | const uint32_t* pCigar = bam_get_cigar(aln); 179 | uint32_t sc = 0; 180 | if (bam_cigar_op(*pCigar) == BAM_CSOFT_CLIP) 181 | sc += bam_cigar_oplen(*pCigar); 182 | // Check the last CIGAR 183 | if (bam_cigar_op(*(pCigar + aln->core.n_cigar - 1)) == BAM_CSOFT_CLIP) 184 | sc += bam_cigar_oplen(*(pCigar + aln->core.n_cigar - 1)); 185 | bam_data.softclips.push_back(sc); 186 | 187 | // Read depth 188 | if (!bam_data.read_depth.empty() && aln->core.pos < bam_data.read_depth.front().pos) { 189 | std::cerr << "ERROR: The pos of " << bam_get_qname(aln) << " " << aln->core.pos << " is inconsistent." << std::endl 190 | << "\t\tThe smallest pos of the current bin is " << bam_data.read_depth.front().pos << std::endl; 191 | } else { 192 | int32_t pos = aln->core.pos; 193 | // Locate the SBamData for RD in the list by using pos. 194 | std::vector::iterator ite = GetRdListIte(bam_data.read_depth, pos); 195 | for (uint32_t i = 0; i < aln->core.n_cigar; ++i) { 196 | const uint32_t op = bam_cigar_op(*(pCigar + i)); 197 | if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF || op == BAM_CDEL || op == BAM_CREF_SKIP) { 198 | for (uint32_t j = 0; j < bam_cigar_oplen(*(pCigar + i)); ++j) { 199 | if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) ++(ite->count); 200 | ++ite; 201 | ++pos; 202 | // GetRdListIte cannot locate SBamData in the list for the given pos so append a new SBamData. 203 | if (ite == bam_data.read_depth.end()) { // Need to add new element in the list. 204 | SReadDepth tmp_data(pos, 0); 205 | bam_data.read_depth.push_back(tmp_data); 206 | ite = bam_data.read_depth.begin(); 207 | std::advance(ite, bam_data.read_depth.size() - 1); //iter is set to last element 208 | } 209 | } // end of for loop 210 | } // end of if 211 | } // end of for loop 212 | } 213 | 214 | } 215 | 216 | // Func: Process bam by giving bam filename, bam region and bin size. 217 | // For each alignment, Function ProcessAlignment will extract info the alignment. 218 | // HMM using read depths is also embedded. 219 | // @ref: We can access bases of the entire chromosome from ref. 220 | void ProcessBam (std::vector & hmm_rd, std::stringstream & bam_signal_out, const bool output_bam_signal, 221 | const char * bam_filename, const Fastaq::SRegion & region, 222 | const int & bin, const std::string & ref, const std::string & kmer_seq, const uint8_t aln_qual_filter) { 223 | samFile * bam_reader = sam_open(bam_filename, "r"); 224 | 225 | bam_hdr_t *header; 226 | header = sam_hdr_read(bam_reader); 227 | bam1_t * aln = bam_init1(); 228 | 229 | SBamData bam_data; 230 | bam_data.Reserve(region.end - region.begin + 1); 231 | // idx must be okay. We have checked in Run(). 232 | hts_idx_t * idx = sam_index_load(bam_reader, bam_filename); 233 | const bool load_index = idx == NULL ? false : true; 234 | 235 | 236 | if (load_index) { 237 | const std::string cat_region = region.chr + ":" + std::to_string(region.begin) + '-' + std::to_string(region.end); 238 | hts_itr_t * ite = sam_itr_querys(idx, header, cat_region.c_str()); 239 | int pre_bin = region.begin / bin; 240 | while (ite && sam_itr_next(bam_reader, ite, aln) >= 0) { 241 | const int cur_bin = aln->core.pos / bin; 242 | // If the cur_bin is not the same as pre_bin, we clean up the pre_bin. 243 | if ((cur_bin > pre_bin) && (cur_bin != pre_bin)) { 244 | // Every bin in the region between pre_bin and cur_bin will be padded. 245 | for (int i = pre_bin; i < cur_bin; ++i){ 246 | PrintCleanBamData(bam_data, hmm_rd, bam_signal_out, output_bam_signal, (i + 1) * bin - 1); // (i + 1) * bin - 1 for giving the max pos of the bin. 247 | // Calculate the number of N's in this region. 248 | hmm_rd.back().n_count = 0; 249 | //TODO: The for loop seems slow. 250 | //If the count of N's is too high for the bin, the HMM stats of the bin will be set to 3 which is normal. 251 | for (std::string::const_iterator s_ite = std::next(ref.begin(), i * bin); 252 | s_ite != ref.end() && s_ite != std::next(ref.begin(), (i + 1) * bin - 1); ++s_ite) { 253 | if (*s_ite == 'N') 254 | ++(hmm_rd.back().n_count); 255 | } 256 | // Keep kmer in bam_signal_out 257 | if (output_bam_signal) { 258 | unsigned int kmer_score = 0; 259 | for (std::string::const_iterator s_ite = std::next(kmer_seq.begin(), i * bin); 260 | s_ite != kmer_seq.end() && s_ite != std::next(kmer_seq.begin(), (i + 1) * bin - 1); ++s_ite) { 261 | if (static_cast(*s_ite) - 33 > 0) 262 | kmer_score += static_cast(*s_ite) - 33 - 1; 263 | } 264 | bam_signal_out << kmer_score / static_cast(bin) << std::endl; 265 | } 266 | } 267 | pre_bin = cur_bin; 268 | } 269 | ProcessAlignment(bam_data, aln, aln_qual_filter); 270 | } 271 | 272 | // Clean up 273 | hts_itr_destroy(ite); 274 | } 275 | 276 | // Clean up 277 | bam_destroy1(aln); 278 | bam_hdr_destroy(header); 279 | sam_close(bam_reader); 280 | bam_data.Clean(); 281 | } 282 | 283 | void PrintResults(std::ofstream & log, std::stringstream & bam_signal_out) { 284 | const bool have_count_kmer_out = false; 285 | log << "#POS\tREADS\tPAIRED\tPROPER_PAIRS\tINPROPER_PAIRS\tMATE_UNMAPPED\tLOW_MQ_RATIO\tISIZE\tSOFTCLIPS\tREAD_DEPTH\tKMER_COUNT\n"; 286 | while (!bam_signal_out.eof()) { // The bam_signal_out is the major player here. 287 | std::string tmp; 288 | if (std::getline(bam_signal_out, tmp).eof()) break; 289 | log << tmp << std::endl; 290 | } 291 | } 292 | 293 | void FilterCnvs(std::vector & cnvs, const std::string kmer_table, const float & unique_kmer, const float & kmer_score) { 294 | std::string chr_name, kmer_seq; 295 | bool load_kmer = false; 296 | std::vector::iterator ite = cnvs.begin(); 297 | //for (std::list::const_iterator ite = cnvs.begin(); ite != cnvs.end(); ++ite) { 298 | while (ite != cnvs.end()) { 299 | std::cerr << "Filter checking for " << ite->chr << "\t" << ite->pos << "\t" << ite->length << std::endl; 300 | if (chr_name != ite->chr) { 301 | load_kmer = Fastaq::FastaLoad(kmer_seq, kmer_table.c_str(), false, ite->chr.c_str()); 302 | if (!load_kmer) 303 | std::cerr << "WARNING: Cannot load kmer seqeunces " << ite->chr << " from " << kmer_table << std::endl; 304 | else 305 | std::cerr << "Message: Loading kmer of chromosome " << ite->chr << " is done." << std::endl; 306 | } 307 | if (load_kmer) { 308 | const unsigned int kmer_bin = 10; 309 | const unsigned int kmer_bin_length = ite->length / kmer_bin; 310 | std::vector uniq_kmers(kmer_bin, 0); 311 | for (unsigned int i = 0; i < kmer_bin; ++i) { 312 | for (unsigned int j = ite->pos + (i * ite->length / kmer_bin); 313 | j < ite->pos + ((i + 1) * ite->length / kmer_bin) && j < kmer_seq.size() - 1; ++j) { 314 | const unsigned int kmer_scale = static_cast(kmer_seq[j]) - 33; 315 | if (kmer_scale == 1) 316 | ++uniq_kmers[i]; 317 | if (kmer_scale == 2) 318 | uniq_kmers[i] += kmer_score; 319 | } 320 | uniq_kmers[i] = uniq_kmers[i] / (ite->length / static_cast(kmer_bin)); 321 | std::cerr << i << "\t" << uniq_kmers[i] << std::endl; 322 | } 323 | 324 | 325 | int leading_remove = 0; 326 | for (std::vector::const_iterator kmer_ite = uniq_kmers.begin(); kmer_ite != uniq_kmers.end(); ++kmer_ite) { 327 | if (*kmer_ite < unique_kmer) { 328 | ite->pos += kmer_bin_length; 329 | ite->length -= kmer_bin_length; 330 | ++leading_remove; 331 | } else { 332 | break; 333 | } 334 | } 335 | 336 | int tailing_remove = 0; 337 | for (std::vector::const_reverse_iterator kmer_ite = uniq_kmers.rbegin(); kmer_ite != uniq_kmers.rend(); ++kmer_ite) { 338 | if (*kmer_ite < unique_kmer) { 339 | ite->length -= kmer_bin_length; 340 | ++tailing_remove; 341 | } else { 342 | break; 343 | } 344 | } 345 | 346 | int uniq_kmer_count = 0; 347 | for (std::vector::const_iterator kmer_ite = uniq_kmers.begin(); kmer_ite != uniq_kmers.end(); ++kmer_ite) 348 | if (*kmer_ite > unique_kmer) ++uniq_kmer_count; 349 | 350 | std::cerr << uniq_kmer_count << "\t" << leading_remove << "\t" << tailing_remove << "\t" << uniq_kmer_count / static_cast(kmer_bin - leading_remove - tailing_remove) << std::endl; 351 | // Somehow >= 0.7 doesn't work. 352 | if ((uniq_kmer_count / static_cast(kmer_bin - leading_remove - tailing_remove)) > 0.69) { // the entire region pass the filter 353 | ++ite; 354 | std::cerr << "Keep" << std::endl; 355 | } else { // too many non uniq blocks 356 | cnvs.erase(ite); 357 | //if (ite != cnvs.end()) ++ite; 358 | std::cerr << "Filter" << std::endl; 359 | } 360 | } 361 | } 362 | } 363 | 364 | void ParseTargetRegion(const std::string & cmd_region, const std::string & bamfile, std::list & regions) { 365 | if (!cmd_region.empty()) { // Parse region from the command line. 366 | Fastaq::SRegion tmp_region; 367 | if (!tmp_region.Parse(cmd_region)) { 368 | std::cerr << "ERROR: The given region is not valid." << std::endl; 369 | return; 370 | } 371 | // Only chromosome name is given. 372 | // Need to find the length of the chromosome. 373 | if (tmp_region.begin == 0 && tmp_region.end == 0) { 374 | // Load bam header 375 | samFile * bam_reader = sam_open(bamfile.c_str(), "r"); 376 | bam_hdr_t *header; 377 | header = sam_hdr_read(bam_reader); 378 | for (int32_t i = 0; i < header->n_targets; ++i) { 379 | if (tmp_region.chr.compare(header->target_name[i]) == 0) { 380 | tmp_region.end = (header->target_len[i]) - 1; 381 | break; 382 | } 383 | } 384 | } 385 | regions.push_back(tmp_region); 386 | } else { // Parse regions from the bam header. 387 | // Load bam header 388 | samFile * bam_reader = sam_open(bamfile.c_str(), "r"); 389 | bam_hdr_t *header; 390 | header = sam_hdr_read(bam_reader); 391 | for (int32_t i = 0; i < header->n_targets; ++i) { 392 | Fastaq::SRegion tmp_region; 393 | tmp_region.chr = (header->target_name[i]); 394 | tmp_region.begin = 0; 395 | tmp_region.end = (header->target_len[i]) - 1; 396 | regions.push_back(tmp_region); 397 | } 398 | bam_hdr_destroy(header); 399 | sam_close(bam_reader); 400 | } 401 | } 402 | 403 | // If some chromosomes in regions cannot be found in fasta or kmer_table, they will be removed. 404 | bool CheckChrExistence(std::list & regions, const std::string & fasta, const std::string & kmer_table) { 405 | Fastaq::CReference fasta_header, kmer_table_header; 406 | Fastaq::HeaderLoad(fasta_header, fasta.c_str()); 407 | Fastaq::HeaderLoad(kmer_table_header, kmer_table.c_str()); 408 | 409 | // Make sure the headers of fasta and kemr_table are identical. 410 | if (fasta_header.GetReferenceCount() != kmer_table_header.GetReferenceCount()) { 411 | std::cerr << "ERROR: The numbers of references in " << fasta << " and " << kmer_table << " do not match." << std::endl; 412 | return false; 413 | } 414 | for (unsigned int i = 0; i < fasta_header.GetReferenceCount(); ++i) { 415 | const char* chr_name = fasta_header.GetReferenceName(i); 416 | if (kmer_table_header.GetReferenceId(chr_name) == -1) { 417 | std::cerr << "ERROR: The reference " << chr_name << " in " << fasta << " cannot be found in " << kmer_table << "." << std::endl; 418 | return false; 419 | } 420 | } 421 | 422 | // Make sure the references in bam header are all in fasta header. 423 | // If not, we remove them from the further analysis. 424 | bool exist = true; 425 | std::list::iterator ite = regions.begin(); 426 | while (ite != regions.end() && !regions.empty()) { 427 | if (fasta_header.GetReferenceId(ite->chr.c_str()) == -1) { 428 | std::cerr << "Warning: " << ite->chr << " is not in fasta so it won't be further processed." << std::endl; 429 | regions.erase(ite); 430 | exist = false; 431 | if (ite != regions.end()) ++ite; 432 | } 433 | if (ite != regions.end()) ++ite; 434 | } 435 | 436 | return exist; 437 | 438 | } 439 | 440 | } // namespace 441 | 442 | GetCnvSignal::GetCnvSignal(int argc, char** argv) 443 | : cmdline(argc, argv) 444 | { 445 | } 446 | 447 | int GetCnvSignal::Run () const { 448 | if (!cmdline.CheckArg()) { 449 | std::cerr << cmdline.Help("GetCnvSignal"); 450 | return 1; 451 | } 452 | 453 | // Parse region. 454 | // Parse region from the command line or parse regions from the bam header. 455 | std::list regions; 456 | ParseTargetRegion(cmdline.region, cmdline.bam, regions); 457 | if (!CheckChrExistence(regions, cmdline.fasta, cmdline.kmer_table)) return 1; 458 | 459 | // Check BAI 460 | samFile * bam_reader = sam_open(cmdline.bam.c_str(), "r"); 461 | hts_idx_t * idx = sam_index_load(bam_reader, cmdline.bam.c_str()); 462 | if (idx == NULL && sam_index_build(cmdline.bam.c_str(), 0) < 0) { // Try to build bam index 463 | std::cerr << "ERROR: The region givin but bam index cannot be built and loaded." << std::endl; 464 | sam_close(bam_reader); 465 | return 1; 466 | } 467 | sam_close(bam_reader); 468 | 469 | // Estimate Coverage 470 | int coverage = cmdline.coverage; 471 | bool female = false, male = false; 472 | if (coverage == 0) { 473 | std::vector coverages; 474 | coverage = EstimateCoverage::EstimateCoverage(coverages, female, male, cmdline.bam.c_str(), cmdline.kmer_table.c_str()); 475 | if (female && !male) std::cerr << "Gender: Female." << std::endl; 476 | else if (!female && male) std::cerr << "Gender: Male." << std::endl; 477 | else std::cerr << "Gender: Cannot determine." << std::endl; 478 | 479 | std::cerr << "Message: The estimated coverage is " << coverage << std::endl; 480 | } 481 | 482 | // Process BAM by regions 483 | std::string ref_seq, kmer_seq, ref_name; 484 | std::vector cnvs; 485 | std::stringstream bam_signal_out; 486 | for (std::list::const_iterator ite = regions.begin(); ite != regions.end(); ++ite) { 487 | std::cerr << "Message: Processing " << ite->chr << ":" << ite->begin << "-" << ite->end << std::endl; 488 | // The chromosome is not in ref. Load it from fasta. 489 | if (ref_name != ite->chr) { 490 | ref_name = ite->chr; // Keep the new chr name. 491 | // Load a complete seq of the chromosome. 492 | if (!Fastaq::FastaLoad(ref_seq, cmdline.fasta.c_str(), true, ite->chr.c_str())) { 493 | std::cerr << "ERROR: Cannot load chromosome " << ite->chr << " from " << cmdline.fasta << std::endl; 494 | return 1; 495 | } 496 | std::cerr << "Message: Loading chromosome " << ite->chr << " is done." << std::endl; 497 | if (!cmdline.log.empty()) { // Need to output kmer 498 | if (!Fastaq::FastaLoad(kmer_seq, cmdline.kmer_table.c_str(), true, ite->chr.c_str())) { 499 | std::cerr << "ERROR: Cannot load kmer seqeunces " << ite->chr << " from " << cmdline.kmer_table << std::endl; 500 | return 1; 501 | } 502 | std::cerr << "Message: Loading kmer of chromosome " << ite->chr << " is done." << std::endl; 503 | } 504 | } 505 | 506 | std::vector hmm_rd; // The list to collect read depth info for HMM. 507 | ProcessBam(hmm_rd, bam_signal_out, !cmdline.log.empty(), cmdline.bam.c_str(), *ite, cmdline.bin, ref_seq, kmer_seq, cmdline.aln_qual); 508 | // Perform HMM 509 | // TODO: Do not use the fix chromosome name. 510 | if (ref_name == "chrX" || ref_name == "X") { 511 | if (female == false && male == true) 512 | CallHmm::HmmAndViterbi(cnvs, ref_name, hmm_rd, cmdline.bin, cmdline.minimum_report_size, (coverage / 2)); 513 | else 514 | CallHmm::HmmAndViterbi(cnvs, ref_name, hmm_rd, cmdline.bin, cmdline.minimum_report_size, coverage); 515 | } 516 | else if (ref_name == "chrY" || ref_name == "Y") { 517 | if (female == false && male == true) // Must be male for detecting chrY 518 | CallHmm::HmmAndViterbi(cnvs, ref_name, hmm_rd, cmdline.bin, cmdline.minimum_report_size, (coverage / 2)); 519 | } else { 520 | CallHmm::HmmAndViterbi(cnvs, ref_name, hmm_rd, cmdline.bin, cmdline.minimum_report_size, coverage); 521 | } 522 | } 523 | 524 | std::cerr << "Message: HMM completes." << std::endl; 525 | for (std::vector::const_iterator ite = cnvs.begin(); ite != cnvs.end(); ++ite) { 526 | std::cerr << ite->stats << "\t" << ite->chr << "\t" << ite->pos << "\t" << ite->pos + ite->length - 1 << "\t" << ite->length << std::endl; 527 | } 528 | 529 | FilterCnvs(cnvs, cmdline.kmer_table, cmdline.unique_kmer, cmdline.kmer_score); 530 | 531 | std::ofstream output; 532 | bool use_output_file = !cmdline.output.empty(); 533 | if (use_output_file) { 534 | output.open(cmdline.output, std::ofstream::out); 535 | if (!output.good()) { 536 | std::cerr << "ERROR: Cannot open " << cmdline.output << ". The result will be shown in stdout instead." << std::endl; 537 | use_output_file = false; 538 | } 539 | } 540 | for (std::vector::const_iterator ite = cnvs.begin(); ite != cnvs.end(); ++ite) { 541 | if (ite->length > cmdline.minimum_report_size) { 542 | std::string tmp; 543 | tmp = ite->chr + "\t" + std::to_string(ite->pos) + "\t" + std::to_string(ite->pos + ite->length) + "\t"; 544 | switch(ite->stats) { 545 | case 1: tmp += "DEL\tCN=0\n"; break; 546 | case 2: tmp += "DEL\tCN=1\n"; break; 547 | case 4: tmp += "DUP\tCN=3\n"; break; 548 | case 5: tmp += "DUP\tCN>3\n"; break; 549 | } 550 | if (use_output_file) output << tmp; 551 | else std::cout << tmp; 552 | } 553 | } 554 | 555 | // Open a file for outputing log 556 | if (!cmdline.log.empty()) { 557 | std::ofstream log; 558 | log.open(cmdline.log, std::ofstream::out); 559 | PrintResults(log, bam_signal_out); 560 | log.close(); 561 | } 562 | 563 | return 0; 564 | } 565 | -------------------------------------------------------------------------------- /include/CallHmm.h: -------------------------------------------------------------------------------- 1 | #ifndef _CALLHMM_H_ 2 | #define _CALLHMM_H_ 3 | 4 | #include "DataStruct.h" 5 | 6 | namespace CallHmm { 7 | bool HmmAndViterbi (std::vector & cnvs, const std::string & ref_name, const std::vector & read_depth, 8 | const int bin_size, const unsigned int minimum_report_size, const int coverage); 9 | 10 | //namespace ReadDepthHmm { 11 | const int hmm_N = 5; 12 | const int hmm_M = 201; 13 | static const double hmm_pi[5] = {0.2, 0.2, 0.2, 0.2, 0.2}; 14 | static const double hmm_A[5][5] = { 15 | {0.99999996, 0.00000001, 0.00000001, 0.00000001, 0.00000001}, 16 | {0.00000001, 0.99999996, 0.00000001, 0.00000001, 0.00000001}, 17 | {0.00000001, 0.00000001, 0.99999996, 0.00000001, 0.00000001}, 18 | {0.00000001, 0.00000001, 0.00000001, 0.99999996, 0.00000001}, 19 | {0.00000001, 0.00000001, 0.00000001, 0.00000001, 0.99999996} 20 | }; 21 | static const double hmm_B[5][201] = { 22 | {0.367879441171442334024277442950, 0.367879441171442334024277442950, 0.183939720585721139256563105846, 0.061313240195240391317010875127, 0.015328310048810104768146622689, 0.003065662009762019565850543756, 0.000510943668293669349400598634, 0.000072991952613381494386832204, 0.000009123994076672691880551709, 0.000001013777119630296956728018, 0.000000101377711963029325095887, 0.000000009216155633002717654195, 0.000000000768012969416889277673, 0.000000000059077920724376272269, 0.000000000004219851480312613610, 0.000000000000281323432020839302, 0.000000000000017582714501302516, 0.000000000000001034277323606021, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000}, 23 | {0.000000000013887943864964030590, 0.000000000347198596624100680736, 0.000000004339982457801263291344, 0.000000036166520481677151908320, 0.000000226040753010482492248935, 0.000001130203765052411005406796, 0.000004709182354385050055733345, 0.000016818508408517966577636407, 0.000052557838776618838255109273, 0.000145993996601718979341799631, 0.000364984991504296852043304211, 0.000829511344327948539276174067, 0.001728148634016561787860033483, 0.003323362757724153664778965478, 0.005934576353078827026799046962, 0.009890960588464734262736932635, 0.015454625919476135467722777150, 0.022727391058053141720263212733, 0.031565820913962816529618748973, 0.041533974886793001424223348295, 0.051917468608491314230324320533, 0.061806510248203994084548185128, 0.070234670736595450346584357249, 0.076342033409342882621650971942, 0.079522951468065497526716001175, 0.079522951468065497526716001175, 0.076464376411601447935950659485, 0.070800348529260592744805080656, 0.063214596901125533201515338533, 0.054495342156142705103416545853, 0.045412785130118919763031470893, 0.036623213814612073924159574290, 0.028611885792665686656377488362, 0.021675671055049736124509252022, 0.015937993422830646489396499987, 0.011384281016307654446628738754, 0.007905750705769203079409379598, 0.005341723449844071325287586660, 0.003514291743318438600990338827, 0.002252751117511812232324652783, 0.001407969448444894950897565700, 0.000858517956368838337094517854, 0.000511022593076690813222728860, 0.000297106158765515976968168976, 0.000168810317480406877786378028, 0.000093783509711337014282984959, 0.000050969298756161654734373606, 0.000027111329125617993150276080, 0.000014120483919592669331557165, 0.000007204328530404456266348755, 0.000003602164265202201875153013, 0.000001765766796667749039976896, 0.000000848926344551801193239133, 0.000000400436954977262217641884, 0.000000185387479156138276409336, 0.000000084267035980064081351742, 0.000000037619212491100281872215, 0.000000016499654601359690961675, 0.000000007111920086793055270563, 0.000000003013525460505508761289, 0.000000001255635608543953574201, 0.000000000514604757599981949378, 0.000000000207501918387090660607, 0.000000000082342031105987271914, 0.000000000032164855900776441872, 0.000000000012371098423375625652, 0.000000000004686022130066459314, 0.000000000001748515720174052673, 0.000000000000642836661828701036, 0.000000000000232911833995910318, 0.000000000000083182797855681046, 0.000000000000029289717554817147, 0.000000000000010170040817644826, 0.000000000000003482890690974271, 0.000000000000001176652260464280, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000}, 24 | {0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000004185655052005027, 0.000000000000029897536085750398, 0.000000000000186859600535939321, 0.000000000001038108891866325845, 0.000000000005190544459331648611, 0.000000000023593383906052879202, 0.000000000098305766275220573422, 0.000000000378099101058539245293, 0.000000001350353932351924007504, 0.000000004501179774506436932993, 0.000000014066186795332592047752, 0.000000041371137633331095081666, 0.000000114919826759253298503325, 0.000000302420596734877442645381, 0.000000756051491837190589058578, 0.000001800122599612360199108560, 0.000004091187726391730343348564, 0.000008893886361721106738613581, 0.000018528929920252447865807940, 0.000037057859840505004151833129, 0.000071265115077893967181731083, 0.000131972435329432390138659770, 0.000235665063088272129119757348, 0.000406319074290127201908723320, 0.000677198457150210105827403684, 0.001092255576048726606677718998, 0.001706649337576133466237715552, 0.002585832329660809648930186455, 0.003802694602442364783800865524, 0.005432420860631982650890137165, 0.007545028973099989107187202109, 0.010195985098783747965778267996, 0.013415769866820630162873584368, 0.017199704957462442350690778881, 0.021499631196827979212615744586, 0.026219062435156120782764332944, 0.031213169565662043924980650900, 0.036294383215886100169100814128, 0.041243617290779660544686180401, 0.045826241434199624369405512425, 0.049811131993695238751040932357, 0.052990565950739615397324655532, 0.055198506198687109780554038707, 0.056325006325190926648183165071, 0.056325006325190919709289261164, 0.055220594436461684084704870656, 0.053096725419674702106664909707, 0.050091250395919530158383992102, 0.046380787403629192278131654348, 0.042164352185117444371531547631, 0.037646743022426286395809569285, 0.033023458791602011697108309818, 0.028468498958277594496069085039, 0.024125846574811520700576394916, 0.020104872145676265515756853119, 0.016479403398095303356907237458, 0.013289841450076883902498003920, 0.010547493214346689502369791569, 0.008240229073708390042374283269, 0.006338637749006422253406078937, 0.004801998294701848891963269494, 0.003583580816941684711596227686, 0.002634985894810043180469527613, 0.001909410068702944432081358705, 0.001363864334787819000346931197, 0.000960467841399868403270967399, 0.000666991556527689083595789032, 0.000456843531868282180710000295, 0.000308678062073164101528771619, 0.000205785374715441478484997950, 0.000135385114944367872290634236, 0.000087912412301537835499463946, 0.000056354110449703226056253347, 0.000035667158512470506373862045, 0.000022291974070294381579920157, 0.000013760477821169350389407179, 0.000008390535256810593967274209, 0.000005054539311331662141096432, 0.000003008654351983157698333885, 0.000001769796677637147389459591, 0.000001028951556765774792218593, 0.000000591351469405615907685351, 0.000000335995153071373627733288, 0.000000188761321950211120830810, 0.000000104867401083449455072394, 0.000000057619451144753043193858, 0.000000031314919100409300727595, 0.000000016835978010972563015254, 0.000000008955307452645112852156, 0.000000004713319711918449146248, 0.000000002454854016624197106124, 0.000000001265388668362993211150, 0.000000000645606463450511914482, 0.000000000326063870429546498433, 0.000000000163031935214772783927, 0.000000000080708878819194413321, 0.000000000039563175891762229356, 0.000000000019205425190175653152, 0.000000000009233377495276785528, 0.000000000004396846426322294402, 0.000000000002073984163359544855, 0.000000000000969151478205401421, 0.000000000000448681239909898495, 0.000000000000205817082527481761, 0.000000000000093553219330673500, 0.000000000000042141089788591118, 0.000000000000018812986512764359, 0.000000000000008324330315382495, 0.000000000000003651022068150171, 0.000000000000001587400899195752, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000}, 25 | {0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000002358715592765131, 0.000000000000009310719445125457, 0.000000000000034915197919220805, 0.000000000000124697135425787717, 0.000000000000425103870769733059, 0.000000000001386208274249131465, 0.000000000004331900857028511494, 0.000000000012995702571085521556, 0.000000000037487603570439257267, 0.000000000104132232140108578365, 0.000000000278925621803861368905, 0.000000000721359366734126916482, 0.000000001803398416835314085879, 0.000000004363060685891907162120, 0.000000010225923482559094131901, 0.000000023240735187634395383907, 0.000000051266327619781821670927, 0.000000109856416328103348660535, 0.000000228867534016881484479377, 0.000000463920677061250178435236, 0.000000915632915252456833304321, 0.000001760832529331662408395541, 0.000003301560992496891791455347, 0.000006039440839933319088840644, 0.000010784715785595138301251709, 0.000018810550788828714742582232, 0.000032063438844594528416391543, 0.000053439064740991059135593461, 0.000087128909903789965221004854, 0.000139035494527322601738400509, 0.000217242960198942036166569469, 0.000332514734998381447563658986, 0.000498772102497574421064996386, 0.000733488386025844991966682329, 0.001057915941383424322103090631, 0.001497050860448241940692248875, 0.002079237306178120456290336904, 0.002835323599333812883555028250, 0.003797308391964889905806357362, 0.004996458410480148631949148808, 0.006460937599758837142360778927, 0.008213056270879847894916814255, 0.010266320338599806832879934859, 0.012622525006475163178443210654, 0.015269183475574796965412716077, 0.018177599375684277971387814432, 0.021301874268380018018209653974, 0.024579085694284630303485528202, 0.027930779198050720985024852894, 0.031265797609758269604007807629, 0.034484335598998099026157859726, 0.037482973477171832588084043891, 0.040160328725541254157871406960, 0.042422882456557661767782008155, 0.044190502558914233899312051790, 0.045401201259158453782394104792, 0.046014731005903836136283757696, 0.046014731005903843075177661603, 0.045409274018984056853209807514, 0.044229812356153301589056781040, 0.042528665727070487367900142317, 0.040375315563674504804048126516, 0.037851858340944843916986428667, 0.035048016982356343385074382013, 0.032056113093618610054846840285, 0.028966367253269821591121768734, 0.025862827904705204540292129423, 0.022820142268857524642244882784, 0.019901286862375751340881180340, 0.017156281777910131064457388561, 0.014621831060718860559322074266, 0.012321767747796785394398533242, 0.010268139789830659988623118295, 0.008462752574036259967105166879, 0.006898983076660084555464802492, 0.005563696029564508746534379213, 0.004439119172524859900652582922, 0.003504567767782792737235597613, 0.002737943568580300957721052058, 0.002116966676737371509664509261, 0.001620127558727564066887172345, 0.001227369362672390889698603722, 0.000920527022004301786681224051, 0.000683559669805179018903240618, 0.000502617404268501828668025411, 0.000365983546797458395190283387, 0.000263930442402012974904140785, 0.000188521744572868970137372036, 0.000133388026820423775172658187, 0.000093496280481605796553422338, 0.000064927972556670638217338198, 0.000044675210474773204829754270, 0.000030460370778254761538281908, 0.000020581331606928810883539588, 0.000013782141701068316712482098, 0.000009147439182125102440893034, 0.000006018052093503329630212219, 0.000003924816582719511612060287, 0.000002537596928482451688055066, 0.000001626664697745161327637441, 0.000001033897053651616904695258, 0.000000651615790116554945291601, 0.000000407259868822846033478973, 0.000000252433802989366900104814, 0.000000155184714952479222980136, 0.000000094624826190537274952347, 0.000000057232757776534069806024, 0.000000034339654665920234095844, 0.000000020440270634476901125031, 0.000000012071025965241870041595, 0.000000007072866776508930470901, 0.000000004112131846807517638693, 0.000000002372383757773578271864, 0.000000001358234975824599298885, 0.000000000771724418082136090778, 0.000000000435182942527514165191, 0.000000000243572542459438031695, 0.000000000135318079144130780776, 0.000000000074623940704482633902, 0.000000000040852522283476387309, 0.000000000022202457762758715085, 0.000000000011979743397172193359, 0.000000000006417719677056348441, 0.000000000003413680679285406706, 0.000000000001803000358777512860, 0.000000000000945629558799363405, 0.000000000000492515395208014652, 0.000000000000254749342348965892, 0.000000000000130864388192961413, 0.000000000000066767544996411058, 0.000000000000033834904558990769, 0.000000000000017030992227680453, 0.000000000000008515496113840222, 0.000000000000004229551049920634, 0.000000000000002086949531210833, 0.000000000000001023014476083739, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000}, 26 | {0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000001413774234662685, 0.000000000000004284164347462688, 0.000000000000012600483374890216, 0.000000000000036001381071115200, 0.000000000000100003836308653006, 0.000000000000270280638672034754, 0.000000000000711264838610620235, 0.000000000001823755996437484572, 0.000000000004559389991093676090, 0.000000000011120463392911535920, 0.000000000026477293792646258062, 0.000000000061575101843363720873, 0.000000000139943413280371533607, 0.000000000310985362845272372406, 0.000000000676055136620144465062, 0.000000001438415184298209047010, 0.000000002996698300621251184185, 0.000000006115710817594376075974, 0.000000012231421635188861339788, 0.000000023983179676840692959337, 0.000000046121499378539894432312, 0.000000087021696940640496179066, 0.000000161151290630814874867642, 0.000000293002346601486966440044, 0.000000523218476074074434057205, 0.000000917927151007159919657633, 0.000001582633018977864405824963, 0.000002682428845725182875214853, 0.000004470714742875299850999229, 0.000007329040562090697761774005, 0.000011821033164662357326847066, 0.000018763544705813352237539979, 0.000029318038602833131842919853, 0.000045104674773589299642973655, 0.000068340416323620475002806329, 0.000102000621378538357969002759, 0.000150000913791967400989779691, 0.000217392628684012707883724258, 0.000310560898120019253810381032, 0.000437409715661997910637892595, 0.000607513493975000615741899335, 0.000832210265719172362537670029, 0.001124608467188074499218397584, 0.001499477956250759927425697526, 0.001972997310856245030408206986, 0.002562334169943186138601598500, 0.003285043807619493060750981428, 0.004158283300784129839222647007, 0.005197854125980171623166992134, 0.006417103859234779203446485951, 0.007825736413700982996299870820, 0.009428598088796357259977476417, 0.011224521534281385951037357529, 0.013205319452095746340924975470, 0.015355022618715985852078098617, 0.017649451285880440876141506124, 0.020056194643045959641325381995, 0.022535050160725791229543446548, 0.025038944623028656921714940609, 0.027515323761569957655703788646, 0.029907960610402125473106238474, 0.032159097430539920758896244024, 0.034211805777170124776009885181, 0.036012427133863295081184219271, 0.037512944931107600199382545725, 0.038673139104234637020152121067, 0.039462386841055754893581308806, 0.039860996809147224151370636491, 0.039860996809147224151370636491, 0.039466333474403196557300788072, 0.038692483798434498809548642839, 0.037565518250907280195516335652, 0.036120690625872392542916600178, 0.034400657738926081197217143881, 0.032453450697100073851775192679, 0.030330327754299137715809919769, 0.028083636809536242223694912923, 0.025764804412418566836873168313, 0.023422549465835062337815131173, 0.021101395915166725608669295866, 0.018840532067113145014980446490, 0.016673037227533757387298862795, 0.014625471252222597973169371244, 0.012717801088889211028054582187, 0.010963621628352770639991398127, 0.009370616776369888531461604941, 0.007941200657940582913196791992, 0.006673277863815615419673665798, 0.005561064886513014150770661814, 0.004595921393812407371171069315, 0.003767148683452793168913164124, 0.003062722506872170031427815573, 0.002469937505542098375027615376, 0.001975950004433676705090094927, 0.001568214289233065515671849965, 0.001234814400970919321884333719, 0.000964698750758538722989421377, 0.000747828488960111952173204219, 0.000575252683815461124741674759, 0.000439124186118672766177689626, 0.000332669837968693521586260875, 0.000250127697720819921079621340, 0.000186662460985688813007096765, 0.000138268489619028112904905448, 0.000101668007072813800363390857, 0.000074210224140739289994117556, 0.000053775524739667495441146350, 0.000038687427870264213755364252, 0.000027633877050188914814392305, 0.000019598494361836295784250195, 0.000013801756592842213985212162, 0.000009651578036952607558243873, 0.000006702484747883803106141987, 0.000004622403274402643686357382, 0.000003166029640001823873091081, 0.000002153761659865147560789485, 0.000001455244364773777568798272, 0.000000976674070317956121179965, 0.000000651116046878639849339700, 0.000000431202680052081386334010, 0.000000283685973718467028701701, 0.000000185415669097038491098099, 0.000000120399785127946901291227, 0.000000077677280727708237533232, 0.000000049793128671606563163767, 0.000000031715368580641860858278, 0.000000020073018089013494266423, 0.000000012624539678625256986327, 0.000000007890337299140756044748, 0.000000004900830620584209327980, 0.000000003025204086780433637859, 0.000000001855953427472705524688, 0.000000001131678919190651241989, 0.000000000685866011630696870171, 0.000000000413172296163066315153, 0.000000000247408560576684187020, 0.000000000147267000343268424618, 0.000000000087140236889506994322, 0.000000000051258962876180899650, 0.000000000029976001681976921985, 0.000000000017427907954637533811, 0.000000000010073935233894458109, 0.000000000005789617950514084825, 0.000000000003308353114579542878, 0.000000000001879746087829253937, 0.000000000001062003439451543514, 0.000000000000596631145759310169, 0.000000000000333313489251012579, 0.000000000000185174160695001998, 0.000000000000102306166129835238, 0.000000000000056212179192218461, 0.000000000000030717037809955025, 0.000000000000016694042288019258, 0.000000000000009023806642172485, 0.000000000000004851508947404523, 0.000000000000002594389811446256, 0.000000000000001379994580556567, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000, 0.000000000000000000000000000000} 27 | }; 28 | 29 | } // namespace CallHmm 30 | 31 | #endif //_CALLHMM_H_ 32 | --------------------------------------------------------------------------------