├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── doc ├── FPKM_HTSeq.png ├── FPKM_featureCounts.png ├── Figure_1.jpg ├── Figure_2.jpg ├── Gencode_v25_FPKM_HTSeq.png ├── Gencode_v25_FPKM_featureCounts.png ├── Gencode_v25_TPMCalculator_HTSEq.png ├── Gencode_v25_TPMCalculator_RSeQC.png ├── Gencode_v25_TPMCalculator_featureCounts.png ├── Gencode_v25_featureCounts_HTSeq.png ├── Gene_model.png ├── TPMCalculator_HTSEq.png ├── TPMCalculator_RSeQC.png ├── TPMCalculator_featureCounts.png ├── TableS3.png └── featureCounts_HTSeq.png ├── includes ├── DiffExpIR.h ├── Exceptions.h ├── FastaFactory.h ├── GenomeFactory.h ├── Global.h ├── RandomFactory.h ├── ReadFactory.h ├── Sequence.h ├── Stats.h ├── TextParser.h ├── TimeUtils.h ├── bmath.h └── bstring.h ├── nbproject ├── Makefile-Release.mk ├── Makefile-impl.mk ├── Makefile-variables.mk ├── Package-Release.bash ├── configurations.xml ├── private │ ├── Makefile-variables.mk │ ├── c_standard_headers_indexer.c │ ├── configurations.xml │ ├── cpp_standard_headers_indexer.cpp │ ├── launcher.properties │ └── private.xml ├── project.properties └── project.xml ├── src ├── DiffExpIR.cpp ├── FastaFactory.cpp ├── RandomFactory.cpp ├── ReadFactory.cpp ├── Stats.cpp ├── TextParser.cpp ├── bd0.c ├── bratio.c ├── bstring.cpp ├── chebyshev.c ├── choose.c ├── dnorm.c ├── dt.c ├── gamma.c ├── lbeta.c ├── lgamma.c ├── lgammacor.c ├── main.cpp ├── pbeta.c ├── phyper.c ├── pnorm.c ├── pt.c ├── qnorm.c ├── qt.c ├── stirlerr.c ├── sunif.c └── wilcox.c └── tpmcalculator.cwl /.gitignore: -------------------------------------------------------------------------------- 1 | /bin 2 | /build 3 | /dist 4 | .dep.inc 5 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Base Image 2 | FROM ubuntu:18.04 3 | 4 | # Metadata 5 | LABEL base.image="ubuntu:18.04" 6 | LABEL software="TPMCalculator" 7 | LABEL 
software.version="0.0.3" 8 | LABEL description="This program calculates the TPM (Transcript per Millions) values for the exons and introns from NGS RNA-Seq aligned reads (BAM files)" 9 | LABEL website="https://github.com/ncbi/TPMCalculator" 10 | LABEL documentation="https://github.com/ncbi/TPMCalculator" 11 | LABEL license="http://www.gnu.org/licenses/" 12 | LABEL tags="RNA-seq" 13 | 14 | # Maintainer 15 | MAINTAINER Roberto Vera Alvarez 16 | 17 | ENV URL=https://github.com/ncbi/TPMCalculator 18 | ENV BAMTOOLS_URL=https://github.com/pezmaster31/bamtools 19 | ENV FOLDER=TPMCalculator 20 | ENV BAMTOOLS_FOLDER=bamtools 21 | ENV DST=/tmp 22 | ENV BAMTOOLS_DIR=/usr/local 23 | ENV CPPFLAGS="-I $BAMTOOLS_DIR/include/bamtools" 24 | ENV LDFLAGS="-L $BAMTOOLS_DIR/lib/bamtools -Wl,-rpath,$BAMTOOLS_DIR/lib/bamtools" 25 | 26 | USER root 27 | 28 | RUN apt-get clean all && \ 29 | apt-get update && \ 30 | apt-get -y upgrade && \ 31 | apt-get install -y apt-utils && \ 32 | apt-get install -y tzdata && \ 33 | apt-get install -y software-properties-common && \ 34 | apt-get install -y gcc g++ perl wget zip make && \ 35 | apt-get install -y unzip cmake git libjsoncpp-dev zlib1g-dev && \ 36 | apt-get clean && \ 37 | apt-get purge && \ 38 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 39 | 40 | RUN cd $DST && \ 41 | git clone $BAMTOOLS_URL && \ 42 | cd $BAMTOOLS_FOLDER && \ 43 | mkdir build && \ 44 | cd build && \ 45 | cmake .. 
&& \ 46 | make && \ 47 | make install && \ 48 | cd $DST && \ 49 | rm -rf $BAMTOOLS_FOLDER 50 | 51 | RUN cd $DST && \ 52 | git clone $URL && \ 53 | cd $FOLDER && \ 54 | make && \ 55 | mv $DST/$FOLDER/bin/* /usr/local/bin/ && \ 56 | rm -rf $DST/$FOLDER 57 | 58 | RUN adduser --disabled-password --gecos '' ubuntu 59 | RUN chmod a+rwx /home/ubuntu/ 60 | RUN mkdir /home/ubuntu/bin 61 | RUN chown -R ubuntu /home/ubuntu 62 | USER ubuntu 63 | 64 | WORKDIR /data/ 65 | 66 | CMD ["TPMCalculator"] 67 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | # Public Domain notice 2 | 3 | National Center for Biotechnology Information. 4 | 5 | This software is a "United States Government Work" under the terms of the United States 6 | Copyright Act. It was written as part of the authors' official duties as United States 7 | Government employees and thus cannot be copyrighted. This software is freely available 8 | to the public for use. The National Library of Medicine and the U.S. Government have not 9 | placed any restriction on its use or reproduction. 10 | 11 | Although all reasonable efforts have been taken to ensure the accuracy and reliability 12 | of the software and data, the NLM and the U.S. Government do not and cannot warrant the 13 | performance or results that may be obtained by using this software or data. The NLM and 14 | the U.S. Government disclaim all warranties, express or implied, including warranties 15 | of performance, merchantability or fitness for any particular purpose. 16 | 17 | Please cite NCBI in any work or product based on this material. 
18 | 19 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # There exist several targets which are by default empty and which can be 3 | # used for execution of your targets. These targets are usually executed 4 | # before and after some main targets. They are: 5 | # 6 | # .build-pre: called before 'build' target 7 | # .build-post: called after 'build' target 8 | # .clean-pre: called before 'clean' target 9 | # .clean-post: called after 'clean' target 10 | # .clobber-pre: called before 'clobber' target 11 | # .clobber-post: called after 'clobber' target 12 | # .all-pre: called before 'all' target 13 | # .all-post: called after 'all' target 14 | # .help-pre: called before 'help' target 15 | # .help-post: called after 'help' target 16 | # 17 | # Targets beginning with '.' are not intended to be called on their own. 18 | # 19 | # Main targets can be executed directly, and they are: 20 | # 21 | # build build a specific configuration 22 | # clean remove built files from a configuration 23 | # clobber remove all built files 24 | # all build all configurations 25 | # help print help message 26 | # 27 | # Targets .build-impl, .clean-impl, .clobber-impl, .all-impl, and 28 | # .help-impl are implemented in nbproject/Makefile-impl.mk. 29 | # 30 | # Available make variables: 31 | # 32 | # CND_BASEDIR base directory for relative paths 33 | # CND_DISTDIR default top distribution directory (build artifacts) 34 | # CND_BUILDDIR default top build directory (object files, ...) 
35 | # CONF name of current configuration 36 | # CND_PLATFORM_${CONF} platform name (current configuration) 37 | # CND_ARTIFACT_DIR_${CONF} directory of build artifact (current configuration) 38 | # CND_ARTIFACT_NAME_${CONF} name of build artifact (current configuration) 39 | # CND_ARTIFACT_PATH_${CONF} path to build artifact (current configuration) 40 | # CND_PACKAGE_DIR_${CONF} directory of package (current configuration) 41 | # CND_PACKAGE_NAME_${CONF} name of package (current configuration) 42 | # CND_PACKAGE_PATH_${CONF} path to package (current configuration) 43 | # 44 | # NOCDDL 45 | 46 | 47 | # Environment 48 | MKDIR=mkdir 49 | CP=cp 50 | CCADMIN=CCadmin 51 | 52 | 53 | # build 54 | build: .build-post 55 | 56 | .build-pre: 57 | # Add your pre 'build' code here... 58 | 59 | .build-post: .build-impl 60 | # Add your post 'build' code here... 61 | 62 | 63 | # clean 64 | clean: .clean-post 65 | 66 | .clean-pre: 67 | # Add your pre 'clean' code here... 68 | 69 | .clean-post: .clean-impl 70 | # Add your post 'clean' code here... 71 | 72 | 73 | # clobber 74 | clobber: .clobber-post 75 | 76 | .clobber-pre: 77 | # Add your pre 'clobber' code here... 78 | 79 | .clobber-post: .clobber-impl 80 | # Add your post 'clobber' code here... 81 | 82 | 83 | # all 84 | all: .all-post 85 | 86 | .all-pre: 87 | # Add your pre 'all' code here... 88 | 89 | .all-post: .all-impl 90 | # Add your post 'all' code here... 91 | 92 | 93 | # build tests 94 | build-tests: .build-tests-post 95 | 96 | .build-tests-pre: 97 | # Add your pre 'build-tests' code here... 98 | 99 | .build-tests-post: .build-tests-impl 100 | # Add your post 'build-tests' code here... 101 | 102 | 103 | # run tests 104 | test: .test-post 105 | 106 | .test-pre: build-tests 107 | # Add your pre 'test' code here... 108 | 109 | .test-post: .test-impl 110 | # Add your post 'test' code here... 111 | 112 | 113 | # help 114 | help: .help-post 115 | 116 | .help-pre: 117 | # Add your pre 'help' code here... 
118 | 119 | .help-post: .help-impl 120 | # Add your post 'help' code here... 121 | 122 | 123 | 124 | # include project implementation makefile 125 | include nbproject/Makefile-impl.mk 126 | 127 | # include project make variables 128 | include nbproject/Makefile-variables.mk 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | TPMCalculator 2 | ============= 3 | 4 | [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/tpmcalculator/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/tpmcalculator/badges/downloads.svg)](https://anaconda.org/bioconda/tpmcalculator) [![Anaconda-Server Badge](https://anaconda.org/bioconda/tpmcalculator/badges/license.svg)](https://anaconda.org/bioconda/tpmcalculator) 5 | 6 | TPMCalculator quantifies mRNA abundance directly from the alignments by parsing BAM files. 7 | The input parameters are the same GTF files used to generate the alignments, and one or 8 | multiple input BAM file(s) containing either single-end or paired-end sequencing reads. 9 | The TPMCalculator output is comprised of four files per sample reporting the TPM values 10 | and raw read counts for genes, transcripts, exons and introns respectively. 
11 | 12 | ## Reference 13 | 14 | * Roberto Vera Alvarez, Lorinc Sandor Pongor, Leonardo Mariño-Ramírez, David Landsman; TPMCalculator: one-step software to quantify mRNA abundance of genomic features, Bioinformatics, bty896, https://doi.org/10.1093/bioinformatics/bty896 15 | 16 | ## Conda/Bioconda 17 | 18 | TPMCalculator is available on Bioconda: https://bioconda.github.io/recipes/tpmcalculator/README.html 19 | 20 | ## NIH Biowulf 21 | 22 | NIH Biowulf users can load TPMCalculator as a module: https://hpc.nih.gov/apps/TPMCalculator.html 23 | 24 | ## Requirements 25 | 26 | ### BAMTools 27 | 28 | Clone the BAMTools repository from GitHub: https://github.com/pezmaster31/bamtools 29 | 30 | Compile it this way and set the environment variables for TPMCalculator: 31 | 32 | cd bamtools 33 | mkdir build 34 | cd build 35 | cmake -DCMAKE_INSTALL_PREFIX=../ .. 36 | make 37 | make install 38 | cd .. 39 | export BAMTOOLS_DIR=`pwd` 40 | export CPPFLAGS="-I $BAMTOOLS_DIR/include/bamtools/" 41 | export LDFLAGS="-L $BAMTOOLS_DIR/lib64 -Wl,-rpath,$BAMTOOLS_DIR/lib64" 42 | 43 | That's it. BAMTools is now compiled and the environment variables are set for compiling 44 | TPMCalculator. 45 | 46 | ## Installation 47 | 48 | After installing BAMTools, go to the TPMCalculator folder and run make: 49 | 50 | make 51 | 52 | A bin folder will be created with the TPMCalculator executable. 53 | 54 | ## Docker 55 | 56 | Use the provided [Dockerfile](https://raw.githubusercontent.com/ncbi/TPMCalculator/master/Dockerfile) 57 | based on the [BioContainers](https://biocontainers.pro/) base image. 
58 | 59 | docker build -t biocontainers/tpmcalculator:0.0.1 https://raw.githubusercontent.com/ncbi/TPMCalculator/master/Dockerfile 60 | 61 | docker run -v /path_to_data:/data --user=yourUID:yourGID biocontainers/tpmcalculator:0.0.1 TPMCalculator -g /data/path_to_GTF/genes.gtf -b /data/path_to_bam/sample1.bam 62 | 63 | ## [CWL](https://github.com/common-workflow-language) 64 | 65 | A CWL tool definition is also provided: [tpmcalculator.cwl](https://raw.githubusercontent.com/ncbi/TPMCalculator/master/tpmcalculator.cwl) 66 | 67 | Use it like this: 68 | 69 | cwl-runner tpmcalculator.cwl --out_stderr=test.stderr --out_stdout=test.stdout -g genes.gtf -b sample_1.bam 70 | 71 | ## Usage 72 | 73 | Usage: ./bin/TPMCalculator -g GTF_file [-d BAM_files_directory|-b BAM_file] 74 | 75 | ./bin/TPMCalculator options: 76 | 77 | -v Print info 78 | -h Display this usage information. 79 | -g GTF file 80 | -d Directory with the BAM files 81 | -b BAM file 82 | -k Gene key to use from GTF file. Default: gene_id 83 | -t Transcript key to use from GTF file. Default: transcript_id 84 | -c Smallest size allowed for an intron created for genes. Default: 16. We recommend using the read length 85 | -p Use only properly paired reads. Default: No. Recommended for paired-end reads. 86 | -q Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value. 87 | -o Minimum overlap between a read and a feature. Default: 8. 88 | -e Extended output. This will include transcript level TPM values. Default: No. 89 | -a Print out all features with read counts equal to zero. Default: No. 90 | 91 | ## Description 92 | 93 | The model to describe the genomic features used for a gene is created from the GTF provided 94 | by the user. TPMCalculator performs two transformations which are executed on the genomic 95 | coordinates generating regions for the genes that include the exons and “pure” intron 96 | regions as shown in Figure S1. 
The first transformation creates overlapped exons for 97 | all alternatively spliced forms of the genes. A single gene model is generated with unique 98 | exons and introns which includes the sequence of all exonic regions. The second transformation 99 | process creates a list of pure intron regions that replace those generated by the first 100 | transformation. Note that only the intron regions are modified to generate 101 | regions not overlapped by exons of other genes. Reporting TPM values for these unique 102 | introns allows further identification of alternative splicing events like intron retention. 103 | Additionally, a set of non-overlapped gene features (exons and introns) is generated and 104 | used for TPM calculation. 105 | 106 | ![Gene model](https://github.com/ncbi/TPMCalculator/raw/master/doc/Gene_model.png) 107 | 108 | ## Validation 109 | 110 | * [UCSC hg19](https://github.com/ncbi/TPMCalculator/wiki/Validation#validation-using-ucsc-hg19-genome-annotation) 111 | * [Gencode v25](https://github.com/ncbi/TPMCalculator/wiki/Validation#validation-using-gencode-v25-genome-annotation) 112 | 113 | For a more detailed description and installation guidelines, see https://github.com/ncbi/TPMCalculator/wiki/ 114 | 115 | ## Credits 116 | 117 | Roberto Vera Alvarez 118 | Email: veraalva@ncbi.nlm.nih.gov 119 | 120 | Lorinc Pongor 121 | Email: pongorlorinc@gmail.com 122 | 123 | Leonardo Mariño-Ramírez 124 | Email: marino@ncbi.nlm.nih.gov 125 | 126 | David Landsman 127 | Email: landsman@ncbi.nlm.nih.gov 128 | 129 | # Public Domain notice 130 | 131 | ## National Center for Biotechnology Information. 132 | 133 | This software is a "United States Government Work" under the terms of the United States 134 | Copyright Act. It was written as part of the authors' official duties as United States 135 | Government employees and thus cannot be copyrighted. This software is freely available 136 | to the public for use. The National Library of Medicine and the U.S. 
Government have not 137 | placed any restriction on its use or reproduction. 138 | 139 | Although all reasonable efforts have been taken to ensure the accuracy and reliability 140 | of the software and data, the NLM and the U.S. Government do not and cannot warrant the 141 | performance or results that may be obtained by using this software or data. The NLM and 142 | the U.S. Government disclaim all warranties, express or implied, including warranties 143 | of performance, merchantability or fitness for any particular purpose. 144 | 145 | Please cite NCBI in any work or product based on this material. 146 | 147 | -------------------------------------------------------------------------------- /doc/FPKM_HTSeq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/FPKM_HTSeq.png -------------------------------------------------------------------------------- /doc/FPKM_featureCounts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/FPKM_featureCounts.png -------------------------------------------------------------------------------- /doc/Figure_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Figure_1.jpg -------------------------------------------------------------------------------- /doc/Figure_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Figure_2.jpg -------------------------------------------------------------------------------- /doc/Gencode_v25_FPKM_HTSeq.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gencode_v25_FPKM_HTSeq.png -------------------------------------------------------------------------------- /doc/Gencode_v25_FPKM_featureCounts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gencode_v25_FPKM_featureCounts.png -------------------------------------------------------------------------------- /doc/Gencode_v25_TPMCalculator_HTSEq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gencode_v25_TPMCalculator_HTSEq.png -------------------------------------------------------------------------------- /doc/Gencode_v25_TPMCalculator_RSeQC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gencode_v25_TPMCalculator_RSeQC.png -------------------------------------------------------------------------------- /doc/Gencode_v25_TPMCalculator_featureCounts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gencode_v25_TPMCalculator_featureCounts.png -------------------------------------------------------------------------------- /doc/Gencode_v25_featureCounts_HTSeq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gencode_v25_featureCounts_HTSeq.png -------------------------------------------------------------------------------- 
/doc/Gene_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/Gene_model.png -------------------------------------------------------------------------------- /doc/TPMCalculator_HTSEq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/TPMCalculator_HTSEq.png -------------------------------------------------------------------------------- /doc/TPMCalculator_RSeQC.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/TPMCalculator_RSeQC.png -------------------------------------------------------------------------------- /doc/TPMCalculator_featureCounts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/TPMCalculator_featureCounts.png -------------------------------------------------------------------------------- /doc/TableS3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/TableS3.png -------------------------------------------------------------------------------- /doc/featureCounts_HTSeq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ncbi/TPMCalculator/ce60a388f03ae2e65f2ed9e0bc6807c650a184ad/doc/featureCounts_HTSeq.png -------------------------------------------------------------------------------- /includes/DiffExpIR.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: DiffExpIR.h 3 | * 
Author: veraalva 4 | * 5 | * Created on September 6, 2017, 9:50 AM 6 | */ 7 | 8 | #ifndef DIFFEXPIR_H 9 | #define DIFFEXPIR_H 10 | 11 | #include "ReadFactory.h" 12 | 13 | namespace ngs { 14 | 15 | /* Result record for one intron: keeps the rvalue pair, the gene and intron handles, the chromosome name, the p-value, the log2 TPM ratio and the two TPM values handed to the constructor, and exposes them through getters. NOTE(review): the template arguments of std::pair, std::shared_ptr and std::vector are missing in this listing - confirm them against the real header. */ class DiffExpIntron { 16 | public: 17 | 18 | /* Copies every argument into the member of the same name. */ DiffExpIntron(std::pair rvalue, SPtrGeneNGS g, SPtrFeatureNGS i, std::string chr, double pvalue, double log2TPMRatio, double TPM_1, double TPM_2) : 19 | rvalue(rvalue), g(g), i(i), chr(chr), pvalue(pvalue), log2TPMRatio(log2TPMRatio), TPM_1(TPM_1), TPM_2(TPM_2) { 20 | } 21 | 22 | virtual ~DiffExpIntron() { 23 | 24 | } 25 | 26 | /* Returns a copy of the chromosome name. */ std::string getChr() const { 27 | return chr; 28 | } 29 | 30 | /* Returns a mutable reference to the stored gene handle. */ SPtrGeneNGS& getGene() { 31 | return g; 32 | } 33 | 34 | /* Returns a mutable reference to the stored intron handle. */ SPtrFeatureNGS& getIntron() { 35 | return i; 36 | } 37 | 38 | double getLog2TPMRatio() { 39 | return log2TPMRatio; 40 | } 41 | 42 | double getPvalue() { 43 | return pvalue; 44 | } 45 | 46 | /* First element of the rvalue pair. */ double getRvalueFirst() { 47 | return rvalue.first; 48 | } 49 | 50 | /* Second element of the rvalue pair. */ double getRvalueSecond() { 51 | return rvalue.second; 52 | } 53 | 54 | double getTPM_1() const { 55 | return TPM_1; 56 | } 57 | 58 | double getTPM_2() const { 59 | return TPM_2; 60 | } 61 | 62 | private: 63 | std::pair rvalue; 64 | SPtrGeneNGS g; 65 | SPtrFeatureNGS i; 66 | std::string chr; 67 | double pvalue; 68 | double log2TPMRatio; 69 | double TPM_1; 70 | double TPM_2; 71 | }; 72 | 73 | /* Shared-pointer alias, presumably for DiffExpIntron (template argument not visible here - verify). */ typedef std::shared_ptr SptrDiffExpIntron; 74 | 75 | /* Declares the differential-expression entry points and the result printer; the member functions are only declared in this header, their definitions live elsewhere. */ class DiffExpIR { 76 | public: 77 | 78 | DiffExpIR() { 79 | } 80 | 81 | virtual ~DiffExpIR() { 82 | } 83 | 84 | void calculateDiffExpIR(ReadFactory& readFactory, std::vector samples, std::string method, bool useFDR); 85 | void calculateDiffExpIRUnique(ReadFactory& readFactory, std::vector samples, std::string method, bool useFDR); 86 | 87 | void printDiffExpIR(std::string output_name, double fc_cutoff, double pvalue_cutoff, double r_cutoff); 88 | 89 | private: 90 | std::vector pvalue; 91 | std::vector diffexpIRdata; 92 | }; 93 | 94 | } 95 | 96 | #endif /* DIFFEXPIR_H */ 97 | 98 | 
-------------------------------------------------------------------------------- /includes/Exceptions.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Exceptions.h 3 | * Author: veraalva 4 | * 5 | * Created on March 24, 2016, 1:20 PM 6 | */ 7 | 8 | #ifndef EXCEPTIONS_H 9 | #define EXCEPTIONS_H 10 | 11 | /* NOTE(review): the classes below use std::exception and std::string, but no include of <exception> or <string> is visible in this header; it presumably relies on the including file - confirm. Every class stores a copy of the message and returns it from what(). */ namespace exceptions { 12 | 13 | /** 14 | * Exception to be thrown if there are problems opening files 15 | */ 16 | class FileHandledException : public std::exception { 17 | public: 18 | 19 | explicit FileHandledException(const char* message) : msg(message) { 20 | } 21 | 22 | explicit FileHandledException(const std::string& message) : msg(message) { 23 | } 24 | 25 | virtual ~FileHandledException() { 26 | } 27 | 28 | /* Pointer stays valid only while this exception object is alive. */ virtual const char* what() const throw () { 29 | return msg.c_str(); 30 | } 31 | 32 | private: 33 | std::string msg; 34 | }; 35 | 36 | /** 37 | * Exception to be thrown when an element is not found in the container 38 | */ 39 | class NotFoundException : public std::exception { 40 | public: 41 | 42 | explicit NotFoundException(const char* message) : msg(message) { 43 | } 44 | 45 | explicit NotFoundException(const std::string& message) : msg(message) { 46 | } 47 | 48 | virtual ~NotFoundException() { 49 | } 50 | 51 | virtual const char* what() const throw () { 52 | return msg.c_str(); 53 | } 54 | 55 | private: 56 | std::string msg; 57 | }; 58 | 59 | /* Message-carrying exception; judging by its name it signals an empty dataset (no throw site is visible here - verify). */ class EmptyDatasetException : public std::exception { 60 | public: 61 | 62 | explicit EmptyDatasetException(const char* message) : msg(message) { 63 | } 64 | 65 | explicit EmptyDatasetException(const std::string& message) : msg(message) { 66 | } 67 | 68 | virtual ~EmptyDatasetException() { 69 | } 70 | 71 | virtual const char* what() const throw () { 72 | return msg.c_str(); 73 | } 74 | 75 | private: 76 | std::string msg; 77 | }; 78 | 79 | // ML_ERR_return_NAN 80 | class NANException : public std::exception { 81 | public: 82 | 83 | explicit NANException(const 
char* message) : msg(message) { 84 | } 85 | 86 | explicit NANException(const std::string& message) : msg(message) { 87 | } 88 | 89 | virtual ~NANException() { 90 | } 91 | 92 | virtual const char* what() const throw () { 93 | return msg.c_str(); 94 | } 95 | 96 | private: 97 | std::string msg; 98 | }; 99 | } 100 | 101 | #endif /* EXCEPTIONS_H */ 102 | 103 | -------------------------------------------------------------------------------- /includes/FastaFactory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FastaFactory.h 3 | * Author: veraalva 4 | * 5 | * Created on February 10, 2016, 3:41 PM 6 | */ 7 | 8 | #ifndef FASTAFACTORY_H 9 | #define FASTAFACTORY_H 10 | 11 | namespace formats { 12 | 13 | /* Static-only entry points for reading and writing DNA FASTA data; definitions are not in this header. */ class FastaFactory { 14 | public: 15 | 16 | FastaFactory() { 17 | } 18 | 19 | virtual ~FastaFactory() { 20 | } 21 | 22 | static void parseDNAFastaInDirectory(sequence::DNAContainer &seqContainer, std::string dirName, std::string prefix, std::string sufix, bool binary); 23 | static long unsigned int parseDNAFastaFile(sequence::DNAContainer &seqContainer, std::string fName, bool binary); 24 | static void writeDNASequencesToFile(sequence::DNAContainer &seqContainer, std::string fileName, bool binary); 25 | private: 26 | 27 | }; 28 | 29 | } 30 | 31 | #endif /* FASTAFACTORY_H */ 32 | 33 | -------------------------------------------------------------------------------- /includes/Global.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Global.h 3 | * Author: veraalva 4 | * 5 | * Created on February 11, 2016, 1:11 PM 6 | */ 7 | 8 | #ifndef GLOBAL_H 9 | #define GLOBAL_H 10 | 11 | /** 12 | * Singleton that holds process-wide settings (currently only the verbosity level). The instance is created lazily by instance() with an unsynchronized null check. 13 | */ 14 | class Global { 15 | int verbose; 16 | static Global *s_instance; 17 | 18 | Global() { 19 | verbose = 0; 20 | } 21 | public: 22 | 23 | /* Returns the raw verbosity level; the return type used to be bool, which collapsed every non-zero level to 1. */ int getVerbose() { 24 | return verbose; 25 | } 26 | 27 | /* True when verbosity is at least 1. */ bool isInfo() { 28 | if (verbose >= 1) return 
true; 29 | return false; 30 | } 31 | 32 | /* True when verbosity is at least 2. */ bool isDebug2() { 33 | if (verbose >= 2) return true; 34 | return false; 35 | } 36 | 37 | /* True when verbosity is at least 3. */ bool isDebug3() { 38 | if (verbose >= 3) return true; 39 | return false; 40 | } 41 | 42 | void setVerbose(int v) { 43 | verbose = v; 44 | } 45 | 46 | /* Returns the shared instance, allocating it on first use. */ static Global *instance() { 47 | if (!s_instance) 48 | s_instance = new Global; 49 | return s_instance; 50 | } 51 | 52 | /* Only forgets the singleton pointer. The previous body deleted s_instance from inside its own destructor, which re-entered this destructor on the same object without end. */ virtual ~Global() { 53 | if (s_instance == this) s_instance = nullptr; 54 | } 55 | 56 | }; 57 | 58 | /* NOTE(review): std::cerr and std::endl are used below but <iostream> is not included in the visible header text - confirm the includer provides it. */ class Log { 59 | public: 60 | 61 | /* Writes the message followed by a newline to standard error. */ static void PrintCerrMessage(std::string message) { 62 | std::cerr << message << std::endl; 63 | } 64 | 65 | }; 66 | #endif /* GLOBAL_H */ 67 | 68 | -------------------------------------------------------------------------------- /includes/RandomFactory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: RandomFactory.h 3 | * Author: veraalva 4 | * 5 | * Created on May 3, 2017, 11:15 AM 6 | */ 7 | 8 | #ifndef RANDOMFACTORY_H 9 | #define RANDOMFACTORY_H 10 | 11 | using uint32 = unsigned int; 12 | 13 | /* Wraps a std::mt19937 engine: default construction seeds it from std::random_device, the one-argument constructor uses the given seed. DrawNumber is defined elsewhere. */ class Random { 14 | public: 15 | Random() = default; 16 | 17 | Random(std::mt19937::result_type seed) : eng(seed) { 18 | } 19 | uint32 DrawNumber(uint32 min, uint32 max); 20 | 21 | private: 22 | std::mt19937 eng{std::random_device{}()}; 23 | }; 24 | 25 | #endif /* RANDOMFACTORY_H */ 26 | 27 | -------------------------------------------------------------------------------- /includes/ReadFactory.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: ReadFactory.h 3 | * Author: veraalva 4 | * 5 | * Created on May 6, 2016, 10:49 AM 6 | */ 7 | 8 | #ifndef READFACTORY_H 9 | #define READFACTORY_H 10 | 11 | #include "GenomeFactory.h" 12 | 13 | 14 | namespace ngs { 15 | 16 | class SampleData { 17 | public: 18 | 19 | SampleData() { 20 | this->reads = 0; 21 | this->TPM = 0.0; 22 | 23 | this->exonReads = 0; 24 | this->exonTPM = 0.0; 25 | 26 | this->intronReads = 0; 
27 | this->intronTPM = 0.0; 28 | 29 | this->uniqueReads = 0; 30 | this->uniqueTPM = 0.0; 31 | 32 | this->uniqueExonReads = 0; 33 | this->uniqueExonTPM = 0.0; 34 | 35 | this->uniqueIntronReads = 0; 36 | this->uniqueIntronTPM = 0.0; 37 | 38 | this->bridgesReads = 0; 39 | this->bridgesTPM = 0.0; 40 | } 41 | 42 | SampleData(int reads) { 43 | this->reads = reads; 44 | this->TPM = 0.0; 45 | 46 | this->exonReads = 0; 47 | this->exonTPM = 0.0; 48 | 49 | this->intronReads = 0; 50 | this->intronTPM = 0.0; 51 | 52 | this->uniqueReads = 0; 53 | this->uniqueTPM = 0.0; 54 | 55 | this->uniqueExonReads = 0; 56 | this->uniqueExonTPM = 0.0; 57 | 58 | this->uniqueIntronReads = 0; 59 | this->uniqueIntronTPM = 0.0; 60 | 61 | this->bridgesReads = 0; 62 | this->bridgesTPM = 0.0; 63 | } 64 | 65 | virtual ~SampleData() { 66 | } 67 | 68 | int getReads() const { 69 | return reads; 70 | } 71 | 72 | void increaseReads() { 73 | this->reads++; 74 | } 75 | 76 | void increaseReads(int reads) { 77 | this->reads += reads; 78 | } 79 | 80 | double getTPM() const { 81 | return TPM; 82 | } 83 | 84 | void setTPM(double TPM) { 85 | this->TPM = TPM; 86 | } 87 | 88 | int getExonReads() const { 89 | return exonReads; 90 | } 91 | 92 | void increaseExonReads() { 93 | this->exonReads++; 94 | } 95 | 96 | void increaseExonReads(int exonCount) { 97 | this->exonReads += exonCount; 98 | } 99 | 100 | double getExonTPM() const { 101 | return exonTPM; 102 | } 103 | 104 | void setExonTPM(double exonTPM) { 105 | this->exonTPM = exonTPM; 106 | } 107 | 108 | int getIntronReads() const { 109 | return intronReads; 110 | } 111 | 112 | void increaseIntronReads(int intronCount) { 113 | this->intronReads += intronCount; 114 | } 115 | 116 | void increaseIntronReads() { 117 | this->intronReads++; 118 | } 119 | 120 | double getIntronTPM() const { 121 | return intronTPM; 122 | } 123 | 124 | void setIntronTPM(double intronTPM) { 125 | this->intronTPM = intronTPM; 126 | } 127 | 128 | double getUniqueTPM() const { 129 | return 
uniqueTPM; 130 | } 131 | 132 | void setUniqueTPM(double uniqueTPM) { 133 | this->uniqueTPM = uniqueTPM; 134 | } 135 | 136 | int getUniqueReads() const { 137 | return uniqueReads; 138 | } 139 | 140 | void increaseUniqueReads() { 141 | this->uniqueReads++; 142 | } 143 | 144 | void increaseUniqueReads(int uniqueReads) { 145 | this->uniqueReads += uniqueReads; 146 | } 147 | 148 | int getUniqueExonReads() { 149 | return uniqueExonReads; 150 | } 151 | 152 | void increaseUniqueExonReads() { 153 | this->uniqueExonReads++; 154 | } 155 | 156 | void increaseUniqueExonReads(int uniqueReadsExon) { 157 | this->uniqueExonReads += uniqueReadsExon; 158 | } 159 | 160 | double getUniqueExonTPM() const { 161 | return uniqueExonTPM; 162 | } 163 | 164 | void setUniqueExonTPM(double uniqueExonTPM) { 165 | this->uniqueExonTPM = uniqueExonTPM; 166 | } 167 | 168 | int getUniqueIntronReads() const { 169 | return uniqueIntronReads; 170 | } 171 | 172 | void increaseUniqueIntronReads() { 173 | this->uniqueIntronReads++; 174 | } 175 | 176 | void increaseUniqueIntronReads(int uniqueReadsIntron) { 177 | this->uniqueIntronReads += uniqueReadsIntron; 178 | } 179 | 180 | double getUniqueIntronTPM() const { 181 | return uniqueIntronTPM; 182 | } 183 | 184 | void setUniqueIntronTPM(double uniqueIntronTPM) { 185 | this->uniqueIntronTPM = uniqueIntronTPM; 186 | } 187 | 188 | int getBridgesReads() const { 189 | return bridgesReads; 190 | } 191 | 192 | void increaseBridgesReads() { 193 | this->bridgesReads++; 194 | } 195 | 196 | double getBridgesTPM() const { 197 | return bridgesTPM; 198 | } 199 | 200 | void setBridgesTPM(double bridgesTPM) { 201 | this->bridgesTPM = bridgesTPM; 202 | } 203 | 204 | double getValueFromColumn(std::string column) { 205 | if (column.compare("Reads") == 0) { 206 | return this->getReads(); 207 | } else if (column.compare("TPM") == 0) { 208 | return this->getTPM(); 209 | } else if (column.compare("ExonReads") == 0) { 210 | return this->getExonReads(); 211 | } else if 
(column.compare("ExonTPM") == 0) { 212 | return this->getExonTPM(); 213 | } else if (column.compare("IntronReads") == 0) { 214 | return this->getIntronReads(); 215 | } else if (column.compare("IntronTPM") == 0) { 216 | return this->getIntronTPM(); 217 | } else if (column.compare("UniqueReads") == 0) { 218 | return this->getUniqueReads(); 219 | } else if (column.compare("UniqueTPM") == 0) { 220 | return this->getUniqueTPM(); 221 | } else if (column.compare("UniqueExonReads") == 0) { 222 | return this->getUniqueExonReads(); 223 | } else if (column.compare("UniqueExonTPM") == 0) { 224 | return this->getUniqueExonTPM(); 225 | } else if (column.compare("UniqueIntronReads") == 0) { 226 | return this->getUniqueIntronReads(); 227 | } else if (column.compare("UniqueIntronTPM") == 0) { 228 | return this->getUniqueIntronTPM(); 229 | } else if (column.compare("BridgesReads") == 0) { 230 | return this->getBridgesReads(); 231 | } else if (column.compare("BridgesTPM") == 0) { 232 | return this->getBridgesTPM(); 233 | } 234 | return 0.0; 235 | } 236 | 237 | private: 238 | int reads; 239 | double TPM; 240 | 241 | int exonReads; 242 | double exonTPM; 243 | 244 | int intronReads; 245 | double intronTPM; 246 | 247 | int uniqueReads; 248 | double uniqueTPM; 249 | 250 | int uniqueExonReads; 251 | double uniqueExonTPM; 252 | 253 | int uniqueIntronReads; 254 | double uniqueIntronTPM; 255 | 256 | int bridgesReads; 257 | double bridgesTPM; /* double like every other *TPM field; as float it narrowed values set and read through the double setter/getter */ 258 | 259 | }; 260 | 261 | typedef std::shared_ptr<SampleData> SPtrSampleData; 262 | typedef std::unordered_map<std::string, SPtrSampleData> SampleDataUnMap; 263 | 264 | class ReadData { 265 | public: 266 | 267 | ReadData() { 268 | } 269 | 270 | virtual ~ReadData() { 271 | } 272 | 273 | SampleDataUnMap getData() const { 274 | return data; 275 | } 276 | 277 | SPtrSampleData createSampleData(std::string sampleName) { 278 | SPtrSampleData sampleData; 279 | try { 280 | sampleData = getSampleData(sampleName); 281 | } catch (const exceptions::NotFoundException &) { /* by const reference: no copy or slicing of the exception; a missing sample is created below */ 282 | sampleData = 
std::make_shared(SampleData(0)); 283 | data.insert(std::pair (sampleName, sampleData)); 284 | } 285 | return sampleData; 286 | } 287 | 288 | SPtrSampleData getSampleData(std::string sampleName) { 289 | SampleDataUnMap::iterator it = data.find(sampleName); 290 | if (it == data.end()) { 291 | throw exceptions::NotFoundException("Sample with name: " + sampleName + " does not exist"); 292 | } 293 | return it->second; 294 | } 295 | 296 | void increaseReads(std::string sampleName) { 297 | SPtrSampleData sampleData = createSampleData(sampleName); 298 | sampleData->increaseReads(); 299 | } 300 | 301 | private: 302 | SampleDataUnMap data; 303 | }; 304 | 305 | typedef genome::GenomeFactory GenomeFactoryNGS; 306 | typedef genome::SPtrChromosome SPtrChromosomeNGS; 307 | typedef genome::SPtrGene SPtrGeneNGS; 308 | typedef genome::SPtrIsoform SPtrIsoformNGS; 309 | typedef genome::SPtrFeature SPtrFeatureNGS; 310 | typedef genome::GeneMultiSet GeneMultiSetNGS; 311 | 312 | class ReadFactory { 313 | public: 314 | ReadFactory(); 315 | virtual ~ReadFactory(); 316 | 317 | GenomeFactoryNGS& getGenomeFactory() { 318 | return genomeFactory; 319 | } 320 | 321 | std::vector& getSamples() { 322 | return samples; 323 | } 324 | 325 | struct coordinateLessCMP { 326 | 327 | bool operator()(const std::pair a, const std::pair b) { 328 | return a.first < b.first; 329 | } 330 | }; 331 | 332 | int processBAMSAMFromDir(std::string dirName, bool onlyProperlyPaired, uint16_t minMAPQ, uint16_t minOverlap); 333 | int processReadsFromBAM(std::string bamFileName, std::string sampleName, bool onlyProperlyPaired, uint16_t minMAPQ, uint16_t minOverlap); 334 | std::vector processCigar(std::string cigar); 335 | void printResults(bool singleFile, bool extendedOutput, bool all_feat); 336 | void printResultsMatrix(std::string output_name, std::vector tpmColumns); 337 | 338 | void processReadAtGenomeLevel(std::string chrName, std::string sampleName, std::set < std::pair, coordinateLessCMP> read_coords, uint16_t 
minOverlap); 339 | // void processReadAtGenomeLevelUnique(std::string chrName, std::string sampleName, unsigned int start, unsigned int end, uint16_t minOverlap); 340 | void processReadAtGeneLevel(SPtrGeneNGS gene, std::string sampleName, std::set < std::pair, coordinateLessCMP> read_coords, uint16_t minOverlap); 341 | // void processReadAtGeneLevelUnique(SPtrGeneNGS gene, std::string sampleName, unsigned int start, unsigned int end, uint16_t minOverlap); 342 | // void processReadAtIsoformLevel(SPtrIsoformNGS isoform, std::string sampleName, unsigned int start, unsigned int end, uint16_t minOverlap); 343 | 344 | void loadTPMCalculatorGenesOutput(std::string dirName); 345 | 346 | void createSIMSingleReadsIR(std::string outFileName, 347 | sequence::DNAContainer seqContainer, 348 | unsigned int numberFeat, unsigned int intronNumber, unsigned int len); 349 | private: 350 | BamTools::BamReader reader; 351 | BamTools::SamHeader header; 352 | BamTools::RefVector references; 353 | GenomeFactoryNGS genomeFactory; 354 | std::vector samples; 355 | 356 | void calculateTPMperSample(std::string sampleName); 357 | //void processReadAtGenomeLevel(std::string chrName, std::string sampleName, unsigned int start, unsigned int end); 358 | // void processReadAtGeneLevel(std::shared_ptr> gene, std::string sampleName, unsigned int start, unsigned int end); 359 | // void processReadAtIsoformLevel(std::shared_ptr> isoform, std::string sampleName, unsigned int start, unsigned int end); 360 | // void PopulateReads(std::string sampleName); 361 | }; 362 | } 363 | 364 | 365 | #endif /* READFACTORY_H */ 366 | 367 | -------------------------------------------------------------------------------- /includes/Sequence.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: Sequence.h 3 | * Author: veraalva 4 | * 5 | * Created on May 5, 2016, 9:25 AM 6 | */ 7 | 8 | #ifndef SEQUENCE_H 9 | #define SEQUENCE_H 10 | 11 | namespace sequence { 12 | 13 | class 
Sequence { 14 | public: 15 | 16 | Sequence() { 17 | this->seq = ""; 18 | this->id = ""; 19 | this->description = ""; 20 | } 21 | 22 | virtual ~Sequence() { 23 | } 24 | 25 | Sequence(const Sequence& other) : 26 | id(other.id), description(other.description), seq(other.seq) { 27 | } 28 | 29 | std::string getSegment(unsigned long int pos, unsigned long int length) { 30 | return seq.substr(pos, length); 31 | } 32 | 33 | std::string getId() { 34 | return id; 35 | } 36 | 37 | void setId(std::string id) { 38 | this->id = id; 39 | } 40 | 41 | std::string &getSeq() { 42 | return seq; 43 | } 44 | 45 | void setSeq(std::string seq) { 46 | this->seq = seq; 47 | } 48 | 49 | std::string getDescription() { 50 | return description; 51 | } 52 | 53 | void setDescription(std::string desc) { 54 | description = desc; 55 | } 56 | 57 | unsigned long int getLength() { 58 | return seq.size(); 59 | } 60 | 61 | void shuffle() { 62 | unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); 63 | std::shuffle(seq.begin(), seq.end(), std::default_random_engine(seed)); 64 | } 65 | 66 | void reverse() { 67 | std::reverse(seq.begin(), seq.end()); 68 | } 69 | 70 | Sequence newSegment(unsigned long int pos, unsigned long int length) { 71 | Sequence s; 72 | s.setId(this->id); 73 | s.setDescription(this->description); 74 | s.setSeq(this->getSegment(pos, length)); 75 | return s; 76 | } 77 | 78 | private: 79 | std::string id; 80 | std::string description; 81 | std::string seq; 82 | }; 83 | 84 | class DNA : public Sequence { 85 | public: 86 | 87 | DNA() : 88 | Sequence() { 89 | } 90 | 91 | virtual ~DNA() { 92 | } 93 | 94 | // DNA shuffle() { 95 | // DNA s(*this); 96 | // s.shuffle(); 97 | // return s; 98 | // } 99 | 100 | DNA complement() { 101 | DNA s(*this); 102 | for (auto it = s.getSeq().begin(); it < s.getSeq().end(); ++it) { 103 | switch (*it) { 104 | case 'A': *it = 'T'; 105 | break; 106 | case 'T': *it = 'A'; 107 | break; 108 | case 'C': *it = 'G'; 109 | break; 110 | case 'G': 
*it = 'C'; 111 | break; 112 | case 'U': *it = 'A'; 113 | break; 114 | case 'R': *it = 'Y'; 115 | break; 116 | case 'Y': *it = 'R'; 117 | break; 118 | case 'K': *it = 'M'; 119 | break; 120 | case 'M': *it = 'K'; 121 | break; 122 | case 'B': *it = 'V'; 123 | break; 124 | case 'V': *it = 'B'; 125 | break; 126 | case 'D': *it = 'H'; 127 | break; 128 | case 'H': *it = 'D'; 129 | break; 130 | 131 | case 'a': *it = 't'; 132 | break; 133 | case 't': *it = 'a'; 134 | break; 135 | case 'c': *it = 'g'; 136 | break; 137 | case 'g': *it = 'c'; 138 | break; 139 | case 'u': *it = 'a'; 140 | break; 141 | case 'r': *it = 'y'; 142 | break; 143 | case 'y': *it = 'r'; 144 | break; 145 | case 'k': *it = 'm'; 146 | break; 147 | case 'm': *it = 'k'; 148 | break; 149 | case 'b': *it = 'v'; 150 | break; 151 | case 'v': *it = 'b'; 152 | break; 153 | case 'd': *it = 'h'; 154 | break; 155 | case 'h': *it = 'd'; 156 | break; 157 | } 158 | } 159 | return s; 160 | } 161 | 162 | DNA reverseComplement() { 163 | DNA s = complement(); 164 | s.reverse(); 165 | return s; 166 | } 167 | 168 | DNA newSegment(unsigned long int pos, unsigned long int length) { 169 | DNA s; 170 | s.setId(this->getId()); 171 | s.setDescription(this->getDescription()); 172 | s.setSeq(this->getSegment(pos, length)); 173 | return s; 174 | } 175 | }; 176 | 177 | typedef std::shared_ptr SPtrDNA; 178 | typedef std::unordered_map TDNAMap; 179 | 180 | /** 181 | * Class to store and manipulate sequences 182 | */ 183 | class DNAContainer { 184 | public: 185 | 186 | DNAContainer() { 187 | } 188 | 189 | virtual ~DNAContainer() { 190 | } 191 | 192 | void clearContainer() { 193 | sequences.clear(); 194 | } 195 | 196 | TDNAMap &getContainer() { 197 | return sequences; 198 | } 199 | 200 | SPtrDNA getFirstElement() { 201 | TDNAMap::iterator it = sequences.begin(); 202 | if (it == sequences.end()) { 203 | throw exceptions::NotFoundException("Not sequences on the container"); 204 | } 205 | return it->second; 206 | } 207 | 208 | SPtrDNA 
getDNAFromID(std::string id) { 209 | TDNAMap::iterator it = sequences.find(id); 210 | if (it == sequences.end()) { 211 | throw exceptions::NotFoundException("Id " + id + " was not found in the sequence container"); 212 | } 213 | return it->second; 214 | } 215 | 216 | std::pair addElement(std::string id) { 217 | std::pair < TDNAMap::iterator, bool> result; 218 | result = sequences.insert(std::make_pair(id, std::make_unique())); 219 | return std::make_pair(result.first->second, result.second); 220 | } 221 | 222 | unsigned long int size() { 223 | return sequences.size(); 224 | } 225 | private: 226 | TDNAMap sequences; 227 | }; 228 | 229 | } 230 | 231 | #endif /* SEQUENCE_H */ 232 | 233 | -------------------------------------------------------------------------------- /includes/Stats.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | /* 8 | * File: Stats.h 9 | * Author: veraalva 10 | * 11 | * Created on June 9, 2017, 4:22 PM 12 | */ 13 | 14 | #ifndef STATS_H 15 | #define STATS_H 16 | 17 | namespace stats { 18 | 19 | class WilcoxTest { 20 | public: 21 | 22 | WilcoxTest() { 23 | } 24 | 25 | virtual ~WilcoxTest() { 26 | } 27 | 28 | double pvalue(std::vector &x_in, std::vector &y_in); 29 | private: 30 | 31 | }; 32 | 33 | class TTest { 34 | public: 35 | 36 | TTest() { 37 | } 38 | 39 | virtual ~TTest() { 40 | } 41 | 42 | double pvalue(std::vector &x, std::vector &y); 43 | private: 44 | 45 | }; 46 | 47 | class Stats { 48 | public: 49 | 50 | Stats(){ 51 | } 52 | 53 | virtual ~Stats() { 54 | } 55 | 56 | double variance(std::vector &x); 57 | double variance(std::vector &x, double x_mean); 58 | private: 59 | }; 60 | 61 | class FDRCorrection { 62 | public: 63 | 64 | FDRCorrection() { 65 | } 66 | 67 | virtual ~FDRCorrection() { 68 | } 69 | 70 | std::vector fdr_correction(std::vector & c); 71 | private: 72 | }; 73 | } 74 | 75 | #endif /* STATS_H */ 76 | 77 | -------------------------------------------------------------------------------- /includes/TextParser.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FileParserFactory.h 3 | * Author: veraalva 4 | * 5 | * Created on April 11, 2016, 12:50 PM 6 | */ 7 | 8 | #ifndef TEXTPARSER_H 9 | #define TEXTPARSER_H 10 | 11 | namespace parsers { 12 | 13 | class TextParser { 14 | public: 15 | TextParser(); 16 | virtual ~TextParser(); 17 | 18 | void setFileToParse(std::string fileToParseName) { 19 | clean(); 20 | fileToParse.open(fileToParseName, std::ios::in | std::ios::binary); 21 | if (!fileToParse) { 22 | std::cerr << "Error opening file: " << fileToParseName << std::endl; 23 | exit(-1); 24 | } 25 | closeFile = true; 26 | fileToParse.seekg(0, fileToParse.end); 27 | if (static_cast (fileToParse.tellg()) < bufferSize) { 28 | bufferSize = static_cast (fileToParse.tellg()); 29 | } 30 | 
buffer.resize(bufferSize + 1); 31 | fileToParse.seekg(0, fileToParse.beg); 32 | } 33 | 34 | std::vector& getWords() { 35 | return words; 36 | } 37 | 38 | std::string& getLine() { 39 | return line; 40 | } 41 | 42 | bool lineStartWith(std::string s) { 43 | if (line.compare(0, s.size(), s) == 0) return true; 44 | return false; 45 | } 46 | 47 | bool iterate(std::string dontStartWith); 48 | bool iterate(std::string dontStartWith, std::string delimiter); 49 | 50 | private: 51 | bool closeFile; 52 | bool backup; 53 | std::ifstream fileToParse; 54 | std::string buffer; 55 | std::string line; 56 | std::vector words; 57 | unsigned long int bufferSize; 58 | unsigned long int currPosition; 59 | 60 | void clean(); 61 | }; 62 | 63 | } 64 | 65 | #endif /* FILEPARSERFACTORY_H */ 66 | 67 | -------------------------------------------------------------------------------- /includes/TimeUtils.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: TimeUtils.h 3 | * Author: veraalva 4 | * 5 | * Created on February 16, 2016, 8:49 AM 6 | */ 7 | 8 | #include 9 | 10 | #ifndef TIMEUTILS_H 11 | #define TIMEUTILS_H 12 | 13 | /** 14 | * Global class to calculate times 15 | */ 16 | class TimeUtils { 17 | public: 18 | 19 | TimeUtils() { 20 | this->startTime = clock(); 21 | this->begin = clock(); 22 | } 23 | 24 | void setStartTime() { 25 | startTime = clock(); 26 | } 27 | 28 | void setTime() { 29 | begin = clock(); 30 | } 31 | 32 | double getElapseTimeSec() { 33 | return double(clock() - begin) / CLOCKS_PER_SEC; 34 | } 35 | 36 | double getElapseTimeMin() { 37 | return getElapseTimeSec() / 60; 38 | } 39 | 40 | double getElapseTimeHour() { 41 | return getElapseTimeSec() / 3600; 42 | } 43 | 44 | double getTotalTimeSec() { 45 | return double(clock() - startTime) / CLOCKS_PER_SEC; 46 | } 47 | 48 | double getTotalTimeMin() { 49 | return getTotalTimeSec() / 60; 50 | } 51 | 52 | double GetTotalTimeHour() { 53 | return getTotalTimeSec() / 3600; 54 | } 55 | 56 
| double getElapseTimeSecFrom(clock_t b) { 57 | return double(clock() - b) / CLOCKS_PER_SEC; 58 | } 59 | 60 | double getElapseTimeMinFrom(clock_t b) { 61 | return getElapseTimeSecFrom(b) / 60; 62 | } 63 | 64 | double getElapseTimeHourFrom(clock_t b) { 65 | return getElapseTimeSecFrom(b) / 3600; 66 | } 67 | private: 68 | clock_t begin; 69 | clock_t startTime; 70 | }; 71 | 72 | #endif /* TIMEUTILS_H */ 73 | 74 | -------------------------------------------------------------------------------- /includes/bmath.h: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | /* 8 | * File: bmath.h 9 | * Author: veraalva 10 | * 11 | * Created on February 18, 2016, 2:49 PM 12 | */ 13 | 14 | #ifndef BMATH_H 15 | #define BMATH_H 16 | 17 | #ifdef __cplusplus 18 | extern "C" { 19 | #endif 20 | 21 | #define R_NaN NAN 22 | #define R_PosInf INFINITY 23 | #define R_NegInf -INFINITY 24 | 25 | #define ML_POSINF R_PosInf 26 | #define ML_NEGINF R_NegInf 27 | #define ML_NAN NAN 28 | 29 | #define ML_ERR_return_NAN { fprintf(stderr, "ML_ERR_return_NAN\n"); return NAN; } 30 | 31 | #ifndef M_SQRT2 32 | #define M_SQRT2 1.41421356237309504880168872420969808 /* sqrt(2) */ 33 | #endif 34 | 35 | #ifndef M_SQRT_32 36 | #define M_SQRT_32 5.656854249492380195206754896838 /* sqrt(32) */ 37 | #endif 38 | 39 | #ifndef M_1_SQRT_2PI 40 | #define M_1_SQRT_2PI 0.398942280401432677939946059934 /* 1/sqrt(2pi) */ 41 | #endif 42 | 43 | #ifndef M_SQRT_PI 44 | #define M_SQRT_PI 1.772453850905516027298167483341 /* sqrt(pi) */ 45 | #endif 46 | 47 | #ifndef M_LN_SQRT_2PI 48 | #define M_LN_SQRT_2PI 0.918938533204672741780329736406 /* log(sqrt(2*pi)) == log(2*pi)/2 */ 49 | #endif 50 | 51 | #ifndef M_LN2 52 | #define M_LN2 0.693147180559945309417232121458 /* ln(2) */ 53 | #endif 54 | 55 | 
#ifndef M_PI 56 | #define M_PI 3.14159265358979323846264338327950288 /* pi */ 57 | #endif 58 | 59 | #ifndef M_1_PI 60 | #define M_1_PI 0.318309886183790671537767526745028724 /* 1/pi */ 61 | #endif 62 | #ifndef M_PI_2 63 | #define M_PI_2 1.57079632679489661923132169163975144 /* pi/2 */ 64 | #endif 65 | 66 | #ifndef M_2PI 67 | #define M_2PI 6.283185307179586476925286766559 /* 2*pi */ 68 | #endif 69 | 70 | #ifndef M_LN_SQRT_PId2 71 | #define M_LN_SQRT_PId2 0.225791352644727432363097614947 /* log(sqrt(pi/2)) */ 72 | #endif 73 | 74 | #ifndef DBL_EPSILON 75 | #define DBL_EPSILON 2.2204460492503131E-16 76 | #endif 77 | 78 | #ifndef M_LOG10_2 79 | #define M_LOG10_2 0.301029995663981195213738894724 /* log10(2) */ 80 | #endif 81 | 82 | #define give_log log_p 83 | #define R_D__0 (log_p ? -INFINITY : 0.) /* 0 */ 84 | #define R_D__1 (log_p ? 0. : 1.) /* 1 */ 85 | #define R_DT_0 (lower_tail ? R_D__0 : R_D__1) /* 0 */ 86 | #define R_DT_1 (lower_tail ? R_D__1 : R_D__0) /* 1 */ 87 | #define R_D_nonint(x) (fabs((x) - floor((x)+0.5)) > 1e-7) 88 | #define R_D_negInonint(x) (x < 0. || R_D_nonint(x)) 89 | #define R_D_forceint(x) floor((x) + 0.5) 90 | #define R_Log1_Exp(x) ((x) > -M_LN2 ? log(-expm1(x)) : log1p(-exp(x))) 91 | #define R_DT_Log(p) (lower_tail? (p) : R_Log1_Exp(p)) 92 | #define R_D_Lval(p) (lower_tail ? (p) : (0.5 - (p) + 0.5)) /* p */ 93 | #define R_D_Cval(p) (lower_tail ? (0.5 - (p) + 0.5) : (p)) /* 1 - p */ 94 | #define R_DT_CIv(p) (log_p ? (lower_tail ? -expm1(p) : exp(p)) : R_D_Cval(p)) 95 | #define R_D_qIv(p) (log_p ? exp(p) : (p)) /* p in qF(p,..) */ 96 | #define R_D_exp(x) (log_p ? (x) : exp(x)) /* exp(x) */ 97 | #define R_D_log(p) (log_p ? (p) : log(p)) /* log(p) */ 98 | #define R_Q_P01_check(p) if ((log_p && p > 0) || (!log_p && (p < 0 || p > 1)) ) ML_ERR_return_NAN 99 | #define R_DT_qIv(p) (log_p ? (lower_tail ? exp(p) : - expm1(p)) : R_D_Lval(p)) 100 | #define R_D_LExp(x) (log_p ? 
R_Log1_Exp(x) : log1p(-x)) 101 | 102 | #define R_forceint(x) floor((x) + 0.5) 103 | #define R_nonint(x) (fabs((x) - (int) x) > 1e-7) 104 | #define R_IS_INT(x) (!R_nonint(x)) 105 | #define ISNAN(x) (isnan(x)!=0) 106 | #define R_FINITE(x) isfinite(x) 107 | #define Rboolean bool 108 | #define ODD(_K_) ((_K_) != 2 * floor((_K_) / 2.)) 109 | 110 | /* Wilcoxon Rank Sum Distribution */ 111 | 112 | #define WILCOX_MAX 50 113 | #define k_small_max 30 114 | 115 | #define max(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a > _b ? _a : _b; }) 116 | #define min(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; }) 117 | 118 | #define ML_VALID(x) (!ISNAN(x)) 119 | 120 | #define ME_NONE 0 121 | /* no error */ 122 | #define ME_DOMAIN 1 123 | /* argument out of domain */ 124 | #define ME_RANGE 2 125 | /* value out of range */ 126 | #define ME_NOCONV 4 127 | /* process did not converge */ 128 | #define ME_PRECISION 8 129 | /* does not have "full" precision */ 130 | #define ME_UNDERFLOW 16 131 | /* and underflow occured (important for IEEE)*/ 132 | 133 | #define R_Q_P01_boundaries(p, _LEFT_, _RIGHT_) \ 134 | if (log_p) { \ 135 | if(p > 0) \ 136 | ML_ERR_return_NAN; \ 137 | if(p == 0) /* upper bound*/ \ 138 | return lower_tail ? _RIGHT_ : _LEFT_; \ 139 | if(p == ML_NEGINF) \ 140 | return lower_tail ? _LEFT_ : _RIGHT_; \ 141 | } \ 142 | else { /* !log_p */ \ 143 | if(p < 0 || p > 1) \ 144 | ML_ERR_return_NAN; \ 145 | if(p == 0) \ 146 | return lower_tail ? _LEFT_ : _RIGHT_; \ 147 | if(p == 1) \ 148 | return lower_tail ? 
_RIGHT_ : _LEFT_; \ 149 | } 150 | 151 | extern void set_seed(unsigned int i1, unsigned int i2); 152 | 153 | extern void get_seed(unsigned int *i1, unsigned int *i2); 154 | 155 | extern double unif_rand(void); 156 | 157 | extern double lchoose(double n, double k); 158 | 159 | extern double choose(double n, double k); 160 | 161 | extern double stirlerr(double n); 162 | 163 | extern double lgammafn_sign(double x, int *sgn); 164 | 165 | extern double lgammafn(double x); 166 | 167 | extern double gammafn(double x); 168 | 169 | extern double lgammacor(double x); 170 | 171 | extern double chebyshev_eval(double x, const double *a, const int n); 172 | 173 | extern double phyper(double x, double NR, double NB, double n, 174 | int lower_tail, int log_p); 175 | 176 | extern double lbeta(double a, double b); 177 | 178 | extern double dwilcox(double x, double m, double n, int give_log); 179 | 180 | extern double pwilcox(double q, double m, double n, int lower_tail, int log_p); 181 | 182 | extern double qwilcox(double x, double m, double n, int lower_tail, int log_p); 183 | 184 | extern double rwilcox(double m, double n); 185 | 186 | extern double pnorm5(double x, double mu, double sigma, int lower_tail, int log_p); 187 | 188 | extern void pnorm_both(double x, double *cum, double *ccum, int i_tail, int log_p); 189 | 190 | extern void bratio(double a, double b, double x, double y, double *w, double *w1, 191 | int *ierr, int log_p); 192 | 193 | extern double Rf_d1mach(int i); 194 | 195 | extern double pbeta(double x, double a, double b, int lower_tail, int log_p); 196 | 197 | extern double pt(double x, double n, int lower_tail, int log_p); 198 | 199 | extern double qnorm5(double p, double mu, double sigma, int lower_tail, int log_p); 200 | 201 | extern double dnorm4(double x, double mu, double sigma, int give_log); 202 | 203 | extern double dt(double x, double n, int give_log); 204 | 205 | extern double bd0(double x, double np); 206 | 207 | 208 | #define pnorm pnorm5 209 | #define 
qnorm qnorm5 210 | #define dnorm dnorm4 211 | 212 | #ifdef __cplusplus 213 | } 214 | #endif 215 | 216 | #endif /* BMATH_H */ 217 | 218 | -------------------------------------------------------------------------------- /includes/bstring.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File: cString.h 3 | * Author: veraalva 4 | * 5 | * Created on April 13, 2016, 3:47 PM 6 | */ 7 | 8 | #ifndef BSTRING_H 9 | #define BSTRING_H 10 | 11 | class BString { 12 | public: 13 | BString(); 14 | virtual ~BString(); 15 | 16 | /** 17 | * Shuffle the string 18 | * @param str string to be shuffled 19 | * @return string 20 | */ 21 | static std::string shuffle(std::string str); 22 | 23 | /** 24 | * Count the number of occurrences of characters in c in the string str 25 | * 26 | * @param str the string to count on 27 | * @param c the characters to be counted 28 | * @return the number of occurrences 29 | */ 30 | static int countCharacter(std::string str, std::string characters); 31 | 32 | /** 33 | * Split string in a vector of strings using a delimiter 34 | * @param s string to be split 35 | * @param delim delimiter 36 | * @param elems vector with result 37 | * @return vector with result 38 | */ 39 | static std::vector &split(const std::string &s, std::string delim, std::vector &elems); 40 | 41 | /** 42 | * Split string in a set of strings using a delimiter 43 | * @param s string to be split 44 | * @param delim delimiter 45 | * @param elems vector with result 46 | * @return set with result 47 | */ 48 | static std::set &split(const std::string &s, std::string delim, std::set &elems); 49 | 50 | // trim from start 51 | 52 | static inline std::string <rim(std::string &s) { 53 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), 54 | std::not1(std::ptr_fun(std::isspace)))); 55 | return s; 56 | } 57 | 58 | // trim from end 59 | 60 | static inline std::string &rtrim(std::string &s) { 61 | s.erase(std::find_if(s.rbegin(), s.rend(), 62 | 
std::not1(std::ptr_fun(std::isspace))).base(), s.end()); 63 | return s; 64 | } 65 | 66 | // trim from both ends 67 | 68 | static inline std::string &trim(std::string &s) { 69 | return ltrim(rtrim(s)); 70 | } 71 | private: 72 | 73 | }; 74 | 75 | #endif /* CSTRING_H */ 76 | 77 | -------------------------------------------------------------------------------- /nbproject/Makefile-Release.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a -pre and a -post target defined where you can add customized code. 6 | # 7 | # This makefile implements configuration specific macros and targets. 8 | 9 | 10 | # Environment 11 | MKDIR=mkdir 12 | CP=cp 13 | GREP=grep 14 | NM=nm 15 | CCADMIN=CCadmin 16 | RANLIB=ranlib 17 | CC=gcc 18 | CCC=g++ 19 | CXX=g++ 20 | FC=gfortran 21 | AS=as 22 | 23 | # Macros 24 | CND_PLATFORM=GNU-MacOSX 25 | CND_DLIB_EXT=dylib 26 | CND_CONF=Release 27 | CND_DISTDIR=dist 28 | CND_BUILDDIR=build 29 | 30 | # Include project Makefile 31 | include Makefile 32 | 33 | # Object Directory 34 | OBJECTDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM} 35 | 36 | # Object Files 37 | OBJECTFILES= \ 38 | ${OBJECTDIR}/src/DiffExpIR.o \ 39 | ${OBJECTDIR}/src/FastaFactory.o \ 40 | ${OBJECTDIR}/src/RandomFactory.o \ 41 | ${OBJECTDIR}/src/ReadFactory.o \ 42 | ${OBJECTDIR}/src/Stats.o \ 43 | ${OBJECTDIR}/src/TextParser.o \ 44 | ${OBJECTDIR}/src/bd0.o \ 45 | ${OBJECTDIR}/src/bratio.o \ 46 | ${OBJECTDIR}/src/bstring.o \ 47 | ${OBJECTDIR}/src/chebyshev.o \ 48 | ${OBJECTDIR}/src/choose.o \ 49 | ${OBJECTDIR}/src/dnorm.o \ 50 | ${OBJECTDIR}/src/dt.o \ 51 | ${OBJECTDIR}/src/gamma.o \ 52 | ${OBJECTDIR}/src/lbeta.o \ 53 | ${OBJECTDIR}/src/lgamma.o \ 54 | ${OBJECTDIR}/src/lgammacor.o \ 55 | ${OBJECTDIR}/src/main.o \ 56 | ${OBJECTDIR}/src/pbeta.o \ 57 | ${OBJECTDIR}/src/phyper.o \ 58 | ${OBJECTDIR}/src/pnorm.o \ 59 | 
${OBJECTDIR}/src/pt.o \ 60 | ${OBJECTDIR}/src/qnorm.o \ 61 | ${OBJECTDIR}/src/qt.o \ 62 | ${OBJECTDIR}/src/stirlerr.o \ 63 | ${OBJECTDIR}/src/sunif.o \ 64 | ${OBJECTDIR}/src/wilcox.o 65 | 66 | 67 | # C Compiler Flags 68 | CFLAGS= 69 | 70 | # CC Compiler Flags 71 | CCFLAGS=-g 72 | CXXFLAGS=-g 73 | 74 | # Fortran Compiler Flags 75 | FFLAGS= 76 | 77 | # Assembler Flags 78 | ASFLAGS= 79 | 80 | # Link Libraries and Options 81 | LDLIBSOPTIONS=-L../../bamtools/lib 82 | 83 | # Build Targets 84 | .build-conf: ${BUILD_SUBPROJECTS} 85 | "${MAKE}" -f nbproject/Makefile-${CND_CONF}.mk bin/TPMCalculator 86 | 87 | bin/TPMCalculator: ${OBJECTFILES} 88 | ${MKDIR} -p bin 89 | ${LINK.cc} -o bin/TPMCalculator ${OBJECTFILES} ${LDLIBSOPTIONS} -lbamtools -lm -lz 90 | 91 | ${OBJECTDIR}/src/DiffExpIR.o: src/DiffExpIR.cpp 92 | ${MKDIR} -p ${OBJECTDIR}/src 93 | ${RM} "$@.d" 94 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/DiffExpIR.o src/DiffExpIR.cpp 95 | 96 | ${OBJECTDIR}/src/FastaFactory.o: src/FastaFactory.cpp 97 | ${MKDIR} -p ${OBJECTDIR}/src 98 | ${RM} "$@.d" 99 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/FastaFactory.o src/FastaFactory.cpp 100 | 101 | ${OBJECTDIR}/src/RandomFactory.o: src/RandomFactory.cpp 102 | ${MKDIR} -p ${OBJECTDIR}/src 103 | ${RM} "$@.d" 104 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/RandomFactory.o src/RandomFactory.cpp 105 | 106 | ${OBJECTDIR}/src/ReadFactory.o: src/ReadFactory.cpp 107 | ${MKDIR} -p ${OBJECTDIR}/src 108 | ${RM} "$@.d" 109 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/ReadFactory.o src/ReadFactory.cpp 110 | 111 | ${OBJECTDIR}/src/Stats.o: src/Stats.cpp 112 | ${MKDIR} -p ${OBJECTDIR}/src 113 | ${RM} "$@.d" 114 | $(COMPILE.cc) -O2 -Iincludes 
-I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/Stats.o src/Stats.cpp 115 | 116 | ${OBJECTDIR}/src/TextParser.o: src/TextParser.cpp 117 | ${MKDIR} -p ${OBJECTDIR}/src 118 | ${RM} "$@.d" 119 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/TextParser.o src/TextParser.cpp 120 | 121 | ${OBJECTDIR}/src/bd0.o: src/bd0.c 122 | ${MKDIR} -p ${OBJECTDIR}/src 123 | ${RM} "$@.d" 124 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/bd0.o src/bd0.c 125 | 126 | ${OBJECTDIR}/src/bratio.o: src/bratio.c 127 | ${MKDIR} -p ${OBJECTDIR}/src 128 | ${RM} "$@.d" 129 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/bratio.o src/bratio.c 130 | 131 | ${OBJECTDIR}/src/bstring.o: src/bstring.cpp 132 | ${MKDIR} -p ${OBJECTDIR}/src 133 | ${RM} "$@.d" 134 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/bstring.o src/bstring.cpp 135 | 136 | ${OBJECTDIR}/src/chebyshev.o: src/chebyshev.c 137 | ${MKDIR} -p ${OBJECTDIR}/src 138 | ${RM} "$@.d" 139 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/chebyshev.o src/chebyshev.c 140 | 141 | ${OBJECTDIR}/src/choose.o: src/choose.c 142 | ${MKDIR} -p ${OBJECTDIR}/src 143 | ${RM} "$@.d" 144 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/choose.o src/choose.c 145 | 146 | ${OBJECTDIR}/src/dnorm.o: src/dnorm.c 147 | ${MKDIR} -p ${OBJECTDIR}/src 148 | ${RM} "$@.d" 149 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/dnorm.o src/dnorm.c 150 | 151 | ${OBJECTDIR}/src/dt.o: src/dt.c 152 | ${MKDIR} -p ${OBJECTDIR}/src 153 | ${RM} "$@.d" 154 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/dt.o src/dt.c 155 | 156 | ${OBJECTDIR}/src/gamma.o: src/gamma.c 157 | ${MKDIR} -p ${OBJECTDIR}/src 158 | ${RM} "$@.d" 159 | $(COMPILE.c) 
-O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/gamma.o src/gamma.c 160 | 161 | ${OBJECTDIR}/src/lbeta.o: src/lbeta.c 162 | ${MKDIR} -p ${OBJECTDIR}/src 163 | ${RM} "$@.d" 164 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/lbeta.o src/lbeta.c 165 | 166 | ${OBJECTDIR}/src/lgamma.o: src/lgamma.c 167 | ${MKDIR} -p ${OBJECTDIR}/src 168 | ${RM} "$@.d" 169 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/lgamma.o src/lgamma.c 170 | 171 | ${OBJECTDIR}/src/lgammacor.o: src/lgammacor.c 172 | ${MKDIR} -p ${OBJECTDIR}/src 173 | ${RM} "$@.d" 174 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/lgammacor.o src/lgammacor.c 175 | 176 | ${OBJECTDIR}/src/main.o: src/main.cpp 177 | ${MKDIR} -p ${OBJECTDIR}/src 178 | ${RM} "$@.d" 179 | $(COMPILE.cc) -O2 -Iincludes -I../../bamtools/include/bamtools -std=c++14 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/main.o src/main.cpp 180 | 181 | ${OBJECTDIR}/src/pbeta.o: src/pbeta.c 182 | ${MKDIR} -p ${OBJECTDIR}/src 183 | ${RM} "$@.d" 184 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/pbeta.o src/pbeta.c 185 | 186 | ${OBJECTDIR}/src/phyper.o: src/phyper.c 187 | ${MKDIR} -p ${OBJECTDIR}/src 188 | ${RM} "$@.d" 189 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/phyper.o src/phyper.c 190 | 191 | ${OBJECTDIR}/src/pnorm.o: src/pnorm.c 192 | ${MKDIR} -p ${OBJECTDIR}/src 193 | ${RM} "$@.d" 194 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/pnorm.o src/pnorm.c 195 | 196 | ${OBJECTDIR}/src/pt.o: src/pt.c 197 | ${MKDIR} -p ${OBJECTDIR}/src 198 | ${RM} "$@.d" 199 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/pt.o src/pt.c 200 | 201 | ${OBJECTDIR}/src/qnorm.o: src/qnorm.c 202 | ${MKDIR} -p ${OBJECTDIR}/src 203 | ${RM} "$@.d" 204 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/qnorm.o src/qnorm.c 205 
| 206 | ${OBJECTDIR}/src/qt.o: src/qt.c 207 | ${MKDIR} -p ${OBJECTDIR}/src 208 | ${RM} "$@.d" 209 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/qt.o src/qt.c 210 | 211 | ${OBJECTDIR}/src/stirlerr.o: src/stirlerr.c 212 | ${MKDIR} -p ${OBJECTDIR}/src 213 | ${RM} "$@.d" 214 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/stirlerr.o src/stirlerr.c 215 | 216 | ${OBJECTDIR}/src/sunif.o: src/sunif.c 217 | ${MKDIR} -p ${OBJECTDIR}/src 218 | ${RM} "$@.d" 219 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/sunif.o src/sunif.c 220 | 221 | ${OBJECTDIR}/src/wilcox.o: src/wilcox.c 222 | ${MKDIR} -p ${OBJECTDIR}/src 223 | ${RM} "$@.d" 224 | $(COMPILE.c) -O2 -Iincludes -std=c99 -MMD -MP -MF "$@.d" -o ${OBJECTDIR}/src/wilcox.o src/wilcox.c 225 | 226 | # Subprojects 227 | .build-subprojects: 228 | 229 | # Clean Targets 230 | .clean-conf: ${CLEAN_SUBPROJECTS} 231 | ${RM} -r ${CND_BUILDDIR}/${CND_CONF} 232 | 233 | # Subprojects 234 | .clean-subprojects: 235 | 236 | # Enable dependency checking 237 | .dep.inc: .depcheck-impl 238 | 239 | include .dep.inc 240 | -------------------------------------------------------------------------------- /nbproject/Makefile-impl.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated Makefile - do not edit! 3 | # 4 | # Edit the Makefile in the project folder instead (../Makefile). Each target 5 | # has a pre- and a post- target defined where you can add customization code. 6 | # 7 | # This makefile implements macros and targets common to all configurations. 8 | # 9 | # NOCDDL 10 | 11 | 12 | # Building and Cleaning subprojects are done by default, but can be controlled with the SUB 13 | # macro. If SUB=no, subprojects will not be built or cleaned. 
The following macro 14 | # statements set BUILD_SUB-CONF and CLEAN_SUB-CONF to .build-reqprojects-conf 15 | # and .clean-reqprojects-conf unless SUB has the value 'no' 16 | SUB_no=NO 17 | SUBPROJECTS=${SUB_${SUB}} 18 | BUILD_SUBPROJECTS_=.build-subprojects 19 | BUILD_SUBPROJECTS_NO= 20 | BUILD_SUBPROJECTS=${BUILD_SUBPROJECTS_${SUBPROJECTS}} 21 | CLEAN_SUBPROJECTS_=.clean-subprojects 22 | CLEAN_SUBPROJECTS_NO= 23 | CLEAN_SUBPROJECTS=${CLEAN_SUBPROJECTS_${SUBPROJECTS}} 24 | 25 | 26 | # Project Name 27 | PROJECTNAME=TPMCalculator 28 | 29 | # Active Configuration 30 | DEFAULTCONF=Release 31 | CONF=${DEFAULTCONF} 32 | 33 | # All Configurations 34 | ALLCONFS=Release 35 | 36 | 37 | # build 38 | .build-impl: .build-pre .validate-impl .depcheck-impl 39 | @#echo "=> Running $@... Configuration=$(CONF)" 40 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf 41 | 42 | 43 | # clean 44 | .clean-impl: .clean-pre .validate-impl .depcheck-impl 45 | @#echo "=> Running $@... Configuration=$(CONF)" 46 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf 47 | 48 | 49 | # clobber 50 | .clobber-impl: .clobber-pre .depcheck-impl 51 | @#echo "=> Running $@..." 52 | for CONF in ${ALLCONFS}; \ 53 | do \ 54 | "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .clean-conf; \ 55 | done 56 | 57 | # all 58 | .all-impl: .all-pre .depcheck-impl 59 | @#echo "=> Running $@..." 60 | for CONF in ${ALLCONFS}; \ 61 | do \ 62 | "${MAKE}" -f nbproject/Makefile-$${CONF}.mk QMAKE=${QMAKE} SUBPROJECTS=${SUBPROJECTS} .build-conf; \ 63 | done 64 | 65 | # build tests 66 | .build-tests-impl: .build-impl .build-tests-pre 67 | @#echo "=> Running $@... Configuration=$(CONF)" 68 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .build-tests-conf 69 | 70 | # run tests 71 | .test-impl: .build-tests-impl .test-pre 72 | @#echo "=> Running $@... 
Configuration=$(CONF)" 73 | "${MAKE}" -f nbproject/Makefile-${CONF}.mk SUBPROJECTS=${SUBPROJECTS} .test-conf 74 | 75 | # dependency checking support 76 | .depcheck-impl: 77 | @echo "# This code depends on make tool being used" >.dep.inc 78 | @if [ -n "${MAKE_VERSION}" ]; then \ 79 | echo "DEPFILES=\$$(wildcard \$$(addsuffix .d, \$${OBJECTFILES} \$${TESTOBJECTFILES}))" >>.dep.inc; \ 80 | echo "ifneq (\$${DEPFILES},)" >>.dep.inc; \ 81 | echo "include \$${DEPFILES}" >>.dep.inc; \ 82 | echo "endif" >>.dep.inc; \ 83 | else \ 84 | echo ".KEEP_STATE:" >>.dep.inc; \ 85 | echo ".KEEP_STATE_FILE:.make.state.\$${CONF}" >>.dep.inc; \ 86 | fi 87 | 88 | # configuration validation 89 | .validate-impl: 90 | @if [ ! -f nbproject/Makefile-${CONF}.mk ]; \ 91 | then \ 92 | echo ""; \ 93 | echo "Error: can not find the makefile for configuration '${CONF}' in project ${PROJECTNAME}"; \ 94 | echo "See 'make help' for details."; \ 95 | echo "Current directory: " `pwd`; \ 96 | echo ""; \ 97 | fi 98 | @if [ ! -f nbproject/Makefile-${CONF}.mk ]; \ 99 | then \ 100 | exit 1; \ 101 | fi 102 | 103 | 104 | # help 105 | .help-impl: .help-pre 106 | @echo "This makefile supports the following configurations:" 107 | @echo " ${ALLCONFS}" 108 | @echo "" 109 | @echo "and the following targets:" 110 | @echo " build (default target)" 111 | @echo " clean" 112 | @echo " clobber" 113 | @echo " all" 114 | @echo " help" 115 | @echo "" 116 | @echo "Makefile Usage:" 117 | @echo " make [CONF=] [SUB=no] build" 118 | @echo " make [CONF=] [SUB=no] clean" 119 | @echo " make [SUB=no] clobber" 120 | @echo " make [SUB=no] all" 121 | @echo " make help" 122 | @echo "" 123 | @echo "Target 'build' will build a specific configuration and, unless 'SUB=no'," 124 | @echo " also build subprojects." 125 | @echo "Target 'clean' will clean a specific configuration and, unless 'SUB=no'," 126 | @echo " also clean subprojects." 
127 | @echo "Target 'clobber' will remove all built files from all configurations and," 128 | @echo " unless 'SUB=no', also from subprojects." 129 | @echo "Target 'all' will will build all configurations and, unless 'SUB=no'," 130 | @echo " also build subprojects." 131 | @echo "Target 'help' prints this message." 132 | @echo "" 133 | 134 | -------------------------------------------------------------------------------- /nbproject/Makefile-variables.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated - do not edit! 3 | # 4 | # NOCDDL 5 | # 6 | CND_BASEDIR=`pwd` 7 | CND_BUILDDIR=build 8 | CND_DISTDIR=dist 9 | # Release configuration 10 | CND_PLATFORM_Release=GNU-MacOSX 11 | CND_ARTIFACT_DIR_Release=bin 12 | CND_ARTIFACT_NAME_Release=TPMCalculator 13 | CND_ARTIFACT_PATH_Release=bin/TPMCalculator 14 | CND_PACKAGE_DIR_Release=dist/Release/GNU-MacOSX/package 15 | CND_PACKAGE_NAME_Release=tpmcalculator.tar 16 | CND_PACKAGE_PATH_Release=dist/Release/GNU-MacOSX/package/tpmcalculator.tar 17 | # 18 | # include compiler specific variables 19 | # 20 | # dmake command 21 | ROOT:sh = test -f nbproject/private/Makefile-variables.mk || \ 22 | (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk) 23 | # 24 | # gmake command 25 | .PHONY: $(shell test -f nbproject/private/Makefile-variables.mk || (mkdir -p nbproject/private && touch nbproject/private/Makefile-variables.mk)) 26 | # 27 | include nbproject/private/Makefile-variables.mk 28 | -------------------------------------------------------------------------------- /nbproject/Package-Release.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | # 4 | # Generated - do not edit! 
5 | # 6 | 7 | # Macros 8 | TOP=`pwd` 9 | CND_PLATFORM=GNU-MacOSX 10 | CND_CONF=Release 11 | CND_DISTDIR=dist 12 | CND_BUILDDIR=build 13 | CND_DLIB_EXT=dylib 14 | NBTMPDIR=${CND_BUILDDIR}/${CND_CONF}/${CND_PLATFORM}/tmp-packaging 15 | TMPDIRNAME=tmp-packaging 16 | OUTPUT_PATH=bin/TPMCalculator 17 | OUTPUT_BASENAME=TPMCalculator 18 | PACKAGE_TOP_DIR=tpmcalculator/ 19 | 20 | # Functions 21 | function checkReturnCode 22 | { 23 | rc=$? 24 | if [ $rc != 0 ] 25 | then 26 | exit $rc 27 | fi 28 | } 29 | function makeDirectory 30 | # $1 directory path 31 | # $2 permission (optional) 32 | { 33 | mkdir -p "$1" 34 | checkReturnCode 35 | if [ "$2" != "" ] 36 | then 37 | chmod $2 "$1" 38 | checkReturnCode 39 | fi 40 | } 41 | function copyFileToTmpDir 42 | # $1 from-file path 43 | # $2 to-file path 44 | # $3 permission 45 | { 46 | cp "$1" "$2" 47 | checkReturnCode 48 | if [ "$3" != "" ] 49 | then 50 | chmod $3 "$2" 51 | checkReturnCode 52 | fi 53 | } 54 | 55 | # Setup 56 | cd "${TOP}" 57 | mkdir -p ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package 58 | rm -rf ${NBTMPDIR} 59 | mkdir -p ${NBTMPDIR} 60 | 61 | # Copy files and create directories and links 62 | cd "${TOP}" 63 | makeDirectory "${NBTMPDIR}/tpmcalculator/bin" 64 | copyFileToTmpDir "${OUTPUT_PATH}" "${NBTMPDIR}/${PACKAGE_TOP_DIR}bin/${OUTPUT_BASENAME}" 0755 65 | 66 | 67 | # Generate tar file 68 | cd "${TOP}" 69 | rm -f ${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/tpmcalculator.tar 70 | cd ${NBTMPDIR} 71 | tar -vcf ../../../../${CND_DISTDIR}/${CND_CONF}/${CND_PLATFORM}/package/tpmcalculator.tar * 72 | checkReturnCode 73 | 74 | # Cleanup 75 | cd "${TOP}" 76 | rm -rf ${NBTMPDIR} 77 | -------------------------------------------------------------------------------- /nbproject/configurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | includes/DiffExpIR.h 8 | includes/Exceptions.h 9 | includes/FastaFactory.h 10 | includes/GenomeFactory.h 11 | includes/Global.h 
12 | includes/RandomFactory.h 13 | includes/ReadFactory.h 14 | includes/Sequence.h 15 | includes/Stats.h 16 | includes/TextParser.h 17 | includes/TimeUtils.h 18 | includes/bmath.h 19 | includes/bstring.h 20 | 21 | 24 | 25 | 28 | src/DiffExpIR.cpp 29 | src/FastaFactory.cpp 30 | src/RandomFactory.cpp 31 | src/ReadFactory.cpp 32 | src/Stats.cpp 33 | src/TextParser.cpp 34 | src/bd0.c 35 | src/bratio.c 36 | src/bstring.cpp 37 | src/chebyshev.c 38 | src/choose.c 39 | src/dnorm.c 40 | src/dt.c 41 | src/gamma.c 42 | src/lbeta.c 43 | src/lgamma.c 44 | src/lgammacor.c 45 | src/main.cpp 46 | src/pbeta.c 47 | src/phyper.c 48 | src/pnorm.c 49 | src/pt.c 50 | src/qnorm.c 51 | src/qt.c 52 | src/stirlerr.c 53 | src/sunif.c 54 | src/wilcox.c 55 | 56 | 60 | 61 | 65 | Makefile 66 | 67 | .gitignore 68 | Dockerfile 69 | LICENSE 70 | README.md 71 | tpmcalculator.cwl 72 | 73 | 74 | src 75 | 76 | Makefile 77 | 78 | 79 | 80 | default 81 | true 82 | false 83 | 84 | 85 | 86 | 5 87 | 3 88 | 89 | includes 90 | 91 | 92 | 93 | 5 94 | 11 95 | 96 | includes 97 | ../../bamtools/include/bamtools 98 | 99 | -g 100 | 101 | 102 | 5 103 | 104 | 105 | 5 106 | 107 | 108 | bin/TPMCalculator 109 | 110 | ../../bamtools/lib 111 | 112 | 113 | . 114 | . 115 | . 116 | . 117 | . 118 | . 119 | . 120 | . 121 | . 
122 | 123 | -lbamtools -lm -lz 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | -------------------------------------------------------------------------------- /nbproject/private/Makefile-variables.mk: -------------------------------------------------------------------------------- 1 | # 2 | # Generated - do not edit! 3 | # 4 | # NOCDDL 5 | # 6 | # Release configuration 7 | -------------------------------------------------------------------------------- /nbproject/private/c_standard_headers_indexer.c: -------------------------------------------------------------------------------- 1 | /* 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 | * 4 | * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. 5 | * 6 | * Oracle and Java are registered trademarks of Oracle and/or its affiliates. 7 | * Other names may be trademarks of their respective owners. 8 | * 9 | * The contents of this file are subject to the terms of either the GNU 10 | * General Public License Version 2 only ("GPL") or the Common 11 | * Development and Distribution License("CDDL") (collectively, the 12 | * "License"). You may not use this file except in compliance with the 13 | * License. You can obtain a copy of the License at 14 | * http://www.netbeans.org/cddl-gplv2.html 15 | * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the 16 | * specific language governing permissions and limitations under the 17 | * License. 
When distributing the software, include this License Header 18 | * Notice in each file and include the License file at 19 | * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this 20 | * particular file as subject to the "Classpath" exception as provided 21 | * by Oracle in the GPL Version 2 section of the License file that 22 | * accompanied this code. If applicable, add the following below the 23 | * License Header, with the fields enclosed by brackets [] replaced by 24 | * your own identifying information: 25 | * "Portions Copyrighted [year] [name of copyright owner]" 26 | * 27 | * If you wish your version of this file to be governed by only the CDDL 28 | * or only the GPL Version 2, indicate your decision by adding 29 | * "[Contributor] elects to include this software in this distribution 30 | * under the [CDDL or GPL Version 2] license." If you do not indicate a 31 | * single choice of license, a recipient has the option to distribute 32 | * your version of this file under either the CDDL, the GPL Version 2 or 33 | * to extend the choice of license to its licensees as provided above. 34 | * However, if you add GPL Version 2 code and therefore, elected the GPL 35 | * Version 2 license, then the option applies only if the new code is 36 | * made subject to such option by the copyright holder. 
37 | * 38 | * Contributor(s): 39 | */ 40 | 41 | // List of standard headers was taken in http://en.cppreference.com/w/c/header 42 | 43 | #include // Conditionally compiled macro that compares its argument to zero 44 | #include // Functions to determine the type contained in character data 45 | #include // Macros reporting error conditions 46 | #include // Limits of float types 47 | #include // Sizes of basic types 48 | #include // Localization utilities 49 | #include // Common mathematics functions 50 | #include // Nonlocal jumps 51 | #include // Signal handling 52 | #include // Variable arguments 53 | #include // Common macro definitions 54 | #include // Input/output 55 | #include // String handling 56 | #include // General utilities: memory management, program utilities, string conversions, random numbers 57 | #include // Time/date utilities 58 | #include // (since C95) Alternative operator spellings 59 | #include // (since C95) Extended multibyte and wide character utilities 60 | #include // (since C95) Wide character classification and mapping utilities 61 | #ifdef _STDC_C99 62 | #include // (since C99) Complex number arithmetic 63 | #include // (since C99) Floating-point environment 64 | #include // (since C99) Format conversion of integer types 65 | #include // (since C99) Boolean type 66 | #include // (since C99) Fixed-width integer types 67 | #include // (since C99) Type-generic math (macros wrapping math.h and complex.h) 68 | #endif 69 | #ifdef _STDC_C11 70 | #include // (since C11) alignas and alignof convenience macros 71 | #include // (since C11) Atomic types 72 | #include // (since C11) noreturn convenience macros 73 | #include // (since C11) Thread library 74 | #include // (since C11) UTF-16 and UTF-32 character utilities 75 | #endif 76 | -------------------------------------------------------------------------------- /nbproject/private/configurations.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
Makefile 4 | 5 | 6 | 7 | localhost 8 | 4 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | gdb 24 | 25 | 26 | 27 | "${OUTPUT_PATH}" 28 | 29 | "${OUTPUT_PATH}" 30 | 31 | true 32 | 0 33 | 0 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /nbproject/private/cpp_standard_headers_indexer.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER. 3 | * 4 | * Copyright (c) 2016 Oracle and/or its affiliates. All rights reserved. 5 | * 6 | * Oracle and Java are registered trademarks of Oracle and/or its affiliates. 7 | * Other names may be trademarks of their respective owners. 8 | * 9 | * The contents of this file are subject to the terms of either the GNU 10 | * General Public License Version 2 only ("GPL") or the Common 11 | * Development and Distribution License("CDDL") (collectively, the 12 | * "License"). You may not use this file except in compliance with the 13 | * License. You can obtain a copy of the License at 14 | * http://www.netbeans.org/cddl-gplv2.html 15 | * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the 16 | * specific language governing permissions and limitations under the 17 | * License. When distributing the software, include this License Header 18 | * Notice in each file and include the License file at 19 | * nbbuild/licenses/CDDL-GPL-2-CP. Oracle designates this 20 | * particular file as subject to the "Classpath" exception as provided 21 | * by Oracle in the GPL Version 2 section of the License file that 22 | * accompanied this code. 
If applicable, add the following below the 23 | * License Header, with the fields enclosed by brackets [] replaced by 24 | * your own identifying information: 25 | * "Portions Copyrighted [year] [name of copyright owner]" 26 | * 27 | * If you wish your version of this file to be governed by only the CDDL 28 | * or only the GPL Version 2, indicate your decision by adding 29 | * "[Contributor] elects to include this software in this distribution 30 | * under the [CDDL or GPL Version 2] license." If you do not indicate a 31 | * single choice of license, a recipient has the option to distribute 32 | * your version of this file under either the CDDL, the GPL Version 2 or 33 | * to extend the choice of license to its licensees as provided above. 34 | * However, if you add GPL Version 2 code and therefore, elected the GPL 35 | * Version 2 license, then the option applies only if the new code is 36 | * made subject to such option by the copyright holder. 37 | * 38 | * Contributor(s): 39 | */ 40 | 41 | // List of standard headers was taken in http://en.cppreference.com/w/cpp/header 42 | 43 | #include // General purpose utilities: program control, dynamic memory allocation, random numbers, sort and search 44 | #include // Functions and macro constants for signal management 45 | #include // Macro (and function) that saves (and jumps) to an execution context 46 | #include // Handling of variable length argument lists 47 | #include // Runtime type information utilities 48 | #include // std::bitset class template 49 | #include // Function objects, designed for use with the standard algorithms 50 | #include // Various utility components 51 | #include // C-style time/date utilites 52 | #include // typedefs for types such as size_t, NULL and others 53 | #include // Low-level memory management utilities 54 | #include // Higher level memory management utilities 55 | #include // limits of integral types 56 | #include // limits of float types 57 | #include // standardized way to query 
properties of arithmetic types 58 | #include // Exception handling utilities 59 | #include // Standard exception objects 60 | #include // Conditionally compiled macro that compares its argument to zero 61 | #include // Macro containing the last error number 62 | #include // functions to determine the type contained in character data 63 | #include // functions for determining the type of wide character data 64 | #include // various narrow character string handling functions 65 | #include // various wide and multibyte string handling functions 66 | #include // std::basic_string class template 67 | #include // std::vector container 68 | #include // std::deque container 69 | #include // std::list container 70 | #include // std::set and std::multiset associative containers 71 | #include // std::map and std::multimap associative containers 72 | #include // std::stack container adaptor 73 | #include // std::queue and std::priority_queue container adaptors 74 | #include // Algorithms that operate on containers 75 | #include // Container iterators 76 | #include // Common mathematics functions 77 | #include // Complex number type 78 | #include // Class for representing and manipulating arrays of values 79 | #include // Numeric operations on values in containers 80 | #include // forward declarations of all classes in the input/output library 81 | #include // std::ios_base class, std::basic_ios class template and several typedefs 82 | #include // std::basic_istream class template and several typedefs 83 | #include // std::basic_ostream, std::basic_iostream class templates and several typedefs 84 | #include // several standard stream objects 85 | #include // std::basic_fstream, std::basic_ifstream, std::basic_ofstream class templates and several typedefs 86 | #include // std::basic_stringstream, std::basic_istringstream, std::basic_ostringstream class templates and several typedefs 87 | #include // std::strstream, std::istrstream, std::ostrstream(deprecated) 88 | #include // 
Helper functions to control the format or input and output 89 | #include // std::basic_streambuf class template 90 | #include // C-style input-output functions 91 | #include // Localization utilities 92 | #include // C localization utilities 93 | #include // empty header. The macros that appear in iso646.h in C are keywords in C++ 94 | #if __cplusplus >= 201103L 95 | #include // (since C++11) std::type_index 96 | #include // (since C++11) Compile-time type information 97 | #include // (since C++11) C++ time utilites 98 | #include // (since C++11) std::initializer_list class template 99 | #include // (since C++11) std::tuple class template 100 | #include // (since C++11) Nested allocator class 101 | #include // (since C++11) fixed-size types and limits of other types 102 | #include // (since C++11) formatting macros , intmax_t and uintmax_t math and conversions 103 | #include // (since C++11) defines std::error_code, a platform-dependent error code 104 | #include // (since C++11) C-style Unicode character conversion functions 105 | #include // (since C++11) std::array container 106 | #include // (since C++11) std::forward_list container 107 | #include // (since C++11) std::unordered_set and std::unordered_multiset unordered associative containers 108 | #include // (since C++11) std::unordered_map and std::unordered_multimap unordered associative containers 109 | #include // (since C++11) Random number generators and distributions 110 | #include // (since C++11) Compile-time rational arithmetic 111 | #include // (since C++11) Floating-point environment access functions 112 | #include // (since C++11) Unicode conversion facilities 113 | #include // (since C++11) Classes, algorithms and iterators to support regular expression processing 114 | #include // (since C++11) Atomic operations library 115 | #include // (since C++11)(deprecated in C++17) simply includes the header 116 | #include // (since C++11)(deprecated in C++17) simply includes the headers (until C++17) 
(since C++17) and : the overloads equivalent to the contents of the C header tgmath.h are already provided by those headers 117 | #include // (since C++11)(deprecated in C++17) defines one compatibility macro constant 118 | #include // (since C++11)(deprecated in C++17) defines one compatibility macro constant 119 | #include // (since C++11) std::thread class and supporting functions 120 | #include // (since C++11) mutual exclusion primitives 121 | #include // (since C++11) primitives for asynchronous computations 122 | #include // (since C++11) thread waiting conditions 123 | #endif 124 | #if __cplusplus >= 201300L 125 | #include // (since C++14) shared mutual exclusion primitives 126 | #endif 127 | #if __cplusplus >= 201500L 128 | #include // (since C++17) std::any class template 129 | #include // (since C++17) std::optional class template 130 | #include // (since C++17) std::variant class template 131 | #include // (since C++17) Polymorphic allocators and memory resources 132 | #include // (since C++17) std::basic_string_view class template 133 | #include // (since C++17) Predefined execution policies for parallel versions of the algorithms 134 | #include // (since C++17) std::path class and supporting functions 135 | #endif 136 | -------------------------------------------------------------------------------- /nbproject/private/launcher.properties: -------------------------------------------------------------------------------- 1 | # Launchers File syntax: 2 | # 3 | # [Must-have property line] 4 | # launcher1.runCommand= 5 | # [Optional extra properties] 6 | # launcher1.displayName= 7 | # launcher1.buildCommand= 8 | # launcher1.runDir= 9 | # launcher1.symbolFiles= 10 | # launcher1.env.= 11 | # (If this value is quoted with ` it is handled as a native command which execution result will become the value) 12 | # [Common launcher properties] 13 | # common.runDir= 14 | # (This value is overwritten by a launcher specific runDir value if the latter exists) 15 | # 
common.env.= 16 | # (Environment variables from common launcher are merged with launcher specific variables) 17 | # common.symbolFiles= 18 | # (This value is overwritten by a launcher specific symbolFiles value if the latter exists) 19 | # 20 | # In runDir, symbolFiles and env fields you can use these macroses: 21 | # ${PROJECT_DIR} - project directory absolute path 22 | # ${OUTPUT_PATH} - linker output path (relative to project directory path) 23 | # ${OUTPUT_BASENAME}- linker output filename 24 | # ${TESTDIR} - test files directory (relative to project directory path) 25 | # ${OBJECTDIR} - object files directory (relative to project directory path) 26 | # ${CND_DISTDIR} - distribution directory (relative to project directory path) 27 | # ${CND_BUILDDIR} - build directory (relative to project directory path) 28 | # ${CND_PLATFORM} - platform name 29 | # ${CND_CONF} - configuration name 30 | # ${CND_DLIB_EXT} - dynamic library extension 31 | # 32 | # All the project launchers must be listed in the file! 33 | # 34 | # launcher1.runCommand=... 35 | # launcher2.runCommand=... 36 | # ... 37 | # common.runDir=... 
38 | # common.env.KEY=VALUE 39 | 40 | # launcher1.runCommand= -------------------------------------------------------------------------------- /nbproject/private/private.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 1 5 | 0 6 | 7 | 8 | 9 | 10 | 11 | file:/Users/veraalva/Work/Developer/C/ncbi/TPMCalculator/includes/GenomeFactory.h 12 | file:/Users/veraalva/Work/Developer/C/ncbi/TPMCalculator/src/ReadFactory.cpp 13 | file:/Users/veraalva/Work/Developer/C/ncbi/TPMCalculator/Dockerfile 14 | file:/Users/veraalva/Work/Developer/C/ncbi/TPMCalculator/src/main.cpp 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /nbproject/project.properties: -------------------------------------------------------------------------------- 1 | #Wed Jul 18 12:08:31 EDT 2018 2 | -------------------------------------------------------------------------------- /nbproject/project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.netbeans.modules.cnd.makeproject 4 | 5 | 6 | TPMCalculator 7 | c 8 | cpp 9 | h 10 | UTF-8 11 | 12 | 13 | src 14 | 15 | 16 | 17 | Release 18 | 1 19 | 20 | 21 | 22 | false 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /src/DiffExpIR.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | /* 8 | * File: DiffExpIR.cpp 9 | * Author: veraalva 10 | * 11 | * Created on September 6, 2017, 9:50 AM 12 | */ 13 | #include 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "api/BamReader.h" 27 | 28 | #include "Global.h" 29 | #include "Exceptions.h" 30 | #include "TimeUtils.h" 31 | #include "bstring.h" 32 | #include "TextParser.h" 33 | #include "Sequence.h" 34 | #include "ReadFactory.h" 35 | #include "DiffExpIR.h" 36 | #include "Stats.h" 37 | 38 | using namespace std; 39 | using namespace ngs; 40 | using namespace genome; 41 | 42 |
/**
 * Tests every intron of every processed gene for a TPM difference between
 * two sample groups and records the result.
 *
 * A sample belongs to group 1 when its name starts with samples[0] and to
 * group 2 when it starts with samples[1] (group 1 is checked first).
 * Samples for which getSampleData() throws NotFoundException are skipped.
 *
 * For each intron the function computes:
 *   - the per-group mean intron TPM (each value floored at 10E-5),
 *   - r1 / r2: log2(mean intron TPM / (mean TPM of the last exon seen before
 *     the intron + mean TPM of the first exon found after it in the gene's
 *     feature list)) for group 1 / group 2,
 *   - a p-value comparing the two groups' intron TPM values.
 * When both groups have at least one value and the p-value is not NaN, the
 * p-value is appended to `pvalue` and a DiffExpIntron record is appended to
 * `diffexpIRdata`.
 *
 * @param readFactory source of the genome (chromosomes, genes, features)
 *                    and of the sample names
 * @param samples     group name prefixes; elements 0 and 1 are read, so the
 *                    vector must hold at least two entries
 * @param method      "ttest" selects stats::TTest; any other value selects
 *                    stats::WilcoxTest
 * @param useFDR      when true, `pvalue` is replaced by the output of
 *                    stats::FDRCorrection::fdr_correction once all genes
 *                    have been visited
 */
void DiffExpIR::calculateDiffExpIR(ReadFactory& readFactory, std::vector<std::string> samples, std::string method, bool useFDR) {
    stats::WilcoxTest wTest;
    stats::TTest ttest;
    stats::FDRCorrection fdrCorrection;

    // Floor applied to intron TPM values (note: 10E-5 == 1e-4).
    const double minTPM = 10E-5;

    // Stores the mean TPM of `exon` over the samples of group 1 / group 2 in
    // mean1 / mean2. A group without any sample yields 0.0 / 0.0, exactly as
    // the original inline code did.
    auto exonGroupMeans = [&](const SPtrFeatureNGS& exon, double& mean1, double& mean2) {
        double sum1 = 0.0;
        double sum2 = 0.0;
        int count1 = 0;
        int count2 = 0;
        for (auto s : readFactory.getSamples()) {
            try {
                SPtrSampleData sd = exon->getData().getSampleData(s);
                if (s.compare(0, samples[0].size(), samples[0]) == 0) {
                    sum1 += sd->getTPM();
                    count1++;
                } else if (s.compare(0, samples[1].size(), samples[1]) == 0) {
                    sum2 += sd->getTPM();
                    count2++;
                }
            } catch (const exceptions::NotFoundException&) {
                // No data for this sample on this feature: skip the sample.
            }
        }
        mean1 = sum1 / static_cast<double>(count1);
        mean2 = sum2 / static_cast<double>(count2);
    };

    for (const auto& cIt : readFactory.getGenomeFactory().getChromosomes()) {
        SPtrChromosomeNGS c = cIt.second;
        for (const auto& gIt : c->getGenes()) {
            SPtrGeneNGS g = gIt;
            if (!g->isProcessed()) {
                fflush(NULL);
                continue;
            }

            // e11/e21: group 1/2 mean TPM of the last exon visited so far.
            // e12/e22: group 1/2 mean TPM of the first exon after the intron.
            // All four persist across the features of one gene.
            double e11_TPM = 0.0;
            double e21_TPM = 0.0;
            double e12_TPM = 0.0;
            double e22_TPM = 0.0;

            // Bind the container once so begin() and end() always refer to
            // the same object, whatever getFeatures() returns.
            auto&& features = g->getFeatures();
            for (auto fIt = features.begin(); fIt != features.end(); ++fIt) {
                SPtrFeatureNGS f = *fIt;
                if (f->getType() == "exon") {
                    exonGroupMeans(f, e11_TPM, e21_TPM);
                } else if (f->getType() == "intron") {
                    std::vector<double> x;
                    double x_sum = 0.0;
                    std::vector<double> y;
                    double y_sum = 0.0;
                    for (auto s : readFactory.getSamples()) {
                        try {
                            SPtrSampleData sd = f->getData().getSampleData(s);
                            if (s.compare(0, samples[0].size(), samples[0]) == 0) {
                                double tpm = sd->getTPM();
                                if (tpm < minTPM) tpm = minTPM;
                                x.push_back(tpm);
                                x_sum += tpm;
                            } else if (s.compare(0, samples[1].size(), samples[1]) == 0) {
                                double tpm = sd->getTPM();
                                if (tpm < minTPM) tpm = minTPM;
                                y.push_back(tpm);
                                y_sum += tpm;
                            }
                        } catch (const exceptions::NotFoundException&) {
                            // No data for this sample on this intron: skip it.
                        }
                    }

                    // Find the first exon after this intron. Its TPM must be
                    // read from *eIt; the previous code read it from the
                    // intron `f`, so e12/e22 held the intron's own means.
                    for (auto eIt = fIt; eIt != features.end(); ++eIt) {
                        if ((*eIt)->getType() == "exon") {
                            exonGroupMeans(*eIt, e12_TPM, e22_TPM);
                            break;
                        }
                    }

                    if (x.size() != 0 && y.size() != 0) {
                        // From here on x_sum / y_sum hold the group means.
                        x_sum = x_sum / x.size();
                        y_sum = y_sum / y.size();
                        double r1 = std::log2(x_sum / (e11_TPM + e12_TPM));
                        double r2 = std::log2(y_sum / (e21_TPM + e22_TPM));
                        double p;
                        if (method == "ttest") {
                            p = ttest.pvalue(x, y);
                        } else {
                            p = wTest.pvalue(x, y);
                        }
                        if (!std::isnan(p)) {
                            pvalue.push_back(p);
                            SptrDiffExpIntron d = std::make_shared<DiffExpIntron>(DiffExpIntron(make_pair(r1, r2), g, f, c->getId(), p, std::log2(x_sum / y_sum), x_sum, y_sum));
                            diffexpIRdata.push_back(d);
                        }
                    }
                }
            }
        }
    }

    if (useFDR)
        pvalue = fdrCorrection.fdr_correction(pvalue);
}
164 | 165 | void DiffExpIR::calculateDiffExpIRUnique(ReadFactory& readFactory, std::vector samples, std::string method, bool useFDR) { 166 | stats::WilcoxTest wTest; 167 | stats::TTest ttest; 168 | stats::FDRCorrection fdrCorrection; 169 | SPtrChromosomeNGS c; 170 | SPtrGeneNGS g; 171 | SPtrIsoformNGS i; 172 | SPtrFeatureNGS f; 173 | 174 | for (auto cIt : readFactory.getGenomeFactory().getChromosomes()) { 175 | c = cIt.second; 176 | // cout << "Chromosome: " << c->getId() << endl; 177 | for (auto it : c->getGenes()) { 178 | g = it; 179 | // cout << "Gene: " << g->getId(); 180 | // fflush(NULL); 181 | if (g->isProcessed()) { 182 | // cout << "\tProcessed" << endl; 183 | // fflush(NULL); 184 | double e11_TPM, e12_TPM, e21_TPM, e22_TPM; 185 | int e1_count, e2_count; 186 | e11_TPM = e21_TPM = 0.0; 187 | e12_TPM = e22_TPM = 0.0; 188 | e1_count = e2_count = 0; 189 | // cout << "\tUniquefeatures: " << g->getUniquefeatures().size() << endl; 190 | // fflush(NULL); 191 | for (auto fIt = g->getUniqueFeatures().begin(); fIt != g->getUniqueFeatures().end(); ++fIt) { 192 | f = *fIt; 193 | // cout << "\t\tFeature: " << f << endl; 194 | // fflush(NULL); 195 | if (f->getType() == "exon") { 196 | e11_TPM = e21_TPM = 0.0; 197 | e1_count = e2_count = 0; 198 | for (auto s :
readFactory.getSamples()) { 199 | try { 200 | SPtrSampleData sd = f->getData().getSampleData(s); 201 | if (s.compare(0, samples[0].size(), samples[0]) == 0) { 202 | e11_TPM += sd->getTPM(); 203 | e1_count++; 204 | } else if (s.compare(0, samples[1].size(), samples[1]) == 0) { 205 | e21_TPM += sd->getTPM(); 206 | e2_count++; 207 | } 208 | } catch (exceptions::NotFoundException) { 209 | } 210 | } 211 | e11_TPM = e11_TPM / static_cast (e1_count); 212 | e21_TPM = e21_TPM / static_cast (e2_count); 213 | } else if (f->getType() == "intron") { 214 | vector x; 215 | double x_sum = 0.0; 216 | vector y; 217 | double y_sum = 0.0; 218 | for (auto s : readFactory.getSamples()) { 219 | try { 220 | SPtrSampleData sd = f->getData().getSampleData(s); 221 | if (s.compare(0, samples[0].size(), samples[0]) == 0) { 222 | double tpm = sd->getTPM(); 223 | if (tpm < 10E-5) tpm = 10E-5; 224 | x.push_back(tpm); 225 | x_sum += tpm; 226 | } else if (s.compare(0, samples[1].size(), samples[1]) == 0) { 227 | double tpm = sd->getTPM(); 228 | if (tpm < 10E-5) tpm = 10E-5; 229 | y.push_back(tpm); 230 | y_sum += tpm; 231 | } 232 | } catch (exceptions::NotFoundException) { 233 | } 234 | } 235 | for (auto eIt = fIt; eIt != g->getUniqueFeatures().end(); ++eIt) { 236 | if ((*eIt)->getType() == "exon") { 237 | e12_TPM = e22_TPM = 0.0; 238 | e1_count = e2_count = 0; 239 | for (auto s : readFactory.getSamples()) { 240 | try { 241 | SPtrSampleData sd = f->getData().getSampleData(s); 242 | if (s.compare(0, samples[0].size(), samples[0]) == 0) { 243 | e12_TPM += sd->getTPM(); 244 | e1_count++; 245 | } else if (s.compare(0, samples[1].size(), samples[1]) == 0) { 246 | e22_TPM += sd->getTPM(); 247 | e2_count++; 248 | } 249 | } catch (exceptions::NotFoundException) { 250 | } 251 | } 252 | e12_TPM = e12_TPM / static_cast (e1_count); 253 | e22_TPM = e22_TPM / static_cast (e2_count); 254 | break; 255 | } 256 | } 257 | x_sum = x_sum / x.size(); 258 | y_sum = y_sum / y.size(); 259 | double r1 = std::log2(x_sum / 
(e11_TPM + e12_TPM)); 260 | double r2 = std::log2(y_sum / (e21_TPM + e22_TPM)); 261 | double p; 262 | if (x.size() != 0 && y.size() != 0) { 263 | if (method == "ttest") { 264 | p = ttest.pvalue(x, y); 265 | } else { 266 | p = wTest.pvalue(x, y); 267 | } 268 | if (!std::isnan(p)) { 269 | // cout << "\t\t\tPValue: " << p << " R: " << r1 << " " << r2 << " Mean: " << x_sum << " " << y_sum << " log2: " << std::log2(x_sum / y_sum) << endl; 270 | pvalue.push_back(p); 271 | SptrDiffExpIntron d = std::make_shared(DiffExpIntron(make_pair(r1, r2), g, f, c->getId(), p, std::log2(x_sum / y_sum), x_sum, y_sum)); 272 | diffexpIRdata.push_back(d); 273 | } 274 | } 275 | } 276 | } 277 | } else { 278 | // cout << "\tNo processed" << endl; 279 | fflush(NULL); 280 | } 281 | } 282 | } 283 | 284 | if (useFDR) 285 | pvalue = fdrCorrection.fdr_correction(pvalue); 286 | } 287 | 288 | void DiffExpIR::printDiffExpIR(std::string output_name, double fc_cutoff, double pvalue_cutoff, double r_cutoff) { 289 | ofstream out_file; 290 | 291 | out_file.open(output_name); 292 | out_file << "GeneId\tChr\tStart\tEnd\tIntron_Start\tIntron_End\tLog2TPMRatio\tTPM_1\tTPM_2\tminusLog10PValue\tPValue\tRValue_1\tRValue_2" << endl; 293 | for (unsigned int ind = 0; ind != pvalue.size(); ind++) { 294 | SptrDiffExpIntron d = diffexpIRdata[ind]; 295 | 296 | if (std::isfinite(d->getRvalueFirst()) && std::isfinite(d->getRvalueSecond())) { 297 | if (d->getRvalueFirst() >= r_cutoff || d->getRvalueSecond() >= r_cutoff) { 298 | if (pvalue[ind] <= pvalue_cutoff && std::fabs(d->getLog2TPMRatio()) >= fc_cutoff) { 299 | double minusLog10PValue = -1.0 * std::log10(pvalue[ind]); 300 | SPtrGeneNGS g = d->getGene(); 301 | SPtrFeatureNGS f = d->getIntron(); 302 | out_file << g->getId() << "\t" << d->getChr() << "\t" << (g->getStart() + 1) << "\t" << (g->getEnd() + 1) << "\t"; 303 | out_file << (f->getStart() + 1) << "\t" << (f->getEnd() + 1) << "\t"; 304 | out_file << d->getLog2TPMRatio() << "\t" << d->getTPM_1() << "\t" << 
d->getTPM_2() << "\t"; 305 | out_file << minusLog10PValue << "\t" << pvalue[ind] << "\t" << d->getRvalueFirst() << "\t" << d->getRvalueSecond() << endl; 306 | } 307 | } 308 | } 309 | } 310 | out_file.close(); 311 | } 312 | 313 | -------------------------------------------------------------------------------- /src/FastaFactory.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FastaFactory.cpp 3 | * Author: veraalva 4 | * 5 | * Created on February 10, 2016, 3:41 PM 6 | */ 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "Global.h" 26 | #include "Exceptions.h" 27 | #include "TimeUtils.h" 28 | #include "TextParser.h" 29 | #include "Sequence.h" 30 | #include "FastaFactory.h" 31 | 32 | using namespace std; 33 | using namespace parsers; 34 | using namespace sequence; 35 | using namespace formats; 36 | 37 | long unsigned int FastaFactory::parseDNAFastaFile(sequence::DNAContainer &seqContainer, std::string fName, bool binary) { 38 | int numberSeqCurrentRead = 0; 39 | uint64_t i, len; 40 | string id; 41 | std::shared_ptr dna; 42 | pair < SPtrDNA, bool> result; 43 | 44 | if (binary) { 45 | ifstream inFile(fName, std::ifstream::binary); 46 | inFile.read((char *) &i, sizeof (uint64_t)); 47 | for (unsigned long int j = 0; j < i; j++) { 48 | inFile.read((char *) &len, sizeof (uint64_t)); 49 | id.resize(len); 50 | inFile.read(&(id[0]), len); 51 | result = seqContainer.addElement(id); 52 | if (result.second) { 53 | dna = result.first; 54 | } else { 55 | cerr << "Duplicated sequence ID " << id << endl; 56 | exit(-1); 57 | } 58 | dna->setId(id); 59 | inFile.read((char *) &len, sizeof (uint64_t)); 60 | dna->getSeq().resize(len); 61 | inFile.read(&(dna->getSeq()[0]), len); 62 | numberSeqCurrentRead++; 63 | } 64 | inFile.close(); 65 | } else { 66 | 
TextParser fParser; 67 | try { 68 | fParser.setFileToParse(fName); 69 | while (fParser.iterate("#")) { 70 | string line = fParser.getLine(); 71 | if (fParser.lineStartWith(">")) { 72 | id = line.substr(1, fParser.getLine().size() - 1); 73 | result = seqContainer.addElement(id); 74 | if (result.second) { 75 | dna = result.first; 76 | } else { 77 | cerr << "Duplicated sequence ID " << id << endl; 78 | exit(-1); 79 | } 80 | dna->setId(id); 81 | numberSeqCurrentRead++; 82 | } else { 83 | if (dna == nullptr) { 84 | cerr << "Fasta file does not start with the header (>)" << endl; 85 | exit(-1); 86 | } 87 | dna->getSeq().append(line); 88 | } 89 | } 90 | } catch (exceptions::FileHandledException) { 91 | cerr << "Error parsing file: " << fName << endl; 92 | exit(-1); 93 | } catch (ios::failure) { 94 | cerr << "Error parsing file: " << fName << endl; 95 | exit(-1); 96 | } 97 | } 98 | 99 | if (Global::instance()->isDebug3()) { 100 | cout << "\tDEBUG3 ==> " << seqContainer.size() << " sequences in the container." 
<< endl; 101 | for (auto it = seqContainer.getContainer().begin(); it != seqContainer.getContainer().end(); ++it) { 102 | dna = it->second; 103 | cout << "\tDEBUG3 ==>\t\t" << dna->getId() << " with " << dna->getLength() << " bp" << endl; 104 | } 105 | } 106 | 107 | return numberSeqCurrentRead; 108 | } 109 | 110 | void FastaFactory::parseDNAFastaInDirectory(sequence::DNAContainer &seqContainer, std::string dirName, std::string prefix, std::string sufix, bool binary) { 111 | struct dirent *dp; 112 | TimeUtils tUtil; 113 | DIR *dirp = (DIR *) opendir(dirName.c_str()); 114 | if (!dirp) { 115 | cerr << "Can't open directory: " << dirName << endl; 116 | exit(-1); 117 | } 118 | 119 | while ((dp = readdir(dirp)) != NULL) { 120 | bool read = false; 121 | string fName(dp->d_name); 122 | if (Global::instance()->isDebug3()) { 123 | cout << "\tDEBUG3 ==> Found file: " << fName << endl; 124 | } 125 | if (fName[0] != '.') { 126 | if (prefix.empty() && sufix.empty()) { 127 | read = true; 128 | } else { 129 | if (!prefix.empty() && sufix.empty()) { 130 | if (prefix.size() <= fName.size() && 131 | fName.compare(0, prefix.size(), prefix) == 0) read = true; 132 | } else if (prefix.empty() && !sufix.empty()) { 133 | if (sufix.size() <= fName.size() && 134 | fName.compare(fName.size() - sufix.size(), sufix.size(), sufix) == 0) read = true; 135 | } else if (!prefix.empty() && !sufix.empty()) { 136 | if (prefix.size() <= fName.size() && 137 | sufix.size() <= fName.size() && 138 | fName.compare(0, prefix.size(), prefix) == 0 && 139 | fName.compare(fName.size() - sufix.size(), sufix.size(), sufix) == 0) read = true; 140 | } 141 | } 142 | } 143 | if (read) { 144 | if (Global::instance()->isInfo()) { 145 | tUtil.setTime(); 146 | cout << "\tINFO ==> Parsing file: " << dirName + "/" + fName << endl; 147 | } 148 | int seqs = parseDNAFastaFile(seqContainer, dirName + "/" + fName, binary); 149 | if (Global::instance()->isInfo()) { 150 | cout << "\tINFO ==> " << seqs << " sequences read in " << 
tUtil.getElapseTimeSec() << " sec" << endl; 151 | } 152 | } 153 | } 154 | closedir(dirp); 155 | } 156 | 157 | void FastaFactory::writeDNASequencesToFile(sequence::DNAContainer &seqContainer, std::string fileName, bool binary) { 158 | uint64_t i, len; 159 | DNA *dna; 160 | 161 | if (Global::instance()->isDebug3()) { 162 | cout << "\tDEBUG3 ==> " << seqContainer.size() << " sequences in the container to write" << endl; 163 | } 164 | 165 | if (binary) { 166 | std::ofstream outputFile(fileName, std::ofstream::binary); 167 | if (!outputFile.is_open()) { 168 | cerr << "Can't open output file " << fileName << endl; 169 | exit(-1); 170 | } 171 | i = seqContainer.size(); 172 | outputFile.write((char *) &i, sizeof (uint64_t)); 173 | for (auto it = seqContainer.getContainer().begin(); it != seqContainer.getContainer().end(); ++it) { 174 | dna = it->second.get(); 175 | if (Global::instance()->isDebug3()) { 176 | cout << "\tDEBUG3 ==> Writing sequence: " << dna->getId() << " with length " << dna->getLength() << endl; 177 | } 178 | len = dna->getId().size(); 179 | outputFile.write((char *) &len, sizeof (uint64_t)); 180 | outputFile.write(dna->getId().c_str(), len); 181 | len = dna->getLength(); 182 | outputFile.write((char *) &len, sizeof (uint64_t)); 183 | outputFile.write(dna->getSeq().c_str(), len); 184 | } 185 | outputFile.close(); 186 | } else { 187 | ofstream outputFile(fileName); 188 | if (!outputFile.is_open()) { 189 | cerr << "Can't open output file " << fileName << endl; 190 | exit(-1); 191 | } 192 | for (auto it = seqContainer.getContainer().begin(); it != seqContainer.getContainer().end(); ++it) { 193 | dna = it->second.get(); 194 | if (Global::instance()->isDebug3()) { 195 | cout << "\tDEBUG3 ==> Writing sequence: " << dna->getId() << " with length " << dna->getLength() << endl; 196 | } 197 | outputFile << ">" << dna->getId() << endl; 198 | for (i = 0; i < dna->getLength(); i += 50) { 199 | char t = 0; 200 | if (i + 50 < dna->getLength()) { 201 | t = dna->getSeq()[i 
/**
 * Rank the values of r, giving tied values the average of their ranks.
 *
 * Ranks are 1-based and returned in the order of the input: result[i] is the
 * rank of r[i]. For example {10, 30, 20, 20} yields {1, 4, 2.5, 2.5}.
 *
 * The input must not contain NaN (callers filter them out beforehand).
 *
 * @param r values to rank; not modified
 * @return vector of the same length holding the rank of each element
 */
std::vector<double> vector_rank(std::vector<double>& r) {
    const std::size_t n = r.size();

    // Positions of r ordered by increasing value. The stable sort keeps equal
    // values in their original order, as the previous pairwise scan did.
    std::vector<std::size_t> order(n);
    for (std::size_t k = 0; k < n; ++k) order[k] = k;
    std::stable_sort(order.begin(), order.end(),
            [&r](std::size_t a, std::size_t b) {
                return r[a] < r[b];
            });

    std::vector<double> rk(n);
    std::size_t i = 0;
    while (i < n) {
        // [i, j] is a run of equal values in sorted order.
        std::size_t j = i;
        while (j + 1 < n && r[order[j]] == r[order[j + 1]]) j++;
        // Mean of the 1-based ranks i + 1 .. j + 1.
        const double averageRank = (i + j + 2) / 2.;
        for (std::size_t k = i; k <= j; k++) {
            rk[order[k]] = averageRank;
        }
        i = j + 1;
    }
    return rk;
}
true : false; 100 | double pval = NAN; 101 | double mx = 0.0; 102 | double my = 0.0; 103 | double df, stderr; 104 | Stats stats; 105 | 106 | for (auto e : x) { 107 | if (!std::isnan(e)) mx += e; 108 | } 109 | mx = mx / x.size(); 110 | for (auto e : y) { 111 | if (!std::isnan(e)) my += e; 112 | } 113 | my = my / y.size(); 114 | 115 | double vx = stats.variance(x); 116 | double vy = stats.variance(y); 117 | 118 | if (equal) { 119 | df = static_cast (x.size() + y.size() - 2); 120 | double v = 0; 121 | if (x.size() > 1) v = v + static_cast (x.size() - 1) * vx; 122 | if (y.size() > 1) v = v + static_cast (y.size() - 1) * vy; 123 | v = v / df; 124 | stderr = std::sqrt(v * (1 / static_cast (x.size()) + 1 / static_cast (y.size()))); 125 | } else { 126 | double stderrx = std::sqrt(vx / static_cast (x.size())); 127 | double stderry = std::sqrt(vy / static_cast (y.size())); 128 | stderr = std::sqrt(std::pow(stderrx, 2) + std::pow(stderry, 2)); 129 | df = std::pow(stderr, 4) / (std::pow(stderrx, 4) / static_cast (x.size() - 1) + std::pow(stderry, 4) / static_cast (y.size() - 1)); 130 | } 131 | double tstat = (mx - my) / stderr; 132 | pval = 2 * pt(-std::abs(tstat), df, 1, 0); 133 | return pval; 134 | } 135 | 136 | double WilcoxTest::pvalue(std::vector& x_in, std::vector& y_in) { 137 | if (x_in.empty() or y_in.empty()) { 138 | throw new exceptions::EmptyDatasetException("Your dataset is empty"); 139 | } 140 | vector x, y, r; 141 | for (auto v : x_in) 142 | if (!std::isnan(v)) { 143 | x.push_back(v); 144 | r.push_back(v); 145 | } 146 | for (auto v : y_in) { 147 | if (!std::isnan(v)) { 148 | y.push_back(v); 149 | r.push_back(v); 150 | } 151 | } 152 | 153 | // cout << "Len(x): " << x.size() << endl; 154 | // cout << "Len(y): " << y.size() << endl; 155 | // cout << "Len(r): " << r.size() << endl; 156 | 157 | bool exact = (x.size() < 50) && (y.size() < 50); 158 | double p = NAN; 159 | double stats = 0.0; 160 | 161 | r = vector_rank(r); 162 | for (long unsigned int i = 0; i < 
x.size(); i++) stats += r[i]; 163 | // cout << "stats 1: " << stats << endl; 164 | 165 | stats -= static_cast (x.size()) * (static_cast (x.size()) + 1) / 2; 166 | // cout << "stats 2: " << stats << endl; 167 | 168 | vector r_unique(r.begin(), r.end()); 169 | std::sort(r_unique.begin(), r_unique.end()); 170 | auto last = std::unique(r_unique.begin(), r_unique.end()); 171 | r_unique.erase(last, r_unique.end()); 172 | bool ties = (r.size() != r_unique.size()); 173 | 174 | // cout << "Len(unique(r)): " << r_unique.size() << endl; 175 | // cout << "Is exact: " << exact << endl; 176 | // cout << "TIES: " << ties << endl; 177 | // fflush(NULL); 178 | 179 | if (exact && !ties) { 180 | if (stats > (static_cast (x.size()) * static_cast (y.size()) / 2.0)) { 181 | p = pwilcox(stats - 1, static_cast (x.size()), static_cast (y.size()), false, false); 182 | } else { 183 | p = pwilcox(stats, static_cast (x.size()), static_cast (y.size()), true, false); 184 | } 185 | if (std::isnan(p)) return NAN; 186 | p = min(2 * p, 1); 187 | } else { 188 | double z = stats - static_cast (x.size()) * static_cast (y.size()) / 2.0; 189 | double sigma = sqrt((static_cast (x.size()) * static_cast (y.size()) / 12) \ 190 | * ((static_cast (x.size()) + static_cast (y.size()) + 1) - \ 191 | sum_nties(r, r_unique) / ((static_cast (x.size()) + static_cast (y.size())) \ 192 | * (static_cast (x.size()) + static_cast (y.size()) - 1)))); 193 | double correction = sign(z) * 0.5; 194 | // printf("Z: %f\n", z); 195 | // printf("SIGMA: %f\n", sigma); 196 | // printf("CORRECTION: %f\n", correction); 197 | z = (z - correction) / sigma; 198 | // printf("Z: %f\n", z); 199 | if (std::isnan(z)){ 200 | // cout << "PValue: NaN" << endl; 201 | return NAN; 202 | } 203 | p = 2 * min(pnorm5(z, 0, 1, true, false), pnorm5(z, 0, 1, false, false)); 204 | // cout << "PValue: " << p << endl; 205 | } 206 | return p; 207 | } 208 | 209 | vector cummin(vector & c) { 210 | vector cmin; 211 | double min = INFINITY; 212 | for (auto it = 
c.begin(); it != c.end(); ++it) { 213 | if (std::isnan(*it) || std::isnan(min)) 214 | min = min + *it; /* propagate NA and NaN */ 215 | else 216 | min = (min < *it) ? min : *it; 217 | cmin.push_back(min); 218 | } 219 | return cmin; 220 | } 221 | 222 | std::vector sorted_order(std::vector &x, bool decreasing) { 223 | std::vector y(x.size()); 224 | std::size_t n(0); 225 | std::generate(std::begin(y), std::end(y), [&] { 226 | return n++; }); 227 | 228 | if (decreasing) { 229 | std::sort(std::begin(y), 230 | std::end(y), 231 | [&](int i1, int i2) { 232 | return x[i1] > x[i2]; }); 233 | } else { 234 | std::sort(std::begin(y), 235 | std::end(y), 236 | [&](int i1, int i2) { 237 | return x[i1] < x[i2]; }); 238 | } 239 | return y; 240 | } 241 | 242 | std::vector sorted_order(std::vector &x, bool decreasing) { 243 | std::vector y(x.size()); 244 | std::size_t n(0); 245 | std::generate(std::begin(y), std::end(y), [&] { 246 | return n++; }); 247 | 248 | if (decreasing) { 249 | std::sort(std::begin(y), 250 | std::end(y), 251 | [&](int i1, int i2) { 252 | return x[i1] > x[i2]; }); 253 | } else { 254 | std::sort(std::begin(y), 255 | std::end(y), 256 | [&](int i1, int i2) { 257 | return x[i1] < x[i2]; }); 258 | } 259 | return y; 260 | } 261 | 262 | /** 263 | * Calculates FDR correction. 264 | * It assumes no NAN are included 265 | * 266 | * @param c Vector of floats (P-Values) 267 | * @return 268 | */ 269 | vector FDRCorrection::fdr_correction(vector & c) { 270 | vector fdr; 271 | vector f; 272 | int n = c.size(); 273 | vector c_indexes_sorted = sorted_order(c, true); 274 | vector rc_indexes_sorted = sorted_order(c_indexes_sorted, false); 275 | 276 | int i = c.size(); 277 | for (auto v : c_indexes_sorted) { 278 | double m = static_cast (n) / static_cast (i) * c[v]; 279 | m = (m > 1) ? 
1 : m; 280 | f.push_back(m); 281 | i--; 282 | } 283 | for (auto v : rc_indexes_sorted) { 284 | fdr.push_back(f[v]); 285 | } 286 | return fdr; 287 | } 288 | 289 | 290 | -------------------------------------------------------------------------------- /src/TextParser.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: FileParserFactory.cpp 3 | * Author: veraalva 4 | * 5 | * Created on April 11, 2016, 12:50 PM 6 | */ 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include "Exceptions.h" 20 | #include "TextParser.h" 21 | #include "bstring.h" 22 | 23 | using namespace std; 24 | using namespace parsers; 25 | 26 | TextParser::TextParser() { 27 | this->closeFile = false; 28 | this->bufferSize = 10000000; 29 | this->currPosition = 0; 30 | this->backup = false; 31 | } 32 | 33 | TextParser::~TextParser() { 34 | if (this->closeFile && this->fileToParse.is_open()) this->fileToParse.close(); 35 | } 36 | 37 | void TextParser::clean() { 38 | if (this->closeFile && this->fileToParse.is_open()) this->fileToParse.close(); 39 | this->closeFile = false; 40 | this->words.clear(); 41 | this->line.erase(); 42 | this->backup = false; 43 | this->bufferSize = 10000000; 44 | this->buffer.erase(); 45 | this->currPosition = 0; 46 | } 47 | 48 | bool TextParser::iterate(std::string dontStartWith) { 49 | if (!fileToParse.is_open()) { 50 | throw exceptions::FileHandledException("Can't do an iteration in a NULL file"); 51 | } 52 | if (currPosition == std::string::npos) return false; 53 | line.clear(); 54 | while (1) { 55 | if (currPosition == 0) fileToParse.read(&buffer[0], bufferSize); 56 | size_t pos = buffer.find_first_of("\n", currPosition); 57 | // cout << "Pos: " << pos << " size: " << static_cast (fileToParse.gcount()) << endl; 58 | if (pos != std::string::npos && pos < static_cast (fileToParse.gcount())) { 59 | if 
(backup) line += buffer.substr(currPosition, pos - currPosition); 60 | else line = buffer.substr(currPosition, pos - currPosition); 61 | backup = false; 62 | currPosition = pos + 1; 63 | if (!lineStartWith(dontStartWith)) { 64 | // cout << "LINE: [" << line << "]" << endl; 65 | return true; 66 | } 67 | } else { 68 | if (!backup) { 69 | line.clear(); 70 | backup = true; 71 | } 72 | line += buffer.substr(currPosition, fileToParse.gcount() - currPosition); 73 | if (fileToParse.eof()) { 74 | currPosition = std::string::npos; 75 | if (!lineStartWith(dontStartWith) && 76 | !lineStartWith("\n") && 77 | !line.empty()) return true; 78 | return false; 79 | } 80 | currPosition = 0; 81 | } 82 | } 83 | return false; 84 | } 85 | 86 | bool TextParser::iterate(std::string dontStartWith, std::string delimiters) { 87 | while (iterate(dontStartWith)) { 88 | BString::split(line, delimiters, words); 89 | return true; 90 | } 91 | return false; 92 | } 93 | 94 | 95 | -------------------------------------------------------------------------------- /src/bd0.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "bmath.h" 7 | 8 | double bd0(double x, double np) { 9 | double ej, s, s1, v; 10 | int j; 11 | 12 | if (!R_FINITE(x) || !R_FINITE(np) || np == 0.0) ML_ERR_return_NAN; 13 | 14 | if (fabs(x - np) < 0.1 * (x + np)) { 15 | v = (x - np) / (x + np); // might underflow to 0 16 | s = (x - np) * v; /* s using v -- change by MM */ 17 | if (fabs(s) < DBL_MIN) return s; 18 | ej = 2 * x*v; 19 | v = v*v; 20 | for (j = 1; j < 1000; j++) { /* Taylor series; 1000: no infinite loop 21 | as |v| < .1, v^2000 is "zero" */ 22 | ej *= v; // = v^(2j+1) 23 | s1 = s + ej / ((j << 1) + 1); 24 | if (s1 == s) /* last term was effectively 0 */ 25 | return s1; 26 | s = s1; 27 | } 28 | } 29 | /* else: | x - np | is not too small */ 30 | return (x * log(x / np) + np - x); 31 | } 32 | 
-------------------------------------------------------------------------------- /src/bstring.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | * File: cString.cpp 3 | * Author: veraalva 4 | * 5 | * Created on April 13, 2016, 3:47 PM 6 | */ 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | 19 | #include "bstring.h" 20 | 21 | using namespace std; 22 | 23 | BString::BString() { 24 | } 25 | 26 | BString::~BString() { 27 | } 28 | 29 | /** 30 | * Shuffle the string 31 | * @param str string to be shuffled 32 | * @return string 33 | */ 34 | std::string BString::shuffle(std::string str) { 35 | unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); 36 | std::shuffle(str.begin(), str.end(), std::default_random_engine(seed)); 37 | return str; 38 | } 39 | 40 | /** 41 | * Count the number of occurrences of characters in c in the string str 42 | * 43 | * @param str the string to count on 44 | * @param c the characters to be counted 45 | * @return the number of occurrences 46 | */ 47 | int BString::countCharacter(std::string str, std::string characters) { 48 | int count = 0; 49 | for (auto it = str.begin(); it < str.end(); ++it) { 50 | for (auto it1 = characters.begin(); it1 < characters.end(); ++it1) { 51 | if (*it == *it1) count++; 52 | } 53 | } 54 | return count; 55 | } 56 | 57 | /** 58 | * Split string in a vector of strings using a delimiter 59 | * @param s string to be split 60 | * @param delim delimiter 61 | * @param elems vector with result 62 | * @return vector with result 63 | */ 64 | std::vector &BString::split(const std::string &s, std::string delim, std::vector &elems) { 65 | std::size_t prev = 0, pos; 66 | elems.clear(); 67 | while ((pos = s.find_first_of(delim, prev)) != std::string::npos) { 68 | if (pos > prev) 69 | elems.push_back(s.substr(prev, pos - prev)); 70 | prev = pos + 1; 71 | } 72 | if (prev 
< s.length()) 73 | elems.push_back(s.substr(prev, std::string::npos)); 74 | return elems; 75 | } 76 | 77 | /** 78 | * Split string in a set of strings using a delimiter 79 | * @param s string to be split 80 | * @param delim delimiter 81 | * @param elems set with result 82 | * @return set with result 83 | */ 84 | std::set &BString::split(const std::string &s, std::string delim, std::set &elems) { 85 | std::size_t prev = 0, pos; 86 | elems.clear(); 87 | while ((pos = s.find_first_of(delim, prev)) != std::string::npos) { 88 | if (pos > prev) 89 | elems.insert(s.substr(prev, pos - prev)); 90 | prev = pos + 1; 91 | } 92 | if (prev < s.length()) 93 | elems.insert(s.substr(prev, std::string::npos)); 94 | return elems; 95 | } -------------------------------------------------------------------------------- /src/chebyshev.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double chebyshev_eval(double x, const double *a, const int n) { 6 | double b0, b1, b2, twox; 7 | int i; 8 | 9 | if (n < 1 || n > 1000) ML_ERR_return_NAN; 10 | 11 | if (x < -1.1 || x > 1.1) ML_ERR_return_NAN; 12 | 13 | twox = x * 2; 14 | b2 = b1 = 0; 15 | b0 = 0; 16 | for (i = 1; i <= n; i++) { 17 | b2 = b1; 18 | b1 = b0; 19 | b0 = twox * b1 - b2 + a[n - i]; 20 | } 21 | return (b0 - b2) * 0.5; 22 | } 23 | 24 | -------------------------------------------------------------------------------- /src/choose.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | /* These are recursive, so we should do a stack check */ 6 | 7 | #ifndef MATHLIB_STANDALONE 8 | void R_CheckStack(void); 9 | #endif 10 | 11 | double lfastchoose(double n, double k) { 12 | return -1.0* log(n + 1.0) - lbeta(n - k + 1.0, k + 1.0); 13 | } 14 | 15 | /* mathematically the same: 16 | less stable typically, but useful if n-k+1 < 0 : */ 17 | static double lfastchoose2(double 
n, double k, int *s_choose) { 18 | double r; 19 | r = lgammafn_sign(n - k + 1., s_choose); 20 | return lgammafn(n + 1.) - lgammafn(k + 1.) - r; 21 | } 22 | 23 | double lchoose(double n, double k) { 24 | k = R_forceint(k); 25 | #ifdef IEEE_754 26 | /* NaNs propagated correctly */ 27 | if (ISNAN(n) || ISNAN(k)) return n + k; 28 | #endif 29 | if (k < 2) { 30 | if (k < 0) return -INFINITY; 31 | if (k == 0) return 0.; 32 | /* else: k == 1 */ 33 | return log(fabs(n)); 34 | } 35 | /* else: k >= 2 */ 36 | if (n < 0) { 37 | return lchoose(-n + k - 1, k); 38 | } else if (R_IS_INT(n)) { 39 | n = R_forceint(n); 40 | if (n < k) return -INFINITY; 41 | /* k <= n :*/ 42 | if (n - k < 2) return lchoose(n, n - k); /* <- Symmetry */ 43 | /* else: n >= k+2 */ 44 | return lfastchoose(n, k); 45 | } 46 | /* else non-integer n >= 0 : */ 47 | if (n < k - 1) { 48 | int s; 49 | return lfastchoose2(n, k, &s); 50 | } 51 | return lfastchoose(n, k); 52 | } 53 | 54 | #define k_small_max 30 55 | 56 | /* 30 is somewhat arbitrary: it is on the *safe* side: 57 | * both speed and precision are clearly improved for k < 30. 58 | */ 59 | double choose(double n, double k) { 60 | double r; 61 | k = R_forceint(k); 62 | #ifdef IEEE_754 63 | /* NaNs propagated correctly */ 64 | if (ISNAN(n) || ISNAN(k)) return n + k; 65 | #endif 66 | if (k < k_small_max) { 67 | int j; 68 | if (n - k < k && n >= 0 && R_IS_INT(n)) k = n - k; /* <- Symmetry */ 69 | if (k < 0) return 0.; 70 | if (k == 0) return 1.; 71 | /* else: k >= 1 */ 72 | r = n; 73 | for (j = 2; j <= k; j++) 74 | r *= (n - j + 1) / j; 75 | return R_IS_INT(n) ? 
R_forceint(r) : r; 76 | /* might have got rounding errors */ 77 | } 78 | /* else: k >= k_small_max */ 79 | if (n < 0) { 80 | r = choose(-n + k - 1, k); 81 | if (ODD(k)) r = -r; 82 | return r; 83 | } else if (R_IS_INT(n)) { 84 | n = R_forceint(n); 85 | if (n < k) return 0.0; 86 | if (n - k < k_small_max) return choose(n, n - k); /* <- Symmetry */ 87 | return R_forceint(exp(lfastchoose(n, k))); 88 | } 89 | /* else non-integer n >= 0 : */ 90 | if (n < k - 1) { 91 | int s_choose; 92 | r = lfastchoose2(n, k, /* -> */ &s_choose); 93 | return s_choose * exp(r); 94 | } 95 | return exp(lfastchoose(n, k)); 96 | } 97 | -------------------------------------------------------------------------------- /src/dnorm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "bmath.h" 7 | 8 | double dnorm4(double x, double mu, double sigma, int give_log) { 9 | #ifdef IEEE_754 10 | if (ISNAN(x) || ISNAN(mu) || ISNAN(sigma)) 11 | return x + mu + sigma; 12 | #endif 13 | if (!R_FINITE(sigma)) return R_D__0; 14 | if (!R_FINITE(x) && mu == x) return ML_NAN; /* x-mu is NaN */ 15 | if (sigma <= 0.0) { 16 | if (sigma < 0) ML_ERR_return_NAN; 17 | /* sigma == 0 */ 18 | return (x == mu) ? 
ML_POSINF : R_D__0; 19 | } else { 20 | x = (x - mu) / sigma; 21 | } 22 | if (!R_FINITE(x)) return R_D__0; 23 | 24 | x = fabs(x); 25 | if (x >= 2 * sqrt(DBL_MAX)) return R_D__0; 26 | if (give_log) 27 | return -(M_LN_SQRT_2PI + 0.5 * x * x + log(sigma)); 28 | // M_1_SQRT_2PI = 1 / sqrt(2 * pi) 29 | // more accurate, less fast : 30 | if (x < 5 && fabs(sigma) > 1.0e-15) return M_1_SQRT_2PI * exp(-0.5 * x * x) / sigma; 31 | 32 | /* ELSE: 33 | 34 | * x*x may lose upto about two digits accuracy for "large" x 35 | * Morten Welinder's proposal for PR#15620 36 | * https://bugs.r-project.org/bugzilla/show_bug.cgi?id=15620 37 | 38 | * -- 1 -- No hoop jumping when we underflow to zero anyway: 39 | 40 | * -x^2/2 < log(2)*.Machine$double.min.exp <==> 41 | * x > sqrt(-2*log(2)*.Machine$double.min.exp) =IEEE= 37.64031 42 | * but "thanks" to denormalized numbers, underflow happens a bit later, 43 | * effective.D.MIN.EXP <- with(.Machine, double.min.exp + double.ulp.digits) 44 | * for IEEE, DBL_MIN_EXP is -1022 but "effective" is -1074 45 | * ==> boundary = sqrt(-2*log(2)*(.Machine$double.min.exp + .Machine$double.ulp.digits)) 46 | * =IEEE= 38.58601 47 | * [on one x86_64 platform, effective boundary a bit lower: 38.56804] 48 | */ 49 | if (x > sqrt(-2 * M_LN2 * (DBL_MIN_EXP + 1 - DBL_MANT_DIG))) return 0.; 50 | 51 | /* Now, to get full accurary, split x into two parts, 52 | * x = x1+x2, such that |x2| <= 2^-16. 53 | * Assuming that we are using IEEE doubles, that means that 54 | * x1*x1 is error free for x<1024 (but we have x < 38.6 anyway). 55 | 56 | * If we do not have IEEE this is still an improvement over the naive formula. 
57 | */ 58 | double x1 = // R_forceint(x * 65536) / 65536 = 59 | ldexp(R_forceint(ldexp(x, 16)), -16); 60 | double x2 = x - x1; 61 | double result; 62 | if (fabs(sigma) > 1e-25) 63 | result = M_1_SQRT_2PI / sigma * 64 | (exp(-0.5 * x1 * x1) * exp((-0.5 * x2 - x1) * x2)); 65 | else 66 | result = NAN; 67 | return result; 68 | } 69 | -------------------------------------------------------------------------------- /src/dt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "bmath.h" 7 | 8 | double dt(double x, double n, int give_log) { 9 | #ifdef IEEE_754 10 | if (ISNAN(x) || ISNAN(n)) 11 | return x + n; 12 | #endif 13 | if (n <= 0) ML_ERR_return_NAN; 14 | if (!R_FINITE(x)) 15 | return R_D__0; 16 | if (!R_FINITE(n)) 17 | return dnorm(x, 0., 1., give_log); 18 | 19 | double u, t = -bd0(n / 2., (n + 1) / 2.) + stirlerr((n + 1) / 2.) - stirlerr(n / 2.); 20 | double x2n = INFINITY; 21 | if (fabs(n) > 1e-25) 22 | x2n = x * x / n; // in [0, Inf] 23 | double ax = 0., // <- -Wpedantic 24 | l_x2n; // := log(sqrt(1 + x2n)) = log(1 + x2n)/2 25 | Rboolean lrg_x2n = (x2n > 1. / DBL_EPSILON); 26 | if (lrg_x2n) { // large x^2/n : 27 | ax = fabs(x); 28 | l_x2n = log(ax) - log(n) / 2.; // = log(x2n)/2 = 1/2 * log(x^2 / n) 29 | u = // log(1 + x2n) * n/2 = n * log(1 + x2n)/2 = 30 | n * l_x2n; 31 | } else if (x2n > 0.2) { 32 | l_x2n = log(1 + x2n) / 2.; 33 | u = n * l_x2n; 34 | } else { 35 | l_x2n = log1p(x2n) / 2.; 36 | u = -bd0(n / 2., (n + x * x) / 2.) + x * x / 2.; 37 | } 38 | 39 | //old: return R_D_fexp(M_2PI*(1+x2n), t-u); 40 | 41 | // R_D_fexp(f,x) := (give_log ? 
-0.5*log(f)+(x) : exp(x)/sqrt(f)) 42 | // f = 2pi*(1+x2n) 43 | // ==> 0.5*log(f) = log(2pi)/2 + log(1+x2n)/2 = log(2pi)/2 + l_x2n 44 | // 1/sqrt(f) = 1/sqrt(2pi * (1+ x^2 / n)) 45 | // = 1/sqrt(2pi)/(|x|/sqrt(n)*sqrt(1+1/x2n)) 46 | // = M_1_SQRT_2PI * sqrt(n)/ (|x|*sqrt(1+1/x2n)) 47 | if (give_log) 48 | return t - u - (M_LN_SQRT_2PI + l_x2n); 49 | 50 | // else : if(lrg_x2n) : sqrt(1 + 1/x2n) ='= sqrt(1) = 1 51 | double I_sqrt_ = (lrg_x2n ? sqrt(n) / ax : exp(-l_x2n)); 52 | return exp(t - u) * M_1_SQRT_2PI * I_sqrt_; 53 | } 54 | -------------------------------------------------------------------------------- /src/gamma.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double gammafn(double x) { 6 | const static double gamcs[42] = { 7 | +.8571195590989331421920062399942e-2, 8 | +.4415381324841006757191315771652e-2, 9 | +.5685043681599363378632664588789e-1, 10 | -.4219835396418560501012500186624e-2, 11 | +.1326808181212460220584006796352e-2, 12 | -.1893024529798880432523947023886e-3, 13 | +.3606925327441245256578082217225e-4, 14 | -.6056761904460864218485548290365e-5, 15 | +.1055829546302283344731823509093e-5, 16 | -.1811967365542384048291855891166e-6, 17 | +.3117724964715322277790254593169e-7, 18 | -.5354219639019687140874081024347e-8, 19 | +.9193275519859588946887786825940e-9, 20 | -.1577941280288339761767423273953e-9, 21 | +.2707980622934954543266540433089e-10, 22 | -.4646818653825730144081661058933e-11, 23 | +.7973350192007419656460767175359e-12, 24 | -.1368078209830916025799499172309e-12, 25 | +.2347319486563800657233471771688e-13, 26 | -.4027432614949066932766570534699e-14, 27 | +.6910051747372100912138336975257e-15, 28 | -.1185584500221992907052387126192e-15, 29 | +.2034148542496373955201026051932e-16, 30 | -.3490054341717405849274012949108e-17, 31 | +.5987993856485305567135051066026e-18, 32 | -.1027378057872228074490069778431e-18, 33 | 
+.1762702816060529824942759660748e-19, 34 | -.3024320653735306260958772112042e-20, 35 | +.5188914660218397839717833550506e-21, 36 | -.8902770842456576692449251601066e-22, 37 | +.1527474068493342602274596891306e-22, 38 | -.2620731256187362900257328332799e-23, 39 | +.4496464047830538670331046570666e-24, 40 | -.7714712731336877911703901525333e-25, 41 | +.1323635453126044036486572714666e-25, 42 | -.2270999412942928816702313813333e-26, 43 | +.3896418998003991449320816639999e-27, 44 | -.6685198115125953327792127999999e-28, 45 | +.1146998663140024384347613866666e-28, 46 | -.1967938586345134677295103999999e-29, 47 | +.3376448816585338090334890666666e-30, 48 | -.5793070335782135784625493333333e-31 49 | }; 50 | 51 | int i; 52 | double y; 53 | double value; 54 | 55 | #ifdef NOMORE_FOR_THREADS 56 | static int ngam = 0; 57 | static double xmin = 0, xmax = 0., xsml = 0., dxrel = 0.; 58 | 59 | /* Initialize machine dependent constants, the first time gamma() is called. 60 | FIXME for threads ! */ 61 | if (ngam == 0) { 62 | ngam = chebyshev_init(gamcs, 42, DBL_EPSILON / 20); /*was .1*d1mach(3)*/ 63 | gammalims(&xmin, &xmax); /*-> ./gammalims.c */ 64 | xsml = exp(fmax2(log(DBL_MIN), -log(DBL_MAX)) + 0.01); 65 | /* = exp(.01)*DBL_MIN = 2.247e-308 for IEEE */ 66 | dxrel = sqrt(DBL_EPSILON); /*was sqrt(d1mach(4)) */ 67 | } 68 | #else 69 | /* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 : 70 | * (xmin, xmax) are non-trivial, see ./gammalims.c 71 | * xsml = exp(.01)*DBL_MIN 72 | * dxrel = sqrt(DBL_EPSILON) = 2 ^ -26 73 | */ 74 | #define ngam 22 75 | #define xmin -170.5674972726612 76 | #define xmax 171.61447887182298 77 | #define xsml 2.2474362225598545e-308 78 | #define dxrel 1.490116119384765696e-8 79 | #endif 80 | 81 | if (isnan(x)) return x; 82 | 83 | /* If the argument is exactly zero or a negative integer 84 | * then return NaN. 
*/ 85 | if (x == 0 || (x < 0 && x == (long) x)) { 86 | return NAN; 87 | } 88 | 89 | y = fabs(x); 90 | 91 | if (y <= 10) { 92 | 93 | /* Compute gamma(x) for -10 <= x <= 10 94 | * Reduce the interval and find gamma(1 + y) for 0 <= y < 1 95 | * first of all. */ 96 | 97 | int n = floor(x); 98 | if (x < 0) --n; 99 | y = x - n; /* n = floor(x) ==> y in [ 0, 1 ) */ 100 | --n; 101 | value = chebyshev_eval(y * 2 - 1, gamcs, ngam) + .9375; 102 | if (n == 0) 103 | return value; /* x = 1.dddd = 1+y */ 104 | 105 | if (n < 0) { 106 | /* compute gamma(x) for -10 <= x < 1 */ 107 | 108 | /* exact 0 or "-n" checked already above */ 109 | 110 | /* The argument is so close to 0 that the result would overflow. */ 111 | if (y < xsml) { 112 | if (x > 0) return INFINITY; 113 | else return -INFINITY; 114 | } 115 | 116 | n = -n; 117 | 118 | for (i = 0; i < n; i++) { 119 | value /= (fabs(x + i) > 10.0e-15) ? (x + i): 1.0; 120 | } 121 | return value; 122 | } else { 123 | /* gamma(x) for 2 <= x <= 10 */ 124 | 125 | for (i = 1; i <= n; i++) { 126 | value *= (y + i); 127 | } 128 | return value; 129 | } 130 | } else { 131 | /* gamma(x) for y = |x| > 10. */ 132 | 133 | if (x > xmax) { /* Overflow */ 134 | return INFINITY; 135 | } 136 | 137 | if (x < xmin) { /* Underflow */ 138 | return 0.; 139 | } 140 | 141 | if (y <= 50 && y == (int) y) { /* compute (n - 1)! */ 142 | value = 1.; 143 | for (i = 2; i < y; i++) value *= i; 144 | } else { /* normal case */ 145 | value = exp((y - 0.5) * log(y) - y + M_LN_SQRT_2PI + 146 | ((2 * y == (int) 2 * y) ? 
stirlerr(y) : lgammacor(y))); 147 | } 148 | if (x > 0) 149 | return value; 150 | 151 | double sinpiy = sin(M_PI * y); 152 | if (sinpiy == 0) { /* Negative integer arg - overflow */ 153 | return INFINITY; 154 | } 155 | 156 | return -M_PI / (y * sinpiy * value); 157 | } 158 | } -------------------------------------------------------------------------------- /src/lbeta.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double lbeta(double a, double b) { 6 | double corr, p, q; 7 | 8 | #ifdef IEEE_754 9 | if (ISNAN(a) || ISNAN(b)) 10 | return a + b; 11 | #endif 12 | p = q = a; 13 | if (b < p) p = b; /* := min(a,b) */ 14 | if (b > q) q = b; /* := max(a,b) */ 15 | 16 | /* both arguments must be >= 0 */ 17 | if (p < 0) 18 | ML_ERR_return_NAN 19 | else if (p == 0) { 20 | return INFINITY; 21 | } else if (!isfinite(q)) { /* q == +Inf */ 22 | return -INFINITY; 23 | } 24 | 25 | if (p >= 10) { 26 | /* p and q are big. */ 27 | corr = lgammacor(p) + lgammacor(q) - lgammacor(p + q); 28 | return log(q) * -0.5 + M_LN_SQRT_2PI + corr 29 | + (p - 0.5) * log(p / (p + q)) + q * log1p(-p / (p + q)); 30 | } else if (q >= 10) { 31 | /* p is small, but q is big. */ 32 | corr = lgammacor(q) - lgammacor(p + q); 33 | double d = p + q; 34 | if (fabs(d) > 1e-25) 35 | d = log1p(-p / d); 36 | else 37 | return INFINITY; 38 | return lgammafn(p) + corr + p - p * log(p + q) 39 | + (q - 0.5) * d; 40 | } else 41 | /* p and q are small: p <= q < 10. 
*/ 42 | /* R change for very small args */ 43 | if (p < 1e-306) return lgamma(p) + (lgamma(q) - lgamma(p + q)); 44 | return log(gammafn(p) * (gammafn(q) / gammafn(p + q))); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/lgamma.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double lgammafn_sign(double x, int *sgn) { 6 | double ans, y, sinpiy; 7 | 8 | #ifdef NOMORE_FOR_THREADS 9 | static double xmax = 0.; 10 | static double dxrel = 0.; 11 | 12 | if (xmax == 0) {/* initialize machine dependent constants _ONCE_ */ 13 | xmax = d1mach(2) / log(d1mach(2)); /* = 2.533 e305 for IEEE double */ 14 | dxrel = sqrt(d1mach(4)); /* sqrt(Eps) ~ 1.49 e-8 for IEEE double */ 15 | } 16 | #else 17 | /* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 : 18 | xmax = DBL_MAX / log(DBL_MAX) = 2^1024 / (1024 * log(2)) = 2^1014 / log(2) 19 | dxrel = sqrt(DBL_EPSILON) = 2^-26 = 5^26 * 1e-26 (is *exact* below !) 20 | */ 21 | #define xmax 2.5327372760800758e+305 22 | #define dxrel 1.490116119384765696e-8 23 | #endif 24 | 25 | if (sgn != NULL) *sgn = 1; 26 | 27 | #ifdef IEEE_754 28 | if (ISNAN(x)) return x; 29 | #endif 30 | 31 | if (x < 0 && fmod(floor(-x), 2.) == 0) 32 | if (sgn != NULL) *sgn = -1; 33 | 34 | if (x <= 0 && x == trunc(x)) { /* Negative integer argument */ 35 | return INFINITY; /* +Inf, since lgamma(x) = log|gamma(x)| */ 36 | } 37 | 38 | y = fabs(x); 39 | 40 | if (y < 1e-306) return -log(x); // denormalized range, R change 41 | if (y <= 10) return log(fabs(gammafn(x))); 42 | /* 43 | ELSE y = |x| > 10 ---------------------- */ 44 | 45 | if (y > xmax) { 46 | return INFINITY; 47 | } 48 | 49 | if (x > 0) { /* i.e. y = x > 10 */ 50 | #ifdef IEEE_754 51 | if (x > 1e17) 52 | return (x * (log(x) - 1.)); 53 | else if (x > 4934720.) 
54 | return (M_LN_SQRT_2PI + (x - 0.5) * log(x) - x); 55 | else 56 | #endif 57 | return M_LN_SQRT_2PI + (x - 0.5) * log(x) - x + lgammacor(x); 58 | } 59 | /* else: x < -10; y = -x */ 60 | sinpiy = fabs(sin(M_PI * y)); 61 | 62 | if (sinpiy == 0) { /* Negative integer argument === 63 | Now UNNECESSARY: caught above */ 64 | ML_ERR_return_NAN; 65 | } 66 | 67 | ans = M_LN_SQRT_PId2 + (x - 0.5) * log(y) - x - log(sinpiy) - lgammacor(y); 68 | 69 | return ans; 70 | } 71 | 72 | double lgammafn(double x) { 73 | return lgammafn_sign(x, NULL); 74 | } -------------------------------------------------------------------------------- /src/lgammacor.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double lgammacor(double x) { 6 | const static double algmcs[15] = { 7 | +.1666389480451863247205729650822e+0, 8 | -.1384948176067563840732986059135e-4, 9 | +.9810825646924729426157171547487e-8, 10 | -.1809129475572494194263306266719e-10, 11 | +.6221098041892605227126015543416e-13, 12 | -.3399615005417721944303330599666e-15, 13 | +.2683181998482698748957538846666e-17, 14 | -.2868042435334643284144622399999e-19, 15 | +.3962837061046434803679306666666e-21, 16 | -.6831888753985766870111999999999e-23, 17 | +.1429227355942498147573333333333e-24, 18 | -.3547598158101070547199999999999e-26, 19 | +.1025680058010470912000000000000e-27, 20 | -.3401102254316748799999999999999e-29, 21 | +.1276642195630062933333333333333e-30 22 | }; 23 | 24 | double tmp; 25 | 26 | /* For IEEE double precision DBL_EPSILON = 2^-52 = 2.220446049250313e-16 : 27 | * xbig = 2 ^ 26.5 28 | * xmax = DBL_MAX / 48 = 2^1020 / 3 */ 29 | #define nalgm 5 30 | #define xbig 94906265.62425156 31 | #define xmax 3.745194030963158e306 32 | 33 | if (x < 10) 34 | ML_ERR_return_NAN 35 | else if (x >= xmax) { 36 | /* allow to underflow below */ 37 | } else if (x < xbig) { 38 | tmp = 10 / x; 39 | return chebyshev_eval(tmp * tmp * 2 - 1, algmcs, nalgm) / 
x; 40 | } 41 | double d = x * 12; 42 | if (fabs(d) > 1e-15) 43 | return 1 / d; 44 | return INFINITY; 45 | } 46 | 47 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include "api/BamReader.h" 12 | 13 | #include "Global.h" 14 | #include "Exceptions.h" 15 | #include "TimeUtils.h" 16 | #include "bstring.h" 17 | #include "TextParser.h" 18 | #include "Sequence.h" 19 | #include "GenomeFactory.h" 20 | #include "ReadFactory.h" 21 | 22 | using namespace std; 23 | using namespace ngs; 24 | using namespace sequence; 25 | using namespace genome; 26 | using namespace BamTools; 27 | 28 | Global *Global::s_instance = 0; 29 | 30 | void print_usage(char *program_name, int exit_code) { 31 | cerr << "\n********************************************************************************\n"; 32 | cerr << "\nUsage: " << program_name; 33 | cerr << "\n\n" << program_name << " options:\n\n"; 34 | cerr << "-v Print info\n"; 35 | cerr << "-version Print version\n"; 36 | cerr << "-h Display this usage information.\n"; 37 | cerr << "-g GTF file\n"; 38 | cerr << "-d Directory with the BAM files\n"; 39 | cerr << "-b BAM file\n"; 40 | cerr << "-k Gene key to use from GTF file. Default: gene_id\n"; 41 | cerr << "-t Transcript key to use from GTF file. Default: transcript_id\n"; 42 | cerr << "-c Smaller size allowed for an intron created for genes. Default: 16. We recommend to use the reads length\n"; 43 | cerr << "-p Use only properly paired reads. Default: No. Recommended for paired-end reads.\n"; 44 | cerr << "-q Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value.\n"; 45 | cerr << "-o Minimum overlap between a reads and a feature. Default: 8.\n"; 46 | cerr << "-e Extended output. 
This will include transcript level TPM values. Default: No.\n"; 47 | cerr << "-a Print out all features with read counts equal to zero. Default: No.\n"; 48 | cerr << "\n********************************************************************************\n"; 49 | cerr << "\n Roberto Vera Alvarez, PhD\n"; 50 | cerr << " Emails: veraalva@ncbi.nlm.nih.gov\n\n"; 51 | cerr << "********************************************************************************\n"; 52 | exit(exit_code); 53 | } 54 | 55 | int main(int argc, char *argv[]) { 56 | int count = 0; 57 | TimeUtils uTime; 58 | string gtfFileName; 59 | string bamDirName; 60 | string bamFileName; 61 | string geneNameKey = "gene_id"; 62 | string transcriptNameKey = "transcript_id"; 63 | int intronCutOff = 16; 64 | uint16_t minMAPQ = 0; 65 | uint16_t minOverlap = 8; 66 | bool onlyProperlyPaired = false; 67 | bool singleFile = false; 68 | bool extendedOutput = false; 69 | bool all_feat = false; 70 | setfeatures = {"exon"}; 71 | unordered_map featuresToCreate = { 72 | {"exon", "intron"} 73 | }; 74 | ReadFactory readFactory; 75 | 76 | if (argc == 1) { 77 | print_usage(argv[0], 0); 78 | } 79 | 80 | for (int i = 1; i < argc; i++) { 81 | string option(argv[i]); 82 | if (option.compare(0, 1, "-") == 0 && option.compare(1, 1, "-") != 0 && option.size() == 2) { 83 | if (option.compare(1, 1, "h") == 0) { 84 | print_usage(argv[0], 0); 85 | } else if (option.compare(1, 1, "v") == 0) { 86 | Global::instance()->setVerbose(1); 87 | } else if (option.compare(1, 1, "g") == 0) { 88 | i++; 89 | if (i < argc) { 90 | gtfFileName = argv[i]; 91 | if (gtfFileName.compare(0, 1, "-") == 0) { 92 | cerr << "Option g require an argument" << endl; 93 | print_usage(argv[0], -1); 94 | } 95 | } else { 96 | cerr << "Option g require an argument" << endl; 97 | print_usage(argv[0], -1); 98 | } 99 | } else if (option.compare(1, 1, "d") == 0) { 100 | i++; 101 | if (i < argc) { 102 | bamDirName = argv[i]; 103 | if (bamDirName.compare(0, 1, "-") == 0) { 104 | cerr 
<< "Option d require an argument" << endl; 105 | print_usage(argv[0], -1); 106 | } 107 | } else { 108 | cerr << "Option d require an argument" << endl; 109 | print_usage(argv[0], -1); 110 | } 111 | } else if (option.compare(1, 1, "b") == 0) { 112 | i++; 113 | if (i < argc) { 114 | bamFileName = argv[i]; 115 | if (bamFileName.compare(0, 1, "-") == 0) { 116 | cerr << "Option b require an argument" << endl; 117 | print_usage(argv[0], -1); 118 | } 119 | } else { 120 | cerr << "Option b require an argument" << endl; 121 | print_usage(argv[0], -1); 122 | } 123 | } else if (option.compare(1, 1, "k") == 0) { 124 | i++; 125 | if (i < argc) { 126 | geneNameKey = argv[i]; 127 | if (geneNameKey.compare(0, 1, "-") == 0) { 128 | cerr << "Option k require an argument" << endl; 129 | print_usage(argv[0], -1); 130 | } 131 | } else { 132 | cerr << "Option k require an argument" << endl; 133 | print_usage(argv[0], -1); 134 | } 135 | } else if (option.compare(1, 1, "t") == 0) { 136 | i++; 137 | if (i < argc) { 138 | transcriptNameKey = argv[i]; 139 | if (transcriptNameKey.compare(0, 1, "-") == 0) { 140 | cerr << "Option t require an argument" << endl; 141 | print_usage(argv[0], -1); 142 | } 143 | } else { 144 | cerr << "Option t require an argument" << endl; 145 | print_usage(argv[0], -1); 146 | } 147 | } else if (option.compare(1, 1, "c") == 0) { 148 | i++; 149 | if (i < argc) { 150 | string argument(argv[i]); 151 | if (argument.compare(0, 1, "-") == 0) { 152 | cerr << "Option c require an argument" << endl; 153 | print_usage(argv[0], -1); 154 | } 155 | intronCutOff = atoi(argv[i]); 156 | } else { 157 | cerr << "Option c require an argument" << endl; 158 | print_usage(argv[0], -1); 159 | } 160 | } else if (option.compare(1, 1, "q") == 0) { 161 | i++; 162 | if (i < argc) { 163 | string argument(argv[i]); 164 | if (argument.compare(0, 1, "-") == 0) { 165 | cerr << "Option q require an argument" << endl; 166 | print_usage(argv[0], -1); 167 | } 168 | minMAPQ = static_cast 
(atoi(argv[i])); 169 | } else { 170 | cerr << "Option q require an argument" << endl; 171 | print_usage(argv[0], -1); 172 | } 173 | } else if (option.compare(1, 1, "o") == 0) { 174 | i++; 175 | if (i < argc) { 176 | string argument(argv[i]); 177 | if (argument.compare(0, 1, "-") == 0) { 178 | cerr << "Option o require an argument" << endl; 179 | print_usage(argv[0], -1); 180 | } 181 | minOverlap = static_cast (atoi(argv[i])); 182 | } else { 183 | cerr << "Option o require an argument" << endl; 184 | print_usage(argv[0], -1); 185 | } 186 | } else if (option.compare(1, 1, "p") == 0) { 187 | onlyProperlyPaired = true; 188 | } else if (option.compare(1, 1, "e") == 0) { 189 | extendedOutput = true; 190 | } else if (option.compare(1, 1, "a") == 0) { 191 | all_feat = true; 192 | } else { 193 | cerr << "Unsupported option: " << option << endl; 194 | print_usage(argv[0], -1); 195 | } 196 | } else if (option.size() == 8 and option.compare(1, 7, "version") == 0) { 197 | cout << "Version: 0.0.4.1" << endl; 198 | exit(0); 199 | } else { 200 | cerr << "Unsupported option: " << option << endl; 201 | print_usage(argv[0], -1); 202 | } 203 | } 204 | 205 | if (gtfFileName.empty()) { 206 | cerr << "\nGTF is required. See -g option" << endl; 207 | print_usage(argv[0], -1); 208 | } 209 | 210 | if (bamDirName.empty() && bamFileName.empty()) { 211 | cerr << "\nDirectory with the BAM files or a BAM file is required. See -d or -b options" << endl; 212 | print_usage(argv[0], -1); 213 | } 214 | 215 | uTime.setTime(); 216 | cerr << "Reading GTF file ... 
" << endl; 217 | readFactory.getGenomeFactory().setIntronCutOff(intronCutOff); 218 | readFactory.getGenomeFactory().processGTFFile(gtfFileName, geneNameKey, transcriptNameKey, features, featuresToCreate); 219 | cerr << "Done in " << uTime.getElapseTimeSec() << " seconds" << endl; 220 | 221 | if (!bamDirName.empty()) { 222 | uTime.setTime(); 223 | cerr << "Parsing BAM files" << endl; 224 | fflush(NULL); 225 | count = readFactory.processBAMSAMFromDir(bamDirName, onlyProperlyPaired, minMAPQ, minOverlap); 226 | cerr << count << " reads processed in " << uTime.getElapseTimeSec() << " seconds" << endl; 227 | fflush(NULL); 228 | } else if (!bamFileName.empty()) { 229 | singleFile = true; 230 | string fileName = bamFileName; 231 | string sampleName; 232 | size_t sep = fileName.find_last_of("\\/"); 233 | if (sep != std::string::npos) 234 | fileName = fileName.substr(sep + 1, fileName.size() - sep - 1); 235 | size_t dot = fileName.find_last_of("."); 236 | if (dot != std::string::npos) { 237 | sampleName = fileName.substr(0, dot); 238 | if (fileName.substr(dot, fileName.size() - dot) == ".bam") { 239 | uTime.setTime(); 240 | cerr << "Parsing sample: " << sampleName; 241 | fflush(NULL); 242 | readFactory.getSamples().push_back(sampleName); 243 | count = readFactory.processReadsFromBAM(bamFileName, sampleName, onlyProperlyPaired, minMAPQ, minOverlap); 244 | cerr << " " << count << " reads processed in " << uTime.getElapseTimeSec() << " seconds" << endl; 245 | fflush(NULL); 246 | } 247 | } 248 | } 249 | 250 | cerr << "Printing results" << endl; 251 | fflush(NULL); 252 | readFactory.printResults(singleFile, extendedOutput, all_feat); 253 | 254 | cerr << "Total time: " << uTime.getTotalTimeSec() << " seconds" << endl; 255 | return 0; 256 | } 257 | -------------------------------------------------------------------------------- /src/pbeta.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | 
double pbeta_raw(double x, double a, double b, int lower_tail, int log_p) { 6 | // treat limit cases correctly here: 7 | if (a == 0 || b == 0 || !isfinite(a) || !isfinite(b)) { 8 | // NB: 0 < x < 1 : 9 | if (a == 0 && b == 0) // point mass 1/2 at each of {0,1} : 10 | return (log_p ? -M_LN2 : 0.5); 11 | if (a == 0 ) // point mass 1 at 0 ==> P(X <= x) = 1, all x > 0 12 | return R_DT_1; 13 | if (b == 0 ) // point mass 1 at 1 ==> P(X <= x) = 0, all x < 1 14 | return R_DT_0; 15 | // else, remaining case: a = b = Inf : point mass 1 at 1/2 16 | if (x < 0.5) return R_DT_0; 17 | else return R_DT_1; 18 | } 19 | // Now: 0 < a < Inf; 0 < b < Inf 20 | 21 | double x1 = 0.5 - x + 0.5, w, wc; 22 | int ierr; 23 | //==== 24 | bratio(a, b, x, x1, &w, &wc, &ierr, log_p); /* -> ./toms708.c */ 25 | //==== 26 | // ierr in {10,14} <==> bgrat() error code ierr-10 in 1:4; for 1 and 4, warned *there* 27 | if (ierr && ierr != 11 && ierr != 14) 28 | fprintf(stderr, "pbeta_raw(%g, a=%g, b=%g, ..) -> bratio() gave error code %d", x, a, b, ierr); 29 | return lower_tail ? 
w : wc; 30 | } /* pbeta_raw() */ 31 | 32 | double pbeta(double x, double a, double b, int lower_tail, int log_p) { 33 | #ifdef IEEE_754 34 | if (ISNAN(x) || ISNAN(a) || ISNAN(b)) return x + a + b; 35 | #endif 36 | 37 | if (a < 0 || b < 0) ML_ERR_return_NAN; 38 | // allowing a==0 and b==0 <==> treat as one- or two-point mass 39 | 40 | if (x <= 0) 41 | return R_DT_0; 42 | if (x >= 1) 43 | return R_DT_1; 44 | 45 | return pbeta_raw(x, a, b, lower_tail, log_p); 46 | } 47 | -------------------------------------------------------------------------------- /src/phyper.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Mathlib : A C Library of Special Functions 3 | * Copyright (C) 1998 Ross Ihaka 4 | * Copyright (C) 1999-2012 The R Core Team 5 | * Copyright (C) 2004 Morten Welinder 6 | * Copyright (C) 2004 The R Foundation 7 | * 8 | * This program is free software; you can redistribute it and/or modify 9 | * it under the terms of the GNU General Public License as published by 10 | * the Free Software Foundation; either version 2 of the License, or 11 | * (at your option) any later version. 12 | * 13 | * This program is distributed in the hope that it will be useful, 14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 | * GNU General Public License for more details. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program; if not, a copy is available at 20 | * http://www.r-project.org/Licenses/ 21 | * 22 | * DESCRIPTION 23 | * 24 | * The distribution function of the hypergeometric distribution. 25 | * 26 | * Current implementation based on posting 27 | * From: Morten Welinder 28 | * Cc: R-bugs@biostat.ku.dk 29 | * Subject: [Rd] phyper accuracy and efficiency (PR#6772) 30 | * Date: Thu, 15 Apr 2004 18:06:37 +0200 (CEST) 31 | ...... 
32 | 33 | The current version has very serious cancellation issues. For example, 34 | if you ask for a small right-tail you are likely to get total cancellation. 35 | For example, phyper(59, 150, 150, 60, FALSE, FALSE) gives 6.372680161e-14. 36 | The right answer is dhyper(0, 150, 150, 60, FALSE) which is 5.111204798e-22. 37 | 38 | phyper is also really slow for large arguments. 39 | 40 | Therefore, I suggest using the code below. This is a sniplet from Gnumeric ... 41 | The code isn't perfect. In fact, if x*(NR+NB) is close to n*NR, 42 | then this code can take a while. Not longer than the old code, though. 43 | 44 | -- Thanks to Ian Smith for ideas. 45 | */ 46 | 47 | #include 48 | #include 49 | #include 50 | #include "bmath.h" 51 | 52 | double dbinom_raw(double x, double n, double p, double q, int give_log) { 53 | double lf, lc; 54 | 55 | if (p == 0) return ((x == 0) ? R_D__1 : R_D__0); 56 | if (q == 0) return ((x == n) ? R_D__1 : R_D__0); 57 | 58 | if (x == 0) { 59 | if (n == 0) return R_D__1; 60 | lc = (p < 0.1) ? -bd0(n, n * q) - n * p : n * log(q); 61 | return ( R_D_exp(lc)); 62 | } 63 | if (x == n) { 64 | lc = (q < 0.1) ? -bd0(n, n * p) - n * q : n * log(p); 65 | return ( R_D_exp(lc)); 66 | } 67 | if (x < 0 || x > n) return ( R_D__0); 68 | 69 | /* n*p or n*q can underflow to zero if n and p or q are small. This 70 | used to occur in dbeta, and gives NaN as from R 2.3.0. 
*/ 71 | lc = stirlerr(n) - stirlerr(x) - stirlerr(n - x) - bd0(x, n * p) - bd0(n - x, n * q); 72 | 73 | /* f = (M_2PI*x*(n-x))/n; could overflow or underflow */ 74 | /* Upto R 2.7.1: 75 | * lf = log(M_2PI) + log(x) + log(n-x) - log(n); 76 | * -- following is much better for x << n : */ 77 | lf = log(M_2PI) + log(x) + log1p(-x / n); 78 | 79 | return R_D_exp(lc - 0.5 * lf); 80 | } 81 | 82 | double dhyper(double x, double r, double b, double n, int give_log) { 83 | double p, q, p1, p2, p3; 84 | 85 | #ifdef IEEE_754 86 | if (ISNAN(x) || ISNAN(r) || ISNAN(b) || ISNAN(n)) 87 | return x + r + b + n; 88 | #endif 89 | 90 | if (R_D_negInonint(r) || R_D_negInonint(b) || R_D_negInonint(n) || n > r + b) 91 | ML_ERR_return_NAN; 92 | if (R_D_negInonint(x)) 93 | return (0); 94 | 95 | x = R_D_forceint(x); 96 | r = R_D_forceint(r); 97 | b = R_D_forceint(b); 98 | n = R_D_forceint(n); 99 | 100 | if (n < x || r < x || n - x > b) return (R_D__0); 101 | if (n == 0) return ((x == 0) ? R_D__1 : R_D__0); 102 | 103 | p = ((double) n) / ((double) (r + b)); 104 | q = ((double) (r + b - n)) / ((double) (r + b)); 105 | 106 | p1 = dbinom_raw(x, r, p, q, give_log); 107 | p2 = dbinom_raw(n - x, b, p, q, give_log); 108 | p3 = dbinom_raw(n, r + b, p, q, give_log); 109 | 110 | return ( (give_log) ? p1 + p2 - p3 : p1 * p2 / p3); 111 | } 112 | 113 | static double pdhyper(double x, double NR, double NB, double n, int log_p) { 114 | /* 115 | * Calculate 116 | * 117 | * phyper (x, NR, NB, n, TRUE, FALSE) 118 | * [log] ---------------------------------- 119 | * dhyper (x, NR, NB, n, FALSE) 120 | * 121 | * without actually calling phyper. This assumes that 122 | * 123 | * x * (NR + NB) <= n * NR 124 | * 125 | */ 126 | double sum = 0; 127 | double term = 1; 128 | 129 | while (x > 0 && term >= DBL_EPSILON * sum) { 130 | term *= x * (NB - n + x) / (n + 1 - x) / (NR + 1 - x); 131 | sum += term; 132 | x--; 133 | } 134 | 135 | double ss = (double) sum; 136 | return log_p ? 
log1p(ss) : 1 + ss; 137 | } 138 | 139 | double phyper(double x, double NR, double NB, double n, 140 | int lower_tail, int log_p) { 141 | /* Sample of n balls from NR red and NB black ones; x are red */ 142 | 143 | double d, pd; 144 | 145 | #ifdef IEEE_754 146 | if (ISNAN(x) || ISNAN(NR) || ISNAN(NB) || ISNAN(n)) 147 | return x + NR + NB + n; 148 | #endif 149 | 150 | x = floor(x + 1e-7); 151 | NR = R_D_forceint(NR); 152 | NB = R_D_forceint(NB); 153 | n = R_D_forceint(n); 154 | 155 | if (NR < 0 || NB < 0 || isinf(NR + NB) || n < 0 || n > NR + NB) 156 | ML_ERR_return_NAN; 157 | 158 | if (x * (NR + NB) > n * NR) { 159 | /* Swap tails. */ 160 | double oldNB = NB; 161 | NB = NR; 162 | NR = oldNB; 163 | x = n - x - 1; 164 | lower_tail = !lower_tail; 165 | } 166 | 167 | if (x < 0) 168 | return R_DT_0; 169 | if (x >= NR || x >= n) 170 | return R_DT_1; 171 | 172 | d = dhyper(x, NR, NB, n, log_p); 173 | pd = pdhyper(x, NR, NB, n, log_p); 174 | 175 | return log_p ? R_DT_Log(d + pd) : R_D_Lval(d * pd); 176 | } 177 | -------------------------------------------------------------------------------- /src/pnorm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double pnorm5(double x, double mu, double sigma, int lower_tail, int log_p) { 6 | double p, cp; 7 | 8 | /* Note: The structure of these checks has been carefully thought through. 9 | * For example, if x == mu and sigma == 0, we get the correct answer 1. 10 | */ 11 | #ifdef IEEE_754 12 | if (ISNAN(x) || ISNAN(mu) || ISNAN(sigma)) 13 | return x + mu + sigma; 14 | #endif 15 | if (!isfinite(x) && mu == x) return ML_NAN; /* x-mu is NaN */ 16 | if (sigma <= 0) { 17 | if (sigma < 0) ML_ERR_return_NAN; 18 | /* sigma = 0 : */ 19 | return (x < mu) ? R_DT_0 : R_DT_1; 20 | } 21 | if (fabs(sigma) > 1e-15) 22 | p = (x - mu) / sigma; 23 | else 24 | p = INFINITY; 25 | if (!isfinite(p)) 26 | return (x < mu) ? 
R_DT_0 : R_DT_1; 27 | x = p; 28 | 29 | pnorm_both(x, &p, &cp, (lower_tail ? 0 : 1), log_p); 30 | 31 | return (lower_tail ? p : cp); 32 | } 33 | 34 | #define SIXTEN 16 /* Cutoff allowing exact "*" and "/" */ 35 | 36 | void pnorm_both(double x, double *cum, double *ccum, int i_tail, int log_p) { 37 | /* i_tail in {0,1,2} means: "lower", "upper", or "both" : 38 | if(lower) return *cum := P[X <= x] 39 | if(upper) return *ccum := P[X > x] = 1 - P[X <= x] 40 | */ 41 | const static double a[5] = { 42 | 2.2352520354606839287, 43 | 161.02823106855587881, 44 | 1067.6894854603709582, 45 | 18154.981253343561249, 46 | 0.065682337918207449113 47 | }; 48 | const static double b[4] = { 49 | 47.20258190468824187, 50 | 976.09855173777669322, 51 | 10260.932208618978205, 52 | 45507.789335026729956 53 | }; 54 | const static double c[9] = { 55 | 0.39894151208813466764, 56 | 8.8831497943883759412, 57 | 93.506656132177855979, 58 | 597.27027639480026226, 59 | 2494.5375852903726711, 60 | 6848.1904505362823326, 61 | 11602.651437647350124, 62 | 9842.7148383839780218, 63 | 1.0765576773720192317e-8 64 | }; 65 | const static double d[8] = { 66 | 22.266688044328115691, 67 | 235.38790178262499861, 68 | 1519.377599407554805, 69 | 6485.558298266760755, 70 | 18615.571640885098091, 71 | 34900.952721145977266, 72 | 38912.003286093271411, 73 | 19685.429676859990727 74 | }; 75 | const static double p[6] = { 76 | 0.21589853405795699, 77 | 0.1274011611602473639, 78 | 0.022235277870649807, 79 | 0.001421619193227893466, 80 | 2.9112874951168792e-5, 81 | 0.02307344176494017303 82 | }; 83 | const static double q[5] = { 84 | 1.28426009614491121, 85 | 0.468238212480865118, 86 | 0.0659881378689285515, 87 | 0.00378239633202758244, 88 | 7.29751555083966205e-5 89 | }; 90 | 91 | double xden, xnum, temp, del, eps, xsq, y; 92 | #ifdef NO_DENORMS 93 | double min = DBL_MIN; 94 | #endif 95 | int i, lower, upper; 96 | 97 | #ifdef IEEE_754 98 | if (ISNAN(x)) { 99 | *cum = *ccum = x; 100 | return; 101 | } 102 | #endif 103 | 
104 | /* Consider changing these : */ 105 | eps = DBL_EPSILON * 0.5; 106 | 107 | /* i_tail in {0,1,2} =^= {lower, upper, both} */ 108 | lower = i_tail != 1; 109 | upper = i_tail != 0; 110 | 111 | y = fabs(x); 112 | if (y <= 0.67448975) { /* qnorm(3/4) = .6744.... -- earlier had 0.66291 */ 113 | if (y > eps) { 114 | xsq = x * x; 115 | xnum = a[4] * xsq; 116 | xden = xsq; 117 | for (i = 0; i < 3; ++i) { 118 | xnum = (xnum + a[i]) * xsq; 119 | xden = (xden + b[i]) * xsq; 120 | } 121 | } else xnum = xden = 0.0; 122 | 123 | temp = x * (xnum + a[3]) / (xden + b[3]); 124 | if (lower) *cum = 0.5 + temp; 125 | if (upper) *ccum = 0.5 - temp; 126 | if (log_p) { 127 | if (lower) *cum = log(*cum); 128 | if (upper) *ccum = log(*ccum); 129 | } 130 | } else if (y <= M_SQRT_32) { 131 | 132 | /* Evaluate pnorm for 0.674.. = qnorm(3/4) < |x| <= sqrt(32) ~= 5.657 */ 133 | 134 | xnum = c[8] * y; 135 | xden = y; 136 | for (i = 0; i < 7; ++i) { 137 | xnum = (xnum + c[i]) * y; 138 | xden = (xden + d[i]) * y; 139 | } 140 | temp = (xnum + c[7]) / (xden + d[7]); 141 | 142 | #define do_del(X) \ 143 | xsq = trunc(X * SIXTEN) / SIXTEN; \ 144 | del = (X - xsq) * (X + xsq); \ 145 | if(log_p) { \ 146 | *cum = (-xsq * xsq * 0.5) + (-del * 0.5) + log(temp); \ 147 | if((lower && x > 0.) || (upper && x <= 0.)) \ 148 | *ccum = log1p(-exp(-xsq * xsq * 0.5) * \ 149 | exp(-del * 0.5) * temp); \ 150 | } \ 151 | else { \ 152 | *cum = exp(-xsq * xsq * 0.5) * exp(-del * 0.5) * temp; \ 153 | *ccum = 1.0 - *cum; \ 154 | } 155 | 156 | #define swap_tail \ 157 | if (x > 0.) {/* swap ccum <--> cum */ \ 158 | temp = *cum; if(lower) *cum = *ccum; *ccum = temp; \ 159 | } 160 | 161 | do_del(y); 162 | swap_tail; 163 | }/* else |x| > sqrt(32) = 5.657 : 164 | * the next two case differentiations were really for lower=T, log=F 165 | * Particularly *not* for log_p ! 
166 | 167 | * Cody had (-37.5193 < x && x < 8.2924) ; R originally had y < 50 168 | * 169 | * Note that we do want symmetry(0), lower/upper -> hence use y 170 | */ 171 | else if ((log_p && y < 1e170) /* avoid underflow below */ 172 | /* ^^^^^ MM FIXME: can speedup for log_p and much larger |x| ! 173 | * Then, make use of Abramowitz & Stegun, 26.2.13, something like 174 | 175 | xsq = x*x; 176 | 177 | if(xsq * DBL_EPSILON < 1.) 178 | del = (1. - (1. - 5./(xsq+6.)) / (xsq+4.)) / (xsq+2.); 179 | else 180 | del = 0.; 181 | *cum = -.5*xsq - M_LN_SQRT_2PI - log(x) + log1p(-del); 182 | *ccum = log1p(-exp(*cum)); /.* ~ log(1) = 0 *./ 183 | 184 | swap_tail; 185 | 186 | [Yes, but xsq might be infinite.] 187 | 188 | */ 189 | || (lower && -37.5193 < x && x < 8.2924) 190 | || (upper && -8.2924 < x && x < 37.5193) 191 | ) { 192 | 193 | /* Evaluate pnorm for x in (-37.5, -5.657) union (5.657, 37.5) */ 194 | xsq = 1.0 / (x * x); /* (1./x)*(1./x) might be better */ 195 | xnum = p[5] * xsq; 196 | xden = xsq; 197 | for (i = 0; i < 4; ++i) { 198 | xnum = (xnum + p[i]) * xsq; 199 | xden = (xden + q[i]) * xsq; 200 | } 201 | temp = xsq * (xnum + p[4]) / (xden + q[4]); 202 | temp = (M_1_SQRT_2PI - temp) / y; 203 | 204 | do_del(x); 205 | swap_tail; 206 | } else { /* large x such that probs are 0 or 1 */ 207 | if (x > 0) { 208 | *cum = R_D__1; 209 | *ccum = R_D__0; 210 | } else { 211 | *cum = R_D__0; 212 | *ccum = R_D__1; 213 | } 214 | } 215 | 216 | 217 | #ifdef NO_DENORMS 218 | /* do not return "denormalized" -- we do in R */ 219 | if (log_p) { 220 | if (*cum > -min) *cum = -0.; 221 | if (*ccum > -min)*ccum = -0.; 222 | } else { 223 | if (*cum < min) *cum = 0.; 224 | if (*ccum < min) *ccum = 0.; 225 | } 226 | #endif 227 | return; 228 | } 229 | -------------------------------------------------------------------------------- /src/pt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | double pt(double x, 
double n, int lower_tail, int log_p) { 6 | /* return P[ T <= x ] where 7 | * T ~ t_{n} (t distrib. with n degrees of freedom). 8 | 9 | * --> ./pnt.c for NON-central 10 | */ 11 | double val, nx; 12 | #ifdef IEEE_754 13 | if (ISNAN(x) || ISNAN(n)) 14 | return x + n; 15 | #endif 16 | if (n <= 0.0) ML_ERR_return_NAN; 17 | 18 | if (!isfinite(x)) 19 | return (x < 0) ? R_DT_0 : R_DT_1; 20 | if (!isfinite(n)) 21 | return pnorm(x, 0.0, 1.0, lower_tail, log_p); 22 | 23 | nx = 1 + (x / n) * x; 24 | /* FIXME: This test is probably losing rather than gaining precision, 25 | * now that pbeta(*, log_p = TRUE) is much better. 26 | * Note however that a version of this test *is* needed for x*x > D_MAX */ 27 | if (nx > 1e100) { /* <==> x*x > 1e100 * n */ 28 | /* Danger of underflow. So use Abramowitz & Stegun 26.5.4 29 | pbeta(z, a, b) ~ z^a(1-z)^b / aB(a,b) ~ z^a / aB(a,b), 30 | with z = 1/nx, a = n/2, b= 1/2 : 31 | */ 32 | double lval; 33 | lval = -0.5 * n * (2 * log(fabs(x)) - log(n)) 34 | - lbeta(0.5 * n, 0.5) - log(0.5 * n); 35 | val = log_p ? lval : exp(lval); 36 | } else { 37 | val = (n > x * x) 38 | ? pbeta(x * x / (n + x * x), 0.5, n / 2., /*lower_tail*/0, log_p) 39 | : pbeta(1. / nx, n / 2., 0.5, /*lower_tail*/1, log_p); 40 | } 41 | 42 | /* Use "1 - v" if lower_tail and x > 0 (but not both):*/ 43 | if (x <= 0.) 
44 | lower_tail = !lower_tail; 45 | 46 | if (log_p) { 47 | if (lower_tail) return log1p(-0.5 * exp(val)); 48 | else return val - M_LN2; /* = log(.5* pbeta(....)) */ 49 | } else { 50 | val /= 2.; 51 | return R_D_Cval(val); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/qnorm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "bmath.h" 7 | 8 | double qnorm5(double p, double mu, double sigma, int lower_tail, int log_p) { 9 | double p_, q, r, val; 10 | 11 | #ifdef IEEE_754 12 | if (ISNAN(p) || ISNAN(mu) || ISNAN(sigma)) 13 | return p + mu + sigma; 14 | #endif 15 | R_Q_P01_boundaries(p, ML_NEGINF, ML_POSINF); 16 | 17 | if (sigma < 0) ML_ERR_return_NAN; 18 | if (sigma == 0) return mu; 19 | 20 | p_ = R_DT_qIv(p); /* real lower_tail prob. p */ 21 | q = p_ - 0.5; 22 | 23 | #ifdef DEBUG_qnorm 24 | REprintf("qnorm(p=%10.7g, m=%g, s=%g, l.t.= %d, log= %d): q = %g\n", 25 | p, mu, sigma, lower_tail, log_p, q); 26 | #endif 27 | 28 | 29 | /*-- use AS 241 --- */ 30 | /* double ppnd16_(double *p, long *ifault)*/ 31 | /* ALGORITHM AS241 APPL. STATIST. (1988) VOL. 37, NO. 3 32 | 33 | Produces the normal deviate Z corresponding to a given lower 34 | tail area of P; Z is accurate to about 1 part in 10**16. 35 | 36 | (original fortran code used PARAMETER(..) for the coefficients 37 | and provided hash codes for checking them...) 
38 | */ 39 | if (fabs(q) <= .425) {/* 0.075 <= p <= 0.925 */ 40 | r = .180625 - q * q; 41 | val = 42 | q * (((((((r * 2509.0809287301226727 + 43 | 33430.575583588128105) * r + 67265.770927008700853) * r + 44 | 45921.953931549871457) * r + 13731.693765509461125) * r + 45 | 1971.5909503065514427) * r + 133.14166789178437745) * r + 46 | 3.387132872796366608) 47 | / (((((((r * 5226.495278852854561 + 48 | 28729.085735721942674) * r + 39307.89580009271061) * r + 49 | 21213.794301586595867) * r + 5394.1960214247511077) * r + 50 | 687.1870074920579083) * r + 42.313330701600911252) * r + 1.); 51 | } else { /* closer than 0.075 from {0,1} boundary */ 52 | 53 | /* r = min(p, 1-p) < 0.075 */ 54 | if (q > 0) 55 | r = R_DT_CIv(p); /* 1-p */ 56 | else 57 | r = p_; /* = R_DT_Iv(p) ^= p */ 58 | 59 | r = sqrt(-((log_p && 60 | ((lower_tail && q <= 0) || (!lower_tail && q > 0))) ? 61 | p : /* else */ log(r))); 62 | /* r = sqrt(-log(r)) <==> min(p, 1-p) = exp( - r^2 ) */ 63 | #ifdef DEBUG_qnorm 64 | REprintf("\t close to 0 or 1: r = %7g\n", r); 65 | #endif 66 | 67 | if (r <= 5.) 
{ /* <==> min(p,1-p) >= exp(-25) ~= 1.3888e-11 */ 68 | r += -1.6; 69 | val = (((((((r * 7.7454501427834140764e-4 + 70 | .0227238449892691845833) * r + .24178072517745061177) * 71 | r + 1.27045825245236838258) * r + 72 | 3.64784832476320460504) * r + 5.7694972214606914055) * 73 | r + 4.6303378461565452959) * r + 74 | 1.42343711074968357734) 75 | / (((((((r * 76 | 1.05075007164441684324e-9 + 5.475938084995344946e-4) * 77 | r + .0151986665636164571966) * r + 78 | .14810397642748007459) * r + .68976733498510000455) * 79 | r + 1.6763848301838038494) * r + 80 | 2.05319162663775882187) * r + 1.); 81 | } else { /* very close to 0 or 1 */ 82 | r += -5.; 83 | val = (((((((r * 2.01033439929228813265e-7 + 84 | 2.71155556874348757815e-5) * r + 85 | .0012426609473880784386) * r + .026532189526576123093) * 86 | r + .29656057182850489123) * r + 87 | 1.7848265399172913358) * r + 5.4637849111641143699) * 88 | r + 6.6579046435011037772) 89 | / (((((((r * 90 | 2.04426310338993978564e-15 + 1.4215117583164458887e-7) * 91 | r + 1.8463183175100546818e-5) * r + 92 | 7.868691311456132591e-4) * r + .0148753612908506148525) 93 | * r + .13692988092273580531) * r + 94 | .59983220655588793769) * r + 1.); 95 | } 96 | 97 | if (q < 0.0) 98 | val = -val; 99 | /* return (q >= 0.)? 
r : -r ;*/ 100 | } 101 | return mu + sigma * val; 102 | } 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /src/qt.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "bmath.h" 7 | 8 | // tan(pi * x) -- exact when x = k/2 for all integer k 9 | #if defined(HAVE_TANPI) || defined(HAVE___TANPI) 10 | // for use in arithmetic.c, half-values documented to give NaN 11 | double Rtanpi(double x) 12 | #else 13 | 14 | double tanpi(double x) 15 | #endif 16 | { 17 | #ifdef IEEE_754 18 | if (ISNAN(x)) return x; 19 | #endif 20 | if (!isfinite(x)) ML_ERR_return_NAN; 21 | 22 | x = fmod(x, 1.); // tan(pi(x + k)) == tan(pi x) for all integer k 23 | // map (-1,1) --> (-1/2, 1/2] : 24 | if (x <= -0.5) x++; 25 | else if (x > 0.5) x--; 26 | return (x == 0.) ? 0. : ((x == 0.5) ? ML_NAN : tan(M_PI * x)); 27 | } 28 | 29 | #if !defined(HAVE_TANPI) && defined(HAVE___TANPI) 30 | 31 | double tanpi(double x) { 32 | return __tanpi(x); 33 | } 34 | #endif 35 | 36 | double fmin2(double x, double y) { 37 | #ifdef IEEE_754 38 | if (ISNAN(x) || ISNAN(y)) 39 | return x + y; 40 | #endif 41 | return (x < y) ? x : y; 42 | } 43 | 44 | double qt(double p, double ndf, int lower_tail, int log_p) { 45 | const static double eps = 1.e-12; 46 | 47 | double P, q; 48 | 49 | #ifdef IEEE_754 50 | if (ISNAN(p) || ISNAN(ndf)) 51 | return p + ndf; 52 | #endif 53 | 54 | R_Q_P01_boundaries(p, ML_NEGINF, ML_POSINF); 55 | 56 | if (ndf <= 0) ML_ERR_return_NAN; 57 | 58 | if (ndf < 1) { /* based on qnt */ 59 | const static double accu = 1e-13; 60 | const static double Eps = 1e-11; /* must be > accu */ 61 | 62 | double ux, lx, nx, pp; 63 | 64 | int iter = 0; 65 | 66 | p = R_DT_qIv(p); 67 | 68 | /* Invert pt(.) : 69 | * 1. 
finding an upper and lower bound */ 70 | if (p > 1 - DBL_EPSILON) return ML_POSINF; 71 | pp = fmin2(1 - DBL_EPSILON, p * (1 + Eps)); 72 | for (ux = 1.; ux < DBL_MAX && pt(ux, ndf, true, false) < pp; ux *= 2); 73 | pp = p * (1 - Eps); 74 | for (lx = -1.; lx > -DBL_MAX && pt(lx, ndf, true, false) > pp; lx *= 2); 75 | 76 | /* 2. interval (lx,ux) halving 77 | regula falsi failed on qt(0.1, 0.1) 78 | */ 79 | do { 80 | nx = 0.5 * (lx + ux); 81 | if (pt(nx, ndf, true, false) > p) ux = nx; 82 | else lx = nx; 83 | } while ((ux - lx) / fabs(nx) > accu && ++iter < 1000); 84 | 85 | return 0.5 * (lx + ux); 86 | } 87 | 88 | /* Old comment: 89 | * FIXME: "This test should depend on ndf AND p !! 90 | * ----- and in fact should be replaced by 91 | * something like Abramowitz & Stegun 26.7.5 (p.949)" 92 | * 93 | * That would say that if the qnorm value is x then 94 | * the result is about x + (x^3+x)/4df + (5x^5+16x^3+3x)/96df^2 95 | * The differences are tiny even if x ~ 1e5, and qnorm is not 96 | * that accurate in the extreme tails. 97 | */ 98 | if (ndf > 1e20) return qnorm(p, 0., 1., lower_tail, log_p); 99 | 100 | P = R_D_qIv(p); /* if exp(p) underflows, we fix below */ 101 | 102 | bool neg = (!lower_tail || P < 0.5) && (lower_tail || P > 0.5), 103 | is_neg_lower = (lower_tail == neg); /* both true or false == !xor */ 104 | if (neg) 105 | P = 2 * (log_p ? (lower_tail ? P : -expm1(p)) : R_D_Lval(p)); 106 | else 107 | P = 2 * (log_p ? (lower_tail ? -expm1(p) : P) : R_D_Cval(p)); 108 | /* 0 <= P <= 1 ; P = 2*min(P', 1 - P') in all cases */ 109 | 110 | if (fabs(ndf - 2) < eps) { /* df ~= 2 */ 111 | if (P > DBL_MIN) { 112 | if (3 * P < DBL_EPSILON) /* P ~= 0 */ 113 | q = 1 / sqrt(P); 114 | else if (P > 0.9) /* P ~= 1 */ 115 | q = (1 - P) * sqrt(2 / (P * (2 - P))); 116 | else /* eps/3 <= P <= 0.9 */ 117 | q = sqrt(2 / (P * (2 - P)) - 2); 118 | } else { /* P << 1, q = 1/sqrt(P) = ... */ 119 | if (log_p) 120 | q = is_neg_lower ? 
exp(-p / 2) / M_SQRT2 : 1 / sqrt(-expm1(p)); 121 | else 122 | q = ML_POSINF; 123 | } 124 | } else if (ndf < 1 + eps) { /* df ~= 1 (df < 1 excluded above): Cauchy */ 125 | if (P == 1.) q = 0; // some versions of tanpi give Inf, some NaN 126 | else if (P > 0) 127 | q = 1 / tanpi(P / 2.); /* == - tan((P+1) * M_PI_2) -- suffers for P ~= 0 */ 128 | 129 | else { /* P = 0, but maybe = 2*exp(p) ! */ 130 | if (log_p) /* 1/tan(e) ~ 1/e */ 131 | q = is_neg_lower ? M_1_PI * exp(-p) : -1. / (M_PI * expm1(p)); 132 | else 133 | q = ML_POSINF; 134 | } 135 | } else { /*-- usual case; including, e.g., df = 1.1 */ 136 | double x = 0., y, log_P2 = 0./* -Wall */, 137 | a = 1 / (ndf - 0.5), 138 | b = 48 / (a * a), 139 | c = ((20700 * a / b - 98) * a - 16) * a + 96.36, 140 | d = ((94.5 / (b + c) - 3) / b + 1) * sqrt(a * M_PI_2) * ndf; 141 | 142 | bool P_ok1 = P > DBL_MIN || !log_p, P_ok = P_ok1; 143 | if (P_ok1) { 144 | if(fabs(ndf) > 1e-15) 145 | y = pow(d * P, 2.0 / ndf); 146 | else 147 | return NAN; 148 | P_ok = (y >= DBL_EPSILON); 149 | } 150 | if (!P_ok) {// log.p && P very.small || (d*P)^(2/df) =: y < eps_c 151 | log_P2 = is_neg_lower ? 
R_D_log(p) : R_D_LExp(p); /* == log(P / 2) */ 152 | x = (log(d) + M_LN2 + log_P2) / ndf; 153 | y = exp(2 * x); 154 | } 155 | 156 | if ((ndf < 2.1 && P > 0.5) || y > 0.05 + a) { /* P > P0(df) */ 157 | /* Asymptotic inverse expansion about normal */ 158 | if (P_ok) 159 | x = qnorm(0.5 * P, 0., 1., /*lower_tail*/true, /*log_p*/false); 160 | else /* log_p && P underflowed */ 161 | x = qnorm(log_P2, 0., 1., lower_tail, /*log_p*/ true); 162 | 163 | y = x * x; 164 | if (ndf < 5) 165 | c += 0.3 * (ndf - 4.5) * (x + 0.6); 166 | c = (((0.05 * d * x - 5) * x - 7) * x - 2) * x + b + c; 167 | y = (((((0.4 * y + 6.3) * y + 36) * y + 94.5) / c 168 | - y - 3) / b + 1) * x; 169 | y = expm1(a * y * y); 170 | q = sqrt(ndf * y); 171 | } else if (!P_ok && x < -M_LN2 * DBL_MANT_DIG) {/* 0.5* log(DBL_EPSILON) */ 172 | /* y above might have underflown */ 173 | q = sqrt(ndf) * exp(-x); 174 | } else { /* re-use 'y' from above */ 175 | y = ((1 / (((ndf + 6) / (ndf * y) - 0.089 * d - 0.822) 176 | * (ndf + 2) * 3) + 0.5 / (ndf + 4)) 177 | * y - 1) * (ndf + 1) / (ndf + 2) + 1 / y; 178 | q = sqrt(ndf * y); 179 | } 180 | 181 | /* Now apply 2-term Taylor expansion improvement (1-term = Newton): 182 | * as by Hill (1981) [ref.above] */ 183 | 184 | /* FIXME: This can be far from optimal when log_p = true 185 | * but is still needed, e.g. for qt(-2, df=1.01, log=true). 186 | * Probably also improvable when lower_tail = false */ 187 | 188 | if (P_ok1) { 189 | int it = 0; 190 | while (it++ < 10 && (y = dt(q, ndf, false)) > 0 && 191 | isfinite(x = (pt(q, ndf, false, false) - P / 2) / y) && 192 | fabs(x) > 1e-14 * fabs(q)) 193 | /* Newton (=Taylor 1 term): 194 | * q += x; 195 | * Taylor 2-term : */ 196 | q += x * (1. 
+ x * q * (ndf + 1) / (2 * (q * q + ndf))); 197 | } 198 | } 199 | if (neg) q = -q; 200 | return q; 201 | } 202 | -------------------------------------------------------------------------------- /src/stirlerr.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "bmath.h" 4 | 5 | /* stirlerr(n) = log(n!) - log( sqrt(2*pi*n)*(n/e)^n ) 6 | * = log Gamma(n+1) - 1/2 * [log(2*pi) + log(n)] - n*[log(n) - 1] 7 | * = log Gamma(n+1) - (n + 1/2) * log(n) + n - log(2*pi)/2 8 | * 9 | * see also lgammacor() in ./lgammacor.c which computes almost the same! 10 | */ 11 | 12 | double stirlerr(double n) { 13 | 14 | #define S0 0.083333333333333333333 /* 1/12 */ 15 | #define S1 0.00277777777777777777778 /* 1/360 */ 16 | #define S2 0.00079365079365079365079365 /* 1/1260 */ 17 | #define S3 0.000595238095238095238095238 /* 1/1680 */ 18 | #define S4 0.0008417508417508417508417508/* 1/1188 */ 19 | 20 | /* 21 | error for 0, 0.5, 1.0, 1.5, ..., 14.5, 15.0. 
22 | */ 23 | const static double sferr_halves[31] = { 24 | 0.0, /* n=0 - wrong, place holder only */ 25 | 0.1534264097200273452913848, /* 0.5 */ 26 | 0.0810614667953272582196702, /* 1.0 */ 27 | 0.0548141210519176538961390, /* 1.5 */ 28 | 0.0413406959554092940938221, /* 2.0 */ 29 | 0.03316287351993628748511048, /* 2.5 */ 30 | 0.02767792568499833914878929, /* 3.0 */ 31 | 0.02374616365629749597132920, /* 3.5 */ 32 | 0.02079067210376509311152277, /* 4.0 */ 33 | 0.01848845053267318523077934, /* 4.5 */ 34 | 0.01664469118982119216319487, /* 5.0 */ 35 | 0.01513497322191737887351255, /* 5.5 */ 36 | 0.01387612882307074799874573, /* 6.0 */ 37 | 0.01281046524292022692424986, /* 6.5 */ 38 | 0.01189670994589177009505572, /* 7.0 */ 39 | 0.01110455975820691732662991, /* 7.5 */ 40 | 0.010411265261972096497478567, /* 8.0 */ 41 | 0.009799416126158803298389475, /* 8.5 */ 42 | 0.009255462182712732917728637, /* 9.0 */ 43 | 0.008768700134139385462952823, /* 9.5 */ 44 | 0.008330563433362871256469318, /* 10.0 */ 45 | 0.007934114564314020547248100, /* 10.5 */ 46 | 0.007573675487951840794972024, /* 11.0 */ 47 | 0.007244554301320383179543912, /* 11.5 */ 48 | 0.006942840107209529865664152, /* 12.0 */ 49 | 0.006665247032707682442354394, /* 12.5 */ 50 | 0.006408994188004207068439631, /* 13.0 */ 51 | 0.006171712263039457647532867, /* 13.5 */ 52 | 0.005951370112758847735624416, /* 14.0 */ 53 | 0.005746216513010115682023589, /* 14.5 */ 54 | 0.005554733551962801371038690 /* 15.0 */ 55 | }; 56 | double nn; 57 | int index; 58 | 59 | if (n <= 15.0000) { 60 | nn = n + n; 61 | if (nn == floor(nn)) { 62 | index = (int) floor(nn); 63 | if (index >= 0 && index <= 31) { 64 | nn = sferr_halves[index]; 65 | return nn; 66 | } 67 | } 68 | return (lgammafn(n + 1.) 
- (n + 0.5) * log(n) + n - M_LN_SQRT_2PI); 69 | } 70 | 71 | nn = n*n; 72 | if (n > 500) return ((S0 - S1 / nn) / n); 73 | if (n > 80) return ((S0 - (S1 - S2 / nn) / nn) / n); 74 | if (n > 35) return ((S0 - (S1 - (S2 - S3 / nn) / nn) / nn) / n); 75 | /* 15 < n <= 35 : */ 76 | return ((S0 - (S1 - (S2 - (S3 - S4 / nn) / nn) / nn) / nn) / n); 77 | } -------------------------------------------------------------------------------- /src/sunif.c: -------------------------------------------------------------------------------- 1 | /* A version of Marsaglia-MultiCarry */ 2 | 3 | static unsigned int I1 = 1234, I2 = 5678; 4 | 5 | void set_seed(unsigned int i1, unsigned int i2) { 6 | I1 = i1; 7 | I2 = i2; 8 | } 9 | 10 | void get_seed(unsigned int *i1, unsigned int *i2) { 11 | *i1 = I1; 12 | *i2 = I2; 13 | } 14 | 15 | double unif_rand(void) { 16 | I1 = 36969 * (I1 & 0177777) + (I1 >> 16); 17 | I2 = 18000 * (I2 & 0177777) + (I2 >> 16); 18 | return ((I1 << 16)^(I2 & 0177777)) * 2.328306437080797e-10; /* in [0,1) */ 19 | } 20 | -------------------------------------------------------------------------------- /src/wilcox.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "bmath.h" 5 | 6 | //static double ***w; /* to store cwilcox(i,j,k) -> w[i][j][k] */ 7 | //static int allocated_m, allocated_n; 8 | 9 | typedef struct wilcox_s { 10 | double ***w; 11 | int m, n; 12 | } wilcox_t; 13 | 14 | void w_free(wilcox_t *w) { 15 | int i, j; 16 | 17 | for (i = w->m; i >= 0; i--) { 18 | if (w->w[i]) { 19 | for (j = w->n; j >= 0; j--) { 20 | if (w->w[i][j]) 21 | free((void *) w->w[i][j]); 22 | } 23 | free((void *) w->w[i]); 24 | } 25 | } 26 | if (w->w) 27 | free((void *) w->w); 28 | w->w = NULL; 29 | w->m = w->n = 0; 30 | } 31 | 32 | void w_init_maybe(wilcox_t *wilcox, int m, int n) { 33 | int i, j; 34 | 35 | if (m > n) { 36 | i = n; 37 | n = m; 38 | m = i; 39 | } 40 | if (wilcox->w && (m > wilcox->m || n > 
wilcox->n)) 41 | w_free(wilcox); /* zeroes w */ 42 | 43 | if (!wilcox->w) { /* initialize w[][] */ 44 | m = max(m, WILCOX_MAX); 45 | n = max(n, WILCOX_MAX); 46 | wilcox->w = (double ***) calloc((size_t) m + 1, sizeof (double **)); 47 | if (!wilcox->w) { 48 | fprintf(stderr, "Not enough memory to allocate memory\n"); 49 | exit(-1); 50 | } 51 | for (i = 0; i <= m; i++) { 52 | wilcox->w[i] = (double **) calloc((size_t) n + 1, sizeof (double *)); 53 | if (!wilcox->w[i]) { 54 | fprintf(stderr, "Not enough memory to allocate memory\n"); 55 | exit(-1); 56 | } 57 | for (j = 0; j <= n; j++) { 58 | wilcox->w[i][j] = 0; 59 | } 60 | } 61 | wilcox->m = m; 62 | wilcox->n = n; 63 | } 64 | } 65 | 66 | void w_free_maybe(wilcox_t *wilcox) { 67 | if (wilcox->m > WILCOX_MAX || wilcox->n > WILCOX_MAX) 68 | w_free(wilcox); 69 | } 70 | 71 | /* This counts the number of choices with statistic = k */ 72 | double cwilcox(wilcox_t *wilcox, int k, int m, int n) { 73 | int c, u, i, j, l; 74 | u = m * n; 75 | if (k < 0 || k > u) 76 | return (0); 77 | c = (int) (u / 2); 78 | if (k > c) 79 | k = u - k; /* hence k <= floor(u / 2) */ 80 | if (m < n) { 81 | i = m; 82 | j = n; 83 | } else { 84 | i = n; 85 | j = m; 86 | } /* hence i <= j */ 87 | 88 | if (j == 0) /* and hence i == 0 */ 89 | return (k == 0); 90 | 91 | 92 | /* We can simplify things if k is small. Consider the Mann-Whitney 93 | definition, and sort y. Then if the statistic is k, no more 94 | than k of the y's can be <= any x[i], and since they are sorted 95 | these can only be in the first k. So the count is the same as 96 | if there were just k y's. 
97 | */ 98 | if (j > 0 && k < j) return cwilcox(wilcox, k, i, k); 99 | 100 | if (!wilcox->w[i][j]) { 101 | wilcox->w[i][j] = (double *) calloc((size_t) c + 1, sizeof (double)); 102 | if (!wilcox->w[i][j]) { 103 | fprintf(stderr, "Not enough memory to allocate memory\n"); 104 | exit(-1); 105 | } 106 | for (l = 0; l <= c; l++) 107 | wilcox->w[i][j][l] = -1.0; 108 | } 109 | if (wilcox->w[i][j][k] < 0) { 110 | if (j == 0) /* and hence i == 0 */ 111 | wilcox->w[i][j][k] = (k == 0); 112 | else { 113 | wilcox->w[i][j][k] = cwilcox(wilcox, k - j, i - 1, j) + cwilcox(wilcox, k, i, j - 1); 114 | } 115 | 116 | } 117 | // printf("cwilcox: %d %d %d %f\n", i, j, k, wilcox->w[i][j][k]); 118 | return (wilcox->w[i][j][k]); 119 | } 120 | 121 | double dwilcox(double x, double m, double n, int give_log) { 122 | double d; 123 | wilcox_t *wilcox = (wilcox_t *) malloc(sizeof (wilcox_t)); 124 | if (!wilcox) { 125 | fprintf(stderr, "Not enough memory to allocate memory\n"); 126 | exit(-1); 127 | } 128 | 129 | wilcox->w = NULL; 130 | wilcox->m = 0; 131 | wilcox->n = 0; 132 | 133 | m = R_forceint(m); 134 | n = R_forceint(n); 135 | if (m <= 0 || n <= 0) 136 | ML_ERR_return_NAN; 137 | 138 | if (fabs(x - R_forceint(x)) > 1e-7) 139 | return (R_D__0); 140 | x = R_forceint(x); 141 | if ((x < 0) || (x > m * n)) 142 | return (R_D__0); 143 | 144 | int mm = floor(m); 145 | int nn = floor(n); 146 | int xx = floor(x); 147 | w_init_maybe(wilcox, mm, nn); 148 | d = give_log ? 
149 | log(cwilcox(wilcox, xx, mm, nn)) - lchoose(m + n, n) : 150 | cwilcox(wilcox, xx, mm, nn) / choose(m + n, n); 151 | w_free(wilcox); 152 | if (wilcox) 153 | free(wilcox); 154 | return (d); 155 | } 156 | 157 | /* args have the same meaning as R function pwilcox */ 158 | double pwilcox(double q, double m, double n, int lower_tail, int log_p) { 159 | int i; 160 | double c, p; 161 | wilcox_t *wilcox = (wilcox_t *) malloc(sizeof (wilcox_t)); 162 | if (!wilcox) { 163 | fprintf(stderr, "Not enough memory to allocate memory\n"); 164 | exit(-1); 165 | } 166 | wilcox->w = NULL; 167 | wilcox->m = 0; 168 | wilcox->n = 0; 169 | 170 | if (!isfinite(m) || !isfinite(n)) 171 | ML_ERR_return_NAN; 172 | m = R_forceint(m); 173 | n = R_forceint(n); 174 | if (m <= 0 || n <= 0) 175 | ML_ERR_return_NAN; 176 | 177 | q = floor(q + 1e-7); 178 | 179 | if (q < 0.0) 180 | return (R_DT_0); 181 | if (q >= m * n) 182 | return (R_DT_1); 183 | 184 | int mm = floor(m); 185 | int nn = floor(n); 186 | w_init_maybe(wilcox, mm, nn); 187 | c = choose(m + n, n); 188 | p = 0.0; 189 | /* Use summation of probs over the shorter range */ 190 | if (q <= (m * n / 2)) { 191 | for (i = 0; i <= q; i++) { 192 | p += (cwilcox(wilcox, i, mm, nn) / c); 193 | } 194 | } else { 195 | q = m * n - q; 196 | for (i = 0; i < q; i++) 197 | p += cwilcox(wilcox, i, mm, nn) / c; 198 | lower_tail = !lower_tail; /* p = 1 - p; */ 199 | } 200 | w_free(wilcox); 201 | if (wilcox) 202 | free(wilcox); 203 | return p; 204 | } /* pwilcox */ 205 | 206 | /* x is 'p' in R function qwilcox */ 207 | 208 | double qwilcox(double x, double m, double n, int lower_tail, int log_p) { 209 | double c, p; 210 | wilcox_t *wilcox = (wilcox_t *) malloc(sizeof (wilcox_t)); 211 | if (!wilcox) { 212 | fprintf(stderr, "Not enough memory to allocate memory\n"); 213 | exit(-1); 214 | } 215 | wilcox->w = NULL; 216 | wilcox->m = 0; 217 | wilcox->n = 0; 218 | 219 | #ifdef IEEE_754 220 | if (ISNAN(x) || ISNAN(m) || ISNAN(n)) 221 | return (x + m + n); 222 | #endif 
223 | if (!isfinite(x) || !isfinite(m) || !isfinite(n)) 224 | ML_ERR_return_NAN; 225 | R_Q_P01_check(x); 226 | 227 | m = R_forceint(m); 228 | n = R_forceint(n); 229 | if (m <= 0 || n <= 0) 230 | ML_ERR_return_NAN; 231 | 232 | if (x == R_DT_0) 233 | return (0); 234 | if (x == R_DT_1) 235 | return (m * n); 236 | 237 | if (log_p || !lower_tail) 238 | x = R_DT_qIv(x); /* lower_tail,non-log "p" */ 239 | 240 | int mm = floor(m); 241 | int nn = floor(n); 242 | w_init_maybe(wilcox, mm, nn); 243 | c = choose(m + n, n); 244 | p = 0; 245 | int q = 0; 246 | if (x <= 0.5) { 247 | x = x - 10 * DBL_EPSILON; 248 | for (;;) { 249 | p += cwilcox(wilcox, q, mm, nn) / c; 250 | if (p >= x) 251 | break; 252 | q++; 253 | } 254 | } else { 255 | x = 1 - x + 10 * DBL_EPSILON; 256 | for (;;) { 257 | p += cwilcox(wilcox, q, mm, nn) / c; 258 | if (p > x) { 259 | q = floor(m * n - q); 260 | break; 261 | } 262 | q++; 263 | } 264 | } 265 | w_free(wilcox); 266 | if (wilcox) 267 | free(wilcox); 268 | return (q); 269 | } 270 | 271 | double rwilcox(double m, double n) { 272 | int i, j, k, *x; 273 | double r; 274 | 275 | #ifdef IEEE_754 276 | /* NaNs propagated correctly */ 277 | if (ISNAN(m) || ISNAN(n)) 278 | return (m + n); 279 | #endif 280 | m = R_forceint(m); 281 | n = R_forceint(n); 282 | if ((m < 0) || (n < 0)) 283 | ML_ERR_return_NAN; 284 | 285 | if ((m == 0) || (n == 0)) 286 | return (0); 287 | 288 | r = 0.0; 289 | k = floor(m + n); 290 | x = (int *) calloc((size_t) k, sizeof (int)); 291 | if (!x) { 292 | fprintf(stderr, "Not enough memory to allocate memory\n"); 293 | exit(-1); 294 | } 295 | #ifdef MATHLIB_STANDALONE 296 | if (!x) MATHLIB_ERROR(_("wilcox allocation error %d"), 4); 297 | #endif 298 | for (i = 0; i < k; i++) 299 | x[i] = i; 300 | for (i = 0; i < n; i++) { 301 | j = (int) floor(k * unif_rand()); 302 | r += x[j]; 303 | if (k > 0) 304 | x[j] = x[--k]; 305 | } 306 | if (x) 307 | free(x); 308 | return (r - n * (n - 1) / 2); 309 | } 310 | 311 | 
-------------------------------------------------------------------------------- /tpmcalculator.cwl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env cwl-runner 2 | 3 | cwlVersion: v1.0 4 | class: CommandLineTool 5 | 6 | hints: 7 | DockerRequirement: 8 | dockerImageId: biocontainers/tpmcalculator:0.0.1 9 | dockerFile: 10 | $include: https://raw.githubusercontent.com/ncbi/TPMCalculator/master/Dockerfile 11 | 12 | inputs: 13 | out_stdout: 14 | type: string 15 | out_stderr: 16 | type: string 17 | g: 18 | type: File 19 | inputBinding: 20 | position: 1 21 | prefix: -g 22 | doc: | 23 | GTF file 24 | d: 25 | type: Directory? 26 | inputBinding: 27 | position: 2 28 | prefix: -d 29 | doc: | 30 | Directory with the BAM files 31 | b: 32 | type: File? 33 | inputBinding: 34 | position: 2 35 | prefix: -b 36 | doc: | 37 | BAM file 38 | k: 39 | type: string? 40 | inputBinding: 41 | position: 3 42 | prefix: -k 43 | doc: | 44 | Gene key to use from GTF file. Default: gene_id 45 | t: 46 | type: string? 47 | inputBinding: 48 | position: 3 49 | prefix: -t 50 | doc: | 51 | Transcript key to use from GTF file. Default: transcript_id 52 | c: 53 | type: int? 54 | inputBinding: 55 | position: 3 56 | prefix: -c 57 | doc: | 58 | Smallest size allowed for an intron created for genes. Default: 16. We recommend using the read length 59 | p: 60 | type: boolean? 61 | inputBinding: 62 | position: 3 63 | prefix: -p 64 | doc: | 65 | Use only properly paired reads. Default: No. Recommended for paired-end reads. 66 | q: 67 | type: int? 68 | inputBinding: 69 | position: 3 70 | prefix: -q 71 | doc: | 72 | Minimum MAPQ value to filter out reads. Default: 0. This value depends on the aligner MAPQ value. 73 | o: 74 | type: int? 75 | inputBinding: 76 | position: 3 77 | prefix: -o 78 | doc: | 79 | Minimum overlap between a read and a feature. Default: 8. 80 | e: 81 | type: boolean? 
82 | inputBinding: 83 | position: 3 84 | prefix: -e 85 | doc: | 86 | Extended output. This will include transcript level TPM values. Default: No. 87 | 88 | outputs: 89 | out_stdout: 90 | type: stdout 91 | out_stderr: 92 | type: stderr 93 | out_output: 94 | type: File[] 95 | outputBinding: 96 | glob: "*.out" 97 | ent_output: 98 | type: File[] 99 | outputBinding: 100 | glob: "*.ent" 101 | uni_output: 102 | type: File[] 103 | outputBinding: 104 | glob: "*.uni" 105 | 106 | stdout: $(inputs.out_stdout) 107 | stderr: $(inputs.out_stderr) 108 | 109 | baseCommand: ["TPMCalculator"] 110 | 111 | --------------------------------------------------------------------------------