├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── data ├── SARS-CoV-2.genomes.fa └── SARS-CoV-2.kmer.fa └── src ├── adaptertrimmer.cpp ├── adaptertrimmer.h ├── basecorrector.cpp ├── basecorrector.h ├── cmdline.h ├── common.h ├── coverage.js ├── duplicate.cpp ├── duplicate.h ├── editdistance.cpp ├── editdistance.h ├── evaluator.cpp ├── evaluator.h ├── fastareader.cpp ├── fastareader.h ├── fastqreader.cpp ├── fastqreader.h ├── filter.cpp ├── filter.h ├── filterresult.cpp ├── filterresult.h ├── genomes.cpp ├── genomes.h ├── htmlreporter.cpp ├── htmlreporter.h ├── jsonreporter.cpp ├── jsonreporter.h ├── kmer.cpp ├── kmer.h ├── kmercollection.cpp ├── kmercollection.h ├── knownadapters.h ├── main.cpp ├── nucleotidetree.cpp ├── nucleotidetree.h ├── options.cpp ├── options.h ├── overlapanalysis.cpp ├── overlapanalysis.h ├── peprocessor.cpp ├── peprocessor.h ├── polyx.cpp ├── polyx.h ├── processor.cpp ├── processor.h ├── read.cpp ├── read.h ├── seprocessor.cpp ├── seprocessor.h ├── sequence.cpp ├── sequence.h ├── stats.cpp ├── stats.h ├── threadconfig.cpp ├── threadconfig.h ├── umiprocessor.cpp ├── umiprocessor.h ├── unittest.cpp ├── unittest.h ├── util.h ├── virusdetector.cpp ├── virusdetector.h ├── writer.cpp ├── writer.h ├── writerthread.cpp ├── writerthread.h └── zlib ├── crc32.h ├── deflate.h ├── gzguts.h ├── inffast.h ├── inffixed.h ├── inflate.h ├── inftrees.h ├── trees.h ├── zconf.h ├── zlib.h └── zutil.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 OpenGene - Open Source Genetics Toolbox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DIR_INC := ./inc 2 | DIR_SRC := ./src 3 | DIR_OBJ := ./obj 4 | 5 | PREFIX ?= /usr/local 6 | BINDIR ?= $(PREFIX)/bin 7 | INCLUDE_DIRS ?= 8 | LIBRARY_DIRS ?= 9 | 10 | SRC := $(wildcard ${DIR_SRC}/*.cpp) 11 | OBJ := $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${SRC})) 12 | 13 | TARGET := fastv 14 | 15 | BIN_TARGET := ${TARGET} 16 | 17 | CXX ?= g++ 18 | CXXFLAGS := -std=c++11 -g -O3 -I${DIR_INC} $(foreach includedir,$(INCLUDE_DIRS),-I$(includedir)) ${CXXFLAGS} 19 | LIBS := -lz -lpthread 20 | LD_FLAGS := $(foreach librarydir,$(LIBRARY_DIRS),-L$(librarydir)) $(LIBS) $(LD_FLAGS) 21 | 22 | 23 | ${BIN_TARGET}:${OBJ} 24 | $(CXX) $(OBJ) -o $@ $(LD_FLAGS) 25 | 26 | ${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp | make_obj_dir # order-only: the object directory must exist first, but must not force every object to be rebuilt 27 | $(CXX) -c $< -o $@ $(CXXFLAGS) 28 | 29 | .PHONY:clean install make_obj_dir 30 | clean: # the *.o pattern is quoted so the shell cannot expand it before find runs 31 | @if test -d $(DIR_OBJ) ; \ 32 | then \ 33 | find $(DIR_OBJ) -name '*.o' -delete; \ 34 | fi 35 | @if test -e $(TARGET) ; \ 36 | then \ 37 | rm $(TARGET) ; \ 38 | fi 39 | 40 | make_obj_dir: 41 | @if test ! -d $(DIR_OBJ) ; \ 42 | then \ 43 | mkdir $(DIR_OBJ) ; \ 44 | fi 45 | 46 | install: 47 | install $(TARGET) $(BINDIR)/$(TARGET) 48 | @echo "Installed." 
49 | -------------------------------------------------------------------------------- /src/adaptertrimmer.cpp: -------------------------------------------------------------------------------- 1 | #include "adaptertrimmer.h" 2 | 3 | AdapterTrimmer::AdapterTrimmer(){ 4 | } 5 | 6 | 7 | AdapterTrimmer::~AdapterTrimmer(){ 8 | } 9 | 10 | bool AdapterTrimmer::trimByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, int diffLimit, int overlapRequire, double diffPercentLimit) { 11 | OverlapResult ov = OverlapAnalysis::analyze(r1, r2, diffLimit, overlapRequire, diffPercentLimit); 12 | return trimByOverlapAnalysis(r1, r2, fr, ov); 13 | } 14 | 15 | bool AdapterTrimmer::trimByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, OverlapResult ov, int frontTrimmed1, int frontTrimmed2) { 16 | int ol = ov.overlap_len; 17 | if(ov.overlapped && ov.offset < 0) { 18 | 19 | //5' ......frontTrimmed1......|------------------------------------------|----- 3' 20 | //3' -----|-------------------------------------------|......frontTrimmed2..... 
5' 21 | 22 | int len1 = min(r1->length(), ol + frontTrimmed2); 23 | int len2 = min(r2->length(), ol + frontTrimmed1); 24 | string adapter1 = r1->mSeq.mStr.substr(len1, r1->length() - len1); 25 | string adapter2 = r2->mSeq.mStr.substr(len2, r2->length() - len2); 26 | 27 | if(_DEBUG) { 28 | cerr << adapter1 << endl; 29 | cerr << adapter2 << endl; 30 | cerr << "frontTrimmed2: " << frontTrimmed1 << endl; 31 | cerr << "frontTrimmed2: " << frontTrimmed2 << endl; 32 | cerr << "overlap:" << ov.offset << "," << ov.overlap_len << ", " << ov.diff << endl; 33 | r1->print(); 34 | r2->reverseComplement()->print(); 35 | cerr <mSeq.mStr = r1->mSeq.mStr.substr(0, len1); 38 | r1->mQuality = r1->mQuality.substr(0, len1); 39 | r2->mSeq.mStr = r2->mSeq.mStr.substr(0, len2); 40 | r2->mQuality = r2->mQuality.substr(0, len2); 41 | 42 | fr->addAdapterTrimmed(adapter1, adapter2); 43 | return true; 44 | } 45 | return false; 46 | } 47 | 48 | bool AdapterTrimmer::trimByMultiSequences(Read* r, FilterResult* fr, vector& adapterList, bool isR2, bool incTrimmedCounter) { 49 | int matchReq = 4; 50 | if(adapterList.size() > 16) 51 | matchReq = 5; 52 | if(adapterList.size() > 256) 53 | matchReq = 6; 54 | bool trimmed = false; 55 | 56 | string originalSeq = r->mSeq.mStr; 57 | for(int i=0; ilength(), originalSeq.length() - r->length()); 63 | if(fr) 64 | fr->addAdapterTrimmed(adapter, isR2, incTrimmedCounter); 65 | else 66 | cerr << adapter << endl; 67 | } 68 | 69 | return trimmed; 70 | } 71 | 72 | bool AdapterTrimmer::trimBySequence(Read* r, FilterResult* fr, string& adapterseq, bool isR2, int matchReq) { 73 | const int allowOneMismatchForEach = 8; 74 | 75 | int rlen = r->length(); 76 | int alen = adapterseq.length(); 77 | 78 | const char* adata = adapterseq.c_str(); 79 | const char* rdata = r->mSeq.mStr.c_str(); 80 | 81 | if(alen < matchReq) 82 | return false; 83 | 84 | int pos=0; 85 | bool found = false; 86 | int start = 0; 87 | if(alen >= 16) 88 | start = -4; 89 | else if(alen >= 12) 90 | start = 
-3; 91 | else if(alen >= 8) 92 | start = -2; 93 | // we start from negative numbers since the Illumina adapter dimer usually have the first A skipped as A-tailing 94 | for(pos = start; pos allowedMismatch) { 103 | matched = false; 104 | break; 105 | } 106 | } 107 | } 108 | if(matched) { 109 | found = true; 110 | break; 111 | } 112 | 113 | } 114 | 115 | if(found) { 116 | if(pos < 0) { 117 | string adapter = adapterseq.substr(0, alen+pos); 118 | r->mSeq.mStr.resize(0); 119 | r->mQuality.resize(0); 120 | if(fr) { 121 | fr->addAdapterTrimmed(adapter, isR2); 122 | } 123 | 124 | } else { 125 | string adapter = r->mSeq.mStr.substr(pos, rlen-pos); 126 | r->mSeq.mStr = r->mSeq.mStr.substr(0, pos); 127 | r->mQuality = r->mQuality.substr(0, pos); 128 | if(fr) { 129 | fr->addAdapterTrimmed(adapter, isR2); 130 | } 131 | } 132 | return true; 133 | } 134 | 135 | return false; 136 | } 137 | 138 | bool AdapterTrimmer::test() { 139 | Read r("@name", 140 | "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTTAAAATTTTCCCCGGGG", 141 | "+", 142 | "///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////E"); 143 | string adapter = "TTTTCCACGGGGATACTACTG"; 144 | bool trimmed = AdapterTrimmer::trimBySequence(&r, NULL, adapter); 145 | if (r.mSeq.mStr != "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTTAAAA") 146 | return false; 147 | 148 | Read read("@name", 149 | "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTTAAAATTTTCCCCGGGGAAATTTCCCGGGAAATTTCCCGGGATCGATCGATCGATCGAATTCC", 150 | "+", 151 | "///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"); 152 | vector adapterList; 153 | adapterList.push_back("GCTAGCTAGCTAGCTA"); 154 | adapterList.push_back("AAATTTCCCGGGAAATTTCCCGGG"); 155 | adapterList.push_back("ATCGATCGATCGATCG"); 156 | adapterList.push_back("AATTCCGGAATTCCGG"); 157 | trimmed = AdapterTrimmer::trimByMultiSequences(&read, NULL, adapterList); 158 | if (read.mSeq.mStr != "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTTAAAATTTTCCCCGGGG") { 159 | cerr << read.mSeq.mStr 
<< endl; 160 | return false; 161 | } 162 | 163 | return true; 164 | } -------------------------------------------------------------------------------- /src/adaptertrimmer.h: -------------------------------------------------------------------------------- 1 | #ifndef ADAPTER_TRIMMER_H 2 | #define ADAPTER_TRIMMER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "overlapanalysis.h" 8 | #include "filterresult.h" 9 | #include "options.h" 10 | 11 | using namespace std; 12 | 13 | class AdapterTrimmer{ 14 | public: 15 | AdapterTrimmer(); 16 | ~AdapterTrimmer(); 17 | 18 | static bool trimByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, int diffLimit, int overlapRequire, double diffPercentLimit); 19 | static bool trimByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, OverlapResult ov, int frontTrimmed1 = 0, int frontTrimmed2 = 0); 20 | static bool trimBySequence(Read* r1, FilterResult* fr, string& adapter, bool isR2 = false, int matchReq = 4); 21 | static bool trimByMultiSequences(Read* r1, FilterResult* fr, vector& adapterList, bool isR2 = false, bool incTrimmedCounter = true); 22 | static bool test(); 23 | 24 | 25 | }; 26 | 27 | 28 | #endif -------------------------------------------------------------------------------- /src/basecorrector.cpp: -------------------------------------------------------------------------------- 1 | #include "basecorrector.h" 2 | #include "util.h" 3 | 4 | BaseCorrector::BaseCorrector(){ 5 | } 6 | 7 | 8 | BaseCorrector::~BaseCorrector(){ 9 | } 10 | 11 | int BaseCorrector::correctByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, int diffLimit, int overlapRequire, double diffPercentLimit) { 12 | OverlapResult ov = OverlapAnalysis::analyze(r1, r2, diffLimit, overlapRequire, diffPercentLimit); 13 | return correctByOverlapAnalysis(r1, r2, fr, ov); 14 | } 15 | 16 | int BaseCorrector::correctByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, OverlapResult ov) { 17 | // we only correct overlap 18 | if(ov.diff == 0 || 
!ov.overlapped) 19 | return 0; 20 | 21 | int ol = ov.overlap_len; 22 | int start1 = max(0, ov.offset); 23 | int start2 = r2->length() - max(0, -ov.offset) - 1; 24 | 25 | const char* seq1 = r1->mSeq.mStr.c_str(); 26 | const char* seq2 = r2->mSeq.mStr.c_str(); 27 | const char* qual1 = r1->mQuality.c_str(); 28 | const char* qual2 = r2->mQuality.c_str(); 29 | 30 | const char GOOD_QUAL = num2qual(30); 31 | const char BAD_QUAL = num2qual(14); 32 | 33 | int corrected = 0; 34 | int uncorrected = 0; 35 | bool r1Corrected = false; 36 | bool r2Corrected = false; 37 | for(int i=0; i= GOOD_QUAL && qual2[p2] <= BAD_QUAL) { 43 | // use R1 44 | r2->mSeq.mStr[p2] = complement(seq1[p1]); 45 | r2->mQuality[p2] = qual1[p1]; 46 | corrected++; 47 | r2Corrected = true; 48 | if(fr) { 49 | fr->addCorrection(seq2[p2], complement(seq1[p1])); 50 | } 51 | } else if(qual2[p2] >= GOOD_QUAL && qual1[p1] <= BAD_QUAL) { 52 | // use R2 53 | r1->mSeq.mStr[p1] = complement(seq2[p2]); 54 | r1->mQuality[p1] = qual2[p2]; 55 | corrected++; 56 | r1Corrected = true; 57 | if(fr) { 58 | fr->addCorrection(seq1[p1], complement(seq2[p2])); 59 | } 60 | } else { 61 | uncorrected++; 62 | } 63 | } 64 | } 65 | 66 | // should never happen 67 | if(uncorrected + corrected != ov.diff) { 68 | static bool warned = false; 69 | if(!warned){ 70 | cerr << "WARNING: the algorithm is wrong! 
uncorrected + corrected != ov.diff" << endl; 71 | warned = true; 72 | } 73 | } 74 | 75 | if(corrected > 0 && fr) { 76 | if(r1Corrected && r2Corrected) 77 | fr->incCorrectedReads(2); 78 | else 79 | fr->incCorrectedReads(1); 80 | } 81 | 82 | return corrected; 83 | } 84 | 85 | bool BaseCorrector::test() { 86 | Read r1("@name", 87 | "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTTAAAATTTTCCACGGGG", 88 | "+", 89 | "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE/EEEEE"); 90 | Read r2("@name", 91 | "AAAAAAAAAACCCCGGGGAAAATTTTAAAATTGGGGGGGGGGTGGGGGGGGGGGGG", 92 | "+", 93 | "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE/EEEEEEEEEEEEE"); 94 | 95 | correctByOverlapAnalysis(&r1, &r2, NULL, 5, 30, 0.2); 96 | 97 | if(r1.mSeq.mStr != "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTTAAAATTTTCCCCGGGG") 98 | return false; 99 | if(r2.mSeq.mStr != "AAAAAAAAAACCCCGGGGAAAATTTTAAAATTGGGGGGGGGGGGGGGGGGGGGGGG") 100 | return false; 101 | if(r1.mQuality != "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE") 102 | return false; 103 | if(r2.mQuality != "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE") 104 | return false; 105 | 106 | return true; 107 | } -------------------------------------------------------------------------------- /src/basecorrector.h: -------------------------------------------------------------------------------- 1 | #ifndef BASE_CORRECTOR_H 2 | #define BASE_CORRECTOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "overlapanalysis.h" 8 | #include "filterresult.h" 9 | #include "options.h" 10 | 11 | using namespace std; 12 | 13 | class BaseCorrector{ 14 | public: 15 | BaseCorrector(); 16 | ~BaseCorrector(); 17 | 18 | static int correctByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, int diffLimit, int overlapRequire, double diffPercentLimit); 19 | static int correctByOverlapAnalysis(Read* r1, Read* r2, FilterResult* fr, OverlapResult ov); 20 | static bool test(); 21 | }; 22 | 23 | 24 | #endif 
-------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #define FASTV_VER "0.10.0" 5 | 6 | #define _DEBUG false 7 | 8 | typedef long int64; 9 | typedef unsigned long uint64; 10 | 11 | typedef int int32; 12 | typedef unsigned int uint32; 13 | 14 | typedef short int16; 15 | typedef unsigned short uint16; 16 | 17 | typedef char int8; 18 | typedef unsigned char uint8; 19 | 20 | const char ATCG_BASES[] = {'A', 'T', 'C', 'G'}; 21 | 22 | #pragma pack(2) 23 | 24 | 25 | #pragma pack() 26 | 27 | // the limit of the queue to store the packs 28 | // error may happen if it generates more packs than this number 29 | static const int PACK_NUM_LIMIT = 10000000; 30 | 31 | // how many reads one pack has 32 | static const int PACK_SIZE = 1000; 33 | 34 | // if one pack is produced, but not consumed, it will be kept in the memory 35 | // this number limit the number of in memory packs 36 | // if the number of in memory packs is full, the producer thread should sleep 37 | static const int PACK_IN_MEM_LIMIT = 500; 38 | 39 | // if read number is more than this, warn it 40 | static const int WARN_STANDALONE_READ_LIMIT = 10000; 41 | 42 | // different filtering results, bigger number means worse 43 | // if r1 and r2 are both failed, then the bigger one of the two results will be recorded 44 | // we reserve some gaps for future types to be added 45 | static const int PASS_FILTER = 0; 46 | static const int FAIL_POLY_X = 4; 47 | static const int FAIL_OVERLAP = 8; 48 | static const int FAIL_N_BASE = 12; 49 | static const int FAIL_LENGTH = 16; 50 | static const int FAIL_TOO_LONG = 17; 51 | static const int FAIL_QUALITY = 20; 52 | static const int FAIL_COMPLEXITY = 24; 53 | 54 | // how many types in total we support 55 | static const int FILTER_RESULT_TYPES = 32; 56 | 57 | const static char* FAILED_TYPES[FILTER_RESULT_TYPES] = { 
58 | "passed", "", "", "", 59 | "failed_polyx_filter", "", "", "", 60 | "failed_bad_overlap", "", "", "", 61 | "failed_too_many_n_bases", "", "", "", 62 | "failed_too_short", "failed_too_long", "", "", 63 | "failed_quality_filter", "", "", "", 64 | "failed_low_complexity", "", "", "", 65 | "", "", "", "" 66 | }; 67 | 68 | 69 | #endif /* COMMON_H */ 70 | -------------------------------------------------------------------------------- /src/coverage.js: -------------------------------------------------------------------------------- 1 | var maxSize = 0; 2 | var maxCoverage = 0.0; 3 | var mapMargin = 30; 4 | var mismatchRatioThreshold = 0.05; 5 | var sorting_by_coverage_rate = 1; 6 | 7 | function switch_sort() { 8 | if(sorting_by_coverage_rate == 1) { 9 | sorting_by_coverage_rate = 0; 10 | genome_coverage_data.sort(sortByBases); 11 | drawCoverages('genome_coverage', genome_coverage_data, genome_sizes, stats_bin); 12 | document.getElementById("sort_by_div").innerHTML = "Order by: Coverage rate | Bases on target"; 13 | } else { 14 | sorting_by_coverage_rate = 1; 15 | genome_coverage_data.sort(sortByCoverageRate); 16 | drawCoverages('genome_coverage', genome_coverage_data, genome_sizes, stats_bin); 17 | document.getElementById("sort_by_div").innerHTML = "Order by: Coverage rate | Bases on target"; 18 | } 19 | } 20 | 21 | function sortByCoverageRate(a, b) { 22 | var r = b["coverage_rate"] - a["coverage_rate"]; 23 | if(r == 0) 24 | return b["bases"] - a["bases"]; 25 | else 26 | return r; 27 | } 28 | 29 | function sortByBases(a, b) { 30 | var r= b["bases"] - a["bases"]; 31 | if(r == 0) 32 | return b["coverage_rate"] - a["coverage_rate"]; 33 | else 34 | return r; 35 | } 36 | 37 | function drawCoverages(divid, data, sizes, bin) { 38 | mapcontainer = document.getElementById(divid); 39 | var hasData = false; 40 | for(d in data) { 41 | hasData = true; 42 | if(sizes[d] > maxSize) 43 | maxSize = sizes[d]; 44 | 45 | for(c in data[d]["coverage"]) { 46 | if(data[d]["coverage"][c] > 
maxCoverage) 47 | maxCoverage = data[d]["coverage"][c]; 48 | } 49 | } 50 | if(hasData) { 51 | mapcontainer.style.display = 'block'; 52 | } else { 53 | mapcontainer.style.display = 'none'; 54 | } 55 | var childs = mapcontainer.childNodes; 56 | for(var i = 0; i < childs.length; i++) { 57 | mapcontainer.removeChild(childs[i]); 58 | } 59 | var colorTableHTML = "
"; 60 | colorTableHTML += "
"; 61 | var count = 0; 62 | var tds = 30; 63 | while (count < tds) { 64 | var mr = mismatchRatioThreshold * count/tds ; 65 | count++; 66 | var c = getColor(mr); 67 | colorTableHTML += " "; 68 | } 69 | colorTableHTML += ""; 70 | colorTableHTML += "
Mismatch ratio = 0 Mismatch ratio = " + mismatchRatioThreshold + "
"; 71 | mapcontainer.innerHTML = colorTableHTML; 72 | 73 | for(d in data) { 74 | var genome = data[d]; 75 | cvs = document.createElement("canvas"); 76 | cvs.id = 'coverage_' + d.toString(); 77 | cvs.width=mapcontainer.offsetWidth - 10; 78 | cvs.height=60; 79 | cvs.style.padding='2px 0px 2px 0px'; 80 | cvs.onmousemove = onCanvasMove; 81 | cvs.onmouseover = onCanvasIn; 82 | cvs.onmouseout = onCanvasOut; 83 | mapcontainer.appendChild(cvs); 84 | 85 | drawGenome(genome, cvs.id, sizes[d], bin); 86 | 87 | var namediv = document.createElement("div"); 88 | namediv.innerHTML = "
" + data[d]['name'] + " (" + data[d]['coverage_rate'] + "% covered, " + data[d]['reads'] + " reads, " + data[d]['bases'] + " bases)
" ;; 89 | mapcontainer.appendChild(namediv); 90 | } 91 | } 92 | 93 | function onCanvasMove(e) { 94 | var cvs = e.target; 95 | var id = parseInt(cvs.id.substring(9)); 96 | console.log(cvs.id.substring(9)); 97 | console.log(id); 98 | if(!genome_coverage_data[id]) 99 | return; 100 | 101 | genome = genome_coverage_data[id]; 102 | var mapw = cvs.width; 103 | var maph = cvs.height; 104 | var x = e.clientX - cvs.offsetLeft; 105 | var pos = (x - mapMargin) * maxSize / (mapw - mapMargin*2); 106 | pos = Math.round(pos / stats_bin); 107 | 108 | console.log(pos); 109 | 110 | if(!genome["coverage"][pos]) 111 | return; 112 | 113 | var start = pos * stats_bin; 114 | var end = (pos+1) * stats_bin - 1; 115 | var html = start + "-" + end + "
"; 116 | html += "mean coverage: " + genome["coverage"][pos] + "
"; 117 | html += "mean mismatch ratio: " + genome["mismatch_ratios"][pos] + "
"; 118 | 119 | var tips = document.getElementById('maptips'); 120 | tips.style.position="absolute"; 121 | tips.style.left = e.clientX + 5 + tips.parentNode.scrollLeft; 122 | tips.style.top = e.clientY + 5 + tips.parentNode.scrollTop + document.body.scrollTop; 123 | tips.innerHTML = html; 124 | 125 | console.log(html); 126 | } 127 | 128 | function onCanvasIn() { 129 | var tips = document.getElementById('maptips'); 130 | tips.style.display = 'block'; 131 | } 132 | 133 | function onCanvasOut() { 134 | var tips = document.getElementById('maptips'); 135 | tips.style.display = 'none'; 136 | } 137 | 138 | function drawGenome(genome, canvasid, size, bin) { 139 | var cvs = document.getElementById(canvasid); 140 | var mapw = cvs.width; 141 | var maph = cvs.height; 142 | var ctx = cvs.getContext("2d"); 143 | var texth = 15; 144 | 145 | var name = genome['name']; 146 | var reads = genome['reads']; 147 | var bases = genome['bases']; 148 | var avg_mismatch_ratio = genome['avg_mismatch_ratio']; 149 | var coverage = genome['coverage']; 150 | var mismatch_ratios = genome['mismatch_ratios']; 151 | 152 | for(var pos in coverage) { 153 | var c = coverage[pos]; 154 | var mr = mismatch_ratios[pos]; 155 | 156 | var w = (mapw - mapMargin*2) * bin / maxSize; 157 | var drawW = w; 158 | var h = (maph-texth)* (c/maxCoverage); 159 | var drawH = h; 160 | var centerX = mapMargin + (pos-0.5) * (mapw - mapMargin*2) * bin/maxSize; 161 | var x = centerX - 1; 162 | var y = maph- texth; 163 | ctx.fillStyle=getColor(mr); 164 | ctx.fillRect(x, y, drawW, -drawH); 165 | } 166 | 167 | var xbars = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; 168 | var tailPainted = false; 169 | for(b in xbars) { 170 | var tick = Math.round( maxSize * xbars[b]/10 ); 171 | if(tick > size) { 172 | if(tailPainted) 173 | continue; 174 | tailPainted = true; 175 | tick = size; 176 | } 177 | var x = mapMargin + (mapw - mapMargin*2) * tick/maxSize; 178 | x = Math.round(x); 179 | ctx.font = "10px Arial"; 180 | ctx.fillStyle = "#999999"; 181 | 
ctx.fillText(tick.toString(), x, maph); 182 | } 183 | 184 | ctx.font = "10px Arial"; 185 | ctx.fillStyle = "#AAAAAA"; 186 | ctx.fillText(maxCoverage.toString() + "", 10, 10); 187 | } 188 | 189 | function getColor(mr) { 190 | if(mr > mismatchRatioThreshold) 191 | return "rgb(128, 128, 128)"; 192 | else { 193 | var p = mr/mismatchRatioThreshold; 194 | var diff = 120* p; 195 | return "rgb(" + (128+diff) + "," + (128-diff) + "," + (128-diff) + ")"; 196 | } 197 | } -------------------------------------------------------------------------------- /src/duplicate.cpp: -------------------------------------------------------------------------------- 1 | #include "duplicate.h" 2 | #include "overlapanalysis.h" 3 | #include 4 | #include 5 | 6 | Duplicate::Duplicate(Options* opt) { 7 | mOptions = opt; 8 | mKeyLenInBase = mOptions->duplicate.keylen; 9 | mKeyLenInBit = 1<<(2*mKeyLenInBase); 10 | mDups = new uint64[mKeyLenInBit]; 11 | memset(mDups, 0, sizeof(uint64)*mKeyLenInBit); 12 | mCounts = new uint16[mKeyLenInBit]; 13 | memset(mCounts, 0, sizeof(uint16)*mKeyLenInBit); 14 | mGC = new uint8[mKeyLenInBit]; 15 | memset(mGC, 0, sizeof(uint8)*mKeyLenInBit); 16 | } 17 | 18 | Duplicate::~Duplicate(){ // releases all three tables allocated by the constructor 19 | delete[] mDups; 20 | delete[] mCounts; 21 | delete[] mGC; // allocated with new[] in the constructor; was never freed before 22 | } 23 | uint64 Duplicate::seq2int(const char* data, int start, int keylen, bool& valid) { 24 | uint64 ret = 0; 25 | for(int i=0; i kmer32) { 59 | mDups[key] = kmer32; 60 | mCounts[key] = 1; 61 | mGC[key] = gc; 62 | } 63 | } 64 | } 65 | 66 | void Duplicate::statRead(Read* r) { 67 | if(r->length() < 32) 68 | return; 69 | 70 | int start1 = 0; 71 | int start2 = max(0, r->length() - 32 - 5); 72 | 73 | const char* data = r->mSeq.mStr.c_str(); 74 | bool valid = true; 75 | 76 | uint64 ret = seq2int(data, start1, mKeyLenInBase, valid); 77 | uint32 key = (uint32)ret; 78 | if(!valid) 79 | return; 80 | 81 | uint64 kmer32 = seq2int(data, start2, 32, valid); 82 | if(!valid) 83 | return; 84 | 85 | int gc = 0; 86 | 87 | // not calculated 88 | 
if(mCounts[key] == 0) { 89 | for(int i=0; ilength(); i++) { 90 | if(data[i] == 'G' || data[i] == 'C') // GC content counts G and C bases, the same test statPair uses 91 | gc++; 92 | } 93 | } 94 | 95 | gc = round(255.0 * (double) gc / (double) r->length()); 96 | 97 | addRecord(key, kmer32, (uint8)gc); 98 | } 99 | 100 | void Duplicate::statPair(Read* r1, Read* r2) { 101 | if(r1->length() < 32 || r2->length() < 32) 102 | return; 103 | 104 | const char* data1 = r1->mSeq.mStr.c_str(); 105 | const char* data2 = r2->mSeq.mStr.c_str(); 106 | bool valid = true; 107 | 108 | uint64 ret = seq2int(data1, 0, mKeyLenInBase, valid); 109 | uint32 key = (uint32)ret; 110 | if(!valid) 111 | return; 112 | 113 | uint64 kmer32 = seq2int(data2, 0, 32, valid); 114 | if(!valid) 115 | return; 116 | 117 | int gc = 0; 118 | 119 | // not calculated 120 | if(mCounts[key] == 0) { 121 | for(int i=0; ilength(); i++) { 122 | if(data1[i] == 'G' || data1[i] == 'C') 123 | gc++; 124 | } 125 | for(int i=0; ilength(); i++) { 126 | if(data2[i] == 'G' || data2[i] == 'C') 127 | gc++; 128 | } 129 | } 130 | 131 | gc = round(255.0 * (double) gc / (double)( r1->length() + r2->length())); 132 | 133 | addRecord(key, kmer32, gc); 134 | } 135 | 136 | double Duplicate::statAll(int* hist, double* meanGC, int histSize) { 137 | long totalNum = 0; 138 | long dupNum = 0; 139 | int* gcStatNum = new int[histSize]; 140 | memset(gcStatNum, 0, sizeof(int)*histSize); 141 | for(int key=0; key 0) { 146 | totalNum += count; 147 | dupNum += count - 1; 148 | 149 | if(count >= histSize){ 150 | hist[histSize-1]++; 151 | meanGC[histSize-1] += gc; 152 | gcStatNum[histSize-1]++; 153 | } 154 | else{ 155 | hist[count]++; 156 | meanGC[count] += gc; 157 | gcStatNum[count]++; 158 | } 159 | } 160 | } 161 | 162 | for(int i=0; i 0) { 164 | meanGC[i] = meanGC[i] / 255.0 / gcStatNum[i]; 165 | } 166 | } 167 | 168 | delete[] gcStatNum; 169 | 170 | if(totalNum == 0) 171 | return 0.0; 172 | else 173 | return (double)dupNum / (double)totalNum; 174 | } 
-------------------------------------------------------------------------------- /src/duplicate.h: -------------------------------------------------------------------------------- 1 | #ifndef DUPLICATE_H 2 | #define DUPLICATE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include "options.h" 9 | #include "common.h" 10 | 11 | using namespace std; 12 | 13 | class Duplicate{ 14 | public: 15 | Duplicate(Options* opt); 16 | ~Duplicate(); 17 | 18 | void statRead(Read* r1); 19 | void statPair(Read* r1, Read* r2); 20 | uint64 seq2int(const char* data, int start, int keylen, bool& valid); 21 | void addRecord(uint32 key, uint64 kmer32, uint8 gc); 22 | 23 | // make histogram and get duplication rate 24 | double statAll(int* hist, double* meanGC, int histSize); 25 | 26 | private: 27 | Options* mOptions; 28 | int mKeyLenInBase; 29 | int mKeyLenInBit; 30 | uint64* mDups; 31 | uint16* mCounts; 32 | uint8* mGC; 33 | 34 | }; 35 | 36 | #endif -------------------------------------------------------------------------------- /src/editdistance.cpp: -------------------------------------------------------------------------------- 1 | // ------- 2 | // License 3 | // ------- 4 | // 5 | // It is released under the MIT license. 6 | // 7 | // Copyright (c) 2013 Hiroyuki Tanaka 8 | // 9 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | // 11 | // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 14 | 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | 27 | #include "editdistance.h" 28 | 29 | using namespace std; 30 | 31 | template 32 | unsigned int edit_distance_bpv(T &cmap, char const *vec, size_t const &vecsize, unsigned int const &tmax, unsigned int const &tlen) { 33 | int D = tmax * 64 + tlen; 34 | TVALUE D0, HP, HN, VP, VN; 35 | uint64_t top = (1L << (tlen - 1)); // 末尾のvectorに適用 36 | uint64_t lmb = (1L << 63); 37 | 38 | for(size_t i = 0; i <= tmax; ++i) { 39 | VP[i] = 0; 40 | VN[i] = 0; 41 | } 42 | for(size_t i = 0; i < tmax; ++i) VP[i] = ~0; 43 | for(size_t i = 0; i < tlen; ++i) VP[tmax] |= (1L << i); 44 | for(size_t i = 0; i < vecsize; ++i) { 45 | TVALUE &PM = cmap[vec[i]]; 46 | for(int r = 0; r <= tmax; ++r) { 47 | uint64_t X = PM[r]; 48 | if(r > 0 && (HN[r - 1] & lmb)) X |= 1L; 49 | D0[r] = (((X & VP[r]) + VP[r]) ^ VP[r]) | X | VN[r]; 50 | HP[r] = VN[r] | ~(D0[r] | VP[r]); 51 | HN[r] = D0[r] & VP[r]; 52 | X = (HP[r] << 1L); 53 | if(r == 0 || HP[r - 1] & lmb) X |= 1L; 54 | VP[r] = (HN[r] << 1L) | ~(D0[r] | X); 55 | if(r > 0 && (HN[r - 1] & lmb)) VP[r] |= 1L; 56 | VN[r] = D0[r] & X; 57 | } 58 | if(HP[tmax] & top) ++D; 59 | else if(HN[tmax] & top) --D; 60 | } 61 | return D; 62 | } 63 | 64 | 65 | /// c.f. 
http://handasse.blogspot.com/2009/04/c_29.html 66 | template 67 | unsigned int edit_distance_dp(T const *str1, size_t const size1, T const *str2, size_t const size2) { 68 | vector< vector > d(size1 + 1, vector(size2 + 1)); 69 | for (int i = 0; i < size1 + 1; i++) d[i][0] = i; 70 | for (int i = 0; i < size2 + 1; i++) d[0][i] = i; 71 | for (int i = 1; i < size1 + 1; i++) { 72 | for (int j = 1; j < size2 + 1; j++) { 73 | d[i][j] = min(min(d[i-1][j], d[i][j-1]) + 1, d[i-1][j-1] + (str1[i-1] == str2[j-1] ? 0 : 1)); 74 | } 75 | } 76 | return d[size1][size2]; 77 | } 78 | 79 | template 80 | struct varr { 81 | uint64_t arr_[N]; 82 | uint64_t & operator[](size_t const &i) { 83 | return arr_[i]; 84 | } 85 | }; 86 | 87 | 88 | template 89 | unsigned int edit_distance_map_(char const *a, size_t const asize, char const *b, size_t const bsize) { 90 | typedef map > cmap_v; 91 | cmap_v cmap; 92 | unsigned int tmax = (asize - 1) >> 6; 93 | unsigned int tlen = asize - tmax * 64; 94 | for(size_t i = 0; i < tmax; ++i) { 95 | for(size_t j = 0; j < 64; ++j) cmap[a[i * 64 + j]][i] |= (1L << j); 96 | } 97 | for(size_t i = 0; i < tlen; ++i) cmap[a[tmax * 64 + i]][tmax] |= (1L << i); 98 | return edit_distance_bpv(cmap, b, bsize, tmax, tlen); 99 | } 100 | 101 | unsigned int edit_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize) { 102 | if(asize == 0) return bsize; 103 | else if(bsize == 0) return asize; 104 | char const *ap, *bp; 105 | unsigned int const *asizep, *bsizep; 106 | if(asize < bsize) ap = b, bp = a, asizep = &bsize, bsizep = &asize; 107 | else ap = a, bp = b, asizep = &asize, bsizep = &bsize; 108 | size_t vsize = ((*asizep - 1) >> 6) + 1; 109 | if(vsize > 10) { 110 | char const *_ = ap; 111 | unsigned int const *__ = asizep; 112 | ap = bp, bp = _, asizep = bsizep, bsizep = __; 113 | vsize = ((*asizep - 1) >> 6) + 1; 114 | } 115 | 116 | if(vsize == 1) return edit_distance_map_<1>(ap, *asizep, bp, *bsizep); 117 | else if(vsize == 2) return 
edit_distance_map_<2>(ap, *asizep, bp, *bsizep); 118 | else if(vsize == 3) return edit_distance_map_<3>(ap, *asizep, bp, *bsizep); 119 | else if(vsize == 4) return edit_distance_map_<4>(ap, *asizep, bp, *bsizep); 120 | else if(vsize == 5) return edit_distance_map_<5>(ap, *asizep, bp, *bsizep); 121 | else if(vsize == 6) return edit_distance_map_<6>(ap, *asizep, bp, *bsizep); 122 | else if(vsize == 7) return edit_distance_map_<7>(ap, *asizep, bp, *bsizep); 123 | else if(vsize == 8) return edit_distance_map_<8>(ap, *asizep, bp, *bsizep); 124 | else if(vsize == 9) return edit_distance_map_<9>(ap, *asizep, bp, *bsizep); 125 | else if(vsize == 10) return edit_distance_map_<10>(ap, *asizep, bp, *bsizep); 126 | return edit_distance_dp(ap, *asizep, bp, *bsizep); 127 | } 128 | 129 | unsigned int edit_distance(string a, string b) { 130 | return edit_distance(a.c_str(), a.length(), b.c_str(), b.length()); 131 | } 132 | 133 | unsigned int hamming_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize) { 134 | int dis = 0; 135 | for(int i=0; i 5 | #include 6 | 7 | // struct PatternMap { 8 | // uint64_t p_[256][4]; 9 | // unsigned int tmax_; 10 | // unsigned int tlen_; 11 | // }; 12 | 13 | using namespace std; 14 | 15 | unsigned int edit_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize); 16 | // void create_patternmap(struct PatternMap *pm, const int64_t *a, const unsigned int size); 17 | // unsigned int edit_distance_by_patternmap(struct PatternMap *mp, const int64_t *b, const unsigned int size); 18 | 19 | unsigned int edit_distance(string a, string b); 20 | 21 | unsigned int hamming_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize); 22 | 23 | bool editdistance_test(); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/evaluator.h: 
-------------------------------------------------------------------------------- 1 | #ifndef EVALUATOR_H 2 | #define EVALUATOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "options.h" 8 | #include "util.h" 9 | #include "read.h" 10 | 11 | using namespace std; 12 | 13 | class Evaluator{ 14 | public: 15 | Evaluator(Options* opt); 16 | ~Evaluator(); 17 | // evaluate how many reads are stored in the input file 18 | void evaluateReadNum(long& readNum); 19 | string evalAdapterAndReadNumDepreciated(long& readNum); 20 | string evalAdapterAndReadNum(long& readNum, bool isR2); 21 | bool isTwoColorSystem(); 22 | void evaluateSeqLen(); 23 | int computeSeqLen(string filename); 24 | 25 | static bool test(); 26 | static string matchKnownAdapter(string seq); 27 | private: 28 | Options* mOptions; 29 | string int2seq(unsigned int val, int seqlen); 30 | int seq2int(string& seq, int pos, int seqlen, int lastVal = -1); 31 | string getAdapterWithSeed(int seed, Read** loadedReads, long records, int keylen); 32 | }; 33 | 34 | 35 | #endif -------------------------------------------------------------------------------- /src/fastareader.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "fastareader.h" 3 | #include "util.h" 4 | #include 5 | #include 6 | 7 | FastaReader::FastaReader(string faFile, bool forceUpperCase) 8 | { 9 | // Set locale and disable stdio synchronization to improve iostream performance 10 | // http://www.drdobbs.com/the-standard-librarian-iostreams-and-std/184401305 11 | // http://stackoverflow.com/questions/5166263/how-to-get-iostream-to-perform-better 12 | setlocale(LC_ALL,"C"); 13 | ios_base::sync_with_stdio(false); 14 | 15 | mFilename = faFile; 16 | mForceUpperCase = forceUpperCase; 17 | 18 | if (ends_with(mFilename, ".fasta.gz") || ends_with(mFilename, ".fa.gz") || ends_with(mFilename, ".fna.gz")){ 19 | mZipFile = gzopen(mFilename.c_str(), "r"); 20 | mZipped = true; 21 | } 22 | else 
if(ends_with(mFilename, ".fasta") || ends_with(mFilename, ".fa") || ends_with(mFilename, ".fna")){ 23 | mFile.open(mFilename.c_str(), ifstream::in); 24 | mZipped = false; 25 | } else { 26 | error_exit("FASTA file should have a name (*.fasta, *.fa or *.fna) or (*.fasta.gz, *.fa.gz or *.fna.gz). Not a FASTA file: " + mFilename); 27 | } 28 | 29 | char c; 30 | // seek to first contig 31 | while (getChar(c) && c != '>') { 32 | if (eof()) { 33 | break; 34 | } 35 | } 36 | } 37 | 38 | FastaReader::~FastaReader() 39 | { 40 | if (mZipped){ 41 | if (mZipFile){ 42 | gzclose(mZipFile); 43 | mZipFile = NULL; 44 | } 45 | } 46 | else { 47 | if (mFile.is_open()){ 48 | mFile.close(); 49 | } 50 | } 51 | } 52 | 53 | bool FastaReader::getLine(char* line, int maxLine){ 54 | bool status = true; 55 | if(mZipped) 56 | status = gzgets(mZipFile, line, maxLine); 57 | else { 58 | mFile.getline(line, maxLine, '\n'); 59 | status = !mFile.fail(); 60 | } 61 | 62 | // trim \n, \r or \r\n in the tail 63 | int readed = strlen(line); 64 | if(readed >=2 ){ 65 | if(line[readed-1] == '\n' || line[readed-1] == '\r'){ 66 | line[readed-1] = '\0'; 67 | if(line[readed-2] == '\r') 68 | line[readed-2] = '\0'; 69 | } 70 | } 71 | 72 | return status; 73 | } 74 | 75 | bool FastaReader::eof() { 76 | if (mZipped) { 77 | return gzeof(mZipFile); 78 | } else { 79 | return mFile.eof(); 80 | } 81 | } 82 | 83 | bool FastaReader::getChar(char& c) { 84 | bool status = true; 85 | if (mZipped) { 86 | c = (char)gzgetc(mZipFile); 87 | if(c == -1) 88 | status = false; 89 | } else { 90 | mFile.get(c); 91 | status = !mFile.fail(); 92 | } 93 | return status; 94 | } 95 | 96 | void FastaReader::readNext() 97 | { 98 | const int maxLine = 1024; 99 | char linebuf[maxLine]; 100 | 101 | mCurrentID = ""; 102 | mCurrentDescription = ""; 103 | mCurrentSequence = ""; 104 | bool foundHeader = false; 105 | 106 | char c; 107 | stringstream ssSeq; 108 | stringstream ssHeader; 109 | while(true){ 110 | getChar(c); 111 | // skip blank line 112 | if(c 
== '\n' && !eof()) 113 | continue; 114 | if(c == '>' || eof()) 115 | break; 116 | else { 117 | if (foundHeader){ 118 | if(mForceUpperCase && c>='a' && c<='z') { 119 | c -= ('a' - 'A'); 120 | } 121 | ssSeq << c; 122 | } 123 | else 124 | ssHeader << c; 125 | } 126 | string line; 127 | if(mZipped) { 128 | getLine(linebuf, maxLine); 129 | line = string(linebuf); 130 | } else { 131 | getline(mFile,line,'\n'); 132 | } 133 | 134 | // fix \r\n issue 135 | if(line.length()>0) { 136 | if(line[line.length()-1] == '\r') { 137 | line = line.substr(0, line.length()-1); 138 | } 139 | } 140 | 141 | if(foundHeader == false) { 142 | ssHeader << line; 143 | foundHeader = true; 144 | } 145 | else { 146 | str_keep_valid_sequence(line, mForceUpperCase); 147 | ssSeq << line; 148 | } 149 | } 150 | mCurrentSequence = ssSeq.str(); 151 | string header = ssHeader.str(); 152 | 153 | mCurrentID = header; 154 | } 155 | 156 | bool FastaReader::hasNext() { 157 | return !eof(); 158 | } 159 | 160 | void FastaReader::readAll() { 161 | while(!eof()){ 162 | readNext(); 163 | mAllContigs[mCurrentID] = mCurrentSequence; 164 | } 165 | } 166 | 167 | bool FastaReader::test(){ 168 | FastaReader reader("testdata/tinyref.fa"); 169 | reader.readAll(); 170 | 171 | string contig1 = "GATCACAGGTCTATCACCCTATTAATTGGTATTTTCGTCTGGGGGGTGTGGAGCCGGAGCACCCTATGTCGCAGT"; 172 | string contig2 = "GTCTGCACAGCCGCTTTCCACACAGAACCCCCCCCTCCCCCCGCTTCTGGCAAACCCCAAAAACAAAGAACCCTA"; 173 | 174 | if(reader.mAllContigs.count("contig1") == 0 || reader.mAllContigs.count("contig2") == 0 ) 175 | return false; 176 | 177 | if(reader.mAllContigs["contig1"] != contig1 || reader.mAllContigs["contig2"] != contig2 ) 178 | return false; 179 | 180 | return true; 181 | 182 | } 183 | 184 | 185 | 186 | -------------------------------------------------------------------------------- /src/fastareader.h: -------------------------------------------------------------------------------- 1 | #ifndef FASTA_READER_H 2 | #define FASTA_READER_H 3 | 4 | // includes 5 
| #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "zlib/zlib.h" 14 | 15 | using namespace std; 16 | 17 | class FastaReader 18 | { 19 | public: 20 | FastaReader(string fastaFile, bool forceUpperCase = true); 21 | ~FastaReader(); 22 | bool hasNext(); 23 | void readNext(); 24 | void readAll(); 25 | 26 | inline string currentID() 27 | { 28 | return mCurrentID; 29 | } 30 | 31 | inline string currentDescription() 32 | { 33 | return mCurrentDescription; 34 | } 35 | 36 | inline string currentSequence() 37 | { 38 | return mCurrentSequence; 39 | } 40 | 41 | inline map& contigs() { 42 | return mAllContigs; 43 | } 44 | 45 | static bool test(); 46 | 47 | 48 | public: 49 | string mCurrentSequence; 50 | string mCurrentID ; 51 | string mCurrentDescription; 52 | map mAllContigs; 53 | 54 | private: 55 | bool readLine(); 56 | bool endOfLine(char c); 57 | void setFastaSequenceIdDescription(); 58 | bool getLine(char* line, int maxLine); 59 | bool getChar(char& c); 60 | bool eof(); 61 | 62 | private: 63 | string mFilename; 64 | bool mForceUpperCase; 65 | gzFile mZipFile; 66 | ifstream mFile; 67 | bool mZipped; 68 | }; 69 | 70 | 71 | #endif 72 | 73 | -------------------------------------------------------------------------------- /src/fastqreader.cpp: -------------------------------------------------------------------------------- 1 | #include "fastqreader.h" 2 | #include "util.h" 3 | #include 4 | 5 | #define FQ_BUF_SIZE (1<<20) 6 | 7 | FastqReader::FastqReader(string filename, bool hasQuality, bool phred64){ 8 | mFilename = filename; 9 | mZipFile = NULL; 10 | mZipped = false; 11 | mFile = NULL; 12 | mStdinMode = false; 13 | mPhred64 = phred64; 14 | mHasQuality = hasQuality; 15 | mBuf = new char[FQ_BUF_SIZE]; 16 | mBufDataLen = 0; 17 | mBufUsedLen = 0; 18 | mHasNoLineBreakAtEnd = false; 19 | init(); 20 | } 21 | 22 | FastqReader::~FastqReader(){ 23 | close(); 24 | delete mBuf; 25 | } 26 | 27 | bool 
FastqReader::hasNoLineBreakAtEnd() { 28 | return mHasNoLineBreakAtEnd; 29 | } 30 | 31 | void FastqReader::readToBuf() { 32 | if(mZipped) { 33 | mBufDataLen = gzread(mZipFile, mBuf, FQ_BUF_SIZE); 34 | if(mBufDataLen == -1) { 35 | cerr << "Error to read gzip file" << endl; 36 | } 37 | } else { 38 | mBufDataLen = fread(mBuf, 1, FQ_BUF_SIZE, mFile); 39 | } 40 | mBufUsedLen = 0; 41 | 42 | if(mBufDataLen < FQ_BUF_SIZE) { 43 | if(mBuf[mBufDataLen-1] != '\n') 44 | mHasNoLineBreakAtEnd = true; 45 | } 46 | } 47 | 48 | void FastqReader::init(){ 49 | if (ends_with(mFilename, ".gz")){ 50 | mZipFile = gzopen(mFilename.c_str(), "r"); 51 | mZipped = true; 52 | gzrewind(mZipFile); 53 | } 54 | else { 55 | if(mFilename == "/dev/stdin") { 56 | mFile = stdin; 57 | } 58 | else 59 | mFile = fopen(mFilename.c_str(), "rb"); 60 | if(mFile == NULL) { 61 | error_exit("Failed to open file: " + mFilename); 62 | } 63 | mZipped = false; 64 | } 65 | readToBuf(); 66 | } 67 | 68 | void FastqReader::getBytes(size_t& bytesRead, size_t& bytesTotal) { 69 | if(mZipped) { 70 | bytesRead = gzoffset(mZipFile); 71 | } else { 72 | bytesRead = ftell(mFile);//mFile.tellg(); 73 | } 74 | 75 | // use another ifstream to not affect current reader 76 | ifstream is(mFilename); 77 | is.seekg (0, is.end); 78 | bytesTotal = is.tellg(); 79 | } 80 | 81 | void FastqReader::clearLineBreaks(char* line) { 82 | 83 | // trim \n, \r or \r\n in the tail 84 | int readed = strlen(line); 85 | if(readed >=2 ){ 86 | if(line[readed-1] == '\n' || line[readed-1] == '\r'){ 87 | line[readed-1] = '\0'; 88 | if(line[readed-2] == '\r') 89 | line[readed-2] = '\0'; 90 | } 91 | } 92 | } 93 | 94 | string FastqReader::getLine(){ 95 | static int c=0; 96 | c++; 97 | int copied = 0; 98 | 99 | int start = mBufUsedLen; 100 | int end = start; 101 | 102 | while(end < mBufDataLen) { 103 | if(mBuf[end] != '\r' && mBuf[end] != '\n') 104 | end++; 105 | else 106 | break; 107 | } 108 | 109 | // this line well contained in this buf, or this is the last buf 110 
| if(end < mBufDataLen || mBufDataLen < FQ_BUF_SIZE) { 111 | int len = end - start; 112 | string line(mBuf+start, len); 113 | 114 | // skip \n or \r 115 | end++; 116 | // handle \r\n 117 | if(end < mBufDataLen-1 && mBuf[end-1]=='\r' && mBuf[end] == '\n') 118 | end++; 119 | 120 | mBufUsedLen = end; 121 | 122 | return line; 123 | } 124 | 125 | // this line is not contained in this buf, we need to read new buf 126 | string str(mBuf+start, mBufDataLen - start); 127 | 128 | while(true) { 129 | readToBuf(); 130 | start = 0; 131 | end = 0; 132 | while(end < mBufDataLen) { 133 | if(mBuf[end] != '\r' && mBuf[end] != '\n') 134 | end++; 135 | else 136 | break; 137 | } 138 | // this line well contained in this buf, we need to read new buf 139 | if(end < mBufDataLen || mBufDataLen < FQ_BUF_SIZE) { 140 | int len = end - start; 141 | str.append(mBuf+start, len); 142 | 143 | // skip \n or \r 144 | end++; 145 | // handle \r\n 146 | if(end < mBufDataLen-1 && mBuf[end] == '\n') 147 | end++; 148 | 149 | mBufUsedLen = end; 150 | return str; 151 | } 152 | // even this new buf is not enough, although impossible 153 | str.append(mBuf+start, mBufDataLen); 154 | } 155 | 156 | return string(); 157 | } 158 | 159 | bool FastqReader::eof() { 160 | if (mZipped) { 161 | return gzeof(mZipFile); 162 | } else { 163 | return feof(mFile);//mFile.eof(); 164 | } 165 | } 166 | 167 | Read* FastqReader::read(){ 168 | if (mZipped){ 169 | if (mZipFile == NULL) 170 | return NULL; 171 | } 172 | 173 | if(mBufUsedLen >= mBufDataLen && eof()) { 174 | return NULL; 175 | } 176 | 177 | string name = getLine(); 178 | // name should start with @ 179 | while((name.empty() && !(mBufUsedLen >= mBufDataLen && eof())) || (!name.empty() && name[0]!='@')){ 180 | name = getLine(); 181 | } 182 | 183 | if(name.empty()) 184 | return NULL; 185 | 186 | string sequence = getLine(); 187 | string strand = getLine(); 188 | 189 | // WAR for FQ with no quality 190 | if (!mHasQuality){ 191 | string quality = string(sequence.length(), 
'K'); 192 | return new Read(name, sequence, strand, quality, mPhred64); 193 | } 194 | else { 195 | string quality = getLine(); 196 | if(quality.length() != sequence.length()) { 197 | cerr << "ERROR: sequence and quality have different length:" << endl; 198 | cerr << name << endl; 199 | cerr << sequence << endl; 200 | cerr << strand << endl; 201 | cerr << quality << endl; 202 | return NULL; 203 | } 204 | return new Read(name, sequence, strand, quality, mPhred64); 205 | } 206 | 207 | return NULL; 208 | } 209 | 210 | void FastqReader::close(){ 211 | if (mZipped){ 212 | if (mZipFile){ 213 | gzclose(mZipFile); 214 | mZipFile = NULL; 215 | } 216 | } 217 | else { 218 | if (mFile){ 219 | fclose(mFile);//mFile.close(); 220 | mFile = NULL; 221 | } 222 | } 223 | } 224 | 225 | bool FastqReader::isZipFastq(string filename) { 226 | if (ends_with(filename, ".fastq.gz")) 227 | return true; 228 | else if (ends_with(filename, ".fq.gz")) 229 | return true; 230 | else if (ends_with(filename, ".fasta.gz")) 231 | return true; 232 | else if (ends_with(filename, ".fa.gz")) 233 | return true; 234 | else 235 | return false; 236 | } 237 | 238 | bool FastqReader::isFastq(string filename) { 239 | if (ends_with(filename, ".fastq")) 240 | return true; 241 | else if (ends_with(filename, ".fq")) 242 | return true; 243 | else if (ends_with(filename, ".fasta")) 244 | return true; 245 | else if (ends_with(filename, ".fa")) 246 | return true; 247 | else 248 | return false; 249 | } 250 | 251 | bool FastqReader::isZipped(){ 252 | return mZipped; 253 | } 254 | 255 | bool FastqReader::test(){ 256 | FastqReader reader1("testdata/R1.fq"); 257 | FastqReader reader2("testdata/R1.fq.gz"); 258 | Read* r1 = NULL; 259 | Read* r2 = NULL; 260 | while(true){ 261 | r1=reader1.read(); 262 | r2=reader2.read(); 263 | if(r1 == NULL || r2 == NULL) 264 | break; 265 | if(r1->mSeq.mStr != r2->mSeq.mStr){ 266 | return false; 267 | } 268 | delete r1; 269 | delete r2; 270 | } 271 | return true; 272 | } 273 | 274 | 
FastqReaderPair::FastqReaderPair(FastqReader* left, FastqReader* right){ 275 | mLeft = left; 276 | mRight = right; 277 | } 278 | 279 | FastqReaderPair::FastqReaderPair(string leftName, string rightName, bool hasQuality, bool phred64, bool interleaved){ 280 | mInterleaved = interleaved; 281 | mLeft = new FastqReader(leftName, hasQuality, phred64); 282 | if(mInterleaved) 283 | mRight = NULL; 284 | else 285 | mRight = new FastqReader(rightName, hasQuality, phred64); 286 | } 287 | 288 | FastqReaderPair::~FastqReaderPair(){ 289 | if(mLeft){ 290 | delete mLeft; 291 | mLeft = NULL; 292 | } 293 | if(mRight){ 294 | delete mRight; 295 | mRight = NULL; 296 | } 297 | } 298 | 299 | ReadPair* FastqReaderPair::read(){ 300 | Read* l = mLeft->read(); 301 | Read* r = NULL; 302 | if(mInterleaved) 303 | r = mLeft->read(); 304 | else 305 | r = mRight->read(); 306 | if(!l || !r){ 307 | return NULL; 308 | } else { 309 | return new ReadPair(l, r); 310 | } 311 | } 312 | -------------------------------------------------------------------------------- /src/fastqreader.h: -------------------------------------------------------------------------------- 1 | #ifndef FASTQ_READER_H 2 | #define FASTQ_READER_H 3 | 4 | #include 5 | #include 6 | #include "read.h" 7 | #ifdef DYNAMIC_ZLIB 8 | #include 9 | #else 10 | #include "zlib/zlib.h" 11 | #endif 12 | #include "common.h" 13 | #include 14 | #include 15 | 16 | class FastqReader{ 17 | public: 18 | FastqReader(string filename, bool hasQuality = true, bool phred64=false); 19 | ~FastqReader(); 20 | bool isZipped(); 21 | 22 | void getBytes(size_t& bytesRead, size_t& bytesTotal); 23 | 24 | //this function is not thread-safe 25 | //do not call read() of a same FastqReader object from different threads concurrently 26 | Read* read(); 27 | bool eof(); 28 | bool hasNoLineBreakAtEnd(); 29 | 30 | public: 31 | static bool isZipFastq(string filename); 32 | static bool isFastq(string filename); 33 | static bool test(); 34 | 35 | private: 36 | void init(); 37 | void 
close(); 38 | string getLine(); 39 | void clearLineBreaks(char* line); 40 | void readToBuf(); 41 | 42 | private: 43 | string mFilename; 44 | gzFile mZipFile; 45 | FILE* mFile; 46 | bool mZipped; 47 | bool mHasQuality; 48 | bool mPhred64; 49 | char* mBuf; 50 | int mBufDataLen; 51 | int mBufUsedLen; 52 | bool mStdinMode; 53 | bool mHasNoLineBreakAtEnd; 54 | 55 | }; 56 | 57 | class FastqReaderPair{ 58 | public: 59 | FastqReaderPair(FastqReader* left, FastqReader* right); 60 | FastqReaderPair(string leftName, string rightName, bool hasQuality = true, bool phred64 = false, bool interleaved = false); 61 | ~FastqReaderPair(); 62 | ReadPair* read(); 63 | public: 64 | FastqReader* mLeft; 65 | FastqReader* mRight; 66 | bool mInterleaved; 67 | }; 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/filter.cpp: -------------------------------------------------------------------------------- 1 | #include "processor.h" 2 | #include "peprocessor.h" 3 | #include "seprocessor.h" 4 | #include "overlapanalysis.h" 5 | 6 | Filter::Filter(Options* opt){ 7 | mOptions = opt; 8 | } 9 | 10 | 11 | Filter::~Filter(){ 12 | } 13 | 14 | int Filter::passFilter(Read* r) { 15 | if(r == NULL || r->length()==0) { 16 | return FAIL_LENGTH; 17 | } 18 | 19 | int rlen = r->length(); 20 | int lowQualNum = 0; 21 | int nBaseNum = 0; 22 | int totalQual = 0; 23 | 24 | // need to recalculate lowQualNum and nBaseNum if the corresponding filters are enabled 25 | if(mOptions->qualfilter.enabled || mOptions->lengthFilter.enabled) { 26 | const char* seqstr = r->mSeq.mStr.c_str(); 27 | const char* qualstr = r->mQuality.c_str(); 28 | 29 | for(int i=0; iqualfilter.qualifiedQual) 36 | lowQualNum ++; 37 | 38 | if(base == 'N') 39 | nBaseNum++; 40 | } 41 | } 42 | 43 | if(mOptions->qualfilter.enabled) { 44 | if(lowQualNum > (mOptions->qualfilter.unqualifiedPercentLimit * rlen / 100.0) ) 45 | return FAIL_QUALITY; 46 | else if(mOptions->qualfilter.avgQualReq > 0 && 
(totalQual / rlen)qualfilter.avgQualReq) 47 | return FAIL_QUALITY; 48 | else if(nBaseNum > mOptions->qualfilter.nBaseLimit ) 49 | return FAIL_N_BASE; 50 | } 51 | 52 | if(mOptions->lengthFilter.enabled) { 53 | if(rlen < mOptions->lengthFilter.requiredLength) 54 | return FAIL_LENGTH; 55 | if(mOptions->lengthFilter.maxLength > 0 && rlen > mOptions->lengthFilter.maxLength) 56 | return FAIL_TOO_LONG; 57 | } 58 | 59 | if(mOptions->complexityFilter.enabled) { 60 | if(!passLowComplexityFilter(r)) 61 | return FAIL_COMPLEXITY; 62 | } 63 | 64 | return PASS_FILTER; 65 | } 66 | 67 | bool Filter::passLowComplexityFilter(Read* r) { 68 | int diff = 0; 69 | int length = r->length(); 70 | if(length <= 1) 71 | return false; 72 | const char* data = r->mSeq.mStr.c_str(); 73 | for(int i=0; i= mOptions->complexityFilter.threshold ) 78 | return true; 79 | else 80 | return false; 81 | } 82 | 83 | Read* Filter::trimAndCut(Read* r, int front, int tail, int& frontTrimmed) { 84 | frontTrimmed = 0; 85 | // return the same read for speed if no change needed 86 | if(front == 0 && tail == 0 && !mOptions->qualityCut.enabledFront && !mOptions->qualityCut.enabledTail && !mOptions->qualityCut.enabledRight) 87 | return r; 88 | 89 | 90 | int rlen = r->length() - front - tail ; 91 | if (rlen < 0) 92 | return NULL; 93 | 94 | if(front == 0 && !mOptions->qualityCut.enabledFront && !mOptions->qualityCut.enabledTail && !mOptions->qualityCut.enabledRight){ 95 | r->resize(rlen); 96 | return r; 97 | } else if(!mOptions->qualityCut.enabledFront && !mOptions->qualityCut.enabledTail && !mOptions->qualityCut.enabledRight){ 98 | r->mSeq.mStr = r->mSeq.mStr.substr(front, rlen); 99 | r->mQuality = r->mQuality.substr(front, rlen); 100 | frontTrimmed = front; 101 | return r; 102 | } 103 | 104 | // need quality cutting 105 | 106 | int l = r->length(); 107 | const char* qualstr = r->mQuality.c_str(); 108 | const char* seq = r->mSeq.mStr.c_str(); 109 | // quality cutting forward 110 | if(mOptions->qualityCut.enabledFront) { 
111 | int w = mOptions->qualityCut.windowSizeFront; 112 | int s = front; 113 | if(l - front - tail - w <= 0) 114 | return NULL; 115 | 116 | int totalQual = 0; 117 | 118 | // preparing rolling 119 | for(int i=0; i front) { 126 | totalQual -= qualstr[s-1]; 127 | } 128 | // add 33 for phred33 transforming 129 | if((double)totalQual / (double)w >= 33 + mOptions->qualityCut.qualityFront) 130 | break; 131 | } 132 | 133 | // the trimming in front is forwarded and rlen is recalculated 134 | if(s >0 ) 135 | s = s+w-1; 136 | while(squalityCut.enabledRight) { 144 | int w = mOptions->qualityCut.windowSizeRight; 145 | int s = front; 146 | if(l - front - tail - w <= 0) 147 | return NULL; 148 | 149 | int totalQual = 0; 150 | 151 | // preparing rolling 152 | for(int i=0; i front) { 161 | totalQual -= qualstr[s-1]; 162 | } 163 | // add 33 for phred33 transforming 164 | if((double)totalQual / (double)w < 33 + mOptions->qualityCut.qualityRight) { 165 | foundLowQualWindow = true; 166 | break; 167 | } 168 | } 169 | 170 | if(foundLowQualWindow ) { 171 | // keep the good bases in the window 172 | while(s=33 + mOptions->qualityCut.qualityRight) 173 | s++; 174 | rlen = s - front; 175 | } 176 | } 177 | 178 | // quality cutting backward 179 | if(!mOptions->qualityCut.enabledRight && mOptions->qualityCut.enabledTail) { 180 | int w = mOptions->qualityCut.windowSizeTail; 181 | if(l - front - tail - w <= 0) 182 | return NULL; 183 | 184 | int totalQual = 0; 185 | int t = l - tail - 1; 186 | 187 | // preparing rolling 188 | for(int i=0; i=front; t--) { 192 | totalQual += qualstr[t-w+1]; 193 | // rolling 194 | if(t < l-tail-1) { 195 | totalQual -= qualstr[t+1]; 196 | } 197 | // add 33 for phred33 transforming 198 | if((double)totalQual / (double)w >= 33 + mOptions->qualityCut.qualityTail) 199 | break; 200 | } 201 | 202 | if(t < l-1) 203 | t = t-w+1; 204 | while(t>=0 && seq[t] == 'N') 205 | t--; 206 | rlen = t - front + 1; 207 | } 208 | 209 | if(rlen <= 0 || front >= l-1) 210 | return NULL; 211 | 
212 | r->mSeq.mStr = r->mSeq.mStr.substr(front, rlen); 213 | r->mQuality = r->mQuality.substr(front, rlen); 214 | 215 | frontTrimmed = front; 216 | 217 | return r; 218 | } 219 | 220 | bool Filter::match(vector& list, string target, int threshold) { 221 | for(int i=0; ithreshold) 229 | break; 230 | } 231 | } 232 | if(diff <= threshold) 233 | return true; 234 | } 235 | return false; 236 | } 237 | 238 | bool Filter::test() { 239 | Read r("@name", 240 | "TTTTAACCCCCCCCCCCCCCCCCCCCCCCCCCCCAATTTT", 241 | "+", 242 | "/////CCCCCCCCCCCC////CCCCCCCCCCCCCC////E"); 243 | Options opt; 244 | opt.qualityCut.enabledFront = true; 245 | opt.qualityCut.enabledTail = true; 246 | opt.qualityCut.windowSizeFront = 4; 247 | opt.qualityCut.qualityFront = 20; 248 | opt.qualityCut.windowSizeTail = 4; 249 | opt.qualityCut.qualityTail = 20; 250 | Filter filter(&opt); 251 | int frontTrimmed = 0; 252 | Read* ret = filter.trimAndCut(&r, 0, 1, frontTrimmed); 253 | ret->print(); 254 | 255 | return ret->mSeq.mStr == "CCCCCCCCCCCCCCCCCCCCCCCCCCCC" 256 | && ret->mQuality == "CCCCCCCCCCC////CCCCCCCCCCCCC"; 257 | } -------------------------------------------------------------------------------- /src/filter.h: -------------------------------------------------------------------------------- 1 | #ifndef FILTER_H 2 | #define FILTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "options.h" 9 | #include "read.h" 10 | 11 | using namespace std; 12 | 13 | class Filter{ 14 | public: 15 | Filter(Options* opt); 16 | ~Filter(); 17 | int passFilter(Read* r); 18 | bool passLowComplexityFilter(Read* r); 19 | Read* trimAndCut(Read* r, int front, int tail, int& frontTrimmed); 20 | static bool test(); 21 | 22 | private: 23 | bool match(vector& list, string target, int threshold); 24 | 25 | private: 26 | Options* mOptions; 27 | }; 28 | 29 | 30 | #endif -------------------------------------------------------------------------------- /src/filterresult.h: 
-------------------------------------------------------------------------------- 1 | #ifndef FILTER_RESULT_H 2 | #define FILTER_RESULT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "common.h" 10 | #include "options.h" 11 | #include 12 | #include 13 | 14 | struct classcomp { 15 | bool operator() (const string& lhs, const string& rhs) const { 16 | if (lhs.length() < rhs.length()) 17 | return true; 18 | else if(lhs.length() == rhs.length()) { 19 | return lhs < rhs; 20 | } else 21 | return false; 22 | } 23 | }; 24 | 25 | using namespace std; 26 | 27 | class FilterResult{ 28 | public: 29 | FilterResult(Options* opt, bool paired = false); 30 | ~FilterResult(); 31 | inline long* getFilterReadStats() {return mFilterReadStats;} 32 | void addFilterResult(int result, int readNum=1); 33 | static FilterResult* merge(vector& list); 34 | void print(); 35 | // for single end 36 | void addAdapterTrimmed(string adapter, bool isR2 = false, bool incTrimmedCounter = true); 37 | // for paired end 38 | void addAdapterTrimmed(string adapter1, string adapter2); 39 | void addPolyXTrimmed(int base, int length); 40 | long getTotalPolyXTrimmedReads(); 41 | long getTotalPolyXTrimmedBases(); 42 | // a part of JSON report 43 | void reportJson(ofstream& ofs, string padding); 44 | // a part of JSON report for adapters 45 | void reportAdapterJson(ofstream& ofs, string padding); 46 | // a part of JSON report for polyX trim 47 | void reportPolyXTrimJson(ofstream& ofs, string padding); 48 | // a part of HTML report 49 | void reportHtml(ofstream& ofs, long totalReads, long totalBases); 50 | // a part of HTML report for adapters 51 | void reportAdapterHtml(ofstream& ofs, long totalBases); 52 | void outputAdaptersJson(ofstream& ofs, map& adapterCounts); 53 | void outputAdaptersHtml(ofstream& ofs, map& adapterCounts, long totalBases); 54 | // deal with base correction results 55 | long* getCorrectionMatrix() {return mCorrectionMatrix;} 56 | long 
getTotalCorrectedBases(); 57 | void addCorrection(char from, char to); 58 | long getCorrectionNum(char from, char to); 59 | void incCorrectedReads(int count); 60 | void addMergedPairs(int pairs); 61 | 62 | 63 | public: 64 | Options* mOptions; 65 | bool mPaired; 66 | long mCorrectedReads; 67 | long mMergedPairs; 68 | private: 69 | long mFilterReadStats[FILTER_RESULT_TYPES]; 70 | long mTrimmedAdapterRead; 71 | long mTrimmedAdapterBases; 72 | long mTrimmedPolyXReads[4] = {0}; 73 | long mTrimmedPolyXBases[4] = {0}; 74 | map mAdapter1; 75 | map mAdapter2; 76 | long* mCorrectionMatrix; 77 | }; 78 | 79 | #endif -------------------------------------------------------------------------------- /src/genomes.h: -------------------------------------------------------------------------------- 1 | #ifndef GENOMES_H 2 | #define GENOMES_H 3 | 4 | // includes 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "common.h" 11 | #include "fastareader.h" 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include "options.h" 17 | 18 | using namespace std; 19 | 20 | class MapResult{ 21 | 22 | public: 23 | MapResult(){ 24 | mapped = false; 25 | start = 0; 26 | len = 0; 27 | ed = 0x7FFFFFFF; // initialized with a very large ED 28 | } 29 | public: 30 | bool mapped; 31 | uint32 start; 32 | uint32 len; 33 | uint32 ed; // edit distance 34 | }; 35 | 36 | class Genomes 37 | { 38 | public: 39 | Genomes(string fastaFile, Options* opt); 40 | ~Genomes(); 41 | 42 | void cover(int id, uint32 pos, uint32 len, uint32 ed, float frac); 43 | bool hasKey(uint64 key); 44 | bool align(string& seq); 45 | void report(); 46 | void reportJSON(ofstream& ofs); 47 | void reportHtml(ofstream& ofs); 48 | 49 | static uint32 packIdPos(uint32 id, uint32 position); 50 | static void unpackIdPos(uint32 data,uint32& id, uint32& pos); 51 | 52 | private: 53 | void init(); 54 | void buildKmerTable(); 55 | void addKmer(uint64 key, uint32 id, uint32 pos); 56 | void initLowComplexityKeys(); 57 
| MapResult mapToGenome(string& seq, uint32 seqPos, string& genome, uint32 genomePos); 58 | void initBloomFilter(); 59 | string getPlotX(int id); 60 | string getCoverageY(int id); 61 | string getEditDistanceY(int id); 62 | void initBinSize(); 63 | double getCoverageRate(int id); 64 | 65 | private: 66 | int mGenomeNum; 67 | FastaReader* mFastaReader; 68 | vector mSequences; 69 | vector mNames; 70 | vector> mCoverage; 71 | vector> mEditDistance; 72 | vector mTotalEditDistance; 73 | vector mReads; 74 | vector mBases; 75 | // unit32 = 8 bits genome id + 24 bits positions 76 | unordered_map> mKmerTable; 77 | set mLowComplexityKeys; 78 | Options* mOptions; 79 | long mHitCount; 80 | long mMissedCount; 81 | char* mBloomFilterArray; 82 | }; 83 | 84 | 85 | #endif -------------------------------------------------------------------------------- /src/htmlreporter.h: -------------------------------------------------------------------------------- 1 | #ifndef HTML_REPORTER_H 2 | #define HTML_REPORTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "options.h" 9 | #include "stats.h" 10 | #include "filterresult.h" 11 | #include 12 | #include "virusdetector.h" 13 | 14 | using namespace std; 15 | 16 | class HtmlReporter{ 17 | public: 18 | HtmlReporter(Options* opt); 19 | ~HtmlReporter(); 20 | void setDupHist(int* dupHist, double* dupMeanGC, double dupRate); 21 | void setInsertHist(long* insertHist, int insertSizePeak); 22 | void report(VirusDetector* vd,FilterResult* result, Stats* preStats1, Stats* postStats1, Stats* preStats2 = NULL, Stats* postStats2 = NULL); 23 | 24 | static void outputRow(ofstream& ofs, string key, long value); 25 | static void outputRow(ofstream& ofs, string key, string value); 26 | static string formatNumber(long number); 27 | static string getPercents(long numerator, long denominator); 28 | private: 29 | const string getCurrentSystemTime(); 30 | void printHeader(ofstream& ofs); 31 | void printCSS(ofstream& ofs); 32 | void 
printJS(ofstream& ofs); 33 | void printFooter(ofstream& ofs); 34 | void reportDuplication(ofstream& ofs); 35 | void reportInsertSize(ofstream& ofs, int isizeLimit); 36 | void printSummary(ofstream& ofs, FilterResult* result, Stats* preStats1, Stats* postStats1, Stats* preStats2, Stats* postStats2); 37 | void printDetectionResult(ofstream& ofs, Kmer* kmer); 38 | void printGenomeCoverage(ofstream& ofs, Genomes* g); 39 | void reportKmerHits(ofstream& ofs, Kmer* kmer); 40 | void reportKmerCollection(ofstream& ofs, KmerCollection* kc); 41 | 42 | 43 | private: 44 | Options* mOptions; 45 | int* mDupHist; 46 | double* mDupMeanGC; 47 | double mDupRate; 48 | long* mInsertHist; 49 | int mInsertSizePeak; 50 | }; 51 | 52 | 53 | #endif -------------------------------------------------------------------------------- /src/jsonreporter.cpp: -------------------------------------------------------------------------------- 1 | #include "jsonreporter.h" 2 | 3 | JsonReporter::JsonReporter(Options* opt){ 4 | mOptions = opt; 5 | mDupHist = NULL; 6 | mDupRate = 0; 7 | } 8 | 9 | JsonReporter::~JsonReporter(){ 10 | } 11 | 12 | void JsonReporter::setDupHist(int* dupHist, double* dupMeanGC, double dupRate) { 13 | mDupHist = dupHist; 14 | mDupMeanGC = dupMeanGC; 15 | mDupRate = dupRate; 16 | } 17 | 18 | void JsonReporter::setInsertHist(long* insertHist, int insertSizePeak) { 19 | mInsertHist = insertHist; 20 | mInsertSizePeak = insertSizePeak; 21 | } 22 | 23 | extern string command; 24 | void JsonReporter::report(VirusDetector* vd, FilterResult* result, Stats* preStats1, Stats* postStats1, Stats* preStats2, Stats* postStats2) { 25 | ofstream ofs; 26 | ofs.open(mOptions->jsonFile, ifstream::out); 27 | ofs << "{" << endl; 28 | 29 | long pre_total_reads = preStats1->getReads(); 30 | if(preStats2) 31 | pre_total_reads += preStats2->getReads(); 32 | 33 | long pre_total_bases = preStats1->getBases(); 34 | if(preStats2) 35 | pre_total_bases += preStats2->getBases(); 36 | 37 | long pre_q20_bases = 
preStats1->getQ20(); 38 | if(preStats2) 39 | pre_q20_bases += preStats2->getQ20(); 40 | 41 | long pre_q30_bases = preStats1->getQ30(); 42 | if(preStats2) 43 | pre_q30_bases += preStats2->getQ30(); 44 | 45 | long pre_total_gc = preStats1->getGCNumber(); 46 | if(preStats2) 47 | pre_total_gc += preStats2->getGCNumber(); 48 | 49 | long post_total_reads = postStats1->getReads(); 50 | if(postStats2) 51 | post_total_reads += postStats2->getReads(); 52 | 53 | long post_total_bases = postStats1->getBases(); 54 | if(postStats2) 55 | post_total_bases += postStats2->getBases(); 56 | 57 | long post_q20_bases = postStats1->getQ20(); 58 | if(postStats2) 59 | post_q20_bases += postStats2->getQ20(); 60 | 61 | long post_q30_bases = postStats1->getQ30(); 62 | if(postStats2) 63 | post_q30_bases += postStats2->getQ30(); 64 | 65 | long post_total_gc = postStats1->getGCNumber(); 66 | if(postStats2) 67 | post_total_gc += postStats2->getGCNumber(); 68 | 69 | // KMER detection 70 | Kmer* kmer = vd->getKmer(); 71 | if(kmer) { 72 | string detectionResult; 73 | if(kmer->getMeanHit() >= mOptions->positiveThreshold) 74 | detectionResult = "POSITIVE"; 75 | else 76 | detectionResult = "NEGATIVE"; 77 | 78 | ofs << "\t" << "\"kmer_detection_result\": {" << endl; 79 | ofs << "\t\t" << "\"result\": \"" << detectionResult << "\"," << endl; 80 | ofs << "\t\t" << "\"mean_coverage\": " << kmer->getMeanHit() << "," << endl; 81 | ofs << "\t\t" << "\"positive_thread\": " << mOptions->positiveThreshold << "," << endl; 82 | 83 | // unique kmer hits 84 | ofs << "\t\t" << "\"kmer_hits\": {" << endl; 85 | kmer->reportJSON(ofs); 86 | ofs << "\t\t" << "}" << endl; 87 | 88 | 89 | ofs << "\t" << "}," << endl; 90 | } 91 | 92 | // KMER detection 93 | Genomes* genome = vd->getGenomes(); 94 | if(genome) { 95 | genome->reportJSON(ofs); 96 | } 97 | 98 | // KMER detection 99 | KmerCollection* kc = vd->getKmerCollection(); 100 | if(kc) { 101 | kc->reportJSON(ofs); 102 | } 103 | 104 | // summary 105 | ofs << "\t" << 
"\"summary\": {" << endl; 106 | 107 | ofs << "\t\t" << "\"before_filtering\": {" << endl; 108 | ofs << "\t\t\t" << "\"total_reads\":" << pre_total_reads << "," << endl; 109 | ofs << "\t\t\t" << "\"total_bases\":" << pre_total_bases << "," << endl; 110 | ofs << "\t\t\t" << "\"q20_bases\":" << pre_q20_bases << "," << endl; 111 | ofs << "\t\t\t" << "\"q30_bases\":" << pre_q30_bases << "," << endl; 112 | ofs << "\t\t\t" << "\"q20_rate\":" << (pre_total_bases == 0?0.0:(double)pre_q20_bases / (double)pre_total_bases) << "," << endl; 113 | ofs << "\t\t\t" << "\"q30_rate\":" << (pre_total_bases == 0?0.0:(double)pre_q30_bases / (double)pre_total_bases) << "," << endl; 114 | ofs << "\t\t\t" << "\"read1_mean_length\":" << preStats1->getMeanLength() << "," << endl; 115 | if(mOptions->isPaired()) 116 | ofs << "\t\t\t" << "\"read2_mean_length\":" << preStats2->getMeanLength() << "," << endl; 117 | ofs << "\t\t\t" << "\"gc_content\":" << (pre_total_bases == 0?0.0:(double)pre_total_gc / (double)pre_total_bases) << endl; 118 | ofs << "\t\t" << "}," << endl; 119 | 120 | ofs << "\t\t" << "\"after_filtering\": {" << endl; 121 | ofs << "\t\t\t" << "\"total_reads\":" << post_total_reads << "," << endl; 122 | ofs << "\t\t\t" << "\"total_bases\":" << post_total_bases << "," << endl; 123 | ofs << "\t\t\t" << "\"q20_bases\":" << post_q20_bases << "," << endl; 124 | ofs << "\t\t\t" << "\"q30_bases\":" << post_q30_bases << "," << endl; 125 | ofs << "\t\t\t" << "\"q20_rate\":" << (post_total_bases == 0?0.0:(double)post_q20_bases / (double)post_total_bases) << "," << endl; 126 | ofs << "\t\t\t" << "\"q30_rate\":" << (post_total_bases == 0?0.0:(double)post_q30_bases / (double)post_total_bases) << "," << endl; 127 | ofs << "\t\t\t" << "\"read1_mean_length\":" << postStats1->getMeanLength() << "," << endl; 128 | if(mOptions->isPaired()) 129 | ofs << "\t\t\t" << "\"read2_mean_length\":" << postStats2->getMeanLength() << "," << endl; 130 | ofs << "\t\t\t" << "\"gc_content\":" << (post_total_bases == 
0?0.0:(double)post_total_gc / (double)post_total_bases) << endl; 131 | ofs << "\t\t" << "}"; 132 | 133 | ofs << endl; 134 | 135 | ofs << "\t" << "}," << endl; 136 | 137 | if(result) { 138 | ofs << "\t" << "\"filtering_result\": " ; 139 | result -> reportJson(ofs, "\t"); 140 | } 141 | 142 | if(mOptions->duplicate.enabled) { 143 | ofs << "\t" << "\"duplication\": {" << endl; 144 | ofs << "\t\t\"rate\": " << mDupRate << "," << endl; 145 | ofs << "\t\t\"histogram\": ["; 146 | for(int d=1; dduplicate.histSize; d++) { 147 | ofs << mDupHist[d]; 148 | if(d!=mOptions->duplicate.histSize-1) 149 | ofs << ","; 150 | } 151 | ofs << "]," << endl; 152 | ofs << "\t\t\"mean_gc\": ["; 153 | for(int d=1; dduplicate.histSize; d++) { 154 | ofs << mDupMeanGC[d]; 155 | if(d!=mOptions->duplicate.histSize-1) 156 | ofs << ","; 157 | } 158 | ofs << "]" << endl; 159 | ofs << "\t" << "}"; 160 | ofs << "," << endl; 161 | } 162 | 163 | if(mOptions->isPaired()) { 164 | ofs << "\t" << "\"insert_size\": {" << endl; 165 | ofs << "\t\t\"peak\": " << mInsertSizePeak << "," << endl; 166 | ofs << "\t\t\"unknown\": " << mInsertHist[mOptions->insertSizeMax] << "," << endl; 167 | ofs << "\t\t\"histogram\": ["; 168 | for(int d=0; dinsertSizeMax; d++) { 169 | ofs << mInsertHist[d]; 170 | if(d!=mOptions->insertSizeMax-1) 171 | ofs << ","; 172 | } 173 | ofs << "]" << endl; 174 | ofs << "\t" << "}"; 175 | ofs << "," << endl; 176 | } 177 | 178 | if(result && mOptions->adapterCuttingEnabled()) { 179 | ofs << "\t" << "\"adapter_cutting\": " ; 180 | result -> reportAdapterJson(ofs, "\t"); 181 | } 182 | 183 | if(result && mOptions->polyXTrimmingEnabled()) { 184 | ofs << "\t" << "\"polyx_trimming\": " ; 185 | result -> reportPolyXTrimJson(ofs, "\t"); 186 | } 187 | 188 | if(preStats1) { 189 | ofs << "\t" << "\"read1_before_filtering\": " ; 190 | preStats1 -> reportJson(ofs, "\t"); 191 | } 192 | 193 | if(preStats2) { 194 | ofs << "\t" << "\"read2_before_filtering\": " ; 195 | preStats2 -> reportJson(ofs, "\t"); 196 | } 
197 | 198 | if(postStats1) { 199 | string name = "read1_after_filtering"; 200 | ofs << "\t" << "\"" << name << "\": " ; 201 | postStats1 -> reportJson(ofs, "\t"); 202 | } 203 | 204 | if(postStats2) { 205 | ofs << "\t" << "\"read2_after_filtering\": " ; 206 | postStats2 -> reportJson(ofs, "\t"); 207 | } 208 | 209 | ofs << "\t\"command\": " << "\"" << command << "\"" << endl; 210 | 211 | ofs << "}"; 212 | } -------------------------------------------------------------------------------- /src/jsonreporter.h: -------------------------------------------------------------------------------- 1 | #ifndef JSON_REPORTER_H 2 | #define JSON_REPORTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "options.h" 9 | #include "stats.h" 10 | #include "filterresult.h" 11 | #include 12 | #include "virusdetector.h" 13 | 14 | using namespace std; 15 | 16 | class JsonReporter{ 17 | public: 18 | JsonReporter(Options* opt); 19 | ~JsonReporter(); 20 | 21 | void setDupHist(int* dupHist, double* dupMeanGC, double dupRate); 22 | void setInsertHist(long* insertHist, int insertSizePeak); 23 | void report(VirusDetector* vd, FilterResult* result, Stats* preStats1, Stats* postStats1, Stats* preStats2 = NULL, Stats* postStats2 = NULL); 24 | 25 | private: 26 | Options* mOptions; 27 | int* mDupHist; 28 | double* mDupMeanGC; 29 | double mDupRate; 30 | long* mInsertHist; 31 | int mInsertSizePeak; 32 | }; 33 | 34 | 35 | #endif -------------------------------------------------------------------------------- /src/kmer.cpp: -------------------------------------------------------------------------------- 1 | #include "kmer.h" 2 | #include "util.h" 3 | #include 4 | #include 5 | 6 | Kmer::Kmer(string filename, Options* opt) 7 | { 8 | mFastaReader = NULL; 9 | mOptions = opt; 10 | init(filename); 11 | resultMade = false; 12 | } 13 | 14 | Kmer::~Kmer() 15 | { 16 | if(mFastaReader) { 17 | delete mFastaReader; 18 | mFastaReader = NULL; 19 | } 20 | } 21 | 22 | void Kmer::init(string filename) 
23 | { 24 | mFastaReader = new FastaReader(filename); 25 | mFastaReader->readAll(); 26 | 27 | map kmers = mFastaReader->contigs(); 28 | map::iterator iter; 29 | 30 | bool initialized = false; 31 | for(iter = kmers.begin(); iter != kmers.end() ; iter++) { 32 | string seq = iter->second; 33 | 34 | if(!initialized) { 35 | initialized = true; 36 | if(mOptions->kmerKeyLen == 0) 37 | mOptions->kmerKeyLen = seq.length(); 38 | } 39 | if(seq.length() != mOptions->kmerKeyLen) { 40 | cerr << "KMER length must be " << mOptions->kmerKeyLen << ", skipped " << seq << endl; 41 | continue; 42 | } 43 | bool valid = true; 44 | uint64 kmer64 = seq2uint64(seq, 0, seq.length(), valid); 45 | if(valid) { 46 | mKmerHits[kmer64] = 0; 47 | mNames[kmer64] = iter->first; 48 | mSequences[kmer64] = iter->second; 49 | } else { 50 | cerr << iter->first << ": " << seq << " skipped" << endl; 51 | } 52 | } 53 | 54 | if(mKmerHits.size() == 0) { 55 | error_exit("No unique KMER specified!"); 56 | } 57 | } 58 | 59 | void Kmer::makeResults() { 60 | mResults.clear(); 61 | unordered_map::iterator iter; 62 | for(iter = mKmerHits.begin(); iter != mKmerHits.end(); iter++) { 63 | uint64 kmer64 = iter->first; 64 | string title = mNames[kmer64] + "_" + mSequences[kmer64]; 65 | mResults[title] = iter->second; 66 | } 67 | resultMade = true; 68 | } 69 | 70 | void Kmer::report() { 71 | if(!resultMade) 72 | makeResults(); 73 | 74 | map::iterator iter; 75 | for(iter = mResults.begin(); iter != mResults.end(); iter++) { 76 | cerr << iter->first << ": " << iter->second << endl; 77 | } 78 | 79 | double meanHit = getMeanHit(); 80 | cerr << endl; 81 | cerr << "Mean depth: " << meanHit << endl<= mOptions->positiveThreshold) 83 | cerr << "Result: POSITIVE"; 84 | else 85 | cerr << "Result: NEGATIVE"; 86 | cerr << " (" << "threshold: " << mOptions->positiveThreshold << ")" << endl; 87 | } 88 | 89 | double Kmer::getMeanHit() { 90 | if(mKmerHits.size() == 0) 91 | return 0.0; 92 | 93 | double total = 0; 94 | 
unordered_map::iterator iter; 95 | for(iter = mKmerHits.begin(); iter != mKmerHits.end(); iter++) { 96 | total += iter->second; 97 | } 98 | return total / (double) mKmerHits.size(); 99 | } 100 | 101 | bool Kmer::add(uint64 kmer64) { 102 | unordered_map::iterator iter = mKmerHits.find(kmer64); 103 | if(iter != mKmerHits.end()) { 104 | iter->second++; 105 | return true; 106 | } 107 | return false; 108 | } 109 | 110 | 111 | string Kmer::getPlotX() { 112 | if(!resultMade) 113 | makeResults(); 114 | 115 | stringstream ss; 116 | map::iterator iter; 117 | int first = true; 118 | for(iter = mResults.begin(); iter != mResults.end(); iter++) { 119 | if(first) { 120 | first = false; 121 | } else 122 | ss << ","; 123 | 124 | ss << "\"" << iter->first << "\""; 125 | } 126 | return ss.str(); 127 | } 128 | 129 | string Kmer::getPlotY() { 130 | if(!resultMade) 131 | makeResults(); 132 | 133 | stringstream ss; 134 | map::iterator iter; 135 | int first = true; 136 | for(iter = mResults.begin(); iter != mResults.end(); iter++) { 137 | if(first) { 138 | first = false; 139 | } else 140 | ss << ","; 141 | 142 | ss << iter->second; 143 | } 144 | return ss.str(); 145 | } 146 | 147 | int Kmer::getKmerCount() { 148 | return mKmerHits.size(); 149 | } 150 | 151 | void Kmer::reportJSON(ofstream& ofs) { 152 | if(!resultMade) 153 | makeResults(); 154 | 155 | map::iterator iter; 156 | int first = true; 157 | for(iter = mResults.begin(); iter != mResults.end(); iter++) { 158 | if(first) { 159 | first = false; 160 | } else 161 | ofs << "," << endl; 162 | 163 | ofs << "\t\t\t\"" << iter->first << "\""; 164 | ofs << ":" << iter->second; 165 | } 166 | ofs << endl; 167 | } 168 | 169 | uint64 Kmer::seq2uint64(string& seq, uint32 pos, uint32 len, bool& valid) { 170 | uint64 key = 0; 171 | for(uint32 i=0; i 7 | #include 8 | #include 9 | #include "fastareader.h" 10 | #include "options.h" 11 | #include 12 | 13 | using namespace std; 14 | 15 | class Kmer 16 | { 17 | public: 18 | Kmer(string filename, 
Options* opt); 19 | ~Kmer(); 20 | void init(string filename); 21 | bool add(uint64 kmer64); 22 | void report(); 23 | double getMeanHit(); 24 | string getPlotX(); 25 | string getPlotY(); 26 | int getKmerCount(); 27 | void reportJSON(ofstream& ofs); 28 | 29 | static uint64 seq2uint64(string& seq, uint32 pos, uint32 len, bool& valid); 30 | 31 | private: 32 | void makeResults(); 33 | 34 | private: 35 | unordered_map mKmerHits; 36 | FastaReader* mFastaReader; 37 | unordered_map mNames; 38 | unordered_map mSequences; 39 | map mResults; 40 | Options* mOptions; 41 | bool resultMade; 42 | }; 43 | 44 | 45 | #endif -------------------------------------------------------------------------------- /src/kmercollection.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLKMER_H 2 | #define ALLKMER_H 3 | 4 | // includes 5 | #include "common.h" 6 | #include 7 | #include 8 | #include 9 | #include "fastareader.h" 10 | #include "options.h" 11 | #include "zlib/zlib.h" 12 | #include "common.h" 13 | #include 14 | #include 15 | #include 16 | 17 | #define MTX_COUNT 100 18 | #define COLLISION_FLAG 0xFFFFFFFF 19 | 20 | using namespace std; 21 | 22 | class KCResult { 23 | public: 24 | string mName; 25 | uint64 mHit; 26 | int mMedianHit; 27 | double mMeanHit; 28 | double mCoverage; 29 | int mKmerCount; 30 | int mUniqueReads; 31 | }; 32 | 33 | class KCHit { 34 | public: 35 | uint64 mKey64; 36 | uint32 mID; 37 | uint32 mHit; 38 | }; 39 | 40 | class KmerCollection 41 | { 42 | public: 43 | KmerCollection(string filename, Options* opt); 44 | ~KmerCollection(); 45 | void init(); 46 | void report(); 47 | void reportJSON(ofstream& ofs); 48 | void reportHTML(ofstream& ofs); 49 | uint32 add(uint64 kmer64); 50 | void addGenomeRead(uint32 genomeID); 51 | 52 | uint32 packIdCount(uint32 id, uint32 count); 53 | void unpackIdCount(uint32 data,uint32& id, uint32& count); 54 | void stat(); 55 | 56 | private: 57 | bool getLine(char* line, int maxLine); 58 | uint64 
makeHash(uint64 key); 59 | bool eof(); 60 | void makeBitAndMask(); 61 | bool isHighConfidence(KCResult kcr); 62 | private: 63 | Options* mOptions; 64 | vector mNames; 65 | vector mHits; 66 | vector mMedianHits; 67 | vector mMeanHits; 68 | vector mCoverage; 69 | vector mKmerCounts; 70 | vector mGenomeReads; 71 | vector mResults; 72 | int mNumber; 73 | uint32 mUniqueHashNum; 74 | uint32* mHashKCH; 75 | KCHit* mKCHits; 76 | string mFilename; 77 | gzFile mZipFile; 78 | ifstream mFile; 79 | bool mZipped; 80 | int mIdBits; 81 | uint32 mIdMask; 82 | uint32 mCountMax; 83 | bool mStatDone; 84 | uint32 mUniqueNumber; 85 | }; 86 | 87 | 88 | #endif -------------------------------------------------------------------------------- /src/nucleotidetree.cpp: -------------------------------------------------------------------------------- 1 | #include "nucleotidetree.h" 2 | #include 3 | 4 | NucleotideNode::NucleotideNode(){ 5 | count = 0; 6 | base = 'N'; 7 | memset(children, 0, sizeof(NucleotideNode*)*8); 8 | } 9 | NucleotideNode::~NucleotideNode(){ 10 | for(int i=0; i<8; i++) { 11 | if(children[i]) 12 | delete children[i]; 13 | } 14 | } 15 | void NucleotideNode::dfs() { 16 | //cerr << base; 17 | //cerr << count; 18 | printf("%c", base); 19 | printf("%d", count); 20 | bool hasChild = false; 21 | for(int i=0; i<8; i++) { 22 | if(children[i]){ 23 | children[i]->dfs(); 24 | hasChild = true; 25 | } 26 | } 27 | if(!hasChild) { 28 | printf("\n"); 29 | } 30 | } 31 | 32 | NucleotideTree::NucleotideTree(Options* opt){ 33 | mOptions = opt; 34 | mRoot = new NucleotideNode(); 35 | } 36 | 37 | 38 | NucleotideTree::~NucleotideTree(){ 39 | delete mRoot; 40 | } 41 | 42 | void NucleotideTree::addSeq(string seq) { 43 | NucleotideNode* curNode = mRoot; 44 | for(int i=0; ichildren[base] == NULL) { 49 | curNode->children[base] = new NucleotideNode(); 50 | curNode->children[base]->base = seq[i]; 51 | } 52 | curNode->children[base]->count++; 53 | curNode = curNode->children[base]; 54 | } 55 | } 56 | 57 | 
string NucleotideTree::getDominantPath(bool& reachedLeaf) { 58 | stringstream ss; 59 | const double RATIO_THRESHOLD = 0.95; 60 | const int NUM_THRESHOLD = 50; 61 | NucleotideNode* curNode = mRoot; 62 | while(true) { 63 | int total = 0; 64 | for(int i=0; i<8; i++) { 65 | if(curNode->children[i] != NULL) 66 | total += curNode->children[i]->count; 67 | } 68 | if(total < NUM_THRESHOLD) 69 | break; 70 | bool hasDominant = false; 71 | for(int i=0; i<8; i++) { 72 | if(curNode->children[i] == NULL) 73 | continue; 74 | if(curNode->children[i]->count / (double)total >= RATIO_THRESHOLD) { 75 | hasDominant = true; 76 | ss << curNode->children[i]->base; 77 | curNode = curNode->children[i]; 78 | break; 79 | } 80 | } 81 | if(!hasDominant) { 82 | reachedLeaf = false; 83 | break; 84 | } 85 | } 86 | return ss.str(); 87 | 88 | } 89 | 90 | bool NucleotideTree::test() { 91 | NucleotideTree tree(NULL); 92 | for(int i=0; i<100; i++) { 93 | tree.addSeq("AAAATTTT"); 94 | tree.addSeq("AAAATTTTGGGG"); 95 | tree.addSeq("AAAATTTTGGGGCCCC"); 96 | tree.addSeq("AAAATTTTGGGGCCAA"); 97 | } 98 | tree.addSeq("AAAATTTTGGGACCCC"); 99 | 100 | bool reachedLeaf = true; 101 | string path = tree.getDominantPath(reachedLeaf); 102 | printf("%s\n", path.c_str()); 103 | return path == "AAAATTTTGGGGCC"; 104 | } -------------------------------------------------------------------------------- /src/nucleotidetree.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEICTREE_H 2 | #define NUCLEICTREE_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "options.h" 9 | 10 | using namespace std; 11 | 12 | // (A,T,C,G,N) & 0X07 = (1,4,7,6,3) 13 | class NucleotideNode{ 14 | public: 15 | NucleotideNode(); 16 | ~NucleotideNode(); 17 | void dfs(); 18 | 19 | public: 20 | int count; 21 | char base; 22 | NucleotideNode* children[8]; 23 | }; 24 | 25 | class NucleotideTree{ 26 | public: 27 | NucleotideTree(Options* opt); 28 | ~NucleotideTree(); 29 | void 
addSeq(string seq); 30 | string getDominantPath(bool& reachedLeaf); 31 | 32 | static bool test(); 33 | 34 | private: 35 | Options* mOptions; 36 | NucleotideNode* mRoot; 37 | }; 38 | 39 | 40 | #endif -------------------------------------------------------------------------------- /src/options.h: -------------------------------------------------------------------------------- 1 | #ifndef OPTIONS_H 2 | #define OPTIONS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | #define UMI_LOC_NONE 0 13 | #define UMI_LOC_INDEX1 1 14 | #define UMI_LOC_INDEX2 2 15 | #define UMI_LOC_READ1 3 16 | #define UMI_LOC_READ2 4 17 | #define UMI_LOC_PER_INDEX 5 18 | #define UMI_LOC_PER_READ 6 19 | 20 | class DuplicationOptions { 21 | public: 22 | DuplicationOptions() { 23 | enabled = true; 24 | keylen = 12; 25 | histSize = 32; 26 | } 27 | public: 28 | bool enabled; 29 | int keylen; 30 | int histSize; 31 | }; 32 | 33 | 34 | class LowComplexityFilterOptions { 35 | public: 36 | LowComplexityFilterOptions() { 37 | enabled = false; 38 | threshold = 0.3; 39 | } 40 | public: 41 | bool enabled; 42 | double threshold; 43 | }; 44 | 45 | class PolyGTrimmerOptions { 46 | public: 47 | PolyGTrimmerOptions() { 48 | enabled = false; 49 | minLen = 10; 50 | } 51 | public: 52 | bool enabled; 53 | int minLen; 54 | }; 55 | 56 | class PolyXTrimmerOptions { 57 | public: 58 | PolyXTrimmerOptions() { 59 | enabled = false; 60 | minLen = 10; 61 | } 62 | public: 63 | bool enabled; 64 | int minLen; 65 | }; 66 | 67 | class UMIOptions { 68 | public: 69 | UMIOptions() { 70 | enabled = false; 71 | location = UMI_LOC_NONE; 72 | length = 0; 73 | skip = 0; 74 | } 75 | public: 76 | bool enabled; 77 | int location; 78 | int length; 79 | int skip; 80 | string prefix; 81 | string separator; 82 | }; 83 | 84 | class CorrectionOptions { 85 | public: 86 | CorrectionOptions() { 87 | enabled = false; 88 | } 89 | public: 90 | bool enabled; 91 | }; 92 | 93 | class 
QualityCutOptions { 94 | public: 95 | QualityCutOptions() { 96 | enabledFront = false; 97 | enabledTail = false; 98 | enabledRight = false; 99 | windowSizeShared = 4; 100 | qualityShared = 20; 101 | windowSizeFront = windowSizeShared; 102 | qualityFront = qualityShared; 103 | windowSizeTail = windowSizeShared; 104 | qualityTail = qualityShared; 105 | windowSizeRight = windowSizeShared; 106 | qualityRight = qualityShared; 107 | } 108 | public: 109 | // enable 5' cutting by quality 110 | bool enabledFront; 111 | // enable 3' cutting by quality 112 | bool enabledTail; 113 | // enable agressive cutting mode 114 | bool enabledRight; 115 | // the sliding window size 116 | int windowSizeShared; 117 | // the mean quality requirement 118 | int qualityShared; 119 | // the sliding window size for cutting by quality in 5' 120 | int windowSizeFront; 121 | // the mean quality requirement for cutting by quality in 5' 122 | int qualityFront; 123 | // the sliding window size for cutting by quality in 3' 124 | int windowSizeTail; 125 | // the mean quality requirement for cutting by quality in 3' 126 | int qualityTail; 127 | // the sliding window size for cutting by quality in aggressive mode 128 | int windowSizeRight; 129 | // the mean quality requirement for cutting by quality in aggressive mode 130 | int qualityRight; 131 | }; 132 | 133 | class AdapterOptions { 134 | public: 135 | AdapterOptions() { 136 | enabled = true; 137 | hasSeqR1 = false; 138 | hasSeqR2 = false; 139 | detectAdapterForPE = false; 140 | } 141 | public: 142 | bool enabled; 143 | string sequence; 144 | string sequenceR2; 145 | string detectedAdapter1; 146 | string detectedAdapter2; 147 | vector seqsInFasta; 148 | string fastaFile; 149 | bool hasSeqR1; 150 | bool hasSeqR2; 151 | bool hasFasta; 152 | bool detectAdapterForPE; 153 | }; 154 | 155 | class TrimmingOptions { 156 | public: 157 | TrimmingOptions() { 158 | front1 = 0; 159 | tail1 = 0; 160 | front2 = 0; 161 | tail2 = 0; 162 | maxLen1 = 0; 163 | maxLen2 = 0; 
164 | } 165 | public: 166 | // trimming first cycles for read1 167 | int front1; 168 | // trimming last cycles for read1 169 | int tail1; 170 | // trimming first cycles for read2 171 | int front2; 172 | // trimming last cycles for read2 173 | int tail2; 174 | // max length of read1 175 | int maxLen1; 176 | // max length of read2 177 | int maxLen2; 178 | }; 179 | 180 | class QualityFilteringOptions { 181 | public: 182 | QualityFilteringOptions() { 183 | enabled = true; 184 | // '0' = Q15 185 | qualifiedQual = '0'; 186 | unqualifiedPercentLimit = 40; 187 | nBaseLimit = 5; 188 | } 189 | public: 190 | // quality filter enabled 191 | bool enabled; 192 | // if a base's quality phred score < qualifiedPhred, then it's considered as a low_qual_base 193 | char qualifiedQual; 194 | // if low_qual_base_num > lowQualLimit, then discard this read 195 | int unqualifiedPercentLimit; 196 | // if n_base_number > nBaseLimit, then discard this read 197 | int nBaseLimit; 198 | // if average qual score < avgQualReq, then discard this read 199 | int avgQualReq; 200 | }; 201 | 202 | class ReadLengthFilteringOptions { 203 | public: 204 | ReadLengthFilteringOptions() { 205 | enabled = false; 206 | requiredLength = 15; 207 | maxLength = 0; 208 | } 209 | public: 210 | // length filter enabled 211 | bool enabled; 212 | // if read_length < requiredLength, then this read is discard 213 | int requiredLength; 214 | // length limit, 0 for no limitation 215 | int maxLength; 216 | }; 217 | 218 | class Options{ 219 | public: 220 | Options(); 221 | void init(); 222 | bool isPaired(); 223 | bool validate(); 224 | bool adapterCuttingEnabled(); 225 | bool polyXTrimmingEnabled(); 226 | string getAdapter1(); 227 | string getAdapter2(); 228 | bool shallDetectAdapter(bool isR2 = false); 229 | void loadFastaAdapters(); 230 | 231 | public: 232 | // file name of read1 input 233 | string in1; 234 | // file name of read2 input 235 | string in2; 236 | // file name of read1 output 237 | string out1; 238 | // file 
name of read2 output 239 | string out2; 240 | // genome FASTA file 241 | string genomeFile; 242 | // kmer FASTA file 243 | string kmerFile; 244 | // kmer FASTA file 245 | string kmerCollectionFile; 246 | // json file 247 | string jsonFile; 248 | // html file 249 | string htmlFile; 250 | // html report title 251 | string reportTitle; 252 | // compression level 253 | int compression; 254 | // the input file is using phred64 quality scoring 255 | bool phred64; 256 | // do not rewrite existing files 257 | bool dontOverwrite; 258 | // read STDIN 259 | bool inputFromSTDIN; 260 | // write STDOUT 261 | bool outputToSTDOUT; 262 | // the input R1 file is interleaved 263 | bool interleavedInput; 264 | // only process first N reads 265 | int readsToProcess; 266 | // worker thread number 267 | int thread; 268 | // trimming options 269 | TrimmingOptions trim; 270 | // quality filtering options 271 | QualityFilteringOptions qualfilter; 272 | // length filtering options 273 | ReadLengthFilteringOptions lengthFilter; 274 | // adapter options 275 | AdapterOptions adapter; 276 | // options for quality cutting 277 | QualityCutOptions qualityCut; 278 | // options for base correction 279 | CorrectionOptions correction; 280 | // options for UMI 281 | UMIOptions umi; 282 | // 3' end polyG trimming, default for Illumina NextSeq/NovaSeq 283 | PolyGTrimmerOptions polyGTrim; 284 | // 3' end polyX trimming 285 | PolyXTrimmerOptions polyXTrim; 286 | int seqLen1; 287 | int seqLen2; 288 | // low complexity filtering 289 | LowComplexityFilterOptions complexityFilter; 290 | // options for duplication profiling 291 | DuplicationOptions duplicate; 292 | // options for duplication profiling 293 | int insertSizeMax; 294 | // overlap analysis threshold 295 | int overlapRequire; 296 | int overlapDiffLimit; 297 | int overlapDiffPercentLimit; 298 | // output debug information 299 | bool verbose; 300 | // the length of KMER, default is 25 301 | int kmerKeyLen; 302 | // the threshold of positive result 303 | 
double positiveThreshold; 304 | // the threshold of depth for a region considered as covered 305 | double depthThreshold; 306 | // if ed(read, genome) <= edThreshold, then think it as a match 307 | int edThreshold; 308 | // the bin size for stat coverage and edit distance 309 | int statsBinSize; 310 | // read with length >= longReadThreshold will be considered as long reads 311 | int longReadThreshold; 312 | // long reads will be split to reads with length <= segmentLength 313 | int segmentLength; 314 | // coverage threshold to be reported in kmer collection results 315 | double kcCoverageThreshold; 316 | // coverage for high-confidence KCR 317 | double kcCoverageHighConfidence; 318 | // median hit for high-confidence KCR 319 | double kcMedianHitHighConfidence; 320 | 321 | }; 322 | 323 | #endif -------------------------------------------------------------------------------- /src/overlapanalysis.cpp: -------------------------------------------------------------------------------- 1 | #include "overlapanalysis.h" 2 | 3 | OverlapAnalysis::OverlapAnalysis(){ 4 | } 5 | 6 | 7 | OverlapAnalysis::~OverlapAnalysis(){ 8 | } 9 | 10 | OverlapResult OverlapAnalysis::analyze(Read* r1, Read* r2, int overlapDiffLimit, int overlapRequire, double diffPercentLimit) { 11 | return analyze(r1->mSeq, r2->mSeq, overlapDiffLimit, overlapRequire, diffPercentLimit); 12 | } 13 | 14 | // ported from the python code of AfterQC 15 | OverlapResult OverlapAnalysis::analyze(Sequence& r1, Sequence& r2, int diffLimit, int overlapRequire, double diffPercentLimit) { 16 | Sequence rcr2 = ~r2; 17 | int len1 = r1.length(); 18 | int len2 = rcr2.length(); 19 | // use the pointer directly for speed 20 | const char* str1 = r1.mStr.c_str(); 21 | const char* str2 = rcr2.mStr.c_str(); 22 | 23 | int complete_compare_require = 50; 24 | 25 | int overlap_len = 0; 26 | int offset = 0; 27 | int diff = 0; 28 | 29 | // forward 30 | // a match of less than overlapRequire is considered as unconfident 31 | while (offset < 
len1-overlapRequire) { 32 | // the overlap length of r1 & r2 when r2 is move right for offset 33 | overlap_len = min(len1 - offset, len2); 34 | int overlapDiffLimit = min(diffLimit, (int)(overlap_len * diffPercentLimit)); 35 | 36 | diff = 0; 37 | int i = 0; 38 | for (i=0; i overlapDiffLimit && i < complete_compare_require) 42 | break; 43 | } 44 | } 45 | 46 | if (diff <= overlapDiffLimit || (diff > overlapDiffLimit && i>complete_compare_require)){ 47 | OverlapResult ov; 48 | ov.overlapped = true; 49 | ov.offset = offset; 50 | ov.overlap_len = overlap_len; 51 | ov.diff = diff; 52 | return ov; 53 | } 54 | 55 | offset += 1; 56 | } 57 | 58 | 59 | // reverse 60 | // in this case, the adapter is sequenced since TEMPLATE_LEN < SEQ_LEN 61 | // check if distance can get smaller if offset goes negative 62 | // this only happens when insert DNA is shorter than sequencing read length, and some adapter/primer is sequenced but not trimmed cleanly 63 | // we go reversely 64 | offset = 0; 65 | while (offset > -(len2-overlapRequire)){ 66 | // the overlap length of r1 & r2 when r2 is move right for offset 67 | overlap_len = min(len1, len2- abs(offset)); 68 | int overlapDiffLimit = min(diffLimit, (int)(overlap_len * diffPercentLimit)); 69 | 70 | diff = 0; 71 | int i = 0; 72 | for (i=0; i overlapDiffLimit && i < complete_compare_require) 76 | break; 77 | } 78 | } 79 | 80 | if (diff <= overlapDiffLimit || (diff > overlapDiffLimit && i>complete_compare_require)){ 81 | OverlapResult ov; 82 | ov.overlapped = true; 83 | ov.offset = offset; 84 | ov.overlap_len = overlap_len; 85 | ov.diff = diff; 86 | return ov; 87 | } 88 | 89 | offset -= 1; 90 | } 91 | 92 | OverlapResult ov; 93 | ov.overlapped = false; 94 | ov.offset = ov.overlap_len = ov.diff = 0; 95 | return ov; 96 | } 97 | 98 | Read* OverlapAnalysis::merge(Read* r1, Read* r2, OverlapResult ov) { 99 | int ol = ov.overlap_len; 100 | if(!ov.overlapped) 101 | return NULL; 102 | 103 | int len1 = ol + max(0, ov.offset); 104 | int len2 = 0; 105 
| if(ov.offset > 0) 106 | len2 = r2->length() - ol; 107 | 108 | Read* rr2 = r2->reverseComplement(); 109 | string mergedSeq = r1->mSeq.mStr.substr(0, len1); 110 | if(ov.offset > 0) { 111 | mergedSeq += rr2->mSeq.mStr.substr(ol, len2); 112 | } 113 | 114 | string mergedQual = r1->mQuality.substr(0, len1); 115 | if(ov.offset > 0) { 116 | mergedQual += rr2->mQuality.substr(ol, len2); 117 | } 118 | 119 | delete rr2; 120 | 121 | string name = r1->mName + " merged_" + to_string(len1) + "_" + to_string(len2); 122 | Read* mergedRead = new Read(name, mergedSeq, r1->mStrand, mergedQual); 123 | 124 | return mergedRead; 125 | } 126 | 127 | bool OverlapAnalysis::test(){ 128 | //Sequence r1("CAGCGCCTACGGGCCCCTTTTTCTGCGCGACCGCGTGGCTGTGGGCGCGGATGCCTTTGAGCGCGGTGACTTCTCACTGCGTATCGAGCCGCTGGAGGTCTCCC"); 129 | //Sequence r2("ACCTCCAGCGGCTCGATACGCAGTGAGAAGTCACCGCGCTCAAAGGCATCCGCGCCCACAGCCACGCGGTCGCGCAGAAAAAGGGGCCCGTAGGCGCGGCTCCC"); 130 | 131 | Sequence r1("CAGCGCCTACGGGCCCCTTTTTCTGCGCGACCGCGTGGCTGTGGGCGCGGATGCCTTTGAGCGCGGTGACTTCTCACTGCGTATCGAGC"); 132 | Sequence r2("ACCTCCAGCGGCTCGATACGCAGTGAGAAGTCACCGCGCTCAAAGGCATCCGCGCCCACAGCCACGCGGTCGCGCAGAAAAAGGGGTCC"); 133 | string qual1("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); 134 | string qual2("#########################################################################################"); 135 | 136 | OverlapResult ov = OverlapAnalysis::analyze(r1, r2, 2, 30, 0.2); 137 | 138 | Read read1("name1", r1, "+", qual1); 139 | Read read2("name2", r2, "+", qual2); 140 | 141 | Read* mergedRead = OverlapAnalysis::merge(&read1, &read2, ov); 142 | mergedRead->print(); 143 | 144 | return ov.overlapped && ov.offset == 10 && ov.overlap_len == 79 && ov.diff == 1; 145 | } -------------------------------------------------------------------------------- /src/overlapanalysis.h: -------------------------------------------------------------------------------- 1 | #ifndef OVERLAP_ANALYSIS_H 2 | #define OVERLAP_ANALYSIS_H 
3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "common.h" 10 | #include "options.h" 11 | #include "read.h" 12 | 13 | using namespace std; 14 | 15 | class OverlapResult { 16 | public: 17 | bool overlapped; 18 | int offset; 19 | int overlap_len; 20 | int diff; 21 | }; 22 | 23 | class OverlapAnalysis{ 24 | public: 25 | OverlapAnalysis(); 26 | ~OverlapAnalysis(); 27 | 28 | static OverlapResult analyze(Sequence& r1, Sequence& r2, int diffLimit, int overlapRequire, double diffPercentLimit); 29 | static OverlapResult analyze(Read* r1, Read* r2, int diffLimit, int overlapRequire, double diffPercentLimit); 30 | static Read* merge(Read* r1, Read* r2, OverlapResult ov); 31 | 32 | public: 33 | static bool test(); 34 | 35 | }; 36 | 37 | #endif -------------------------------------------------------------------------------- /src/peprocessor.h: -------------------------------------------------------------------------------- 1 | #ifndef PE_PROCESSOR_H 2 | #define PE_PROCESSOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "options.h" 13 | #include "threadconfig.h" 14 | #include "filter.h" 15 | #include "umiprocessor.h" 16 | #include "overlapanalysis.h" 17 | #include "writerthread.h" 18 | #include "duplicate.h" 19 | #include "virusdetector.h" 20 | 21 | 22 | using namespace std; 23 | 24 | struct ReadPairPack { 25 | ReadPair** data; 26 | int count; 27 | }; 28 | 29 | typedef struct ReadPairPack ReadPairPack; 30 | 31 | struct ReadPairRepository { 32 | ReadPairPack** packBuffer; 33 | atomic_long readPos; 34 | atomic_long writePos; 35 | //std::mutex mtx; 36 | //std::mutex readCounterMtx; 37 | //std::condition_variable repoNotFull; 38 | //std::condition_variable repoNotEmpty; 39 | }; 40 | 41 | typedef struct ReadPairRepository ReadPairRepository; 42 | 43 | class PairEndProcessor{ 44 | public: 45 | PairEndProcessor(Options* opt); 46 | ~PairEndProcessor(); 47 | bool 
process(); 48 | 49 | private: 50 | bool processPairEnd(ReadPairPack* pack, ThreadConfig* config); 51 | bool processRead(Read* r, ReadPair* originalRead, bool reversed); 52 | void initPackRepository(); 53 | void destroyPackRepository(); 54 | void producePack(ReadPairPack* pack); 55 | void consumePack(ThreadConfig* config); 56 | void producerTask(); 57 | void consumerTask(ThreadConfig* config); 58 | void initConfig(ThreadConfig* config); 59 | void initOutput(); 60 | void closeOutput(); 61 | void statInsertSize(Read* r1, Read* r2, OverlapResult& ov, int frontTrimmed1 = 0, int frontTrimmed2 = 0); 62 | int getPeakInsertSize(); 63 | void writeTask(WriterThread* config); 64 | 65 | private: 66 | ReadPairRepository mRepo; 67 | atomic_bool mProduceFinished; 68 | atomic_int mFinishedThreads; 69 | std::mutex mOutputMtx; 70 | std::mutex mInputMtx; 71 | Options* mOptions; 72 | Filter* mFilter; 73 | gzFile mZipFile1; 74 | gzFile mZipFile2; 75 | ofstream* mOutStream1; 76 | ofstream* mOutStream2; 77 | UmiProcessor* mUmiProcessor; 78 | long* mInsertSizeHist; 79 | WriterThread* mLeftWriter; 80 | WriterThread* mRightWriter; 81 | Duplicate* mDuplicate; 82 | VirusDetector* mVirusDetector; 83 | }; 84 | 85 | 86 | #endif -------------------------------------------------------------------------------- /src/polyx.cpp: -------------------------------------------------------------------------------- 1 | #include "polyx.h" 2 | #include "common.h" 3 | 4 | PolyX::PolyX(){ 5 | } 6 | 7 | 8 | PolyX::~PolyX(){ 9 | } 10 | 11 | void PolyX::trimPolyG(Read* r1, Read* r2, FilterResult* fr, int compareReq) { 12 | trimPolyG(r1, fr, compareReq); 13 | trimPolyG(r2, fr, compareReq); 14 | } 15 | 16 | void PolyX::trimPolyG(Read* r, FilterResult* fr, int compareReq) { 17 | const int allowOneMismatchForEach = 8; 18 | const int maxMismatch = 5; 19 | 20 | const char* data = r->mSeq.mStr.c_str(); 21 | 22 | int rlen = r->length(); 23 | 24 | int mismatch = 0; 25 | int i = 0; 26 | int firstGPos = rlen - 1; 27 | for(i=0; 
i< rlen; i++) { 28 | if(data[rlen - i - 1] != 'G') { 29 | mismatch++; 30 | } else { 31 | firstGPos = rlen - i -1; 32 | } 33 | 34 | int allowedMismatch = (i+1)/allowOneMismatchForEach; 35 | if(mismatch > maxMismatch || (mismatch>allowedMismatch && i>= compareReq-1) ) 36 | break; 37 | } 38 | 39 | if(i >= compareReq) { 40 | r->resize(firstGPos); 41 | } 42 | } 43 | 44 | void PolyX::trimPolyX(Read* r1, Read* r2, FilterResult* fr, int compareReq) { 45 | trimPolyX(r1, fr, compareReq); 46 | trimPolyX(r2, fr, compareReq); 47 | } 48 | 49 | void PolyX::trimPolyX(Read* r, FilterResult* fr, int compareReq) { 50 | const int allowOneMismatchForEach = 8; 51 | const int maxMismatch = 5; 52 | 53 | const char* data = r->mSeq.mStr.c_str(); 54 | 55 | int rlen = r->length(); 56 | 57 | 58 | int atcgNumbers[4] = {0, 0, 0, 0}; 59 | int pos = 0; 60 | for(pos=0; pos= allowOneMismatchForEach || pos+1 >= compareReq-1)) { 93 | break; 94 | } 95 | } 96 | 97 | // has polyX 98 | if(pos+1 >= compareReq) { 99 | // find the poly 100 | int poly; 101 | int maxCount = -1; 102 | for(int b=0; b<4; b++) { 103 | if(atcgNumbers[b] > maxCount){ 104 | maxCount = atcgNumbers[b]; 105 | poly = b; 106 | } 107 | } 108 | char polyBase = ATCG_BASES[poly]; 109 | while(data[rlen - pos - 1] != polyBase && pos>=0) 110 | pos--; 111 | 112 | r->resize(rlen - pos - 1); 113 | if(fr) 114 | fr->addPolyXTrimmed(poly, pos + 1); 115 | } 116 | } 117 | 118 | bool PolyX::test() { 119 | 120 | Read r("@name", 121 | "ATTTTAAAAAAAAAATAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAT", 122 | "+", 123 | "///EEEEEEEEEEEEEEEEEEEEEEEEEE////EEEEEEEEEEEEE////E////E"); 124 | 125 | FilterResult fr(NULL, false); 126 | PolyX::trimPolyX(&r, &fr, 10); 127 | r.print(); 128 | 129 | return r.mSeq.mStr == "ATTTT" && fr.getTotalPolyXTrimmedReads() == 1 && fr.getTotalPolyXTrimmedBases() == 51; 130 | } -------------------------------------------------------------------------------- /src/polyx.h: 
-------------------------------------------------------------------------------- 1 | #ifndef POLY_X_H 2 | #define POLY_X_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "overlapanalysis.h" 8 | #include "filterresult.h" 9 | #include "options.h" 10 | 11 | using namespace std; 12 | 13 | class PolyX{ 14 | public: 15 | PolyX(); 16 | ~PolyX(); 17 | 18 | static void trimPolyG(Read* r1, Read* r2, FilterResult* fr, int compareReq); 19 | static void trimPolyG(Read* r1, FilterResult* fr, int compareReq); 20 | static void trimPolyX(Read* r1, Read* r2, FilterResult* fr, int compareReq); 21 | static void trimPolyX(Read* r1, FilterResult* fr, int compareReq); 22 | static bool test(); 23 | 24 | 25 | }; 26 | 27 | 28 | #endif -------------------------------------------------------------------------------- /src/processor.cpp: -------------------------------------------------------------------------------- 1 | #include "processor.h" 2 | #include "peprocessor.h" 3 | #include "seprocessor.h" 4 | 5 | Processor::Processor(Options* opt){ 6 | mOptions = opt; 7 | } 8 | 9 | 10 | Processor::~Processor(){ 11 | } 12 | 13 | bool Processor::process() { 14 | if(mOptions->isPaired()) { 15 | PairEndProcessor p(mOptions); 16 | p.process(); 17 | } else { 18 | SingleEndProcessor p(mOptions); 19 | p.process(); 20 | } 21 | 22 | return true; 23 | } -------------------------------------------------------------------------------- /src/processor.h: -------------------------------------------------------------------------------- 1 | #ifndef PROCESSOR_H 2 | #define PROCESSOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "options.h" 8 | 9 | using namespace std; 10 | 11 | class Processor{ 12 | public: 13 | Processor(Options* opt); 14 | ~Processor(); 15 | bool process(); 16 | 17 | private: 18 | Options* mOptions; 19 | }; 20 | 21 | 22 | #endif -------------------------------------------------------------------------------- /src/read.cpp: 
-------------------------------------------------------------------------------- 1 | #include "read.h" 2 | #include 3 | #include "util.h" 4 | 5 | Read::Read(string name, string seq, string strand, string quality, bool phred64){ 6 | mName = name; 7 | mSeq = Sequence(seq); 8 | mStrand = strand; 9 | mQuality = quality; 10 | mHasQuality = true; 11 | if(phred64) 12 | convertPhred64To33(); 13 | } 14 | 15 | Read::Read(string name, string seq, string strand){ 16 | mName = name; 17 | mSeq = Sequence(seq); 18 | mStrand = strand; 19 | mHasQuality = false; 20 | } 21 | 22 | Read::Read(string name, Sequence seq, string strand, string quality, bool phred64){ 23 | mName = name; 24 | mSeq = seq; 25 | mStrand = strand; 26 | mQuality = quality; 27 | mHasQuality = true; 28 | if(phred64) 29 | convertPhred64To33(); 30 | } 31 | 32 | Read::Read(string name, Sequence seq, string strand){ 33 | mName = name; 34 | mSeq = seq; 35 | mStrand = strand; 36 | mHasQuality = false; 37 | } 38 | 39 | void Read::convertPhred64To33(){ 40 | for(int i=0; i length() || len<0) 79 | return ; 80 | mSeq.mStr.resize(len); 81 | mQuality.resize(len); 82 | } 83 | 84 | void Read::trimFront(int len){ 85 | len = min(length()-1, len); 86 | mSeq.mStr = mSeq.mStr.substr(len, mSeq.mStr.length() - len); 87 | mQuality = mQuality.substr(len, mQuality.length() - len); 88 | } 89 | 90 | string Read::lastIndex(){ 91 | int len = mName.length(); 92 | if(len<5) 93 | return ""; 94 | for(int i=len-3;i>=0;i--){ 95 | if(mName[i]==':' || mName[i]=='+'){ 96 | return mName.substr(i+1, len-i); 97 | } 98 | } 99 | return ""; 100 | } 101 | 102 | string Read::firstIndex(){ 103 | int len = mName.length(); 104 | int end = len; 105 | if(len<5) 106 | return ""; 107 | for(int i=len-3;i>=0;i--){ 108 | if(mName[i]=='+') 109 | end = i-1; 110 | if(mName[i]==':'){ 111 | return mName.substr(i+1, end-i); 112 | } 113 | } 114 | return ""; 115 | } 116 | 117 | int Read::lowQualCount(int qual){ 118 | int count = 0; 119 | for(int q=0;q Read::split(int segment) 
{ 150 | vector ret; 151 | int splitted = 0; 152 | string name = mName; 153 | string strand = mStrand; 154 | while(splitted < length()) { 155 | int len = min(segment, length() - splitted); 156 | string seq = mSeq.mStr.substr(splitted, len); 157 | string quality = mQuality.substr(splitted, len); 158 | Read* r = new Read(name, seq, strand, quality); 159 | ret.push_back(r); 160 | splitted += len; 161 | } 162 | return ret; 163 | } 164 | 165 | bool Read::test(){ 166 | Read r("@NS500713:64:HFKJJBGXY:1:11101:20469:1097 1:N:0:TATAGCCT+GGTCCCGA", 167 | "CTCTTGGACTCTAACACTGTTTTTTCTTATGAAAACACAGGAGTGATGACTAGTTGAGTGCATTCTTATGAGACTCATAGTCATTCTATGATGTAGTTTTCCTTAGGAGGACATTTTTTACATGAAATTATTAACCTAAATAGAGTTGATC", 168 | "+", 169 | "AAAAA6EEEEEEEEEEEEEEEEE#EEEEEEEEEEEEEEEEE/EEEEEEEEEEEEEEEEAEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEreverseComplement(); 192 | int len1 = mLeft->length(); 193 | int len2 = rcRight->length(); 194 | // use the pointer directly for speed 195 | const char* str1 = mLeft->mSeq.mStr.c_str(); 196 | const char* str2 = rcRight->mSeq.mStr.c_str(); 197 | const char* qual1 = mLeft->mQuality.c_str(); 198 | const char* qual2 = rcRight->mQuality.c_str(); 199 | 200 | // we require at least 30 bp overlapping to merge a pair 201 | const int MIN_OVERLAP = 30; 202 | bool overlapped = false; 203 | int olen = MIN_OVERLAP; 204 | int diff = 0; 205 | // the diff count for 1 high qual + 1 low qual 206 | int lowQualDiff = 0; 207 | 208 | while(olen <= min(len1, len2)){ 209 | diff = 0; 210 | lowQualDiff = 0; 211 | bool ok = true; 212 | int offset = len1 - olen; 213 | for(int i=0;i= Q30 and the other is <= Q15 217 | if((qual1[offset+i]>='?' 
&& qual2[i]<='0') || (qual1[offset+i]<='0' && qual2[i]>='?')){ 218 | lowQualDiff++; 219 | } 220 | // we disallow high quality diff, and only allow up to 3 low qual diff 221 | if(diff>lowQualDiff || lowQualDiff>=3){ 222 | ok = false; 223 | break; 224 | } 225 | } 226 | } 227 | if(ok){ 228 | overlapped = true; 229 | break; 230 | } 231 | olen++; 232 | } 233 | 234 | if(overlapped){ 235 | int offset = len1 - olen; 236 | stringstream ss; 237 | ss << mLeft->mName << " merged offset:" << offset << " overlap:" << olen << " diff:" << diff; 238 | string mergedName = ss.str(); 239 | string mergedSeq = mLeft->mSeq.mStr.substr(0, offset) + rcRight->mSeq.mStr; 240 | string mergedQual = mLeft->mQuality.substr(0, offset) + rcRight->mQuality; 241 | // quality adjuction and correction for low qual diff 242 | for(int i=0;i='?' && qual2[i]<='0'){ 245 | mergedSeq[offset+i] = str1[offset+i]; 246 | mergedQual[offset+i] = qual1[offset+i]; 247 | } else { 248 | mergedSeq[offset+i] = str2[i]; 249 | mergedQual[offset+i] = qual2[i]; 250 | } 251 | } else { 252 | // add the quality of the pair to make a high qual 253 | mergedQual[offset+i] = qual1[offset+i] + qual2[i] - 33; 254 | } 255 | } 256 | delete rcRight; 257 | return new Read(mergedName, mergedSeq, "+", mergedQual); 258 | } 259 | 260 | delete rcRight; 261 | return NULL; 262 | } 263 | 264 | bool ReadPair::test(){ 265 | Read* left = new Read("@NS500713:64:HFKJJBGXY:1:11101:20469:1097 1:N:0:TATAGCCT+GGTCCCGA", 266 | "TTTTTTCTCTTGGACTCTAACACTGTTTTTTCTTATGAAAACACAGGAGTGATGACTAGTTGAGTGCATTCTTATGAGACTCATAGTCATTCTATGATGTAG", 267 | "+", 268 | "AAAAA6EEEEEEEEEEEEEEEEE#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"); 269 | Read* right = new Read("@NS500713:64:HFKJJBGXY:1:11101:20469:1097 1:N:0:TATAGCCT+GGTCCCGA", 270 | "AAAAAACTACACCATAGAATGACTATGAGTCTCATAAGAATGCACTCAACTAGTCATCACTCCTGTGTTTTCATAAGAAAAAACAGTGTTAGAGTCCAAGAG", 271 | "+", 272 | 
"AAAAA6EEEEE/EEEEEEEEEEE#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"); 273 | 274 | ReadPair pair(left, right); 275 | Read* merged = pair.fastMerge(); 276 | if(merged == NULL) 277 | return false; 278 | 279 | if(merged->mSeq.mStr != "TTTTTTCTCTTGGACTCTAACACTGTTTTTTCTTATGAAAACACAGGAGTGATGACTAGTTGAGTGCATTCTTATGAGACTCATAGTCATTCTATGATGTAGTTTTTT") 280 | return false; 281 | 282 | return true; 283 | } 284 | -------------------------------------------------------------------------------- /src/read.h: -------------------------------------------------------------------------------- 1 | #ifndef READ_H 2 | #define READ_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "sequence.h" 10 | #include 11 | 12 | using namespace std; 13 | 14 | class Read{ 15 | public: 16 | Read(string name, string seq, string strand, string quality, bool phred64=false); 17 | Read(string name, Sequence seq, string strand, string quality, bool phred64=false); 18 | Read(string name, string seq, string strand); 19 | Read(string name, Sequence seq, string strand); 20 | Read(Read &r); 21 | void print(); 22 | void printFile(ofstream& file); 23 | Read* reverseComplement(); 24 | string firstIndex(); 25 | string lastIndex(); 26 | // default is Q20 27 | int lowQualCount(int qual=20); 28 | int length(); 29 | string toString(); 30 | string toStringWithTag(string tag); 31 | void resize(int len); 32 | void convertPhred64To33(); 33 | void trimFront(int len); 34 | bool fixMGI(); 35 | vector split(int segment); 36 | 37 | public: 38 | static bool test(); 39 | 40 | private: 41 | 42 | 43 | public: 44 | string mName; 45 | Sequence mSeq; 46 | string mStrand; 47 | string mQuality; 48 | bool mHasQuality; 49 | }; 50 | 51 | class ReadPair{ 52 | public: 53 | ReadPair(Read* left, Read* right); 54 | ~ReadPair(); 55 | 56 | // merge a pair, without consideration of seq error caused false INDEL 57 | Read* fastMerge(); 58 | public: 59 | Read* mLeft; 60 | Read* mRight; 
61 | 62 | public: 63 | static bool test(); 64 | }; 65 | 66 | #endif -------------------------------------------------------------------------------- /src/seprocessor.h: -------------------------------------------------------------------------------- 1 | #ifndef SE_PROCESSOR_H 2 | #define SE_PROCESSOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "options.h" 13 | #include "threadconfig.h" 14 | #include "filter.h" 15 | #include "umiprocessor.h" 16 | #include "writerthread.h" 17 | #include "duplicate.h" 18 | #include "virusdetector.h" 19 | 20 | using namespace std; 21 | 22 | struct ReadPack { 23 | Read** data; 24 | int count; 25 | }; 26 | 27 | typedef struct ReadPack ReadPack; 28 | 29 | struct ReadRepository { 30 | ReadPack** packBuffer; 31 | atomic_long readPos; 32 | atomic_long writePos; 33 | //std::mutex mtx; 34 | //std::mutex readCounterMtx; 35 | //std::condition_variable repoNotFull; 36 | //std::condition_variable repoNotEmpty; 37 | }; 38 | 39 | typedef struct ReadRepository ReadRepository; 40 | 41 | class SingleEndProcessor{ 42 | public: 43 | SingleEndProcessor(Options* opt); 44 | ~SingleEndProcessor(); 45 | bool process(); 46 | 47 | private: 48 | bool processSingleEnd(ReadPack* pack, ThreadConfig* config); 49 | void initPackRepository(); 50 | void destroyPackRepository(); 51 | void producePack(ReadPack* pack); 52 | void consumePack(ThreadConfig* config); 53 | void producerTask(); 54 | void consumerTask(ThreadConfig* config); 55 | void initConfig(ThreadConfig* config); 56 | void initOutput(); 57 | void closeOutput(); 58 | void writeTask(WriterThread* config); 59 | 60 | private: 61 | Options* mOptions; 62 | ReadRepository mRepo; 63 | atomic_bool mProduceFinished; 64 | atomic_int mFinishedThreads; 65 | std::mutex mInputMtx; 66 | std::mutex mOutputMtx; 67 | Filter* mFilter; 68 | gzFile mZipFile; 69 | ofstream* mOutStream; 70 | UmiProcessor* mUmiProcessor; 71 | WriterThread* 
mLeftWriter; 72 | Duplicate* mDuplicate; 73 | VirusDetector* mVirusDetector; 74 | }; 75 | 76 | 77 | #endif -------------------------------------------------------------------------------- /src/sequence.cpp: -------------------------------------------------------------------------------- 1 | #include "sequence.h" 2 | 3 | Sequence::Sequence(){ 4 | } 5 | 6 | Sequence::Sequence(string seq){ 7 | mStr = seq; 8 | } 9 | 10 | void Sequence::print(){ 11 | std::cerr << mStr; 12 | } 13 | 14 | int Sequence::length(){ 15 | return mStr.length(); 16 | } 17 | 18 | Sequence Sequence::reverseComplement(){ 19 | string str(mStr.length(), 0); 20 | for(int c=0;c 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | class Sequence{ 12 | public: 13 | Sequence(); 14 | Sequence(string seq); 15 | void print(); 16 | int length(); 17 | Sequence reverseComplement(); 18 | 19 | Sequence operator~(); 20 | 21 | static bool test(); 22 | 23 | public: 24 | string mStr; 25 | }; 26 | 27 | #endif -------------------------------------------------------------------------------- /src/stats.h: -------------------------------------------------------------------------------- 1 | #ifndef STATS_H 2 | #define STATS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "read.h" 10 | #include "options.h" 11 | 12 | using namespace std; 13 | 14 | class Stats{ 15 | public: 16 | // this @guessedCycles parameter should be calculated using the first several records 17 | Stats(Options* opt, bool isRead2 = false, int guessedCycles = 0, int bufferMargin = 1024); 18 | ~Stats(); 19 | int getCycles(); 20 | long getReads(); 21 | long getBases(); 22 | long getQ20(); 23 | long getQ30(); 24 | long getGCNumber(); 25 | // by default the qualified qual score is Q20 ('5') 26 | void statRead(Read* r); 27 | 28 | static Stats* merge(vector& list); 29 | void print(); 30 | void summarize(bool forced = false); 31 | // a port of JSON report 32 | void reportJson(ofstream& ofs, string 
padding); 33 | // a port of HTML report 34 | void reportHtml(ofstream& ofs, string filteringType, string readName); 35 | void reportHtmlQuality(ofstream& ofs, string filteringType, string readName); 36 | void reportHtmlContents(ofstream& ofs, string filteringType, string readName); 37 | bool isLongRead(); 38 | int getMeanLength(); 39 | 40 | public: 41 | static string list2string(double* list, int size); 42 | static string list2string(double* list, int size, long* coords); 43 | static string list2string(long* list, int size); 44 | static int base2val(char base); 45 | 46 | private: 47 | void extendBuffer(int newBufLen); 48 | 49 | private: 50 | Options* mOptions; 51 | bool mIsRead2; 52 | long mReads; 53 | int mEvaluatedSeqLen; 54 | /* 55 | why we use 8 here? 56 | map A/T/C/G/N to 0~7 by their ASCII % 8: 57 | 'A' % 8 = 1 58 | 'T' % 8 = 4 59 | 'C' % 8 = 3 60 | 'G' % 8 = 7 61 | 'N' % 8 = 6 62 | */ 63 | long *mCycleQ30Bases[8]; 64 | long *mCycleQ20Bases[8]; 65 | long *mCycleBaseContents[8]; 66 | long *mCycleBaseQual[8]; 67 | long *mCycleTotalBase; 68 | long *mCycleTotalQual; 69 | long *mKmer; 70 | 71 | map mQualityCurves; 72 | map mContentCurves; 73 | 74 | 75 | int mCycles; 76 | int mBufLen; 77 | long mBases; 78 | long mQ20Bases[8]; 79 | long mQ30Bases[8]; 80 | long mBaseContents[8]; 81 | long mQ20Total; 82 | long mQ30Total; 83 | bool summarized; 84 | long mKmerMax; 85 | long mKmerMin; 86 | int mKmerBufLen; 87 | long mLengthSum; 88 | }; 89 | 90 | #endif -------------------------------------------------------------------------------- /src/threadconfig.cpp: -------------------------------------------------------------------------------- 1 | #include "threadconfig.h" 2 | #include "util.h" 3 | 4 | ThreadConfig::ThreadConfig(Options* opt, int threadId, bool paired){ 5 | mOptions = opt; 6 | mThreadId = threadId; 7 | mPreStats1 = new Stats(mOptions, false); 8 | mPostStats1 = new Stats(mOptions, false); 9 | if(paired){ 10 | mPreStats2 = new Stats(mOptions, true); 11 | mPostStats2 
= new Stats(mOptions, true); 12 | } 13 | else { 14 | mPreStats2 = NULL; 15 | mPostStats2 = NULL; 16 | } 17 | mWriter1 = NULL; 18 | mWriter2 = NULL; 19 | 20 | mFilterResult = new FilterResult(opt, paired); 21 | mCanBeStopped = false; 22 | } 23 | 24 | ThreadConfig::~ThreadConfig() { 25 | cleanup(); 26 | } 27 | 28 | void ThreadConfig::cleanup() { 29 | deleteWriter(); 30 | } 31 | 32 | void ThreadConfig::deleteWriter() { 33 | if(mWriter1 != NULL) { 34 | delete mWriter1; 35 | mWriter1 = NULL; 36 | } 37 | if(mWriter2 != NULL) { 38 | delete mWriter2; 39 | mWriter2 = NULL; 40 | } 41 | } 42 | 43 | void ThreadConfig::initWriter(string filename1) { 44 | deleteWriter(); 45 | mWriter1 = new Writer(filename1, mOptions->compression); 46 | } 47 | 48 | void ThreadConfig::initWriter(string filename1, string filename2) { 49 | deleteWriter(); 50 | mWriter1 = new Writer(filename1, mOptions->compression); 51 | mWriter2 = new Writer(filename2, mOptions->compression); 52 | } 53 | 54 | void ThreadConfig::initWriter(ofstream* stream) { 55 | deleteWriter(); 56 | mWriter1 = new Writer(stream); 57 | } 58 | 59 | void ThreadConfig::initWriter(ofstream* stream1, ofstream* stream2) { 60 | deleteWriter(); 61 | mWriter1 = new Writer(stream1); 62 | mWriter2 = new Writer(stream2); 63 | } 64 | 65 | void ThreadConfig::initWriter(gzFile gzfile) { 66 | deleteWriter(); 67 | mWriter1 = new Writer(gzfile); 68 | } 69 | 70 | void ThreadConfig::initWriter(gzFile gzfile1, gzFile gzfile2) { 71 | deleteWriter(); 72 | mWriter1 = new Writer(gzfile1); 73 | mWriter2 = new Writer(gzfile2); 74 | } 75 | 76 | void ThreadConfig::addFilterResult(int result, int readNum) { 77 | mFilterResult->addFilterResult(result, readNum); 78 | } 79 | 80 | void ThreadConfig::addMergedPairs(int pairs) { 81 | mFilterResult->addMergedPairs(pairs); 82 | } 83 | 84 | void ThreadConfig::markProcessed(long readNum) { 85 | } 86 | 87 | bool ThreadConfig::canBeStopped() { 88 | return mCanBeStopped; 89 | } 
--------------------------------------------------------------------------------
/src/threadconfig.h:
--------------------------------------------------------------------------------
#ifndef THREAD_CONFIG_H
#define THREAD_CONFIG_H

// NOTE(review): the four bare #include directives below have no target in
// this copy; presumably the <...> header names were lost during extraction --
// restore them from the upstream header before compiling.
#include
#include
#include
#include
#include "stats.h"
#include "writer.h"
#include "options.h"
#include "filterresult.h"

using namespace std;

// Per-thread bundle of working state: the "pre" and "post" Stats objects for
// read1/read2 (presumably before/after filtering -- confirm against the
// processors), up to two output Writers, and a FilterResult.
class ThreadConfig{
public:
    // Stores opt and threadId, allocates the two read1 Stats objects and the
    // FilterResult. The two read2 Stats objects are allocated only when
    // paired is true; otherwise they are NULL. Both writers start as NULL
    // until one of the initWriter() overloads is called.
    ThreadConfig(Options* opt, int threadId, bool paired = false);
    // Calls cleanup().
    // NOTE(review): as implemented in threadconfig.cpp this frees only the
    // writers; the Stats and FilterResult objects allocated by the
    // constructor are not deleted there -- confirm whether they are released
    // elsewhere.
    ~ThreadConfig();
    // Accessors hand out the raw pointers held by this object.
    // getPreStats2()/getPostStats2() return NULL for a non-paired config, and
    // getWriter1()/getWriter2() return NULL until the matching writer exists.
    inline Stats* getPreStats1() {return mPreStats1;}
    inline Stats* getPostStats1() {return mPostStats1;}
    inline Stats* getPreStats2() {return mPreStats2;}
    inline Stats* getPostStats2() {return mPostStats2;}
    inline Writer* getWriter1() {return mWriter1;}
    inline Writer* getWriter2() {return mWriter2;}
    inline FilterResult* getFilterResult() {return mFilterResult;}

    // Every initWriter() overload first deletes any writers already held and
    // then creates new Writer objects for the given target(s). The filename
    // overloads pass mOptions->compression to Writer; the one-target
    // overloads leave the second writer NULL.
    void initWriter(string filename1);
    void initWriter(string filename1, string filename2);
    void initWriter(ofstream* stream);
    void initWriter(ofstream* stream1, ofstream* stream2);
    void initWriter(gzFile gzfile);
    void initWriter(gzFile gzfile1, gzFile gzfile2);

    // Both calls are forwarded unchanged to the FilterResult.
    void addFilterResult(int result, int readNum);
    void addMergedPairs(int pairs);

    int getThreadId() {return mThreadId;}
    // for splitting output
    // increase mCurrentSplitReads by readNum, and check it with options->split.size;
    // NOTE(review): the implementation in threadconfig.cpp has an empty body
    // and no mCurrentSplitReads member is declared in this class, so the
    // behaviour described above is not implemented here.
    void markProcessed(long readNum);
    // Returns mCanBeStopped, which the constructor sets to false.
    bool canBeStopped();
    // Deletes both writers (if any) and resets them to NULL.
    void cleanup();

private:
    // Deletes mWriter1/mWriter2 when non-NULL and sets them back to NULL.
    void deleteWriter();

private:
    Stats* mPreStats1;
    Stats* mPostStats1;
    Stats* mPreStats2;      // NULL unless constructed with paired == true
    Stats* mPostStats2;     // NULL unless constructed with paired == true
    Writer* mWriter1;
    Writer* mWriter2;
    Options* mOptions;      // supplied by the caller; not deleted by this class
    FilterResult* mFilterResult;

    int mThreadId;
    bool mCanBeStopped;
};

#endif
-------------------------------------------------------------------------------- /src/umiprocessor.cpp: -------------------------------------------------------------------------------- 1 | #include "umiprocessor.h" 2 | 3 | UmiProcessor::UmiProcessor(Options* opt){ 4 | mOptions = opt; 5 | } 6 | 7 | 8 | UmiProcessor::~UmiProcessor(){ 9 | } 10 | 11 | void UmiProcessor::process(Read* r1, Read* r2) { 12 | if(!mOptions->umi.enabled) 13 | return; 14 | 15 | string umi; 16 | if(mOptions->umi.location == UMI_LOC_INDEX1) 17 | umi = r1->firstIndex(); 18 | else if(mOptions->umi.location == UMI_LOC_INDEX2 && r2) 19 | umi = r2->lastIndex(); 20 | else if(mOptions->umi.location == UMI_LOC_READ1){ 21 | umi = r1->mSeq.mStr.substr(0, min(r1->length(), mOptions->umi.length)); 22 | r1->trimFront(umi.length() + mOptions->umi.skip); 23 | } 24 | else if(mOptions->umi.location == UMI_LOC_READ2 && r2){ 25 | umi = r2->mSeq.mStr.substr(0, min(r2->length(), mOptions->umi.length)); 26 | r2->trimFront(umi.length() + mOptions->umi.skip); 27 | } 28 | else if(mOptions->umi.location == UMI_LOC_PER_INDEX){ 29 | string umiMerged = r1->firstIndex(); 30 | if(r2) { 31 | umiMerged = umiMerged + "_" + r2->lastIndex(); 32 | } 33 | 34 | addUmiToName(r1, umiMerged); 35 | if(r2) { 36 | addUmiToName(r2, umiMerged); 37 | } 38 | } 39 | else if(mOptions->umi.location == UMI_LOC_PER_READ){ 40 | string umi1 = r1->mSeq.mStr.substr(0, min(r1->length(), mOptions->umi.length)); 41 | string umiMerged = umi1; 42 | r1->trimFront(umi1.length() + mOptions->umi.skip); 43 | if(r2){ 44 | string umi2 = r2->mSeq.mStr.substr(0, min(r2->length(), mOptions->umi.length)); 45 | umiMerged = umiMerged + "_" + umi2; 46 | r2->trimFront(umi2.length() + mOptions->umi.skip); 47 | } 48 | 49 | addUmiToName(r1, umiMerged); 50 | if(r2){ 51 | addUmiToName(r2, umiMerged); 52 | } 53 | } 54 | 55 | if(mOptions->umi.location != UMI_LOC_PER_INDEX && mOptions->umi.location != UMI_LOC_PER_READ) { 56 | if(r1 && !umi.empty()) 57 | addUmiToName(r1, umi); 58 
| if(r2 && !umi.empty()) 59 | addUmiToName(r2, umi); 60 | } 61 | } 62 | 63 | void UmiProcessor::addUmiToName(Read* r, string umi){ 64 | string tag; 65 | if(mOptions->umi.prefix.empty()) 66 | tag = ":" + umi; 67 | else 68 | tag = ":" + mOptions->umi.prefix + "_" + umi; 69 | int spacePos = -1; 70 | for(int i=0; imName.length(); i++) { 71 | if(r->mName[i] == ' ') { 72 | spacePos = i; 73 | break; 74 | } 75 | } 76 | if(spacePos == -1) { 77 | r->mName = r->mName + tag; 78 | } else { 79 | r->mName = r->mName.substr(0, spacePos) + tag + r->mName.substr(spacePos, r->mName.length() - spacePos); 80 | } 81 | 82 | } 83 | 84 | 85 | bool UmiProcessor::test() { 86 | return true; 87 | } -------------------------------------------------------------------------------- /src/umiprocessor.h: -------------------------------------------------------------------------------- 1 | #ifndef UMI_PROCESSOR_H 2 | #define UMI_PROCESSOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "options.h" 8 | #include "read.h" 9 | 10 | using namespace std; 11 | 12 | class UmiProcessor{ 13 | public: 14 | UmiProcessor(Options* opt); 15 | ~UmiProcessor(); 16 | void process(Read* r1, Read* r2 = NULL); 17 | void addUmiToName(Read* r, string umi); 18 | static bool test(); 19 | 20 | private: 21 | Options* mOptions; 22 | }; 23 | 24 | 25 | #endif -------------------------------------------------------------------------------- /src/unittest.cpp: -------------------------------------------------------------------------------- 1 | #include "unittest.h" 2 | #include "sequence.h" 3 | #include "fastqreader.h" 4 | #include "read.h" 5 | #include "overlapanalysis.h" 6 | #include "filter.h" 7 | #include "adaptertrimmer.h" 8 | #include "basecorrector.h" 9 | #include "polyx.h" 10 | #include "nucleotidetree.h" 11 | #include "evaluator.h" 12 | #include 13 | 14 | UnitTest::UnitTest(){ 15 | 16 | } 17 | 18 | void UnitTest::run(){ 19 | bool passed = true; 20 | passed &= report(Sequence::test(), "Sequence::test"); 21 | passed 
&= report(Read::test(), "Read::test"); 22 | passed &= report(OverlapAnalysis::test(), "OverlapAnalysis::test"); 23 | passed &= report(Filter::test(), "Filter::test"); 24 | passed &= report(AdapterTrimmer::test(), "AdapterTrimmer::test"); 25 | passed &= report(BaseCorrector::test(), "BaseCorrector::test"); 26 | passed &= report(PolyX::test(), "PolyX::test"); 27 | passed &= report(NucleotideTree::test(), "NucleotideTree::test"); 28 | passed &= report(Evaluator::test(), "Evaluator::test"); 29 | printf("\n==========================\n"); 30 | printf("%s\n\n", passed?"ALL PASSED":"FAILED"); 31 | } 32 | 33 | bool UnitTest::report(bool result, string message) { 34 | printf("%s:%s\n\n", message.c_str(), result?" PASSED":" FAILED"); 35 | return result; 36 | } -------------------------------------------------------------------------------- /src/unittest.h: -------------------------------------------------------------------------------- 1 | #ifndef UNIT_TEST_H 2 | #define UNIT_TEST_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | class UnitTest{ 11 | public: 12 | UnitTest(); 13 | void run(); 14 | bool report(bool result, string message); 15 | }; 16 | 17 | #endif -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | inline char complement(char base) { 16 | switch(base){ 17 | case 'A': 18 | case 'a': 19 | return 'T'; 20 | case 'T': 21 | case 't': 22 | return 'A'; 23 | case 'C': 24 | case 'c': 25 | return 'G'; 26 | case 'G': 27 | case 'g': 28 | return 'C'; 29 | default: 30 | return 'N'; 31 | } 32 | } 33 | 34 | inline bool starts_with( string const & value, string const & starting) 35 | { 36 | if (starting.size() > value.size()) return false; 
37 | return equal(starting.begin(), starting.end(), value.begin()); 38 | } 39 | 40 | inline bool ends_with( string const & value, string const & ending) 41 | { 42 | if (ending.size() > value.size()) return false; 43 | return equal(ending.rbegin(), ending.rend(), value.rbegin()); 44 | } 45 | 46 | inline string trim(const string& str) 47 | { 48 | string::size_type pos = str.find_first_not_of(' '); 49 | if (pos == string::npos) 50 | { 51 | return string(""); 52 | } 53 | string::size_type pos2 = str.find_last_not_of(' '); 54 | if (pos2 != string::npos) 55 | { 56 | return str.substr(pos, pos2 - pos + 1); 57 | } 58 | return str.substr(pos); 59 | } 60 | 61 | inline int split(const string& str, vector& ret_, string sep = ",") 62 | { 63 | if (str.empty()) 64 | { 65 | return 0; 66 | } 67 | 68 | string tmp; 69 | string::size_type pos_begin = str.find_first_not_of(sep); 70 | string::size_type comma_pos = 0; 71 | 72 | while (pos_begin != string::npos) 73 | { 74 | comma_pos = str.find(sep, pos_begin); 75 | if (comma_pos != string::npos) 76 | { 77 | tmp = str.substr(pos_begin, comma_pos - pos_begin); 78 | pos_begin = comma_pos + sep.length(); 79 | } 80 | else 81 | { 82 | tmp = str.substr(pos_begin); 83 | pos_begin = comma_pos; 84 | } 85 | 86 | ret_.push_back(tmp); 87 | tmp.clear(); 88 | } 89 | return 0; 90 | } 91 | 92 | inline string replace(const string& str, const string& src, const string& dest) 93 | { 94 | string ret; 95 | 96 | string::size_type pos_begin = 0; 97 | string::size_type pos = str.find(src); 98 | while (pos != string::npos) 99 | { 100 | ret.append(str.data() + pos_begin, pos - pos_begin); 101 | ret += dest; 102 | pos_begin = pos + 1; 103 | pos = str.find(src, pos_begin); 104 | } 105 | if (pos_begin < str.length()) 106 | { 107 | ret.append(str.begin() + pos_begin, str.end()); 108 | } 109 | return ret; 110 | } 111 | 112 | inline string reverse(const string& str) { 113 | string ret(str.length(), 0); 114 | for(int pos=0; pos 0) { 151 | struct stat status; 152 | int 
result = stat( s.c_str(), &status ); 153 | if(result == 0) { 154 | exists = true; 155 | } 156 | } 157 | return exists; 158 | } 159 | 160 | 161 | // check if a string is a directory 162 | inline bool is_directory(const string& path) 163 | { 164 | bool isdir = false; 165 | struct stat status; 166 | // visual studion use _S_IFDIR instead of S_IFDIR 167 | // http://msdn.microsoft.com/en-us/library/14h5k7ff.aspx 168 | #ifdef _MSC_VER 169 | #define S_IFDIR _S_IFDIR 170 | #endif 171 | stat( path.c_str(), &status ); 172 | if ( status.st_mode & S_IFDIR ) { 173 | isdir = true; 174 | } 175 | // #endif 176 | return isdir; 177 | } 178 | 179 | inline void check_file_valid(const string& s) { 180 | if(!file_exists(s)){ 181 | cerr << "ERROR: file '" << s << "' doesn't exist, quit now" << endl; 182 | exit(-1); 183 | } 184 | if(is_directory(s)){ 185 | cerr << "ERROR: '" << s << "' is a folder, not a file, quit now" << endl; 186 | exit(-1); 187 | } 188 | } 189 | 190 | inline void check_file_writable(const string& s) { 191 | string dir = dirname(s); 192 | if(!file_exists(dir)) { 193 | cerr << "ERROR: '" << dir << " doesn't exist. Create this folder and run this command again." 
// Remove non alphabetic characters from a string
inline std::string str_keep_alpha(const std::string& s)
{
    std::string kept;
    for (std::string::size_type i = 0; i < s.size(); i++) {
        // <cctype> classifiers are undefined for negative char values, hence the cast.
        if (std::isalpha(static_cast<unsigned char>(s[i])))
            kept += s[i];
    }
    return kept;
}

// Remove invalid sequence characters from a string, in place.
// Letters, '-' and '*' are kept; everything else is dropped. With
// forceUpperCase, ASCII 'a'..'z' are converted to upper case first.
inline void str_keep_valid_sequence(std::string& s, bool forceUpperCase = false)
{
    const char case_gap = 'a' - 'A';
    std::string::size_type total = 0;  // write cursor; never ahead of the read cursor
    for (std::string::size_type i = 0; i < s.size(); i++) {
        char c = s[i];
        if (forceUpperCase && c >= 'a' && c <= 'z')
            c -= case_gap;
        if (std::isalpha(static_cast<unsigned char>(c)) || c == '-' || c == '*')
            s[total++] = c;
    }
    s.resize(total);
}

// Searches `pattern` in `str` starting at index `start`.
// Returns the index just past the end of the first match, or -1 if there is none.
inline int find_with_right_pos(const std::string& str, const std::string& pattern, int start = 0)
{
    const std::string::size_type pos =
        str.find(pattern, static_cast<std::string::size_type>(start));
    if (pos == std::string::npos)  // compare with npos instead of relying on narrowing to a negative int
        return -1;
    return static_cast<int>(pos + pattern.length());
}

// Converts `s` to upper case in place.
inline void str2upper(std::string& s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::toupper(c)); });
}

// Converts `s` to lower case in place.
inline void str2lower(std::string& s)
{
    std::transform(s.begin(), s.end(), s.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
}

// Maps a numeric quality to its character: the value is clamped to
// [0, 127 - 33] and then offset by 33.
inline char num2qual(int num)
{
    if (num > 127 - 33)
        num = 127 - 33;
    if (num < 0)
        num = 0;
    return static_cast<char>(num + 33);
}

// Prints "ERROR: <msg>" to stderr and terminates the program with exit code -1.
inline void error_exit(const std::string& msg)
{
    std::cerr << "ERROR: " << msg << std::endl;
    exit(-1);
}

extern std::mutex logmtx;

// Writes "[hour:min:sec] " followed by the message to stderr while holding logmtx.
// NOTE(review): the end of the output statement was truncated in the extracted
// source; printing `s` and a newline before releasing the lock is presumed --
// confirm against upstream.
inline void loginfo(const std::string s)
{
    // The guard releases the mutex on every exit path, including exceptions.
    std::lock_guard<std::mutex> guard(logmtx);
    time_t tt = time(NULL);
    tm* t = localtime(&tt);
    std::cerr << "[" << t->tm_hour << ":" << t->tm_min << ":" << t->tm_sec << "] " << s << std::endl;
}
Kmer(mOptions->kmerFile, opt); 9 | 10 | mKmerCollection = NULL; 11 | if(!mOptions->kmerCollectionFile.empty()) 12 | mKmerCollection = new KmerCollection(mOptions->kmerCollectionFile, opt); 13 | 14 | // no KMER file, the kmerKeyLen is not intialized 15 | if(mOptions->kmerKeyLen == 0) 16 | mOptions->kmerKeyLen = 25; 17 | mGenomes = NULL; 18 | if(!mOptions->genomeFile.empty()) 19 | mGenomes = new Genomes(mOptions->genomeFile, opt); 20 | mHits = 0; 21 | } 22 | 23 | VirusDetector::~VirusDetector(){ 24 | if(mKmer) { 25 | delete mKmer; 26 | mKmer = NULL; 27 | } 28 | if(mKmerCollection) { 29 | delete mKmerCollection; 30 | mKmerCollection = NULL; 31 | } 32 | if(mGenomes) { 33 | delete mGenomes; 34 | mGenomes = NULL; 35 | } 36 | } 37 | 38 | void VirusDetector::report() { 39 | if(mKmer) { 40 | cerr << "Coverage for target unique KMER file:"<report(); 42 | } 43 | if(mKmerCollection) { 44 | cerr << endl << "Detection result for provided KMER collection:"<report(); 46 | } 47 | if(mGenomes) { 48 | //mGenomes->report(); 49 | } 50 | } 51 | 52 | bool VirusDetector::detect(Read* r) { 53 | if(r->length() >= mOptions->longReadThreshold) { 54 | // long reads, split it 55 | vector reads = r->split(mOptions->segmentLength); 56 | bool detected = false; 57 | for(int i=0; imSeq.mStr; 66 | Sequence rSequence = ~(r->mSeq); 67 | string& rseq = rSequence.mStr; 68 | 69 | return scan(seq) | scan(rseq); 70 | } 71 | 72 | bool VirusDetector::scan(string& seq) { 73 | int hitCount = 0; 74 | 75 | int keylen = mOptions->kmerKeyLen; 76 | int blankBits = 64 - 2*keylen; 77 | 78 | bool onlyHitOneGenome = true; 79 | uint32 lastGenomeID = 0; 80 | 81 | if(seq.length() < keylen) 82 | return false; 83 | 84 | bool valid = true; 85 | bool needAlignment = false; 86 | 87 | uint32 start = 0; 88 | uint64 key = Kmer::seq2uint64(seq, start, keylen-1, valid); 89 | while(valid == false) { 90 | start++; 91 | key = Kmer::seq2uint64(seq, start, keylen-1, valid); 92 | // reach the tail 93 | if(start >= seq.length() - keylen) 
94 | return false; 95 | } 96 | for(uint32 pos = start; pos < seq.length() - keylen; pos++) { 97 | key = (key << 2); 98 | switch(seq[pos + keylen-1]) { 99 | case 'A': 100 | key += 0; 101 | break; 102 | case 'T': 103 | key += 1; 104 | break; 105 | case 'C': 106 | key += 2; 107 | break; 108 | case 'G': 109 | key += 3; 110 | break; 111 | case 'N': 112 | default: 113 | // we have to skip the segments covering this N 114 | if(pos >= seq.length() - keylen) 115 | continue; 116 | pos++; 117 | key = Kmer::seq2uint64(seq, pos, keylen-1, valid); 118 | bool outterBreak = false; 119 | while(valid == false) { 120 | pos++; 121 | key = Kmer::seq2uint64(seq, pos, keylen-1, valid); 122 | // reach the tail 123 | if(pos >= seq.length() - keylen) { 124 | outterBreak = true; 125 | break; 126 | } 127 | } 128 | if(outterBreak) 129 | break; 130 | 131 | continue; 132 | } 133 | key = (key << blankBits) >> blankBits; 134 | 135 | // add to genome stats 136 | if(!needAlignment && mGenomes && mGenomes->hasKey(key)) { 137 | needAlignment = true; 138 | if(!mKmer) 139 | break; 140 | } 141 | 142 | // add to Kmer stas 143 | if(mKmer) { 144 | bool hit = mKmer->add(key); 145 | if(hit) 146 | hitCount++; 147 | } 148 | 149 | if(mKmerCollection) { 150 | uint32 gid = mKmerCollection->add(key); 151 | if(gid > 0) { 152 | if(lastGenomeID!=0 && gid!=lastGenomeID) 153 | onlyHitOneGenome = false; 154 | lastGenomeID = gid; 155 | } 156 | } 157 | } 158 | 159 | if(mKmerCollection && onlyHitOneGenome && lastGenomeID>0) 160 | mKmerCollection->addGenomeRead(lastGenomeID); 161 | 162 | bool wellMapped = false; 163 | if(needAlignment && mGenomes) 164 | wellMapped = mGenomes->align(seq); 165 | 166 | return hitCount>0 || wellMapped; 167 | } -------------------------------------------------------------------------------- /src/virusdetector.h: -------------------------------------------------------------------------------- 1 | #ifndef VIRUSDETECTOR_H 2 | #define VIRUSDETECTOR_H 3 | 4 | #include 5 | #include 6 | #include 7 | 
#include 8 | #include "options.h" 9 | #include "read.h" 10 | #include "kmer.h" 11 | #include "genomes.h" 12 | #include "kmercollection.h" 13 | 14 | using namespace std; 15 | 16 | class VirusDetector{ 17 | public: 18 | VirusDetector(Options* opt); 19 | ~VirusDetector(); 20 | bool detect(Read* r); 21 | bool scan(string& seq); 22 | void report(); 23 | 24 | Kmer* getKmer() {return mKmer;} 25 | Genomes* getGenomes() {return mGenomes;} 26 | KmerCollection* getKmerCollection() {return mKmerCollection;} 27 | 28 | 29 | private: 30 | Options* mOptions; 31 | Genomes* mGenomes; 32 | Kmer* mKmer; 33 | KmerCollection* mKmerCollection; 34 | uint64 mHits; 35 | }; 36 | 37 | 38 | #endif -------------------------------------------------------------------------------- /src/writer.cpp: -------------------------------------------------------------------------------- 1 | #include "writer.h" 2 | #include "util.h" 3 | #include "fastqreader.h" 4 | #include 5 | 6 | Writer::Writer(string filename, int compression){ 7 | mCompression = compression; 8 | mFilename = filename; 9 | mZipFile = NULL; 10 | mZipped = false; 11 | haveToClose = true; 12 | init(); 13 | } 14 | 15 | Writer::Writer(ofstream* stream) { 16 | mZipFile = NULL; 17 | mZipped = false; 18 | mOutStream = stream; 19 | haveToClose = false; 20 | } 21 | 22 | Writer::Writer(gzFile gzfile) { 23 | mOutStream = NULL; 24 | mZipFile = gzfile; 25 | mZipped = true; 26 | haveToClose = false; 27 | } 28 | 29 | Writer::~Writer(){ 30 | if(haveToClose) { 31 | close(); 32 | } 33 | } 34 | 35 | string Writer::filename(){ 36 | return mFilename; 37 | } 38 | 39 | void Writer::init(){ 40 | if (ends_with(mFilename, ".gz")){ 41 | mZipFile = gzopen(mFilename.c_str(), "w"); 42 | gzsetparams(mZipFile, mCompression, Z_DEFAULT_STRATEGY); 43 | gzbuffer(mZipFile, 1024*1024); 44 | mZipped = true; 45 | } 46 | else { 47 | mOutStream = new ofstream(); 48 | mOutStream->open(mFilename.c_str(), ifstream::out); 49 | mZipped = false; 50 | } 51 | } 52 | 53 | bool 
Writer::writeLine(string& linestr){ 54 | const char* line = linestr.c_str(); 55 | size_t size = linestr.length(); 56 | size_t written; 57 | bool status; 58 | if(mZipped){ 59 | written = gzwrite(mZipFile, line, size); 60 | gzputc(mZipFile, '\n'); 61 | status = size == written; 62 | } 63 | else{ 64 | mOutStream->write(line, size); 65 | mOutStream->put('\n'); 66 | status = !mOutStream->fail(); 67 | } 68 | 69 | return status; 70 | } 71 | 72 | bool Writer::writeString(string& str){ 73 | const char* strdata = str.c_str(); 74 | size_t size = str.length(); 75 | size_t written; 76 | bool status; 77 | if(mZipped){ 78 | written = gzwrite(mZipFile, strdata, size); 79 | status = size == written; 80 | } 81 | else{ 82 | mOutStream->write(strdata, size); 83 | status = !mOutStream->fail(); 84 | } 85 | 86 | return status; 87 | } 88 | 89 | bool Writer::write(char* strdata, size_t size) { 90 | size_t written; 91 | bool status; 92 | 93 | if(mZipped){ 94 | written = gzwrite(mZipFile, strdata, size); 95 | status = size == written; 96 | } 97 | else{ 98 | mOutStream->write(strdata, size); 99 | status = !mOutStream->fail(); 100 | } 101 | return status; 102 | } 103 | 104 | void Writer::close(){ 105 | if (mZipped){ 106 | if (mZipFile){ 107 | gzflush(mZipFile, Z_FINISH); 108 | gzclose(mZipFile); 109 | mZipFile = NULL; 110 | } 111 | } 112 | else if(mOutStream) { 113 | if (mOutStream->is_open()){ 114 | mOutStream->flush(); 115 | //TODO: following two lines will cause crash 116 | //mOutStream->close(); 117 | //delete mOutStream; 118 | mOutStream = NULL; 119 | } 120 | } 121 | } 122 | 123 | bool Writer::isZipped(){ 124 | return mZipped; 125 | } -------------------------------------------------------------------------------- /src/writer.h: -------------------------------------------------------------------------------- 1 | #ifndef _WRITER_H 2 | #define _WRITER_H 3 | 4 | #include 5 | #include 6 | #ifdef DYNAMIC_ZLIB 7 | #include 8 | #else 9 | #include "zlib/zlib.h" 10 | #endif 11 | #include 
"common.h" 12 | #include 13 | #include 14 | 15 | using namespace std; 16 | 17 | class Writer{ 18 | public: 19 | Writer(string filename, int compression = 3); 20 | Writer(ofstream* stream); 21 | Writer(gzFile gzfile); 22 | ~Writer(); 23 | bool isZipped(); 24 | bool writeString(string& s); 25 | bool writeLine(string& linestr); 26 | bool write(char* strdata, size_t size); 27 | string filename(); 28 | 29 | public: 30 | static bool test(); 31 | 32 | private: 33 | void init(); 34 | void close(); 35 | 36 | private: 37 | string mFilename; 38 | gzFile mZipFile; 39 | ofstream* mOutStream; 40 | bool mZipped; 41 | int mCompression; 42 | bool haveToClose; 43 | }; 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/writerthread.cpp: -------------------------------------------------------------------------------- 1 | #include "writerthread.h" 2 | #include "util.h" 3 | #include 4 | #include 5 | 6 | WriterThread::WriterThread(Options* opt, string filename){ 7 | mOptions = opt; 8 | 9 | mWriter1 = NULL; 10 | 11 | mInputCounter = 0; 12 | mOutputCounter = 0; 13 | mInputCompleted = false; 14 | mFilename = filename; 15 | 16 | mRingBuffer = new char*[PACK_NUM_LIMIT]; 17 | memset(mRingBuffer, 0, sizeof(char*) * PACK_NUM_LIMIT); 18 | mRingBufferSizes = new size_t[PACK_NUM_LIMIT]; 19 | memset(mRingBufferSizes, 0, sizeof(size_t) * PACK_NUM_LIMIT); 20 | initWriter(filename); 21 | } 22 | 23 | WriterThread::~WriterThread() { 24 | cleanup(); 25 | delete mRingBuffer; 26 | } 27 | 28 | bool WriterThread::isCompleted() 29 | { 30 | return mInputCompleted && (mOutputCounter == mInputCounter); 31 | } 32 | 33 | bool WriterThread::setInputCompleted() { 34 | mInputCompleted = true; 35 | return true; 36 | } 37 | 38 | void WriterThread::output(){ 39 | if(mOutputCounter >= mInputCounter) { 40 | usleep(100); 41 | } 42 | while( mOutputCounter < mInputCounter) 43 | { 44 | mWriter1->write(mRingBuffer[mOutputCounter], mRingBufferSizes[mOutputCounter]); 45 | 
delete mRingBuffer[mOutputCounter]; 46 | mRingBuffer[mOutputCounter] = NULL; 47 | mOutputCounter++; 48 | } 49 | } 50 | 51 | void WriterThread::input(char* data, size_t size){ 52 | mRingBuffer[mInputCounter] = data; 53 | mRingBufferSizes[mInputCounter] = size; 54 | mInputCounter++; 55 | } 56 | 57 | void WriterThread::cleanup() { 58 | deleteWriter(); 59 | } 60 | 61 | void WriterThread::deleteWriter() { 62 | if(mWriter1 != NULL) { 63 | delete mWriter1; 64 | mWriter1 = NULL; 65 | } 66 | } 67 | 68 | void WriterThread::initWriter(string filename1) { 69 | deleteWriter(); 70 | mWriter1 = new Writer(filename1, mOptions->compression); 71 | } 72 | 73 | void WriterThread::initWriter(ofstream* stream) { 74 | deleteWriter(); 75 | mWriter1 = new Writer(stream); 76 | } 77 | 78 | void WriterThread::initWriter(gzFile gzfile) { 79 | deleteWriter(); 80 | mWriter1 = new Writer(gzfile); 81 | } 82 | 83 | long WriterThread::bufferLength(){ 84 | return mInputCounter - mOutputCounter; 85 | } 86 | -------------------------------------------------------------------------------- /src/writerthread.h: -------------------------------------------------------------------------------- 1 | #ifndef WRITER_THREAD_H 2 | #define WRITER_THREAD_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "writer.h" 9 | #include "options.h" 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | class WriterThread{ 16 | public: 17 | WriterThread(Options* opt, string filename); 18 | ~WriterThread(); 19 | 20 | void initWriter(string filename1); 21 | void initWriter(ofstream* stream); 22 | void initWriter(gzFile gzfile); 23 | 24 | void cleanup(); 25 | 26 | bool isCompleted(); 27 | void output(); 28 | void input(char* data, size_t size); 29 | bool setInputCompleted(); 30 | 31 | long bufferLength(); 32 | string getFilename() {return mFilename;} 33 | 34 | private: 35 | void deleteWriter(); 36 | 37 | private: 38 | Writer* mWriter1; 39 | Options* mOptions; 40 | string mFilename; 41 | 42 | // 
for spliting output 43 | bool mInputCompleted; 44 | atomic_long mInputCounter; 45 | atomic_long mOutputCounter; 46 | char** mRingBuffer; 47 | size_t* mRingBufferSizes; 48 | 49 | mutex mtx; 50 | 51 | }; 52 | 53 | #endif -------------------------------------------------------------------------------- /src/zlib/deflate.h: -------------------------------------------------------------------------------- 1 | /* deflate.h -- internal compression state 2 | * Copyright (C) 1995-2012 Jean-loup Gailly 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | /* @(#) $Id$ */ 12 | 13 | #ifndef DEFLATE_H 14 | #define DEFLATE_H 15 | 16 | #include "zutil.h" 17 | 18 | /* define NO_GZIP when compiling if you want to disable gzip header and 19 | trailer creation by deflate(). NO_GZIP would be used to avoid linking in 20 | the crc code when it is not needed. For shared libraries, gzip encoding 21 | should be left enabled. */ 22 | #ifndef NO_GZIP 23 | # define GZIP 24 | #endif 25 | 26 | /* =========================================================================== 27 | * Internal compression state. 
28 | */ 29 | 30 | #define LENGTH_CODES 29 31 | /* number of length codes, not counting the special END_BLOCK code */ 32 | 33 | #define LITERALS 256 34 | /* number of literal bytes 0..255 */ 35 | 36 | #define L_CODES (LITERALS+1+LENGTH_CODES) 37 | /* number of Literal or Length codes, including the END_BLOCK code */ 38 | 39 | #define D_CODES 30 40 | /* number of distance codes */ 41 | 42 | #define BL_CODES 19 43 | /* number of codes used to transfer the bit lengths */ 44 | 45 | #define HEAP_SIZE (2*L_CODES+1) 46 | /* maximum heap size */ 47 | 48 | #define MAX_BITS 15 49 | /* All codes must not exceed MAX_BITS bits */ 50 | 51 | #define Buf_size 16 52 | /* size of bit buffer in bi_buf */ 53 | 54 | #define INIT_STATE 42 55 | #define EXTRA_STATE 69 56 | #define NAME_STATE 73 57 | #define COMMENT_STATE 91 58 | #define HCRC_STATE 103 59 | #define BUSY_STATE 113 60 | #define FINISH_STATE 666 61 | /* Stream status */ 62 | 63 | 64 | /* Data structure describing a single value and its code string. */ 65 | typedef struct ct_data_s { 66 | union { 67 | ush freq; /* frequency count */ 68 | ush code; /* bit string */ 69 | } fc; 70 | union { 71 | ush dad; /* father node in Huffman tree */ 72 | ush len; /* length of bit string */ 73 | } dl; 74 | } FAR ct_data; 75 | 76 | #define Freq fc.freq 77 | #define Code fc.code 78 | #define Dad dl.dad 79 | #define Len dl.len 80 | 81 | typedef struct static_tree_desc_s static_tree_desc; 82 | 83 | typedef struct tree_desc_s { 84 | ct_data *dyn_tree; /* the dynamic tree */ 85 | int max_code; /* largest code with non zero frequency */ 86 | static_tree_desc *stat_desc; /* the corresponding static tree */ 87 | } FAR tree_desc; 88 | 89 | typedef ush Pos; 90 | typedef Pos FAR Posf; 91 | typedef unsigned IPos; 92 | 93 | /* A Pos is an index in the character window. We use short instead of int to 94 | * save space in the various tables. IPos is used only for parameter passing. 
95 | */ 96 | 97 | typedef struct internal_state { 98 | z_streamp strm; /* pointer back to this zlib stream */ 99 | int status; /* as the name implies */ 100 | Bytef *pending_buf; /* output still pending */ 101 | ulg pending_buf_size; /* size of pending_buf */ 102 | Bytef *pending_out; /* next pending byte to output to the stream */ 103 | uInt pending; /* nb of bytes in the pending buffer */ 104 | int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 105 | gz_headerp gzhead; /* gzip header information to write */ 106 | uInt gzindex; /* where in extra, name, or comment */ 107 | Byte method; /* can only be DEFLATED */ 108 | int last_flush; /* value of flush param for previous deflate call */ 109 | 110 | /* used by deflate.c: */ 111 | 112 | uInt w_size; /* LZ77 window size (32K by default) */ 113 | uInt w_bits; /* log2(w_size) (8..16) */ 114 | uInt w_mask; /* w_size - 1 */ 115 | 116 | Bytef *window; 117 | /* Sliding window. Input bytes are read into the second half of the window, 118 | * and move to the first half later to keep a dictionary of at least wSize 119 | * bytes. With this organization, matches are limited to a distance of 120 | * wSize-MAX_MATCH bytes, but this ensures that IO is always 121 | * performed with a length multiple of the block size. Also, it limits 122 | * the window size to 64K, which is quite useful on MSDOS. 123 | * To do: use the user input buffer as sliding window. 124 | */ 125 | 126 | ulg window_size; 127 | /* Actual size of window: 2*wSize, except when the user input buffer 128 | * is directly used as sliding window. 129 | */ 130 | 131 | Posf *prev; 132 | /* Link to older string with same hash index. To limit the size of this 133 | * array to 64K, this link is maintained only for the last 32K strings. 134 | * An index in this array is thus a window index modulo 32K. 135 | */ 136 | 137 | Posf *head; /* Heads of the hash chains or NIL. 
*/ 138 | 139 | uInt ins_h; /* hash index of string to be inserted */ 140 | uInt hash_size; /* number of elements in hash table */ 141 | uInt hash_bits; /* log2(hash_size) */ 142 | uInt hash_mask; /* hash_size-1 */ 143 | 144 | uInt hash_shift; 145 | /* Number of bits by which ins_h must be shifted at each input 146 | * step. It must be such that after MIN_MATCH steps, the oldest 147 | * byte no longer takes part in the hash key, that is: 148 | * hash_shift * MIN_MATCH >= hash_bits 149 | */ 150 | 151 | long block_start; 152 | /* Window position at the beginning of the current output block. Gets 153 | * negative when the window is moved backwards. 154 | */ 155 | 156 | uInt match_length; /* length of best match */ 157 | IPos prev_match; /* previous match */ 158 | int match_available; /* set if previous match exists */ 159 | uInt strstart; /* start of string to insert */ 160 | uInt match_start; /* start of matching string */ 161 | uInt lookahead; /* number of valid bytes ahead in window */ 162 | 163 | uInt prev_length; 164 | /* Length of the best match at previous step. Matches not greater than this 165 | * are discarded. This is used in the lazy match evaluation. 166 | */ 167 | 168 | uInt max_chain_length; 169 | /* To speed up deflation, hash chains are never searched beyond this 170 | * length. A higher limit improves compression ratio but degrades the 171 | * speed. 172 | */ 173 | 174 | uInt max_lazy_match; 175 | /* Attempt to find a better match only when the current match is strictly 176 | * smaller than this value. This mechanism is used only for compression 177 | * levels >= 4. 178 | */ 179 | # define max_insert_length max_lazy_match 180 | /* Insert new strings in the hash table only if the match length is not 181 | * greater than this length. This saves time but degrades compression. 182 | * max_insert_length is used only for compression levels <= 3. 
183 | */ 184 | 185 | int level; /* compression level (1..9) */ 186 | int strategy; /* favor or force Huffman coding*/ 187 | 188 | uInt good_match; 189 | /* Use a faster search when the previous match is longer than this */ 190 | 191 | int nice_match; /* Stop searching when current match exceeds this */ 192 | 193 | /* used by trees.c: */ 194 | /* Didn't use ct_data typedef below to suppress compiler warning */ 195 | struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ 196 | struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ 197 | struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ 198 | 199 | struct tree_desc_s l_desc; /* desc. for literal tree */ 200 | struct tree_desc_s d_desc; /* desc. for distance tree */ 201 | struct tree_desc_s bl_desc; /* desc. for bit length tree */ 202 | 203 | ush bl_count[MAX_BITS+1]; 204 | /* number of codes at each bit length for an optimal tree */ 205 | 206 | int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ 207 | int heap_len; /* number of elements in the heap */ 208 | int heap_max; /* element of largest frequency */ 209 | /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 210 | * The same heap array is used to build all trees. 211 | */ 212 | 213 | uch depth[2*L_CODES+1]; 214 | /* Depth of each subtree used as tie breaker for trees of equal frequency 215 | */ 216 | 217 | uchf *l_buf; /* buffer for literals or lengths */ 218 | 219 | uInt lit_bufsize; 220 | /* Size of match buffer for literals/lengths. There are 4 reasons for 221 | * limiting lit_bufsize to 64K: 222 | * - frequencies can be kept in 16 bit counters 223 | * - if compression is not successful for the first block, all input 224 | * data is still in the window so we can still emit a stored block even 225 | * when input comes from standard input. (This can also be done for 226 | * all blocks if lit_bufsize is not greater than 32K.) 
227 | * - if compression is not successful for a file smaller than 64K, we can 228 | * even emit a stored file instead of a stored block (saving 5 bytes). 229 | * This is applicable only for zip (not gzip or zlib). 230 | * - creating new Huffman trees less frequently may not provide fast 231 | * adaptation to changes in the input data statistics. (Take for 232 | * example a binary file with poorly compressible code followed by 233 | * a highly compressible string table.) Smaller buffer sizes give 234 | * fast adaptation but have of course the overhead of transmitting 235 | * trees more frequently. 236 | * - I can't count above 4 237 | */ 238 | 239 | uInt last_lit; /* running index in l_buf */ 240 | 241 | ushf *d_buf; 242 | /* Buffer for distances. To simplify the code, d_buf and l_buf have 243 | * the same number of elements. To use different lengths, an extra flag 244 | * array would be necessary. 245 | */ 246 | 247 | ulg opt_len; /* bit length of current block with optimal trees */ 248 | ulg static_len; /* bit length of current block with static trees */ 249 | uInt matches; /* number of string matches in current block */ 250 | uInt insert; /* bytes at end of window left to insert */ 251 | 252 | #ifdef DEBUG 253 | ulg compressed_len; /* total bit length of compressed file mod 2^32 */ 254 | ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ 255 | #endif 256 | 257 | ush bi_buf; 258 | /* Output buffer. bits are inserted starting at the bottom (least 259 | * significant bits). 260 | */ 261 | int bi_valid; 262 | /* Number of valid bits in bi_buf. All bits above the last valid bit 263 | * are always zero. 264 | */ 265 | 266 | ulg high_water; 267 | /* High water mark offset in window for initialized bytes -- bytes above 268 | * this are set to zero in order to avoid memory check warnings when 269 | * longest match routines access bytes past the input. This is then 270 | * updated to the new high water mark. 
271 | */ 272 | 273 | } FAR deflate_state; 274 | 275 | /* Output a byte on the stream. 276 | * IN assertion: there is enough room in pending_buf. 277 | */ 278 | #define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 279 | 280 | 281 | #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 282 | /* Minimum amount of lookahead, except at the end of the input file. 283 | * See deflate.c for comments about the MIN_MATCH+1. 284 | */ 285 | 286 | #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) 287 | /* In order to simplify the code, particularly on 16 bit machines, match 288 | * distances are limited to MAX_DIST instead of WSIZE. 289 | */ 290 | 291 | #define WIN_INIT MAX_MATCH 292 | /* Number of bytes after end of data in window to initialize in order to avoid 293 | memory checker errors from longest match routines */ 294 | 295 | /* in trees.c */ 296 | void ZLIB_INTERNAL _tr_init OF((deflate_state *s)); 297 | int ZLIB_INTERNAL _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); 298 | void ZLIB_INTERNAL _tr_flush_block OF((deflate_state *s, charf *buf, 299 | ulg stored_len, int last)); 300 | void ZLIB_INTERNAL _tr_flush_bits OF((deflate_state *s)); 301 | void ZLIB_INTERNAL _tr_align OF((deflate_state *s)); 302 | void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf, 303 | ulg stored_len, int last)); 304 | 305 | #define d_code(dist) \ 306 | ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 307 | /* Mapping from a distance to a distance code. dist is the distance - 1 and 308 | * must not have side effects. _dist_code[256] and _dist_code[257] are never 309 | * used. 
310 | */ 311 | 312 | #ifndef DEBUG 313 | /* Inline versions of _tr_tally for speed: */ 314 | 315 | #if defined(GEN_TREES_H) || !defined(STDC) 316 | extern uch ZLIB_INTERNAL _length_code[]; 317 | extern uch ZLIB_INTERNAL _dist_code[]; 318 | #else 319 | extern const uch ZLIB_INTERNAL _length_code[]; 320 | extern const uch ZLIB_INTERNAL _dist_code[]; 321 | #endif 322 | 323 | # define _tr_tally_lit(s, c, flush) \ 324 | { uch cc = (c); \ 325 | s->d_buf[s->last_lit] = 0; \ 326 | s->l_buf[s->last_lit++] = cc; \ 327 | s->dyn_ltree[cc].Freq++; \ 328 | flush = (s->last_lit == s->lit_bufsize-1); \ 329 | } 330 | # define _tr_tally_dist(s, distance, length, flush) \ 331 | { uch len = (length); \ 332 | ush dist = (distance); \ 333 | s->d_buf[s->last_lit] = dist; \ 334 | s->l_buf[s->last_lit++] = len; \ 335 | dist--; \ 336 | s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 337 | s->dyn_dtree[d_code(dist)].Freq++; \ 338 | flush = (s->last_lit == s->lit_bufsize-1); \ 339 | } 340 | #else 341 | # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 342 | # define _tr_tally_dist(s, distance, length, flush) \ 343 | flush = _tr_tally(s, distance, length) 344 | #endif 345 | 346 | #endif /* DEFLATE_H */ 347 | -------------------------------------------------------------------------------- /src/zlib/gzguts.h: -------------------------------------------------------------------------------- 1 | /* gzguts.h -- zlib internal header definitions for gz* operations 2 | * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | #ifdef _LARGEFILE64_SOURCE 7 | # ifndef _LARGEFILE_SOURCE 8 | # define _LARGEFILE_SOURCE 1 9 | # endif 10 | # ifdef _FILE_OFFSET_BITS 11 | # undef _FILE_OFFSET_BITS 12 | # endif 13 | #endif 14 | 15 | #ifdef HAVE_HIDDEN 16 | # define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) 17 | #else 18 | # define ZLIB_INTERNAL 19 | #endif 20 | 21 | #include 22 | #include 
"zlib.h" 23 | #ifdef STDC 24 | # include 25 | # include 26 | # include 27 | #endif 28 | #include 29 | 30 | #ifdef _WIN32 31 | # include 32 | #endif 33 | 34 | #if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) 35 | # include 36 | #endif 37 | 38 | #ifdef WINAPI_FAMILY 39 | # define open _open 40 | # define read _read 41 | # define write _write 42 | # define close _close 43 | #endif 44 | 45 | #ifdef NO_DEFLATE /* for compatibility with old definition */ 46 | # define NO_GZCOMPRESS 47 | #endif 48 | 49 | #if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) 50 | # ifndef HAVE_VSNPRINTF 51 | # define HAVE_VSNPRINTF 52 | # endif 53 | #endif 54 | 55 | #if defined(__CYGWIN__) 56 | # ifndef HAVE_VSNPRINTF 57 | # define HAVE_VSNPRINTF 58 | # endif 59 | #endif 60 | 61 | #if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) 62 | # ifndef HAVE_VSNPRINTF 63 | # define HAVE_VSNPRINTF 64 | # endif 65 | #endif 66 | 67 | #ifndef HAVE_VSNPRINTF 68 | # ifdef MSDOS 69 | /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), 70 | but for now we just assume it doesn't. */ 71 | # define NO_vsnprintf 72 | # endif 73 | # ifdef __TURBOC__ 74 | # define NO_vsnprintf 75 | # endif 76 | # ifdef WIN32 77 | /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. 
*/ 78 | # if !defined(vsnprintf) && !defined(NO_vsnprintf) 79 | # if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) 80 | # define vsnprintf _vsnprintf 81 | # endif 82 | # endif 83 | # endif 84 | # ifdef __SASC 85 | # define NO_vsnprintf 86 | # endif 87 | # ifdef VMS 88 | # define NO_vsnprintf 89 | # endif 90 | # ifdef __OS400__ 91 | # define NO_vsnprintf 92 | # endif 93 | # ifdef __MVS__ 94 | # define NO_vsnprintf 95 | # endif 96 | #endif 97 | 98 | /* unlike snprintf (which is required in C99, yet still not supported by 99 | Microsoft more than a decade later!), _snprintf does not guarantee null 100 | termination of the result -- however this is only used in gzlib.c where 101 | the result is assured to fit in the space provided */ 102 | #ifdef _MSC_VER 103 | # define snprintf _snprintf 104 | #endif 105 | 106 | #ifndef local 107 | # define local static 108 | #endif 109 | /* compile with -Dlocal if your debugger can't find static symbols */ 110 | 111 | /* gz* functions always use library allocation functions */ 112 | #ifndef STDC 113 | extern voidp malloc OF((uInt size)); 114 | extern void free OF((voidpf ptr)); 115 | #endif 116 | 117 | /* get errno and strerror definition */ 118 | #if defined UNDER_CE 119 | # include 120 | # define zstrerror() gz_strwinerror((DWORD)GetLastError()) 121 | #else 122 | # ifndef NO_STRERROR 123 | # include 124 | # define zstrerror() strerror(errno) 125 | # else 126 | # define zstrerror() "stdio error (consult errno)" 127 | # endif 128 | #endif 129 | 130 | /* provide prototypes for these when building zlib without LFS */ 131 | #if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 132 | ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); 133 | ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); 134 | ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); 135 | ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); 136 | #endif 137 | 138 | /* default memLevel */ 139 | #if MAX_MEM_LEVEL >= 8 140 | # 
define DEF_MEM_LEVEL 8 141 | #else 142 | # define DEF_MEM_LEVEL MAX_MEM_LEVEL 143 | #endif 144 | 145 | /* default i/o buffer size -- double this for output when reading (this and 146 | twice this must be able to fit in an unsigned type) */ 147 | #define GZBUFSIZE 8192 148 | 149 | /* gzip modes, also provide a little integrity check on the passed structure */ 150 | #define GZ_NONE 0 151 | #define GZ_READ 7247 152 | #define GZ_WRITE 31153 153 | #define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ 154 | 155 | /* values for gz_state how */ 156 | #define LOOK 0 /* look for a gzip header */ 157 | #define COPY 1 /* copy input directly */ 158 | #define GZIP 2 /* decompress a gzip stream */ 159 | 160 | /* internal gzip file state data structure */ 161 | typedef struct { 162 | /* exposed contents for gzgetc() macro */ 163 | struct gzFile_s x; /* "x" for exposed */ 164 | /* x.have: number of bytes available at x.next */ 165 | /* x.next: next output data to deliver or write */ 166 | /* x.pos: current position in uncompressed data */ 167 | /* used for both reading and writing */ 168 | int mode; /* GZ_NONE, GZ_READ, GZ_WRITE or GZ_APPEND (see gzip modes above) */ 169 | int fd; /* file descriptor */ 170 | char *path; /* path or fd for error messages */ 171 | unsigned size; /* buffer size, zero if not allocated yet */ 172 | unsigned want; /* requested buffer size, default is GZBUFSIZE */ 173 | unsigned char *in; /* input buffer */ 174 | unsigned char *out; /* output buffer (double-sized when reading) */ 175 | int direct; /* 0 if processing gzip, 1 if transparent */ 176 | /* just for reading */ 177 | int how; /* LOOK (0): get header, COPY (1): copy, GZIP (2): decompress */ 178 | z_off64_t start; /* where the gzip data started, for rewinding */ 179 | int eof; /* true if end of input file reached */ 180 | int past; /* true if read requested past end */ 181 | /* just for writing */ 182 | int level; /* compression level */ 183 | int strategy; /* compression strategy */ 184 | /* seek request */ 185 | z_off64_t skip; /* amount to
skip (already rewound if backwards) */ 186 | int seek; /* true if seek request pending */ 187 | /* error information */ 188 | int err; /* error code */ 189 | char *msg; /* error message */ 190 | /* zlib inflate or deflate stream */ 191 | z_stream strm; /* stream structure in-place (not a pointer) */ 192 | } gz_state; 193 | typedef gz_state FAR *gz_statep; 194 | 195 | /* shared functions */ 196 | void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); 197 | #if defined UNDER_CE 198 | char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); 199 | #endif 200 | 201 | /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t 202 | value -- needed when comparing unsigned to z_off64_t, which is signed 203 | (possible z_off64_t types off_t, off64_t, and long are all signed) */ 204 | #ifdef INT_MAX 205 | # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) 206 | #else 207 | unsigned ZLIB_INTERNAL gz_intmax OF((void)); 208 | # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) 209 | #endif 210 | -------------------------------------------------------------------------------- /src/zlib/inffast.h: -------------------------------------------------------------------------------- 1 | /* inffast.h -- header to use inffast.c 2 | * Copyright (C) 1995-2003, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); 12 | -------------------------------------------------------------------------------- /src/zlib/inffixed.h: -------------------------------------------------------------------------------- 1 | /* inffixed.h -- table for decoding fixed codes 2 | * Generated automatically by makefixed().
3 | */ 4 | 5 | /* WARNING: this file should *not* be used by applications. 6 | It is part of the implementation of this library and is 7 | subject to change. Applications should only use zlib.h. 8 | */ 9 | 10 | static const code lenfix[512] = { 11 | {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, 12 | {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, 13 | {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, 14 | {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, 15 | {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, 16 | {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, 17 | {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, 18 | {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, 19 | {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, 20 | {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, 21 | {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, 22 | {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, 23 | {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, 24 | {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, 25 | {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, 26 | {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, 27 | {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, 28 | {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, 29 | {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, 30 | {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, 31 | {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, 32 | {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, 33 | {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, 34 | {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, 35 | 
{17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, 36 | {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, 37 | {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, 38 | {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, 39 | {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, 40 | {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, 41 | {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, 42 | {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, 43 | {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, 44 | {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, 45 | {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, 46 | {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, 47 | {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, 48 | {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, 49 | {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, 50 | {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, 51 | {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, 52 | {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, 53 | {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, 54 | {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, 55 | {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, 56 | {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, 57 | {0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, 58 | {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, 59 | {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, 60 | {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, 61 | {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, 62 | 
{0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, 63 | {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, 64 | {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, 65 | {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, 66 | {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, 67 | {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, 68 | {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, 69 | {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, 70 | {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, 71 | {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, 72 | {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, 73 | {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, 74 | {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, 75 | {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, 76 | {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, 77 | {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, 78 | {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, 79 | {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, 80 | {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, 81 | {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, 82 | {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, 83 | {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, 84 | {0,9,255} 85 | }; 86 | 87 | static const code distfix[32] = { 88 | {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, 89 | {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, 90 | {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, 91 | {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, 92 | 
{16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, 93 | {22,5,193},{64,5,0} 94 | }; 95 | -------------------------------------------------------------------------------- /src/zlib/inflate.h: -------------------------------------------------------------------------------- 1 | /* inflate.h -- internal inflate state definition 2 | * Copyright (C) 1995-2009 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | /* define NO_GZIP when compiling if you want to disable gzip header and 12 | trailer decoding by inflate(). NO_GZIP would be used to avoid linking in 13 | the crc code when it is not needed. For shared libraries, gzip decoding 14 | should be left enabled. */ 15 | #ifndef NO_GZIP 16 | # define GUNZIP 17 | #endif 18 | 19 | /* Possible inflate modes between inflate() calls */ 20 | typedef enum { 21 | HEAD, /* i: waiting for magic header */ 22 | FLAGS, /* i: waiting for method and flags (gzip) */ 23 | TIME, /* i: waiting for modification time (gzip) */ 24 | OS, /* i: waiting for extra flags and operating system (gzip) */ 25 | EXLEN, /* i: waiting for extra length (gzip) */ 26 | EXTRA, /* i: waiting for extra bytes (gzip) */ 27 | NAME, /* i: waiting for end of file name (gzip) */ 28 | COMMENT, /* i: waiting for end of comment (gzip) */ 29 | HCRC, /* i: waiting for header crc (gzip) */ 30 | DICTID, /* i: waiting for dictionary check value */ 31 | DICT, /* waiting for inflateSetDictionary() call */ 32 | TYPE, /* i: waiting for type bits, including last-flag bit */ 33 | TYPEDO, /* i: same, but skip check to exit inflate on new block */ 34 | STORED, /* i: waiting for stored size (length and complement) */ 35 | COPY_, /* i/o: same as COPY below, but only first time in */ 36 | COPY, /* i/o: waiting for 
input or output to copy stored block */ 37 | TABLE, /* i: waiting for dynamic block table lengths */ 38 | LENLENS, /* i: waiting for code length code lengths */ 39 | CODELENS, /* i: waiting for length/lit and distance code lengths */ 40 | LEN_, /* i: same as LEN below, but only first time in */ 41 | LEN, /* i: waiting for length/lit/eob code */ 42 | LENEXT, /* i: waiting for length extra bits */ 43 | DIST, /* i: waiting for distance code */ 44 | DISTEXT, /* i: waiting for distance extra bits */ 45 | MATCH, /* o: waiting for output space to copy string */ 46 | LIT, /* o: waiting for output space to write literal */ 47 | CHECK, /* i: waiting for 32-bit check value */ 48 | LENGTH, /* i: waiting for 32-bit length (gzip) */ 49 | DONE, /* finished check, done -- remain here until reset */ 50 | BAD, /* got a data error -- remain here until reset */ 51 | MEM, /* got an inflate() memory error -- remain here until reset */ 52 | SYNC /* looking for synchronization bytes to restart inflate() */ 53 | } inflate_mode; 54 | 55 | /* 56 | State transitions between above modes - 57 | 58 | (most modes can go to BAD or MEM on error -- not shown for clarity) 59 | 60 | Process header: 61 | HEAD -> (gzip) or (zlib) or (raw) 62 | (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> 63 | HCRC -> TYPE 64 | (zlib) -> DICTID or TYPE 65 | DICTID -> DICT -> TYPE 66 | (raw) -> TYPEDO 67 | Read deflate blocks: 68 | TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK 69 | STORED -> COPY_ -> COPY -> TYPE 70 | TABLE -> LENLENS -> CODELENS -> LEN_ 71 | LEN_ -> LEN 72 | Read deflate codes in fixed or dynamic block: 73 | LEN -> LENEXT or LIT or TYPE 74 | LENEXT -> DIST -> DISTEXT -> MATCH -> LEN 75 | LIT -> LEN 76 | Process trailer: 77 | CHECK -> LENGTH -> DONE 78 | */ 79 | 80 | /* state maintained between inflate() calls. Approximately 10K bytes. 
*/ 81 | struct inflate_state { 82 | inflate_mode mode; /* current inflate mode */ 83 | int last; /* true if processing last block */ 84 | int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 85 | int havedict; /* true if dictionary provided */ 86 | int flags; /* gzip header method and flags (0 if zlib) */ 87 | unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ 88 | unsigned long check; /* protected copy of check value */ 89 | unsigned long total; /* protected copy of output count */ 90 | gz_headerp head; /* where to save gzip header information */ 91 | /* sliding window */ 92 | unsigned wbits; /* log base 2 of requested window size */ 93 | unsigned wsize; /* window size or zero if not using window */ 94 | unsigned whave; /* valid bytes in the window */ 95 | unsigned wnext; /* window write index */ 96 | unsigned char FAR *window; /* allocated sliding window, if needed */ 97 | /* bit accumulator */ 98 | unsigned long hold; /* input bit accumulator */ 99 | unsigned bits; /* number of bits in "hold" */ 100 | /* for string and stored block copying */ 101 | unsigned length; /* literal or length of data to copy */ 102 | unsigned offset; /* distance back to copy string from */ 103 | /* for table and code decoding */ 104 | unsigned extra; /* extra bits needed */ 105 | /* fixed and dynamic code tables */ 106 | code const FAR *lencode; /* starting table for length/literal codes */ 107 | code const FAR *distcode; /* starting table for distance codes */ 108 | unsigned lenbits; /* index bits for lencode */ 109 | unsigned distbits; /* index bits for distcode */ 110 | /* dynamic table building */ 111 | unsigned ncode; /* number of code length code lengths */ 112 | unsigned nlen; /* number of length code lengths */ 113 | unsigned ndist; /* number of distance code lengths */ 114 | unsigned have; /* number of code lengths in lens[] */ 115 | code FAR *next; /* next available space in codes[] */ 116 | unsigned short lens[320]; /* temporary storage for code lengths */ 117
| unsigned short work[288]; /* work area for code table building */ 118 | code codes[ENOUGH]; /* space for code tables (ENOUGH = ENOUGH_LENS+ENOUGH_DISTS entries) */ 119 | int sane; /* if false, allow invalid distance too far */ 120 | int back; /* bits back of last unprocessed length/lit */ 121 | unsigned was; /* initial length of match */ 122 | }; 123 | -------------------------------------------------------------------------------- /src/zlib/inftrees.h: -------------------------------------------------------------------------------- 1 | /* inftrees.h -- header to use inftrees.c 2 | * Copyright (C) 1995-2005, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | /* Structure for decoding tables. Each entry provides either the 12 | information needed to do the operation requested by the code that 13 | indexed that table entry, or it provides a pointer to another 14 | table that indexes more bits of the code. op indicates whether 15 | the entry is a pointer to another table, a literal, a length or 16 | distance, an end-of-block, or an invalid code. For a table 17 | pointer, the low four bits of op is the number of index bits of 18 | that table. For a length or distance, the low four bits of op 19 | is the number of extra bits to get after the code. bits is 20 | the number of bits in this code or part of the code to drop off 21 | of the bit buffer. val is the actual byte to output in the case 22 | of a literal, the base length or distance, or the offset from 23 | the current table to the next table. Each entry is four bytes.
*/ 24 | typedef struct { 25 | unsigned char op; /* operation, extra bits, table bits */ 26 | unsigned char bits; /* bits in this part of the code */ 27 | unsigned short val; /* offset in table or code value */ 28 | } code; 29 | 30 | /* op values as set by inflate_table(): 31 | 00000000 - literal 32 | 0000tttt - table link, tttt != 0 is the number of table index bits 33 | 0001eeee - length or distance, eeee is the number of extra bits 34 | 01100000 - end of block 35 | 01000000 - invalid code 36 | */ 37 | 38 | /* Maximum size of the dynamic table. The maximum number of code structures is 39 | 1444, which is the sum of 852 for literal/length codes and 592 for distance 40 | codes. These values were found by exhaustive searches using the program 41 | examples/enough.c found in the zlib distribution. The arguments to that 42 | program are the number of symbols, the initial root table size, and the 43 | maximum bit length of a code. "enough 286 9 15" for literal/length codes 44 | returns 852, and "enough 30 6 15" for distance codes returns 592. 45 | The initial root table size (9 or 6) is found in the fifth argument of the 46 | inflate_table() calls in inflate.c and infback.c. If the root table size is 47 | changed, then these maximum sizes would need to be recalculated and 48 | updated.
*/ 49 | #define ENOUGH_LENS 852 50 | #define ENOUGH_DISTS 592 51 | #define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) 52 | 53 | /* Type of code to build for inflate_table() */ 54 | typedef enum { 55 | CODES, 56 | LENS, 57 | DISTS 58 | } codetype; 59 | 60 | int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, 61 | unsigned codes, code FAR * FAR *table, 62 | unsigned FAR *bits, unsigned short FAR *work)); 63 | -------------------------------------------------------------------------------- /src/zlib/trees.h: -------------------------------------------------------------------------------- 1 | /* header created automatically with -DGEN_TREES_H */ 2 | 3 | local const ct_data static_ltree[L_CODES+2] = { 4 | {{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, 5 | {{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, 6 | {{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, 7 | {{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, 8 | {{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, 9 | {{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, 10 | {{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, 11 | {{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, 12 | {{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, 13 | {{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, 14 | {{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, 15 | {{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, 16 | {{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, 17 | {{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{ 8}}, {{174},{ 8}}, 18 | {{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, 19 | {{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, 20 | {{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, 21 | 
{{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, 22 | {{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, 23 | {{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, 24 | {{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, 25 | {{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, 26 | {{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, 27 | {{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, 28 | {{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, 29 | {{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, 30 | {{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, 31 | {{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, 32 | {{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, 33 | {{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, 34 | {{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, 35 | {{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, 36 | {{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, 37 | {{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, 38 | {{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, 39 | {{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, 40 | {{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, 41 | {{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, 42 | {{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, 43 | {{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, 44 | {{ 39},{ 9}}, {{295},{ 9}}, {{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, 45 | {{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, 46 | {{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, 47 | {{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 
9}}, {{439},{ 9}}, 48 | {{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, 49 | {{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, 50 | {{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, 51 | {{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, 52 | {{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, 53 | {{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, 54 | {{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, 55 | {{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, 56 | {{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, 57 | {{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, 58 | {{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, 59 | {{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, 60 | {{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, 61 | {{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} 62 | }; 63 | 64 | local const ct_data static_dtree[D_CODES] = { 65 | {{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, 66 | {{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, 67 | {{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, 68 | {{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, 69 | {{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, 70 | {{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} 71 | }; 72 | 73 | const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { 74 | 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 75 | 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 76 | 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 77 | 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 78 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 79 | 13, 13, 
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 80 | 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 81 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 82 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 83 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 84 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 85 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 86 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, 87 | 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 88 | 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 89 | 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 90 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 91 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 92 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 93 | 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 94 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 95 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 96 | 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 97 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 98 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 99 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 100 | }; 101 | 102 | const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { 103 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 104 | 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 105 | 17, 17, 17, 
17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 106 | 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 107 | 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 108 | 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 109 | 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 110 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 111 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 112 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 113 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 114 | 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 115 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 116 | }; 117 | 118 | local const int base_length[LENGTH_CODES] = { 119 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 120 | 64, 80, 96, 112, 128, 160, 192, 224, 0 121 | }; 122 | 123 | local const int base_dist[D_CODES] = { 124 | 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 125 | 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 126 | 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 127 | }; 128 | 129 | -------------------------------------------------------------------------------- /src/zlib/zutil.h: -------------------------------------------------------------------------------- 1 | /* zutil.h -- internal interface and configuration of the compression library 2 | * Copyright (C) 1995-2013 Jean-loup Gailly. 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 
9 | */ 10 | 11 | /* @(#) $Id$ */ 12 | 13 | #ifndef ZUTIL_H 14 | #define ZUTIL_H 15 | 16 | #ifdef HAVE_HIDDEN 17 | # define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) 18 | #else 19 | # define ZLIB_INTERNAL 20 | #endif 21 | 22 | #include "zlib.h" 23 | 24 | #if defined(STDC) && !defined(Z_SOLO) 25 | # if !(defined(_WIN32_WCE) && defined(_MSC_VER)) 26 | # include <stddef.h> /* ptrdiff_t */ 27 | # endif 28 | # include <string.h> /* memcpy/memcmp/memset behind zmemcpy/zmemcmp/zmemzero */ 29 | # include <stdlib.h> 30 | #endif 31 | 32 | #ifdef Z_SOLO 33 | typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ 34 | #endif 35 | 36 | #ifndef local 37 | # define local static 38 | #endif 39 | /* compile with -Dlocal if your debugger can't find static symbols */ 40 | 41 | typedef unsigned char uch; 42 | typedef uch FAR uchf; 43 | typedef unsigned short ush; 44 | typedef ush FAR ushf; 45 | typedef unsigned long ulg; 46 | 47 | extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ 48 | /* (size given to avoid silly warnings with Visual C++) */ 49 | 50 | #define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] 51 | 52 | #define ERR_RETURN(strm,err) \ 53 | return (strm->msg = ERR_MSG(err), (err)) 54 | /* To be used only when the state is known to be valid */ 55 | 56 | /* common constants */ 57 | 58 | #ifndef DEF_WBITS 59 | # define DEF_WBITS MAX_WBITS 60 | #endif 61 | /* default windowBits for decompression.
MAX_WBITS is for compression only */ 62 | 63 | #if MAX_MEM_LEVEL >= 8 64 | # define DEF_MEM_LEVEL 8 65 | #else 66 | # define DEF_MEM_LEVEL MAX_MEM_LEVEL 67 | #endif 68 | /* default memLevel */ 69 | 70 | #define STORED_BLOCK 0 71 | #define STATIC_TREES 1 72 | #define DYN_TREES 2 73 | /* The three kinds of block type */ 74 | 75 | #define MIN_MATCH 3 76 | #define MAX_MATCH 258 77 | /* The minimum and maximum match lengths */ 78 | 79 | #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ 80 | 81 | /* target dependencies */ 82 | 83 | #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) 84 | # define OS_CODE 0x00 85 | # ifndef Z_SOLO 86 | # if defined(__TURBOC__) || defined(__BORLANDC__) 87 | # if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) 88 | /* Allow compilation with ANSI keywords only enabled */ 89 | void _Cdecl farfree( void *block ); 90 | void *_Cdecl farmalloc( unsigned long nbytes ); 91 | # else 92 | # include <alloc.h> 93 | # endif 94 | # else /* MSC or DJGPP */ 95 | # include <malloc.h> 96 | # endif 97 | # endif 98 | #endif 99 | 100 | #ifdef AMIGA 101 | # define OS_CODE 0x01 102 | #endif 103 | 104 | #if defined(VAXC) || defined(VMS) 105 | # define OS_CODE 0x02 106 | # define F_OPEN(name, mode) \ 107 | fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") 108 | #endif 109 | 110 | #if defined(ATARI) || defined(atarist) 111 | # define OS_CODE 0x05 112 | #endif 113 | 114 | #ifdef OS2 115 | # define OS_CODE 0x06 116 | # if defined(M_I86) && !defined(Z_SOLO) 117 | # include <malloc.h> 118 | # endif 119 | #endif 120 | 121 | #if defined(MACOS) || defined(TARGET_OS_MAC) 122 | # define OS_CODE 0x07 123 | # ifndef Z_SOLO 124 | # if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os 125 | # include <unix.h> /* for fdopen */ 126 | # else 127 | # ifndef fdopen 128 | # define fdopen(fd,mode) NULL /* No fdopen() */ 129 | # endif 130 | # endif 131 | # endif 132 | #endif 133 | 134 | #ifdef TOPS20 135 | # define OS_CODE 0x0a 136 | #endif 137 |
138 | #ifdef WIN32 139 | # ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ 140 | # define OS_CODE 0x0b 141 | # endif 142 | #endif 143 | 144 | #ifdef __50SERIES /* Prime/PRIMOS */ 145 | # define OS_CODE 0x0f 146 | #endif 147 | 148 | #if defined(_BEOS_) || defined(RISCOS) 149 | # define fdopen(fd,mode) NULL /* No fdopen() */ 150 | #endif 151 | 152 | #if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX 153 | # if defined(_WIN32_WCE) 154 | # define fdopen(fd,mode) NULL /* No fdopen() */ 155 | # ifndef _PTRDIFF_T_DEFINED 156 | typedef int ptrdiff_t; 157 | # define _PTRDIFF_T_DEFINED 158 | # endif 159 | # else 160 | # define fdopen(fd,type) _fdopen(fd,type) 161 | # endif 162 | #endif 163 | 164 | #if defined(__BORLANDC__) && !defined(MSDOS) 165 | #pragma warn -8004 166 | #pragma warn -8008 167 | #pragma warn -8066 168 | #endif 169 | 170 | /* provide prototypes for these when building zlib without LFS */ 171 | #if !defined(_WIN32) && \ 172 | (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) 173 | ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); 174 | ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); 175 | #endif 176 | 177 | /* common defaults */ 178 | 179 | #ifndef OS_CODE 180 | # define OS_CODE 0x03 /* assume Unix */ 181 | #endif 182 | 183 | #ifndef F_OPEN 184 | # define F_OPEN(name, mode) fopen((name), (mode)) 185 | #endif 186 | 187 | /* functions */ 188 | 189 | #if defined(pyr) || defined(Z_SOLO) 190 | # define NO_MEMCPY 191 | #endif 192 | #if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) 193 | /* Use our own functions for small and medium model with MSC <= 5.0. 194 | * You may have to use the same strategy for Borland C (untested). 195 | * The __SC__ check is for Symantec.
196 | */ 197 | # define NO_MEMCPY 198 | #endif 199 | #if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) 200 | # define HAVE_MEMCPY 201 | #endif 202 | #ifdef HAVE_MEMCPY 203 | # ifdef SMALL_MEDIUM /* MSDOS small or medium model */ 204 | # define zmemcpy _fmemcpy 205 | # define zmemcmp _fmemcmp 206 | # define zmemzero(dest, len) _fmemset(dest, 0, len) 207 | # else 208 | # define zmemcpy memcpy 209 | # define zmemcmp memcmp 210 | # define zmemzero(dest, len) memset(dest, 0, len) 211 | # endif 212 | #else 213 | void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); 214 | int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); 215 | void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); 216 | #endif 217 | 218 | /* Diagnostic functions */ 219 | #ifdef DEBUG 220 | # include <stdio.h> /* fprintf, used by the Trace macros below */ 221 | extern int ZLIB_INTERNAL z_verbose; 222 | extern void ZLIB_INTERNAL z_error OF((char *m)); 223 | # define Assert(cond,msg) {if(!(cond)) z_error(msg);} 224 | # define Trace(x) {if (z_verbose>=0) fprintf x ;} 225 | # define Tracev(x) {if (z_verbose>0) fprintf x ;} 226 | # define Tracevv(x) {if (z_verbose>1) fprintf x ;} 227 | # define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} 228 | # define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} 229 | #else 230 | # define Assert(cond,msg) 231 | # define Trace(x) 232 | # define Tracev(x) 233 | # define Tracevv(x) 234 | # define Tracec(c,x) 235 | # define Tracecv(c,x) 236 | #endif 237 | 238 | #ifndef Z_SOLO 239 | voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, 240 | unsigned size)); 241 | void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); 242 | #endif 243 | 244 | #define ZALLOC(strm, items, size) \ 245 | (*((strm)->zalloc))((strm)->opaque, (items), (size)) 246 | #define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) 247 | #define TRY_FREE(s, p) {if (p) ZFREE(s, p);} 248 | 249 | /* Reverse the bytes in a 32-bit value */ 250 | #define ZSWAP32(q)
((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ 251 | (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) 252 | 253 | #endif /* ZUTIL_H */ 254 | --------------------------------------------------------------------------------