├── .gitignore ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── src ├── builtinmutation.h ├── cmdline.h ├── common.cpp ├── common.h ├── editdistance.cpp ├── editdistance.h ├── fastareader.cpp ├── fastareader.h ├── fastqreader.cpp ├── fastqreader.h ├── globalsettings.cpp ├── globalsettings.h ├── htmlreporter.cpp ├── htmlreporter.h ├── jsonreporter.cpp ├── jsonreporter.h ├── main.cpp ├── match.cpp ├── match.h ├── multihtmlreporter.cpp ├── multihtmlreporter.h ├── mutation.cpp ├── mutation.h ├── mutscan.cpp ├── mutscan.h ├── overlap.cpp ├── overlap.h ├── pescanner.cpp ├── pescanner.h ├── read.cpp ├── read.h ├── rollinghash.cpp ├── rollinghash.h ├── scanner-impl.h ├── scanner.h ├── sequence.cpp ├── sequence.h ├── sescanner.cpp ├── sescanner.h ├── unittest.cpp ├── unittest.h ├── util.h ├── vcfreader.cpp ├── vcfreader.h └── zlib │ ├── crc32.h │ ├── deflate.h │ ├── gzguts.h │ ├── inffast.h │ ├── inffixed.h │ ├── inflate.h │ ├── inftrees.h │ ├── trees.h │ ├── zconf.h │ ├── zlib.h │ └── zutil.h └── testdata ├── R1.fq ├── R1.fq.gz ├── R2.fq ├── R2.fq.gz ├── mutations.csv ├── sample_report.jpg ├── tinyref.fa └── tinyvcf.vcf /.gitignore: -------------------------------------------------------------------------------- 1 | se.sh 2 | report.html 3 | runtest.sh 4 | mutscan 5 | .DS_Store 6 | 7 | # Compiled Object files 8 | *.slo 9 | *.lo 10 | *.o 11 | *.obj 12 | 13 | # Dependency files 14 | *.d 15 | 16 | # Precompiled Headers 17 | *.gch 18 | *.pch 19 | 20 | # Compiled Dynamic libraries 21 | *.so 22 | *.dylib 23 | *.dll 24 | 25 | # Fortran module files 26 | *.mod 27 | *.smod 28 | 29 | # Compiled Static libraries 30 | *.lai 31 | *.la 32 | *.a 33 | *.lib 34 | 35 | # Executables 36 | *.exe 37 | *.out 38 | *.app 39 | 40 | # ignore example dir 41 | **/example/** 42 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image 2 | FROM ubuntu:16.04 3 
| 4 | ################## METADATA ###################### 5 | LABEL base.image="ubuntu:16.04" 6 | LABEL version="1" 7 | LABEL software.version="v1.14.0" 8 | 9 | ################## MAINTAINER ###################### 10 | MAINTAINER biolxy 11 | 12 | 13 | RUN mv /etc/apt/sources.list /etc/apt/sources.list.bkp && \ 14 | bash -c 'echo -e "deb mirror://mirrors.ubuntu.com/mirrors.txt xenial main restricted universe multiverse\n\ 15 | deb mirror://mirrors.ubuntu.com/mirrors.txt xenial-updates main restricted universe multiverse\n\ 16 | deb mirror://mirrors.ubuntu.com/mirrors.txt xenial-backports main restricted universe multiverse\n\ 17 | deb mirror://mirrors.ubuntu.com/mirrors.txt xenial-security main restricted universe multiverse\n\n" > /etc/apt/sources.list' && \ 18 | cat /etc/apt/sources.list.bkp >> /etc/apt/sources.list && \ 19 | cat /etc/apt/sources.list 20 | RUN sed -i s@/archive.ubuntu.com/@/mirrors.aliyun.com/@g /etc/apt/sources.list 21 | # RUN sed -i s@/archive.ubuntu.com/@/mirrors.tuna.tsinghua.edu.cn/@g /etc/apt/sources.list 22 | # RUN sed -i 's/deb src/# deb src/g' /etc/apt/sources.list 23 | 24 | RUN apt-get clean all && \ 25 | apt-get update && \ 26 | apt-get upgrade -y && \ 27 | apt-get install -y \ 28 | autotools-dev \ 29 | automake \ 30 | cmake \ 31 | curl \ 32 | grep \ 33 | sed \ 34 | dpkg \ 35 | fuse \ 36 | git \ 37 | wget \ 38 | zip \ 39 | openjdk-8-jre \ 40 | build-essential \ 41 | pkg-config \ 42 | python3 \ 43 | python3-dev \ 44 | python3-pip \ 45 | bzip2 \ 46 | ca-certificates \ 47 | libglib2.0-0 \ 48 | libxext6 \ 49 | libsm6 \ 50 | libxrender1 \ 51 | git \ 52 | mercurial \ 53 | subversion \ 54 | sudo \ 55 | zlib1g-dev && \ 56 | apt-get clean && \ 57 | apt-get purge && \ 58 | rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* \ 59 | && apt-get clean && apt-get purge 60 | 61 | 62 | RUN wget http://opengene.org/MutScan/mutscan 63 | RUN chmod a+x mutscan 64 | RUN mv mutscan /usr/local/bin/ 65 | ENV PATH=/usr/local/bin/:$PATH 66 | 67 | RUN mkdir /data /config 
68 | 69 | # Add user biodocker with password biodocker 70 | RUN groupadd fuse && \ 71 | useradd --create-home --shell /bin/bash --user-group --uid 1000 --groups sudo,fuse biodocker && \ 72 | echo `echo "biodocker\nbiodocker\n" | passwd biodocker` && \ 73 | chown biodocker:biodocker /data && \ 74 | chown biodocker:biodocker /config 75 | 76 | WORKDIR /data 77 | VOLUME ["/data", "/config"] 78 | 79 | # Overwrite this with 'CMD []' in a dependent Dockerfile 80 | 81 | ENTRYPOINT ["mutscan"] 82 | CMD ["-h"] 83 | 84 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 OpenGene - Open Source Genetics Toolbox 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DIR_INC = ./inc 2 | DIR_SRC = ./src 3 | DIR_OBJ = ./obj 4 | BINDIR=/usr/local/bin 5 | 6 | SRC = $(wildcard ${DIR_SRC}/*.cpp) 7 | OBJ = $(patsubst %.cpp,${DIR_OBJ}/%.o,$(notdir ${SRC})) 8 | 9 | TARGET = mutscan 10 | 11 | BIN_TARGET = ${TARGET} 12 | 13 | CC = g++ 14 | CPPFLAGS = -Wall 15 | CFLAGS = -std=c++11 -g -I${DIR_INC} 16 | 17 | ${BIN_TARGET}:${OBJ} 18 | $(CC) $(OBJ) -lz -lpthread -o $@ 19 | 20 | ${DIR_OBJ}/%.o:${DIR_SRC}/%.cpp 21 | @mkdir -p "${DIR_OBJ}" 22 | $(CC) $(CPPFLAGS) $(CFLAGS) -MMD -O3 -c $< -o $@ 23 | 24 | -include $(wildcard ${DIR_OBJ}/*.d) 25 | # clean and install are commands, not files; rm -f keeps clean from aborting when an artifact is already absent 26 | .PHONY: clean install 27 | clean: 28 | rm -f ${DIR_OBJ}/*.o 29 | rm -f ${DIR_OBJ}/*.d 30 | rm -f ${TARGET} 31 | 32 | install: 33 | install $(TARGET) $(DESTDIR)$(BINDIR)/$(TARGET) 34 | @echo "Installed." 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![install with conda]( 2 | https://anaconda.org/bioconda/mutscan/badges/version.svg)](https://anaconda.org/bioconda/mutscan) 3 | # MutScan 4 | Detect and visualize target mutations by scanning FastQ files directly 5 | * [Features](#features) 6 | * [Application scenarios](#application-scenarios) 7 | * [Take a quick glance](#take-a-quick-glance) 8 | * [Download, compile and install](#get-mutscan) 9 | * [HTML report](#html-report) 10 | * [JSON report](#json-report) 11 | * [All options](#all-options) 12 | * [Customize your mutation file](#mutation-file) 13 | * [Work with BAM/CRAM](#work-with-bamcram) 14 | * [Remarks](#remarks) 15 | * [Cite MutScan](#cite-mutscan) 16 | 17 | # Features 18 | * Ultra sensitive, guarantee that all reads supporting the mutations will be detected 19 | * Can be 50X+ faster than normal pipeline (i.e. BWA + Samtools + GATK/VarScan/Mutect).
20 | * Very easy to use and needs nothing else. No alignment, no reference genome, no variant call, no... 21 | * Contains built-in definitions of the most actionable cancer-related mutation points, like EGFR p.L858R, BRAF p.V600E... 22 | * Beautiful HTML report with informative pileup visualization. 23 | * Multi-threading support. 24 | * Supports both single-end and paired-end data. 25 | * For paired-end data, MutScan will try to merge each pair, and do quality adjustment and error correction. 26 | * Able to scan the mutations in a VCF file, which can be used to visualize called variants. 27 | * Can be used to filter false-positive mutations, e.g. MutScan can handle highly repetitive sequences to avoid false INDEL calling. 28 | 29 | # Application scenarios: 30 | * you are interested in certain mutations (like druggable cancer mutations), and want to check whether the given FastQ files contain them. 31 | * you do not have enough confidence in the mutations called by your pipeline, so you want to visualize and validate them to avoid false positive calling. 32 | * you worry that your pipeline uses too strict filtering and may cause some false negatives, so you want to check that in a fast way. 33 | * you want to visualize the called mutation and take a screenshot with its clear pileup information. 34 | * you called a lot of INDEL mutations, and you worry that they are mainly false positives (especially in highly repetitive regions) 35 | * you want to validate and visualize every record in the VCF called by your pipeline. 36 | * ...
37 | 38 | # Take a quick glance 39 | * Sample HTML report: http://opengene.org/MutScan/report.html 40 | * Sample JSON report: http://opengene.org/MutScan/report.json 41 | * Dataset for testing: http://opengene.org/dataset.html 42 | * Command to test 43 | ```shell 44 | mutscan -1 R1.fq.gz -2 R2.fq.gz 45 | ``` 46 | 47 | # Get MutScan 48 | ## install with Bioconda 49 | [![install with conda]( 50 | https://anaconda.org/bioconda/mutscan/badges/version.svg)](https://anaconda.org/bioconda/mutscan) 51 | ```shell 52 | conda install -c bioconda mutscan 53 | ``` 54 | ## download binary 55 | This binary is only for Linux systems: http://opengene.org/MutScan/mutscan 56 | ```shell 57 | # this binary was compiled on CentOS, and tested on CentOS/Ubuntu 58 | wget http://opengene.org/MutScan/mutscan 59 | chmod a+x ./mutscan 60 | ``` 61 | ## or compile from source 62 | ```shell 63 | # get source (you can also use browser to download from master or releases) 64 | git clone https://github.com/OpenGene/MutScan.git 65 | 66 | # build 67 | cd MutScan 68 | make 69 | 70 | # Install 71 | sudo make install 72 | ``` 73 | 74 | # Windows version (may not be the latest version) 75 | If you want to compile MutScan on Windows, you should use `cygwin`. We already built one with cygwin-2.6.0/g++ 5.4, and it can be downloaded from:   76 | http://opengene.org/MutScan/windows_mutscan.zip 77 | 78 | # HTML report 79 | * An HTML report will be generated, and written to the given filename. See http://opengene.org/MutScan/report.html for an example. 80 | * ***If you run the command on your Linux server and want to view the HTML report on your local system, DO remember to copy both `xxxx.html` and `xxxx.html.files` and keep them in the same folder, then click `xxxx.html` to view it in a browser.*** 81 | * The default file name is `mutscan.html`, and a folder `mutscan.html.files` will also be generated. 82 | * By default, an individual HTML file will be generated for each found mutation.
But you can specify `-s` or `--standalone` to put all mutations in a single HTML file. Be cautious with this mode if you are scanning too many records (for example, scanning a VCF): it will give you a very big HTML file that the browser may not be able to load. 83 | * Here is a screenshot for the pileup of a mutation (EGFR p.T790M) generated by MutScan: 84 | 85 | ![image](http://www.opengene.org/MutScan/t790m.png) 86 | * A pileup of the EGFR p.T790M mutation is displayed above. EGFR p.T790M is a very important druggable mutation for lung cancer. 87 | * The color of each base indicates its quality, and the quality will be shown on mouse over. 88 | * In the first column, d means the edit distance of the match, --> means forward, and <-- means reverse 89 | 90 | # JSON report 91 | The JSON report is disabled by default. You can enable it by specifying a JSON file name using `-j` or `--json`. A JSON report looks like this: 92 | 93 | ```json 94 | { 95 | "command":"./mutscan -1 /Users/shifu/data/fq/S010_20170320003-4_ffpedna_pan-cancer-v1_S10_R1_001.fastq -2 /Users/shifu/data/fq/S010_20170320003-4_ffpedna_pan-cancer-v1_S10_R2_001.fastq -h z.html -j z.json -v --simplified=off ", 96 | "version":"1.14.0", 97 | "time":"2018-05-15 15:48:21", 98 | "mutations":{ 99 | "NRAS-neg-1-115258747-2-c.35G>C-p.G12A-COSM565":{ 100 | "chr":"chr1", 101 | "ref":["TGGATTGTCAGTGCGCTTTTCCCAACACCA","G","CTGCTCCAACCACCACCAGTTTGTACTCAG"], 102 | "reads":[ 103 | { 104 | "breaks":[31,61,62,76], 105 | "seq":"ATATTCATCTACAAAGTGGTTCTGGATTAGCTGGATTGTCAGTGCGCTTTTCCCAACACCAGCTGCTCCAACCACC", 106 | "qual":"eeeeeiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiieiiiiiiiiiiieieeeee" 107 | }, 108 | { 109 | "breaks":[31,61,62,76], 110 | "seq":"ATATTCATCTACAAAGTGGTTCTGGATTAGCTGGATTGTCAGTGCGCTTTTCCCAACACCAGCTGCTCCAACCACC", 111 | "qual":"eeeeeiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiieeeee" 112 | } 113 | ] 114 | }, 115 | "PIK3CA-pos-3-178936082-9-c.1624G>A-E542K-COSM760":{ 116 | "chr":"chr3", 117 |
"ref":["AAAGCAATTTCTACACGAGATCCTCTCTCT","A","AAATCACTGAGCAGGAGAAAGATTTTCTAT"], 118 | "reads":[ 119 | { 120 | "breaks":[22,52,53,83], 121 | "seq":"GGAAAATGACAAAGAACAGCTCAAAGCAATTTCTACACGAGATCCTCTCTCTAAAATCACTGAGCAGGAGAAAGATTTTCCAAAGATGTTTCTCAGAACGCTGCAGTCTGCAATTTGTATGAATTCCC", 122 | "qual":"eeeeeiiiQiiiiiieiiiieiSeiiiiiie`iiii`i`iiiiiiiiiiiiii`iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiaiiiiiiiiiiiiiiiiiieiiiiiieeeee" 123 | }, 124 | { 125 | "breaks":[0,27,28,58], 126 | "seq":"GCAATTTCTACACGAGATCCTCTCTCTAAAATCACTGCGCAGGAGAAAGATTTTCTATGGACCACAGGTAAGTGCTAAAATGGAGATTCTCTGTTTCTTTTTCTTTATTACAGAAAAAATAACTGACTTTGGCTGATCTCAGCATGTTTTTACCATACC", 127 | "qual":"AAAAAEEEEiieiiieiiiiiiiiiieiiiiiiiie``iiiiiieiiiiiiiiiieiiiieiieieeiiiSiiiiiieiiiiiiiiiiiiiieiiiiiSiiiiiiiiiiiiieiiiiiiiiiiii`ieiiieiii`ieiiiii`eS``eieEEEAAAAA" 128 | } 129 | ] 130 | } 131 | } 132 | } 133 | ``` 134 | 135 | # All options 136 | ```shell 137 | usage: mutscan -1 -2 [options]... 138 | options: 139 | -1, --read1 read1 file name, required 140 | -2, --read2 read2 file name 141 | -m, --mutation mutation file name, can be a CSV format or a VCF format 142 | -r, --ref reference fasta file name (only needed when mutation file is a VCF) 143 | -h, --html filename of html report, default is mutscan.html in work directory 144 |  -j, --json               filename of JSON report, default is no JSON report (string [=]) 145 | -t, --thread worker thread number, default is 4 146 | -S, --support min read support required to report a mutation, default is 2. 147 | -k, --mark when mutation file is a vcf file, --mark means only process the records with FILTER column is M 148 | -l, --legacy use legacy mode, usually much slower but may be able to find a little more reads in certain case 149 | -s, --standalone output standalone HTML report with single file. Don't use this option when scanning too many target mutations (i.e. >1000 mutations) 150 | -n, --no-original-reads dont output original reads in HTML and text output. 
Will make HTML report files a bit smaller 151 | -?, --help print this message 152 | ``` 153 | The plain text result, which contains the detected mutations and their supporting reads, will be printed directly. You can use `>` to redirect output to a file, like: 154 | ```shell 155 | mutscan -1 <read1_file_name> -2 <read2_file_name> > result.txt 156 | ``` 157 | MutScan generates a very informative HTML report; the default is `mutscan.html` in the working directory. You can change the file name with the `-h` argument, like: 158 | ``` 159 | mutscan -1 <read1_file_name> -2 <read2_file_name> -h report.html 160 | ``` 161 | ## single-end and paired-end 162 | For single-end sequencing data, the `-2` argument is omitted: 163 | ``` 164 | mutscan -1 <read1_file_name> 165 | ``` 166 | ## multi-threading 167 | The `-t` argument specifies how many worker threads will be launched. The default thread number is `4`. We suggest using a number smaller than the number of CPU cores in your system. 168 | 169 | # Mutation file 170 | * Mutation file, specified by `-m`, can be a `CSV file`, or a `VCF file`. 171 | * If `-m` is not specified, MutScan will use the built-in default mutation file with about 60 cancer-related mutation points. 172 | * If a CSV is provided, no reference genome assembly is needed. 173 | * If a VCF is provided, the corresponding reference genome assembly (e.g. ucsc.hg19.fasta) should be provided, and it should not be zipped.
174 | 175 | ## CSV-format mutation file 176 | A CSV file with columns of `name`, `left_seq_of_mutation_point`, `mutation_seq`, `right_seq_of_mutation_point` and `chromosome(optional)` 177 | ```csv 178 | #name, left_seq_of_mutation_point, mutation_seq, right_seq_of_mutation_point, chromosome 179 | NRAS-neg-1-115258748-2-c.34G>A-p.G12S-COSM563, GGATTGTCAGTGCGCTTTTCCCAACACCAC, T, TGCTCCAACCACCACCAGTTTGTACTCAGT, chr1 180 | NRAS-neg-1-115252203-2-c.437C>T-p.A146V-COSM4170228, TGAAAGCTGTACCATACCTGTCTGGTCTTG, A, CTGAGGTTTCAATGAATGGAATCCCGTAAC, chr1 181 | BRAF-neg-7-140453136-15-c.1799T>A -V600E-COSM476, AACTGATGGGACCCACTCCATCGAGATTTC, T, CTGTAGCTAGACCAAAATCACCTATTTTTA, chr7 182 | EGFR-pos-7-55241677-18-c.2125G>A-p.E709K-COSM12988, CCCAACCAAGCTCTCTTGAGGATCTTGAAG, A, AAACTGAATTCAAAAAGATCAAAGTGCTGG, chr7 183 | EGFR-pos-7-55241707-18-c.2155G>A-p.G719S-COSM6252, GAAACTGAATTCAAAAAGATCAAAGTGCTG, A, GCTCCGGTGCGTTCGGCACGGTGTATAAGG, chr7 184 | EGFR-pos-7-55241707-18-c.2155G>T-p.G719C-COSM6253, GAAACTGAATTCAAAAAGATCAAAGTGCTG, T, GCTCCGGTGCGTTCGGCACGGTGTATAAGG, chr7 185 | ``` 186 | `testdata/mutations.csv` gives an example of CSV-format mutation file 187 | 188 | ## VCF-format mutation file 189 | A standard VCF can be used as a mutation file, with file extension `.vcf` or `.VCF`. If the mutation file is a VCF file, you should specify the `reference assembly file` by `-r `. For example the command can be: 190 | ```shell 191 | mutscan -1 R1.fq -2 R2.fq -m target.vcf -r hg19.fa 192 | ``` 193 | 194 | # Work with BAM/CRAM 195 | If you want to run MutScan with BAM/CRAM files, you can use `samtools` to convert them to FASTQ files using `samtools fastq` command, both single-end and paired-end data are supported by latest version of `samtools fastq`. 
196 | 197 | # Remarks 198 | * `MutScan` requires at least 50 bp long reads, if your reads are too short, do not use it 199 | * If you want to extract mutations even with only one read support, add `-S 1` or `--support=1` in the command 200 | * Feel free to raise an issue if you meet any problem 201 | 202 | # Cite MutScan 203 | Shifu Chen, Tanxiao Huang, TieXiang Wen, Hong Li, Mingyan Xu and Jia Gu. MutScan: fast detection and visualization of target mutations by scanning FASTQ data. BMC Bioinformatics. https://doi.org/10.1186/s12859-018-2024-6 204 | -------------------------------------------------------------------------------- /src/builtinmutation.h: -------------------------------------------------------------------------------- 1 | #ifndef BUILT_IN_MUTATION_H 2 | #define BUILT_IN_MUTATION_H 3 | 4 | #include 5 | #include 6 | 7 | const string BUILT_IN_MUTATIONS = string("#name, left_seq_of_mutation_point, mutation_seq, right_seq_of_mutation_point, chromosome\n") + 8 | "KRAS-neg-12-25398281-2-c.38G>C-p.G13A-COSM533, AGCTGTATCGTCAAGGCACTCTTGCCTACG, G, CACCAGCTCCAACTACCACAAGTTTATATT, chr12\n" + 9 | "KRAS-neg-12-25398281-2-c.38G>A-p.G13D-COSM1140132, AGCTGTATCGTCAAGGCACTCTTGCCTACG, T, CACCAGCTCCAACTACCACAAGTTTATATT, chr12\n" + 10 | "KRAS-neg-12-25398281-2-c.38G>T-p.G13V-COSM1152504, AGCTGTATCGTCAAGGCACTCTTGCCTACG, A, CACCAGCTCCAACTACCACAAGTTTATATT, chr12\n" + 11 | "KRAS-neg-12-25398282-2-c.37G>T-p.G13C-COSM1152505, GCTGTATCGTCAAGGCACTCTTGCCTACGC, A, ACCAGCTCCAACTACCACAAGTTTATATTC, chr12\n" + 12 | "KRAS-neg-12-25398282-2-c.37G>A-p.G13S-COSM528, GCTGTATCGTCAAGGCACTCTTGCCTACGC, T, ACCAGCTCCAACTACCACAAGTTTATATTC, chr12\n" + 13 | "KRAS-neg-12-25398284-2-c.35G>A-p.G12D-COSM521, TGTATCGTCAAGGCACTCTTGCCTACGCCA, T, CAGCTCCAACTACCACAAGTTTATATTCAG, chr12\n" + 14 | "KRAS-neg-12-25398284-2-c.35G>T-p.G12V-COSM1140133, TGTATCGTCAAGGCACTCTTGCCTACGCCA, A, CAGCTCCAACTACCACAAGTTTATATTCAG, chr12\n" + 15 | "KRAS-neg-12-25398284-2-c.35G>C-p.G12A-COSM522, TGTATCGTCAAGGCACTCTTGCCTACGCCA, G, 
CAGCTCCAACTACCACAAGTTTATATTCAG, chr12\n" + 16 | "KRAS-neg-12-25398285-2-c.34G>T-p.G12C-COSM516, GTATCGTCAAGGCACTCTTGCCTACGCCAC, A, AGCTCCAACTACCACAAGTTTATATTCAGT, chr12\n" + 17 | "KRAS-neg-12-25398285-2-c.34G>C-p.G12R-COSM518, GTATCGTCAAGGCACTCTTGCCTACGCCAC, G, AGCTCCAACTACCACAAGTTTATATTCAGT, chr12\n" + 18 | "KRAS-neg-12-25398285-2-c.34G>A-p.G12S-COSM1152506, GTATCGTCAAGGCACTCTTGCCTACGCCAC, T, AGCTCCAACTACCACAAGTTTATATTCAGT, chr12\n" + 19 | "NRAS-neg-1-115256529-3-c.182A>G-p.Q61R-COSM584, TATTGGTCTCTCATGGCACTGTACTCTTCT, C, GTCCAGCTGTATCCAGTATGTCCAACAAAC, chr1\n" + 20 | "NRAS-neg-1-115256530-3-c.181C>A-p.Q61K-COSM580, ATTGGTCTCTCATGGCACTGTACTCTTCTT, T, TCCAGCTGTATCCAGTATGTCCAACAAACA, chr1\n" + 21 | "NRAS-neg-1-115258747-2-c.35G>C-p.G12A-COSM565, TGGATTGTCAGTGCGCTTTTCCCAACACCA, G, CTGCTCCAACCACCACCAGTTTGTACTCAG, chr1\n" + 22 | "NRAS-neg-1-115258747-2-c.35G>A-p.G12D-COSM564, TGGATTGTCAGTGCGCTTTTCCCAACACCA, T, CTGCTCCAACCACCACCAGTTTGTACTCAG, chr1\n" + 23 | "NRAS-neg-1-115258747-2-c.35G>T-p.G12V-COSM566, TGGATTGTCAGTGCGCTTTTCCCAACACCA, A, CTGCTCCAACCACCACCAGTTTGTACTCAG, chr1\n" + 24 | "NRAS-neg-1-115258748-2-c.34G>T-p.G12C-COSM562, GGATTGTCAGTGCGCTTTTCCCAACACCAC, A, TGCTCCAACCACCACCAGTTTGTACTCAGT, chr1\n" + 25 | "NRAS-neg-1-115258748-2-c.34G>A-p.G12S-COSM563, GGATTGTCAGTGCGCTTTTCCCAACACCAC, T, TGCTCCAACCACCACCAGTTTGTACTCAGT, chr1\n" + 26 | "NRAS-neg-1-115252203-2-c.437C>T-p.A146V-COSM4170228, TGAAAGCTGTACCATACCTGTCTGGTCTTG, A, CTGAGGTTTCAATGAATGGAATCCCGTAAC, chr1\n" + 27 | "BRAF-neg-7-140453136-15-c.1799T>A -V600E-COSM476, AACTGATGGGACCCACTCCATCGAGATTTC, T, CTGTAGCTAGACCAAAATCACCTATTTTTA, chr7\n" + 28 | "EGFR-pos-7-55241677-18-c.2125G>A-p.E709K-COSM12988, CCCAACCAAGCTCTCTTGAGGATCTTGAAG, A, AAACTGAATTCAAAAAGATCAAAGTGCTGG, chr7\n" + 29 | "EGFR-pos-7-55241707-18-c.2155G>A-p.G719S-COSM6252, GAAACTGAATTCAAAAAGATCAAAGTGCTG, A, GCTCCGGTGCGTTCGGCACGGTGTATAAGG, chr7\n" + 30 | "EGFR-pos-7-55241707-18-c.2155G>T-p.G719C-COSM6253, GAAACTGAATTCAAAAAGATCAAAGTGCTG, T, 
GCTCCGGTGCGTTCGGCACGGTGTATAAGG, chr7\n" + 31 | "EGFR-pos-7-55241708-18-c.2156G>C-p.G719A-COSM6239, AAACTGAATTCAAAAAGATCAAAGTGCTGG, C, CTCCGGTGCGTTCGGCACGGTGTATAAGGT, chr7\n" + 32 | "EGFR-pos-7-55241708-18-c.2156G>A-p.G719D-COSM18425, AAACTGAATTCAAAAAGATCAAAGTGCTGG, A, CTCCGGTGCGTTCGGCACGGTGTATAAGGT, chr7\n" + 33 | "EGFR-pos-7-55242470-19-c.2240T>C-p.L747S-COSM26704, AAGTTAAAATTCCCGTCGCTATCAAGGAAT, C, AAGAGAAGCAACATCTCCGAAAGCCAACAA, chr7\n" + 34 | "EGFR-pos-7-55242511-19-c.2281G>T-p.D761Y-COSM21984, ACATCTCCGAAAGCCAACAAGGAAATCCTC, T, ATGTGAGTTTCTGCTTTGCTGTGTGGGGGT, chr7\n" + 35 | "EGFR-pos-7-55249005-20-c.2303G>T-p.S768I -COSM6241, CCTCCCTCCAGGAAGCCTACGTGATGGCCA, T, CGTGGACAACCCCCACGTGTGCCGCCTGCT, chr7\n" + 36 | "EGFR-pos-7-55249071-20-c.2369C>T-p.T790M-COSM6240, TCTGCCTCACCTCCACCGTGCAGCTCATCA, T, GCAGCTCATGCCCTTCGGCTGCCTCCTGGA, chr7\n" + 37 | "EGFR-pos-7-55249091-20-c.2389T>A-p.C797S-COSM5010368, CAGCTCATCACGCAGCTCATGCCCTTCGGC, A, GCCTCCTGGACTATGTCCGGGAACACAAAG, chr7\n" + 38 | "EGFR-pos-7-55249092-20-c.2390G>C-p.C797S-COSM5010368, AGCTCATCACGCAGCTCATGCCCTTCGGCT, C, CCTCCTGGACTATGTCCGGGAACACAAAGA, chr7\n" + 39 | "EGFR-pos-7-55259502-21-c.2560A>G-p.T854A-COSM28537, CTGGTGAAAACACCGCAGCATGTCAAGATC, G, CAGATTTTGGGCTGGCCAAACTGCTGGGTG, chr7\n" + 40 | "EGFR-pos-7-55259515-21-c.2573T>G-p.L858R-COSM6224, CGCAGCATGTCAAGATCACAGATTTTGGGC, G, GGCCAAACTGCTGGGTGCGGAAGAGAAAGA, chr7\n" + 41 | "EGFR-pos-7-55259524-21-c.2582T>A-p.L861Q-COSM6213, TCAAGATCACAGATTTTGGGCTGGCCAAAC, A, GCTGGGTGCGGAAGAGAAAGAATACCATGC, chr7\n" + 42 | "EGFR-pos-7-55259524-21-c.2582T>G-p.L861R-COSM12374, TCAAGATCACAGATTTTGGGCTGGCCAAAC, G, GCTGGGTGCGGAAGAGAAAGAATACCATGC, chr7\n" + 43 | "PIK3CA-pos-3-178952085-21-c.3140A>G-H1047R-COSM775, AGTATTTCATGAAACAAATGAATGATGCAC, G, TCATGGTGGCTGGACAACAAAAATGGATTG, chr3\n" + 44 | "PIK3CA-pos-3-178952085-21-c.3140A>T-H1047L-COSM776, AGTATTTCATGAAACAAATGAATGATGCAC, T, TCATGGTGGCTGGACAACAAAAATGGATTG, chr3\n" + 45 | "PIK3CA-pos-3-178936091-9-c.1633G>A-E545K-COSM763, 
TCTACACGAGATCCTCTCTCTGAAATCACT, A, AGCAGGAGAAAGATTTTCTATGGAGTCACA, chr3\n" + 46 | "PIK3CA-pos-3-178936082-9-c.1624G>A-E542K-COSM760, AAAGCAATTTCTACACGAGATCCTCTCTCT, A, AAATCACTGAGCAGGAGAAAGATTTTCTAT, chr3\n" + 47 | "EGFR-pos-7-55242465:55242479-c.2235_2249del15-p.E746_A750delELREA-COSM6223, TGAGAAAGTTAAAATTCCCGTCGCTATCAA, , AACATCTCCGAAAGCCAACAAGGAAATCCT, chr7\n" + 48 | "EGFR-pos-7-55242466:55242480-c.2236_2250del15-p.E746_A750delELREA-COSM6225, GAGAAAGTTAAAATTCCCGTCGCTATCAAG, , ACATCTCCGAAAGCCAACAAGGAAATCCTC, chr7\n" + 49 | "EGFR-pos-7-55242466:55242483-c.2236_2253del18-p.E746_T751delELREAT-COSM12728, GAGAAAGTTAAAATTCCCGTCGCTATCAAG, , TCTCCGAAAGCCAACAAGGAAATCCTCGAT, chr7\n" + 50 | "EGFR-pos-7-55242470:55242487-c.2240_2257del18-p.L747_P753>S-COSM12370, AAGTTAAAATTCCCGTCGCTATCAAGGAAT, , CGAAAGCCAACAAGGAAATCCTCGATGTGA, chr7\n" + 51 | "EGFR-pos-7-55249013:55249014-c.2311_2312insGCGTGGACA-p.D770_N771insSVD-COSM13428, AGGAAGCCTACGTGATGGCCAGCGTGGACA, GCGTGGACA, ACCCCCACGTGTGCCGCCTGCTGGGCATCT, chr7\n" + 52 | "EGFR-pos-7-55242465:55242482-c.2235_2252>AAT(Complex)-p.E746_T751>I-COSM13551,TGAGAAAGTTAAAATTCCCGTCGCTATCAA,AAT,ATCTCCGAAAGCCAACAAGGAAATCCTCGA, chr7\n" + 53 | "EGFR-pos-7-55242467:55242485-c.2237_2255>T(Complex)-p.E746_S752>V-COSM12384,AGAAAGTTAAAATTCCCGTCGCTATCAAGG,T,TCCGAAAGCCAACAAGGAAATCCTCGATGT, chr7\n" + 54 | "EGFR-pos-7-55242468:55242482-c.2238_2252>GCA(Complex)-p.L747_T751>Q-COSM12419,GAAAGTTAAAATTCCCGTCGCTATCAAGGA,GCA,ATCTCCGAAAGCCAACAAGGAAATCCTCGA, chr7\n" + 55 | "EGFR-pos-7-55242469:55242477-c.2239_2247delTTAAGAGAA-p.L747_E749delLRE-COSM6218,AAAGTTAAAATTCCCGTCGCTATCAAGGAA,,GCAACATCTCCGAAAGCCAACAAGGAAATC, chr7\n" + 56 | "EGFR-pos-7-55242469:55242486-c.2239_2256del18-p.L747_S752delLREATS-COSM6255,AAAGTTAAAATTCCCGTCGCTATCAAGGAA,,CCGAAAGCCAACAAGGAAATCCTCGATGTG, chr7\n" + 57 | "EGFR-pos-7-55242469:55242478-c.2239_2248TTAAGAGAAG>C(Complex)-p.L747_A750>P-COSM12382,AAAGTTAAAATTCCCGTCGCTATCAAGGAA,C,CAACATCTCCGAAAGCCAACAAGGAAATCC, chr7\n" + 58 | 
"EGFR-pos-7-55242469:55242488-c.2239_2258>CA(Complex)-p.L747_P753>Q-COSM12387,AAAGTTAAAATTCCCGTCGCTATCAAGGAA,CA,GAAAGCCAACAAGGAAATCCTCGATGTGAG, chr7\n" + 59 | "EGFR-pos-7-55242470:55242484-c.2240_2254del15-p.L747_T751delLREAT-COSM12369,AAGTTAAAATTCCCGTCGCTATCAAGGAAT,,CTCCGAAAGCCAACAAGGAAATCCTCGATG, chr7\n" + 60 | "EGFR-pos-7-55242469:55242481-c.2239_2251>C(Complex)-p.L747_T751>P-COSM12383,AAAGTTAAAATTCCCGTCGCTATCAAGGAA,C,CATCTCCGAAAGCCAACAAGGAAATCCTCG, chr7\n" + 61 | "ERBB2-pos-17-37880995:37880996-c.2324_2325ins12-p.A775_G776insYVMA-COSM20959,CCCTTGTCCCCAGGAAGCATACGTGATGGC,ATACGTGATGGC,TGGTGTGGGCTCCCCATATGTCTCCCGCCT, chr17\n" + 62 | "ERBB2-pos-17-37880996:37880997-c.2325_2326ins12-p.A775_G776insYVMA-COSM12558,CCTTGTCCCCAGGAAGCATACGTGATGGCT,TACGTGATGGCT,GGTGTGGGCTCCCCATATGTCTCCCGCCTT, chr17\n" + 63 | "PDGFRA-pos-4-55152093-c.2525A>T-p.D842V-COSM736,TGAAGATCTGTGACTTTGGCCTGGCCAGAG,T,CATCATGCATGATTCGAACTATGTGTCGAA, chr4\n" + 64 | "PDGFRA-pos-4-55141052:55141066-c.1698_1712del15-p.S566_E571>R-COSM12418,AATTCGCTGGAGGGTCATTGAATCAATCAG, ,ATATATTTATGTGGACCCGATGCAGCTGCC, chr4\n" + 65 | "KIT-pos-4-55599321-c.2447A>T-p.D816V-COSM1314,CAAAGATTTGTGATTTTGGTCTAGCCAGAG,T,CATCAAGAATGATTCTAATTATGTGGTTAA, chr4\n" + 66 | "KIT-pos-4-55592185:55592186-c.1509_1510insGCCTAT-p.Y503_F504insAY-COSM1326,TACAACGATGTGGGCAAGACTTCTGCCTAT,GCCTAT,TTTAACTTTGCATTTAAAGGTAACAACAAA, chr4\n" + 67 | "KRAS-neg-12-25380275-c.183A>C-p.Q61H-COSM554,GTACTGGTCCCTCATTGCACTGTACTCCTC,G,TGACCTGCTGTGTCGAGAATATCCAAGAGA, chr12"; 68 | 69 | #endif 70 | -------------------------------------------------------------------------------- /src/common.cpp: -------------------------------------------------------------------------------- 1 | #include "common.h" -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #ifndef COMMON_H 2 | #define COMMON_H 3 | 4 | #define MUTSCAN_VER "1.14.1" 5 | 6 | typedef 
long int64; 7 | typedef unsigned long uint64; 8 | /* NOTE(review): int64/uint64 alias long/unsigned long, which are 64 bits wide only on LP64 targets (not on 32-bit or Windows LLP64 builds) - confirm the supported platforms. */ 9 | typedef int int32; 10 | typedef unsigned int uint32; 11 | 12 | typedef short int16; 13 | typedef unsigned short uint16; 14 | /* NOTE(review): plain char has implementation-defined signedness, so int8 is not guaranteed to be signed. */ 15 | typedef char int8; 16 | typedef unsigned char uint8; 17 | 18 | #pragma pack(2) 19 | /* Nothing is declared between pack(2) and pack(), so this pragma pair currently affects no type in this header. */ 20 | 21 | #pragma pack() 22 | 23 | // the limit of the queue to store the packs 24 | // an error may happen if more packs than this number are generated 25 | static const int PACK_NUM_LIMIT = 5000000; 26 | 27 | // how many reads one pack has 28 | static const int PACK_SIZE = 1000; 29 | 30 | // if one pack is produced, but not consumed, it will be kept in the memory 31 | // this number limits the number of in-memory packs 32 | // if the in-memory pack limit is reached, the producer thread should sleep 33 | static const int PACK_IN_MEM_LIMIT = 100; 34 | 35 | // warn if the number of reads is more than this 36 | static const int WARN_STANDALONE_READ_LIMIT = 10000; 37 | 38 | 39 | #endif /* COMMON_H */ 40 | -------------------------------------------------------------------------------- /src/editdistance.cpp: -------------------------------------------------------------------------------- 1 | // ------- 2 | // License 3 | // ------- 4 | // 5 | // It is released under the MIT license. 6 | // 7 | // Copyright (c) 2013 Hiroyuki Tanaka 8 | // 9 | // Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | // 11 | // The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 14 | 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "editdistance.h" 27 | 28 | using namespace std; 29 | /* Bit-parallel edit distance over 64-bit blocks. cmap[c][r] is read as the 64-bit match mask of character c for block r (blocks 0..tmax, the last one using only its low tlen bits). D starts at tmax*64+tlen and, for each of the vecsize characters of vec, is incremented or decremented according to the top used bit of HP/HN in the last block. */ 30 | template 31 | unsigned int edit_distance_bpv(T &cmap, char const *vec, size_t const &vecsize, unsigned int const &tmax, unsigned int const &tlen) { 32 | int D = tmax * 64 + tlen; 33 | TVALUE D0, HP, HN, VP, VN; 34 | uint64_t top = (1ULL << (tlen - 1)); // mask of the top used bit, applied to the last block 35 | uint64_t lmb = (1ULL << 63); 36 | /* The masks use an unsigned 64-bit 1: shifting a signed long up to bit 63 overflows, and long may be only 32 bits wide. VP starts all-ones in full blocks and with only the low tlen bits set in the last block. */ 37 | for(size_t i = 0; i <= tmax; ++i) { 38 | VP[i] = 0; 39 | VN[i] = 0; 40 | } 41 | for(size_t i = 0; i < tmax; ++i) VP[i] = ~0; 42 | for(size_t i = 0; i < tlen; ++i) VP[tmax] |= (1ULL << i); 43 | for(size_t i = 0; i < vecsize; ++i) { 44 | TVALUE &PM = cmap[vec[i]]; 45 | for(unsigned int r = 0; r <= tmax; ++r) { 46 | uint64_t X = PM[r]; 47 | if(r > 0 && (HN[r - 1] & lmb)) X |= 1L; 48 | D0[r] = (((X & VP[r]) + VP[r]) ^ VP[r]) | X | VN[r]; 49 | HP[r] = VN[r] | ~(D0[r] | VP[r]); 50 | HN[r] = D0[r] & VP[r]; 51 | X = (HP[r] << 1L); 52 | if(r == 0 || HP[r - 1] & lmb) X |= 1L; 53 | VP[r] = (HN[r] << 1L) | ~(D0[r] | X); 54 | if(r > 0 && (HN[r - 1] & lmb)) VP[r] |= 1L; 55 | VN[r] = D0[r] & X; 56 | } 57 | if(HP[tmax] & top) ++D; 58 | else if(HN[tmax] & top) --D; 59 | } 60 | return D; 61 | } 62 | 63 | 64 | /// c.f.
http://handasse.blogspot.com/2009/04/c_29.html 65 | template 66 | unsigned int edit_distance_dp(T const *str1, size_t const size1, T const *str2, size_t const size2) { 67 | vector< vector > d(size1 + 1, vector(size2 + 1)); 68 | for (size_t i = 0; i < size1 + 1; i++) d[i][0] = i; 69 | for (size_t i = 0; i < size2 + 1; i++) d[0][i] = i; 70 | for (size_t i = 1; i < size1 + 1; i++) { 71 | for (size_t j = 1; j < size2 + 1; j++) { 72 | d[i][j] = min(min(d[i-1][j], d[i][j-1]) + 1, d[i-1][j-1] + (str1[i-1] == str2[j-1] ? 0 : 1)); 73 | } 74 | } 75 | return d[size1][size2]; 76 | } 77 | 78 | template 79 | struct varr { 80 | uint64_t arr_[N]; 81 | uint64_t & operator[](size_t const &i) { 82 | return arr_[i]; 83 | } 84 | }; 85 | 86 | 87 | template 88 | unsigned int edit_distance_map_(char const *a, size_t const asize, char const *b, size_t const bsize) { 89 | typedef map > cmap_v; 90 | cmap_v cmap; 91 | unsigned int tmax = (asize - 1) >> 6; 92 | unsigned int tlen = asize - tmax * 64; 93 | for(size_t i = 0; i < tmax; ++i) { 94 | for(size_t j = 0; j < 64; ++j) cmap[a[i * 64 + j]][i] |= (1L << j); 95 | } 96 | for(size_t i = 0; i < tlen; ++i) cmap[a[tmax * 64 + i]][tmax] |= (1L << i); 97 | return edit_distance_bpv(cmap, b, bsize, tmax, tlen); 98 | } 99 | 100 | unsigned int edit_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize) { 101 | if(asize == 0) return bsize; 102 | else if(bsize == 0) return asize; 103 | char const *ap, *bp; 104 | unsigned int const *asizep, *bsizep; 105 | if(asize < bsize) ap = b, bp = a, asizep = &bsize, bsizep = &asize; 106 | else ap = a, bp = b, asizep = &asize, bsizep = &bsize; 107 | size_t vsize = ((*asizep - 1) >> 6) + 1; 108 | if(vsize > 10) { 109 | char const *_ = ap; 110 | unsigned int const *__ = asizep; 111 | ap = bp, bp = _, asizep = bsizep, bsizep = __; 112 | vsize = ((*asizep - 1) >> 6) + 1; 113 | } 114 | 115 | if(vsize == 1) return edit_distance_map_<1>(ap, *asizep, bp, *bsizep); 116 | else if(vsize == 2) 
return edit_distance_map_<2>(ap, *asizep, bp, *bsizep); 117 | else if(vsize == 3) return edit_distance_map_<3>(ap, *asizep, bp, *bsizep); 118 | else if(vsize == 4) return edit_distance_map_<4>(ap, *asizep, bp, *bsizep); 119 | else if(vsize == 5) return edit_distance_map_<5>(ap, *asizep, bp, *bsizep); 120 | else if(vsize == 6) return edit_distance_map_<6>(ap, *asizep, bp, *bsizep); 121 | else if(vsize == 7) return edit_distance_map_<7>(ap, *asizep, bp, *bsizep); 122 | else if(vsize == 8) return edit_distance_map_<8>(ap, *asizep, bp, *bsizep); 123 | else if(vsize == 9) return edit_distance_map_<9>(ap, *asizep, bp, *bsizep); 124 | else if(vsize == 10) return edit_distance_map_<10>(ap, *asizep, bp, *bsizep); 125 | return edit_distance_dp(ap, *asizep, bp, *bsizep); 126 | } 127 | 128 | unsigned int edit_distance(string a, string b) { 129 | return edit_distance(a.c_str(), a.length(), b.c_str(), b.length()); 130 | } 131 | 132 | unsigned int hamming_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize) { 133 | int dis = 0; 134 | for(unsigned int i=0; i 5 | #include 6 | 7 | // struct PatternMap { 8 | // uint64_t p_[256][4]; 9 | // unsigned int tmax_; 10 | // unsigned int tlen_; 11 | // }; 12 | 13 | using namespace std; 14 | 15 | unsigned int edit_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize); 16 | // void create_patternmap(struct PatternMap *pm, const int64_t *a, const unsigned int size); 17 | // unsigned int edit_distance_by_patternmap(struct PatternMap *mp, const int64_t *b, const unsigned int size); 18 | 19 | unsigned int edit_distance(string a, string b); 20 | 21 | unsigned int hamming_distance(const char *a, const unsigned int asize, const char *b, const unsigned int bsize); 22 | 23 | bool editdistance_test(); 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/fastareader.cpp: 
-------------------------------------------------------------------------------- 1 | 2 | #include "fastareader.h" 3 | #include "util.h" 4 | #include 5 | 6 | FastaReader::FastaReader(string faFile, bool forceUpperCase) 7 | { 8 | // Set locale and disable stdio synchronization to improve iostream performance 9 | // http://www.drdobbs.com/the-standard-librarian-iostreams-and-std/184401305 10 | // http://stackoverflow.com/questions/5166263/how-to-get-iostream-to-perform-better 11 | setlocale(LC_ALL,"C"); 12 | ios_base::sync_with_stdio(false); 13 | 14 | mFastaFile = faFile; 15 | mForceUpperCase = forceUpperCase; 16 | if (is_directory(mFastaFile)) { 17 | string error_msg = "There is a problem with the provided fasta file: \'"; 18 | error_msg.append(mFastaFile); 19 | error_msg.append("\' is a directory NOT a file...\n"); 20 | throw invalid_argument(error_msg); 21 | } 22 | mFastaFileStream.open( mFastaFile.c_str(),ios::in); 23 | // verify that the file can be read 24 | if (!mFastaFileStream.is_open()) { 25 | string msg = "There is a problem with the provided fasta file: could NOT read "; 26 | msg.append(mFastaFile.c_str()); 27 | msg.append("...\n"); 28 | throw invalid_argument(msg); 29 | } 30 | 31 | char c; 32 | // seek to first contig 33 | while (mFastaFileStream.get(c) && c != '>') { 34 | if (mFastaFileStream.eof()) { 35 | break; 36 | } 37 | } 38 | } 39 | 40 | FastaReader::~FastaReader() 41 | { 42 | if (mFastaFileStream.is_open()) { 43 | mFastaFileStream.close(); 44 | } 45 | } 46 | /* Reads one record, assuming the stream is positioned just after its '>' marker (the constructor and the previous call leave it there). The first line is the header: mCurrentID is its text up to the first space and mCurrentDescription the text after that space. Every following line up to the next '>' or EOF is passed through str_keep_valid_sequence and appended to mCurrentSequence. */ 47 | void FastaReader::readNext() 48 | { 49 | mCurrentID = ""; 50 | mCurrentDescription = ""; 51 | mCurrentSequence = ""; 52 | bool foundHeader = false; 53 | 54 | char c = '\0'; 55 | stringstream ssSeq; 56 | stringstream ssHeader; 57 | while(true){ 58 | /* stop at the next record marker, at EOF, or on any read failure; c is never inspected uninitialized */ 59 | if(!mFastaFileStream.get(c) || c == '>') 60 | break; 61 | 62 | /* c is the first character of the current line. A lone '\n' means the line is empty, so getline() is skipped: it would otherwise 63 | swallow the following line (possibly the next '>' header) into this record. */ 64 | string line = ""; 65 | if(c != '\n') { 66 | getline(mFastaFileStream,line,'\n'); 67 | /* put the first character back in front so it is treated exactly like the rest of the line */ 68 | line.insert(line.begin(), c); 69 | } 70 |
 71 | 72 | if(foundHeader == false) { 73 | ssHeader << line; 74 | foundHeader = true; 75 | } 76 | else { 77 | /* presumably filters/normalizes the line in place (helper from util.h) - the first character now goes through it too */ 78 | str_keep_valid_sequence(line, mForceUpperCase); 79 | ssSeq << line; 80 | } 81 | } 82 | mCurrentSequence = ssSeq.str(); 83 | string header = ssHeader.str(); 84 | 85 | /* find() returns string::npos when there is no space; substr(0, npos) then keeps the whole header as the ID */ 86 | size_t space = header.find(" "); 87 | mCurrentID = header.substr(0, space); 88 | if(space != string::npos) 89 | mCurrentDescription = header.substr(space + 1); 90 | } 91 | 92 | bool FastaReader::hasNext() { 93 | return !mFastaFileStream.eof(); 94 | } 95 | 96 | void FastaReader::readAll() { 97 | while(!mFastaFileStream.eof()){ 98 | readNext(); 99 | mAllContigs[mCurrentID] = mCurrentSequence; 100 | } 101 | } 102 | 103 | bool FastaReader::test(){ 104 | FastaReader reader("testdata/tinyref.fa"); 105 | reader.readAll(); 106 | 107 | string contig1 = "GATCACAGGTCTATCACCCTATTAATTGGTATTTTCGTCTGGGGGGTGTGGAGCCGGAGCACCCTATGTCGCAGT"; 108 | string contig2 = "GTCTGCACAGCCGCTTTCCACACAGAACCCCCCCCTCCCCCCGCTTCTGGCAAACCCCAAAAACAAAGAACCCTA"; 109 | 110 | if(reader.mAllContigs.count("contig1") == 0 || reader.mAllContigs.count("contig2") == 0 ) 111 | return false; 112 | 113 | if(reader.mAllContigs["contig1"] != contig1 || reader.mAllContigs["contig2"] != contig2 ) 114 | return false; 115 | 116 | return true; 117 | 118 | } 119 | 120 | 121 | 122 | -------------------------------------------------------------------------------- /src/fastareader.h: -------------------------------------------------------------------------------- 1 | #ifndef FASTA_READER_H 2 | #define FASTA_READER_H 3 | 4 | // includes 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | class FastaReader 17 | { 18 | public: 19 | FastaReader(string fastaFile, bool forceUpperCase = true); 20 | ~FastaReader(); 21 | bool hasNext(); 22 | void readNext(); 23 | void readAll(); 24 | 25 | inline string currentID() 26 | { 27 | return mCurrentID; 28 | } 29 | 30 | inline string
currentDescription() 31 | { 32 | return mCurrentDescription; 33 | } 34 | 35 | inline string currentSequence() 36 | { 37 | return mCurrentSequence; 38 | } 39 | 40 | inline map& contigs() { 41 | return mAllContigs; 42 | } 43 | 44 | static bool test(); 45 | 46 | 47 | public: 48 | string mCurrentSequence; 49 | string mCurrentID ; 50 | string mCurrentDescription; 51 | map mAllContigs; 52 | 53 | private: 54 | bool readLine(); 55 | bool endOfLine(char c); 56 | void setFastaSequenceIdDescription(); 57 | 58 | private: 59 | string mFastaFile; 60 | ifstream mFastaFileStream; 61 | bool mForceUpperCase; 62 | }; 63 | 64 | 65 | #endif 66 | 67 | -------------------------------------------------------------------------------- /src/fastqreader.cpp: -------------------------------------------------------------------------------- 1 | #include "fastqreader.h" 2 | #include "util.h" 3 | #include 4 | 5 | FastqReader::FastqReader(string filename, bool hasQuality){ 6 | mFilename = filename; 7 | mZipFile = NULL; 8 | mZipped = false; 9 | mHasQuality = hasQuality; 10 | init(); 11 | } 12 | 13 | FastqReader::~FastqReader(){ 14 | close(); 15 | } 16 | 17 | void FastqReader::init(){ 18 | if (isZipFastq(mFilename)){ 19 | mZipFile = gzopen(mFilename.c_str(), "r"); 20 | mZipped = true; 21 | } 22 | else if (isFastq(mFilename)){ 23 | mFile.open(mFilename.c_str(), ifstream::in); 24 | mZipped = false; 25 | } else { 26 | cerr << "ERROR: the input file should be fastq (.fq, .fastq) or gzipped fastq (.fq.gz, .fastq.gz)" << endl; 27 | exit(-1); 28 | } 29 | } 30 | 31 | bool FastqReader::getLine(char* line, int maxLine){ 32 | bool status = true; 33 | if(mZipped) 34 | status = gzgets(mZipFile, line, maxLine); 35 | else { 36 | mFile.getline(line, maxLine); 37 | status = !mFile.fail(); 38 | } 39 | 40 | // trim \n, \r or \r\n in the tail 41 | int readed = strlen(line); 42 | if(readed >=2 ){ 43 | if(line[readed-1] == '\n' || line[readed-1] == '\r'){ 44 | line[readed-1] = '\0'; 45 | if(line[readed-2] == '\r') 46 | 
line[readed-2] = '\0'; 47 | } 48 | } 49 | 50 | return status; 51 | } 52 | /* Reads one record as consecutive lines - name, sequence, strand and, when mHasQuality is set, quality - and returns it as a heap-allocated Read, or NULL when the gz handle is NULL or any of the lines cannot be read. NOTE(review): every line goes through a 1000-byte buffer, so a longer line presumably cannot be read as a single line - confirm before using with long reads. */ 53 | Read* FastqReader::read(){ 54 | const int maxLine = 1000; 55 | char line[maxLine]; 56 | if (mZipped){ 57 | if (mZipFile == NULL) 58 | return NULL; 59 | } 60 | 61 | if(!getLine(line, maxLine))return NULL; 62 | string name(line); 63 | 64 | if (!getLine(line, maxLine))return NULL; 65 | string sequence(line); 66 | 67 | if (!getLine(line, maxLine))return NULL; 68 | string strand(line); 69 | 70 | if (mHasQuality){ 71 | if (!getLine(line, maxLine))return NULL; 72 | string quality(line); 73 | Read* read = new Read(name, sequence, strand, quality); 74 | return read; 75 | } 76 | else { 77 | Read* read = new Read(name, sequence, strand); 78 | return read; 79 | } 80 | /* not reachable: both branches above return */ 81 | return NULL; 82 | } 83 | /* Releases whichever input is in use: the gz handle (then reset to NULL) or the ifstream. */ 84 | void FastqReader::close(){ 85 | if (mZipped){ 86 | if (mZipFile){ 87 | gzclose(mZipFile); 88 | mZipFile = NULL; 89 | } 90 | } 91 | else { 92 | if (mFile.is_open()){ 93 | mFile.close(); 94 | } 95 | } 96 | } 97 | /* True when filename ends with .fastq.gz, .fq.gz, .fasta.gz or .fa.gz. */ 98 | bool FastqReader::isZipFastq(string filename) { 99 | if (ends_with(filename, ".fastq.gz")) 100 | return true; 101 | else if (ends_with(filename, ".fq.gz")) 102 | return true; 103 | else if (ends_with(filename, ".fasta.gz")) 104 | return true; 105 | else if (ends_with(filename, ".fa.gz")) 106 | return true; 107 | else 108 | return false; 109 | } 110 | /* True when filename ends with .fastq, .fq, .fasta or .fa. */ 111 | bool FastqReader::isFastq(string filename) { 112 | if (ends_with(filename, ".fastq")) 113 | return true; 114 | else if (ends_with(filename, ".fq")) 115 | return true; 116 | else if (ends_with(filename, ".fasta")) 117 | return true; 118 | else if (ends_with(filename, ".fa")) 119 | return true; 120 | else 121 | return false; 122 | } 123 | /* Returns the mZipped flag. */ 124 | bool FastqReader::isZipped(){ 125 | return mZipped; 126 | } 127 | /* Self-test: reads testdata/R1.fq and testdata/R1.fq.gz in lockstep and compares the sequences of the reads. */ 128 | bool FastqReader::test(){ 129 | FastqReader reader1("testdata/R1.fq"); 130 | FastqReader reader2("testdata/R1.fq.gz"); 131 | Read* r1 = NULL; 132 | Read* r2 = NULL; 133 | while(true){ 134 | r1=reader1.read(); 135 |
r2=reader2.read(); 136 | if(r1 == NULL || r2 == NULL){ 137 | /* one input may still have produced a read; delete on NULL is a no-op */ delete r1; delete r2; break; } 138 | if(r1->mSeq.mStr != r2->mSeq.mStr){ 139 | delete r1; delete r2; return false; 140 | } 141 | delete r1; 142 | delete r2; 143 | } 144 | return true; 145 | } 146 | /* Wraps two existing readers; the destructor deletes both. */ 147 | FastqReaderPair::FastqReaderPair(FastqReader* left, FastqReader* right){ 148 | mLeft = left; 149 | mRight = right; 150 | } 151 | /* Opens one reader per file name. */ 152 | FastqReaderPair::FastqReaderPair(string leftName, string rightName){ 153 | mLeft = new FastqReader(leftName); 154 | mRight = new FastqReader(rightName); 155 | } 156 | 157 | FastqReaderPair::~FastqReaderPair(){ 158 | if(mLeft){ 159 | delete mLeft; 160 | mLeft = NULL; 161 | } 162 | if(mRight){ 163 | delete mRight; 164 | mRight = NULL; 165 | } 166 | } 167 | /* Reads one read from each input and returns them as a heap-allocated ReadPair, or NULL as soon as either input yields no read. When only one side produced a read, that read is deleted here: it is not handed to any ReadPair and would otherwise leak. */ 168 | ReadPair* FastqReaderPair::read(){ 169 | Read* l = mLeft->read(); 170 | Read* r = mRight->read(); 171 | if(!l || !r){ 172 | delete l; delete r; return NULL; 173 | } else { 174 | return new ReadPair(l, r); 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/fastqreader.h: -------------------------------------------------------------------------------- 1 | #ifndef FASTQ_READER_H 2 | #define FASTQ_READER_H 3 | 4 | #include 5 | #include 6 | #include "read.h" 7 | #include "zlib/zlib.h" 8 | #include "common.h" 9 | #include 10 | #include 11 | 12 | class FastqReader{ 13 | public: 14 | FastqReader(string filename, bool hasQuality = true); 15 | ~FastqReader(); 16 | bool isZipped(); 17 | 18 | //this function is not thread-safe 19 | //do not call read() of a same FastqReader object from different threads concurrently 20 | Read* read(); 21 | 22 | public: 23 | static bool isZipFastq(string filename); 24 | static bool isFastq(string filename); 25 | static bool test(); 26 | 27 | private: 28 | void init(); 29 | void close(); 30 | bool getLine(char* line, int maxLine); 31 | 32 | private: 33 | string mFilename; 34 | gzFile mZipFile; 35 | ifstream mFile; 36 | bool mZipped; 37 | bool mHasQuality; 38 | 39 | }; 40 | 41 | class FastqReaderPair{ 42 |
public: 43 | FastqReaderPair(FastqReader* left, FastqReader* right); 44 | FastqReaderPair(string leftName, string rightName); 45 | ~FastqReaderPair(); 46 | ReadPair* read(); 47 | public: 48 | FastqReader* mLeft; 49 | FastqReader* mRight; 50 | }; 51 | 52 | #endif -------------------------------------------------------------------------------- /src/globalsettings.cpp: -------------------------------------------------------------------------------- 1 | #include "globalsettings.h" 2 | 3 | bool GlobalSettings::markedOnlyForVCF = false; 4 | bool GlobalSettings::legacyMode = false; 5 | bool GlobalSettings::standaloneHtml = false; 6 | int GlobalSettings::minReadSupport = 2; 7 | bool GlobalSettings::processingVCF = false; 8 | bool GlobalSettings::verbose = false; 9 | bool GlobalSettings::simplifiedMode = false; 10 | bool GlobalSettings::simplifiedModeToEvaluate = false; -------------------------------------------------------------------------------- /src/globalsettings.h: -------------------------------------------------------------------------------- 1 | #ifndef GLOBALSETTINGS_H 2 | #define GLOBALSETTINGS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std; 9 | 10 | class GlobalSettings{ 11 | public: 12 | GlobalSettings(); 13 | 14 | public: 15 | inline static void setMarkedOnlyForVCF(bool flag){ 16 | markedOnlyForVCF = flag; 17 | } 18 | inline static void setLegacyMode(bool flag){ 19 | legacyMode = flag; 20 | } 21 | inline static void setStandaloneHtml(bool flag){ 22 | standaloneHtml = flag; 23 | } 24 | inline static void setMinReadSupport(int val){ 25 | minReadSupport = val; 26 | } 27 | inline static void setProcessingVCF(bool flag){ 28 | processingVCF = flag; 29 | } 30 | inline static void setVerbose(bool flag){ 31 | verbose = flag; 32 | } 33 | inline static void setSimplifiedMode(bool flag){ 34 | simplifiedMode = flag; 35 | } 36 | inline static void setSimplifiedModeToEvaluate(bool flag){ 37 | simplifiedModeToEvaluate = flag; 38 | } 39 | 40 | 
public: 41 | static bool markedOnlyForVCF; 42 | static bool legacyMode; 43 | static bool standaloneHtml; 44 | static int minReadSupport; 45 | static bool processingVCF; 46 | static bool verbose; 47 | static bool simplifiedMode; 48 | static bool simplifiedModeToEvaluate; 49 | }; 50 | 51 | 52 | #endif -------------------------------------------------------------------------------- /src/htmlreporter.cpp: -------------------------------------------------------------------------------- 1 | #include "htmlreporter.h" 2 | #include "common.h" 3 | #include 4 | #include "globalsettings.h" 5 | 6 | HtmlReporter::HtmlReporter(string filename, vector& mutationList, vector *mutationMatches, bool inFrame){ 7 | mMutationList = mutationList; 8 | mMutationMatches = mutationMatches; 9 | mFilename = filename; 10 | mFile.open(mFilename.c_str(), ifstream::out); 11 | mInFrame = inFrame; 12 | } 13 | 14 | HtmlReporter::~HtmlReporter(){ 15 | mFile.close(); 16 | } 17 | 18 | void HtmlReporter::run() { 19 | printHeader(); 20 | printHelper(); 21 | printMutations(); 22 | printMutationsJS(); 23 | printFooter(); 24 | } 25 | 26 | void HtmlReporter::printHelper() { 27 | if(mInFrame) 28 | mFile << ""; 29 | else 30 | mFile << ""; 31 | mFile << ""; 32 | mFile << "

Helpful tips:

    "; 33 | mFile << "
  • Mutation point is in the center of the table.
  • "; 34 | mFile << "
  • Base color indicates quality: extremely high (Q40+), high (Q30~Q39) , moderate (Q20~Q29), low (Q15~Q19), extremely low (0~Q14).
  • "; 35 | mFile << "
  • Move mouse over the base, it will show the quality value.
  • "; 36 | if(!GlobalSettings::simplifiedMode) 37 | mFile << "
  • Click on any row, the original read/pair will be displayed.
  • "; 38 | mFile << "
  • In first column, d means the edit distance of match, and --> means forward, <-- means reverse.
  • "; 39 | mFile << "
  • For pair-end sequencing, MutScan tries to merge each pair, and the overlapped bases will be assigned higher qualities.
  • "; 40 | mFile << "
"; 41 | } 42 | 43 | void HtmlReporter::printMutations() { 44 | // calculate the found mutation 45 | int found = 0; 46 | for(size_t i=0;i matches = mMutationMatches[i]; 48 | if((ssize_t)matches.size()>=GlobalSettings::minReadSupport){ 49 | found++; 50 | } 51 | } 52 | // print menu 53 | int id = 0; 54 | if(!mInFrame){ 55 | mFile<<""; 68 | } 69 | id=0; 70 | for(size_t i=0;i matches = mMutationMatches[i]; 72 | if((ssize_t)matches.size()>=GlobalSettings::minReadSupport){ 73 | id++; 74 | printMutation(id, mMutationList[i], matches); 75 | } 76 | } 77 | } 78 | 79 | void HtmlReporter::printMutationsJS() { 80 | mFile << "\n"; 125 | } 126 | 127 | void HtmlReporter::printMutation(int id, Mutation& mutation, vector& matches){ 128 | mFile << "\n
"; 129 | mFile << ""; 134 | mFile << ""; 135 | mFile << ""; 136 | mFile << ""; 137 | mFile << ""; 138 | mFile << ""; 139 | mFile << ""; 140 | mFile << ""; 141 | mFile << ""; 142 | mFile << ""; 143 | for(size_t m=0; m"; 147 | else 148 | mFile << ""; 149 | mFile << ""; 160 | // print a hidden row containing the full read 161 | if(!GlobalSettings::simplifiedMode){ 162 | mFile << ""; 163 | mFile << ""; 166 | mFile << ""; 167 | } 168 | } 169 | mFile << "
" << "ID_Distance_Strand" << "" << "" << "" << mutation.mLeft << "" << mutation.getCenterHtml() << "" << mutation.mRight << "" << "" << "
"; 150 | // for display alignment 151 | if(m+1<10) 152 | mFile<<"0"; 153 | if(m+1<100) 154 | mFile<<"0"; 155 | if(m+1<1000) 156 | mFile<<"0"; 157 | mFile << m+1 << ", "; 158 | matches[m]->printHtmlTD(mFile, mutation.mLeft.length(), mutation.mCenter.length(), mutation.mRight.length(), id-1, m); 159 | mFile << "
"; 170 | } 171 | 172 | void HtmlReporter::printHeader(){ 173 | mFile << ""; 174 | mFile << "MutScan report"; 175 | printJS(); 176 | printCSS(); 177 | mFile << ""; 178 | mFile << "
"; 179 | } 180 | 181 | void HtmlReporter::printCSS(){ 182 | if(mInFrame){ 183 | mFile << ""; 184 | } else { 185 | mFile << ""; 207 | } 208 | } 209 | 210 | void HtmlReporter::printJS(){ 211 | if(mInFrame){ 212 | mFile << ""; 213 | } else { 214 | mFile << "\n"; 238 | } 239 | } 240 | 241 | const std::string getCurrentSystemTime() 242 | { 243 | auto tt = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); 244 | struct tm* ptm = localtime(&tt); 245 | char date[60] = {0}; 246 | sprintf(date, "%d-%02d-%02d %02d:%02d:%02d", 247 | (int)ptm->tm_year + 1900,(int)ptm->tm_mon + 1,(int)ptm->tm_mday, 248 | (int)ptm->tm_hour,(int)ptm->tm_min,(int)ptm->tm_sec); 249 | return std::string(date); 250 | } 251 | 252 | extern string command; 253 | 254 | void HtmlReporter::printFooter(){ 255 | if(!mInFrame){ 256 | mFile << "\n"; 260 | } 261 | mFile << "
"; 262 | } 263 | 264 | void HtmlReporter::printScanTargets(){ 265 | mFile << "\n
"; 266 | mFile << "

scanned " << mMutationList.size() << " mutation spots...

"; 267 | mFile << "
"; 274 | } -------------------------------------------------------------------------------- /src/htmlreporter.h: -------------------------------------------------------------------------------- 1 | #ifndef HTML_REPORTER_H 2 | #define HTML_REPORTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include "mutation.h" 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | class HtmlReporter{ 15 | public: 16 | HtmlReporter(string filename, vector& mutationList, vector *mutationMatches, bool inFrame = false); 17 | ~HtmlReporter(); 18 | void run(); 19 | 20 | private: 21 | void printHeader(); 22 | void printCSS(); 23 | void printJS(); 24 | void printFooter(); 25 | void printHelper(); 26 | void printMutations(); 27 | void printMutationsJS(); 28 | void printMutation(int id, Mutation& mutation, vector& matches); 29 | void printScanTargets(); 30 | 31 | private: 32 | string mFilename; 33 | vector mMutationList; 34 | vector* mMutationMatches; 35 | ofstream mFile; 36 | bool mInFrame; 37 | }; 38 | 39 | #endif -------------------------------------------------------------------------------- /src/jsonreporter.cpp: -------------------------------------------------------------------------------- 1 | #include "jsonreporter.h" 2 | #include "common.h" 3 | #include 4 | #include "globalsettings.h" 5 | 6 | JsonReporter::JsonReporter(string filename, vector& mutationList, vector *mutationMatches){ 7 | mMutationList = mutationList; 8 | mMutationMatches = mutationMatches; 9 | mFilename = filename; 10 | mFile.open(mFilename.c_str(), ifstream::out); 11 | } 12 | 13 | JsonReporter::~JsonReporter(){ 14 | mFile.close(); 15 | } 16 | 17 | extern string getCurrentSystemTime(); 18 | extern string command; 19 | 20 | void JsonReporter::run() { 21 | mFile << "{" << endl; 22 | mFile << "\t\"command\":\"" << command << "\"," << endl; 23 | mFile << "\t\"version\":\"" << MUTSCAN_VER << "\"," << endl; 24 | mFile << "\t\"time\":\"" << getCurrentSystemTime() << "\"," << endl; 
25 | mFile << "\t\"mutations\":{"; 26 | 27 | bool isFirstMut = true; 28 | for(size_t i=0;i matches = mMutationMatches[i]; 31 | if((ssize_t)matches.size()>=GlobalSettings::minReadSupport){ 32 | if(isFirstMut) { 33 | mFile << endl; 34 | isFirstMut = false; 35 | } 36 | else 37 | mFile << "," << endl; 38 | 39 | mFile << "\t\t\"" << mut.mName << "\":{" << endl; 40 | mFile << "\t\t\t\"" << "chr" << "\":" << "\"" << mut.mChr << "\"," << endl; 41 | mFile << "\t\t\t\"" << "ref" << "\":[" << "\"" << mut.mLeft << "\"," << "\"" << mut.mCenter << "\"," << "\"" << mut.mRight << "\"]," << endl; 42 | mFile << "\t\t\t\"" << "reads" << "\":[" << endl; 43 | for(size_t m=0; mprintBreaksToJson(mFile, mut.mLeft.length(), mut.mCenter.length(), mut.mRight.length()); 47 | mFile << ", " << endl; 48 | matches[m]->printReadToJson(mFile, "\t\t\t\t\t"); 49 | mFile << "\t\t\t\t}"; 50 | if(m!=matches.size()-1) 51 | mFile << ","; 52 | mFile << endl; 53 | } 54 | mFile << "\t\t\t]" << endl; 55 | mFile << "\t\t}"; 56 | } 57 | 58 | } 59 | mFile << endl; 60 | mFile << "\t}" << endl; 61 | mFile << "}" << endl; 62 | } -------------------------------------------------------------------------------- /src/jsonreporter.h: -------------------------------------------------------------------------------- 1 | #ifndef JSON_REPORTER_H 2 | #define JSON_REPORTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include "mutation.h" 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | class JsonReporter{ 15 | public: 16 | JsonReporter(string filename, vector& mutationList, vector *mutationMatches); 17 | ~JsonReporter(); 18 | void run(); 19 | 20 | private: 21 | void printHeader(); 22 | void printCSS(); 23 | void printJS(); 24 | void printFooter(); 25 | void printHelper(); 26 | void printMutations(); 27 | void printMutationsJS(); 28 | void printMutation(int id, Mutation& mutation, vector& matches); 29 | void printScanTargets(); 30 | 31 | private: 32 | string mFilename; 33 | vector 
mMutationList; 34 | vector* mMutationMatches; 35 | ofstream mFile; 36 | }; 37 | 38 | #endif -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "fastqreader.h" 3 | #include "unittest.h" 4 | #include "mutscan.h" 5 | #include 6 | #include "cmdline.h" 7 | #include 8 | #include "util.h" 9 | #include "globalsettings.h" 10 | 11 | string command; 12 | 13 | int main(int argc, char* argv[]){ 14 | if (argc == 2 && strcmp(argv[1], "test")==0){ 15 | UnitTest tester; 16 | tester.run(); 17 | return 0; 18 | } 19 | cmdline::parser cmd; 20 | cmd.add("read1", '1', "read1 file name", true, ""); 21 | cmd.add("read2", '2', "read2 file name", false, ""); 22 | cmd.add("mutation", 'm', "mutation file name, can be a CSV format or a VCF format", false, ""); 23 | cmd.add("ref", 'r', "reference fasta file name (only needed when mutation file is a VCF)", false, ""); 24 | cmd.add("html", 'h', "filename of html report, default is mutscan.html in work directory", false, "mutscan.html"); 25 | cmd.add("json", 'j', "filename of JSON report, default is no JSON report", false, ""); 26 | cmd.add("thread", 't', "worker thread number, default is 4", false, 4); 27 | cmd.add("support", 'S', "min read support for reporting a mutation, default is 2", false, 2); 28 | cmd.add("mark", 'k', "when mutation file is a vcf file, --mark means only process the records with FILTER column is M"); 29 | cmd.add("legacy", 'l', "use legacy mode, usually much slower but may be able to find a little more reads in certain case"); 30 | cmd.add("standalone", 's', "output standalone HTML report with single file. Don't use this option when scanning too many target mutations (i.e. >1000 mutations)"); 31 | cmd.add("simplified", 0, "simplified mode uses less RAM but reports less information. 
This option can be auto/on/off, by default it's auto, which means automatically enabled when processing large FASTQ with large VCF.", false , "auto"); 32 | cmd.add("verbose", 'v', "enable verbose mode, more information will be output in STDERR"); 33 | cmd.parse_check(argc, argv); 34 | string r1file = cmd.get("read1"); 35 | string r2file = cmd.get("read2"); 36 | string mutationFile = cmd.get("mutation"); 37 | string html = cmd.get("html"); 38 | string json = cmd.get("json"); 39 | string refFile = cmd.get("ref"); 40 | int threadNum = cmd.get("thread"); 41 | 42 | if(ends_with(refFile, ".gz") || ends_with(refFile, ".gz")) { 43 | cerr << "ERROR: reference fasta file should not be compressed.\nplease unzip "<("support"); 60 | GlobalSettings::setMinReadSupport(support); 61 | 62 | stringstream ss; 63 | for(int i=0;i, because your mutation file (-m) is a VCF"<("simplified"); 85 | str2lower(simplified); 86 | if(simplified == "on" || simplified == "yes") { 87 | GlobalSettings::setSimplifiedMode(true); 88 | if(GlobalSettings::verbose) { 89 | cerr << "You have enabled simplified mode to reduce memory usage!" 
<< endl; 90 | } 91 | } 92 | else if(simplified == "off" || simplified == "no") 93 | GlobalSettings::setSimplifiedMode(false); 94 | else if(simplified == "auto") 95 | GlobalSettings::setSimplifiedModeToEvaluate(true); 96 | else { 97 | cerr << "the option --simplified can only be auto/on/off"< 3 | #include "globalsettings.h" 4 | #include 5 | 6 | Match::Match(Read* r, int pos, int distance, bool reversed){ 7 | mRead = r; 8 | mReadLen = r->length(); 9 | mSequence = NULL; 10 | mDistance = distance; 11 | mPos = pos; 12 | mReversed = reversed; 13 | if(GlobalSettings::simplifiedMode) 14 | mOriginalReads = NULL; 15 | else 16 | mOriginalReads = new vector(); 17 | } 18 | 19 | Match::Match(char* seq, int readLen, char meanQual, int pos, int distance, bool reversed){ 20 | mRead = NULL; 21 | mSequence = seq; 22 | mReadLen = readLen; 23 | mMeanQual = meanQual; 24 | mDistance = distance; 25 | mPos = pos; 26 | mReversed = reversed; 27 | if(GlobalSettings::simplifiedMode) 28 | mOriginalReads = NULL; 29 | else 30 | mOriginalReads = new vector(); 31 | } 32 | 33 | Match::~Match(){ 34 | // we don't delete mRead or mSequence here since they are shared by different objects 35 | // and will be deleted in other places 36 | if(mOriginalReads) { 37 | for(size_t i=0;isize();i++){ 38 | delete (*mOriginalReads)[i]; 39 | (*mOriginalReads)[i] = NULL; 40 | } 41 | delete mOriginalReads; 42 | mOriginalReads = NULL; 43 | } 44 | } 45 | 46 | int Match::readlength() const { 47 | return mReadLen; 48 | } 49 | 50 | void Match::addOriginalRead(Read* r){ 51 | if(!mOriginalReads) 52 | return; 53 | mOriginalReads->push_back(new Read(*r)); 54 | } 55 | 56 | void Match::addOriginalPair(ReadPair* pair){ 57 | if(!mOriginalReads) 58 | return; 59 | mOriginalReads->push_back(new Read(*pair->mLeft)); 60 | mOriginalReads->push_back(new Read(*pair->mRight)); 61 | } 62 | 63 | void Match::print(int leftlen, int centerlen, int rightlen){ 64 | if(GlobalSettings::simplifiedMode) 65 | mRead = new Read(mSequence, mReadLen, 
mMeanQual); 66 | cout<<"pos: "< breaks; 73 | breaks.push_back(max(mPos-leftlen, 0)); 74 | breaks.push_back( mPos ); 75 | breaks.push_back( mPos+centerlen ); 76 | breaks.push_back( min(mPos+centerlen+rightlen, mRead->length())); 77 | mRead->printWithBreaks(breaks); 78 | if(GlobalSettings::simplifiedMode) { 79 | delete mRead; 80 | mRead = NULL; 81 | } 82 | } 83 | 84 | void Match::printHtmlTD(ofstream& file, int leftlen, int centerlen, int rightlen, int mutid, int matchid){ 85 | if(GlobalSettings::simplifiedMode) 86 | mRead = new Read(mSequence, mReadLen, mMeanQual); 87 | file<<""; 88 | file<<"d:" << (int)mDistance; 89 | if(mReversed) 90 | file<<", <--"; 91 | else 92 | file<<", -->"; 93 | 94 | file<<""; 95 | 96 | vector breaks; 97 | breaks.push_back(max(mPos-leftlen, 0)); 98 | breaks.push_back( mPos ); 99 | breaks.push_back( mPos+centerlen ); 100 | breaks.push_back( min(mPos+centerlen+rightlen, mRead->length())); 101 | mRead->printHtmlTDWithBreaks(file, breaks, mutid, matchid); 102 | if(GlobalSettings::simplifiedMode) { 103 | delete mRead; 104 | mRead = NULL; 105 | } 106 | } 107 | 108 | void Match::printBreaksToJson(ofstream& file, int leftlen, int centerlen, int rightlen){ 109 | vector breaks; 110 | breaks.push_back(max(mPos-leftlen, 0)); 111 | breaks.push_back( mPos ); 112 | breaks.push_back( mPos+centerlen ); 113 | breaks.push_back( min(mPos+centerlen+rightlen, mReadLen)); 114 | file << "["; 115 | for(size_t i=0; imSeq.mStr << "\"," << endl; 127 | file << pad << "\"qual\":" << "\"" << mRead->mQuality << "\"" << endl; 128 | if(GlobalSettings::simplifiedMode) { 129 | delete mRead; 130 | mRead = NULL; 131 | } 132 | } 133 | 134 | void Match::printJS(ofstream& file, int leftlen, int centerlen, int rightlen) { 135 | if(GlobalSettings::simplifiedMode) 136 | mRead = new Read(mSequence, mReadLen, mMeanQual); 137 | vector breaks; 138 | breaks.push_back(max(mPos-leftlen, 0)); 139 | breaks.push_back( mPos ); 140 | breaks.push_back( mPos+centerlen ); 141 | breaks.push_back( 
min(mPos+centerlen+rightlen, mRead->length())); 142 | mRead->printJSWithBreaks(file, breaks); 143 | if(GlobalSettings::simplifiedMode) { 144 | delete mRead; 145 | mRead = NULL; 146 | } 147 | } 148 | 149 | void Match::printReadsToFile(ofstream& file){ 150 | if(!mOriginalReads) 151 | return; 152 | for(size_t i=0;isize();i++){ 153 | (*mOriginalReads)[i]->printFile(file); 154 | } 155 | } 156 | 157 | void Match::setReversed(bool flag){ 158 | mReversed = flag; 159 | } 160 | 161 | int Match::countUnique(vector& matches) { 162 | if(matches.size()==0) 163 | return 0; 164 | int count = 1; 165 | Match* cur = matches[0]; 166 | for(size_t i=1;i *cur || *m < *cur) { 169 | cur = m; 170 | count++; 171 | } 172 | } 173 | return count; 174 | } 175 | -------------------------------------------------------------------------------- /src/match.h: -------------------------------------------------------------------------------- 1 | #ifndef MATCH_H 2 | #define MATCH_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | 16 | class Match{ 17 | public: 18 | Match(Read* r, int pos, int distance, bool reversed = false); 19 | Match(char* seq, int readLen, char meanQual, int pos, int distance, bool reversed = false); 20 | ~Match(); 21 | void print(int leftlen, int centerlen, int rightlen); 22 | void printBreaksToJson(ofstream& file, int leftlen, int centerlen, int rightlen); 23 | void printReadToJson(ofstream& file, string pad); 24 | void printHtmlTD(ofstream& file, int leftlen, int centerlen, int rightlen, int mutid, int matchid); 25 | void printJS(ofstream& file, int leftlen, int centerlen, int rightlen); 26 | void printReadsToFile(ofstream& file); 27 | void setReversed(bool flag); 28 | void addOriginalRead(Read* r); 29 | void addOriginalPair(ReadPair* pair); 30 | template void addOriginalReadData(ReadData* r); 31 | int readlength() const; 32 | 33 | inline bool operator <(const 
Match& other) const 34 | { 35 | return mPos < other.mPos || (mPos == other.mPos && readlength() > other.readlength()); 36 | } 37 | inline bool operator >(const Match& other) const 38 | { 39 | return mPos > other.mPos || (mPos == other.mPos && readlength() < other.readlength()); 40 | } 41 | inline static bool less(const Match* m1, const Match* m2) 42 | { 43 | return *m1 < *m2; 44 | } 45 | inline static bool greater(const Match* m1, const Match* m2) 46 | { 47 | return *m1 > *m2; 48 | } 49 | 50 | static int countUnique(vector& matches); 51 | Read* getRead() {return mRead;} 52 | char* getSequence() {return mSequence;} 53 | 54 | private: 55 | Read* mRead; 56 | char* mSequence; 57 | vector* mOriginalReads; 58 | bool mReversed; 59 | // the start position of the mutation's center 60 | int mReadLen; 61 | int mPos; 62 | unsigned char mDistance; 63 | char mMeanQual; 64 | }; 65 | 66 | template <> 67 | inline void Match::addOriginalReadData (Read* r) { 68 | addOriginalRead(r); 69 | } 70 | 71 | template <> 72 | inline void Match::addOriginalReadData (ReadPair* pair) { 73 | addOriginalPair(pair); 74 | } 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /src/multihtmlreporter.cpp: -------------------------------------------------------------------------------- 1 | #include "multihtmlreporter.h" 2 | #include "htmlreporter.h" 3 | #include "common.h" 4 | #include 5 | #include "globalsettings.h" 6 | #include 7 | 8 | MultiHtmlReporter::MultiHtmlReporter(string filename, vector& mutationList, vector *mutationMatches){ 9 | mMutationList = mutationList; 10 | mMutationMatches = mutationMatches; 11 | mFilename = filename; 12 | mFolderPath = mFilename + ".files"; 13 | mFolderName = basename(mFilename) + ".files"; 14 | mkdir(mFolderPath.c_str(), 0777); 15 | stat(); 16 | } 17 | 18 | MultiHtmlReporter::~MultiHtmlReporter(){ 19 | } 20 | 21 | void MultiHtmlReporter::stat(){ 22 | mTotalCount = 0; 23 | for(size_t m=0; m matches = 
mMutationMatches[m]; 25 | if((ssize_t)matches.size()>=GlobalSettings::minReadSupport) { 26 | mTotalCount++; 27 | string chr = mMutationList[m].mChr; 28 | if(mChrCount.count(chr)==0) 29 | mChrCount[chr]=1; 30 | else 31 | mChrCount[chr]++; 32 | } 33 | } 34 | } 35 | 36 | void MultiHtmlReporter::run() { 37 | printCSS(); 38 | printJS(); 39 | printChrHtml(); 40 | printMutationHtml(); 41 | printMainFrame(); 42 | printIndexPage(); 43 | printMainPage(); 44 | } 45 | 46 | void MultiHtmlReporter::printMainFrame() { 47 | ofstream file; 48 | file.open(mFilename.c_str(), ifstream::out); 49 | file << "MutScan " << MUTSCAN_VER << " report " << "at " << getCurrentSystemTime() << " "; 54 | file.close(); 55 | } 56 | 57 | void MultiHtmlReporter::printMainPage() { 58 | ofstream file; 59 | string mainFile = mFolderPath + "/main.html"; 60 | file.open(mainFile.c_str(), ifstream::out); 61 | printHeader(file); 62 | printAllChromosomeLink(file); 63 | printFooter(file); 64 | file.close(); 65 | } 66 | 67 | void MultiHtmlReporter::printAllChromosomeLink(ofstream& file) { 68 | bool found = false; 69 | map::iterator iter; 70 | file << ""; 87 | } 88 | 89 | void MultiHtmlReporter::printChrLink(ofstream& file, string chr) { 90 | bool found = false; 91 | for(size_t m=0; m matches = mMutationMatches[m]; 93 | if((ssize_t)matches.size()>=GlobalSettings::minReadSupport) { 94 | found = true; 95 | if(chr == mMutationList[m].mChr) { 96 | string filename = chr + "/" + to_string(m) + ".html"; 97 | file << ""; 100 | } 101 | } 102 | } 103 | if(!found) { 104 | file << "MutScan didn't find any mutation"; 105 | } 106 | } 107 | 108 | void MultiHtmlReporter::printMutationHtml() { 109 | for(size_t m=0; m matches = mMutationMatches[m]; 111 | if((ssize_t)matches.size()>=GlobalSettings::minReadSupport) { 112 | string chr = mMutationList[m].mChr; 113 | string folder = mFolderPath + "/" + chr; 114 | string filename = folder + "/" + to_string(m) + ".html"; 115 | vector mutList; 116 | mutList.push_back(mMutationList[m]); 117 
| HtmlReporter hr(filename, mutList, mMutationMatches+m, true); 118 | hr.run(); 119 | } 120 | } 121 | } 122 | 123 | void MultiHtmlReporter::printIndexPage() { 124 | ofstream file; 125 | string indexFile = mFolderPath + "/index.html"; 126 | file.open(indexFile.c_str(), ifstream::out); 127 | printHeader(file); 128 | file << ""; 129 | file << ""; 137 | printFooter(file, false, false); 138 | file.close(); 139 | } 140 | 141 | void MultiHtmlReporter::printChrHtml() { 142 | map::iterator iter; 143 | for(iter= mChrCount.begin(); iter!= mChrCount.end(); iter++){ 144 | string chr = iter->first; 145 | string folder = mFolderPath + "/" + chr; 146 | mkdir(folder.c_str(), 0777); 147 | ofstream file; 148 | string chrFilename = mFolderPath + "/" + chr + ".html"; 149 | file.open(chrFilename.c_str(), ifstream::out); 150 | printHeader(file); 151 | file << ""; 155 | printFooter(file, false); 156 | file.close(); 157 | } 158 | } 159 | 160 | void MultiHtmlReporter::printHelper(ofstream& file) { 161 | file << "

Helpful tips:

    "; 162 | file << "
  • Base color indicates quality: extremely high (Q40+), high (Q30+), moderate (Q20+), low (Q15+), extremely low (0~Q14)
  • "; 163 | file << "
  • Move mouse over the base, it will show the quality value
  • "; 164 | if(!GlobalSettings::simplifiedMode) 165 | file << "
  • Click on any row, the original read/pair will be displayed
  • "; 166 | file << "
  • In first column, d means the edit distance of match, and --> means forward, <-- means reverse
  • "; 167 | file << "
  • For pair-end sequencing, MutScan tries to merge each pair, and the overlapped bases will be assigned higher qualities
  • "; 168 | file << "
"; 169 | } 170 | 171 | void MultiHtmlReporter::printHeader(ofstream& file){ 172 | file << ""; 173 | file << "MutScan report"; 174 | file << ""; 175 | file << ""; 176 | file << ""; 177 | file << "
"; 178 | } 179 | 180 | void MultiHtmlReporter::printCSS(){ 181 | ofstream file; 182 | string filename = mFolderPath + "/mutscan.css"; 183 | file.open(filename.c_str(), ifstream::out); 184 | file << "td {border:1px solid #dddddd;padding-left:2px;padding-right:2px;font-size:10px;}"; 185 | file << "table {border:1px solid #999999;padding:2x;border-collapse:collapse;}"; 186 | file << "img {padding:30px;}"; 187 | file << "#menu {font-family:Consolas, 'Liberation Mono', Menlo, Courier, monospace;}"; 188 | file << "#menu a {color:#0366d6; font-size:15px;line-height:22px;text-decoration:none;font-family:-apple-system, BlinkMacSystemFont, 'Segoe UI', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol'}"; 189 | file << ".index {font-weight:600;font-siz3:18px;line-height:25px;}"; 190 | file << "a:visited {color: #999999}"; 191 | file << ".alignleft {text-align:left;}"; 192 | file << ".alignright {text-align:right;}"; 193 | file << ".header {color:#ffffff;padding:1px;height:20px;background:#000000;}"; 194 | file << ".figuretitle {color:#996657;font-size:20px;padding:50px;}"; 195 | file << "#container {text-align:center;padding:1px;font-family:Arail,'Liberation Mono', Menlo, Courier, monospace;}"; 196 | file << "#menu {padding-top:10px;padding-bottom:10px;text-align:left;}"; 197 | file << ".menu_item {text-align:left;padding-top:2px;font-size:18px;}"; 198 | file << ".highlight {text-align:left;padding-top:30px;padding-bottom:30px;font-size:20px;line-height:35px;}"; 199 | file << ".mutation_head {text-align:left;color:#0092FF;font-family:Arial;padding-top:20px;padding-bottom:5px;}"; 200 | file << ".mutation_block {}"; 201 | file << ".match_brief {font-size:8px}"; 202 | file << ".mutation_point {color:#FFCCAA}"; 203 | file << "#helper {text-align:left;border:1px dotted #fafafa;color:#777777;font-size:12px;}"; 204 | file << "#footer {text-align:left;padding-left:10px;padding-top:20px;color:#999999;font-size:10px;}"; 205 | file.close(); 206 | } 
207 | /* Writes mutscan.js into mFolderPath. The script defines toggle(targetid), which flips the display style of the element between table-row and none, and toggle_target_list(targetid), which flips it between block and none and sets the value of the target_view_btn element to hide or view to match. */ 208 | void MultiHtmlReporter::printJS(){ 209 | ofstream file; 210 | string filename = mFolderPath + "/mutscan.js"; 211 | file.open(filename.c_str(), ifstream::out); 212 | file << "function toggle(targetid){ \n\ 213 | if (document.getElementById){ \n\ 214 | target=document.getElementById(targetid); \n\ 215 | if (target.style.display=='table-row'){ \n\ 216 | target.style.display='none'; \n\ 217 | } else { \n\ 218 | target.style.display='table-row'; \n\ 219 | } \n\ 220 | } \n\ 221 | }"; 222 | file << "function toggle_target_list(targetid){ \n\ 223 | if (document.getElementById){ \n\ 224 | target=document.getElementById(targetid); \n\ 225 | if (target.style.display=='block'){ \n\ 226 | target.style.display='none'; \n\ 227 | document.getElementById('target_view_btn').value='view';\n\ 228 | } else { \n\ 229 | document.getElementById('target_view_btn').value='hide';\n\ 230 | target.style.display='block'; \n\ 231 | } \n\ 232 | } \n\ 233 | }"; 234 | file.close(); 235 | } 236 | /* Returns the current local time formatted as YYYY-MM-DD HH:MM:SS. localtime() returns a pointer to storage that is shared between calls, and its fields are copied into the text buffer right away. */ 237 | string MultiHtmlReporter::getCurrentSystemTime() 238 | { 239 | auto tt = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); 240 | struct tm* ptm = localtime(&tt); 241 | char date[60] = {0}; 242 | sprintf(date, "%d-%02d-%02d %02d:%02d:%02d", 243 | (int)ptm->tm_year + 1900,(int)ptm->tm_mon + 1,(int)ptm->tm_mday, 244 | (int)ptm->tm_hour,(int)ptm->tm_min,(int)ptm->tm_sec); 245 | return std::string(date); 246 | } 247 | /* Declared here and defined in another translation unit. NOTE(review): presumably the command line that launched the program - confirm at the definition. */ 248 | extern string command; 249 | /* Writes the closing part of a report page. NOTE(review): only the start of the body is visible here; printTargetList and printCommand presumably switch the scanned-target list and the command line on or off - confirm against the full body. */ 250 | void MultiHtmlReporter::printFooter(ofstream& file, bool printTargetList, bool printCommand){ 251 | file << "\n"; 257 | file << "
"; 258 | } 259 | 260 | void MultiHtmlReporter::printScanTargets(ofstream& file){ 261 | file << "\n
"; 262 | file << "

scanned " << mMutationList.size() << " mutation spots...

"; 263 | file << "
"; 270 | } -------------------------------------------------------------------------------- /src/multihtmlreporter.h: -------------------------------------------------------------------------------- 1 | #ifndef MULTI_HTML_REPORTER_H 2 | #define MULTI_HTML_REPORTER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include "mutation.h" 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | class MultiHtmlReporter{ 15 | public: 16 | MultiHtmlReporter(string filename, vector& mutationList, vector *mutationMatches); 17 | ~MultiHtmlReporter(); 18 | void run(); 19 | 20 | private: 21 | void printMainFrame(); 22 | void printMainPage(); 23 | void printIndexPage(); 24 | void printAllChromosomeLink(ofstream& file); 25 | void printChrLink(ofstream& file, string chr); 26 | void printChrHtml(); 27 | void printMutationHtml(); 28 | void stat(); 29 | void printHeader(ofstream& file); 30 | void printCSS(); 31 | void printJS(); 32 | void printFooter(ofstream& file, bool printTargetList=true, bool printCommand=true); 33 | void printHelper(ofstream& file); 34 | void printScanTargets(ofstream& file); 35 | string getCurrentSystemTime(); 36 | 37 | private: 38 | string mFilename; 39 | string mFolderName; 40 | string mFolderPath; 41 | vector mMutationList; 42 | vector* mMutationMatches; 43 | map mChrCount; 44 | int mTotalCount; 45 | }; 46 | 47 | 48 | #endif -------------------------------------------------------------------------------- /src/mutation.cpp: -------------------------------------------------------------------------------- 1 | #include "mutation.h" 2 | #include "editdistance.h" 3 | #include 4 | #include 5 | #include "util.h" 6 | #include 7 | #include "builtinmutation.h" 8 | #include 9 | #include "globalsettings.h" 10 | 11 | Mutation::Mutation(string name, string left, string center, string right, string chr){ 12 | //we shift some bases from left and right to center to require 100% match of these bases 13 | mShift = 0; 14 | if(center.length() == 
0) 15 | mShift = 2; 16 | mLeft = left.substr(0, left.length()-mShift); 17 | mCenter = left.substr(left.length()-mShift, mShift) + center + right.substr(0, mShift); 18 | mRight = right.substr(mShift, right.length()-mShift); 19 | mPattern = left + center + right; 20 | mName = name; 21 | mSmallIndel = false; 22 | mChr = chr; 23 | } 24 | 25 | Match* Mutation::searchInRead(Read* r, char* simplifiedBuf, int distanceReq, int qualReq){ 26 | char phredQualReq= (char)(qualReq + 33); 27 | int readLen = r->mSeq.length(); 28 | int lLen = mLeft.length(); 29 | int cLen = mCenter.length(); 30 | int rLen = mRight.length(); 31 | int pLen = mPattern.length(); 32 | string seq = r->mSeq.mStr; 33 | const char* seqData = seq.c_str(); 34 | const char* centerData = mCenter.c_str(); 35 | const char* patternData = mPattern.c_str(); 36 | const char* qualData = r->mQuality.c_str(); 37 | // we should ignore the mutations in the exact edge since there usualy exists errors 38 | const int margin = 2; 39 | for(int start = margin; start + cLen + margin <= readLen; start++){ 40 | int lComp = min(start, lLen); 41 | int rComp = min(readLen - (start+cLen), rLen); 42 | // check string identity in a fast way 43 | bool identical = true; 44 | for (int i=0;i= 2 && mShift==0) { 92 | // if this is caused by an indel, then indel should not happen around center 93 | // we check half of the pattern 94 | int noIndelLeft = min(lLen / 2, lComp); 95 | if(rComp < rLen/2) 96 | noIndelLeft = lLen/2 - rComp; 97 | int noIndelDis = hamming_distance(seqData + start - noIndelLeft, edLen/2, patternData + lLen - noIndelLeft, edLen/2); 98 | 99 | if(noIndelDis > 2) 100 | continue; 101 | } 102 | if(simplifiedBuf != NULL) { 103 | return new Match(simplifiedBuf, r->length(), r->meanQuality(), start, dis); 104 | } 105 | else 106 | return new Match(r, start, dis); 107 | } 108 | } 109 | return NULL; 110 | } 111 | 112 | vector Mutation::parseCsv(string filename) { 113 | int num = 0; 114 | if(GlobalSettings::verbose) { 115 | cerr << 
"Parsing target mutations from CSV file: " << filename << endl; 116 | } 117 | ifstream file; 118 | file.open(filename.c_str(), ifstream::in); 119 | const int maxLine = 1000; 120 | char line[maxLine]; 121 | vector mutations; 122 | while(file.getline(line, maxLine)){ 123 | // trim \n, \r or \r\n in the tail 124 | int readed = strlen(line); 125 | if(readed >=2 ){ 126 | if(line[readed-1] == '\n' || line[readed-1] == '\r'){ 127 | line[readed-1] = '\0'; 128 | if(line[readed-2] == '\r') 129 | line[readed-2] = '\0'; 130 | } 131 | } 132 | string linestr(line); 133 | vector splitted; 134 | split(linestr, splitted, ","); 135 | // a valid line need 4 columns: name, left, center, right 136 | if(splitted.size()<4) 137 | continue; 138 | // comment line 139 | if(starts_with(splitted[0], "#")) 140 | continue; 141 | string name = trim(splitted[0]); 142 | string left = trim(splitted[1]); 143 | string center = trim(splitted[2]); 144 | string right = trim(splitted[3]); 145 | string chr = "unspecified"; 146 | if(splitted.size()>4) 147 | chr = trim(splitted[4]); 148 | Mutation mut(name, left, center, right, chr); 149 | if(left.length()<15){ 150 | cerr << "WARNING: skip following mutation since its left part < 15bp"< Mutation::parseBuiltIn() { 177 | int num = 0; 178 | if(GlobalSettings::verbose) { 179 | cerr << "Using built-in target mutations" << endl; 180 | } 181 | vector mutations; 182 | vector lines; 183 | split(BUILT_IN_MUTATIONS, lines, "\n"); 184 | for(size_t i=0;i splitted; 187 | split(linestr, splitted, ","); 188 | // a valid line need 4 columns: name, left, center, right 189 | if(splitted.size()<4) 190 | continue; 191 | // comment line 192 | if(starts_with(splitted[0], "#")) 193 | continue; 194 | string name = trim(splitted[0]); 195 | string left = trim(splitted[1]); 196 | string center = trim(splitted[2]); 197 | string right = trim(splitted[3]); 198 | string chr = "unspecified"; 199 | if(splitted.size()>4) 200 | chr = trim(splitted[4]); 201 | Mutation mut(name, left, center, 
right, chr); 202 | mutations.push_back(mut); 203 | if(GlobalSettings::verbose) { 204 | num++; 205 | cerr < Mutation::parseVcf(string vcfFile, string refFile) { 215 | int num = 0; 216 | if(GlobalSettings::verbose) { 217 | cerr << "Parsing target mutations from VCF file: " << vcfFile << endl; 218 | cerr << "With reference genome: " << refFile << endl; 219 | } 220 | vector mutations; 221 | VcfReader vr(vcfFile); 222 | vr.readAll(); 223 | vector variants = vr.variants(); 224 | 225 | bool markedOnly = GlobalSettings::markedOnlyForVCF; 226 | 227 | FastaReader fr(refFile); 228 | fr.readAll(); 229 | map ref = fr.contigs(); 230 | 231 | for(size_t i=0;i ref[chrom].length() + 25 + v.ref.length() || (size_t)v.pos < 25 + v.ref.length()) 252 | continue; 253 | 254 | string gene = v.gene(); 255 | string aa = v.aaChange(); 256 | string cds = v.cdsChange(); 257 | 258 | stringstream ss; 259 | if(gene!="") 260 | ss<"<=1 && lengthDiff<=2 ) 277 | mut.setSmallIndel(true); 278 | mutations.push_back(mut); 279 | if(GlobalSettings::verbose) { 280 | num++; 281 | cerr <"; 313 | s += mCenter.substr(mCenter.length()-mShift, mShift); 314 | return s; 315 | } 316 | 317 | void Mutation::setSmallIndel(bool flag) { 318 | mSmallIndel = flag; 319 | } 320 | 321 | bool Mutation::isSmallIndel() { 322 | return mSmallIndel; 323 | } 324 | -------------------------------------------------------------------------------- /src/mutation.h: -------------------------------------------------------------------------------- 1 | #ifndef MUTATION_H 2 | #define MUTATION_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include 9 | #include "match.h" 10 | #include 11 | #include 12 | #include "vcfreader.h" 13 | #include "fastareader.h" 14 | 15 | using namespace std; 16 | 17 | class Mutation{ 18 | public: 19 | Mutation(string name, string left, string center, string right, string chr="unspecified"); 20 | 21 | //search this mutation in a read, and return the matchment info 22 | //by default, Q20 is 
required, and distance should be <=2 23 | //return NULL if not found 24 | Match* searchInRead(Read* r, char* simplifiedBuf = NULL, int distanceReq = 2, int qualReq=20); 25 | static vector parseCsv(string filename); 26 | static vector parseBuiltIn(); 27 | 28 | // if markedOnly = true, then only the entries with FILTER column = m will be treated 29 | // #CHROM POS ID REF ALT QUAL FILTER INFO 30 | // 1 69224 COSM3677745 A C . m GENE=OR4F5;STRAND=+;CDS=c.134A>C;AA=p.D45A;CNT=1 31 | static vector parseVcf(string vcfFile, string refFile); 32 | void print(); 33 | void printHtml(ofstream& file); 34 | string getCenterHtml(); 35 | void setSmallIndel(bool flag); 36 | bool isSmallIndel(); 37 | 38 | public: 39 | string mLeft; 40 | string mCenter; 41 | string mRight; 42 | string mPattern; 43 | string mName; 44 | string mChr; 45 | bool mSmallIndel; 46 | 47 | int mShift; 48 | }; 49 | 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /src/mutscan.cpp: -------------------------------------------------------------------------------- 1 | #include "mutscan.h" 2 | #include "fastqreader.h" 3 | #include 4 | #include "htmlreporter.h" 5 | #include "sescanner.h" 6 | #include "pescanner.h" 7 | #include "util.h" 8 | #include "globalsettings.h" 9 | 10 | MutScan::MutScan(string mutationFile, string refFile, string read1File, string read2File, string html, string json, int threadNum){ 11 | mRead1File = read1File; 12 | mRead2File = read2File; 13 | mMutationFile = mutationFile; 14 | mRefFile = refFile; 15 | mHtmlFile = html; 16 | mJsonFile = json; 17 | mThreadNum = threadNum; 18 | } 19 | 20 | bool MutScan::scan(){ 21 | if(mRead2File != ""){ 22 | //return scanPairEnd(); 23 | PairEndScanner pescanner( mMutationFile, mRefFile, mRead1File, mRead2File, mHtmlFile, mJsonFile, mThreadNum); 24 | return pescanner.scan(); 25 | } 26 | else{ 27 | //return scanSingleEnd(); 28 | SingleEndScanner sescanner( mMutationFile, mRefFile, mRead1File, mHtmlFile, 
mJsonFile, mThreadNum); 29 | return sescanner.scan(); 30 | } 31 | } 32 | 33 | void MutScan::evaluateSimplifiedMode(string r1file, string r2file, int mutationNum) { 34 | if(mutationNum < 10000) { 35 | GlobalSettings::setSimplifiedMode(false); 36 | return ; 37 | } 38 | // use another ifstream to not affect current reader 39 | ifstream is(r1file); 40 | is.seekg (0, is.end); 41 | long bytes = is.tellg(); 42 | 43 | if(r2file != "") 44 | bytes *= 2; 45 | 46 | // here we consider gz file for FASTQ has a compression rate of 3 47 | if(ends_with(r1file, ".gz")) 48 | bytes *= 3; 49 | 50 | // enable simplified mode for over 50G FASTQ + 10,000 mutations 51 | if(bytes > 50L * 1024L * 1024L * 1024L) { 52 | if(GlobalSettings::verbose) 53 | cerr << "Simplified mode is enabled automatically..."< 5 | #include 6 | #include 7 | #include "read.h" 8 | #include "mutation.h" 9 | 10 | using namespace std; 11 | 12 | class MutScan{ 13 | public: 14 | MutScan(string mutationFile, string refFile, string read1File, string read2File, string html, string json, int threadNum); 15 | bool scan(); 16 | static void evaluateSimplifiedMode(string r1file, string r2file, int mutationNum); 17 | 18 | private: 19 | string mMutationFile; 20 | string mRead1File; 21 | string mRead2File; 22 | string mHtmlFile; 23 | string mJsonFile; 24 | string mRefFile; 25 | int mThreadNum; 26 | vector mutationList; 27 | }; 28 | 29 | 30 | #endif -------------------------------------------------------------------------------- /src/overlap.cpp: -------------------------------------------------------------------------------- 1 | #include "overlap.h" 2 | #include "editdistance.h" 3 | #include "math.h" 4 | 5 | Overlap::Overlap(int offset, int overlapLen, int distance){ 6 | mOffset = offset; 7 | mOverlapLen = overlapLen; 8 | mDistance = distance; 9 | mOverlapped = overlapLen>0; 10 | } 11 | 12 | Overlap Overlap::fit(Sequence r1, Sequence r2){ 13 | int len1 = r1.length(); 14 | int len2 = r2.length(); 15 | Sequence reverse_r2 = ~r2; 16 | 
17 | bool overlapped = false; 18 | int overlap_len = 0; 19 | int offset = 0; 20 | int distance = 0; 21 | // a match of less than 10 is considered as unconfident 22 | while (offset < len1-10 && overlapped==false){ 23 | // the overlap length of r1 & r2 when r2 is move right for offset 24 | overlap_len = min(len1-offset, len2); 25 | 26 | distance = edit_distance(r1.mStr.substr(offset, overlap_len), reverse_r2.mStr.substr(0, overlap_len)); 27 | float threshold = min(3.0, overlap_len/10.0); 28 | if (distance <= threshold){ 29 | // now we find a good candidate 30 | // we verify it by moving r2 one more base to see if the distance is getting longer 31 | // if yes, then current is the best match, otherwise it's not 32 | while (offset < len1-10){ 33 | int next_offset = offset + 1; 34 | int next_overlap_len = min(len1-next_offset, len2); 35 | int next_distance = edit_distance(r1.mStr.substr(next_offset, next_overlap_len), reverse_r2.mStr.substr(0, next_overlap_len)); 36 | if (distance <= next_distance){ 37 | overlapped = true; 38 | break; 39 | } 40 | else{ 41 | offset = next_offset; 42 | distance = next_distance; 43 | overlap_len = next_overlap_len; 44 | } 45 | } 46 | break; 47 | } 48 | else 49 | offset += max(1, (distance - (int)ceil(threshold))/2 ); 50 | } 51 | 52 | if (overlapped && offset == 0){ 53 | // check if distance can get smaller if offset goes negative 54 | // this only happens when insert DNA is shorter than sequencing read length, and some adapter/primer is sequenced but not trimmed cleanly 55 | // we go reversely 56 | while (offset > -(len2-10)){ 57 | // the overlap length of r1 & r2 when r2 is move right for offset 58 | overlap_len = min(len1, len2- abs(offset)); 59 | distance = edit_distance(r1.mStr.substr(0, overlap_len), reverse_r2.mStr.substr(-offset, overlap_len)); 60 | float threshold = min(3.0, overlap_len/10.0); 61 | if (distance <= threshold){ 62 | while(offset > -(len2-10)){ 63 | int next_offset = offset - 1; 64 | int next_overlap_len = min(len1, 
len2- abs(next_offset)); 65 | int next_distance = edit_distance(r1.mStr.substr(0, next_overlap_len), reverse_r2.mStr.substr(-next_offset, next_overlap_len)); 66 | if (distance <= next_distance) 67 | return Overlap(offset, overlap_len, distance); 68 | else { 69 | distance = next_distance; 70 | overlap_len = next_overlap_len; 71 | offset = next_offset; 72 | } 73 | } 74 | } 75 | else 76 | offset -= max(1, (distance - (int)ceil(threshold))/2 ); 77 | } 78 | } 79 | else if(overlapped) { 80 | return Overlap(offset, overlap_len, distance); 81 | } 82 | 83 | return Overlap(0,0,0); 84 | } 85 | bool Overlap::test(){ 86 | Sequence r1[5] = { 87 | Sequence("TTTGCAGGCACCTACCACTGTACCTGTCTAATTTTTCTTCTGCCCTTTTTTTTTTTTTTTTTTTTTTTTTGGGGTAGAGACGAGGCCTTGCTATGTAGCCCTTGCTGGTCTCAAACTCCTCGCCTCAAGTGATCCTCCTGCCTCGGCCTCC"), 88 | Sequence("CCCTATGTCTACAAAACATCAGAAAATTAGGGTGTGGTGGCTCATGCCTATAGTCATAGCTACATAGGAGGCTGAGGCAGGAGGATCGCTTGAGGGCAGGAGGATCACTCGAGCTCTGAAGGTCAACGCTGCAGTGAGCTATGATCGTGCC"), 89 | Sequence("TAGAGGGCTCAGATGCATTCCTTTTTAGCAGTGCTCTTATTTGGCATTGGTGGTGCTGTTTCTGTTGACCACTCCCAGAGTCTCTGGATGTTTTGTTATTCCTTTACCTCCCTAGCCTCTCCTTGGGGTTTCTTTGCAGGCTCTTGCTCTC"), 90 | Sequence("CCTGGGTAGCTGGGATACAGGCGCCCGCCACCACGCCCGGCTAATTTTGTATTTTTAGTAGAGACGAGGTTTCACCACATTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGGTGATCTGCCTGCCTCAGCCTCCTAGAGTGCTGGG"), 91 | Sequence("GTTCCTTTTAACATAGAAAGCAGCTAATTTTCCTATTCAAAAAATGGAGCTCTATTAAAAGATAAAACAGCAGCTTAGCTCTAGGTAAAGTGATCCATGCGGTTCTTCTTCTTTTTTTTGTTTTGAGATGGACTCTCGCTCTGTCACCCA") 92 | }; 93 | Sequence r2[5] = { 94 | Sequence("CATGGTGGCTCATGCCTGTAATCCCAGTGGTTTGGGAGGCCGAGGCAGGAGGATCACTTGAGGCGAGGAGTTTGAGACCAGCAAGGGCTACATAGCAAGGCCTCGTCTCTACCCCAAAAAAAAAAAAAAAAAAAAAAAAAGGGCAGAAGAA"), 95 | Sequence("AGTGCAGTGGCACGATCATAGCTCACTGCAGCGTTGACCTTCAGAGCTCGAGTGATCCTCCTGCCCTCAAGCGATCCTCCTGCCTCAGCCTCCTATGTAGCTATGACTATAGGCATGAGCCACCACACCCTAATTTTCTGATGTTTTGTAG"), 96 | 
Sequence("CTGGAGATAAACACCTAGCAGTCATGAGACAAAGCTCTGCAATGCTTGTATTTATGGGATACAAGAGAGAGCAAGAGCCTGCAAAGAAACCCCAAGGAGAGGCTAGGGAGGTAAAGGAATAACAAAACATCCAGAGACACTGGGAGTGGTC"), 97 | Sequence("CCCAGCACTCTAGGAGGCTGAGGCAGGCAGATCACCTGAGGTCAGGAGTTTGAGACCAGCCTGGCCAATGTGGTGAAACCTCGTCTCTACTAAAAATACAAAATTAGCCGGGCGTGGTGGCGGGCGCCTGTAATCCCAGCTACCCAGC"), 98 | Sequence("TGGGTGACAGAGCGAGAGTCCATCTCAAAACAAAAAAAAGAAGAAGAACCGCACTGGATCACTTTACCTCAGAGCTAAGCTGCTGTTTTATCTTTTAATAGAGCTCCATTTTTTGAATAGGAAAATTAGCTGCTTTCTATGTTAAAAGGAA") 99 | }; 100 | Overlap overlap[5] = { 101 | Overlap(34L, 117L, 0L), 102 | Overlap(8L, 143L, 0L), 103 | Overlap(66L, 85L, 1L), 104 | Overlap(-1, 147, 2L), 105 | Overlap(0, 0, 0) 106 | }; 107 | for (int i=0;i<5;i++){ 108 | Overlap fit = Overlap::fit(r1[i], r2[i]); 109 | if (fit.mOffset!=overlap[i].mOffset || fit.mOverlapLen!=overlap[i].mOverlapLen || fit.mDistance!=overlap[i].mDistance){ 110 | cout<<"Fail in Overlap::fit() with sequence"< 5 | #include 6 | #include 7 | #include "sequence.h" 8 | 9 | using namespace std; 10 | 11 | class Overlap{ 12 | public: 13 | Overlap(int offset, int overlapLen, int distance); 14 | static Overlap fit(Sequence R1, Sequence R2); 15 | static bool test(); 16 | 17 | public: 18 | int mOffset; 19 | int mOverlapLen; 20 | int mDistance; 21 | bool mOverlapped; 22 | }; 23 | 24 | #endif -------------------------------------------------------------------------------- /src/pescanner.cpp: -------------------------------------------------------------------------------- 1 | #include "pescanner.h" 2 | 3 | PairEndScanner::PairEndScanner(string mutationFile, string refFile, string read1File, string read2File, string html, string json, int threadNum) 4 | : Scanner(mutationFile, refFile, read1File, read2File, html, json, threadNum) { 5 | } 6 | 7 | PairEndScanner::~PairEndScanner() {} 8 | 9 | bool PairEndScanner::scanNextEnd(ReadPack* pack){ 10 | bool simplified = GlobalSettings::simplifiedMode; 11 | 12 | for(int p=0;pcount;p++){ 13 | ReadPair* pair = pack->data[p]; 
14 | Read* r1 = pair->mLeft; 15 | Read* r2 = pair->mRight; 16 | Read* rcr1 = NULL; 17 | Read* rcr2 = NULL; 18 | Read* merged = pair->fastMerge(); 19 | Read* mergedRC = NULL; 20 | if(merged != NULL) 21 | mergedRC = merged->reverseComplement(); 22 | else { 23 | rcr1 = r1->reverseComplement(); 24 | rcr2 = r2->reverseComplement(); 25 | } 26 | 27 | if(merged != NULL) { 28 | if(!scanRead(merged, pair, false) || simplified) delete merged; 29 | if(!scanRead(mergedRC, pair, true) || simplified) delete mergedRC; 30 | } else { 31 | if(!scanRead(rcr1, pair, true) || simplified) delete rcr1; 32 | if(!scanRead(rcr2, pair, true) || simplified) delete rcr2; 33 | bool leftMatched = scanRead(r1, pair, false); 34 | bool rightMatched = scanRead(r2, pair, false); 35 | if(leftMatched && !simplified) { 36 | pair->mLeft = NULL; 37 | } 38 | if(rightMatched && !simplified) { 39 | pair->mRight = NULL; 40 | } 41 | } 42 | 43 | delete pair; 44 | } 45 | 46 | delete pack->data; 47 | delete pack; 48 | 49 | return true; 50 | } 51 | -------------------------------------------------------------------------------- /src/pescanner.h: -------------------------------------------------------------------------------- 1 | #ifndef PE_SCANNNER_H 2 | #define PE_SCANNNER_H 3 | 4 | #include "scanner.h" 5 | 6 | template <> 7 | struct reader_trait { 8 | typedef FastqReaderPair FastqReaderType; 9 | static constexpr const char* name="pairs"; 10 | }; 11 | 12 | class PairEndScanner : public Scanner { 13 | public: 14 | PairEndScanner(string mutationFile, string refFile, string read1File, string read2File, string html="", string json = "", int threadnum=1); 15 | ~PairEndScanner(); 16 | 17 | private: 18 | friend Scanner; 19 | bool scanNextEnd(ReadPack* pack); 20 | reader_trait::FastqReaderType *fastqReader() { 21 | return new FastqReaderPair(mRead1File, mRead2File); 22 | } 23 | }; 24 | 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/read.cpp: 
-------------------------------------------------------------------------------- 1 | #include "read.h" 2 | #include 3 | #include "util.h" 4 | #include 5 | 6 | Read::Read(string name, string seq, string strand, string quality){ 7 | mName = name; 8 | mSeq = Sequence(seq); 9 | mStrand = strand; 10 | mQuality = quality; 11 | mHasQuality = true; 12 | } 13 | 14 | Read::Read(string name, string seq, string strand){ 15 | mName = name; 16 | mSeq = Sequence(seq); 17 | mStrand = strand; 18 | mHasQuality = false; 19 | } 20 | 21 | Read::Read(string name, Sequence seq, string strand, string quality){ 22 | mName = name; 23 | mSeq = seq; 24 | mStrand = strand; 25 | mQuality = quality; 26 | mHasQuality = true; 27 | } 28 | 29 | Read::Read(string name, Sequence seq, string strand){ 30 | mName = name; 31 | mSeq = seq; 32 | mStrand = strand; 33 | mHasQuality = false; 34 | } 35 | 36 | Read::Read(Read &r) { 37 | mName = r.mName; 38 | mSeq = r.mSeq; 39 | mStrand = r.mStrand; 40 | mQuality = r.mQuality; 41 | mHasQuality = r.mHasQuality; 42 | } 43 | 44 | Read::Read(char* twoBitBuf, int readLen, char meanQual) { 45 | mName = "Unknown"; 46 | string seq(readLen, '\0'); 47 | const char bases[4] = {'A', 'T', 'C', 'G'}; 48 | for(int i=0; i> shift; 52 | seq[i] = bases[b]; 53 | } 54 | 55 | mSeq = Sequence(seq); 56 | mStrand = "+"; 57 | mQuality = string(readLen, meanQual); 58 | mHasQuality = true; 59 | } 60 | 61 | char Read::meanQuality() { 62 | if(length() == 0) 63 | return '\0'; 64 | int total = 0; 65 | for(int i=0; i& breaks){ 135 | std::cout << mName << endl; 136 | std::cout << makeStringWithBreaks(mSeq.mStr, breaks)<< endl; 137 | std::cout << mStrand << endl; 138 | if(mHasQuality) 139 | std::cout << makeStringWithBreaks(mQuality, breaks) << endl; 140 | } 141 | 142 | string Read::makeStringWithBreaks(const string origin, vector& breaks) { 143 | string ret = origin.substr(0, breaks[0]); 144 | for(size_t i=0;i0) 148 | ret += " " + origin.substr(breaks[breaks.size()-1], origin.length() - 
breaks[breaks.size()-1]); 149 | return ret; 150 | } 151 | 152 | void Read::printHtmlTDWithBreaks(ofstream& file, vector& breaks, int mutid, int matchid) { 153 | file << "" << makeHtmlSeqWithQual(0, breaks[0]) << ""; 154 | for(size_t i=0;i" << makeHtmlSeqWithQual(breaks[i], breaks[i+1]-breaks[i]) << ""; 159 | } 160 | if(breaks[breaks.size()-1]>0) { 161 | file << "" << makeHtmlSeqWithQual(breaks[breaks.size()-1], mSeq.mStr.length() - breaks[breaks.size()-1]) << ""; 163 | } 164 | } 165 | 166 | string Read::makeHtmlSeqWithQual(int start, int length) { 167 | //colorful HTML report is dynamically created by JavaScript 168 | //here we return gray report first 169 | return mSeq.mStr.substr(start, length); 170 | 171 | /*stringstream ss; 172 | for(int i=start;i"<< mSeq.mStr[i] << ""; 174 | } 175 | return ss.str();*/ 176 | } 177 | 178 | void Read::escapeSpecialQualityChar(string& str) { 179 | const char* data = str.c_str(); 180 | for(size_t i=0;i& breaks) { 187 | if(breaks.size()>0){ 188 | file << "\n["; 189 | file << "'" << mSeq.mStr.substr(0, breaks[0]) << "'"; 190 | file << ", " ; 191 | string qualstr = mQuality.substr(0, breaks[0]); 192 | escapeSpecialQualityChar(qualstr); 193 | file << "'" << qualstr << "'"; 194 | file << "],"; 195 | } 196 | for(size_t i=0;i0){ 206 | file << "\n["; 207 | file << "'" << mSeq.mStr.substr(breaks[breaks.size()-1], mSeq.mStr.length() - breaks[breaks.size()-1]) << "'"; 208 | file << ", " ; 209 | string qualstr = mQuality.substr(breaks[breaks.size()-1], mSeq.mStr.length() - breaks[breaks.size()-1]); 210 | escapeSpecialQualityChar(qualstr); 211 | file << "'" << qualstr << "'"; 212 | file << "],"; 213 | } 214 | 215 | } 216 | 217 | string Read::qualityColor(char qual) { 218 | if(qual >= 'I') // >= Q40, extremely high quality 219 | return "#78C6B9"; 220 | if(qual >= '?') // Q30 ~ Q39, high quality 221 | return "#33BBE2"; 222 | if(qual >= '5') // Q20 ~ Q29, moderate quality 223 | return "#666666"; 224 | if(qual >= '0') // Q15 ~ Q19, low quality 225 
| return "#E99E5B"; 226 | else // <= Q14, extremely low quality 227 | return "#FF0000"; 228 | } 229 | 230 | Read* Read::reverseComplement(){ 231 | Sequence seq = ~mSeq; 232 | string qual; 233 | qual.assign(mQuality.rbegin(), mQuality.rend()); 234 | string strand = (mStrand=="+") ? "-" : "+"; 235 | return new Read(mName, seq, strand, qual); 236 | } 237 | 238 | string Read::lastIndex(){ 239 | int len = mName.length(); 240 | if(len<5) 241 | return ""; 242 | for(int i=len-5;i>=0;i--){ 243 | if(mName[i]==':' || mName[i]=='+'){ 244 | return mName.substr(i+1, len-i); 245 | } 246 | } 247 | return ""; 248 | } 249 | 250 | int Read::lowQualCount(int qual){ 251 | int count = 0; 252 | for(size_t q=0;qreverseComplement(); 290 | int len1 = mLeft->length(); 291 | int len2 = rcRight->length(); 292 | // use the pointer directly for speed 293 | const char* str1 = mLeft->mSeq.mStr.c_str(); 294 | const char* str2 = rcRight->mSeq.mStr.c_str(); 295 | const char* qual1 = mLeft->mQuality.c_str(); 296 | const char* qual2 = rcRight->mQuality.c_str(); 297 | 298 | // we require at least 30 bp overlapping to merge a pair 299 | const int MIN_OVERLAP = 30; 300 | bool overlapped = false; 301 | int olen = MIN_OVERLAP; 302 | int diff = 0; 303 | // the diff count for 1 high qual + 1 low qual 304 | int lowQualDiff = 0; 305 | 306 | while(olen <= min(len1, len2)){ 307 | diff = 0; 308 | lowQualDiff = 0; 309 | bool ok = true; 310 | int offset = len1 - olen; 311 | for(int i=0;i= Q30 and the other is <= Q15 315 | if((qual1[offset+i]>='?' 
&& qual2[i]<='0') || (qual1[offset+i]<='0' && qual2[i]>='?')){ 316 | lowQualDiff++; 317 | } 318 | // we disallow high quality diff, and only allow up to 3 low qual diff 319 | if(diff>lowQualDiff || lowQualDiff>=3){ 320 | ok = false; 321 | break; 322 | } 323 | } 324 | } 325 | if(ok){ 326 | overlapped = true; 327 | break; 328 | } 329 | olen++; 330 | } 331 | 332 | if(overlapped){ 333 | int offset = len1 - olen; 334 | stringstream ss; 335 | ss << mLeft->mName << " merged offset:" << offset << " overlap:" << olen << " diff:" << diff; 336 | string mergedName = ss.str(); 337 | string mergedSeq = mLeft->mSeq.mStr.substr(0, offset) + rcRight->mSeq.mStr; 338 | string mergedQual = mLeft->mQuality.substr(0, offset) + rcRight->mQuality; 339 | // quality adjuction and correction for low qual diff 340 | for(int i=0;i='?' && qual2[i]<='0'){ 343 | mergedSeq[offset+i] = str1[offset+i]; 344 | mergedQual[offset+i] = qual1[offset+i]; 345 | } else { 346 | mergedSeq[offset+i] = str2[i]; 347 | mergedQual[offset+i] = qual2[i]; 348 | } 349 | } else { 350 | // add the quality of the pair to make a high qual 351 | mergedQual[offset+i] = qual1[offset+i] + qual2[i] - 33; 352 | } 353 | } 354 | delete rcRight; 355 | return new Read(mergedName, mergedSeq, "+", mergedQual); 356 | } 357 | 358 | delete rcRight; 359 | return NULL; 360 | } 361 | 362 | bool ReadPair::test(){ 363 | Read* left = new Read("@NS500713:64:HFKJJBGXY:1:11101:20469:1097 1:N:0:TATAGCCT+GGTCCCGA", 364 | "TTTTTTCTCTTGGACTCTAACACTGTTTTTTCTTATGAAAACACAGGAGTGATGACTAGTTGAGTGCATTCTTATGAGACTCATAGTCATTCTATGATGTAG", 365 | "+", 366 | "AAAAA6EEEEEEEEEEEEEEEEE#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"); 367 | Read* right = new Read("@NS500713:64:HFKJJBGXY:1:11101:20469:1097 1:N:0:TATAGCCT+GGTCCCGA", 368 | "AAAAAACTACACCATAGAATGACTATGAGTCTCATAAGAATGCACTCAACTAGTCATCACTCCTGTGTTTTCATAAGAAAAAACAGTGTTAGAGTCCAAGAG", 369 | "+", 370 | 
"AAAAA6EEEEE/EEEEEEEEEEE#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE"); 371 | 372 | ReadPair pair(left, right); 373 | Read* merged = pair.fastMerge(); 374 | if(merged == NULL) 375 | return false; 376 | 377 | if(merged->mSeq.mStr != "TTTTTTCTCTTGGACTCTAACACTGTTTTTTCTTATGAAAACACAGGAGTGATGACTAGTTGAGTGCATTCTTATGAGACTCATAGTCATTCTATGATGTAGTTTTTT") 378 | return false; 379 | 380 | //merged->print(); 381 | return true; 382 | } 383 | -------------------------------------------------------------------------------- /src/read.h: -------------------------------------------------------------------------------- 1 | #ifndef READ_H 2 | #define READ_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "sequence.h" 10 | #include 11 | 12 | using namespace std; 13 | 14 | class Read{ 15 | public: 16 | Read(string name, string seq, string strand, string quality); 17 | Read(string name, Sequence seq, string strand, string quality); 18 | Read(string name, string seq, string strand); 19 | Read(string name, Sequence seq, string strand); 20 | Read(Read &r); 21 | Read(char* twoBitBuf, int readLen, char meanQual); 22 | void print(); 23 | void printFile(ofstream& file); 24 | Read* reverseComplement(); 25 | string firstIndex(); 26 | string lastIndex(); 27 | // default is Q20 28 | int lowQualCount(int qual=20); 29 | int length(); 30 | void printWithBreaks(vector& breaks); 31 | void printHtmlTDWithBreaks(ofstream& file, vector& breaks, int mutid, int matchid); 32 | void printJSWithBreaks(ofstream& file, vector& breaks); 33 | char meanQuality(); 34 | int numOfBase(char base); 35 | char base2char(char base); 36 | char* to2bit(); 37 | 38 | public: 39 | static bool test(); 40 | 41 | private: 42 | string makeStringWithBreaks(const string origin, vector& breaks); 43 | string makeHtmlSeqWithQual(int start, int length); 44 | string qualityColor(char qual); 45 | void escapeSpecialQualityChar(string& str); 46 | 47 | public: 48 | string mName; 
49 | Sequence mSeq; 50 | string mStrand; 51 | string mQuality; 52 | bool mHasQuality; 53 | }; 54 | 55 | class ReadPair{ 56 | public: 57 | ReadPair(Read* left, Read* right); 58 | ~ReadPair(); 59 | 60 | // merge a pair, without consideration of seq error caused false INDEL 61 | Read* fastMerge(); 62 | public: 63 | Read* mLeft; 64 | Read* mRight; 65 | 66 | public: 67 | static bool test(); 68 | }; 69 | 70 | #endif -------------------------------------------------------------------------------- /src/rollinghash.cpp: -------------------------------------------------------------------------------- 1 | #include "rollinghash.h" 2 | #include "builtinmutation.h" 3 | #include "util.h" 4 | #include 5 | #include "globalsettings.h" 6 | 7 | // we use 512M memory 8 | const int BLOOM_FILTER_LENGTH = (1<<29); 9 | 10 | RollingHash::RollingHash(int window, bool allowTwoSub) { 11 | mWindow = min(48, window); 12 | mAllowEditDistanceIs2 = allowTwoSub; 13 | mBloomFilterArray = new char[BLOOM_FILTER_LENGTH]; 14 | memset(mBloomFilterArray, 0, BLOOM_FILTER_LENGTH * sizeof(char)); 15 | } 16 | 17 | RollingHash::~RollingHash() { 18 | delete mBloomFilterArray; 19 | mBloomFilterArray = NULL; 20 | } 21 | 22 | void RollingHash::initMutations(vector& mutationList) { 23 | // for memory and speed consideration 24 | // when dealing with big mutation list (usually from VCF), we apply more strict matching 25 | if(mutationList.size() > 5000) 26 | mAllowEditDistanceIs2 = false; 27 | 28 | for(size_t i=0; i > RollingHash::getKeyTargets() { 45 | return mKeyTargets; 46 | } 47 | 48 | bool RollingHash::add(string s, int target, bool allowIndel) { 49 | if((ssize_t)s.length() < mWindow + 2) 50 | return false; 51 | 52 | int center = s.length() / 2; 53 | int start = center - mWindow / 2; 54 | 55 | // mutations cannot happen in skipStart to skipEnd 56 | int skipStart = center - 1; 57 | int skipEnd = center + 1; 58 | 59 | const char* data = s.c_str(); 60 | 61 | long* hashes = new long[mWindow]; 62 | memset(hashes, 0, 
sizeof(long)*mWindow); 63 | 64 | long* accum = new long[mWindow]; 65 | memset(accum, 0, sizeof(long)*mWindow); 66 | 67 | // initialize 68 | long origin = 0; 69 | for(int i=0; i= skipStart && i+start <= skipEnd ) 85 | continue; 86 | for(int b1=0; b1<4; b1++){ 87 | char base1 = bases[b1]; 88 | if(base1 == data[start + i]) 89 | continue; 90 | 91 | long mut1 = origin - hash(data[start + i], i) + hash(base1, i); 92 | addHash(mut1, target); 93 | /*cout<= skipStart && j+start <= skipEnd ) 105 | continue; 106 | for(int b2=0; b2<4; b2++){ 107 | char base2 = bases[b2]; 108 | if(base2 == data[start + j]) 109 | continue; 110 | long mut2 = mut1 - hash(data[start + j], j) + hash(base2, j); 111 | addHash(mut2, target); 112 | /*cout<= skipStart && i+start <= skipEnd ) 135 | continue; 136 | // make del of i first 137 | long mutOfDel; 138 | if (i==0) 139 | mutOfDel = origin - accum[i] + altVal; 140 | else 141 | mutOfDel = origin - accum[i] + (accum[i-1]<<1) + altVal; 142 | if(mutOfDel != origin) 143 | addHash(mutOfDel, target); 144 | 145 | // make insertion 146 | for(int b=0; b<4; b++){ 147 | char base = bases[b]; 148 | // shift the first base 149 | long mutOfIns = origin - accum[i] + hash(base, i) + ((accum[i] - hashes[0]) >> 1); 150 | if(mutOfIns != origin && mutOfIns != mutOfDel){ 151 | addHash(mutOfIns, target); 152 | /*cout << mutOfIns<<", insert at " << i << " with " << base << ": "; 153 | for(int p=1;p<=i;p++) 154 | cout << data[start + p]; 155 | cout << base; 156 | for(int p=i+1;p RollingHash::hitTargets(const string s) { 171 | map ret; 172 | if((ssize_t)s.length() < mWindow) 173 | return ret; 174 | 175 | const char* data = s.c_str(); 176 | 177 | // initialize 178 | long curHash = 0; 179 | for(int i=0; i>1) + hash(data[i], mWindow-1); 187 | addHit(ret, curHash); 188 | } 189 | 190 | return ret; 191 | } 192 | 193 | inline void RollingHash::addHit(map& ret, long hash) { 194 | //update bloom filter array 195 | const long bloomFilterFactors[3] = {1713137323, 371371377, 
7341234131}; 196 | for(int b=0; b<3; b++) { 197 | if(mBloomFilterArray[(bloomFilterFactors[b] * hash) & (BLOOM_FILTER_LENGTH-1)] == 0 ) 198 | return; 199 | } 200 | 201 | if(mKeyTargets.count(hash)) { 202 | for(size_t i=0; i(); 219 | else { 220 | for(size_t i=0; i >::iterator iter; 260 | for(iter= mKeyTargets.begin(); iter!=mKeyTargets.end(); iter++) { 261 | if(iter->second.size() < 2) 262 | continue; 263 | cout << iter->first << endl; 264 | for(size_t i=0; isecond.size(); i++) 265 | cout << iter->second[i] << "\t"; 266 | cout << endl; 267 | } 268 | } 269 | bool RollingHash::test(){ 270 | vector mutationList = Mutation::parseBuiltIn(); 271 | RollingHash rh(48); 272 | rh.initMutations(mutationList); 273 | bool result = true; 274 | for(size_t i=0; i targets = rh.hitTargets(s); 278 | cout << i << ", " << s << endl; 279 | bool found = false; 280 | map::iterator iter; 281 | for(iter=targets.begin(); iter!=targets.end(); iter++) { 282 | size_t t = iter->first; 283 | //int count = iter->second; 284 | cout << t << "\t"; 285 | if(t == i) 286 | found = true; 287 | } 288 | cout << endl; 289 | result &= found; 290 | } 291 | return result; 292 | 293 | } 294 | -------------------------------------------------------------------------------- /src/rollinghash.h: -------------------------------------------------------------------------------- 1 | #ifndef ROLLINGHASH_H 2 | #define ROLLINGHASH_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "mutation.h" 11 | 12 | using namespace std; 13 | 14 | class RollingHash{ 15 | public: 16 | RollingHash(int window = 40, bool allowTwoSub = true); 17 | ~RollingHash(); 18 | void initMutations(vector& mutationList); 19 | 20 | public: 21 | bool add(string s, int target, bool allowIndel = true); 22 | void addHash(long hash, int target); 23 | map hitTargets(const string s); 24 | inline void addHit(map& ret, long hash); 25 | map > getKeyTargets(); 26 | 27 | inline static long char2val(char c); 28 | inline 
static long hash(char c, int pos); 29 | void dump(); 30 | static bool test(); 31 | 32 | private: 33 | map > mKeyTargets; 34 | int mWindow; 35 | bool mAllowEditDistanceIs2; 36 | char* mBloomFilterArray; 37 | 38 | }; 39 | 40 | 41 | #endif -------------------------------------------------------------------------------- /src/scanner.h: -------------------------------------------------------------------------------- 1 | #ifndef SCANNNER_H 2 | #define SCANNNER_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "read.h" 8 | #include "mutation.h" 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "rollinghash.h" 14 | 15 | using namespace std; 16 | 17 | template 18 | class ReadPack { 19 | public: 20 | ReadData** data; 21 | int count; 22 | }; 23 | 24 | template 25 | class reader_trait { 26 | public: 27 | //typedef unspecialized FastqReaderType; 28 | const char* name="unspecialized"; 29 | }; 30 | 31 | template 32 | class ReadRepository { 33 | public: 34 | ReadPack** packBuffer; 35 | size_t readPos; 36 | size_t writePos; 37 | size_t readCounter; 38 | std::mutex mtx; 39 | std::condition_variable repoNotFull; 40 | std::condition_variable repoNotEmpty; 41 | }; 42 | 43 | template 44 | class Scanner{ 45 | public: 46 | Scanner(string mutationFile, string refFile, string read1File, string read2File, string html="", string json = "", int threadnum=1); 47 | ~Scanner(); 48 | bool scan(); 49 | void textReport(vector& mutationList, vector *mutationMatches); 50 | void htmlReport(vector& mutationList, vector *mutationMatches); 51 | void jsonReport(vector& mutationList, vector *mutationMatches); 52 | 53 | protected: 54 | bool scanRead(Read* r, ReadData* originalRead, bool reversed); 55 | private: 56 | void initPackRepository(); 57 | void destroyPackRepository(); 58 | void producePack(ReadPack* pack); 59 | void consumePack(); 60 | void producerTask(); 61 | void consumerTask(); 62 | void pushMatch(int i, Match* m, bool needStoreReadToDelete); 63 | 64 | private: 65 | 
string mMutationFile; 66 | string mRefFile; 67 | protected: 68 | string mRead1File; 69 | string mRead2File; 70 | private: 71 | string mHtmlFile; 72 | string mJsonFile; 73 | ReadRepository mRepo; 74 | bool mProduceFinished; 75 | vector mutationList; 76 | vector *mutationMatches; 77 | std::mutex mMutationMtx; 78 | int mThreadNum; 79 | RollingHash* mRollingHash; 80 | vector mReadToDelete; 81 | vector mBufToDelete; 82 | }; 83 | 84 | #include "scanner-impl.h" 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /src/sequence.cpp: -------------------------------------------------------------------------------- 1 | #include "sequence.h" 2 | 3 | Sequence::Sequence(){ 4 | } 5 | 6 | Sequence::Sequence(string seq){ 7 | mStr = seq; 8 | } 9 | 10 | void Sequence::print(){ 11 | std::cout << mStr; 12 | } 13 | 14 | int Sequence::length(){ 15 | return mStr.length(); 16 | } 17 | 18 | Sequence Sequence::reverseComplement(){ 19 | string str(mStr.length(), 0); 20 | for(size_t c=0;c 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std; 10 | 11 | class Sequence{ 12 | public: 13 | Sequence(); 14 | Sequence(string seq); 15 | void print(); 16 | int length(); 17 | Sequence reverseComplement(); 18 | 19 | Sequence operator~(); 20 | 21 | static bool test(); 22 | 23 | public: 24 | string mStr; 25 | }; 26 | 27 | #endif -------------------------------------------------------------------------------- /src/sescanner.cpp: -------------------------------------------------------------------------------- 1 | #include "sescanner.h" 2 | #include "scanner.h" 3 | 4 | SingleEndScanner::SingleEndScanner(string mutationFile, string refFile, string read1File, string html, string json, int threadNum) 5 | : Scanner(mutationFile, refFile, read1File, "", html, json, threadNum) { 6 | } 7 | 8 | SingleEndScanner::~SingleEndScanner() {} 9 | 10 | bool SingleEndScanner::scanNextEnd(ReadPack* pack){ 11 | bool simplified = GlobalSettings::simplifiedMode; 12 | 
for(int p=0;pcount;p++){ 13 | Read* r1 = pack->data[p]; 14 | Read* rcr1 = r1->reverseComplement(); 15 | if(!scanRead(rcr1, r1, true) || simplified) delete rcr1; 16 | if(!scanRead(r1, r1, false) || simplified) delete r1; 17 | } 18 | 19 | delete pack->data; 20 | delete pack; 21 | 22 | return true; 23 | } 24 | -------------------------------------------------------------------------------- /src/sescanner.h: -------------------------------------------------------------------------------- 1 | #ifndef SE_SCANNNER_H 2 | #define SE_SCANNNER_H 3 | 4 | #include "scanner.h" 5 | 6 | template <> 7 | class reader_trait { 8 | public: 9 | typedef FastqReader FastqReaderType; 10 | static constexpr const char* name="reads"; 11 | }; 12 | 13 | class SingleEndScanner : public Scanner { 14 | public: 15 | SingleEndScanner(string mutationFile, string refFile, string read1File, string html="", string json = "", int threadnum=1); 16 | ~SingleEndScanner(); 17 | 18 | private: 19 | friend Scanner; 20 | bool scanNextEnd(ReadPack* pack); 21 | reader_trait::FastqReaderType *fastqReader() { 22 | return new FastqReader(mRead1File); 23 | } 24 | }; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/unittest.cpp: -------------------------------------------------------------------------------- 1 | #include "unittest.h" 2 | #include "editdistance.h" 3 | #include "sequence.h" 4 | #include "fastqreader.h" 5 | #include "fastareader.h" 6 | #include "vcfreader.h" 7 | #include "overlap.h" 8 | #include "read.h" 9 | #include "rollinghash.h" 10 | #include 11 | 12 | UnitTest::UnitTest(){ 13 | 14 | } 15 | 16 | void UnitTest::run(){ 17 | bool passed = true; 18 | passed &= editdistance_test(); 19 | passed &= Sequence::test(); 20 | passed &= FastqReader::test(); 21 | passed &= FastaReader::test(); 22 | passed &= VcfReader::test(); 23 | passed &= Overlap::test(); 24 | passed &= Read::test(); 25 | passed &= ReadPair::test(); 26 | passed &= RollingHash::test(); 27 
| printf("\n==========================\n"); 28 | printf("%s\n\n", passed?"PASSED":"FAILED"); 29 | } -------------------------------------------------------------------------------- /src/unittest.h: -------------------------------------------------------------------------------- 1 | #ifndef UNITDISTANCE_H 2 | #define UNITDISTANCE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class UnitTest{ 9 | public: 10 | UnitTest(); 11 | void run(); 12 | }; 13 | 14 | #endif -------------------------------------------------------------------------------- /src/util.h: -------------------------------------------------------------------------------- 1 | #ifndef UTIL_H 2 | #define UTIL_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | inline bool starts_with( string const & value, string const & starting) 14 | { 15 | if (starting.size() > value.size()) return false; 16 | return equal(starting.begin(), starting.end(), value.begin()); 17 | } 18 | 19 | inline bool ends_with( string const & value, string const & ending) 20 | { 21 | if (ending.size() > value.size()) return false; 22 | return equal(ending.rbegin(), ending.rend(), value.rbegin()); 23 | } 24 | 25 | inline string trim(const string& str) 26 | { 27 | string::size_type pos = str.find_first_not_of(' '); 28 | if (pos == string::npos) 29 | { 30 | return string(""); 31 | } 32 | string::size_type pos2 = str.find_last_not_of(' '); 33 | if (pos2 != string::npos) 34 | { 35 | return str.substr(pos, pos2 - pos + 1); 36 | } 37 | return str.substr(pos); 38 | } 39 | 40 | inline int split(const string& str, vector& ret_, string sep = ",") 41 | { 42 | if (str.empty()) 43 | { 44 | return 0; 45 | } 46 | 47 | string tmp; 48 | string::size_type pos_begin = str.find_first_not_of(sep); 49 | string::size_type comma_pos = 0; 50 | 51 | while (pos_begin != string::npos) 52 | { 53 | comma_pos = str.find(sep, pos_begin); 54 | if (comma_pos != string::npos) 55 | { 56 | tmp = 
str.substr(pos_begin, comma_pos - pos_begin); 57 | pos_begin = comma_pos + sep.length(); 58 | } 59 | else 60 | { 61 | tmp = str.substr(pos_begin); 62 | pos_begin = comma_pos; 63 | } 64 | 65 | ret_.push_back(tmp); 66 | tmp.clear(); 67 | } 68 | return 0; 69 | } 70 | 71 | inline string replace(const string& str, const string& src, const string& dest) 72 | { 73 | string ret; 74 | 75 | string::size_type pos_begin = 0; 76 | string::size_type pos = str.find(src); 77 | while (pos != string::npos) 78 | { 79 | ret.append(str.data() + pos_begin, pos - pos_begin); 80 | ret += dest; 81 | pos_begin = pos + 1; 82 | pos = str.find(src, pos_begin); 83 | } 84 | if (pos_begin < str.length()) 85 | { 86 | ret.append(str.begin() + pos_begin, str.end()); 87 | } 88 | return ret; 89 | } 90 | 91 | //Check if a string is a file or directory 92 | inline bool file_exists(const string& s) 93 | { 94 | bool exists = false; 95 | if(s.length() > 0) { 96 | struct stat status; 97 | int result = stat( s.c_str(), &status ); 98 | if(result == 0) { 99 | exists = true; 100 | } 101 | } 102 | return exists; 103 | } 104 | 105 | inline string basename(const string& filename){ 106 | string::size_type pos = filename.find_last_of('/'); 107 | if (pos == string::npos) 108 | return filename; 109 | else if(pos == filename.length()-1) 110 | return ""; // a bad filename 111 | else 112 | return filename.substr(pos+1, filename.length() - pos - 1); 113 | } 114 | 115 | // check if a string is a directory 116 | inline bool is_directory(const string& path) 117 | { 118 | bool isdir = false; 119 | struct stat status; 120 | // visual studion use _S_IFDIR instead of S_IFDIR 121 | // http://msdn.microsoft.com/en-us/library/14h5k7ff.aspx 122 | #ifdef _MSC_VER 123 | #define S_IFDIR _S_IFDIR 124 | #endif 125 | stat( path.c_str(), &status ); 126 | if ( status.st_mode & S_IFDIR ) { 127 | isdir = true; 128 | } 129 | // #endif 130 | return isdir; 131 | } 132 | 133 | inline void check_file_valid(const string& s) { 134 | 
if(!file_exists(s)){ 135 | cout << "ERROR: file '" << s << "' doesn't exist, quit now" << endl; 136 | exit(-1); 137 | } 138 | if(is_directory(s)){ 139 | cout << "ERROR: '" << s << "' is a folder, not a file, quit now" << endl; 140 | exit(-1); 141 | } 142 | } 143 | 144 | // Remove non alphabetic characters from a string 145 | inline string str_keep_alpha(const string& s) 146 | { 147 | string new_str; 148 | for( size_t it =0; it < s.size(); it++) { 149 | if( isalpha(s[it]) ) { 150 | new_str += s[it]; 151 | } 152 | } 153 | return new_str; 154 | } 155 | 156 | 157 | // Remove invalid sequence characters from a string 158 | inline void str_keep_valid_sequence( string& s, bool forceUpperCase = false) 159 | { 160 | size_t total = 0; 161 | const char case_gap = 'a' - 'A'; 162 | for( size_t it =0; it < s.size(); it++) { 163 | char c = s[it]; 164 | if(forceUpperCase && c>='a' && c<='z') { 165 | c -= case_gap; 166 | } 167 | if( isalpha(c) || c == '-' || c == '*' ) { 168 | s[total] = c; 169 | total ++; 170 | } 171 | } 172 | 173 | s.resize(total); 174 | } 175 | 176 | inline int find_with_right_pos(const string& str, const string& pattern, int start=0) { 177 | int pos = str.find(pattern, start); 178 | if (pos < 0) 179 | return -1; 180 | else 181 | return pos + pattern.length(); 182 | } 183 | 184 | inline void str2upper(string& s){ 185 | transform(s.begin(), s.end(), s.begin(), (int (*)(int))toupper); 186 | } 187 | 188 | inline void str2lower(string& s){ 189 | transform(s.begin(), s.end(), s.begin(), (int (*)(int))tolower); 190 | } 191 | 192 | #endif /* UTIL_H */ 193 | -------------------------------------------------------------------------------- /src/vcfreader.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "vcfreader.h" 3 | #include "util.h" 4 | #include 5 | 6 | VcfReader::VcfReader(string faFile) 7 | { 8 | // Set locale and disable stdio synchronization to improve iostream performance 9 | // 
http://www.drdobbs.com/the-standard-librarian-iostreams-and-std/184401305 10 | // http://stackoverflow.com/questions/5166263/how-to-get-iostream-to-perform-better 11 | setlocale(LC_ALL,"C"); 12 | ios_base::sync_with_stdio(false); 13 | 14 | mVcfFile = faFile; 15 | if (is_directory(mVcfFile)) { 16 | string error_msg = "There is a problem with the provided vcf file: \'"; 17 | error_msg.append(mVcfFile); 18 | error_msg.append("\' is a directory NOT a file...\n"); 19 | throw invalid_argument(error_msg); 20 | } 21 | mVcfFileStream.open( mVcfFile.c_str(),ios::in); 22 | // verify that the file can be read 23 | if (!mVcfFileStream.is_open()) { 24 | string msg = "There is a problem with the provided vcf file: could NOT read "; 25 | msg.append(mVcfFile.c_str()); 26 | msg.append("...\n"); 27 | throw invalid_argument(msg); 28 | } 29 | } 30 | 31 | VcfReader::~VcfReader() 32 | { 33 | if (mVcfFileStream.is_open()) { 34 | mVcfFileStream.close(); 35 | } 36 | } 37 | 38 | bool VcfReader::readNext() 39 | { 40 | 41 | string line = ""; 42 | vector items; 43 | 44 | getline(mVcfFileStream,line,'\n'); 45 | 46 | if(line.length() == 0) 47 | return false; 48 | 49 | split(line, items, "\t"); 50 | 51 | if(items.size() < 8) 52 | return false; 53 | 54 | if(items[0][0]=='#') 55 | return false; 56 | 57 | //split the alt by comma to make multiple variants, GATK usually output such kind of variant like C>T,AT 58 | vector alts; 59 | split(trim(items[4]), alts, ","); 60 | for(size_t a=0; a=9) 78 | var.format = trim(items[8]); 79 | 80 | mVariants.push_back(var); 81 | } 82 | 83 | return true; 84 | } 85 | 86 | 87 | void VcfReader::readAll() { 88 | while(!mVcfFileStream.eof()){ 89 | readNext(); 90 | } 91 | } 92 | 93 | void VcfReader::printAll() { 94 | for(size_t i=0;i 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include "util.h" 14 | 15 | using namespace std; 16 | 17 | class Variant{ 18 | public: 19 | inline string gene() { 20 | int start = 
find_with_right_pos(info, "Gene.refGene="); 21 | if (start<0){ 22 | start = find_with_right_pos(info, "GENE="); 23 | if(start<0) 24 | return ""; 25 | } 26 | int end1 = info.find(";", start); 27 | int end2 = info.find(":", start); 28 | int end = 0; 29 | if(end1>=0) 30 | end = end1; 31 | if(end2>=0 && (end1<0 || end2=0 && end>start){ 34 | return info.substr(start, end-start); 35 | } 36 | return ""; 37 | } 38 | inline string cdsChange(){ 39 | int start = find_with_right_pos(info, "CDS=c."); 40 | if (start<0){ 41 | start = find_with_right_pos(info, ":c."); 42 | if(start<0) 43 | return ""; 44 | } 45 | int end1 = info.find(";", start); 46 | int end2 = info.find(":", start); 47 | int end = 0; 48 | if(end1>=0) 49 | end = end1; 50 | if(end2>=0 && (end1<0 || end2=0 && end>start){ 53 | return "c."+info.substr(start, end-start); 54 | } 55 | return ""; 56 | } 57 | inline string aaChange(){ 58 | int start = find_with_right_pos(info, "AA=p."); 59 | if (start<0){ 60 | start = find_with_right_pos(info, ":p."); 61 | if(start<0) 62 | return ""; 63 | } 64 | int end1 = info.find(";", start); 65 | int end2 = info.find(":", start); 66 | int end = 0; 67 | if(end1>=0) 68 | end = end1; 69 | if(end2>=0 && (end1<0 || end2=0 && end>start){ 72 | return "p."+info.substr(start, end-start); 73 | } 74 | return ""; 75 | } 76 | public: 77 | string chrom; 78 | int pos; 79 | string id; 80 | string ref; 81 | string alt; 82 | string qual; 83 | string filter; 84 | string info; 85 | string format; 86 | }; 87 | 88 | class VcfReader 89 | { 90 | public: 91 | VcfReader(string vcfFile); 92 | ~VcfReader(); 93 | bool readNext(); 94 | void readAll(); 95 | void printAll(); 96 | 97 | inline vector& variants() { 98 | return mVariants; 99 | } 100 | 101 | static bool test(); 102 | 103 | public: 104 | vector mVariants; 105 | 106 | 107 | private: 108 | string mVcfFile; 109 | ifstream mVcfFileStream; 110 | }; 111 | 112 | 113 | #endif 114 | 115 | 
-------------------------------------------------------------------------------- /src/zlib/gzguts.h: -------------------------------------------------------------------------------- 1 | /* gzguts.h -- zlib internal header definitions for gz* operations 2 | * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | #ifdef _LARGEFILE64_SOURCE 7 | # ifndef _LARGEFILE_SOURCE 8 | # define _LARGEFILE_SOURCE 1 9 | # endif 10 | # ifdef _FILE_OFFSET_BITS 11 | # undef _FILE_OFFSET_BITS 12 | # endif 13 | #endif 14 | 15 | #ifdef HAVE_HIDDEN 16 | # define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) 17 | #else 18 | # define ZLIB_INTERNAL 19 | #endif 20 | 21 | #include 22 | #include "zlib.h" 23 | #ifdef STDC 24 | # include 25 | # include 26 | # include 27 | #endif 28 | #include 29 | 30 | #ifdef _WIN32 31 | # include 32 | #endif 33 | 34 | #if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32) 35 | # include 36 | #endif 37 | 38 | #ifdef WINAPI_FAMILY 39 | # define open _open 40 | # define read _read 41 | # define write _write 42 | # define close _close 43 | #endif 44 | 45 | #ifdef NO_DEFLATE /* for compatibility with old definition */ 46 | # define NO_GZCOMPRESS 47 | #endif 48 | 49 | #if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550) 50 | # ifndef HAVE_VSNPRINTF 51 | # define HAVE_VSNPRINTF 52 | # endif 53 | #endif 54 | 55 | #if defined(__CYGWIN__) 56 | # ifndef HAVE_VSNPRINTF 57 | # define HAVE_VSNPRINTF 58 | # endif 59 | #endif 60 | 61 | #if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410) 62 | # ifndef HAVE_VSNPRINTF 63 | # define HAVE_VSNPRINTF 64 | # endif 65 | #endif 66 | 67 | #ifndef HAVE_VSNPRINTF 68 | # ifdef MSDOS 69 | /* vsnprintf may exist on some MS-DOS compilers (DJGPP?), 70 | but for now we just assume it doesn't. 
*/ 71 | # define NO_vsnprintf 72 | # endif 73 | # ifdef __TURBOC__ 74 | # define NO_vsnprintf 75 | # endif 76 | # ifdef WIN32 77 | /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */ 78 | # if !defined(vsnprintf) && !defined(NO_vsnprintf) 79 | # if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 ) 80 | # define vsnprintf _vsnprintf 81 | # endif 82 | # endif 83 | # endif 84 | # ifdef __SASC 85 | # define NO_vsnprintf 86 | # endif 87 | # ifdef VMS 88 | # define NO_vsnprintf 89 | # endif 90 | # ifdef __OS400__ 91 | # define NO_vsnprintf 92 | # endif 93 | # ifdef __MVS__ 94 | # define NO_vsnprintf 95 | # endif 96 | #endif 97 | 98 | /* unlike snprintf (which is required in C99, yet still not supported by 99 | Microsoft more than a decade later!), _snprintf does not guarantee null 100 | termination of the result -- however this is only used in gzlib.c where 101 | the result is assured to fit in the space provided */ 102 | #ifdef _MSC_VER 103 | # define snprintf _snprintf 104 | #endif 105 | 106 | #ifndef local 107 | # define local static 108 | #endif 109 | /* compile with -Dlocal if your debugger can't find static symbols */ 110 | 111 | /* gz* functions always use library allocation functions */ 112 | #ifndef STDC 113 | extern voidp malloc OF((uInt size)); 114 | extern void free OF((voidpf ptr)); 115 | #endif 116 | 117 | /* get errno and strerror definition */ 118 | #if defined UNDER_CE 119 | # include 120 | # define zstrerror() gz_strwinerror((DWORD)GetLastError()) 121 | #else 122 | # ifndef NO_STRERROR 123 | # include 124 | # define zstrerror() strerror(errno) 125 | # else 126 | # define zstrerror() "stdio error (consult errno)" 127 | # endif 128 | #endif 129 | 130 | /* provide prototypes for these when building zlib without LFS */ 131 | #if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0 132 | ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *)); 133 | ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int)); 
134 | ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile)); 135 | ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile)); 136 | #endif 137 | 138 | /* default memLevel */ 139 | #if MAX_MEM_LEVEL >= 8 140 | # define DEF_MEM_LEVEL 8 141 | #else 142 | # define DEF_MEM_LEVEL MAX_MEM_LEVEL 143 | #endif 144 | 145 | /* default i/o buffer size -- double this for output when reading (this and 146 | twice this must be able to fit in an unsigned type) */ 147 | #define GZBUFSIZE 8192 148 | 149 | /* gzip modes, also provide a little integrity check on the passed structure */ 150 | #define GZ_NONE 0 151 | #define GZ_READ 7247 152 | #define GZ_WRITE 31153 153 | #define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */ 154 | 155 | /* values for gz_state how */ 156 | #define LOOK 0 /* look for a gzip header */ 157 | #define COPY 1 /* copy input directly */ 158 | #define GZIP 2 /* decompress a gzip stream */ 159 | 160 | /* internal gzip file state data structure */ 161 | typedef struct { 162 | /* exposed contents for gzgetc() macro */ 163 | struct gzFile_s x; /* "x" for exposed */ 164 | /* x.have: number of bytes available at x.next */ 165 | /* x.next: next output data to deliver or write */ 166 | /* x.pos: current position in uncompressed data */ 167 | /* used for both reading and writing */ 168 | int mode; /* see gzip modes above */ 169 | int fd; /* file descriptor */ 170 | char *path; /* path or fd for error messages */ 171 | unsigned size; /* buffer size, zero if not allocated yet */ 172 | unsigned want; /* requested buffer size, default is GZBUFSIZE */ 173 | unsigned char *in; /* input buffer */ 174 | unsigned char *out; /* output buffer (double-sized when reading) */ 175 | int direct; /* 0 if processing gzip, 1 if transparent */ 176 | /* just for reading */ 177 | int how; /* 0: get header, 1: copy, 2: decompress */ 178 | z_off64_t start; /* where the gzip data started, for rewinding */ 179 | int eof; /* true if end of input file reached */ 180 | int past; /* true if read 
requested past end */ 181 | /* just for writing */ 182 | int level; /* compression level */ 183 | int strategy; /* compression strategy */ 184 | /* seek request */ 185 | z_off64_t skip; /* amount to skip (already rewound if backwards) */ 186 | int seek; /* true if seek request pending */ 187 | /* error information */ 188 | int err; /* error code */ 189 | char *msg; /* error message */ 190 | /* zlib inflate or deflate stream */ 191 | z_stream strm; /* stream structure in-place (not a pointer) */ 192 | } gz_state; 193 | typedef gz_state FAR *gz_statep; 194 | 195 | /* shared functions */ 196 | void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *)); 197 | #if defined UNDER_CE 198 | char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error)); 199 | #endif 200 | 201 | /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t 202 | value -- needed when comparing unsigned to z_off64_t, which is signed 203 | (possible z_off64_t types off_t, off64_t, and long are all signed) */ 204 | #ifdef INT_MAX 205 | # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX) 206 | #else 207 | unsigned ZLIB_INTERNAL gz_intmax OF((void)); 208 | # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax()) 209 | #endif 210 | -------------------------------------------------------------------------------- /src/zlib/inffast.h: -------------------------------------------------------------------------------- 1 | /* inffast.h -- header to use inffast.c 2 | * Copyright (C) 1995-2003, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 
9 | */ 10 | 11 | void ZLIB_INTERNAL inflate_fast OF((z_streamp strm, unsigned start)); 12 | -------------------------------------------------------------------------------- /src/zlib/inffixed.h: -------------------------------------------------------------------------------- 1 | /* inffixed.h -- table for decoding fixed codes 2 | * Generated automatically by makefixed(). 3 | */ 4 | 5 | /* WARNING: this file should *not* be used by applications. 6 | It is part of the implementation of this library and is 7 | subject to change. Applications should only use zlib.h. 8 | */ 9 | 10 | static const code lenfix[512] = { 11 | {96,7,0},{0,8,80},{0,8,16},{20,8,115},{18,7,31},{0,8,112},{0,8,48}, 12 | {0,9,192},{16,7,10},{0,8,96},{0,8,32},{0,9,160},{0,8,0},{0,8,128}, 13 | {0,8,64},{0,9,224},{16,7,6},{0,8,88},{0,8,24},{0,9,144},{19,7,59}, 14 | {0,8,120},{0,8,56},{0,9,208},{17,7,17},{0,8,104},{0,8,40},{0,9,176}, 15 | {0,8,8},{0,8,136},{0,8,72},{0,9,240},{16,7,4},{0,8,84},{0,8,20}, 16 | {21,8,227},{19,7,43},{0,8,116},{0,8,52},{0,9,200},{17,7,13},{0,8,100}, 17 | {0,8,36},{0,9,168},{0,8,4},{0,8,132},{0,8,68},{0,9,232},{16,7,8}, 18 | {0,8,92},{0,8,28},{0,9,152},{20,7,83},{0,8,124},{0,8,60},{0,9,216}, 19 | {18,7,23},{0,8,108},{0,8,44},{0,9,184},{0,8,12},{0,8,140},{0,8,76}, 20 | {0,9,248},{16,7,3},{0,8,82},{0,8,18},{21,8,163},{19,7,35},{0,8,114}, 21 | {0,8,50},{0,9,196},{17,7,11},{0,8,98},{0,8,34},{0,9,164},{0,8,2}, 22 | {0,8,130},{0,8,66},{0,9,228},{16,7,7},{0,8,90},{0,8,26},{0,9,148}, 23 | {20,7,67},{0,8,122},{0,8,58},{0,9,212},{18,7,19},{0,8,106},{0,8,42}, 24 | {0,9,180},{0,8,10},{0,8,138},{0,8,74},{0,9,244},{16,7,5},{0,8,86}, 25 | {0,8,22},{64,8,0},{19,7,51},{0,8,118},{0,8,54},{0,9,204},{17,7,15}, 26 | {0,8,102},{0,8,38},{0,9,172},{0,8,6},{0,8,134},{0,8,70},{0,9,236}, 27 | {16,7,9},{0,8,94},{0,8,30},{0,9,156},{20,7,99},{0,8,126},{0,8,62}, 28 | {0,9,220},{18,7,27},{0,8,110},{0,8,46},{0,9,188},{0,8,14},{0,8,142}, 29 | {0,8,78},{0,9,252},{96,7,0},{0,8,81},{0,8,17},{21,8,131},{18,7,31}, 
30 | {0,8,113},{0,8,49},{0,9,194},{16,7,10},{0,8,97},{0,8,33},{0,9,162}, 31 | {0,8,1},{0,8,129},{0,8,65},{0,9,226},{16,7,6},{0,8,89},{0,8,25}, 32 | {0,9,146},{19,7,59},{0,8,121},{0,8,57},{0,9,210},{17,7,17},{0,8,105}, 33 | {0,8,41},{0,9,178},{0,8,9},{0,8,137},{0,8,73},{0,9,242},{16,7,4}, 34 | {0,8,85},{0,8,21},{16,8,258},{19,7,43},{0,8,117},{0,8,53},{0,9,202}, 35 | {17,7,13},{0,8,101},{0,8,37},{0,9,170},{0,8,5},{0,8,133},{0,8,69}, 36 | {0,9,234},{16,7,8},{0,8,93},{0,8,29},{0,9,154},{20,7,83},{0,8,125}, 37 | {0,8,61},{0,9,218},{18,7,23},{0,8,109},{0,8,45},{0,9,186},{0,8,13}, 38 | {0,8,141},{0,8,77},{0,9,250},{16,7,3},{0,8,83},{0,8,19},{21,8,195}, 39 | {19,7,35},{0,8,115},{0,8,51},{0,9,198},{17,7,11},{0,8,99},{0,8,35}, 40 | {0,9,166},{0,8,3},{0,8,131},{0,8,67},{0,9,230},{16,7,7},{0,8,91}, 41 | {0,8,27},{0,9,150},{20,7,67},{0,8,123},{0,8,59},{0,9,214},{18,7,19}, 42 | {0,8,107},{0,8,43},{0,9,182},{0,8,11},{0,8,139},{0,8,75},{0,9,246}, 43 | {16,7,5},{0,8,87},{0,8,23},{64,8,0},{19,7,51},{0,8,119},{0,8,55}, 44 | {0,9,206},{17,7,15},{0,8,103},{0,8,39},{0,9,174},{0,8,7},{0,8,135}, 45 | {0,8,71},{0,9,238},{16,7,9},{0,8,95},{0,8,31},{0,9,158},{20,7,99}, 46 | {0,8,127},{0,8,63},{0,9,222},{18,7,27},{0,8,111},{0,8,47},{0,9,190}, 47 | {0,8,15},{0,8,143},{0,8,79},{0,9,254},{96,7,0},{0,8,80},{0,8,16}, 48 | {20,8,115},{18,7,31},{0,8,112},{0,8,48},{0,9,193},{16,7,10},{0,8,96}, 49 | {0,8,32},{0,9,161},{0,8,0},{0,8,128},{0,8,64},{0,9,225},{16,7,6}, 50 | {0,8,88},{0,8,24},{0,9,145},{19,7,59},{0,8,120},{0,8,56},{0,9,209}, 51 | {17,7,17},{0,8,104},{0,8,40},{0,9,177},{0,8,8},{0,8,136},{0,8,72}, 52 | {0,9,241},{16,7,4},{0,8,84},{0,8,20},{21,8,227},{19,7,43},{0,8,116}, 53 | {0,8,52},{0,9,201},{17,7,13},{0,8,100},{0,8,36},{0,9,169},{0,8,4}, 54 | {0,8,132},{0,8,68},{0,9,233},{16,7,8},{0,8,92},{0,8,28},{0,9,153}, 55 | {20,7,83},{0,8,124},{0,8,60},{0,9,217},{18,7,23},{0,8,108},{0,8,44}, 56 | {0,9,185},{0,8,12},{0,8,140},{0,8,76},{0,9,249},{16,7,3},{0,8,82}, 57 | 
{0,8,18},{21,8,163},{19,7,35},{0,8,114},{0,8,50},{0,9,197},{17,7,11}, 58 | {0,8,98},{0,8,34},{0,9,165},{0,8,2},{0,8,130},{0,8,66},{0,9,229}, 59 | {16,7,7},{0,8,90},{0,8,26},{0,9,149},{20,7,67},{0,8,122},{0,8,58}, 60 | {0,9,213},{18,7,19},{0,8,106},{0,8,42},{0,9,181},{0,8,10},{0,8,138}, 61 | {0,8,74},{0,9,245},{16,7,5},{0,8,86},{0,8,22},{64,8,0},{19,7,51}, 62 | {0,8,118},{0,8,54},{0,9,205},{17,7,15},{0,8,102},{0,8,38},{0,9,173}, 63 | {0,8,6},{0,8,134},{0,8,70},{0,9,237},{16,7,9},{0,8,94},{0,8,30}, 64 | {0,9,157},{20,7,99},{0,8,126},{0,8,62},{0,9,221},{18,7,27},{0,8,110}, 65 | {0,8,46},{0,9,189},{0,8,14},{0,8,142},{0,8,78},{0,9,253},{96,7,0}, 66 | {0,8,81},{0,8,17},{21,8,131},{18,7,31},{0,8,113},{0,8,49},{0,9,195}, 67 | {16,7,10},{0,8,97},{0,8,33},{0,9,163},{0,8,1},{0,8,129},{0,8,65}, 68 | {0,9,227},{16,7,6},{0,8,89},{0,8,25},{0,9,147},{19,7,59},{0,8,121}, 69 | {0,8,57},{0,9,211},{17,7,17},{0,8,105},{0,8,41},{0,9,179},{0,8,9}, 70 | {0,8,137},{0,8,73},{0,9,243},{16,7,4},{0,8,85},{0,8,21},{16,8,258}, 71 | {19,7,43},{0,8,117},{0,8,53},{0,9,203},{17,7,13},{0,8,101},{0,8,37}, 72 | {0,9,171},{0,8,5},{0,8,133},{0,8,69},{0,9,235},{16,7,8},{0,8,93}, 73 | {0,8,29},{0,9,155},{20,7,83},{0,8,125},{0,8,61},{0,9,219},{18,7,23}, 74 | {0,8,109},{0,8,45},{0,9,187},{0,8,13},{0,8,141},{0,8,77},{0,9,251}, 75 | {16,7,3},{0,8,83},{0,8,19},{21,8,195},{19,7,35},{0,8,115},{0,8,51}, 76 | {0,9,199},{17,7,11},{0,8,99},{0,8,35},{0,9,167},{0,8,3},{0,8,131}, 77 | {0,8,67},{0,9,231},{16,7,7},{0,8,91},{0,8,27},{0,9,151},{20,7,67}, 78 | {0,8,123},{0,8,59},{0,9,215},{18,7,19},{0,8,107},{0,8,43},{0,9,183}, 79 | {0,8,11},{0,8,139},{0,8,75},{0,9,247},{16,7,5},{0,8,87},{0,8,23}, 80 | {64,8,0},{19,7,51},{0,8,119},{0,8,55},{0,9,207},{17,7,15},{0,8,103}, 81 | {0,8,39},{0,9,175},{0,8,7},{0,8,135},{0,8,71},{0,9,239},{16,7,9}, 82 | {0,8,95},{0,8,31},{0,9,159},{20,7,99},{0,8,127},{0,8,63},{0,9,223}, 83 | {18,7,27},{0,8,111},{0,8,47},{0,9,191},{0,8,15},{0,8,143},{0,8,79}, 84 | {0,9,255} 85 | }; 86 | 87 | static 
const code distfix[32] = { 88 | {16,5,1},{23,5,257},{19,5,17},{27,5,4097},{17,5,5},{25,5,1025}, 89 | {21,5,65},{29,5,16385},{16,5,3},{24,5,513},{20,5,33},{28,5,8193}, 90 | {18,5,9},{26,5,2049},{22,5,129},{64,5,0},{16,5,2},{23,5,385}, 91 | {19,5,25},{27,5,6145},{17,5,7},{25,5,1537},{21,5,97},{29,5,24577}, 92 | {16,5,4},{24,5,769},{20,5,49},{28,5,12289},{18,5,13},{26,5,3073}, 93 | {22,5,193},{64,5,0} 94 | }; 95 | -------------------------------------------------------------------------------- /src/zlib/inflate.h: -------------------------------------------------------------------------------- 1 | /* inflate.h -- internal inflate state definition 2 | * Copyright (C) 1995-2009 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | /* define NO_GZIP when compiling if you want to disable gzip header and 12 | trailer decoding by inflate(). NO_GZIP would be used to avoid linking in 13 | the crc code when it is not needed. For shared libraries, gzip decoding 14 | should be left enabled. 
*/ 15 | #ifndef NO_GZIP 16 | # define GUNZIP 17 | #endif 18 | 19 | /* Possible inflate modes between inflate() calls */ 20 | typedef enum { 21 | HEAD, /* i: waiting for magic header */ 22 | FLAGS, /* i: waiting for method and flags (gzip) */ 23 | TIME, /* i: waiting for modification time (gzip) */ 24 | OS, /* i: waiting for extra flags and operating system (gzip) */ 25 | EXLEN, /* i: waiting for extra length (gzip) */ 26 | EXTRA, /* i: waiting for extra bytes (gzip) */ 27 | NAME, /* i: waiting for end of file name (gzip) */ 28 | COMMENT, /* i: waiting for end of comment (gzip) */ 29 | HCRC, /* i: waiting for header crc (gzip) */ 30 | DICTID, /* i: waiting for dictionary check value */ 31 | DICT, /* waiting for inflateSetDictionary() call */ 32 | TYPE, /* i: waiting for type bits, including last-flag bit */ 33 | TYPEDO, /* i: same, but skip check to exit inflate on new block */ 34 | STORED, /* i: waiting for stored size (length and complement) */ 35 | COPY_, /* i/o: same as COPY below, but only first time in */ 36 | COPY, /* i/o: waiting for input or output to copy stored block */ 37 | TABLE, /* i: waiting for dynamic block table lengths */ 38 | LENLENS, /* i: waiting for code length code lengths */ 39 | CODELENS, /* i: waiting for length/lit and distance code lengths */ 40 | LEN_, /* i: same as LEN below, but only first time in */ 41 | LEN, /* i: waiting for length/lit/eob code */ 42 | LENEXT, /* i: waiting for length extra bits */ 43 | DIST, /* i: waiting for distance code */ 44 | DISTEXT, /* i: waiting for distance extra bits */ 45 | MATCH, /* o: waiting for output space to copy string */ 46 | LIT, /* o: waiting for output space to write literal */ 47 | CHECK, /* i: waiting for 32-bit check value */ 48 | LENGTH, /* i: waiting for 32-bit length (gzip) */ 49 | DONE, /* finished check, done -- remain here until reset */ 50 | BAD, /* got a data error -- remain here until reset */ 51 | MEM, /* got an inflate() memory error -- remain here until reset */ 52 | SYNC /* 
looking for synchronization bytes to restart inflate() */ 53 | } inflate_mode; 54 | 55 | /* 56 | State transitions between above modes - 57 | 58 | (most modes can go to BAD or MEM on error -- not shown for clarity) 59 | 60 | Process header: 61 | HEAD -> (gzip) or (zlib) or (raw) 62 | (gzip) -> FLAGS -> TIME -> OS -> EXLEN -> EXTRA -> NAME -> COMMENT -> 63 | HCRC -> TYPE 64 | (zlib) -> DICTID or TYPE 65 | DICTID -> DICT -> TYPE 66 | (raw) -> TYPEDO 67 | Read deflate blocks: 68 | TYPE -> TYPEDO -> STORED or TABLE or LEN_ or CHECK 69 | STORED -> COPY_ -> COPY -> TYPE 70 | TABLE -> LENLENS -> CODELENS -> LEN_ 71 | LEN_ -> LEN 72 | Read deflate codes in fixed or dynamic block: 73 | LEN -> LENEXT or LIT or TYPE 74 | LENEXT -> DIST -> DISTEXT -> MATCH -> LEN 75 | LIT -> LEN 76 | Process trailer: 77 | CHECK -> LENGTH -> DONE 78 | */ 79 | 80 | /* state maintained between inflate() calls. Approximately 10K bytes. */ 81 | struct inflate_state { 82 | inflate_mode mode; /* current inflate mode */ 83 | int last; /* true if processing last block */ 84 | int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 85 | int havedict; /* true if dictionary provided */ 86 | int flags; /* gzip header method and flags (0 if zlib) */ 87 | unsigned dmax; /* zlib header max distance (INFLATE_STRICT) */ 88 | unsigned long check; /* protected copy of check value */ 89 | unsigned long total; /* protected copy of output count */ 90 | gz_headerp head; /* where to save gzip header information */ 91 | /* sliding window */ 92 | unsigned wbits; /* log base 2 of requested window size */ 93 | unsigned wsize; /* window size or zero if not using window */ 94 | unsigned whave; /* valid bytes in the window */ 95 | unsigned wnext; /* window write index */ 96 | unsigned char FAR *window; /* allocated sliding window, if needed */ 97 | /* bit accumulator */ 98 | unsigned long hold; /* input bit accumulator */ 99 | unsigned bits; /* number of bits in hold */ 100 | /* for string and stored block copying */ 101 |
unsigned length; /* literal or length of data to copy */ 102 | unsigned offset; /* distance back to copy string from */ 103 | /* for table and code decoding */ 104 | unsigned extra; /* extra bits needed */ 105 | /* fixed and dynamic code tables */ 106 | code const FAR *lencode; /* starting table for length/literal codes */ 107 | code const FAR *distcode; /* starting table for distance codes */ 108 | unsigned lenbits; /* index bits for lencode */ 109 | unsigned distbits; /* index bits for distcode */ 110 | /* dynamic table building */ 111 | unsigned ncode; /* number of code length code lengths */ 112 | unsigned nlen; /* number of length code lengths */ 113 | unsigned ndist; /* number of distance code lengths */ 114 | unsigned have; /* number of code lengths in lens[] */ 115 | code FAR *next; /* next available space in codes[] */ 116 | unsigned short lens[320]; /* temporary storage for code lengths */ 117 | unsigned short work[288]; /* work area for code table building */ 118 | code codes[ENOUGH]; /* space for code tables */ 119 | int sane; /* if false, allow invalid distance too far */ 120 | int back; /* bits back of last unprocessed length/lit */ 121 | unsigned was; /* initial length of match */ 122 | }; 123 | -------------------------------------------------------------------------------- /src/zlib/inftrees.h: -------------------------------------------------------------------------------- 1 | /* inftrees.h -- header to use inftrees.c 2 | * Copyright (C) 1995-2005, 2010 Mark Adler 3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | /* Structure for decoding tables. 
Each entry provides either the 12 | information needed to do the operation requested by the code that 13 | indexed that table entry, or it provides a pointer to another 14 | table that indexes more bits of the code. op indicates whether 15 | the entry is a pointer to another table, a literal, a length or 16 | distance, an end-of-block, or an invalid code. For a table 17 | pointer, the low four bits of op is the number of index bits of 18 | that table. For a length or distance, the low four bits of op 19 | is the number of extra bits to get after the code. bits is 20 | the number of bits in this code or part of the code to drop off 21 | of the bit buffer. val is the actual byte to output in the case 22 | of a literal, the base length or distance, or the offset from 23 | the current table to the next table. Each entry is four bytes. */ 24 | typedef struct { 25 | unsigned char op; /* operation, extra bits, table bits */ 26 | unsigned char bits; /* bits in this part of the code */ 27 | unsigned short val; /* offset in table or code value */ 28 | } code; 29 | 30 | /* op values as set by inflate_table(): 31 | 00000000 - literal 32 | 0000tttt - table link, tttt != 0 is the number of table index bits 33 | 0001eeee - length or distance, eeee is the number of extra bits 34 | 01100000 - end of block 35 | 01000000 - invalid code 36 | */ 37 | 38 | /* Maximum size of the dynamic table. The maximum number of code structures is 39 | 1444, which is the sum of 852 for literal/length codes and 592 for distance 40 | codes. These values were found by exhaustive searches using the program 41 | examples/enough.c found in the zlib distribution. The arguments to that 42 | program are the number of symbols, the initial root table size, and the 43 | maximum bit length of a code. "enough 286 9 15" for literal/length codes 44 | returns 852, and "enough 30 6 15" for distance codes returns 592.
45 | The initial root table size (9 or 6) is found in the fifth argument of the 46 | inflate_table() calls in inflate.c and infback.c. If the root table size is 47 | changed, then these maximum sizes would need to be recalculated and 48 | updated. */ 49 | #define ENOUGH_LENS 852 50 | #define ENOUGH_DISTS 592 51 | #define ENOUGH (ENOUGH_LENS+ENOUGH_DISTS) 52 | 53 | /* Type of code to build for inflate_table() */ 54 | typedef enum { 55 | CODES, 56 | LENS, 57 | DISTS 58 | } codetype; 59 | 60 | int ZLIB_INTERNAL inflate_table OF((codetype type, unsigned short FAR *lens, 61 | unsigned codes, code FAR * FAR *table, 62 | unsigned FAR *bits, unsigned short FAR *work)); 63 | -------------------------------------------------------------------------------- /src/zlib/trees.h: -------------------------------------------------------------------------------- 1 | /* header created automatically with -DGEN_TREES_H */ 2 | 3 | local const ct_data static_ltree[L_CODES+2] = { 4 | {{ 12},{ 8}}, {{140},{ 8}}, {{ 76},{ 8}}, {{204},{ 8}}, {{ 44},{ 8}}, 5 | {{172},{ 8}}, {{108},{ 8}}, {{236},{ 8}}, {{ 28},{ 8}}, {{156},{ 8}}, 6 | {{ 92},{ 8}}, {{220},{ 8}}, {{ 60},{ 8}}, {{188},{ 8}}, {{124},{ 8}}, 7 | {{252},{ 8}}, {{ 2},{ 8}}, {{130},{ 8}}, {{ 66},{ 8}}, {{194},{ 8}}, 8 | {{ 34},{ 8}}, {{162},{ 8}}, {{ 98},{ 8}}, {{226},{ 8}}, {{ 18},{ 8}}, 9 | {{146},{ 8}}, {{ 82},{ 8}}, {{210},{ 8}}, {{ 50},{ 8}}, {{178},{ 8}}, 10 | {{114},{ 8}}, {{242},{ 8}}, {{ 10},{ 8}}, {{138},{ 8}}, {{ 74},{ 8}}, 11 | {{202},{ 8}}, {{ 42},{ 8}}, {{170},{ 8}}, {{106},{ 8}}, {{234},{ 8}}, 12 | {{ 26},{ 8}}, {{154},{ 8}}, {{ 90},{ 8}}, {{218},{ 8}}, {{ 58},{ 8}}, 13 | {{186},{ 8}}, {{122},{ 8}}, {{250},{ 8}}, {{ 6},{ 8}}, {{134},{ 8}}, 14 | {{ 70},{ 8}}, {{198},{ 8}}, {{ 38},{ 8}}, {{166},{ 8}}, {{102},{ 8}}, 15 | {{230},{ 8}}, {{ 22},{ 8}}, {{150},{ 8}}, {{ 86},{ 8}}, {{214},{ 8}}, 16 | {{ 54},{ 8}}, {{182},{ 8}}, {{118},{ 8}}, {{246},{ 8}}, {{ 14},{ 8}}, 17 | {{142},{ 8}}, {{ 78},{ 8}}, {{206},{ 8}}, {{ 46},{
8}}, {{174},{ 8}}, 18 | {{110},{ 8}}, {{238},{ 8}}, {{ 30},{ 8}}, {{158},{ 8}}, {{ 94},{ 8}}, 19 | {{222},{ 8}}, {{ 62},{ 8}}, {{190},{ 8}}, {{126},{ 8}}, {{254},{ 8}}, 20 | {{ 1},{ 8}}, {{129},{ 8}}, {{ 65},{ 8}}, {{193},{ 8}}, {{ 33},{ 8}}, 21 | {{161},{ 8}}, {{ 97},{ 8}}, {{225},{ 8}}, {{ 17},{ 8}}, {{145},{ 8}}, 22 | {{ 81},{ 8}}, {{209},{ 8}}, {{ 49},{ 8}}, {{177},{ 8}}, {{113},{ 8}}, 23 | {{241},{ 8}}, {{ 9},{ 8}}, {{137},{ 8}}, {{ 73},{ 8}}, {{201},{ 8}}, 24 | {{ 41},{ 8}}, {{169},{ 8}}, {{105},{ 8}}, {{233},{ 8}}, {{ 25},{ 8}}, 25 | {{153},{ 8}}, {{ 89},{ 8}}, {{217},{ 8}}, {{ 57},{ 8}}, {{185},{ 8}}, 26 | {{121},{ 8}}, {{249},{ 8}}, {{ 5},{ 8}}, {{133},{ 8}}, {{ 69},{ 8}}, 27 | {{197},{ 8}}, {{ 37},{ 8}}, {{165},{ 8}}, {{101},{ 8}}, {{229},{ 8}}, 28 | {{ 21},{ 8}}, {{149},{ 8}}, {{ 85},{ 8}}, {{213},{ 8}}, {{ 53},{ 8}}, 29 | {{181},{ 8}}, {{117},{ 8}}, {{245},{ 8}}, {{ 13},{ 8}}, {{141},{ 8}}, 30 | {{ 77},{ 8}}, {{205},{ 8}}, {{ 45},{ 8}}, {{173},{ 8}}, {{109},{ 8}}, 31 | {{237},{ 8}}, {{ 29},{ 8}}, {{157},{ 8}}, {{ 93},{ 8}}, {{221},{ 8}}, 32 | {{ 61},{ 8}}, {{189},{ 8}}, {{125},{ 8}}, {{253},{ 8}}, {{ 19},{ 9}}, 33 | {{275},{ 9}}, {{147},{ 9}}, {{403},{ 9}}, {{ 83},{ 9}}, {{339},{ 9}}, 34 | {{211},{ 9}}, {{467},{ 9}}, {{ 51},{ 9}}, {{307},{ 9}}, {{179},{ 9}}, 35 | {{435},{ 9}}, {{115},{ 9}}, {{371},{ 9}}, {{243},{ 9}}, {{499},{ 9}}, 36 | {{ 11},{ 9}}, {{267},{ 9}}, {{139},{ 9}}, {{395},{ 9}}, {{ 75},{ 9}}, 37 | {{331},{ 9}}, {{203},{ 9}}, {{459},{ 9}}, {{ 43},{ 9}}, {{299},{ 9}}, 38 | {{171},{ 9}}, {{427},{ 9}}, {{107},{ 9}}, {{363},{ 9}}, {{235},{ 9}}, 39 | {{491},{ 9}}, {{ 27},{ 9}}, {{283},{ 9}}, {{155},{ 9}}, {{411},{ 9}}, 40 | {{ 91},{ 9}}, {{347},{ 9}}, {{219},{ 9}}, {{475},{ 9}}, {{ 59},{ 9}}, 41 | {{315},{ 9}}, {{187},{ 9}}, {{443},{ 9}}, {{123},{ 9}}, {{379},{ 9}}, 42 | {{251},{ 9}}, {{507},{ 9}}, {{ 7},{ 9}}, {{263},{ 9}}, {{135},{ 9}}, 43 | {{391},{ 9}}, {{ 71},{ 9}}, {{327},{ 9}}, {{199},{ 9}}, {{455},{ 9}}, 44 | {{ 39},{ 9}}, {{295},{ 9}}, 
{{167},{ 9}}, {{423},{ 9}}, {{103},{ 9}}, 45 | {{359},{ 9}}, {{231},{ 9}}, {{487},{ 9}}, {{ 23},{ 9}}, {{279},{ 9}}, 46 | {{151},{ 9}}, {{407},{ 9}}, {{ 87},{ 9}}, {{343},{ 9}}, {{215},{ 9}}, 47 | {{471},{ 9}}, {{ 55},{ 9}}, {{311},{ 9}}, {{183},{ 9}}, {{439},{ 9}}, 48 | {{119},{ 9}}, {{375},{ 9}}, {{247},{ 9}}, {{503},{ 9}}, {{ 15},{ 9}}, 49 | {{271},{ 9}}, {{143},{ 9}}, {{399},{ 9}}, {{ 79},{ 9}}, {{335},{ 9}}, 50 | {{207},{ 9}}, {{463},{ 9}}, {{ 47},{ 9}}, {{303},{ 9}}, {{175},{ 9}}, 51 | {{431},{ 9}}, {{111},{ 9}}, {{367},{ 9}}, {{239},{ 9}}, {{495},{ 9}}, 52 | {{ 31},{ 9}}, {{287},{ 9}}, {{159},{ 9}}, {{415},{ 9}}, {{ 95},{ 9}}, 53 | {{351},{ 9}}, {{223},{ 9}}, {{479},{ 9}}, {{ 63},{ 9}}, {{319},{ 9}}, 54 | {{191},{ 9}}, {{447},{ 9}}, {{127},{ 9}}, {{383},{ 9}}, {{255},{ 9}}, 55 | {{511},{ 9}}, {{ 0},{ 7}}, {{ 64},{ 7}}, {{ 32},{ 7}}, {{ 96},{ 7}}, 56 | {{ 16},{ 7}}, {{ 80},{ 7}}, {{ 48},{ 7}}, {{112},{ 7}}, {{ 8},{ 7}}, 57 | {{ 72},{ 7}}, {{ 40},{ 7}}, {{104},{ 7}}, {{ 24},{ 7}}, {{ 88},{ 7}}, 58 | {{ 56},{ 7}}, {{120},{ 7}}, {{ 4},{ 7}}, {{ 68},{ 7}}, {{ 36},{ 7}}, 59 | {{100},{ 7}}, {{ 20},{ 7}}, {{ 84},{ 7}}, {{ 52},{ 7}}, {{116},{ 7}}, 60 | {{ 3},{ 8}}, {{131},{ 8}}, {{ 67},{ 8}}, {{195},{ 8}}, {{ 35},{ 8}}, 61 | {{163},{ 8}}, {{ 99},{ 8}}, {{227},{ 8}} 62 | }; 63 | 64 | local const ct_data static_dtree[D_CODES] = { 65 | {{ 0},{ 5}}, {{16},{ 5}}, {{ 8},{ 5}}, {{24},{ 5}}, {{ 4},{ 5}}, 66 | {{20},{ 5}}, {{12},{ 5}}, {{28},{ 5}}, {{ 2},{ 5}}, {{18},{ 5}}, 67 | {{10},{ 5}}, {{26},{ 5}}, {{ 6},{ 5}}, {{22},{ 5}}, {{14},{ 5}}, 68 | {{30},{ 5}}, {{ 1},{ 5}}, {{17},{ 5}}, {{ 9},{ 5}}, {{25},{ 5}}, 69 | {{ 5},{ 5}}, {{21},{ 5}}, {{13},{ 5}}, {{29},{ 5}}, {{ 3},{ 5}}, 70 | {{19},{ 5}}, {{11},{ 5}}, {{27},{ 5}}, {{ 7},{ 5}}, {{23},{ 5}} 71 | }; 72 | 73 | const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = { 74 | 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 75 | 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 76 | 10, 10, 10, 10, 
10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 77 | 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 78 | 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 79 | 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 80 | 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 81 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 82 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 83 | 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 84 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 85 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 86 | 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0, 0, 16, 17, 87 | 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 88 | 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 89 | 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 90 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 91 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 92 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 93 | 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 94 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 95 | 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 96 | 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 97 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 98 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 99 | 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29 100 | }; 101 | 
102 | const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= { 103 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 104 | 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 105 | 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 106 | 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 107 | 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 108 | 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 109 | 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 110 | 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 111 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 112 | 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 113 | 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 114 | 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 115 | 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28 116 | }; 117 | 118 | local const int base_length[LENGTH_CODES] = { 119 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 120 | 64, 80, 96, 112, 128, 160, 192, 224, 0 121 | }; 122 | 123 | local const int base_dist[D_CODES] = { 124 | 0, 1, 2, 3, 4, 6, 8, 12, 16, 24, 125 | 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 126 | 1024, 1536, 2048, 3072, 4096, 6144, 8192, 12288, 16384, 24576 127 | }; 128 | 129 | -------------------------------------------------------------------------------- /src/zlib/zutil.h: -------------------------------------------------------------------------------- 1 | /* zutil.h -- internal interface and configuration of the compression library 2 | * Copyright (C) 1995-2013 Jean-loup Gailly. 
3 | * For conditions of distribution and use, see copyright notice in zlib.h 4 | */ 5 | 6 | /* WARNING: this file should *not* be used by applications. It is 7 | part of the implementation of the compression library and is 8 | subject to change. Applications should only use zlib.h. 9 | */ 10 | 11 | /* @(#) $Id$ */ 12 | 13 | #ifndef ZUTIL_H 14 | #define ZUTIL_H 15 | 16 | #ifdef HAVE_HIDDEN 17 | # define ZLIB_INTERNAL __attribute__((visibility ("hidden"))) 18 | #else 19 | # define ZLIB_INTERNAL 20 | #endif 21 | 22 | #include "zlib.h" 23 | 24 | #if defined(STDC) && !defined(Z_SOLO) 25 | # if !(defined(_WIN32_WCE) && defined(_MSC_VER)) 26 | # include <stddef.h> 27 | # endif 28 | # include <string.h> 29 | # include <stdlib.h> 30 | #endif 31 | 32 | #ifdef Z_SOLO 33 | typedef long ptrdiff_t; /* guess -- will be caught if guess is wrong */ 34 | #endif 35 | 36 | #ifndef local 37 | # define local static 38 | #endif 39 | /* compile with -Dlocal if your debugger can't find static symbols */ 40 | 41 | typedef unsigned char uch; 42 | typedef uch FAR uchf; 43 | typedef unsigned short ush; 44 | typedef ush FAR ushf; 45 | typedef unsigned long ulg; 46 | 47 | extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */ 48 | /* (size given to avoid silly warnings with Visual C++) */ 49 | 50 | #define ERR_MSG(err) z_errmsg[Z_NEED_DICT-(err)] 51 | 52 | #define ERR_RETURN(strm,err) \ 53 | return (strm->msg = ERR_MSG(err), (err)) 54 | /* To be used only when the state is known to be valid */ 55 | 56 | /* common constants */ 57 | 58 | #ifndef DEF_WBITS 59 | # define DEF_WBITS MAX_WBITS 60 | #endif 61 | /* default windowBits for decompression.
MAX_WBITS is for compression only */ 62 | 63 | #if MAX_MEM_LEVEL >= 8 64 | # define DEF_MEM_LEVEL 8 65 | #else 66 | # define DEF_MEM_LEVEL MAX_MEM_LEVEL 67 | #endif 68 | /* default memLevel */ 69 | 70 | #define STORED_BLOCK 0 71 | #define STATIC_TREES 1 72 | #define DYN_TREES 2 73 | /* The three kinds of block type */ 74 | 75 | #define MIN_MATCH 3 76 | #define MAX_MATCH 258 77 | /* The minimum and maximum match lengths */ 78 | 79 | #define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ 80 | 81 | /* target dependencies */ 82 | 83 | #if defined(MSDOS) || (defined(WINDOWS) && !defined(WIN32)) 84 | # define OS_CODE 0x00 85 | # ifndef Z_SOLO 86 | # if defined(__TURBOC__) || defined(__BORLANDC__) 87 | # if (__STDC__ == 1) && (defined(__LARGE__) || defined(__COMPACT__)) 88 | /* Allow compilation with ANSI keywords only enabled */ 89 | void _Cdecl farfree( void *block ); 90 | void *_Cdecl farmalloc( unsigned long nbytes ); 91 | # else 92 | # include <alloc.h> 93 | # endif 94 | # else /* MSC or DJGPP */ 95 | # include <malloc.h> 96 | # endif 97 | # endif 98 | #endif 99 | 100 | #ifdef AMIGA 101 | # define OS_CODE 0x01 102 | #endif 103 | 104 | #if defined(VAXC) || defined(VMS) 105 | # define OS_CODE 0x02 106 | # define F_OPEN(name, mode) \ 107 | fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512") 108 | #endif 109 | 110 | #if defined(ATARI) || defined(atarist) 111 | # define OS_CODE 0x05 112 | #endif 113 | 114 | #ifdef OS2 115 | # define OS_CODE 0x06 116 | # if defined(M_I86) && !defined(Z_SOLO) 117 | # include <malloc.h> 118 | # endif 119 | #endif 120 | 121 | #if defined(MACOS) || defined(TARGET_OS_MAC) 122 | # define OS_CODE 0x07 123 | # ifndef Z_SOLO 124 | # if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os 125 | # include <unix.h> /* for fdopen */ 126 | # else 127 | # ifndef fdopen 128 | # define fdopen(fd,mode) NULL /* No fdopen() */ 129 | # endif 130 | # endif 131 | # endif 132 | #endif 133 | 134 | #ifdef TOPS20 135 | # define OS_CODE 0x0a 136 | #endif 137 |
138 | #ifdef WIN32 139 | # ifndef __CYGWIN__ /* Cygwin is Unix, not Win32 */ 140 | # define OS_CODE 0x0b 141 | # endif 142 | #endif 143 | 144 | #ifdef __50SERIES /* Prime/PRIMOS */ 145 | # define OS_CODE 0x0f 146 | #endif 147 | 148 | #if defined(_BEOS_) || defined(RISCOS) 149 | # define fdopen(fd,mode) NULL /* No fdopen() */ 150 | #endif 151 | 152 | #if (defined(_MSC_VER) && (_MSC_VER > 600)) && !defined __INTERIX 153 | # if defined(_WIN32_WCE) 154 | # define fdopen(fd,mode) NULL /* No fdopen() */ 155 | # ifndef _PTRDIFF_T_DEFINED 156 | typedef int ptrdiff_t; 157 | # define _PTRDIFF_T_DEFINED 158 | # endif 159 | # else 160 | # define fdopen(fd,type) _fdopen(fd,type) 161 | # endif 162 | #endif 163 | 164 | #if defined(__BORLANDC__) && !defined(MSDOS) 165 | #pragma warn -8004 166 | #pragma warn -8008 167 | #pragma warn -8066 168 | #endif 169 | 170 | /* provide prototypes for these when building zlib without LFS */ 171 | #if !defined(_WIN32) && \ 172 | (!defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0) 173 | ZEXTERN uLong ZEXPORT adler32_combine64 OF((uLong, uLong, z_off_t)); 174 | ZEXTERN uLong ZEXPORT crc32_combine64 OF((uLong, uLong, z_off_t)); 175 | #endif 176 | 177 | /* common defaults */ 178 | 179 | #ifndef OS_CODE 180 | # define OS_CODE 0x03 /* assume Unix */ 181 | #endif 182 | 183 | #ifndef F_OPEN 184 | # define F_OPEN(name, mode) fopen((name), (mode)) 185 | #endif 186 | 187 | /* functions */ 188 | 189 | #if defined(pyr) || defined(Z_SOLO) 190 | # define NO_MEMCPY 191 | #endif 192 | #if defined(SMALL_MEDIUM) && !defined(_MSC_VER) && !defined(__SC__) 193 | /* Use our own functions for small and medium model with MSC <= 5.0. 194 | * You may have to use the same strategy for Borland C (untested). 195 | * The __SC__ check is for Symantec. 
196 | */ 197 | # define NO_MEMCPY 198 | #endif 199 | #if defined(STDC) && !defined(HAVE_MEMCPY) && !defined(NO_MEMCPY) 200 | # define HAVE_MEMCPY 201 | #endif 202 | #ifdef HAVE_MEMCPY 203 | # ifdef SMALL_MEDIUM /* MSDOS small or medium model */ 204 | # define zmemcpy _fmemcpy 205 | # define zmemcmp _fmemcmp 206 | # define zmemzero(dest, len) _fmemset(dest, 0, len) 207 | # else 208 | # define zmemcpy memcpy 209 | # define zmemcmp memcmp 210 | # define zmemzero(dest, len) memset(dest, 0, len) 211 | # endif 212 | #else 213 | void ZLIB_INTERNAL zmemcpy OF((Bytef* dest, const Bytef* source, uInt len)); 214 | int ZLIB_INTERNAL zmemcmp OF((const Bytef* s1, const Bytef* s2, uInt len)); 215 | void ZLIB_INTERNAL zmemzero OF((Bytef* dest, uInt len)); 216 | #endif 217 | 218 | /* Diagnostic functions */ 219 | #ifdef DEBUG 220 | # include <stdio.h> 221 | extern int ZLIB_INTERNAL z_verbose; 222 | extern void ZLIB_INTERNAL z_error OF((char *m)); 223 | # define Assert(cond,msg) {if(!(cond)) z_error(msg);} 224 | # define Trace(x) {if (z_verbose>=0) fprintf x ;} 225 | # define Tracev(x) {if (z_verbose>0) fprintf x ;} 226 | # define Tracevv(x) {if (z_verbose>1) fprintf x ;} 227 | # define Tracec(c,x) {if (z_verbose>0 && (c)) fprintf x ;} 228 | # define Tracecv(c,x) {if (z_verbose>1 && (c)) fprintf x ;} 229 | #else 230 | # define Assert(cond,msg) 231 | # define Trace(x) 232 | # define Tracev(x) 233 | # define Tracevv(x) 234 | # define Tracec(c,x) 235 | # define Tracecv(c,x) 236 | #endif 237 | 238 | #ifndef Z_SOLO 239 | voidpf ZLIB_INTERNAL zcalloc OF((voidpf opaque, unsigned items, 240 | unsigned size)); 241 | void ZLIB_INTERNAL zcfree OF((voidpf opaque, voidpf ptr)); 242 | #endif 243 | 244 | #define ZALLOC(strm, items, size) \ 245 | (*((strm)->zalloc))((strm)->opaque, (items), (size)) 246 | #define ZFREE(strm, addr) (*((strm)->zfree))((strm)->opaque, (voidpf)(addr)) 247 | #define TRY_FREE(s, p) {if (p) ZFREE(s, p);} 248 | 249 | /* Reverse the bytes in a 32-bit value */ 250 | #define ZSWAP32(q) 
((((q) >> 24) & 0xff) + (((q) >> 8) & 0xff00) + \ 251 | (((q) & 0xff00) << 8) + (((q) & 0xff) << 24)) 252 | 253 | #endif /* ZUTIL_H */ 254 | -------------------------------------------------------------------------------- /testdata/R1.fq: -------------------------------------------------------------------------------- 1 | @NS500713:64:HFKJJBGXY:1:11101:1675:1101 1:N:0:TATAGCCT+GACCCCCA 2 | TAGGAGGCTTGGAGTACCAATAATAAAGTGAGCCCACCTTCCTGGTACCCAGACATTTCAGGAGGTCGGGAAATTTTTAAACCCAGGCAGCTTCCTGGCAGTGACATTTGGAGCATCAAAGTGGTAAATAAAATTTCATTTACATTAATAT 3 | + 4 | 6AAAAAEEEEE/E/EA/E/AEA6EE//AEE66/AAE//EEE/E//E/AA/EEE/A/AEE/EEA//EEEEEEEE6EEAAA/E/A/6E/6//6C-p.G12A-COSM522, TGTATCGTCAAGGCACTCTTGCCTACGCCA, G, CAGCTCCAACTACCACAAGTTTATATTCAG, chr12 3 | TEST-mutation-left-edge, AAAAAAAAAAAAAAAAAAAAAAAAAAAAATA,G,GAGGCTTGGAGTACCAATAATAAAGTGAGCCCACCTT, chrUnknown 4 | TEST-mutation-right-edge, AGCTTATATTTGAATATTCCTTTTCAAGGTTTTATAAAGCGA,T,TTAAAAAAAAAAAAAAAAAAAAAAAAAAAAA, chrUnknown 5 | KRAS-12-25398284-2-c.35G>C-p.G12A-COSM522, TGTATCGTCAAGGCACTCTTGCCTACGCCA, G, CAGCTCCAACTACCACAAGTTTATATTCAG, chr12 -------------------------------------------------------------------------------- /testdata/sample_report.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenGene/MutScan/118c7047e90cf498da4d1f56f9528190473c8e17/testdata/sample_report.jpg -------------------------------------------------------------------------------- /testdata/tinyref.fa: -------------------------------------------------------------------------------- 1 | >contig1 description1 2 | GATCACAGGTCTATCACCCTATTAA 3 | TTGGTATTTTCGTCTGGGGGGTGTG 4 | GAGCCGGAGCACCCTATGTCGCAGT 5 | >contig2 description2 6 | GTCTGCACAGCCGCTTTCCACACAG 7 | AACCCCCCCCTCCCCCCGCTTCTGG 8 | CAAACCCCAAAAACAAAGAACCCTA -------------------------------------------------------------------------------- /testdata/tinyvcf.vcf: -------------------------------------------------------------------------------- 
1 | ##fileformat=VCFv4.1 2 | ##source=COSMICv71 3 | ##reference=GRCh37 4 | ##fileDate=20141104 5 | ##comment="Missing nucleotide details indicate ambiguity during curation process" 6 | ##comment="URL stub for COSM ID field (use numeric portion of ID)='http://cancer.sanger.ac.uk/cosmic/mutation/overview?id='" 7 | ##comment="REF and ALT sequences are both forward strand 8 | ##INFO=<ID=GENE,Number=1,Type=String,Description="Gene name"> 9 | ##INFO=<ID=STRAND,Number=1,Type=String,Description="Gene strand"> 10 | ##INFO=<ID=CDS,Number=1,Type=String,Description="CDS annotation"> 11 | ##INFO=<ID=AA,Number=1,Type=String,Description="Peptide annotation"> 12 | ##INFO=<ID=CNT,Number=1,Type=Integer,Description="How many samples have this mutation"> 13 | #CHROM POS ID REF ALT QUAL FILTER INFO 14 | 1 69224 COSM3677745 A C . . GENE=OR4F5;STRAND=+;CDS=c.134A>C;AA=p.D45A;CNT=1 15 | 1 69230 COSM3677746 A C . . GENE=OR4F5;STRAND=+;CDS=c.140A>C;AA=p.H47P;CNT=1 16 | 1 69236 COSM3677747 A C . . GENE=OR4F5;STRAND=+;CDS=c.146A>C;AA=p.H49P;CNT=1 17 | 1 69345 COSM911918 C A . . GENE=OR4F5;STRAND=+;CDS=c.255C>A;AA=p.I85I;CNT=1 18 | 1 69511 COSM4144171 A G . . GENE=OR4F5;STRAND=+;CDS=c.421A>G;AA=p.T141A;CNT=1 19 | 1 69517 COSM3492078 G A . . GENE=OR4F5;STRAND=+;CDS=c.427G>A;AA=p.G143R;CNT=1 20 | 1 69523 COSM426644 G T . . GENE=OR4F5;STRAND=+;CDS=c.433G>T;AA=p.G145C;CNT=1 21 | 1 69538 COSM75742 G A . . GENE=OR4F5;STRAND=+;CDS=c.448G>A;AA=p.V150M;CNT=1 22 | 1 69539 COSM1343690 T C . . GENE=OR4F5;STRAND=+;CDS=c.449T>C;AA=p.V150A;CNT=1 23 | 1 69540 COSM1560546 G T . . GENE=OR4F5;STRAND=+;CDS=c.450G>T;AA=p.V150V;CNT=1 24 | 1 69552 COSM2240641 G C . . GENE=OR4F5;STRAND=+;CDS=c.462G>C;AA=p.A154A;CNT=1 25 | 1 69569 COSM1599955 T C . . GENE=OR4F5;STRAND=+;CDS=c.479T>C;AA=p.L160P;CNT=2 26 | 1 69591 COSM3419425 C T . . GENE=OR4F5;STRAND=+;CDS=c.501C>T;AA=p.V167V;CNT=1 27 | 1 69904 COSM3677748 A C . . GENE=OR4F5;STRAND=+;CDS=c.814A>C;AA=p.T272P;CNT=1 28 | 1 69968 COSM3677749 A G . . GENE=OR4F5;STRAND=+;CDS=c.878A>G;AA=p.Q293R;CNT=1 29 | 1 367825 COSM3677679 A C . . GENE=OR4F29;STRAND=+;CDS=c.167A>C;AA=p.H56P;CNT=2 30 | 1 367837 COSM3677680 A C . . GENE=OR4F29;STRAND=+;CDS=c.179A>C;AA=p.Y60S;CNT=1 31 | 1 367844 COSM3677681 A C . . GENE=OR4F29;STRAND=+;CDS=c.186A>C;AA=p.L62L;CNT=1 32 | 1 368197 COSM3677682 A C . 
. GENE=OR4F29;STRAND=+;CDS=c.539A>C;AA=p.D180A;CNT=2 33 | 1 621496 COSM3677735 T G . . GENE=OR4F16;STRAND=-;CDS=c.539A>C;AA=p.D180A;CNT=1 34 | 1 621868 COSM3677736 T G . . GENE=OR4F16;STRAND=-;CDS=c.167A>C;AA=p.H56P;CNT=1 35 | 1 621880 COSM3677737 T G . . GENE=OR4F16;STRAND=-;CDS=c.155A>C;AA=p.D52A;CNT=1 36 | 1 861336 COSM3493010 C T . . GENE=SAMD11;STRAND=+;CDS=c.15C>T;AA=p.I5I;CNT=1 37 | 1 861390 COSM460103 G C . . GENE=SAMD11;STRAND=+;CDS=c.69G>C;AA=p.P23P;CNT=1 38 | 1 865609 COSM336143 C T . . GENE=SAMD11;STRAND=+;CDS=c.147C>T;AA=p.P49P;CNT=1 39 | 1 865617 COSM3790304 C G . . GENE=SAMD11;STRAND=+;CDS=c.155C>G;AA=p.S52C;CNT=1 40 | 1 865624 COSM912740 C T . . GENE=SAMD11;STRAND=+;CDS=c.162C>T;AA=p.S54S;CNT=1 41 | 1 865644 COSM3493032 G A . . GENE=SAMD11;STRAND=+;CDS=c.182G>A;AA=p.S61N;CNT=1 42 | 1 865649 COSM3493033 C T . . GENE=SAMD11;STRAND=+;CDS=c.187C>T;AA=p.P63S;CNT=1 43 | 1 865651 COSM3493034 C T . . GENE=SAMD11;STRAND=+;CDS=c.189C>T;AA=p.P63P;CNT=1 44 | 1 865658 COSM364168 G T . . GENE=SAMD11;STRAND=+;CDS=c.196G>T;AA=p.G66W;CNT=1 45 | 1 865691 COSM1686856 C T . . GENE=SAMD11;STRAND=+;CDS=c.229C>T;AA=p.P77S;CNT=1 46 | 1 865716 COSM1735520 G A . . GENE=SAMD11;STRAND=+;CDS=c.254G>A;AA=p.R85K;CNT=1 47 | 1 866438 COSM3386379 G A . . GENE=SAMD11;STRAND=+;CDS=c.274G>A;AA=p.V92M;CNT=1 48 | 1 871165 COSM3711402 C A . . GENE=SAMD11;STRAND=+;CDS=c.319C>A;AA=p.L107I;CNT=2 49 | 1 871215 COSM3997690 C G . . GENE=SAMD11;STRAND=+;CDS=c.369C>G;AA=p.P123P;CNT=1 50 | 1 871217 COSM3667588 A C . . GENE=SAMD11;STRAND=+;CDS=c.371A>C;AA=p.E124A;CNT=1 51 | 1 871255 COSM414754 G A . . GENE=SAMD11;STRAND=+;CDS=c.409G>A;AA=p.E137K;CNT=1 52 | 1 874447 COSM178082 G A . . GENE=SAMD11;STRAND=+;CDS=c.458G>A;AA=p.R153H;CNT=1 53 | 1 874456 COSM178083 G C . . GENE=SAMD11;STRAND=+;CDS=c.467G>C;AA=p.R156P;CNT=1 54 | 1 874465 COSM112049 G GC . . GENE=SAMD11;STRAND=+;CDS=c.476_477insC;AA=p.D160fs*47;CNT=1 55 | 1 874497 COSM912847 G A . . 
GENE=SAMD11;STRAND=+;CDS=c.508G>A;AA=p.E170K;CNT=1 56 | 1 874501 COSM912848 C T . . GENE=SAMD11;STRAND=+;CDS=c.512C>T;AA=p.S171L;CNT=1 57 | 1 874504 COSM1659453 C G . . GENE=SAMD11;STRAND=+;CDS=c.515C>G;AA=p.P172R;CNT=1 58 | 1 874684 COSM3493101 C T . . GENE=SAMD11;STRAND=+;CDS=c.550C>T;AA=p.P184S;CNT=1 59 | 1 874778 COSM1344642 GCCTCCCCAGCCACGGTGAGGACCCACCCTGGCATGATCCCCCTCATCA G . . GENE=SAMD11;STRAND=+;CDS=c.645_692del48;AA=p.G220_H235del16;CNT=2 60 | 1 874781 COSM1344643 T TC . . GENE=SAMD11;STRAND=+;CDS=c.647_648insC;AA=p.S218fs*4;CNT=1 61 | 1 874805 COSM3493102 C T . . GENE=SAMD11;STRAND=+;CDS=c.671C>T;AA=p.P224L;CNT=1 62 | 1 874816 COSM1344644 C CT . . GENE=SAMD11;STRAND=+;CDS=c.682_683insT;AA=p.P228fs*227;CNT=3 63 | 1 876536 COSM4144216 T G . . GENE=SAMD11;STRAND=+;CDS=c.719T>G;AA=p.V240G;CNT=1 64 | 1 876670 COSM1317508 C T . . GENE=SAMD11;STRAND=+;CDS=c.853C>T;AA=p.P285S;CNT=1 65 | 1 877579 COSM3493106 C T . . GENE=SAMD11;STRAND=+;CDS=c.933C>T;AA=p.P311P;CNT=1 66 | 1 877627 COSM1503991 C A . . GENE=SAMD11;STRAND=+;CDS=c.981C>A;AA=p.T327T;CNT=1 67 | 1 877831 COSM4144217 T C . . GENE=SAMD11;STRAND=+;CDS=c.1027T>C;AA=p.W343R;CNT=2 68 | 1 878172 COSM3386380 G A . . GENE=SAMD11;STRAND=+;CDS=c.1298G>A;AA=p.R433Q;CNT=1 69 | 1 878239 COSM3667591 C A . . GENE=SAMD11;STRAND=+;CDS=c.1365C>A;AA=p.L455L;CNT=1 70 | 1 878266 COSM3419541 G A . . GENE=SAMD11;STRAND=+;CDS=c.1392G>A;AA=p.E464E;CNT=1 71 | 1 878268 COSM1344676 C A . . GENE=SAMD11;STRAND=+;CDS=c.1394C>A;AA=p.P465H;CNT=1 72 | 1 878314 COSM426784 G C . . GENE=SAMD11;STRAND=+;CDS=c.1440G>C;AA=p.G480G;CNT=2 73 | 1 878385 COSM3493108 G T . . GENE=SAMD11;STRAND=+;CDS=c.1511G>T;AA=p.G504V;CNT=1 74 | 1 878409 COSM465045 C T . . GENE=SAMD11;STRAND=+;CDS=c.1535C>T;AA=p.P512L;CNT=1 75 | 1 878634 COSM364872 C A . . GENE=SAMD11;STRAND=+;CDS=c.1566C>A;AA=p.G522G;CNT=1 76 | 1 878649 COSM1748634 C T . . GENE=SAMD11;STRAND=+;CDS=c.1581C>T;AA=p.L527L;CNT=2 77 | 1 878746 COSM3790315 G C . . 
GENE=SAMD11;STRAND=+;CDS=c.1678G>C;AA=p.E560Q;CNT=1 78 | 1 879085 COSM3806043 G T . . GENE=SAMD11;STRAND=+;CDS=c.1697G>T;AA=p.R566M;CNT=1 79 | 1 879126 COSM3493109 G A . . GENE=SAMD11;STRAND=+;CDS=c.1738G>A;AA=p.E580K;CNT=1 80 | 1 879131 COSM3493110 G A . . GENE=SAMD11;STRAND=+;CDS=c.1743G>A;AA=p.E581E;CNT=1 81 | 1 879297 COSM4010379 C T . . GENE=SAMD11;STRAND=+;CDS=c.1810C>T;AA=p.R604C;CNT=1 82 | 1 879317 COSM1344678 C T . . GENE=SAMD11;STRAND=+;CDS=c.1830C>T;AA=p.Y610Y;CNT=2 83 | 1 879321 COSM2244683 G A . . GENE=SAMD11;STRAND=+;CDS=c.1834G>A;AA=p.A612T;CNT=1 84 | 1 879333 COSM4010380 G A . . GENE=SAMD11;STRAND=+;CDS=c.1846G>A;AA=p.V616M;CNT=1 85 | 1 879365 COSM1602747 G A . . GENE=SAMD11;STRAND=+;CDS=c.1878G>A;AA=p.R626R;CNT=2 86 | 1 879370 COSM912864 C T . . GENE=SAMD11;STRAND=+;CDS=c.1883C>T;AA=p.P628L;CNT=1 87 | 1 879384 COSM1296757 G A . . GENE=SAMD11;STRAND=+;CDS=c.1897G>A;AA=p.G633S;CNT=1 88 | 1 879422 COSM121789 G A . . GENE=SAMD11;STRAND=+;CDS=c.1935G>A;AA=p.T645T;CNT=1 89 | 1 879424 COSM682820 C T . . GENE=SAMD11;STRAND=+;CDS=c.1937C>T;AA=p.S646F;CNT=1 90 | 1 879425 COSM682818 C T . . GENE=SAMD11;STRAND=+;CDS=c.1938C>T;AA=p.S646S;CNT=1 91 | 1 879452 COSM4010381 C T . . GENE=SAMD11;STRAND=+;CDS=c.1965C>T;AA=p.A655A;CNT=1 92 | 1 879497 COSM3386381 T A . . GENE=SAMD11;STRAND=+;CDS=c.2010T>A;AA=p.L670L;CNT=1 93 | 1 879509 COSM314985 C T . . GENE=SAMD11;STRAND=+;CDS=c.2022C>T;AA=p.P674P;CNT=1 94 | 1 880182 COSM3786284 T C . . GENE=NOC2L;STRAND=-;CDS=c.2144-2A>G;AA=p.?;CNT=1 95 | 1 880468 COSM1344683 C A . . GENE=NOC2L;STRAND=-;CDS=c.2112G>T;AA=p.E704D;CNT=1 96 | 1 880485 COSM414753 C T . . GENE=NOC2L;STRAND=-;CDS=c.2095G>A;AA=p.D699N;CNT=1 97 | 1 880525 COSM682817 C A . . GENE=NOC2L;STRAND=-;CDS=c.2055G>T;AA=p.G685G;CNT=1 98 | 1 880905 COSM536437 C A . . GENE=NOC2L;STRAND=-;CDS=c.2046G>T;AA=p.S682S;CNT=1 99 | 1 880925 COSM1296758 C T . . GENE=NOC2L;STRAND=-;CDS=c.2026G>A;AA=p.D676N;CNT=1 100 | 1 880950 COSM3493111 G A . . 
GENE=NOC2L;STRAND=-;CDS=c.2001C>T;AA=p.L667L;CNT=1 101 | 1 11174385 . C A . M DP=1002;SOMATIC;SS=2;SSC=4;GPV=1E0;SPV=3.4765E-1;ANNOVAR_DATE=2015-03-22;Func.refGene=exonic;Gene.refGene=MTOR;Gen eDetail.refGene=.;ExonicFunc.refGene=nonsynonymous_SNV;AAChange.refGene=MTOR:NM_004958:exon53:c.G7290T:p.R2430S;cytoBand=1p36.22;genomicSuperDups=.;esp6500siv2_all=.;1000g2014oct _all=.;1000g2014oct_afr=.;1000g2014oct_eas=.;1000g2014oct_eur=.;snp138=.;SIFT_score=0;SIFT_pred=D;Polyphen2_HDIV_score=0.933;Polyphen2_HDIV_pred=P;Polyphen2_HVAR_score=0.792;Poly phen2_HVAR_pred=P;LRT_score=0.000;LRT_pred=D;MutationTaster_score=1.000;MutationTaster_pred=D;MutationAssessor_score=1.74;MutationAssessor_pred=L;FATHMM_score=-1.11;FATHMM_pred=T ;RadialSVM_score=-0.062;RadialSVM_pred=T;LR_score=0.472;LR_pred=T;VEST3_score=0.967;CADD_raw=3.936;CADD_phred=20.1;GERP++_RS=4.98;phyloP46way_placental=1.500;phyloP100way_vertebr ate=2.012;SiPhy_29way_logOdds=9.345;cosmic77=.;CLINSIG=.;CLNDBN=.;CLNACC=.;CLNDSDB=.;CLNDSDBID=.;ALLELE_END GT:GQ:DP:RD:AD:FREQ:DP4 0/0:.:411:411:0:0%:221,190,0,0 0/1:.:591:58 9:2:0.34%:349,240,2,0 102 | --------------------------------------------------------------------------------