├── README.md ├── doc ├── SubreadUsersGuide.pdf └── SubreadUsersGuide.tex ├── src ├── HelperFunctions.c ├── HelperFunctions.h ├── Makefile ├── Makefile.FreeBSD ├── Makefile.Linux ├── Makefile.MacOS ├── Makefile.Windows ├── SNPCalling.c ├── SNPCalling.h ├── SUBindel.c ├── aligner.c ├── build-sam-index.c ├── cell-counts.c ├── core-bigtable.c ├── core-bigtable.h ├── core-indel.c ├── core-indel.h ├── core-interface-aligner.c ├── core-interface-subjunc.c ├── core-junction.c ├── core-junction.h ├── core.c ├── core.h ├── coverage_calc.c ├── del4-mmap-test.c ├── detection-calls.c ├── exon-algorithms.c ├── exon-algorithms.h ├── exon-align-indel.c ├── exon-align.c ├── filterJunctionTable.c ├── flattenAnnotations.c ├── fullscan.c ├── fusion-align.c ├── gen_long_chromosomes.c ├── gen_rand_reads.c ├── gene-algorithms.c ├── gene-algorithms.h ├── gene-value-index.c ├── gene-value-index.h ├── global-reassembly.c ├── hashtable.c ├── hashtable.h ├── index-builder.c ├── input-blc.c ├── input-blc.h ├── input-files.c ├── input-files.h ├── interval_merge.c ├── interval_merge.h ├── long-hashtable.c ├── long-hashtable.h ├── longread-one │ ├── LRMbase-index.c │ ├── LRMbase-index.h │ ├── LRMchro-event.c │ ├── LRMchro-event.h │ ├── LRMconfig.h │ ├── LRMfile-io.c │ ├── LRMfile-io.h │ ├── LRMhashtable.c │ ├── LRMhashtable.h │ ├── LRMhelper.c │ ├── LRMhelper.h │ ├── LRMseek-zlib.c │ ├── LRMseek-zlib.h │ ├── LRMsorted-hashtable.c │ ├── LRMsorted-hashtable.h │ ├── Makefile │ └── longread-mapping.c ├── makefile.version ├── mergeVCF.c ├── propmapped.c ├── qualityScores.c ├── read-repair.c ├── readSummary.c ├── removeDupReads.c ├── removeDupReads.h ├── sam2fq.c ├── samMappedBases.c ├── sambam-file.c ├── sambam-file.h ├── seek-zlib.c ├── seek-zlib.h ├── sorted-hashtable.c ├── sorted-hashtable.h ├── subfilter.c ├── sublog.c ├── sublog.h ├── subread.h ├── subtools.c ├── test-fisher.c ├── test-seek-zlib.c ├── test_qs.c ├── tx-unique.c ├── tx-unique.h └── zlib_test.c ├── test ├── chr901.fa ├── exactSNP │ 
├── data │ │ └── test-in.BAM │ └── exactSNP-test.sh ├── featureCounts │ ├── data │ │ ├── across_genes.gtf │ │ ├── across_genes_r1.bam │ │ ├── across_genes_r1.bam.ora │ │ ├── across_genes_r1.sam │ │ ├── across_genes_r1.sam.ora │ │ ├── across_genes_r2.bam │ │ ├── across_genes_r2.bam.ora │ │ ├── across_genes_r2.sam │ │ ├── across_genes_r2.sam.ora │ │ ├── across_intron.gtf │ │ ├── across_intron_r1.bam │ │ ├── across_intron_r1.bam.ora │ │ ├── across_intron_r1.sam │ │ ├── across_intron_r1.sam.ora │ │ ├── across_intron_r2.bam │ │ ├── across_intron_r2.bam.ora │ │ ├── across_intron_r2.sam │ │ ├── across_intron_r2.sam.ora │ │ ├── compare.sh │ │ ├── corner-BINS.SAF │ │ ├── corner-BINS.sam │ │ ├── corner-BothEnds.ora │ │ ├── corner-Chimeric.ora │ │ ├── corner-DoNotSort.ora │ │ ├── corner-EXON-ONLY.ora │ │ ├── corner-Extend3.ora │ │ ├── corner-Extend5.ora │ │ ├── corner-Fraction.ora │ │ ├── corner-INDEL.ora │ │ ├── corner-INDEL.sam │ │ ├── corner-IgnoreDup.ora │ │ ├── corner-JUNC-ONLY.ora │ │ ├── corner-JUNC.ora │ │ ├── corner-JUNC.sam │ │ ├── corner-Jcounts-FA.ora │ │ ├── corner-Jcounts-FA.ora.jcounts │ │ ├── corner-Jcounts.ora │ │ ├── corner-Jcounts.ora.jcounts │ │ ├── corner-LargestOverlap.ora │ │ ├── corner-MaxOPs.ora │ │ ├── corner-MinMAPQ.ora │ │ ├── corner-MinOverlap.ora │ │ ├── corner-MultiMapping.ora │ │ ├── corner-NH-PM.ora │ │ ├── corner-NH.ora │ │ ├── corner-NH.sam │ │ ├── corner-ONEEND-BOTH.ora │ │ ├── corner-ONEEND.ora │ │ ├── corner-ONEEND.sam │ │ ├── corner-PEdist.ora │ │ ├── corner-Read2Pos3.ora │ │ ├── corner-Read2Pos5.ora │ │ ├── corner-fractions.SAF │ │ ├── corner-fractions.ora │ │ ├── corner-fractions.sam │ │ ├── corner-reduction.sam │ │ ├── intron_between.bam │ │ ├── intron_between.bam.ora │ │ ├── intron_between.gtf │ │ ├── intron_between.sam │ │ ├── intron_between.sam.ora │ │ ├── intron_between_nointron.bam │ │ ├── intron_between_nointron.bam.ora │ │ ├── intron_between_nointron.sam │ │ ├── intron_between_nointron.sam.ora │ │ ├── test-chralias.GTF │ │ ├── 
test-chralias.SAF │ │ ├── test-chralias.ora │ │ ├── test-chralias.sam │ │ ├── test-chralias.txt │ │ ├── test-chrname.SAF │ │ ├── test-chrname.ora │ │ ├── test-chrname.sam │ │ ├── test-dup.sam │ │ ├── test-fracOverlap.ora │ │ ├── test-fracOverlap.sam │ │ ├── test-junc.sam │ │ ├── test-minimum-35ext.ora │ │ ├── test-minimum-3ext.ora │ │ ├── test-minimum-5ext.ora │ │ ├── test-minimum-5reduce.ora │ │ ├── test-minimum-FL.ora │ │ ├── test-minimum-O.ora │ │ ├── test-minimum-PE.ora │ │ ├── test-minimum-SE.ora │ │ ├── test-minimum-STR.ora │ │ ├── test-minimum-UNSTR.ora │ │ ├── test-minimum-dup.ora │ │ ├── test-minimum.GTF │ │ ├── test-minimum.SAF │ │ ├── test-minimum.bam │ │ ├── test-minimum.ora │ │ └── test-minimum.sam │ ├── featureCounts-test.sh │ ├── test_chr_aliases.sh │ ├── test_chr_inference.sh │ ├── test_commonusage.sh │ ├── test_corner_cases.sh │ ├── test_featurelevel.sh │ └── test_minimal_example.sh ├── subjunc │ ├── data │ │ ├── junction-reads-A.fq │ │ └── junction-reads-B.fq │ └── subjunc-test.sh ├── subread-align │ ├── data │ │ ├── cigar-test-1.fq │ │ ├── cigar-test-2.fq │ │ ├── indel-test1.fq │ │ ├── indel-test2.fq │ │ ├── subfusion_test.fa │ │ ├── subfusion_test2.fa │ │ ├── test-err-mut-r1.fq.gz │ │ ├── test-err-mut-r2.fq.gz │ │ ├── test-error-r1.fq │ │ ├── test-error-r2.fq │ │ ├── test-noerror-r1.fq │ │ └── test-noerror-r2.fq │ ├── readname_cigar_match.py │ ├── readname_ora_match.py │ └── subread-align-test.sh └── test_all.sh └── tutorial └── Aligning.md /README.md: -------------------------------------------------------------------------------- 1 | # Subread 2 | The Subread software package is a tool kit for processing next-gen sequencing data. It includes Subread aligner, Subjunc exon-exon junction detector and featureCounts read summarization program. 3 | 4 | ## Installation 5 | The latest releases can be downloaded from the [release page](https://github.com/ShiLab-Bioinformatics/subread/releases). 
6 | 7 | ### Installation from a binary package 8 | The easiest way to install Subread on Linux, Windows and macOS is to directly download the binary packages from our Release page. Simply decompress the package and the programs will be in the "/bin" directory. 9 | 10 | ### Installation in the R environment 11 | We also provide an R version of our package, [Rsubread](http://bioconductor.org/packages/Rsubread), on Bioconductor. You can follow the instructions on the Rsubread webpage to install it in R. 12 | 13 | ### Installation from the source code 14 | An experienced user may also try building the binary programs from source code. To this end, some programs and libraries are necessary. 15 | 16 | 1. A C language compiler. It can be gcc or clang or anything. Intel CC should work well but we have not tried it on our source code. 17 | 2. Libraries including zlib (for gzip), libpthread (for multi-threading) and libm (for math). We tried to reduce our dependency as much as possible so no fancy libraries are needed. 18 | 3. GNU make and Shell. 19 | 20 | If you use Windows, you may consider installing [Mingw-w64](http://mingw-w64.org/doku.php) because it provides all the required programs and libraries in one place. You do not need Cygwin or the Linux subsystem in Windows for building Subread. 21 | 22 | Compiling the source code is simple. 23 | ```sh 24 | $ cd src 25 | $ make -f Makefile.Linux (for Linux) 26 | $ make -f Makefile.MacOS (for macOS) 27 | $ make -f Makefile.Windows (for Windows) 28 | ``` 29 | The executable programs will be moved to the "/bin" directory. Because we want to minimise dependencies on other packages, we do not use autoconf to generate the Makefiles. 30 | 31 | ### Testing the installation 32 | After installation, you may test the programs to see if they work. 33 | 34 | The Subread package incorporates many small testcases that cover most of its functions. 
No matter if Subread is installed from the source code or a binary package, you can run "test_all.sh" in the "/test" directory. This assumes that you have a Shell program and a Python2 interpreter in PATH. 35 | 36 | ```sh 37 | $ cd test 38 | $ sh test_all.sh 39 | ``` 40 | 41 | ## Usage 42 | The usages of the programs in this package can be found in the users-guide in the "/doc" directory. 43 | 44 | ## Citation 45 | We have published papers on our Subread/Subjunc read aligners and featureCounts read quantifiers. 46 | 47 | 1. The Subread aligner: fast, accurate and scalable read mapping by seed-and-vote, ***Y Liao, GK Smyth, W Shi***, Nucleic acids research, 2013 [PMID:23558742](https://pubmed.ncbi.nlm.nih.gov/23558742/) 48 | 49 | 2. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features, ***Y Liao, GK Smyth, W Shi***, Bioinformatics, 2014 [PMID:24227677](https://pubmed.ncbi.nlm.nih.gov/24227677/) 50 | 51 | 3. The R package Rsubread is easier, faster, cheaper and better for alignment and quantification of RNA sequencing reads, ***Y Liao, GK Smyth, W Shi***, Nucleic acids research, 2019 [PMID:30783653](https://pubmed.ncbi.nlm.nih.gov/30783653/) 52 | 53 | ## PhD projects 54 | PhD projects are available for further development of the Subread package, including the development of new methods for analyzing single-cell sequencing data. For any inquiries, please contact [Prof Wei Shi](https://www.onjcri.org.au/about-us/wei-shi/). 
55 | -------------------------------------------------------------------------------- /doc/SubreadUsersGuide.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/doc/SubreadUsersGuide.pdf -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | # Default target: only prints per-platform build instructions; it compiles nothing. 2 | all: 3 | @echo 4 | @echo " Subread currently supports Linux, Windows, Mac OS X and FreeBSD. Please choose the correct Makefile to build Subread." 5 | @echo 6 | @echo " For building subread in Linux, please run ' make -f Makefile.Linux '." 7 | @echo " For building subread in Windows, please run ' make -f Makefile.Windows '. See README.md for how to find the necessary programs and libraries." 8 | @echo " For building subread in Mac OS X, please run ' make -f Makefile.MacOS '." 9 | @echo " For building subread in FreeBSD, please run ' gmake -f Makefile.FreeBSD '." 10 | @echo 11 | @echo " The default compiler is gcc; you may change it by editing the Makefiles for platforms." 
12 | @echo 13 | @echo " The generated executables are saved to directory (PACKAGE_DIR)/bin" 14 | @echo 15 | -------------------------------------------------------------------------------- /src/Makefile.FreeBSD: -------------------------------------------------------------------------------- 1 | include makefile.version 2 | 3 | MACOS = -D FREEBSD 4 | 5 | 6 | CCFLAGS = -march=native -mtune=core2 ${MACOS} -O9 -Wall -Wno-maybe-uninitialized -Wno-incompatible-pointer-types -Wno-array-bounds -Wno-unused-but-set-variable -Wno-unused-variable -Wno-unused-result -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" 7 | LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE -l compat # -DREPORT_ALL_THE_BEST 8 | CC = gcc ${CCFLAGS} -ggdb -fomit-frame-pointer -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0 9 | 10 | 11 | ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge core-bigtable seek-zlib long-hashtable 12 | 13 | ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS}) 14 | ALL_H=$(addsuffix .h, ${ALL_LIBS}) 15 | ALL_C=$(addsuffix .c, ${ALL_LIBS}) 16 | 17 | all: sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc subtools qualityScores subread-fullscan propmapped 18 | mkdir -p ../bin/utilities 19 | mv longread-one/LRM longread-one/sublong 20 | mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/ 21 | mv repair subtools qualityScores propmapped subread-fullscan removeDup ../bin/utilities 22 | @echo 23 | @echo "###########################################################" 24 | @echo "# #" 25 | @echo "# Installation successfully completed. 
#" 26 | @echo "# #" 27 | @echo "# Generated executables were copied to directory ../bin/ #" 28 | @echo "# #" 29 | @echo "###########################################################" 30 | @echo 31 | 32 | sublong: longread-one/longread-mapping.c ${ALL_OBJECTS} 33 | echo "MACOS= -D FREEBSD " > longread-one/make.version 34 | rm -f longread-one/*.o 35 | cd longread-one && $(MAKE) 36 | 37 | repair: read-repair.c ${ALL_OBJECTS} 38 | ${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS} 39 | 40 | propmapped: propmapped.c ${ALL_OBJECTS} 41 | ${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS} 42 | 43 | exactSNP: SNPCalling.c SNPCalling.h ${ALL_OBJECTS} 44 | ${CC} -o exactSNP SNPCalling.c ${ALL_OBJECTS} ${LDFLAGS} 45 | 46 | subread-buildindex: index-builder.c subread.h ${ALL_OBJECTS} 47 | ${CC} -o subread-buildindex index-builder.c ${ALL_OBJECTS} ${LDFLAGS} 48 | 49 | removeDup: removeDupReads.c removeDupReads.h subread.h ${ALL_OBJECTS} 50 | ${CC} -o removeDup removeDupReads.c ${ALL_OBJECTS} ${LDFLAGS} 51 | 52 | subindel: SUBindel.c core.h subread.h ${ALL_OBJECTS} 53 | ${CC} -o subindel SUBindel.c ${ALL_OBJECTS} ${LDFLAGS} 54 | 55 | featureCounts: readSummary.c subread.h ${ALL_OBJECTS} 56 | ${CC} -o featureCounts readSummary.c ${ALL_OBJECTS} ${LDFLAGS} 57 | 58 | subread-align: core-interface-aligner.c ${ALL_OBJECTS} 59 | ${CC} -o subread-align core-interface-aligner.c ${ALL_OBJECTS} ${LDFLAGS} 60 | 61 | subjunc: core-interface-subjunc.c ${ALL_OBJECTS} 62 | ${CC} -o subjunc core-interface-subjunc.c ${ALL_OBJECTS} ${LDFLAGS} 63 | 64 | qualityScores: qualityScores.c ${ALL_OBJECTS} 65 | ${CC} -o qualityScores qualityScores.c ${ALL_OBJECTS} ${LDFLAGS} 66 | 67 | subread-fullscan: fullscan.c ${ALL_OBJECTS} 68 | ${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS} 69 | 70 | subtools: subtools.c ${ALL_OBJECTS} 71 | ${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS} 72 | 73 | 74 | clean: 75 | rm -f core featureCounts exactSNP removeDup subread-buildindex 
${ALL_OBJECTS} 76 | -------------------------------------------------------------------------------- /src/Makefile.Linux: -------------------------------------------------------------------------------- 1 | #MACOS = -D MACOS 2 | 3 | 4 | CC_EXEC = gcc 5 | OPT_LEVEL = 3 6 | 7 | include makefile.version 8 | -include ~/.R/DBPZ_debug_makefile 9 | 10 | CXXFLAGS= 11 | CFLAGS= 12 | CCFLAGS = ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL} 13 | LDFLAGS = ${STATIC_MAKE} -pthread -lz ${MACOS} -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -lm 14 | CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb 15 | 16 | 17 | ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib input-blc 18 | ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS}) 19 | ALL_H=$(addsuffix .h, ${ALL_LIBS}) 20 | ALL_C=$(addsuffix .c, ${ALL_LIBS}) 21 | 22 | all: genRandomReads detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF cell-counts # samMappedBases mergeVCF testZlib 23 | mkdir -p ../bin/utilities 24 | mv longread-one/LRM longread-one/sublong 25 | mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex cell-counts ../bin/ 26 | mv detectionCall genRandomReads repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities 27 | @echo 28 | @echo "###########################################################" 29 | @echo "# #" 30 | @echo "# Installation successfully completed. 
#" 31 | @echo "# #" 32 | @echo "# Generated executables were copied to directory ../bin/ #" 33 | @echo "# #" 34 | @echo "###########################################################" 35 | @echo 36 | 37 | sublong: longread-one/longread-mapping.c ${ALL_OBJECTS} 38 | echo " " > longread-one/make.version 39 | rm -f longread-one/*.o 40 | cd longread-one && $(MAKE) 41 | 42 | genRandomReads: gen_rand_reads.c ${ALL_OBJECTS} 43 | ${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS} 44 | 45 | flattenGTF: flattenAnnotations.c ${ALL_OBJECTS} 46 | ${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS} 47 | 48 | detectionCall: detection-calls.c ${ALL_OBJECTS} 49 | ${CC} -o detectionCall detection-calls.c ${ALL_OBJECTS} ${LDFLAGS} 50 | 51 | repair: read-repair.c ${ALL_OBJECTS} 52 | ${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS} 53 | 54 | txUnique: tx-unique.c tx-unique.h ${ALL_OBJECTS} 55 | ${CC} -o txUnique tx-unique.c ${ALL_OBJECTS} ${LDFLAGS} 56 | 57 | globalReassembly: global-reassembly.c ${ALL_OBJECTS} 58 | ${CC} -o globalReassembly global-reassembly.c ${ALL_OBJECTS} ${LDFLAGS} 59 | 60 | propmapped: propmapped.c ${ALL_OBJECTS} 61 | ${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS} 62 | 63 | exactSNP: SNPCalling.c SNPCalling.h ${ALL_OBJECTS} 64 | ${CC} -o exactSNP SNPCalling.c ${ALL_OBJECTS} ${LDFLAGS} 65 | 66 | subread-buildindex: index-builder.c subread.h ${ALL_OBJECTS} 67 | ${CC} -o subread-buildindex index-builder.c ${ALL_OBJECTS} ${LDFLAGS} 68 | 69 | removeDup: removeDupReads.c removeDupReads.h subread.h ${ALL_OBJECTS} 70 | ${CC} -o removeDup removeDupReads.c ${ALL_OBJECTS} ${LDFLAGS} 71 | 72 | subindel: SUBindel.c core.h subread.h ${ALL_OBJECTS} 73 | ${CC} -o subindel SUBindel.c ${ALL_OBJECTS} ${LDFLAGS} 74 | 75 | featureCounts: readSummary.c subread.h ${ALL_OBJECTS} 76 | ${CC} -o featureCounts readSummary.c ${ALL_OBJECTS} ${LDFLAGS} 77 | 78 | subread-align: core-interface-aligner.c ${ALL_OBJECTS} 79 | ${CC} -o subread-align 
core-interface-aligner.c ${ALL_OBJECTS} ${LDFLAGS} 80 | 81 | subjunc: core-interface-subjunc.c ${ALL_OBJECTS} 82 | ${CC} -o subjunc core-interface-subjunc.c ${ALL_OBJECTS} ${LDFLAGS} 83 | 84 | subtools: subtools.c ${ALL_OBJECTS} 85 | ${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS} 86 | 87 | qualityScores: qualityScores.c ${ALL_OBJECTS} 88 | ${CC} -o qualityScores qualityScores.c ${ALL_OBJECTS} ${LDFLAGS} 89 | 90 | subread-fullscan: fullscan.c ${ALL_OBJECTS} 91 | ${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS} 92 | 93 | cell-counts: cell-counts.c ${ALL_OBJECTS} 94 | ${CC} -o cell-counts cell-counts.c ${ALL_OBJECTS} ${LDFLAGS} 95 | 96 | clean: 97 | rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS} 98 | -------------------------------------------------------------------------------- /src/Makefile.MacOS: -------------------------------------------------------------------------------- 1 | MACOS = -D MACOS 2 | include makefile.version 3 | 4 | CCFLAGS = -mtune=core2 ${MACOS} -O9 -w -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL} 5 | LDFLAGS = -pthread -lz -lm ${MACOS} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST 6 | CC = gcc ${CCFLAGS} ${STATIC_MAKE} -ggdb -fomit-frame-pointer -O3 -ffast-math -funroll-loops -mmmx -msse -msse2 -msse3 -fmessage-length=0 7 | 8 | 9 | ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge core-bigtable seek-zlib input-blc 10 | ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS}) 11 | ALL_H=$(addsuffix .h, ${ALL_LIBS}) 12 | ALL_C=$(addsuffix .c, ${ALL_LIBS}) 13 | 14 | all: genRandomReads sublong repair featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores subread-fullscan propmapped flattenGTF # globalReassembly testZlib 15 | mkdir -p ../bin/utilities 16 | mv 
longread-one/LRM longread-one/sublong 17 | mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/ 18 | mv repair genRandomReads subread-fullscan qualityScores removeDup propmapped flattenGTF ../bin/utilities 19 | @echo 20 | @echo "###########################################################" 21 | @echo "# #" 22 | @echo "# Installation successfully completed. #" 23 | @echo "# #" 24 | @echo "# Generated executables were copied to directory ../bin/ #" 25 | @echo "# #" 26 | @echo "###########################################################" 27 | @echo 28 | 29 | sublong: longread-one/longread-mapping.c ${ALL_OBJECTS} 30 | echo "MACOS= -D MACOS " > longread-one/make.version 31 | rm -f longread-one/*.o 32 | cd longread-one && $(MAKE) 33 | 34 | genRandomReads: gen_rand_reads.c ${ALL_OBJECTS} 35 | ${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS} 36 | 37 | flattenGTF: flattenAnnotations.c ${ALL_OBJECTS} 38 | ${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS} 39 | 40 | repair: read-repair.c ${ALL_OBJECTS} 41 | ${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS} 42 | 43 | propmapped: propmapped.c ${ALL_OBJECTS} 44 | ${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS} 45 | 46 | exactSNP: SNPCalling.c SNPCalling.h ${ALL_OBJECTS} 47 | ${CC} -o exactSNP SNPCalling.c ${ALL_OBJECTS} ${LDFLAGS} 48 | 49 | subread-buildindex: index-builder.c subread.h ${ALL_OBJECTS} 50 | ${CC} -o subread-buildindex index-builder.c ${ALL_OBJECTS} ${LDFLAGS} 51 | 52 | removeDup: removeDupReads.c removeDupReads.h subread.h ${ALL_OBJECTS} 53 | ${CC} -o removeDup removeDupReads.c ${ALL_OBJECTS} ${LDFLAGS} 54 | 55 | subindel: SUBindel.c core.h subread.h ${ALL_OBJECTS} 56 | ${CC} -o subindel SUBindel.c ${ALL_OBJECTS} ${LDFLAGS} 57 | 58 | featureCounts: readSummary.c subread.h ${ALL_OBJECTS} 59 | ${CC} -o featureCounts readSummary.c ${ALL_OBJECTS} ${LDFLAGS} 60 | 61 | subread-align: core-interface-aligner.c 
${ALL_OBJECTS} 62 | ${CC} -o subread-align core-interface-aligner.c ${ALL_OBJECTS} ${LDFLAGS} 63 | 64 | #subtools: subtools.c ${ALL_OBJECTS} 65 | # ${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS} 66 | 67 | subjunc: core-interface-subjunc.c ${ALL_OBJECTS} 68 | ${CC} -o subjunc core-interface-subjunc.c ${ALL_OBJECTS} ${LDFLAGS} 69 | 70 | qualityScores: qualityScores.c ${ALL_OBJECTS} 71 | ${CC} -o qualityScores qualityScores.c ${ALL_OBJECTS} ${LDFLAGS} 72 | 73 | subread-fullscan: fullscan.c ${ALL_OBJECTS} 74 | ${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS} 75 | 76 | testZlib: test-seek-zlib.c ${ALL_OBJECTS} 77 | ${CC} -o testZlib test-seek-zlib.c ${ALL_OBJECTS} ${LDFLAGS} 78 | 79 | clean: 80 | rm -f subtools core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS} 81 | -------------------------------------------------------------------------------- /src/Makefile.Windows: -------------------------------------------------------------------------------- 1 | CC_EXEC = gcc 2 | OPT_LEVEL = 3 3 | 4 | include makefile.version 5 | -include ~/.R/DBPZ_debug_makefile 6 | 7 | CCFLAGS = -mtune=core2 -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -D SUBREAD_VERSION=\"${SUBREAD_VERSION}\" -D_FILE_OFFSET_BITS=64 ${WARNING_LEVEL} -D __MINGW32__ 8 | LDFLAGS = ${STATIC_MAKE} -pthread -lz -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE -lm 9 | CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb 10 | 11 | 12 | ALL_LIBS= core core-junction core-indel sambam-file sublog gene-algorithms hashtable input-files sorted-hashtable gene-value-index exon-algorithms HelperFunctions interval_merge long-hashtable core-bigtable seek-zlib input-blc 13 | ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS}) 14 | ALL_H=$(addsuffix .h, ${ALL_LIBS}) 15 | ALL_C=$(addsuffix .c, ${ALL_LIBS}) 16 | 17 | all: $(ALL_OBJECTS) genRandomReads detectionCall sublong repair txUnique featureCounts removeDup exactSNP subread-buildindex subindel subread-align subjunc qualityScores 
subread-fullscan propmapped flattenGTF # samMappedBases mergeVCF testZlib 18 | mkdir -p ../bin/utilities 19 | mv longread-one/LRM longread-one/sublong 20 | mv longread-one/sublong subread-align subjunc featureCounts subindel exactSNP subread-buildindex ../bin/ 21 | mv detectionCall genRandomReads repair propmapped qualityScores removeDup subread-fullscan txUnique flattenGTF ../bin/utilities 22 | @echo 23 | @echo "###########################################################" 24 | @echo "# #" 25 | @echo "# Installation successfully completed. #" 26 | @echo "# #" 27 | @echo "# Generated executables were copied to directory ../bin/ #" 28 | @echo "# #" 29 | @echo "###########################################################" 30 | @echo 31 | 32 | $(ALL_OBJECTS): $(ALL_C) $(ALL_H) 33 | $(CC) -o $@ -c $(subst .o,.c,$@) 34 | 35 | sublong: longread-one/longread-mapping.c ${ALL_OBJECTS} 36 | echo "MINGW32 = -D __MINGW32__" > longread-one/make.version 37 | rm -f longread-one/*.o 38 | cd longread-one && $(MAKE) 39 | 40 | genRandomReads: gen_rand_reads.c ${ALL_OBJECTS} 41 | ${CC} -o genRandomReads gen_rand_reads.c ${ALL_OBJECTS} ${LDFLAGS} 42 | 43 | flattenGTF: flattenAnnotations.c ${ALL_OBJECTS} 44 | ${CC} -o flattenGTF flattenAnnotations.c ${ALL_OBJECTS} ${LDFLAGS} 45 | 46 | detectionCall: detection-calls.c ${ALL_OBJECTS} 47 | ${CC} -o detectionCall detection-calls.c ${ALL_OBJECTS} ${LDFLAGS} 48 | 49 | repair: read-repair.c ${ALL_OBJECTS} 50 | ${CC} -o repair read-repair.c ${ALL_OBJECTS} ${LDFLAGS} 51 | 52 | txUnique: tx-unique.c tx-unique.h ${ALL_OBJECTS} 53 | ${CC} -o txUnique tx-unique.c ${ALL_OBJECTS} ${LDFLAGS} 54 | 55 | globalReassembly: global-reassembly.c ${ALL_OBJECTS} 56 | ${CC} -o globalReassembly global-reassembly.c ${ALL_OBJECTS} ${LDFLAGS} 57 | 58 | propmapped: propmapped.c ${ALL_OBJECTS} 59 | ${CC} -o propmapped propmapped.c ${ALL_OBJECTS} ${LDFLAGS} 60 | 61 | exactSNP: SNPCalling.c SNPCalling.h ${ALL_OBJECTS} 62 | ${CC} -o exactSNP SNPCalling.c ${ALL_OBJECTS} 
${LDFLAGS} 63 | 64 | subread-buildindex: index-builder.c subread.h ${ALL_OBJECTS} 65 | ${CC} -o subread-buildindex index-builder.c ${ALL_OBJECTS} ${LDFLAGS} 66 | 67 | removeDup: removeDupReads.c removeDupReads.h subread.h ${ALL_OBJECTS} 68 | ${CC} -o removeDup removeDupReads.c ${ALL_OBJECTS} ${LDFLAGS} 69 | 70 | subindel: SUBindel.c core.h subread.h ${ALL_OBJECTS} 71 | ${CC} -o subindel SUBindel.c ${ALL_OBJECTS} ${LDFLAGS} 72 | 73 | featureCounts: readSummary.c subread.h ${ALL_OBJECTS} 74 | ${CC} -o featureCounts readSummary.c ${ALL_OBJECTS} ${LDFLAGS} 75 | 76 | subread-align: core-interface-aligner.c ${ALL_OBJECTS} 77 | ${CC} -o subread-align core-interface-aligner.c ${ALL_OBJECTS} ${LDFLAGS} 78 | 79 | subjunc: core-interface-subjunc.c ${ALL_OBJECTS} 80 | ${CC} -o subjunc core-interface-subjunc.c ${ALL_OBJECTS} ${LDFLAGS} 81 | 82 | subtools: subtools.c ${ALL_OBJECTS} 83 | ${CC} -o subtools subtools.c ${ALL_OBJECTS} ${LDFLAGS} 84 | 85 | qualityScores: qualityScores.c ${ALL_OBJECTS} 86 | ${CC} -o qualityScores qualityScores.c ${ALL_OBJECTS} ${LDFLAGS} 87 | 88 | subread-fullscan: fullscan.c ${ALL_OBJECTS} 89 | ${CC} -o subread-fullscan fullscan.c ${ALL_OBJECTS} ${LDFLAGS} 90 | 91 | clean: 92 | rm -f core featureCounts exactSNP removeDup subread-buildindex ${ALL_OBJECTS} 93 | -------------------------------------------------------------------------------- /src/SNPCalling.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 
8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef __SINGLE_NUCLEODITE_POLYMOREPHISM_H_ 21 | #define __SINGLE_NUCLEODITE_POLYMOREPHISM_H_ 22 | 23 | 24 | 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /src/aligner.c: -------------------------------------------------------------------------------- 1 | //SPA 2 | -------------------------------------------------------------------------------- /src/build-sam-index.c: -------------------------------------------------------------------------------- 1 | //removed. 2 | -------------------------------------------------------------------------------- /src/core-bigtable.h: -------------------------------------------------------------------------------- 1 | #ifndef __CORE_BIGTABLE_H_ 2 | #define __CORE_BIGTABLE_H_ 3 | 4 | #include "subread.h" 5 | #include "core.h" 6 | #include "hashtable.h" 7 | #include "gene-algorithms.h" 8 | 9 | #define CACHE_STATUS_RELEASED 0 10 | #define CACHE_STATUS_OCCUPIED 1 11 | 12 | // This function creates an empty data structure for all results. 13 | // The number of reads is unknown at this stage. 14 | int init_bigtable_results(global_context_t * global_context, int is_rewind); 15 | 16 | // This function tries to retrieve the required result data structure into memory and set the return_ptr to the address of the data structure. 17 | // Junction ptr can be NULL. 18 | // This function returns ZERO if the record is available. It returns -1 if the record is unavailable. 
19 | int bigtable_retrieve_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t ** return_ptr, subjunc_result_t ** return_junction_ptr); 20 | 21 | // This function notifies the bigtable subsystem to save changes and deallocate the memory block if necessary. 22 | // NOTE(review): the original remark "Junction ptr can be NULL" looks stale here -- this prototype has no junction pointer parameter. 23 | // If the data has been changed, commit_change must be set to a non-zero value. 24 | void bigtable_release_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int commit_change); 25 | 26 | // This function destroys the buffers and deletes the temporary MMAP files. 27 | int finalise_bigtable_results(global_context_t * global_context); 28 | // NOTE(review): unlike bigtable_retrieve_result, the result pointers below are single-level, so presumably the caller supplies the storage -- confirm. 29 | void bigtable_readonly_result(global_context_t * global_context , thread_context_t * thread_context , subread_read_number_t pair_number, int result_number, int is_second_read, mapping_result_t * return_ptr, subjunc_result_t * return_junction_ptr); 30 | 31 | void bktable_append(bucketed_table_t * tab, char * chro, unsigned int pos, void * detail); 32 | 33 | int bktable_lookup(bucketed_table_t * tab, char * chro, unsigned int start_pos, unsigned int interval_length, unsigned int * hit_pos_list, void ** hit_ptr_list, int max_hits); 34 | 35 | void bktable_init(bucketed_table_t * tab, unsigned int maximum_interval_length, unsigned int expected_items); 36 | 37 | void bktable_destroy(bucketed_table_t * tab); 38 | 39 | void bktable_free_ptrs(void * bkey, void * buckv, HashTable * tab); 40 | 41 | void fraglist_init(fragment_list_t * list); 42 | 43 | void fraglist_append(fragment_list_t * list, subread_read_number_t fragment_number); 44 | 45 | void fraglist_destroy(fragment_list_t * list); 46 | 47 | void bigtable_write_thread_cache(global_context_t * global_context); 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- 
/src/core-indel.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef SUBREAD_CORE_INDEL_H_ 21 | #define SUBREAD_CORE_INDEL_H_ 22 | 23 | #include "subread.h" 24 | #include "hashtable.h" 25 | #include "core.h" 26 | 27 | // chromosome events can be indels, junctions or fusions. 28 | // if it is an insertion event, event_large_site = event_small_site+1. 
29 | 30 | //#define MAX_EVENT_ENTRIES_PER_SITE 5 31 | //#define MAX_EVENT_ENTRIES_PER_SITE 12 32 | // 33 | #define EVENT_ENTRIES_INIT_SIZE (9) 34 | #define MAX_EVENT_ENTRIES_PER_SITE 9 35 | #define CHRO_EVENT_TYPE_REMOVED 0 36 | #define CHRO_EVENT_TYPE_INDEL 8 37 | #define CHRO_EVENT_TYPE_LONG_INDEL 16 38 | #define CHRO_EVENT_TYPE_POTENTIAL_INDEL 32 39 | #define CHRO_EVENT_TYPE_JUNCTION 64 40 | #define CHRO_EVENT_TYPE_FUSION 128 41 | #define CHRO_EVENT_TYPE_SNP 256 42 | 43 | #define EVENT_SEARCH_BY_SMALL_SIDE 10 44 | #define EVENT_SEARCH_BY_LARGE_SIDE 20 45 | #define EVENT_SEARCH_BY_BOTH_SIDES 30 46 | 47 | 48 | #define REASSEMBLY_WINDOW_LENGTH 350 49 | 50 | //#define is_target_window_X(x) ((x + 1) * REASSEMBLY_WINDOW_LENGTH / 2 >= (10734463 % BASE_BLOCK_LENGTH) && (x- 1) * REASSEMBLY_WINDOW_LENGTH /2-1 <= (10734463%BASE_BLOCK_LENGTH) ) 51 | #define is_target_window_X(x) 0 52 | //#define MAXIMUM_EVENT_NUMBER 300000 53 | 54 | 55 | typedef struct{ 56 | int is_precisely_called; 57 | unsigned int source_left_side; // the base BEFORE the translocated sequence. 58 | unsigned int target_left_side; // tge base BEFORE the inserted translocated sequence. 59 | unsigned int length; 60 | 61 | unsigned int event_P_number; 62 | unsigned int event_Q_number; 63 | unsigned int event_R_number; 64 | 65 | int is_inv; 66 | unsigned int all_sup_P; 67 | unsigned int max_sup_QR; 68 | } translocation_result_t; 69 | 70 | typedef struct{ 71 | int is_precisely_called; 72 | 73 | unsigned int event_Y_rough_small_abs; 74 | unsigned int event_Z_rough_large_abs; 75 | 76 | unsigned int small_side; // the base BEFORE the reversed sequence 77 | unsigned int length; 78 | 79 | unsigned int event_Y_number; // event_no in the event space. 
80 | unsigned int event_Z_number; 81 | 82 | unsigned int all_sup_D; 83 | unsigned int max_sup_E; 84 | } inversion_result_t; 85 | 86 | struct reassmebly_window_allele 87 | { 88 | char rebuilt_window[8000]; 89 | float allele_quality; 90 | int rebuilt_size; 91 | }; 92 | 93 | typedef struct{ 94 | gehash_t * voting_indexes; 95 | char * chro_name; 96 | unsigned long long int * start_keys; 97 | short * start_offsets; 98 | 99 | unsigned int * read_no_counter; 100 | unsigned int block_start_linear_pos; 101 | HashTable * read_sequence_table; 102 | HashTable * read_position_table; 103 | HashTable * read_quality_table; 104 | gene_vote_t * vote_list; 105 | gene_vote_t * vote_list_rectify; 106 | short * read_rectify_space; 107 | 108 | char rebuilt_window[2500]; 109 | int rebuilt_window_size; 110 | 111 | 112 | struct reassmebly_window_allele * final_alleles; 113 | 114 | unsigned int used_read_ids[2000]; 115 | int used_read_number; 116 | 117 | 118 | int search_cost; 119 | int total_matched_bases; 120 | int max_matched_bases; 121 | unsigned int window_start_pos; 122 | } reassembly_by_voting_block_context_t; 123 | 124 | 125 | 126 | typedef struct{ 127 | HashTable ** de_bruijn_graphs; 128 | char * chro_name; 129 | unsigned long long int * start_keys; 130 | short * start_offsets; 131 | 132 | unsigned int block_start_linear_pos; 133 | } reassembly_block_context_t; 134 | 135 | #define EVENT_BODY_LOCK_BUCKETS 14929 136 | 137 | 138 | typedef struct{ 139 | HashTable * event_entry_table; 140 | unsigned int total_events; 141 | unsigned int current_max_event_number; 142 | chromosome_event_t * event_space_dynamic; 143 | HashTable * local_reassembly_pileup_files; 144 | subread_lock_t event_body_locks[EVENT_BODY_LOCK_BUCKETS]; 145 | 146 | short ** dynamic_align_table; 147 | char ** dynamic_align_table_mask; 148 | } indel_context_t; 149 | 150 | typedef struct{ 151 | HashTable * event_entry_table; 152 | unsigned int total_events; 153 | unsigned int current_max_event_number; 154 | 
chromosome_event_t * event_space_dynamic; 155 | unsigned short * final_counted_reads_array; 156 | unsigned short * final_reads_mismatches_array; 157 | 158 | short ** dynamic_align_table; 159 | char ** dynamic_align_table_mask; 160 | } indel_thread_context_t; 161 | 162 | int init_indel_tables(global_context_t * context); 163 | int destroy_indel_module(global_context_t * context); 164 | int init_indel_thread_contexts(global_context_t * global_context, thread_context_t * thread_context, int task); 165 | int sort_global_event_table(global_context_t * global_context); 166 | int load_known_junctions(global_context_t * global_context); 167 | int finalise_indel_and_junction_thread(global_context_t * global_context, thread_context_t * thread_contexts, int task); 168 | int find_new_indels(global_context_t * global_context, thread_context_t * thread_context, int pair_number, char * read_name, char * read_text, char * qual_text, int read_len, int is_second_read, int best_read_id); 169 | int write_indel_final_results(global_context_t * context); 170 | int search_event(global_context_t * global_context,HashTable * event_table, chromosome_event_t * event_space, unsigned int pos, int search_type, unsigned char event_type, chromosome_event_t ** return_buffer); 171 | 172 | void set_alignment_result(global_context_t * global_context, int pair_number, int is_second_read, int best_read_id, unsigned int position, int votes, gene_vote_number_t * indel_record, short best_cover_start, short best_cover_end, int is_negative_strand, int is_PE, unsigned int minor_position, unsigned int minor_votes, unsigned int minor_coverage_start, unsigned int minor_coverage_end, unsigned int split_point, int inserted_bases, int is_strand_jumped, int is_GT_AG_donors, int used_subreads_in_vote, int noninformative_subreads_in_vote, int major_indel_offset, int minor_indel_offset, int main_hamming, int minor_hamming, int main_quality, int minor_quality); 173 | 174 | void put_new_event(HashTable * event_table, 
chromosome_event_t * new_event , int event_no); 175 | void remove_neighbour(global_context_t * global_context); 176 | int build_local_reassembly(global_context_t *global_context , thread_context_t *thread_context , int pair_number, char * read_name_1 , char * read_text_1 ,char * qual_text_1 , int read_len_1, int read_len_2, int is_second_read, int best_read_id, int is_paired_unmapped, mapping_result_t * current_res, mapping_result_t * mate_res); 177 | int finalise_long_insertions(global_context_t * global_context); 178 | 179 | // This function sets the global context with default values. 180 | void init_global_context(global_context_t * context); 181 | 182 | int write_local_reassembly(global_context_t *global_context, HashTable *pileup_fp_table, unsigned int anchor_pos, char * read_name , char * read_text ,char * qual_text , int read_len, int is_anchor_certain); 183 | 184 | int finalise_long_insertions_by_hashtable(global_context_t * global_context); 185 | 186 | void destroy_pileup_table(HashTable* local_reassembly_pileup_files); 187 | 188 | chromosome_event_t * reallocate_event_space(global_context_t* global_context,thread_context_t* thread_context,int event_no); 189 | 190 | int there_are_events_in_range(char * bitmap, unsigned int pos, int sec_len); 191 | 192 | int anti_supporting_read_scan(global_context_t * global_context); 193 | 194 | int core_dynamic_align(global_context_t * global_context, thread_context_t * thread_context, char * read, int read_len, unsigned int begin_position, char * movement_buffer, int expected_offset, char * read_name); 195 | 196 | void init_core_temp_path(global_context_t * context); 197 | 198 | chromosome_event_t * local_add_indel_event(global_context_t * global_context, thread_context_t * thread_context, HashTable * event_table, char * read_text, unsigned int left_edge, int indels, int score_supporting_read_added, int is_ambiguous, int mismatched_bases,int * old_event_id); 199 | 200 | void print_indel_table(global_context_t * 
global_context); 201 | int sort_junction_entry_table(global_context_t * global_context); 202 | void mark_event_bitmap(unsigned char * bitmap, unsigned int pos); 203 | int check_event_bitmap(unsigned char * bitmap, unsigned int pos); 204 | 205 | void exchange_event_sides(void * arr, int l, int r); 206 | void merge_event_sides(void * arr, int start, int items, int items2); 207 | int compare_event_sides(void * arr, int l, int r); 208 | 209 | srUInt_64 localPointerHashFunction_forEventEntry(const void *pointer); 210 | int localPointerCmp_forEventEntry(const void *pointer1, const void *pointer2); 211 | void anti_support_add_count(void * ky, void * va, HashTable * tab); 212 | int BINsearch_event(chromosome_event_t * event_space, int * event_ids, int is_small_side, unsigned int pos, int events); 213 | void destory_event_entry_table(HashTable * old); 214 | #define is_ambiguous_indel_score(e) (0) 215 | #endif 216 | -------------------------------------------------------------------------------- /src/del4-mmap-test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | 11 | 12 | #define MEGA 1024llu*1024 13 | 14 | int main(){ 15 | int fd = open("/usr/local/work/liao/arena/del4.mem", O_TRUNC | O_CREAT|O_WRONLY , 0600); 16 | long long int x; 17 | 18 | for(x=0; x<100*MEGA; x++){ 19 | write(fd, &x, 4); 20 | } 21 | 22 | close(fd); 23 | 24 | fd = open("/usr/local/work/liao/arena/del4.mem", O_RDWR); 25 | 26 | void * fd_ptr = mmap(NULL, 400*MEGA, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); 27 | assert(fd_ptr != MAP_FAILED); 28 | printf("MEMPTR = %08X\n", (unsigned int)(fd_ptr)); 29 | 30 | int * int_ptr = (int *)fd_ptr; 31 | 32 | for(x=0; x<100*MEGA; x+=456){ 33 | int myint = int_ptr[x]; 34 | //printf("MYI=%d\n", myint); 35 | } 36 | for(x=0; x<100*MEGA; x+=2){ 37 | int_ptr[x]=0x0a; 38 | } 39 | 40 | printf("MEMORY PREPARIED\n"); 41 | sleep(100); 
42 | munmap(fd_ptr, 400*MEGA); 43 | close(fd); 44 | } 45 | -------------------------------------------------------------------------------- /src/exon-algorithms.c: -------------------------------------------------------------------------------- 1 | //SPA 2 | -------------------------------------------------------------------------------- /src/exon-algorithms.h: -------------------------------------------------------------------------------- 1 | //SPA 2 | -------------------------------------------------------------------------------- /src/exon-align-indel.c: -------------------------------------------------------------------------------- 1 | //SPA 2 | -------------------------------------------------------------------------------- /src/exon-align.c: -------------------------------------------------------------------------------- 1 | //SPA 2 | -------------------------------------------------------------------------------- /src/filterJunctionTable.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "subread.h" 9 | #include "hashtable.h" 10 | #include "core.h" 11 | #include "HelperFunctions.h" 12 | 13 | 14 | void parse_line(char * new_line, int * flags, char * chro_name, unsigned int * chro_pos, char * cigar) 15 | { 16 | int tab_no = 0, read_cursor; 17 | int curr_line_len = strlen(new_line); 18 | int field_pnt = 0; 19 | chro_name[0]=0; 20 | cigar[0]=0; 21 | for(read_cursor=0; read_cursor 6)break; 28 | field_pnt=0; 29 | } 30 | else 31 | { 32 | if(tab_no == 1) // flags 33 | (*flags) = (*flags) * 10 + (nch - '0'); 34 | else if(tab_no == 2) 35 | { 36 | chro_name[field_pnt++]=nch; 37 | chro_name[field_pnt]=0; 38 | } 39 | else if(tab_no == 3) 40 | (*chro_pos) = (*chro_pos)*10 + (nch - '0'); 41 | else if(tab_no == 5) 42 | { 43 | cigar[field_pnt++]=nch; 44 | cigar[field_pnt]=0; 45 | } 46 | 47 | } 48 | } 49 | } 50 | 51 | int load_junc_table(char * 
file_name, HashTable * tab, HashTable * edge_table_l, HashTable * edge_table_r, HashTable * bin_table) 52 | { 53 | FILE * fp=f_subr_open(file_name, "r"); 54 | char new_fl[200]; 55 | if(!fp) return -1; 56 | while(1) 57 | { 58 | char * ll = fgets(new_fl, 199, fp); 59 | char * tmp_tok=NULL; 60 | if(!ll || strlen(ll)<4) break; 61 | 62 | strtok_r(ll, "\t", &tmp_tok); //name 63 | char * chrostr = strtok_r(NULL, "\t", &tmp_tok); //chro 64 | char * pos1str = strtok_r(NULL, "\t", &tmp_tok); //pos1 65 | char * pos2str = strtok_r(NULL, "\t", &tmp_tok); //pos2 66 | 67 | unsigned int pos1 = (unsigned int)atoll(pos1str); 68 | unsigned int pos2 = (unsigned int)atoll(pos2str); 69 | 70 | if(pos1==0||pos2==0)continue; 71 | 72 | char * chro_mem = malloc(30); 73 | chro_mem[0]=0; 74 | if(strlen(chrostr)<3)strcpy(chro_mem, "chr"); 75 | strcat(chro_mem, chrostr); 76 | 77 | long long int pos_key; 78 | if(pos2 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "subread.h" 27 | #include "gene-value-index.h" 28 | #include "input-files.h" 29 | 30 | float MIN_REPORTING_RATIO = 0.8; 31 | gene_offset_t _global_offsets; 32 | unsigned int SCAN_TOTAL_BASES=0; 33 | char * only_chro = NULL; 34 | 35 | 36 | void fullscan_usage() 37 | { 38 | SUBREADprintf("\nsubread-fullscan Version %s\n\n", SUBREAD_VERSION); 39 | SUBREADputs(" This program scans the entire genome to find all high-similarity locations to"); 40 | SUBREADputs("a specified read."); 41 | SUBREADputs(""); 42 | SUBREADputs("Usage:"); 43 | SUBREADputs(""); 44 | SUBREADputs(" ./subread-fullscan [options] -i "); 45 | SUBREADputs(""); 46 | SUBREADputs("Required arguments:"); 47 | SUBREADputs(""); 48 | SUBREADputs(" -i Base name of the index."); 49 | SUBREADputs(""); 50 | SUBREADputs(" read_string The read bases."); 51 | SUBREADputs(""); 52 | SUBREADputs("Optional arguments:"); 53 | SUBREADputs(""); 54 | SUBREADputs(" -m The minimum fraction of matched bases in the read, 0.8 by default"); 55 | SUBREADputs(""); 56 | 
57 | } 58 | 59 | void report_pos(unsigned int pos) 60 | { 61 | char * chro_name; 62 | int chro_pos; 63 | locate_gene_position(pos, &_global_offsets, &chro_name, &chro_pos); 64 | SUBREADprintf ("%s,%u\n", chro_name,chro_pos); 65 | } 66 | 67 | 68 | int str_match_count(char * c1, char * c2, int rl, int th) 69 | { 70 | int i,ret =0; 71 | for (i=0; i th) return 0; 75 | } 76 | return rl-ret; 77 | } 78 | 79 | void scan_test_match(char * read, char * read_rev, char * chro, int rl, unsigned int pos) 80 | { 81 | int threshold = (int)(MIN_REPORTING_RATIO * rl - 0.001); 82 | int m = str_match_count(read, chro, rl, rl- threshold); 83 | int mr = str_match_count(read_rev, chro, rl,rl- threshold); 84 | if (m>=threshold) 85 | { 86 | SUBREADprintf("\nFound on positive strand (%0.2f%%): ", m*100./rl); 87 | report_pos(pos); 88 | } 89 | if (mr>=threshold) 90 | { 91 | SUBREADprintf("\nFound on negative strand (%0.2f%%): ", mr*100./rl); 92 | report_pos(pos); 93 | } 94 | 95 | //if (pos > 19999999) 96 | //SUBREADprintf ("m=%d mr=%d T=%d\n", m, mr, threshold); 97 | } 98 | 99 | void full_scan_read(char * index_name, char * read_str) 100 | { 101 | int read_len = strlen(read_str); 102 | char read_rev_str[1208]; 103 | char chro_str[1208]; 104 | gene_value_index_t index; 105 | int tabno = 0; 106 | unsigned int current_pos =0xffffffffu; 107 | 108 | strcpy(read_rev_str, read_str); 109 | reverse_read(read_rev_str, read_len, GENE_SPACE_BASE); 110 | 111 | while (1){ 112 | char table_fn[1250]; 113 | struct stat filestat; 114 | SUBreadSprintf(table_fn, 1250, "%s.%02d.b.array", index_name, tabno); 115 | 116 | int stat_ret = stat(table_fn, &filestat); 117 | if (stat_ret !=0 ) 118 | { 119 | if(!tabno) 120 | SUBREADprintf("The index does not contain any raw base data which is required in scanning. 
Please use the -b option while building the index.\n"); 121 | return ; 122 | } 123 | if (tabno>0)gvindex_destory(&index); 124 | 125 | gvindex_load(&index,table_fn); 126 | if (tabno==0) 127 | { 128 | gvindex_get_string (chro_str, &index, 0, read_len, 0); 129 | current_pos = 0; 130 | } 131 | 132 | for(; current_pos + read_len < index.start_point + index.length ; current_pos++) 133 | { 134 | if(only_chro){ 135 | char * chro_name; 136 | int chro_pos; 137 | locate_gene_position(current_pos, &_global_offsets, &chro_name, &chro_pos); 138 | if(strcmp(chro_name, only_chro)!=0)continue; 139 | } 140 | scan_test_match(read_str, read_rev_str, chro_str, read_len, current_pos); 141 | char nch = gvindex_get(&index, current_pos + read_len); 142 | int i; 143 | for (i=0; i0)gvindex_destory(&index); 152 | 153 | 154 | } 155 | 156 | int main (int argc , char ** argv) 157 | { 158 | char index_name [MAX_FILE_NAME_LENGTH]; 159 | char read_str [1208]; 160 | char c; 161 | int i; 162 | 163 | index_name[0]=0; 164 | 165 | while ((c = getopt (argc, argv, "i:m:c:?")) != -1) 166 | switch(c) 167 | { 168 | case 'i': 169 | strncpy(index_name, optarg, MAX_FILE_NAME_LENGTH-1); 170 | break; 171 | case 'm': 172 | MIN_REPORTING_RATIO = atof(optarg); 173 | break; 174 | case 'c': 175 | only_chro = optarg; 176 | break; 177 | case '?': 178 | return -1 ; 179 | } 180 | 181 | if (!index_name[0] || optind == argc) 182 | { 183 | fullscan_usage(); 184 | return -1; 185 | } 186 | i=0; 187 | while(argv[optind][i]) 188 | { 189 | argv[optind][i] = toupper(argv[optind][i]); 190 | i++; 191 | } 192 | strncpy(read_str, argv[optind], 1199); 193 | 194 | load_offsets (&_global_offsets, index_name); 195 | SUBREADprintf ("Reporting threshold=%0.2f%%\n", MIN_REPORTING_RATIO*100); 196 | 197 | /* 198 | for(i=0;i<1000;i++) 199 | { 200 | if (!_global_offsets.read_offset[i])break; 201 | SCAN_TOTAL_BASES = _global_offsets.read_offset[i]; 202 | }*/ 203 | SUBREADprintf ("All bases =%u\n", SCAN_TOTAL_BASES); 204 | SUBREADprintf ("Scanning 
the full index for %s...\n\n", read_str); 205 | 206 | full_scan_read(index_name, read_str); 207 | 208 | SUBREADprintf ("\nFinished.\n"); 209 | 210 | return 0; 211 | } 212 | -------------------------------------------------------------------------------- /src/fusion-align.c: -------------------------------------------------------------------------------- 1 | //SPA 2 | -------------------------------------------------------------------------------- /src/gen_long_chromosomes.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char ** argv) 5 | { 6 | int chromosomes = atoi(argv[1]); 7 | int x1, chro_offset; 8 | 9 | for(x1=0; x1chr%d", x1+1); 13 | for(chro_offset = 0; chro_offset<500000; chro_offset++) 14 | { 15 | if(chro_offset % 70==0) puts(""); 16 | char nch_i = myrand_rand() % 4 ; 17 | char nch = nch_i?(nch_i<2?'T':(nch_i < 3?'C':'G')):'A'; 18 | 19 | putchar(nch); 20 | } 21 | } 22 | return 0; 23 | } 24 | -------------------------------------------------------------------------------- /src/gene-value-index.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 
14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef __GENE_VALUE_INDEX_ 21 | #define __GENE_VALUE_INDEX_ 22 | 23 | #include "hashtable.h" 24 | #include "subread.h" 25 | #include "sorted-hashtable.h" 26 | 27 | int gvindex_init(gene_value_index_t * index, unsigned int start_point, unsigned int all_bases_estm); 28 | 29 | void gvindex_set (gene_value_index_t * index, gehash_data_t offset, gehash_key_t base_value, int padding); 30 | 31 | int gvindex_dump(gene_value_index_t * index, const char filename []); 32 | 33 | int gvindex_load(gene_value_index_t * index, const char filename []); 34 | 35 | void gvindex_destory(gene_value_index_t * index); 36 | 37 | void gvindex_baseno2offset(unsigned int base_number, gene_value_index_t * index, unsigned int * offset_byte, unsigned int * offset_bit); 38 | 39 | // returns a 16-bit bitmap showing if each base is matched. 40 | int gvindex_match(gene_value_index_t * index, gehash_data_t offset, gehash_key_t base_value); 41 | 42 | int gvindex_match_base(gene_value_index_t * index, gehash_data_t offset, const char base_int_value); 43 | 44 | int gvindex_get(gene_value_index_t * index, gehash_data_t offset); 45 | 46 | int match_chro(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int is_negative_strand, int space_type); 47 | float match_chro_support(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int is_negative_strand, int space_type, char * qual_str, int qual_format); 48 | 49 | 50 | int match_chro_maxerror(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int is_negative_strand, int space_type, int maxerror); 51 | 52 | void gvindex_get_string(char *buf, gene_value_index_t * index, unsigned int pos, int len, int is_negative_strand); 53 | 54 | int match_chro_wronglen(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int space_type, int * left_match_bases, 
int * right_match_bases); 55 | 56 | int match_indel_chro_to_front(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int * indels, int * indel_point,int max_indel_number, int max_test_len); 57 | int match_indel_chro_to_back(char * read, gene_value_index_t * index, unsigned int pos, int test_len, int * indels, int * indel_point,int max_indel_number, int min_test_offset); 58 | 59 | int match_indel_table_to_front(HashTable * indel_table , char * read, gene_value_index_t * index, unsigned int pos, int test_len, short * indels, short * indel_point, int max_indel_number, int min_test_offset, struct explorer_section_t * sec); 60 | int match_indel_table_to_back(HashTable * indel_table , char * read, gene_value_index_t * index, unsigned int pos, int test_len, short * indels, short * indel_point, int max_indel_number, int min_test_offset, struct explorer_section_t * sec); 61 | 62 | unsigned int match_chro_range(char * read, gene_value_index_t * index, unsigned int pos, int read_len, int search_length, int search_to_back); 63 | #endif 64 | -------------------------------------------------------------------------------- /src/input-blc.h: -------------------------------------------------------------------------------- 1 | #ifndef _INPUT_BLC_H 2 | #define _INPUT_BLC_H 3 | 4 | #include 5 | #include 6 | 7 | #include "subread.h" 8 | #include "seek-zlib.h" 9 | #include "hashtable.h" 10 | 11 | #define LANE_FOR_ALL_LANES 99999 12 | 13 | int input_BLC_init( input_BLC_t * blc_input , char * data_dir ); 14 | int input_BLC_next_read( input_BLC_t * blc_input, char * readname , char * read, char * qual ); 15 | int input_BLC_tell ( input_BLC_t * blc_input , input_BLC_pos_t * pos ); 16 | int input_BLC_seek ( input_BLC_t * blc_input , input_BLC_pos_t * pos ); 17 | void input_BLC_close (input_BLC_t * blc_input); 18 | 19 | int input_scBAM_init(input_scBAM_t * bam_input, char * bam_fname); 20 | void input_scBAM_close(input_scBAM_t * bam_input); 21 | void 
scBAM_seek(input_scBAM_t * bam_input, input_scBAM_pos_t * pos); 22 | void scBAM_tell(input_scBAM_t * bam_input, input_scBAM_pos_t * pos); 23 | int scBAM_next_read(input_scBAM_t * bam_input, char * readname , char * read, char * qual ); 24 | 25 | int input_mFQ_init( input_mFQ_t * fqs_input, char ** files1, char ** files2, char** files3, int total_files ); 26 | int input_mFQ_init_by_one_string(input_mFQ_t * fqs_input, char * three_paired_fqnames); 27 | int input_mFQ_next_read(input_mFQ_t * fqs_input, char * readname , char * read, char * qual ); 28 | int input_mFQ_tell(input_mFQ_t * fqs_input, input_mFQ_pos_t * pos ); 29 | int input_mFQ_seek(input_mFQ_t * fqs_input, input_mFQ_pos_t * pos ); 30 | void input_mFQ_close(input_mFQ_t * fqs_input); 31 | 32 | // "cached BCL" maintains a chunk of reads; it decompresses 33 | int cacheBCL_init( cache_BCL_t * cache_input, char * data_dir, int reads_in_chunk, int all_threads ); 34 | int cacheBCL_next_read( cache_BCL_t * cache_input, char * read_name, char * seq, char * qual, srInt_64 * read_number_in_all); 35 | int cacheBCL_next_readbin(cache_BCL_t * cache_input, int * readlane, char readbin[BCL_READBIN_ITEMS_LOCAL][BCL_READBIN_SIZE], int max_readbin_buffer, srInt_64 * start_allread_no); 36 | 37 | int cacheBCL_go_chunk_start( cache_BCL_t * blc_input ); 38 | int cacheBCL_go_chunk_end( cache_BCL_t * blc_input ); 39 | void cacheBCL_close ( cache_BCL_t * blc_input ); 40 | 41 | // it returns a hashtable : sample name -> [ ( "ACGTAATT", 1 ), ( "CGTTATGG", 2 ), ... 
] 42 | // The hashtable can be simply destroyed and all contents are deallocated automatically 43 | // It returns NULL if no sample sheet is found 44 | HashTable * input_BLC_parse_SampleSheet(char * fname); 45 | 46 | // It returns a list of barcodes (not the barcode conversation table) 47 | // It returns NULL if no list is found 48 | // The arraylist can be simply destroyed and all contents are deallocated automatically 49 | ArrayList * input_BLC_parse_CellBarcodes(char * fname); 50 | int hamming_dist_ATGC_max1_2p(char* s1, char* s2 ); 51 | int hamming_dist_ATGC_max1(char* s1, char* s2 ); 52 | int hamming_dist_ATGC_max2(char* s1, char* s2 ); 53 | int hamming_dist_ATGC_max3(char* s1, char* s2 ); 54 | 55 | // returns -1 if error, or 0 if no error. 56 | int cacheBCL_quality_test(int is_FASTQ_input, char * datadir, HashTable * sample_sheet_table, ArrayList * cell_barcode_list, int testing_reads, int * tested_reads, int * valid_sample_index, int * valid_cell_barcode, char * result_prefix); 57 | #endif 58 | -------------------------------------------------------------------------------- /src/interval_merge.c: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 
14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #include 21 | #include 22 | #include 23 | #define NORMAL_INTS 35 24 | 25 | int mergeIntervals(unsigned int * intervals, unsigned int * result_ints, int nints) 26 | { 27 | 28 | unsigned int * stack_buffer_mem; 29 | int stock_pointer = 0; 30 | stack_buffer_mem = result_ints; 31 | 32 | 33 | int ii,jj; 34 | 35 | for(ii=0; ii intervals[jj*2]) 42 | { 43 | small_jj_value = intervals[jj*2]; 44 | small_jj_id = jj; 45 | } 46 | } 47 | 48 | if(small_jj_value < intervals[ii*2]) 49 | { 50 | unsigned int tmpv; 51 | tmpv = intervals[ii*2]; 52 | intervals[ii*2] = small_jj_value; 53 | intervals[small_jj_id * 2] = tmpv; 54 | 55 | tmpv = intervals[ii*2 + 1]; 56 | intervals[ii*2 + 1] = intervals[small_jj_id * 2 + 1]; 57 | intervals[small_jj_id * 2 + 1] = tmpv; 58 | } 59 | } 60 | 61 | stack_buffer_mem[stock_pointer*2] = intervals[0]; 62 | stack_buffer_mem[stock_pointer*2+1] = intervals[1]; 63 | stock_pointer++; 64 | 65 | for (ii = 1 ; ii < nints; ii++) 66 | { 67 | //int top_start = stack_buffer_mem[stock_pointer*2-2]; 68 | int top_stop = stack_buffer_mem[stock_pointer*2-1]; 69 | 70 | if (top_stop < intervals[ii*2]) 71 | { 72 | stack_buffer_mem[stock_pointer * 2] = intervals[ii*2]; 73 | stack_buffer_mem[stock_pointer * 2+1] = intervals[ii*2+1]; 74 | stock_pointer++; 75 | } 76 | else if (top_stop < intervals[ii*2+1]) 77 | { 78 | top_stop = intervals[ii*2+1]; 79 | stack_buffer_mem[stock_pointer*2-1] = top_stop; 80 | } 81 | // Otherwise the ii-th interval is useless because it is enclosed in the top item 82 | } 83 | 84 | return stock_pointer; 85 | } 86 | 87 | #ifdef MAKE_TEST_INTERVL_MERGE 88 | void print_gaps(unsigned int * gaps, int gapn) 89 | { 90 | int ii; 91 | for(ii=0; ii 23 | #include 24 | #include "subread.h" 25 | 26 | #define LNHASH_BUCKET_SIZE_INCREMENT 1.4 27 | #define LNHASH_INIT_BUCKET_SIZE 300 28 | 29 | typedef unsigned long long lnhash_data_t; 
30 | 31 | typedef struct { 32 | unsigned int num_elements; 33 | unsigned int space_elements; 34 | 35 | gehash_key_t * key_array; 36 | lnhash_data_t * value_array; 37 | } lnhash_buckct_t; 38 | 39 | 40 | 41 | typedef struct { 42 | int is_sorted; 43 | unsigned long long int num_elements; 44 | unsigned int num_buckets; 45 | unsigned short * key_repeated_numbers; 46 | unsigned short subread_repeat_max; 47 | 48 | lnhash_buckct_t * buckets; 49 | } lnhash_t; 50 | /* Dimensions of the vote_records grid declared in lnhash_vote_t below. */ 51 | #define LNHASH_VOTE_ARRAY_WIDTH 240 52 | #define LNHASH_VOTE_ARRAY_HEIGHT 233 53 | //#define LNHASH_VOTE_ARRAY_WIDTH 48 54 | //#define LNHASH_VOTE_ARRAY_HEIGHT 31 55 | 56 | typedef struct{ 57 | lnhash_data_t head_position; 58 | short coverage_start; 59 | short coverage_end; 60 | short num_votes; 61 | } lnhash_vote_record_t; 62 | // vote_records is a LNHASH_VOTE_ARRAY_HEIGHT x LNHASH_VOTE_ARRAY_WIDTH grid; vote_record_items holds one int per grid row (presumably the number of used records in that row -- confirm against the implementation). 63 | typedef struct{ 64 | int max_votes; 65 | int vote_record_items [LNHASH_VOTE_ARRAY_HEIGHT]; 66 | lnhash_vote_record_t vote_records [LNHASH_VOTE_ARRAY_HEIGHT][LNHASH_VOTE_ARRAY_WIDTH]; 67 | } lnhash_vote_t; 68 | // Zeroes only vote_record_items; max_votes and vote_records are not touched. The argument v is expanded unparenthesized, so pass a plain pointer expression. 69 | #define lnhash_init_votes(v) memset(v->vote_record_items, 0, sizeof(int) * LNHASH_VOTE_ARRAY_HEIGHT) 70 | 71 | // This function creates a hashtable; the prototype takes the number of buckets. NOTE(review): the original remark about an "EX version" with a "version number" does not match this signature -- presumably stale, confirm. 72 | int lnhash_create(lnhash_t * the_table, unsigned int num_buckets); 73 | 74 | // This function puts a data item into the table. If the key is duplicated, it inserts another copy into the table and does not overwrite the old one. 75 | int lnhash_insert(lnhash_t * the_table, gehash_key_t key, lnhash_data_t data); 76 | 77 | // This function must be called before querying or dumping. 78 | void lnhash_resort(lnhash_t * the_table); 79 | 80 | // This function returns the number of items being hit. 81 | int lnhash_query(lnhash_t * the_table, lnhash_vote_t * vote, gehash_key_t queried_key, short dist_to_head); 82 | 83 | // Free all memory that is allocated for the table. Only the table structure itself is not freed. 
84 | void lnhash_destroy(lnhash_t * the_table); 85 | 86 | // This function returns the number of vote records that have at least "minimum_votes" votes; the vote records are sorted by their head_position. 87 | int sorted_voting_table(lnhash_vote_t * vote, lnhash_vote_record_t ** result_buffer, int minimum_votes); 88 | 89 | // This function adds rec -> coverage_start to rec -> head_position. 90 | int sorted_voting_table_offset(lnhash_vote_t * vote, lnhash_vote_record_t ** result_buffer, int minimum_votes); 91 | 92 | #endif 93 | -------------------------------------------------------------------------------- /src/longread-one/LRMbase-index.c: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 
14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include "LRMconfig.h" 26 | #include "LRMbase-index.h" 27 | #include "LRMfile-io.h" 28 | 29 | 30 | #define LRMgvindex_baseno2offset_m(base_number, index, offset_byte, offset_bit) {offset_byte = (base_number - index -> start_base_offset) >>2; offset_bit = base_number % 4 * 2;} 31 | 32 | 33 | void LRMgvindex_baseno2offset(unsigned int base_number, LRMgene_value_index_t * index, unsigned int * offset_byte, unsigned int * offset_bit) 34 | { 35 | // the base number corrsponding to the 0-th bit in the whole value array; 36 | 37 | unsigned int offset = (base_number - index -> start_base_offset); 38 | 39 | * offset_byte = offset >>2 ; 40 | * offset_bit = base_number % 4 * 2; 41 | } 42 | 43 | // return 'A', 'G', 'T' and 'C' 44 | int LRMgvindex_get(LRMgene_value_index_t * index, LRMgehash_data_t offset) 45 | { 46 | unsigned int offset_byte, offset_bit; 47 | LRMgvindex_baseno2offset_m(offset, index , offset_byte, offset_bit); 48 | 49 | if(offset_byte >= index-> values_bytes -1)return 'N'; 50 | 51 | unsigned int one_base_value = (index->values [offset_byte]) >> (offset_bit); 52 | 53 | 54 | //LRMprintf("RECV_BASE=%d (%d - %d)\n",one_base_value & 3, offset_byte , offset_bit); 55 | 56 | return LRMint2base(one_base_value & 3); 57 | } 58 | 59 | int LRMgvindex_match(LRMgene_value_index_t * index, LRMgehash_data_t offset, LRMgehash_key_t base_values) 60 | { 61 | unsigned int offset_byte, offset_bit; 62 | 63 | LRMgvindex_baseno2offset_m(offset, index , offset_byte, offset_bit); 64 | int i, ret = 0; 65 | 66 | for (i=0; i<16; i++) 67 | { 68 | unsigned char mask = 0x3 << (offset_bit); 69 | unsigned char one_base_value = (index->values [offset_byte] & mask) >> (8-offset_bit); 70 | if ( ((base_values >> (30 - i*2)) & 0x3) == one_base_value) 71 | ret |= 1 << i; 72 | 73 | offset_bit +=2; 74 
| if(offset_bit >=8) 75 | { 76 | offset_bit = 0; 77 | offset_byte ++; 78 | } 79 | } 80 | 81 | return ret; 82 | 83 | } 84 | 85 | int LRMgvindex_load(LRMgene_value_index_t * index, const char filename []) 86 | { 87 | FILE * fp = fopen(filename, "rb"); 88 | int read_length; 89 | read_length = fread(&index->start_point,4,1, fp); 90 | if(read_length<1){ 91 | LRMprintf("ERROR: the array index is incomplete : %d", read_length ); 92 | return 1; 93 | } 94 | read_length = fread(&index->length,4,1, fp); 95 | if(read_length<1){ 96 | LRMprintf("Bad index\n"); 97 | return 1; 98 | } 99 | //LRMprintf ("\nBINDEX %s : %u ~ +%u\n",filename, index->start_point, index->length ); 100 | 101 | unsigned int useful_bytes, useful_bits; 102 | index -> start_base_offset = index -> start_point - index -> start_point%4; 103 | LRMgvindex_baseno2offset (index -> length+ index -> start_point, index ,&useful_bytes,&useful_bits); 104 | index -> values = malloc(useful_bytes+1); 105 | index -> values_bytes = useful_bytes+1; 106 | if(!index->values) 107 | { 108 | LRMprintf("Out of memory\n"); 109 | return 1; 110 | } 111 | 112 | 113 | read_length =fread(index->values, 1, useful_bytes+1, fp); 114 | if(read_length < useful_bytes){ 115 | LRMprintf("ERROR: the array index is incomplete : %d < %d.", read_length, useful_bytes+1 ); 116 | return 1; 117 | } 118 | 119 | fclose(fp); 120 | return 0; 121 | 122 | } 123 | 124 | void LRMgvindex_get_string(char *buf, LRMgene_value_index_t * index, unsigned int pos, int len, int is_negative_strand){ 125 | int i; 126 | if (is_negative_strand) 127 | for (i=len-1;i>=0;i--) 128 | { 129 | buf[i] = LRMgvindex_get (index, pos + len - 1 - i); 130 | switch(buf[i]) 131 | { 132 | case 'A': buf[i] = 'T'; break; 133 | case 'G': buf[i] = 'C'; break; 134 | case 'C': buf[i] = 'G'; break; 135 | default: buf[i] = 'A'; 136 | } 137 | } 138 | else 139 | for (i=0;i='0' && nch <='9'){ 160 | tmpi = tmpi * 10 + (nch - '0'); 161 | }else{ 162 | tmpi *= tmpi_sign; 163 | if(nch == 'M'){ 164 | int 
this_matched = LRMmatch_chro(read + read_chrsor, index, chro_cursor, tmpi, 0); 165 | if(show_txt){ 166 | unsigned txt_chro_cursor = chro_cursor; 167 | int txt_read_chrsor = read_chrsor; 168 | for(x1 = 0; x1 < tmpi ; x1++){ 169 | int knownval = LRMgvindex_get(index, txt_chro_cursor); 170 | int readval = read[txt_read_chrsor]; 171 | txt_chro_cursor ++; 172 | txt_read_chrsor ++; 173 | 174 | LRMprintf("%c[3%dm%c", 27, knownval == readval ? 7:1, readval); 175 | } 176 | } 177 | 178 | if(0 && abs(tmpi) > 22 && this_matched < tmpi * 0.6){ 179 | LRMprintf("Too many mismatched (%d%c) : %d / %d : read + %d\n", tmpi, nch, this_matched, tmpi, read_chrsor); 180 | } 181 | 182 | all_matched += this_matched; 183 | all_mismatched += ( tmpi - this_matched ); 184 | (*maplen) += tmpi; 185 | } 186 | 187 | if(nch == 'M' || nch == 'I' || nch == 'S'){ 188 | if(nch == 'I' && show_txt) for(x1 = 0; x1 < tmpi ; x1++) LRMprintf("%c[32m%c", 27, read[ read_chrsor + x1 ]); 189 | if(nch == 'S' && show_txt) for(x1 = 0; x1 < tmpi ; x1++) LRMprintf("%c[4m%c%c[0m", 27, read[ read_chrsor + x1 ], 27); 190 | read_chrsor += tmpi; 191 | } 192 | if(nch == 'M' || nch == 'D' || nch == 'N' || nch == 'S'){ 193 | if(( nch == 'N' || nch == 'D' ) && show_txt) LRMprintf("%c[36m//%c[37m", 27, 27); 194 | if(nch != 'S')chro_cursor += tmpi; 195 | } 196 | 197 | tmpi = 0; 198 | tmpi_sign = 1; 199 | } 200 | } 201 | if(show_txt){ 202 | char postxt[100]; 203 | LRMpos2txt(context, chro_cursor, postxt); 204 | LRMprintf("%c[37m\n", 27); 205 | LRMprintf("Ending Pos : Read + %d ( %s )\n", read_chrsor, postxt); 206 | } 207 | if(neg) LRMreverse_read(read, strlen(read)); 208 | return all_matched; 209 | } 210 | 211 | int LRMmatch_chro(char * read, LRMgene_value_index_t * index, unsigned int pos, int test_len, int is_negative_strand){ 212 | int ret = 0; 213 | int i; 214 | 215 | if ((unsigned int)(pos + test_len) >= index -> length + index -> start_point) return 0; 216 | if (pos > 0xffff0000) return 0; 217 | 218 | if 
(is_negative_strand) 219 | { 220 | for (i=test_len -1;i>=0;i--) 221 | { 222 | char tt = LRMgvindex_get (index, pos+test_len-1-i); 223 | switch(tt) 224 | { 225 | case 'A': ret += read[i] == 'T'; break; 226 | case 'T': ret += read[i] == 'A'; break; 227 | case 'G': ret += read[i] == 'C'; break; 228 | case 'C': ret += read[i] == 'G'; break; 229 | } 230 | } 231 | }else{ 232 | unsigned int offset_byte, offset_bit; 233 | 234 | LRMgvindex_baseno2offset_m(pos, index , offset_byte, offset_bit); 235 | 236 | if(offset_byte >= index-> values_bytes)return 0; 237 | char int_value = index->values [offset_byte]; 238 | 239 | for (i=0;i> offset_bit) & 3; 242 | char tv = read[i]; 243 | switch(tv){ 244 | case 'A': 245 | ret += tt==0; 246 | break; 247 | case 'G': 248 | ret += tt==1; 249 | break; 250 | case 'C': 251 | ret += tt==2; 252 | break; 253 | case 0: 254 | break; 255 | default: 256 | ret += tt==3; 257 | 258 | } 259 | offset_bit+=2; 260 | if(offset_bit==8) 261 | { 262 | offset_byte++; 263 | if(offset_byte == index-> values_bytes)return 0; 264 | int_value = index->values [offset_byte]; 265 | offset_bit = 0; 266 | } 267 | } 268 | } 269 | return ret; 270 | } 271 | 272 | void LRMgvindex_destory(LRMgene_value_index_t * index) 273 | { 274 | free(index -> values); 275 | } 276 | -------------------------------------------------------------------------------- /src/longread-one/LRMbase-index.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 
8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef __LRMBASE_INDEX_H_ 21 | #define __LRMBASE_INDEX_H_ 22 | 23 | #include "LRMconfig.h" 24 | 25 | int LRMgvindex_load(LRMgene_value_index_t * index, const char filename []); 26 | 27 | void LRMgvindex_destory(LRMgene_value_index_t * index); 28 | 29 | void LRMgvindex_baseno2offset(unsigned int base_number, LRMgene_value_index_t * index, unsigned int * offset_byte, unsigned int * offset_bit); 30 | 31 | int LRMgvindex_get(LRMgene_value_index_t * index, LRMgehash_data_t offset); 32 | 33 | int LRMmatch_chro(char * read, LRMgene_value_index_t * index, unsigned int pos, int test_len, int is_negative_strand); 34 | 35 | void LRMgvindex_get_string(char *buf, LRMgene_value_index_t * index, unsigned int pos, int len, int is_negative_strand); 36 | 37 | int LRMvalidate_mapping(LRMcontext_t * context, char * read, char * cigar, LRMgene_value_index_t * index, unsigned int pos, int rev, int * mapped_length, int show_txt); 38 | #endif 39 | -------------------------------------------------------------------------------- /src/longread-one/LRMchro-event.h: -------------------------------------------------------------------------------- 1 | #ifndef __LRMCHRO_EVENT_H_ 2 | #define __LRMCHRO_EVENT_H_ 3 | 4 | #include "LRMconfig.h" 5 | 6 | int LRMevents_load_annot(LRMcontext_t * context); 7 | int LRMevents_filtering(LRMcontext_t * context); 8 | int LRMevents_reorder(LRMcontext_t * context); 9 | int LRMevents_build_entries(LRMcontext_t * context); 10 | int LRMevents_lookup(LRMcontext_t * context, unsigned int abs_pos, int event_type_masks, int search_large_side, LRMevent_t ** res); 11 | 12 | int 
LRMchro_event_new(LRMcontext_t * context, LRMthread_context_t * thread_context, LRMread_iteration_context_t * iteration_context, LRMevent_t * new_event); 13 | 14 | // high_penalty_for_creating_gap is for creating less CIGAR options (<65535) 15 | int LRMindel_dynamic_search(LRMcontext_t* context, LRMthread_context_t* thread_context, int expected_indels, unsigned int last_correct_base_on_chro, char * corrected_read, int last_correct_base, int first_correct_base, unsigned int * total_mismatched, int high_penalty_for_creating_gap); 16 | void LRMreverse_read_and_qual(LRMcontext_t * context, LRMthread_context_t * thread_context, LRMread_iteration_context_t * iteration_context); 17 | int LRMdynamic_to_ends(LRMcontext_t* context, LRMthread_context_t* thread_context, LRMread_iteration_context_t * iteration_context, int last_mapped_in_read, unsigned int last_correct_base_on_chro, int search_to_3end); 18 | int LRMdynamic_in_middle(LRMcontext_t* context, LRMthread_context_t* thread_context, LRMread_iteration_context_t * iteration_context, int last_correct_base, int first_correct_base , unsigned int last_correct_base_on_chro, int expected_indels); 19 | #endif 20 | -------------------------------------------------------------------------------- /src/longread-one/LRMfile-io.h: -------------------------------------------------------------------------------- 1 | #ifndef __LRMFILEIO_H_ 2 | #define __LRMFILEIO_H_ 3 | 4 | #define LRMGENE_INPUT_FASTQ 1 5 | #define LRMGENE_INPUT_GZIP_FASTQ 51 6 | #define LRMMAX_LINE_LENGTH (LRMMAX_READ_LENGTH + 10) 7 | 8 | #include "LRMconfig.h" 9 | 10 | int LRMhash_strcmp(const void * s1, const void * s2); 11 | srUInt_64 LRMhash_strhash(const void * sv); 12 | 13 | int LRMgenekey2int(char key []); 14 | 15 | int LRMgeinput_open(const char * filename, LRMgene_input_t * input); 16 | 17 | void LRMsambam_write_header(LRMcontext_t * context, LRMthread_context_t * thread_context); 18 | void LRMbam_generate_tail_binary(LRMcontext_t * context, 
LRMthread_context_t * thread_context); 19 | 20 | int LRMwrite_chunk_check_buffer_write(LRMcontext_t * context, LRMthread_context_t * thread_context, int force_write); 21 | // return 0 if successful 22 | int LRMfetch_next_read(LRMcontext_t * context, LRMthread_context_t * thread_context, unsigned int *read_len, char * read_name, char * read_text, char * qual_text, unsigned int * read_no_in_chunk); 23 | // Return the length of this read or -1 if EOF. 24 | int LRMgeinput_next_read(LRMgene_input_t * input, char * read_name, char * read_string, char * quality_string); 25 | void LRMgeinput_close(LRMgene_input_t * input); 26 | 27 | // returns read length 28 | int LRMgeinput_readline(LRMgene_input_t * input, int buf_len, char * linebuffer) ; 29 | void LRMreverse_read(char * ReadString, int Length); 30 | void LRMreverse_quality(char * QualtyString, int Length); 31 | int LRMload_offsets(LRMcontext_t * context); 32 | int LRMlocate_gene_position(LRMcontext_t * context, unsigned int linear, char ** chro_name, int * pos); 33 | 34 | void LRMpos2txt(LRMcontext_t * context, unsigned int linear, char * txt); 35 | int LRMlocate_chro_length(LRMcontext_t * context, unsigned int linear, char ** chro_name, long long * chro_len); 36 | #endif 37 | -------------------------------------------------------------------------------- /src/longread-one/LRMhelper.h: -------------------------------------------------------------------------------- 1 | #ifndef __LRM_HELPER_H_ 2 | #define __LRM_HELPER_H_ 3 | 4 | void LRMbasic_sort(void * arr, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r)); 5 | 6 | void LRMmerge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2)); 7 | 8 | // A [n] : read ; B [m] : chro 9 | int LRMsmith_waterman(char * A, int n, char * B, int m, char * moves, int max_moves, int * score_buff_provided, int * move_buff_provided, 
int * high_score); 10 | int LRMsmith_waterman_linear(char * A, int n, char * B, int m, char * moves, int max_moves, int * score_buff_provided, int * move_buff_provided, int * high_score); 11 | #endif 12 | -------------------------------------------------------------------------------- /src/longread-one/LRMseek-zlib.h: -------------------------------------------------------------------------------- 1 | #ifndef __SEEK_ZLIB_H_LRM_ 2 | #define __SEEK_ZLIB_H_LRM_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define SEEKGZ_ZLIB_WINDOW_SIZE (32*1024) 10 | 11 | 12 | typedef struct { 13 | FILE * gz_fp; 14 | char * current_chunk_txt; 15 | char * current_chunk_bin; 16 | z_stream stem; 17 | int current_chunk_txt_size; 18 | unsigned int in_pointer; 19 | unsigned int in_chunk_offset; 20 | unsigned int in_block_offset; 21 | //unsigned int txt_buffer_size; 22 | unsigned int txt_buffer_used; 23 | unsigned long long block_start_in_file_offset; 24 | unsigned int block_start_in_file_bits; 25 | 26 | unsigned long long next_block_file_offset; 27 | unsigned int next_block_file_bits; 28 | 29 | int is_the_last_chunk; 30 | int internal_error; 31 | 32 | unsigned int dict_window_pointer; 33 | unsigned int dict_window_used; 34 | char dict_window[SEEKGZ_ZLIB_WINDOW_SIZE]; 35 | 36 | unsigned int block_dict_window_size; 37 | char block_dict_window[SEEKGZ_ZLIB_WINDOW_SIZE]; 38 | } seekable_zfile_t; 39 | 40 | typedef struct{ 41 | char dict_window[SEEKGZ_ZLIB_WINDOW_SIZE]; 42 | unsigned long long block_gzfile_offset; 43 | unsigned int block_gzfile_bits; 44 | unsigned int block_dict_window_size; 45 | 46 | unsigned int in_block_text_offset; 47 | } seekable_position_t; 48 | #define SEEKZLIBmin(a,b) ( (a)<(b)?(a):(b) ) 49 | 50 | // returns 0 if OK; returns 1 if the file is not indexable; returns -1 if file doesn't exist. 
51 | int LRMseekgz_open(const char * fname, seekable_zfile_t * fp); 52 | 53 | // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 54 | int LRMseekgz_gets(seekable_zfile_t * fp, char * buf, int buf_size); 55 | 56 | void LRMseekgz_tell(seekable_zfile_t * fp, seekable_position_t * pos); 57 | 58 | void LRMseekgz_seek(seekable_zfile_t * fp, seekable_position_t * pos); 59 | 60 | int LRMseekgz_next_char(seekable_zfile_t * fp); 61 | 62 | void LRMseekgz_close(seekable_zfile_t * fp); 63 | #endif 64 | -------------------------------------------------------------------------------- /src/longread-one/LRMsorted-hashtable.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 
14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef _SORTED_HASHTABLE_H_ 21 | #define _SORTED_HASHTABLE_H_ 22 | #include 23 | #include 24 | #include "LRMconfig.h" 25 | 26 | #define LRMGEHASH_DEFAULT_SIZE 2000000000 27 | #define LRMGEHASH_BUCKET_LENGTH 2291 28 | 29 | #define LRMinit_gene_vote(a) {memset((a)->items, 0, LRMGENE_VOTE_TABLE_SIZE*sizeof( *((a)->items))); } 30 | 31 | size_t LRMgehash_go_q(LRMgehash_t * the_table, LRMgehash_key_t key, int offset, int read_len, int is_reversed, LRMgene_vote_t * vote, int indel_tolerance, int subread_number); 32 | size_t LRMgehash_go_tolerance(LRMcontext_t * context, LRMthread_context_t * thread_context, LRMread_iteration_context_t * iteration_context, LRMgehash_t * the_table, LRMgehash_key_t key, int offset, int read_len, int is_reversed, LRMgene_vote_t * vote, int indel_tolerance, int subread_number, int max_MM); 33 | 34 | void LRMgehash_destory(LRMgehash_t * the_table); 35 | void LRMfinalise_vote(LRMgene_vote_t * vote); 36 | int LRMgehash_load(LRMgehash_t * the_table, const char fname []); 37 | void LRMassign_best_vote(LRMgene_vote_t * vote, int i, int j); 38 | 39 | void LRMselect_best_vote(LRMgene_vote_t * vote); 40 | void LRMgehash_sort(LRMgehash_t * the_table); 41 | int LRMgehash_load_option(const char fname [], int option_no, int * result); 42 | void LRMprint_v(LRMcontext_t * context, LRMread_iteration_context_t * iteration_context, int min_votes); 43 | 44 | size_t LRMgehash_go_QQ(LRMcontext_t * context, LRMthread_context_t * thread_context, LRMread_iteration_context_t * iteration_context, LRMgehash_t * the_table, LRMgehash_key_t key, int offset, int read_len, int is_reversed, LRMgene_vote_t * vote, int indel_tolerance, int subread_number); 45 | #endif 46 | -------------------------------------------------------------------------------- /src/longread-one/Makefile: 
-------------------------------------------------------------------------------- 1 | CC_EXEC = gcc 2 | OPT_LEVEL = 3 3 | 4 | include ../makefile.version 5 | include make.version 6 | 7 | CCFLAGS = ${MACOS} -O${OPT_LEVEL} -Wall -DMAKE_FOR_EXON -D MAKE_STANDALONE -D_FILE_OFFSET_BITS=64 -DSUBREAD_VERSION=\"${SUBREAD_VERSION}\" ${WARNING_LEVEL} ${MINGW32} 8 | LDFLAGS = -lpthread -lz -lm -O${OPT_LEVEL} -DMAKE_FOR_EXON -D MAKE_STANDALONE # -DREPORT_ALL_THE_BEST 9 | CC = ${CC_EXEC} ${CCFLAGS} -fmessage-length=0 -ggdb 10 | 11 | ALL_LIBS=LRMsorted-hashtable LRMbase-index LRMchro-event LRMhelper LRMseek-zlib LRMfile-io LRMhashtable 12 | ALL_OBJECTS=$(addsuffix .o, ${ALL_LIBS}) 13 | ALL_H=$(addsuffix .h, ${ALL_LIBS}) 14 | ALL_C=$(addsuffix .c, ${ALL_LIBS}) 15 | 16 | 17 | all: $(ALL_OBJECTS) LRM 18 | 19 | clean: 20 | rm -f *.o LRM 21 | 22 | LRM: longread-mapping.c ${ALL_OBJECTS} 23 | ${CC} -o LRM longread-mapping.c ${ALL_OBJECTS} ${LDFLAGS} 24 | 25 | $(ALL_OBJECTS): $(ALL_C) $(ALL_H) 26 | $(CC) -o $@ -c $(subst .o,.c,$@) 27 | 28 | -------------------------------------------------------------------------------- /src/makefile.version: -------------------------------------------------------------------------------- 1 | SUBREAD_VERSION_BASE=2.0.6 2 | SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y") 3 | SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)" 4 | SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)" 5 | 6 | STATIC_MAKE= 7 | #STATIC_MAKE= -static 8 | -------------------------------------------------------------------------------- /src/read-repair.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "subread.h" 7 | #include "core.h" 8 | #include "input-files.h" 9 | #include "HelperFunctions.h" 10 | 11 | void print_usage_pairer(char * cmd){ 12 | SUBREADprintf("\nrepair Version %s\n\n", SUBREAD_VERSION); 13 | SUBREADputs(" Find reads that are from the same pair in the 
input and then place them next"); 14 | SUBREADputs("to each other in the output. A dummy read is added for each singleton read"); 15 | SUBREADputs("that does not have a pair. The output file is compatible with featureCounts"); 16 | SUBREADputs("program."); 17 | SUBREADputs(""); 18 | SUBREADputs("Usage:"); 19 | SUBREADputs(""); 20 | SUBREADputs(" ./repair [options] -i -o \n"); 21 | SUBREADputs(""); 22 | SUBREADputs("Required arguments:"); 23 | SUBREADputs(""); 24 | SUBREADputs(" -i Name of input file. BAM format by default."); 25 | SUBREADputs(""); 26 | SUBREADputs(" -o Name of output file. The output file is in BAM format."); 27 | SUBREADputs(""); 28 | SUBREADputs("Optional arguments:"); 29 | SUBREADputs(""); 30 | SUBREADputs(" -S The input file is in SAM format."); 31 | SUBREADputs(""); 32 | SUBREADputs(" -c Compress the output BAM file. This will reduce the size of BAM"); 33 | SUBREADputs(" file, but will increase the time of retrieving reads from BAM"); 34 | SUBREADputs(" file."); 35 | SUBREADputs(""); 36 | SUBREADputs(" -T Number of CPU threads. 
8 by default."); 37 | SUBREADputs(""); 38 | SUBREADputs(" -d Do not add dummy reads for singleton reads."); 39 | SUBREADputs(""); 40 | SUBREADputs(" -t Do not include sequences and quality scores of reads in the"); 41 | SUBREADputs(" output file."); 42 | SUBREADputs(""); 43 | } 44 | 45 | #ifdef MAKE_STANDALONE 46 | int main(int argc, char ** argv) 47 | #else 48 | int main_read_repair(int argc, char ** argv) 49 | #endif 50 | { 51 | double t0 = miltime(); 52 | int threads = 8, is_BAM = 1; 53 | char c; 54 | char in_BAM_file[MAX_FILE_NAME_LENGTH+1]; 55 | char out_BAM_file[MAX_FILE_NAME_LENGTH+1]; 56 | char rand_prefix[40]; 57 | int no_compression = 1; 58 | int has_dummy = 1; 59 | int tiny_mode = 0; 60 | optind = 1; 61 | opterr = 1; 62 | optopt = 63; 63 | int memory = 64; 64 | in_BAM_file[0] = out_BAM_file[0] = 0; 65 | 66 | while ((c = getopt(argc, argv, "i:T:M:o:vtdcS?")) != -1) 67 | { 68 | switch(c) 69 | { 70 | case '?': 71 | case 'v': 72 | print_usage_pairer(argv[0]); 73 | STANDALONE_exit(0); 74 | break; 75 | case 'S': 76 | is_BAM = 0; 77 | break; 78 | case 't': 79 | tiny_mode = 1; 80 | break; 81 | case 'd': 82 | has_dummy = 0; 83 | break; 84 | case 'o': 85 | strcpy(out_BAM_file, optarg); 86 | break; 87 | case 'M': 88 | memory = atoi(optarg); 89 | if(memory < 1) memory = 1; 90 | break; 91 | case 'T': 92 | threads = atoi(optarg); 93 | if(threads < 1) threads = 1; 94 | if(threads > MAX_THREADS) threads = MAX_THREADS; 95 | break; 96 | case 'c': 97 | no_compression = 0; 98 | break; 99 | case 'i': 100 | strcpy(in_BAM_file, optarg); 101 | break; 102 | } 103 | } 104 | 105 | if(in_BAM_file[0]==0 || out_BAM_file[0]==0){ 106 | print_usage_pairer(argv[0]); 107 | //SUBREADprintf("\nNo input or output files are specified.\n"); 108 | STANDALONE_exit(-1); 109 | } 110 | 111 | if(!is_paired_end_BAM(in_BAM_file)){ 112 | SUBREADprintf("Error: the input file contains single-end reads. 
Repair can only process paired-end reads.\n"); 113 | STANDALONE_exit(-1); 114 | } 115 | 116 | char mac_rand[13]; 117 | mac_or_rand_str(mac_rand); 118 | 119 | SUBreadSprintf(rand_prefix, 40, "fsbm-p%06d-%s", getpid(), mac_rand); 120 | 121 | SAM_pairer_context_t pairer; 122 | SAM_pairer_writer_main_t writer_main; 123 | int ret = SAM_pairer_writer_create(&writer_main, threads, has_dummy,1, no_compression?Z_NO_COMPRESSION:Z_DEFAULT_COMPRESSION, out_BAM_file); 124 | if(ret){ 125 | SUBREADprintf("Unable to open the output file. Program terminated.\n"); 126 | return -1; 127 | }else{ 128 | ret = SAM_pairer_create(&pairer, threads, memory, is_BAM, tiny_mode,0,0 , 0, 1, in_BAM_file, SAM_pairer_writer_reset, SAM_pairer_multi_thread_header, SAM_pairer_multi_thread_output, rand_prefix, &writer_main, 99999999); 129 | if(ret){ 130 | SUBREADprintf("Unable to open the input file. Program terminated.\n"); 131 | return -1; 132 | }else{ 133 | SAM_pairer_run(&pairer); 134 | int has_error = pairer.is_bad_format; 135 | SAM_pairer_destroy(&pairer); 136 | SAM_pairer_writer_destroy(&writer_main); 137 | SUBREADprintf("\n%s %.2f minutes\nTotal input reads: %llu ; Unpaired reads: %llu\n\n", has_error?"Program terminated WITH ERRORS!!! 
Used":"All finished in", (miltime()-t0)/60, pairer.total_input_reads, pairer.total_orphan_reads); 138 | if(has_error){ 139 | SUBREADprintf("No output file was generated.\n"); 140 | unlink(out_BAM_file); 141 | return -1; 142 | } 143 | return 0; 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/removeDupReads.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef __REP_READ_REMO_H__ 21 | #define __REP_READ_REMO_H__ 22 | #include "subread.h" 23 | 24 | 25 | typedef struct{ 26 | unsigned short * repeating_times; 27 | } read_voting_table_t; 28 | 29 | 30 | // Giving temp_location 'NULL' makes the function to use the current directory to store temporary files. 31 | // Giving read_count '0' makas the function use its default value: read_count = 400,000,000, namely ~50MB memory is used to store the selection table (other parts of the program may use more memory) 32 | // Note that this function generates ( 4 * all_reads ) bytes of temporary files in the current director or the directory specified in temp_location. 
33 | 34 | int repeated_read_removal(char * in_SAM_file, int threshold, char * out_SAM_file, char * temp_location, int threads); 35 | 36 | #endif 37 | -------------------------------------------------------------------------------- /src/sam2fq.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | 7 | char * __converting_char_table = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTNGNNNCNNNNNNNNNNNNAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNTNGNNNCNNNNNNNNNNNNAANNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN "; 8 | 9 | 10 | void reverse_qual(int read_len, char * InBuff) 11 | { 12 | int i; 13 | if(!InBuff) return; 14 | if(!InBuff[0]) return; 15 | for (i=0; i 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "subread.h" 28 | #include "core.h" 29 | #include "sambam-file.h" 30 | #include "HelperFunctions.h" 31 | #include "input-files.h" 32 | 33 | typedef struct{ 34 | int is_BAM; 35 | 36 | }countbases_context_t; 37 | 38 | static struct option sumb_long_options[] = 39 | { 40 | {"BAMinput", no_argument, 0, '9'}, 41 | {"SAMinput", no_argument, 0, '8'}, 42 | {0, 0, 0, 0} 43 | }; 44 | 45 | void countBases(char * fn, countbases_context_t * context) 46 | { 47 | char fline[2999]; 48 | unsigned long long int allbases = 0; 49 | SamBam_FILE * in_fp = SamBam_fopen(fn,context->is_BAM?SAMBAM_FILE_BAM:SAMBAM_FILE_SAM); 50 | 51 | while(1) 52 | { 53 | char * tok_tmp = NULL; 54 | char * is_ret = SamBam_fgets(in_fp, fline, 2999, 0); 55 | if(!is_ret) break; 56 | if('@' == fline[0]) continue; 57 | 58 | strtok_r(fline, "\t", &tok_tmp); 59 | char * flags_str = strtok_r(NULL, "\t", &tok_tmp); 60 | strtok_r(NULL, "\t", &tok_tmp); 61 | strtok_r(NULL, "\t", &tok_tmp); 62 | strtok_r(NULL, "\t", &tok_tmp); 63 | char * cigar = strtok_r(NULL, "\t", &tok_tmp); 64 | 65 | int flags = atoi(flags_str); 66 | 
67 | if(4 & flags) continue; 68 | 69 | 70 | unsigned int Staring_Points[6]; 71 | unsigned short Section_Length[6]; 72 | 73 | int i, retv = RSubread_parse_CIGAR_string(cigar, Staring_Points, Section_Length); 74 | 75 | for(i=0;i is_BAM = 1; 106 | break; 107 | case '8': 108 | context -> is_BAM = 0; 109 | break; 110 | default: 111 | return 0; 112 | } 113 | } 114 | 115 | int input_file; 116 | for(input_file = optind; input_file < argc; input_file++) 117 | { 118 | countBases(argv[input_file], context); 119 | } 120 | 121 | free(context); 122 | return ret; 123 | } 124 | 125 | -------------------------------------------------------------------------------- /src/seek-zlib.h: -------------------------------------------------------------------------------- 1 | #ifndef __SEEK_ZLIB_H_ 2 | #define __SEEK_ZLIB_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "subread.h" 9 | 10 | #define PARALLEL_GZIP_TXT_BUFFER_MARGIN (2* MAX_FC_READ_LENGTH + 500) 11 | #define PARALLEL_GZIP_TXT_BUFFER_SIZE (1024*1024) 12 | #define PARALLEL_GZIP_ZIPPED_BUFFER_SIZE (PARALLEL_GZIP_TXT_BUFFER_SIZE *9/8 ) 13 | 14 | typedef struct{ 15 | int thread_no; 16 | int in_buffer_used; 17 | int out_buffer_used; 18 | unsigned int CRC32; 19 | unsigned int zipped_CRC32; 20 | int plain_length; 21 | char in_buffer[PARALLEL_GZIP_TXT_BUFFER_SIZE]; 22 | char out_buffer[PARALLEL_GZIP_ZIPPED_BUFFER_SIZE]; 23 | z_stream zipper; 24 | } parallel_gzip_writer_thread_t; 25 | 26 | typedef struct{ 27 | int threads; 28 | srInt_64 plain_length; 29 | unsigned int CRC32; 30 | FILE * os_file; 31 | parallel_gzip_writer_thread_t * thread_objs; 32 | } parallel_gzip_writer_t; 33 | 34 | void parallel_gzip_writer_init(parallel_gzip_writer_t * pzwtr, char * output_filename, int total_threads); 35 | void parallel_gzip_writer_add_text(parallel_gzip_writer_t * pzwtr, char * text, int tlen, int thread_no); 36 | // because we have to keep sync between three fastq files, the flush function has to be manually called three 
times at the same time point. 37 | // otherwise R1, I2 and R2 files will have inconsistent read orders. 38 | // the outer program has to check if any of the three in_buffers is full. 39 | void parallel_gzip_zip_texts(parallel_gzip_writer_t * pzwtr, int thread_no, int for_eof_marker); 40 | void parallel_gzip_writer_flush(parallel_gzip_writer_t * pzwtr, int thread_no); 41 | void parallel_gzip_writer_close(parallel_gzip_writer_t * pzwtr); 42 | int parallel_gzip_writer_add_read_fqs_scRNA(parallel_gzip_writer_t**outfps, char * bambin, int thread_no); 43 | void parallel_gzip_writer_add_text_qual(parallel_gzip_writer_t * pzwtr, char * text, int tlen, int thread_no); 44 | 45 | // returns 0 if OK; returns 1 if the file is not indexable; returns -1 if file doesn't exist. 46 | int seekgz_open(const char * fname, seekable_zfile_t * fp, FILE * old_fp); 47 | 48 | // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 49 | int seekgz_gets(seekable_zfile_t * fp, char * buf, int buf_size); 50 | 51 | void seekgz_tell(seekable_zfile_t * fp, seekable_position_t * pos); 52 | 53 | void seekgz_seek(seekable_zfile_t * fp, seekable_position_t * pos); 54 | 55 | // Diff: seekgz_next_char returns EOF for EOF but seekgz_next_int8 returns -1 for EOF 56 | int seekgz_next_char(seekable_zfile_t * fp); 57 | int seekgz_next_int8(seekable_zfile_t * fp); 58 | 59 | void seekgz_close(seekable_zfile_t * fp); 60 | 61 | // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 62 | int autozip_gets(autozip_fp * fp, char * buf, int buf_size); 63 | 64 | 65 | // return -1 for EOF 66 | int autozip_getch(autozip_fp * fp); 67 | 68 | void autozip_close(autozip_fp * fp); 69 | 70 | // return -1 if error, return 0 if plain text, return 1 if gzipped 71 | int autozip_open(const char * fname, autozip_fp * fp); 72 | 73 | void autozip_rewind(autozip_fp * fp); 74 | 75 | int seekgz_preload_buffer( seekable_zfile_t * fp , subread_lock_t * 
read_lock); 76 | 77 | // returns length in bytes if OK (length includes the line break at the end); returns 0 if EOF 78 | int seekgz_gets(seekable_zfile_t * fp, char * buff, int buff_len); 79 | #endif 80 | -------------------------------------------------------------------------------- /src/sorted-hashtable.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef _SORTED_HASHTABLE_H_ 21 | #define _SORTED_HASHTABLE_H_ 22 | #include 23 | #include 24 | 25 | #include "subread.h" 26 | #include "gene-algorithms.h" 27 | #include "gene-value-index.h" 28 | 29 | #define GEHASH_DEFAULT_SIZE 2000000000 30 | 31 | #if __WORDSIZE == 64 || _WIN64 32 | #define GEHASH_BUCKET_LENGTH (100 - 69) 33 | #else 34 | #define GEHASH_BUCKET_LENGTH 100 35 | #endif 36 | 37 | #define gehash_fast_t gehash_t 38 | #define gehash_destory_fast gehash_destory 39 | 40 | 41 | // This function creates a new hash table. The invoter may provide the expected size of the table or -1 for a default size (2 billions) 42 | // This function returns 0 if success or an errno 43 | // The version of the hash table created by this function must be SUBINDEX_VER0. 
44 | int gehash_create(gehash_t * the_table, size_t expected_size, char is_small_table); 45 | 46 | // The EX version creates a hashtable with the given version number. 47 | int gehash_create_ex(gehash_t * the_table, size_t expected_size, char is_small_table, int version_number, int GENE_SLIDING_STEP, int padding); 48 | 49 | // This function puts a data item into the table. If the key is already present, it inserts another copy into the table and does not overwrite the old one. 50 | int gehash_insert(gehash_t * the_table, gehash_key_t key, gehash_data_t data, unsigned int * bucket_sizes); 51 | void gehash_try_insert_measure(unsigned int * bucket_sizes, int bucket_no, gehash_key_t key); 52 | 53 | // This function does what gehash_insert does, but inserts nothing if the key has already occurred max_key_occurance times. 54 | int gehash_insert_limited(gehash_t * the_table, gehash_key_t key, gehash_data_t data, int max_key_occurance, int prob_replace); 55 | 56 | // This function queries the table and puts the matched data items into data_result. 57 | // This function returns 0 if not found, or the number of matched items. 58 | // The invoker is in charge of allocating memory for the results. 59 | size_t gehash_get(gehash_t * the_table, gehash_key_t key, gehash_data_t * data_result, size_t max_result_space); 60 | 61 | // Test existence, disregarding the number of occurrences. 62 | // Returns 1 if the key exists, 0 if not. 
63 | int gehash_exist(gehash_t * the_table, gehash_key_t key); 64 | 65 | 66 | size_t gehash_go_q_CtoT(gehash_t * the_table, gehash_key_t key, int offset, int read_len, int is_reversed, gene_vote_t * vote,gene_vote_number_t weight, int max_match_number, int indel_tolerance, int subread_number,int max_error_bases, unsigned int low_border, unsigned int high_border); 67 | 68 | size_t gehash_go_q_tolerable(gehash_t * the_table, gehash_key_t key, int offset, int read_len, int is_reversed, gene_vote_t * vote, gene_vote_number_t weight, gene_quality_score_t quality, int max_match_number, int indel_tolerance, int subread_number,int max_error_bases, int subread_len, unsigned int low_border, unsigned int high_border); 69 | 70 | size_t gehash_go_q(gehash_t * the_table, gehash_key_t key, int offset, int read_len, int is_reversed, gene_vote_t * vote,int indel_tolerance, int subread_number, unsigned int low_border, unsigned int high_border); 71 | size_t gehash_go_X(gehash_t * the_table, gehash_key_t key, int offset, int read_len, int is_reversed, gene_vote_t * vote,int indel_tolerance, int subread_number, unsigned int low_border, unsigned int high_border, int run_round, unsigned int * shift_indel_locations, unsigned int * shift_indel_NO); 72 | 73 | // This function performs the same functionality, but runs only on AMD-64 CPUs, and the length of each key must be 4 bytes. 74 | size_t gehash_get_hpc(gehash_t * the_table, gehash_key_t key, gehash_data_t * data_result, size_t max_result_space); 75 | 76 | // This function removes all items under the key. It returns the number of items that have been removed in this call. 77 | size_t gehash_remove(gehash_t * the_table, gehash_key_t key); 78 | 79 | // Free all memory that is allocated for the table. Only the table structure itself is not freed. 80 | void gehash_destory(gehash_t * the_table); 81 | 82 | // This function completely dumps a table into a disk file. 83 | // It returns 0 on success, otherwise -1. 
84 | int gehash_dump(gehash_t * the_table, const char fname []); 85 | 86 | void finalise_vote(gene_vote_t * vote); 87 | 88 | // This function loads a dumpped hash table. 89 | // The invoker does not need to initialise the table; it will be initialised in the function. 90 | // It returns 0 if success, otherwise -1. 91 | int gehash_load(gehash_t * the_table, const char fname []); 92 | 93 | void gehash_prealloc(gehash_t * the_table); 94 | 95 | size_t gehash_update(gehash_t * the_table, gehash_key_t key, gehash_data_t data_new); 96 | 97 | short indel_recorder_copy(gene_vote_number_t *dst, gene_vote_number_t* src); 98 | 99 | void assign_best_vote(gene_vote_t * vote, int i, int j); 100 | 101 | void select_best_vote(gene_vote_t * vote); 102 | void gehash_sort(gehash_t * the_table); 103 | int gehash_load_option(const char fname [], int option_no, int * result); 104 | 105 | // calculate # of buckets for estimaing their sizes. 106 | unsigned int calculate_buckets_by_size(size_t exp_size, int version, int is_small_tab, int index_gap); 107 | #endif 108 | -------------------------------------------------------------------------------- /src/subfilter.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "subread.h" 6 | #include "core.h" 7 | #include "sambam-file.h" 8 | #include "input-files.h" 9 | 10 | 11 | static struct option long_options[] = 12 | { 13 | {"in", required_argument, 0, 'i'}, 14 | {"filter", required_argument, 0, 'F'}, 15 | {0, 0, 0, 0} 16 | }; 17 | 18 | int main(int argc, char ** argv) 19 | { 20 | int c; 21 | int option_index = 0; 22 | 23 | optind = 1; 24 | opterr = 1; 25 | optopt = 63; 26 | 27 | int sort_needed = 0; 28 | char filter_mode[10]; 29 | char in_name[MAX_FILE_NAME_LENGTH]; 30 | int in_SAM = 1; 31 | int out_SAM = 1; 32 | 33 | in_name[0] = filter_mode[0]=0; 34 | 35 | 36 | while ((c = getopt_long (argc, argv, "i:F:", long_options, &option_index)) != -1) 37 | { 
38 | switch(c){ 39 | 40 | case 'F': 41 | out_SAM = (strcmp(optarg, "SAM")==0); 42 | break; 43 | case 'i': 44 | strcpy(in_name, optarg); 45 | break; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/sublog.c: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include "subread.h" 27 | #include "sublog.h" 28 | 29 | #define MINIMUM_LOG_LEVEL SUBLOG_LEVEL_INFO 30 | 31 | void remove_ESC_effects(char * txt) 32 | { 33 | int x1; 34 | int ocur = 0; 35 | int state = 0; 36 | int trimmed = 0; 37 | 38 | // return 0; 39 | 40 | for(x1=0;x1<1199;x1++) 41 | { 42 | if(!txt[x1])break; 43 | if((state == 0) && (txt[x1]==CHAR_ESC)) 44 | { 45 | state = 1; 46 | trimmed = 1; 47 | continue; 48 | } 49 | if(state == 0){ 50 | if(x1>ocur) 51 | txt[ocur] = txt[x1]; 52 | ocur++; 53 | } 54 | 55 | if((state == 1) && (txt[x1]=='m')) 56 | state = 0; 57 | } 58 | if(trimmed) 59 | txt[ocur]=0; 60 | } 61 | 62 | 63 | int is_ESC_removed() 64 | { 65 | #if defined(MAKE_STANDALONE) || defined(RUNNING_ENV) 66 | return !isatty(fileno(stderr)); 67 | #else 68 | return 1; 69 | #endif 70 | 71 | } 72 | 73 | 74 
| void sublog_printf(int stage, int level, const char * pattern, ...) 75 | { 76 | va_list args; 77 | va_start(args , pattern); 78 | if(level0) 129 | SUBREADprintf("%s",vsbuf); 130 | free(vsbuf); 131 | } 132 | else 133 | { 134 | #if defined(MAKE_STANDALONE) || defined(RUNNING_ENV) 135 | vfprintf(stderr, pattern , args); 136 | 137 | fflush(stderr); 138 | #endif 139 | } 140 | va_end(args); 141 | 142 | } 143 | 144 | int sambamout_fprintf(FILE * fp, const char * pattern, ...) 145 | { 146 | int ret; 147 | va_list args; 148 | va_start(args , pattern); 149 | 150 | //printf("FP=%llu\n", (long long)fp); 151 | #ifdef MAKE_STANDALONE 152 | if(fp == NULL) fp = stdout; 153 | #endif 154 | assert(fp); 155 | 156 | ret = vfprintf(fp, pattern , args); 157 | va_end(args); 158 | return ret; 159 | } 160 | -------------------------------------------------------------------------------- /src/sublog.h: -------------------------------------------------------------------------------- 1 | /*************************************************************** 2 | 3 | The Subread software package is free software package: 4 | you can redistribute it and/or modify it under the terms 5 | of the GNU General Public License as published by the 6 | Free Software Foundation, either version 3 of the License, 7 | or (at your option) any later version. 8 | 9 | Subread is distributed in the hope that it will be useful, 10 | but WITHOUT ANY WARRANTY; without even the implied warranty 11 | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 12 | 13 | See the GNU General Public License for more details. 
14 | 15 | Authors: Drs Yang Liao and Wei Shi 16 | 17 | ***************************************************************/ 18 | 19 | 20 | #ifndef _SUBLOG_H_ 21 | #define _SUBLOG_H_ 22 | 23 | #define SUBLOG_STAGE_DEV1 16 24 | #define SUBLOG_STAGE_DEV_ALPHA 256 25 | #define SUBLOG_STAGE_DEV_BETA 4096 26 | #define SUBLOG_STAGE_DEV_RC 65536 27 | #define SUBLOG_STAGE_RELEASED 1048576 28 | 29 | #define SUBLOG_LEVEL_NIL 10 30 | #define SUBLOG_LEVEL_DEBUG 20 31 | #define SUBLOG_LEVEL_DETAILS 110 32 | #define SUBLOG_LEVEL_INFO 120 33 | #define SUBLOG_LEVEL_ABNORMAL 210 34 | #define SUBLOG_LEVEL_WARNING 220 35 | #define SUBLOG_LEVEL_ERROR 310 36 | #define SUBLOG_LEVEL_FATAL 900 37 | 38 | void sublog_printf(int stage, int level, const char * pattern, ...); 39 | void sublog_fwrite(int stage, int level, const char * pattern, ...); 40 | int sambamout_fprintf(FILE * fp, const char * pattern, ...); 41 | 42 | #endif 43 | -------------------------------------------------------------------------------- /src/subtools.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "subread.h" 6 | #include "core.h" 7 | #include "sambam-file.h" 8 | #include "input-files.h" 9 | 10 | 11 | static struct option long_options[] = 12 | { 13 | {"in", required_argument, 0, 'i'}, 14 | {"out", required_argument, 0, 'o'}, 15 | {"informat", required_argument, 0, 'f'}, 16 | {"outformat", required_argument, 0, 'F'}, 17 | {"sort", required_argument, 0, 'S'}, 18 | {0, 0, 0, 0} 19 | }; 20 | 21 | void subtools_usage() 22 | { 23 | SUBREADprintf("\nVersion %s\n\n", SUBREAD_VERSION); 24 | SUBREADputs("Usage:"); 25 | SUBREADputs(""); 26 | SUBREADputs(" ./subtools -i input_file -o output_file --informat --outformat {--sort }"); 27 | SUBREADputs(""); 28 | } 29 | 30 | int main(int argc, char ** argv) 31 | { 32 | int c; 33 | int option_index = 0; 34 | 35 | optind = 1; 36 | opterr = 1; 37 | optopt = 63; 38 | 39 | int sort_needed = 0; 
40 | char in_name[MAX_FILE_NAME_LENGTH]; 41 | char out_name[MAX_FILE_NAME_LENGTH]; 42 | int in_SAM = 1; 43 | int out_SAM = 1; 44 | 45 | in_name[0] = out_name[0]=0; 46 | 47 | while ((c = getopt_long (argc, argv, "i:o:f:F:S:", long_options, &option_index)) != -1) 48 | { 49 | switch(c){ 50 | 51 | case 'S': 52 | sort_needed = 1; 53 | break; 54 | case 'f': 55 | in_SAM = (strcmp(optarg, "SAM")==0); 56 | break; 57 | case 'F': 58 | out_SAM = (strcmp(optarg, "SAM")==0); 59 | break; 60 | case 'i': 61 | strcpy(in_name, optarg); 62 | break; 63 | case 'o': 64 | strcpy(out_name, optarg); 65 | break; 66 | 67 | default: 68 | subtools_usage(); 69 | return 0; 70 | } 71 | } 72 | 73 | 74 | if((!out_name[0])||(!in_name[0])) 75 | { 76 | subtools_usage(); 77 | return 0; 78 | } 79 | 80 | if(in_SAM && out_SAM && !sort_needed) 81 | { 82 | SUBREADprintf("Subtools can convert between SAM and BAM files and/or pair up the alignment results by the read names.\n"); 83 | SUBREADprintf("No operation was specified. The output file was not generated.\n"); 84 | return 0; 85 | } 86 | 87 | if(strcmp(out_name, in_name)==0) 88 | { 89 | SUBREADprintf("ERROR: the input file and the output file are the same!\n"); 90 | return 0; 91 | } 92 | 93 | SamBam_FILE * in_fp = SamBam_fopen(in_name,in_SAM?SAMBAM_FILE_SAM:SAMBAM_FILE_BAM); 94 | if(!in_fp) 95 | { 96 | SUBREADprintf("Unable to open the input file: %s\n",in_name); 97 | return 1; 98 | } 99 | 100 | FILE * out_fp = NULL; 101 | SamBam_Writer out_writer; 102 | 103 | if(out_SAM) 104 | { 105 | out_fp = f_subr_open(out_name, "w"); 106 | 107 | 108 | if(!out_fp) 109 | { 110 | SUBREADprintf("Unable to open the output file: %s\n",out_name); 111 | SamBam_fclose(in_fp); 112 | return 1; 113 | } 114 | } 115 | else 116 | { 117 | 118 | int swret = SamBam_writer_create( & out_writer, out_name); 119 | if(swret) 120 | { 121 | SUBREADprintf("Unable to open the output file: %s\n",out_name); 122 | SamBam_fclose(in_fp); 123 | return 1; 124 | } 125 | } 126 | 127 | 128 | 
if(sort_needed) 129 | { 130 | char fline[3000], temp_file_name[MAX_FILE_NAME_LENGTH], mac_rand[13]; 131 | 132 | SAM_sort_writer writer; 133 | mac_or_rand_str(mac_rand); 134 | sprintf(temp_file_name, "./temp-subt-%06u-%s.sam", getpid(), mac_rand); 135 | int ret =sort_SAM_create(&writer, temp_file_name, "."); 136 | if(ret) 137 | { 138 | SUBREADprintf("ERROR: temporary file '%s' is not able to be created.\n", temp_file_name); 139 | } 140 | 141 | unsigned long long int added_lines = 0; 142 | 143 | double t0 = miltime(); 144 | while(1) 145 | { 146 | char * is_ret = SamBam_fgets(in_fp, fline, 2999, 0); 147 | if(!is_ret) break; 148 | int ret = sort_SAM_add_line(&writer, fline, strlen(fline)); 149 | if(ret<0) 150 | { 151 | SUBREADprintf("ERROR: read name is too long; check the input format.\n"); 152 | break; 153 | } 154 | 155 | added_lines++; 156 | //if(added_lines>40000000) break; 157 | } 158 | //printf("N1=%llu\n", writer.unpaired_reads); 159 | 160 | double t1 = miltime(); 161 | sort_SAM_finalise(&writer); 162 | 163 | double t2 = miltime(); 164 | 165 | SUBREADprintf("Loading time: %.2f, Sorting time: %.2f\n", t1-t0, t2-t1); 166 | 167 | if(writer.unpaired_reads && 0) 168 | SUBREADprintf("%llu reads were paired up.\n", writer.written_reads); 169 | 170 | SamBam_fclose(in_fp); 171 | 172 | strcpy(in_name, temp_file_name); 173 | in_SAM = 1; 174 | in_fp = SamBam_fopen(in_name,SAMBAM_FILE_SAM); 175 | 176 | } 177 | 178 | int is_header = 1; 179 | 180 | while(1){ 181 | char in_buff[3000]; 182 | 183 | if(!SamBam_fgets(in_fp, in_buff, 2999, 1)) break; 184 | if(out_SAM) 185 | fputs(in_buff, out_fp); 186 | else { 187 | int tail_pos = strlen(in_buff)-1; 188 | 189 | if(in_buff[tail_pos] == '\n') in_buff[tail_pos] =0; 190 | if(in_buff[0]=='@') 191 | SamBam_writer_add_header(&out_writer, in_buff, 1); 192 | else 193 | { 194 | char * rname = NULL, *val_str = NULL; 195 | int flags = -1; 196 | char * chro = NULL; 197 | int pos = -1; 198 | int mapq = -1; 199 | char * cigar = NULL; 200 | char * 
mate_chro = NULL; 201 | int mate_pos = -1; 202 | int tlen = -1; 203 | char * read_text = NULL; 204 | char * qual_text = NULL; 205 | char * extra = NULL; 206 | char * toktmp; 207 | 208 | if(is_header && !out_SAM) 209 | { 210 | if(out_writer.chromosome_name_table->numOfElements<1) 211 | SUBREADprintf("WARNING: no chromosome was found from the header.\nThe BAM output will be useless.\n"); 212 | } 213 | 214 | is_header= 0; 215 | 216 | rname = strtok_r(in_buff, "\t", &toktmp); 217 | val_str = strtok_r(NULL, "\t", &toktmp); 218 | if(val_str) flags = atoi(val_str); 219 | chro = strtok_r(NULL, "\t", &toktmp); 220 | val_str = strtok_r(NULL, "\t", &toktmp); 221 | if(val_str) pos = atoi(val_str); 222 | val_str = strtok_r(NULL, "\t", &toktmp); 223 | if(val_str) mapq = atoi(val_str); 224 | cigar = strtok_r(NULL, "\t", &toktmp); 225 | 226 | mate_chro = strtok_r(NULL, "\t", &toktmp); 227 | val_str = strtok_r(NULL, "\t", &toktmp); 228 | if(val_str) mate_pos = atoi(val_str); 229 | val_str = strtok_r(NULL, "\t", &toktmp); 230 | if(val_str) tlen = atoi(val_str); 231 | read_text = strtok_r(NULL, "\t", &toktmp); 232 | qual_text = strtok_r(NULL, "\t", &toktmp); 233 | extra = toktmp; 234 | 235 | 236 | if(4 & flags) 237 | { 238 | if(chro[0]!='*' && mate_chro[0]=='=') 239 | mate_chro = chro; 240 | 241 | chro = "*"; 242 | pos = 0; 243 | tlen = 0; 244 | } 245 | if(8 & flags) 246 | { 247 | mate_chro = "*"; 248 | mate_pos = 0; 249 | tlen = 0; 250 | } 251 | 252 | SamBam_writer_add_read(& out_writer, rname, flags, chro, pos, mapq, cigar, mate_chro, mate_pos, tlen, strlen(read_text), read_text, qual_text, extra); 253 | } 254 | } 255 | } 256 | 257 | 258 | if(out_SAM) 259 | fclose(out_fp); 260 | else 261 | SamBam_writer_close(&out_writer); 262 | 263 | SamBam_fclose(in_fp); 264 | if(sort_needed && (memcmp(in_name, "./temp-subt", 11)==0)) unlink(in_name); 265 | 266 | 267 | return 0; 268 | } 269 | -------------------------------------------------------------------------------- /src/test-fisher.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "subread.h" 5 | #include "HelperFunctions.h" 6 | 7 | 8 | 9 | double factorial_float(int a) 10 | { 11 | 12 | double ret = 0; 13 | while(a) 14 | ret += log(a--); 15 | return ret; 16 | } 17 | 18 | 19 | 20 | double fisherSub(int a, int b, int c, int d) 21 | { 22 | double ret = factorial_float(a+b) + factorial_float(c+d) + factorial_float(a+c) + factorial_float(b+d) ; 23 | ret -= factorial_float(a) + factorial_float(b) + factorial_float(c) + factorial_float(d) + factorial_float(a+b+c+d); 24 | return pow(2.71828183, ret); 25 | } 26 | 27 | 28 | 29 | 30 | /** 31 | * See HELP string or run with no arguments for usage. 32 | *

33 | * The code used to calculate a Fisher p-value comes originally from a 34 | * JavaScript program 35 | * by T. Kadosawa (kadosawa@niaes.affrc.go.jp). 36 | * Retrieved from http://www.users.zetnet.co.uk/hopwood/tools/StatTests.java on 3/Jul/2012 37 | * 38 | * @author David Hopwood 39 | * @date 2000/04/23 40 | */ 41 | 42 | double fisher_exact_test(int a, int b, int c, int d) 43 | { 44 | 45 | if (a * d > b * c) { 46 | a = a + b; b = a - b; a = a - b; 47 | c = c + d; d = c - d; c = c - d; 48 | } 49 | 50 | if (a > d) { a = a + d; d = a - d; a = a - d; } 51 | if (b > c) { b = b + c; c = b - c; b = b - c; } 52 | 53 | double p_sum = 0.0; 54 | 55 | double p = fisherSub(a, b, c, d); 56 | while (a >= 0) { 57 | p_sum += p; 58 | if (a == 0) break; 59 | --a; ++b; ++c; --d; 60 | p = fisherSub(a, b, c, d); 61 | } 62 | 63 | return p_sum; 64 | } 65 | 66 | 67 | long double fastfact(int x){ 68 | return logl(x)*x - x + 0.5 * logl(2*M_PI* x) + 1/(12.*x) - 1./(360.* x*x*x) + 1./(1260.* x*x*x*x*x) - 1./(1680.*x*x*x*x*x*x*x);// + (x>60?0:(1./(1188.*x*x*x*x*x*x*x*x*x ) )); 69 | } 70 | 71 | main(){ 72 | unsigned int a = 10 , c = 11, 73 | b = 11 , d = 5000; 74 | 75 | double fisher, fisher_old; 76 | fisher = fast_fisher_test_one_side(a,b,c,d, NULL, 0); 77 | fisher_old = fisher_exact_test(a,b,c,d); 78 | printf("Log fisher = %.7f ; Old fisher = %.7f\n", log(fisher),log(fisher_old)); 79 | 80 | long double x1 = 1E-19L + 1E-20L; 81 | long double x2 = 1L - expl(logl(0.5L) + logl(2.0L)); 82 | printf("New Vals: x1=%LG, x2=%LG\n", x1,x2); 83 | } 84 | 85 | -------------------------------------------------------------------------------- /src/test-seek-zlib.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "seek-zlib.h" 3 | 4 | 5 | int main(int argc, char ** argv){ 6 | unsigned int tested_cell_total = atoi(argv[4]); 7 | unsigned int tested_inc ; 8 | tested_inc = 337; 9 | 10 | seekable_zfile_t * fp = malloc(sizeof(seekable_zfile_t)); 11 | 
seekable_position_t * pos = malloc(tested_cell_total*sizeof(seekable_position_t)); 12 | seekgz_open(argv[1], fp, NULL); 13 | 14 | char buf[1002]; 15 | char should[tested_cell_total][1002]; 16 | long long int text_pos[tested_cell_total]; 17 | long long int alll = 0, marked = 0; 18 | long long int full_size = atoll(argv[2]); 19 | long long int tested_cell_no = 0; 20 | long long int step = full_size / tested_cell_total; 21 | 22 | unsigned int rand_seed = atoi(argv[3]); 23 | //step = 10; 24 | 25 | int write_cell = -1; 26 | 27 | long long int first_all = 0; 28 | while(1){ 29 | int rl = seekgz_gets(fp, buf, 1000); 30 | unsigned int inchunk = fp -> in_chunk_offset; 31 | 32 | if(0){ 33 | if(rl>92) 34 | fprintf(stderr, "LEN=%d; READ=%s", rl, buf); 35 | if(rl>52 && rl<91) 36 | fprintf(stderr, "LEN=%d; READ=%s", rl, buf); 37 | } 38 | if(rl<1) break; 39 | //fprintf(stdout, "%s", buf); 40 | alll += rl; 41 | 42 | if(write_cell >=0){ 43 | strcpy(should[write_cell], buf); 44 | write_cell = -1; 45 | } 46 | 47 | if(1&& alll - rand_seed > tested_cell_no * step ) 48 | { 49 | if(tested_cell_no == 0) 50 | first_all = alll; 51 | if(tested_cell_no < tested_cell_total){ 52 | write_cell = ((1+tested_cell_no) * tested_inc) % tested_cell_total; 53 | seekgz_tell(fp, pos+write_cell); 54 | //assert(pos[write_cell].block_gzfile_offset > 10); 55 | text_pos[write_cell] = alll; 56 | //if(alll==925826012||alll==889250153){ 57 | //if(inchunk == fp->in_block_offset + 1 || inchunk == fp->in_block_offset - 1 ) 58 | // fprintf(stderr, "MATCH: IN_BLOCK_OFFSET=%u IN_CHUNK_OFFSET=%u/%u IS_LAST=%d\n", fp->in_block_offset, inchunk, fp -> txt_buffer_used, fp->is_the_last_chunk); 59 | //} 60 | if(alll==344780683){ 61 | char * quickview = malloc(100000); 62 | quickview[0]=0; 63 | //memcpy(quickview, fp -> block_dict_window, fp -> block_dict_window_size); 64 | quickview[fp -> block_dict_window_size]=0; 65 | fprintf(stderr, 
"----------------------------------------------------\n%s\n=====================================================\n\n", quickview); 66 | free(quickview); 67 | } 68 | } 69 | tested_cell_no++; 70 | } 71 | } 72 | 73 | fprintf(stderr, "TOTAL=%lld\n",alll); 74 | assert(tested_cell_no >= tested_cell_total); 75 | 76 | int i, valid=0; 77 | long long int last_all = 0; 78 | for(i=0;i< tested_cell_total ;i++){ 79 | //assert(pos[i].block_gzfile_offset > 10); 80 | seekgz_seek(fp, pos+i); 81 | unsigned int inchunk = fp -> in_chunk_offset, chunk_size = fp -> txt_buffer_used; 82 | //fprintf(stderr, "JUMPTO=%u\n", pos[i].block_gzfile_offset); 83 | int rl = seekgz_gets(fp, buf, 1000); 84 | if(rl <= 0) break; 85 | if(strcmp(should[i], buf)!=0) 86 | { 87 | char * quickshow = malloc(1000000); 88 | quickshow[0]=0; 89 | //memcpy(quickshow , pos[i].dict_window, pos[i].block_dict_window_size); 90 | quickshow[ pos[i].block_dict_window_size ] = 0; 91 | 92 | fprintf(stderr, "=================================================\nMATCH:LEN=%d; TOTAL=%lld; GZFP=%lld; INBLOCK=%u; INCHUNK=%u/%u\t\tMATCH=%d; \nREAD=%s\nORGN=%s\n%s\n", rl, text_pos[i], pos[i].block_gzfile_offset, pos[i].in_block_text_offset, inchunk, chunk_size, strcmp(should[i], buf), buf, should[i], quickshow); 93 | free(quickshow); 94 | } 95 | else 96 | valid++; 97 | last_all = text_pos[i]; 98 | } 99 | 100 | fprintf(stderr, "FINISHED size=%lld first=%lld [-1]=%lld DOTS=%d/%u rand=%u\n", full_size, first_all, last_all, valid,tested_cell_total , rand_seed); 101 | } 102 | -------------------------------------------------------------------------------- /src/test_qs.c: -------------------------------------------------------------------------------- 1 | /* Double-Click To Select Code */ 2 | 3 | #include 4 | 5 | void merge_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2)); 6 | void merge_sort(void * arr, int 
#include <stdlib.h>
#include <string.h>

void merge_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2));
void merge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2));
void merge_ints(void * arr, int start, int items1, int items2);
int compare_ints(void* arr, int l, int r);
/* Returns void so that it matches the `exchange` callback type expected by merge_sort(). */
void exchange_ints(void* arr, int l, int r);

/* Demo driver: sorts the first 30 values of a fixed list and prints the result. */
int main(void)
{
	int arr[30];
	int val [] = {9,1,2,9,6,7,8,9,1,2,3,9,4,1,2,3,4,6,1,9,3,1,4,5,3,2,5,4,2,4,8};
	int x;

	for(x=0; x<30;x++)arr[x]=val[x];

	merge_sort(arr, 30, compare_ints, exchange_ints, merge_ints);

	for(x=0; x<30;x++)printf("V[%d]=%d\n",x, arr[x]);

	return 0;
}

/* Swaps elements l and r of an int array. */
void exchange_ints(void* arr, int l, int r)
{
	int *arri = arr;
	int tm;
	tm=arri[l];
	arri[l]=arri[r];
	arri[r]=tm;
}

/* Three-way comparison of elements l and r of an int array: 0 if equal, 1 if arr[l] > arr[r], otherwise -1. */
int compare_ints(void* arr, int l, int r)
{
	int * arri = arr;
	if(arri[l]==arri[r])return 0;
	if(arri[l]>arri[r])return 1;
	return -1;
}

/*
 * Merges two adjacent sorted runs of an int array,
 *   [start, start+items1)  and  [start+items1, start+items1+items2),
 * into one sorted run occupying the same positions.  A temporary buffer of
 * items1+items2 ints is used; the program exits if it cannot be allocated.
 *
 * NOTE(review): the loop header and the selection condition were garbled in
 * the available copy of this file; they are written here as the standard
 * two-way merge (take from the right run when the left run is exhausted or
 * its head is >= the right head).  Confirm against the pristine source.
 */
void merge_ints(void * arr, int start, int items1, int items2)
{
	int * arri = arr;
	int total = items1 + items2;
	int r1 = start;
	int r1_end = start + items1;
	int r2 = r1_end;
	int r2_end = r1_end + items2;
	int x;
	int * tmp;

	if(total <= 0) return;

	tmp = malloc(sizeof(int) * total);
	if(!tmp)
	{
		perror("malloc");
		exit(EXIT_FAILURE);
	}

	for(x = 0; x < total; x++)
	{
		if((r1 >= r1_end) || (r2 < r2_end && arri[r1] >= arri[r2]))
		{
			tmp[x]=arri[r2];
			r2++;
		}else{
			tmp[x]=arri[r1];
			r1++;
		}
	}

	memcpy(arri+start , tmp, sizeof(int) * total);
	free(tmp);
}


/*
 * Sorts `items` elements starting at index `start`.
 * Ranges of more than 4 elements are split in half, sorted recursively and
 * combined with `merge`; shorter ranges are sorted by selection sort using
 * `compare` and `exchange`.  The range boundaries and contents are printed
 * after each step as trace output.
 */
void merge_sort_run(void * arr, int start, int items, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2))
{
	if(items > 4)
	{
		int xx,half_point = items/2;

		merge_sort_run(arr, start, half_point, compare, exchange, merge);
		merge_sort_run(arr, start + half_point, items - half_point, compare, exchange, merge);
		merge(arr, start, half_point, items - half_point);
		printf("IN: %d-%d-%d\n", start, start + half_point, start+items);
		for(xx=start; xx < items+start; xx++)
			printf("INNER: %d\n", ((int *)arr)[xx]);
	}
	else
	{
		int i, j, xx;
		for(i=start; i< start + items - 1; i++)
		{
			/* Find the smallest remaining element and move it to position i. */
			int min_j = i;
			for(j=i + 1; j< start + items; j++)
			{
				if(compare(arr, min_j, j) > 0)
					min_j = j;
			}
			if(i!=min_j)
				exchange(arr, i, min_j);
		}
		printf("RD: %d-%d\n", start,start+items);
		for(xx=start; xx < items+start; xx++)
			printf("INRED: %d\n", ((int *)arr)[xx]);
	}
}

/* Sorts the first `arr_size` elements of `arr` using the supplied compare, exchange and merge callbacks. */
void merge_sort(void * arr, int arr_size, int compare (void * arr, int l, int r), void exchange(void * arr, int l, int r), void merge(void * arr, int start, int items, int items2))
{
	merge_sort_run(arr, 0, arr_size, compare, exchange, merge);
}
list_of_exons) ] 34 | HashTable * result_table; // "$gene_id\t$transcript_id\nALL|UNIQUE" => NULL + number + 1 35 | } txunique_context_t; 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /src/zlib_test.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "zlib.h" 6 | 7 | 8 | main() 9 | { 10 | char * tdata = "ABCABC\n"; 11 | int tdata_len = 7, ret; 12 | 13 | z_stream strm; 14 | strm.zalloc = Z_NULL; 15 | strm.zfree = Z_NULL; 16 | strm.opaque = Z_NULL; 17 | strm.avail_in = 0; 18 | strm.next_in = Z_NULL; 19 | ret = deflateInit(&strm, 0); 20 | if (ret != Z_OK)printf("Ohh!\n"); 21 | 22 | char * out_buff = malloc(999999); 23 | strm.avail_out = 99999; 24 | strm.next_out = out_buff; 25 | strm.next_in = tdata; 26 | strm.avail_in = tdata_len; 27 | ret = deflate(&strm, Z_FINISH); 28 | 29 | int have = 99999 - strm.avail_out; 30 | 31 | printf("RET=%d; LEN=%d\n",ret, have); 32 | 33 | FILE * ofp = fopen("tt.gz","wb"); 34 | fwrite(out_buff,1,have,ofp); 35 | fclose(ofp); 36 | 37 | z_stream strmx; 38 | strmx.zalloc = Z_NULL; 39 | strmx.zfree = Z_NULL; 40 | strmx.opaque = Z_NULL; 41 | strmx.avail_in = 0; 42 | strmx.next_in = Z_NULL; 43 | ret = inflateInit(&strmx); 44 | if (ret != Z_OK)printf("Ohh!\n"); 45 | char * in_buff = malloc(999999); 46 | 47 | strmx.avail_out = 99999; 48 | strmx.next_out = in_buff; 49 | strmx.next_in = out_buff; 50 | strmx.avail_in = have; 51 | ret = inflate(&strmx, Z_FINISH); 52 | have = 99999 - strmx.avail_out; 53 | 54 | 55 | printf("XRET=%d; LEN=%d; RES=%s\n",ret, have, in_buff); 56 | FILE * fbig = fopen("t.bin","rb"); 57 | int fpos = 0; 58 | while(!feof(fbig)) 59 | { 60 | int nch = fgetc(fbig); 61 | if(nch<0) break; 62 | in_buff[fpos++]=nch; 63 | } 64 | fclose(fbig); 65 | 66 | strm.zalloc = Z_NULL; 67 | strm.zfree = Z_NULL; 68 | strm.opaque = Z_NULL; 69 | strm.avail_in = 0; 70 | strm.next_in = Z_NULL; 71 | 72 
| ret = deflateInit(&strm, 1); 73 | if (ret != Z_OK)printf("Ohh!\n"); 74 | 75 | strm.next_in = in_buff; 76 | strm.avail_in = fpos; 77 | strm.next_out = out_buff; 78 | strm.avail_out = 999999; 79 | 80 | ret = deflate(&strm, Z_FINISH); 81 | 82 | have = 999999 - strmx.avail_out; 83 | 84 | printf("XRET=%d; LEN=%d; RES=%s\n",ret, have, in_buff); 85 | FILE * fbigo = fopen("tt.bin.gz","wb"); 86 | fwrite(out_buff, 1, have, fbigo); 87 | fclose(fbigo); 88 | } 89 | -------------------------------------------------------------------------------- /test/exactSNP/data/test-in.BAM: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/exactSNP/data/test-in.BAM -------------------------------------------------------------------------------- /test/exactSNP/exactSNP-test.sh: -------------------------------------------------------------------------------- 1 | # This is a minimum runnable test case. 2 | # The options are: 3 | # -g : reference sequence. 4 | # -o : output VCF. 5 | # -i : input read alignment; SAM by default, but we use a BAM here. 6 | # -b : the input read alignment file is a BAM file. 7 | # More options are available. 8 | mkdir -p result 9 | echo ../../bin/exactSNP -g ../chr901.fa -o result/test-out.VCF -i data/test-in.BAM -b 10 | ../../bin/exactSNP -g ../chr901.fa -o result/test-out.VCF -i data/test-in.BAM -b 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes.gtf: -------------------------------------------------------------------------------- 1 | 1 SOURCE exon 281759 282189 . + . gene_id "GENE1"; transcript_id "GENE1.1"; 2 | 1 SOURCE exon 282484 282766 . + . gene_id "GENE1"; transcript_id "GENE1.1"; 3 | 1 SOURCE exon 282849 282891 . + . gene_id "GENE1"; transcript_id "GENE1.1"; 4 | 1 SOURCE exon 282761 283053 . + . 
gene_id "GENE2"; transcript_id "GENE2.1"; 5 | 1 SOURCE exon 283132 283231 . + . gene_id "GENE2"; transcript_id "GENE2.1"; 6 | 1 SOURCE exon 283314 283336 . + . gene_id "GENE2"; transcript_id "GENE2.1"; 7 | 1 SOURCE exon 283444 283533 . + . gene_id "GENE2"; transcript_id "GENE2.1"; 8 | 1 SOURCE exon 283813 284245 . + . gene_id "GENE2"; transcript_id "GENE2.1"; -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r1.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/across_genes_r1.bam -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r1.bam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282766 0 3 | GENE1 1 282849 282891 0 4 | GENE2 1 282761 283053 0 5 | GENE2 1 283132 283231 0 6 | GENE2 1 283314 283336 0 7 | GENE2 1 283444 283533 0 8 | GENE2 1 283813 284245 0 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r1.sam: -------------------------------------------------------------------------------- 1 | @PG 2 | @SQ SN:1 LN:30427671 3 | Read1 163 1 282699 255 68M82N33M = 283162 753 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 4 | Read1 83 1 283162 255 70M82N23M107N8M = 282699 -753 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 5 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r1.sam.ora: -------------------------------------------------------------------------------- 1 | GENE1 
1 281759 282189 0 2 | GENE1 1 282484 282766 0 3 | GENE1 1 282849 282891 0 4 | GENE2 1 282761 283053 0 5 | GENE2 1 283132 283231 0 6 | GENE2 1 283314 283336 0 7 | GENE2 1 283444 283533 0 8 | GENE2 1 283813 284245 0 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r2.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/across_genes_r2.bam -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r2.bam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282766 1 3 | GENE1 1 282849 282891 0 4 | GENE2 1 282761 283053 0 5 | GENE2 1 283132 283231 0 6 | GENE2 1 283314 283336 0 7 | GENE2 1 283444 283533 0 8 | GENE2 1 283813 284245 0 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r2.sam: -------------------------------------------------------------------------------- 1 | @PG 2 | @SQ SN:1 LN:30427671 3 | Read3 163 1 282164 255 26M294N75M = 282694 713 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:198 nM:i:1 4 | Read3 83 1 282694 255 73M82N28M = 282164 -713 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:198 nM:i:1 -------------------------------------------------------------------------------- /test/featureCounts/data/across_genes_r2.sam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282766 1 3 | GENE1 1 282849 282891 0 4 | GENE2 1 282761 283053 0 5 | GENE2 1 283132 283231 0 6 | GENE2 1 283314 
283336 0 7 | GENE2 1 283444 283533 0 8 | GENE2 1 283813 284245 0 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron.gtf: -------------------------------------------------------------------------------- 1 | 1 SOURCE exon 99922 100031 . + . gene_id "GENE1"; transcript_id "GENE1.1"; 2 | 1 SOURCE exon 100657 101834 . + . gene_id "GENE1"; transcript_id "GENE1.1"; 3 | 1 SOURCE exon 99894 101834 . + . gene_id "GENE1"; transcript_id "GENE1.2"; 4 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r1.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/across_intron_r1.bam -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r1.bam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 99922 100031 0 2 | GENE1 1 100657 101834 0 3 | GENE1 1 99894 101834 0 4 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r1.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @PG 3 | @SQ SN:1 LN:30427671 4 | Read1 163 1 100005 255 27M625N74M = 100779 875 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 5 | Read1 83 1 100779 255 101M = 100005 -875 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 6 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r1.sam.ora: 
-------------------------------------------------------------------------------- 1 | GENE1 1 99922 100031 0 2 | GENE1 1 100657 101834 0 3 | GENE1 1 99894 101834 0 4 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r2.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/across_intron_r2.bam -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r2.bam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 99922 100031 0 2 | GENE1 1 100657 101834 0 3 | GENE1 1 99894 101834 0 4 | -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r2.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.3 SO:coordinate 2 | @PG 3 | @SQ SN:1 LN:30427671 4 | Read2 163 1 99957 255 75M625N26M = 99964 733 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 5 | Read2 83 1 99964 255 68M625N33M = 99957 -733 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 -------------------------------------------------------------------------------- /test/featureCounts/data/across_intron_r2.sam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 99922 100031 0 2 | GENE1 1 100657 101834 0 3 | GENE1 1 99894 101834 0 4 | -------------------------------------------------------------------------------- /test/featureCounts/data/compare.sh: -------------------------------------------------------------------------------- 1 | SAM_FILE=$1 2 | ORA_FILE=$2 3 | 
ANNO_FILE=$3 4 | PARAMETERS=$4 5 | TEST_NAME=$5 6 | IS_FEATURE_LEVEL=$6 7 | 8 | 9 | TMPF=data/DEL4-`date '+%s'` 10 | printf "Testing %-60s [" "$TEST_NAME ... " 11 | #echo nohup ../../bin/featureCounts $PARAMETERS -o $TMPF.FC -a $ANNO_FILE $SAM_FILE 12 | nohup ../../bin/featureCounts $PARAMETERS -o $TMPF.FC -a $ANNO_FILE $SAM_FILE >/dev/null 2>&1 13 | 14 | 15 | if [ "$IS_FEATURE_LEVEL" == "" ] 16 | then 17 | cat $ORA_FILE $TMPF.FC |grep -v ^# |grep -iv Geneid | awk 'BEGIN{is_faild = 0; nl2=0; nl3=0} NF==2{ora[$1]=$2; is_faild++; nl2++} NF>3{if(ora[$1]==$7){is_faild -- } nl3++} END{if(is_faild || nl2!=nl3)printf("%c[31mFAILED%c[0m", 27,27);else printf("%c[32mPASS%c[0m", 27,27)}' 18 | elif [ "$IS_FEATURE_LEVEL" == "FL" ] 19 | then 20 | cat $ORA_FILE $TMPF.FC |grep -v ^# |grep -iv Geneid | awk 'BEGIN{is_faild = 0; nl2=0; nl3=0} NF==5{ora[$1 $2 $3]=$5; is_faild++; nl2++} NF>6{if(ora[$1 $2 $3]==$7){is_faild -- } nl3++} END{if(is_faild||nl2!=nl3)printf("%c[31mFAILED%c[0m", 27,27);else printf("%c[32mPASS%c[0m", 27,27)}' 21 | else 22 | cat $ORA_FILE $TMPF.FC |grep -v ^# |grep -iv Geneid | awk 'BEGIN{is_faild = 0; nl2=0; nl3=0} NF==2{ora[$1]=$2; is_faild++; nl2++} NF>3{if(ora[$1]==$7){is_faild -- } nl3++} END{if(is_faild || nl2!=nl3)printf("%c[31mFAILED%c[0m", 27,27);else printf("%c[32mPASS%c[0m", 27,27)}' 23 | lines_res=`cat $ORA_FILE.jcounts $TMPF.FC.jcounts |grep -v ^# |grep -v PrimaryGene |cut -f3-|sort |uniq -c|awk '$1!=2' |wc -l` 24 | if [[ $lines_res -gt 0 ]] 25 | then 26 | echo |awk '{ printf(",%c[31mFAILED%c[0m", 27,27) }' 27 | else 28 | echo |awk '{ printf(",%c[32mPASS%c[0m", 27,27) }' 29 | fi 30 | fi 31 | 32 | echo "]" 33 | 34 | rm -f $TMPF.FC* 35 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-BINS.SAF: -------------------------------------------------------------------------------- 1 | GeneID Chr Start End Strand 2 | bin1 chrX 10000 10049 + 3 | bin2 chrX 10050 10099 + 4 | bin3 chrX 10100 10149 + 
5 | bin4 chrX 10150 10199 + 6 | bin5 chrX 10200 10249 + 7 | bin6 chrX 10250 10299 + 8 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-BINS.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.0 SO:unsorted 2 | @SQ SN:chrX LN:999950 3 | R1.1 0 chrX 10000 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 4 | R1.1 0 chrX 10040 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 5 | R1.1 0 chrX 10080 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 6 | R1.1 0 chrX 10120 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 7 | R1.1 0 chrX 10160 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 8 | R1.1 0 chrX 10200 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 9 | R1.1 0 chrX 10240 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 10 
| R1.1 0 chrX 10280 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 11 | R1.1 0 chrX 10320 40 7M * * * AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 12 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-BothEnds.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 16 7 | simu_gene6 27 8 | simu_gene7 168 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Chimeric.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 22 7 | simu_gene6 29 8 | simu_gene7 180 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-DoNotSort.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 31 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-EXON-ONLY.ora: -------------------------------------------------------------------------------- 1 | # Program 2 | Geneid Chr Start End corner-JUNC.sam 3 | simu_gene1 0 4 | simu_gene2 1 5 | simu_gene3 0 6 | simu_gene4 1 7 | simu_gene5 0 8 | simu_gene6 0 9 | simu_gene7 0 10 | -------------------------------------------------------------------------------- 
/test/featureCounts/data/corner-Extend3.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 34 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Extend5.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 34 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Fraction.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 31 8 | simu_gene7 185.50 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-INDEL.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "corner-INDEL.sam" "-o" "corner-INDEL.ora" "-p" 3 | geneid nreads 4 | simu_gene1 0 5 | simu_gene2 1 6 | simu_gene3 1 7 | simu_gene4 0 8 | simu_gene5 0 9 | simu_gene6 0 10 | simu_gene7 0 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-INDEL.sam: -------------------------------------------------------------------------------- 1 | @HD VN:1.0 SO:unsorted 2 | @SQ SN:chr3 LN:999950 3 | @SQ SN:chr4 LN:999950 4 | @SQ SN:chr5 LN:999950 5 | @PG ID:subread PN:subread VN:1.4.0b4 6 | chr3_271603_272081_0:0:0_0:0:0_2 99 chr3 471603 199 10M20D30M10I50M = 471982 479 
CAGGGAAAAGCAGGTGGAAAAACAGAAATCGAACATAAAGATGGTAGACTCCAACCCAAACACTCTAACACCTACATTAAATACAAATGGTTAAATTAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0 7 | chr3_271603_272081_0:0:0_0:0:0_2 147 chr3 471982 199 40M10000D60M = 471603 -479 GAAGAACTGAAAACCAGGACTCCAAGAGACATTTGTACAACCATATCTTAGCAGCATTGTTCACCACAACCACCATGTCTTAGTGAAAGGTGACAACATG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:0 8 | chr3_827607_828091_0:0:0_0:0:0_3 83 chr3 827992 199 100M = 827607 -485 CCCACAGAGCTGGGATCATAGGCGTGAGCCACCACACCCAGATGAAATATTTTTAAGTAAATTACAGGTATCATGACATCTCACCCCTGAGTACTTCAGC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1 9 | chr3_827607_828091_0:0:0_0:0:0_3 163 chr3 827607 199 10M20I70M = 827992 485 TGGGTCTGGAGGCTGGGTGGGGTTGGGGGACTCAGCGTCACGGTGACATCAGCCCTGCGGCCAGCAGCTCGGCTGACCCCGGGTCTGGAGGCCAGGATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:1 10 | chr3_720214_720690_0:0:0_0:0:0_4 99 chr3 720214 199 30M10D30M10I30M = 720591 477 CTGGCCACAGGCACTGGAGCCACGAAAGCAACAGCCCTGGGCAGCCCAGCACCATCCTGGGTTCCCTGCTGCCGGCGCCAGCCCCACGTACCCCCGACCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2 11 | chr3_720214_720690_0:0:0_0:0:0_4 147 chr3 720591 199 100M = 720214 -477 CAGCCATGCCCCTGCCACACACACAGAAGACTCCCCACATCAGAGGGGAGGTCAGAGGTCTCAAAGGTCAGGTTAGAGCTGGGTCAATCCGTTTCCATGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:2 12 | chr3_447928_448462_0:0:0_0:0:0_5 99 chr3 447928 199 40M10D60M = 448363 535 GACAGTGAGGCCACCTGGATATCTAGGGTCCCACAGTAGACAGGGATGGGGTGGTCCTGGGGGACAGGGACACCTGCCTTCCACACAACCGCACTGGGGC 
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:3 13 | chr3_447928_448462_0:0:0_0:0:0_5 147 chr3 448363 199 100M = 447928 -535 CACCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACCGTGTTAGGCAGGATGGTCTCAAACTCCTGACCTTGTGATCCGCCCGCCTCGGCCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:9 SD:i:0 SN:i:3 14 | chr3_260025_260490_0:0:0_0:0:0_6 83 chr3 260391 199 1M98I1M = 260025 -466 CATTTCTTTCCTTATGTATAAACAGTTGCTAAAAAGACTTTTCTTTCCATGTGGAATTACGTTGACATCTTCATTGAAAATCAATGGACTATAGAGGTGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4 15 | chr3_260025_260490_0:0:0_0:0:0_6 163 chr3 260025 199 100I = 260391 466 TATTTTTAGTAGAGACGGGGTTGCACCATGTTGGCCAGGATGGTCTCGATCTCCTGACTTTGTGATCCGCCTGCCTTGGCCTCCCAAAGTGCTAGGATTA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:4 16 | chr3_665069_665608_0:0:0_0:0:0_7 99 chr3 665069 199 1M99I = 665509 540 AGTTTTGCCTTGTAGCCCAGGCTGGAGTGCAGTGGCGCAATCTCTGGTCACTGCAACCTCCGCCTGCCGGGTTCAAGCGATTCTCCTTCCTCAGCCTTCC IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5 17 | chr3_665069_665608_0:0:0_0:0:0_7 147 chr3 665509 199 99I1M = 665069 -540 GGCTAAGTTTTTGTATTTTAGTAGAGACGGGGTTTCACCATGTTACCAAGGCTGGTTGCAAACTCCTGAGCTCAGGCGATCCACCCGCCTCAGCCTCCCA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:5 18 | chr3_107489_107962_0:0:0_0:0:0_8 83 chr3 107863 199 30M100D70M = 107489 -474 ACGGCTTCCTGCCCCCCGCGCAGGCGGAGATGTTCGCCTGGCAGCAGGAGCTCCTGCGGAAGCAGAACCTGGCCCGGTAGGTGCGGGGAGGCGGGCGGGG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:6 19 | 
chr3_107489_107962_0:0:0_0:0:0_8 163 chr3 107489 199 100M = 107863 474 GGGGCCGGCAATTAGCGGAGGCGGCGGGGGAGGGGCGCCGGGGCCTTTACGGGAACGGGGGCGGGGGGGACGCCGCTCATTGCGCTGCCGTCCACAGGGA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII SB:i:10 SC:i:10 SD:i:0 SN:i:6 20 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-IgnoreDup.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 7 3 | simu_gene2 2 4 | simu_gene3 3 5 | simu_gene4 1 6 | simu_gene5 25 7 | simu_gene6 12 8 | simu_gene7 98 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-JUNC-ONLY.ora: -------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-o" "corner-JUNC-ONLY.ora" "-a" "test-minimum.GTF" "--countSplitAlignmentsOnly" "-O" "-f" "corner-JUNC.sam" 2 | Geneid Chr Start End corner-JUNC.sam 3 | simu_gene1 chr3 100 10000 0 4 | simu_gene1 chr3 20000 30000 0 5 | simu_gene1 chr3 40000 89000 0 6 | simu_gene2 chr3 100010 101000 0 7 | simu_gene2 chr3 102000 103000 0 8 | simu_gene2 chr3 104000 129000 0 9 | simu_gene2 chr3 102000 131000 0 10 | simu_gene3 chr3 500010 501000 0 11 | simu_gene3 chr3 502000 503000 0 12 | simu_gene3 chr3 504000 529000 0 13 | simu_gene3 chr3 600000 669000 2 14 | simu_gene4 chr3 602000 631000 1 15 | simu_gene4 chr3 672000 699000 0 16 | simu_gene4 chr3 702000 719000 0 17 | simu_gene5 chr4 20000 100000 0 18 | simu_gene5 chr4 120000 190000 0 19 | simu_gene5 chr4 200000 210000 0 20 | simu_gene5 chr4 220000 300000 0 21 | simu_gene6 chr4 420000 490000 0 22 | simu_gene6 chr4 500000 560000 0 23 | simu_gene7 chr5 120000 490000 0 24 | simu_gene7 chr5 500000 960000 0 25 | simu_gene7 chr5 970000 1000000 0 26 | 
-------------------------------------------------------------------------------- /test/featureCounts/data/corner-JUNC.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "corner-JUNC.sam" "-o" "corner-JUNC.ora" "-p" "-R" 3 | geneid nreads 4 | simu_gene1 0 5 | simu_gene2 1 6 | simu_gene3 1 7 | simu_gene4 1 8 | simu_gene5 0 9 | simu_gene6 0 10 | simu_gene7 0 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Jcounts-FA.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 31 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Jcounts.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 31 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-LargestOverlap.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 24 7 | simu_gene6 31 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-MaxOPs.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 31 8 | simu_gene7 182 9 | 
-------------------------------------------------------------------------------- /test/featureCounts/data/corner-MinMAPQ.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 20 7 | simu_gene6 28 8 | simu_gene7 157 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-MinOverlap.ora: -------------------------------------------------------------------------------- 1 | Geneid data/test-junc.sam 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 13 7 | simu_gene6 21 8 | simu_gene7 145 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-MultiMapping.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 31 8 | simu_gene7 189 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-NH-PM.ora: -------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.4; Command:"../../../bin/featureCounts" "-M" "-p" "-a" "test-minimum.GTF" "-o" "del4.FC" "--primary" "corner-NH.sam" 2 | geneid nreads 3 | simu_gene1 0 4 | simu_gene2 0 5 | simu_gene3 2 6 | simu_gene4 2 7 | simu_gene5 0 8 | simu_gene6 0 9 | simu_gene7 0 10 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-NH.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "corner-NH.sam" "-o" "corner-NH.ora" "-p" 3 | geneid nreads 4 | simu_gene1 0 5 | simu_gene2 0 6 | simu_gene3 
0 7 | simu_gene4 0 8 | simu_gene5 0 9 | simu_gene6 0 10 | simu_gene7 0 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-ONEEND-BOTH.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "corner-ONEEND.sam" "-o" "corner-ONEEND-BOTH.ora" "-p" "-B" 3 | geneid nreads 4 | simu_gene1 0 5 | simu_gene2 0 6 | simu_gene3 0 7 | simu_gene4 0 8 | simu_gene5 0 9 | simu_gene6 0 10 | simu_gene7 0 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-ONEEND.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "corner-ONEEND.sam" "-o" "corner-ONEEND.ora" "-R" "-p" 3 | geneid nreads 4 | simu_gene1 6 5 | simu_gene2 0 6 | simu_gene3 1 7 | simu_gene4 0 8 | simu_gene5 0 9 | simu_gene6 0 10 | simu_gene7 0 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-PEdist.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 13 7 | simu_gene6 20 8 | simu_gene7 126 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Read2Pos3.ora: -------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 24 7 | simu_gene6 30 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-Read2Pos5.ora: 
-------------------------------------------------------------------------------- 1 | geNeiD 0 2 | simu_gene1 0 3 | simu_gene2 0 4 | simu_gene3 0 5 | simu_gene4 0 6 | simu_gene5 23 7 | simu_gene6 32 8 | simu_gene7 182 9 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-fractions.SAF: -------------------------------------------------------------------------------- 1 | @SQ SN:chr3 LN:999950 2 | g1 chr3 1000 2000 + 3 | g1 chr3 5000 6000 + 4 | g2 chr3 1000 2000 + 5 | g2 chr3 3000 4000 + 6 | g3 chr3 1000 2000 + 7 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-fractions.ora: -------------------------------------------------------------------------------- 1 | g1 0.78 2 | g2 1.28 3 | g3 0.44 4 | -------------------------------------------------------------------------------- /test/featureCounts/data/corner-fractions.sam: -------------------------------------------------------------------------------- 1 | @SQ SN:chr3 LN:999950 2 | R1 0 chr3 1000 40 100M * 0 0 N I NH:i:3 HI:i:1 3 | R1 0 chr3 3000 40 100M * 0 0 N I NH:i:3 HI:i:2 4 | R1 0 chr3 5000 40 100M * 0 0 N I NH:i:3 HI:i:3 5 | R2 0 chr3 1000 40 100M * 0 0 N I NH:i:1 HI:i:1 6 | R3 0 chr3 3000 40 100M * 0 0 N I NH:i:2 HI:i:1 7 | R3 0 chr3 9000 40 100M * 0 0 N I NH:i:2 HI:i:2 8 | -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/intron_between.bam -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between.bam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282891 0 3 | 
-------------------------------------------------------------------------------- /test/featureCounts/data/intron_between.gtf: -------------------------------------------------------------------------------- 1 | 1 SOURCE exon 281759 282189 . + . gene_id "GENE1"; transcript_id "GENE1.1"; 2 | 1 SOURCE exon 282484 282891 . + . gene_id "GENE1"; transcript_id "GENE1.1"; -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between.sam: -------------------------------------------------------------------------------- 1 | @PG 2 | @SQ SN:1 LN:30427671 3 | Read2 163 1 281941 255 25M85N76M = 282563 723 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 4 | Read2 83 1 282563 255 101M = 281941 -723 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between.sam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282891 0 3 | -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between_nointron.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/intron_between_nointron.bam -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between_nointron.bam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282891 0 3 | -------------------------------------------------------------------------------- 
/test/featureCounts/data/intron_between_nointron.sam: -------------------------------------------------------------------------------- 1 | @PG 2 | @SQ SN:1 LN:30427671 3 | Read2 163 1 281941 255 101M = 282563 723 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 4 | Read2 83 1 282563 255 101M = 281941 -723 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA * NH:i:1 HI:i:1 AS:i:200 nM:i:0 -------------------------------------------------------------------------------- /test/featureCounts/data/intron_between_nointron.sam.ora: -------------------------------------------------------------------------------- 1 | GENE1 1 281759 282189 0 2 | GENE1 1 282484 282891 0 3 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-chralias.GTF: -------------------------------------------------------------------------------- 1 | chr3 SAF exon 100 10000 . + 0 gene_id "simu_gene1"; transcript_id "TRN_simu_gene1"; exon_id "EXON_simu_gene1.1"; 2 | chr3 SAF exon 20000 30000 . + 0 gene_id "simu_gene1"; transcript_id "TRN_simu_gene1"; exon_id "EXON_simu_gene1.2"; 3 | chr3 SAF exon 40000 89000 . + 0 gene_id "simu_gene1"; transcript_id "TRN_simu_gene1"; exon_id "EXON_simu_gene1.3"; 4 | chr3 SAF exon 100010 101000 . + 0 gene_id "simu_gene2"; transcript_id "TRN_simu_gene2"; exon_id "EXON_simu_gene2.1"; 5 | chr3 SAF exon 102000 103000 . + 0 gene_id "simu_gene2"; transcript_id "TRN_simu_gene2"; exon_id "EXON_simu_gene2.2"; 6 | chr3 SAF exon 104000 129000 . + 0 gene_id "simu_gene2"; transcript_id "TRN_simu_gene2"; exon_id "EXON_simu_gene2.3"; 7 | chr3 SAF exon 102000 131000 . + 0 gene_id "simu_gene2"; transcript_id "TRN_simu_gene2"; exon_id "EXON_simu_gene2.4"; 8 | chr3 SAF exon 500010 501000 . 
- 0 gene_id "simu_gene3"; transcript_id "TRN_simu_gene3"; exon_id "EXON_simu_gene3.1"; 9 | chr3 SAF exon 502000 503000 . - 0 gene_id "simu_gene3"; transcript_id "TRN_simu_gene3"; exon_id "EXON_simu_gene3.2"; 10 | chr3 SAF exon 504000 529000 . - 0 gene_id "simu_gene3"; transcript_id "TRN_simu_gene3"; exon_id "EXON_simu_gene3.3"; 11 | chr3 SAF exon 600000 669000 . - 0 gene_id "simu_gene3"; transcript_id "TRN_simu_gene3"; exon_id "EXON_simu_gene3.4"; 12 | chr3 SAF exon 602000 631000 . + 0 gene_id "simu_gene4"; transcript_id "TRN_simu_gene4"; exon_id "EXON_simu_gene4.1"; 13 | chr3 SAF exon 672000 699000 . + 0 gene_id "simu_gene4"; transcript_id "TRN_simu_gene4"; exon_id "EXON_simu_gene4.2"; 14 | chr3 SAF exon 702000 719000 . + 0 gene_id "simu_gene4"; transcript_id "TRN_simu_gene4"; exon_id "EXON_simu_gene4.3"; 15 | chr4 SAF exon 20000 100000 . - 0 gene_id "simu_gene5"; transcript_id "TRN_simu_gene5"; exon_id "EXON_simu_gene5.1"; 16 | chr4 SAF exon 120000 190000 . - 0 gene_id "simu_gene5"; transcript_id "TRN_simu_gene5"; exon_id "EXON_simu_gene5.2"; 17 | chr4 SAF exon 200000 210000 . - 0 gene_id "simu_gene5"; transcript_id "TRN_simu_gene5"; exon_id "EXON_simu_gene5.3"; 18 | chr4 SAF exon 220000 300000 . - 0 gene_id "simu_gene5"; transcript_id "TRN_simu_gene5"; exon_id "EXON_simu_gene5.4"; 19 | chr4 SAF exon 420000 490000 . - 0 gene_id "simu_gene6"; transcript_id "TRN_simu_gene6"; exon_id "EXON_simu_gene6.1"; 20 | chr4 SAF exon 500000 560000 . - 0 gene_id "simu_gene6"; transcript_id "TRN_simu_gene6"; exon_id "EXON_simu_gene6.2"; 21 | chr5 SAF exon 120000 490000 . - 0 gene_id "simu_gene7"; transcript_id "TRN_simu_gene7"; exon_id "EXON_simu_gene7.1"; 22 | chr5 SAF exon 500000 960000 . - 0 gene_id "simu_gene7"; transcript_id "TRN_simu_gene7"; exon_id "EXON_simu_gene7.2"; 23 | chr5 SAF exon 970000 1000000 . 
- 0 gene_id "simu_gene7"; transcript_id "TRN_simu_gene7"; exon_id "EXON_simu_gene7.3"; 24 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-chralias.SAF: -------------------------------------------------------------------------------- 1 | # this file is a SAF file. 2 | # all lines starting with '#' are comment lines. 3 | # lines starting with 'GeneIDChrStart' are also the comment line 4 | GeneID Chr Start End Strand 5 | simu_gene1 chr3 100 10000 + 6 | simu_gene1 chr3 20000 30000 + 7 | simu_gene1 chr3 40000 89000 + 8 | simu_gene2 chr3 100010 101000 + 9 | simu_gene2 chr3 102000 103000 + 10 | simu_gene2 chr3 104000 129000 + 11 | simu_gene2 chr3 102000 131000 + 12 | simu_gene3 chr3 500010 501000 - 13 | simu_gene3 chr3 502000 503000 - 14 | simu_gene3 chr3 504000 529000 - 15 | simu_gene3 chr3 600000 669000 - 16 | simu_gene4 chr3 602000 631000 + 17 | simu_gene4 chr3 672000 699000 + 18 | simu_gene4 chr3 702000 719000 + 19 | simu_gene5 chr4 20000 100000 - 20 | simu_gene5 chr4 120000 190000 - 21 | simu_gene5 chr4 200000 210000 - 22 | simu_gene5 chr4 220000 300000 - 23 | simu_gene6 chr4 420000 490000 - 24 | simu_gene6 chr4 500000 560000 - 25 | simu_gene7 chr5 120000 490000 - 26 | simu_gene7 chr5 500000 960000 - 27 | simu_gene7 chr5 970000 1000000 - 28 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-chralias.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../bin/featureCounts" "-a" "data/test-chralias.SAF" "-F" "SAF" "-i" "data/test-chralias.sam" "-o" "result/test-chralias.ora" "-p" "-A" "data/test-chralias.txt" 3 | geneid nreads 4 | simu_gene1 16 5 | simu_gene2 5 6 | simu_gene3 8 7 | simu_gene4 6 8 | simu_gene5 48 9 | simu_gene6 22 10 | simu_gene7 169 11 | -------------------------------------------------------------------------------- 
/test/featureCounts/data/test-chralias.txt: -------------------------------------------------------------------------------- 1 | chr3,III 2 | chr5,V 3 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-chrname.SAF: -------------------------------------------------------------------------------- 1 | # this file is a SAF file. 2 | # all lines starting with '#' are comment lines. 3 | # lines starting with 'GeneIDChrStart' are also the comment line 4 | GeneID Chr Start End Strand 5 | simu_gene1 chr3 100 10000 + 6 | simu_gene1 chr3 20000 30000 + 7 | simu_gene1 chr3 40000 89000 + 8 | simu_gene2 chr3 100010 101000 + 9 | simu_gene2 chr3 102000 103000 + 10 | simu_gene2 chr3 104000 129000 + 11 | simu_gene2 chr3 102000 131000 + 12 | simu_gene3 chr3 500010 501000 - 13 | simu_gene3 chr3 502000 503000 - 14 | simu_gene3 chr3 504000 529000 - 15 | simu_gene3 chr3 600000 669000 - 16 | simu_gene4 chr3 602000 631000 + 17 | simu_gene4 chr3 672000 699000 + 18 | simu_gene4 chr3 702000 719000 + 19 | simu_gene5 chr4 20000 100000 - 20 | simu_gene5 chr4 120000 190000 - 21 | simu_gene5 chr4 200000 210000 - 22 | simu_gene5 chr4 220000 300000 - 23 | simu_gene6 chr4 420000 490000 - 24 | simu_gene6 chr4 500000 560000 - 25 | simu_gene7 5 120000 490000 - 26 | simu_gene7 5 500000 960000 - 27 | simu_gene7 5 970000 1000000 - 28 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-chrname.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../bin/featureCounts" "-a" "data/test-chrname.SAF" "-F" "SAF" "-i" "data/test-chrname.sam" "-o" "result/test-chrname.ora" "-p" 3 | geneid nreads 4 | simu_gene1 16 5 | simu_gene2 5 6 | simu_gene3 8 7 | simu_gene4 6 8 | simu_gene5 48 9 | simu_gene6 22 10 | simu_gene7 169 11 | 
-------------------------------------------------------------------------------- /test/featureCounts/data/test-fracOverlap.ora: -------------------------------------------------------------------------------- 1 | Geneid test-fracOverlap.sam 2 | g1 1 3 | g2 0 4 | g3 0 5 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-fracOverlap.sam: -------------------------------------------------------------------------------- 1 | @SQ SN:chr3 LN:30427671 2 | read1 99 chr3 4950 40 60M300N40M = 4970 120 N I 3 | read1 147 chr3 4970 40 40M300N60M = 4950 120 N I 4 | read1 99 chr3 4955 40 60M300N40M = 4975 120 N I 5 | read1 147 chr3 4975 40 40M300N60M = 4955 120 N I 6 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-35ext.ora: -------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "--minReadOverlap" "-10000" "-o" "test-minimum-35ext.ora" "test-minimum.bam" 2 | Geneid test-minimum.bam 3 | simu_gene1 17 4 | simu_gene2 9 5 | simu_gene3 18 6 | simu_gene4 6 7 | simu_gene5 86 8 | simu_gene6 38 9 | simu_gene7 264 10 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-3ext.ora: -------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-o" "test-minimum-3ext.ora" "-a" "test-minimum.GTF" "--readExtension3" "10000" "test-minimum.bam" 2 | Geneid test-minimum.bam 3 | simu_gene1 16 4 | simu_gene2 6 5 | simu_gene3 14 6 | simu_gene4 4 7 | simu_gene5 80 8 | simu_gene6 34 9 | simu_gene7 260 10 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-5ext.ora: 
-------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-o" "test-minimum-5ext.ora" "-a" "test-minimum.GTF" "--readExtension5" "10000" "test-minimum.bam" 2 | Geneid test-minimum.bam 3 | simu_gene1 16 4 | simu_gene2 7 5 | simu_gene3 14 6 | simu_gene4 4 7 | simu_gene5 80 8 | simu_gene6 34 9 | simu_gene7 258 10 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-5reduce.ora: -------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-o" "test-minimum-5reduce.ora" "-O" "--debugCommand" "D" "--read2pos" "5" "-p" "corner-reduction.sam" "--readExtension5" "111" 2 | Geneid Chr Start End Strand Length corner-reduction.sam 3 | simu_gene1 chr3;chr3;chr3 100;20000;40000 10000;30000;89000 +;+;+ 68903 0 4 | simu_gene2 chr3;chr3 100010;102000 101000;131000 +;+ 29992 1 5 | simu_gene3 chr3;chr3;chr3;chr3 500010;502000;504000;600000 501000;503000;529000;669000 -;-;-;- 95994 3 6 | simu_gene4 chr3;chr3;chr3 602000;672000;702000 631000;699000;719000 +;+;+ 73003 4 7 | simu_gene5 chr4;chr4;chr4;chr4 20000;120000;200000;220000 100000;190000;210000;300000 -;-;-;- 240004 0 8 | simu_gene6 chr4;chr4 420000;500000 490000;560000 -;- 130002 0 9 | simu_gene7 chr5;chr5;chr5 120000;500000;970000 490000;960000;1000000 -;-;- 860003 0 10 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-FL.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output exon_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum-FL.ora" "-p" "-f" 3 | geneid chr start end nreads 4 | simu_gene1 chr3 100 10000 0 5 | simu_gene1 chr3 20000 30000 1 6 
| simu_gene1 chr3 40000 89000 7 7 | simu_gene2 chr3 100010 101000 0 8 | simu_gene2 chr3 102000 103000 0 9 | simu_gene2 chr3 104000 129000 0 10 | simu_gene2 chr3 102000 131000 0 11 | simu_gene3 chr3 500010 501000 0 12 | simu_gene3 chr3 502000 503000 0 13 | simu_gene3 chr3 504000 529000 4 14 | simu_gene3 chr3 600000 669000 1 15 | simu_gene4 chr3 602000 631000 0 16 | simu_gene4 chr3 672000 699000 1 17 | simu_gene4 chr3 702000 719000 0 18 | simu_gene5 chr4 20000 100000 14 19 | simu_gene5 chr4 120000 190000 11 20 | simu_gene5 chr4 200000 210000 1 21 | simu_gene5 chr4 220000 300000 11 22 | simu_gene6 chr4 420000 490000 12 23 | simu_gene6 chr4 500000 560000 3 24 | simu_gene7 chr5 120000 490000 63 25 | simu_gene7 chr5 500000 960000 61 26 | simu_gene7 chr5 970000 1000000 4 27 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-O.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum-O.ora" "-p" "-O" 3 | geneid nreads 4 | simu_gene1 8 5 | simu_gene2 2 6 | simu_gene3 6 7 | simu_gene4 2 8 | simu_gene5 37 9 | simu_gene6 15 10 | simu_gene7 128 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-PE.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum-PE.ora" "-p" 3 | geneid nreads 4 | simu_gene1 8 5 | simu_gene2 2 6 | simu_gene3 5 7 | simu_gene4 1 8 | simu_gene5 37 9 | simu_gene6 15 10 | simu_gene7 128 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-SE.ora: 
-------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum-SE.ora" 3 | geneid nreads 4 | simu_gene1 15 5 | simu_gene2 4 6 | simu_gene3 10 7 | simu_gene4 2 8 | simu_gene5 74 9 | simu_gene6 30 10 | simu_gene7 254 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-STR.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum-STR.ora" "-p" "-s1" 3 | geneid nreads 4 | simu_gene1 4 5 | simu_gene2 0 6 | simu_gene3 4 7 | simu_gene4 1 8 | simu_gene5 16 9 | simu_gene6 10 10 | simu_gene7 51 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-UNSTR.ora: -------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum-UNSTR.ora" "-p" "-s2" 3 | geneid nreads 4 | simu_gene1 4 5 | simu_gene2 2 6 | simu_gene3 2 7 | simu_gene4 1 8 | simu_gene5 21 9 | simu_gene6 5 10 | simu_gene7 77 11 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum-dup.ora: -------------------------------------------------------------------------------- 1 | # Program:featureCounts v1.4.5; Command:"../../../bin/featureCounts" "-o" "test-minimum-dup.ora" "-p" "-a" "test-minimum.GTF" "--ignoreDup" "test-chrname.sam" 2 | Geneid test-chrname.sam 3 | simu_gene1 8 4 | simu_gene2 5 5 | simu_gene3 6 6 | simu_gene4 3 7 | simu_gene5 21 8 | simu_gene6 14 9 | simu_gene7 
82 10 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum.GTF: -------------------------------------------------------------------------------- 1 | chr3 FCTEST exon 100 10000 . + 0 gene_id "simu_gene1"; transcript_id "TR:simu_gene1" 2 | chr3 FCTEST exon 20000 30000 . + 0 gene_id "simu_gene1"; transcript_id "TR:simu_gene1" 3 | chr3 FCTEST exon 40000 89000 . + 0 gene_id "simu_gene1"; transcript_id "TR:simu_gene1" 4 | chr3 FCTEST exon 100010 101000 . + 0 gene_id "simu_gene2"; transcript_id "TR:simu_gene2" 5 | chr3 FCTEST exon 102000 103000 . + 0 gene_id "simu_gene2"; transcript_id "TR:simu_gene2" 6 | chr3 FCTEST exon 104000 129000 . + 0 gene_id "simu_gene2"; transcript_id "TR:simu_gene2" 7 | chr3 FCTEST exon 102000 131000 . + 0 gene_id "simu_gene2"; transcript_id "TR:simu_gene2" 8 | chr3 FCTEST exon 500010 501000 . - 0 gene_id "simu_gene3"; transcript_id "TR:simu_gene3" 9 | chr3 FCTEST exon 502000 503000 . - 0 gene_id "simu_gene3"; transcript_id "TR:simu_gene3" 10 | chr3 FCTEST exon 504000 529000 . - 0 gene_id "simu_gene3"; transcript_id "TR:simu_gene3" 11 | chr3 FCTEST exon 600000 669000 . - 0 gene_id "simu_gene3"; transcript_id "TR:simu_gene3" 12 | chr3 FCTEST exon 602000 631000 . + 0 gene_id "simu_gene4"; transcript_id "TR:simu_gene4" 13 | chr3 FCTEST exon 672000 699000 . + 0 gene_id "simu_gene4"; transcript_id "TR:simu_gene4" 14 | chr3 FCTEST exon 702000 719000 . + 0 gene_id "simu_gene4"; transcript_id "TR:simu_gene4" 15 | chr4 FCTEST exon 20000 100000 . - 0 gene_id "simu_gene5"; transcript_id "TR:simu_gene5" 16 | chr4 FCTEST exon 120000 190000 . - 0 gene_id "simu_gene5"; transcript_id "TR:simu_gene5" 17 | chr4 FCTEST exon 200000 210000 . - 0 gene_id "simu_gene5"; transcript_id "TR:simu_gene5" 18 | chr4 FCTEST exon 220000 300000 . - 0 gene_id "simu_gene5"; transcript_id "TR:simu_gene5" 19 | chr4 FCTEST exon 420000 490000 . 
- 0 gene_id "simu_gene6"; transcript_id "TR:simu_gene6" 20 | chr4 FCTEST exon 500000 560000 . - 0 gene_id "simu_gene6"; transcript_id "TR:simu_gene6" 21 | chr5 FCTEST exon 120000 490000 . - 0 gene_id "simu_gene7"; transcript_id "TR:simu_gene7" 22 | chr5 FCTEST exon 500000 960000 . - 0 gene_id "simu_gene7"; transcript_id "TR:simu_gene7" 23 | chr5 FCTEST exon 970000 1000000 . - 0 gene_id "simu_gene7"; transcript_id "TR:simu_gene7" 24 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum.SAF: -------------------------------------------------------------------------------- 1 | geNeiD chR start End Strand 2 | simu_gene1 chr3 100 10000 + 3 | simu_gene1 chr3 20000 30000 + 4 | simu_gene1 chr3 40000 89000 + 5 | simu_gene2 chr3 100010 101000 + 6 | simu_gene2 chr3 102000 103000 + 7 | simu_gene2 chr3 104000 129000 + 8 | simu_gene2 chr3 102000 131000 + 9 | simu_gene3 chr3 500010 501000 - 10 | simu_gene3 chr3 502000 503000 - 11 | simu_gene3 chr3 504000 529000 - 12 | simu_gene3 chr3 600000 669000 - 13 | simu_gene4 chr3 602000 631000 + 14 | simu_gene4 chr3 672000 699000 + 15 | simu_gene4 chr3 702000 719000 + 16 | simu_gene5 chr4 20000 100000 - 17 | simu_gene5 chr4 120000 190000 - 18 | simu_gene5 chr4 200000 210000 - 19 | simu_gene5 chr4 220000 300000 - 20 | simu_gene6 chr4 420000 490000 - 21 | simu_gene6 chr4 500000 560000 - 22 | simu_gene7 chr5 120000 490000 - 23 | simu_gene7 chr5 500000 960000 - 24 | simu_gene7 chr5 970000 1000000 - 25 | -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/featureCounts/data/test-minimum.bam -------------------------------------------------------------------------------- /test/featureCounts/data/test-minimum.ora: 
-------------------------------------------------------------------------------- 1 | ## featureCounts output gene_level; version:1.4.0b5 2 | ## CMD:"../../../bin/featureCounts" "-a" "test-minimum.GTF" "-i" "test-minimum.sam" "-o" "test-minimum.ora" "-p" 3 | geneid nreads 4 | simu_gene1 8 5 | simu_gene2 2 6 | simu_gene3 5 7 | simu_gene4 1 8 | simu_gene5 37 9 | simu_gene6 15 10 | simu_gene7 128 11 | -------------------------------------------------------------------------------- /test/featureCounts/featureCounts-test.sh: -------------------------------------------------------------------------------- 1 | mkdir -p result 2 | sh test_minimal_example.sh 3 | sh test_chr_aliases.sh 4 | sh test_chr_inference.sh 5 | sh test_corner_cases.sh 6 | sh test_featurelevel.sh 7 | -------------------------------------------------------------------------------- /test/featureCounts/test_chr_aliases.sh: -------------------------------------------------------------------------------- 1 | SH_CMD=bash 2 | mkdir -p result 3 | echo 4 | echo "================================================================================" 5 | printf " FeatureCounts Chromosome Name Aliases Tests\n http://subread.sourceforge.net/\n" 6 | echo "================================================================================" 7 | echo 8 | 9 | # ================================================================================ 10 | # Testing alias file to convert chromosome names in the annotation file 11 | # The alias file has each line defining an alias: Chro_Name_in_Annotation,Chro_Name_in_SAM 12 | $SH_CMD data/compare.sh data/test-chralias.sam data/test-chralias.ora data/test-chralias.SAF "-F SAF --countReadPairs -p -A data/test-chralias.txt " "chromosome aliases" 13 | echo 14 | -------------------------------------------------------------------------------- /test/featureCounts/test_chr_inference.sh: -------------------------------------------------------------------------------- 1 | SH_CMD=bash 2 | mkdir -p 
result 3 | echo 4 | echo "================================================================================" 5 | printf " FeatureCounts Chromosome Name Inference Tests\n http://subread.sourceforge.net/\n" 6 | echo "================================================================================" 7 | echo 8 | 9 | # ================================================================================ 10 | # Testing incomplete chromosome names in the annotations and in the SAM file 11 | $SH_CMD data/compare.sh data/test-chrname.sam data/test-chrname.ora data/test-chrname.SAF "-F SAF -p --countReadPairs " "automatic inference of chromosome names" 12 | 13 | echo 14 | -------------------------------------------------------------------------------- /test/featureCounts/test_commonusage.sh: -------------------------------------------------------------------------------- 1 | echo 2 | echo "================================================================================" 3 | printf " FeatureCounts Common Scenario Tests\n http://subread.sourceforge.net/\n" 4 | echo "================================================================================" 5 | echo 6 | 7 | infiles="/usr/local/work/liao/Rsubread/testsuit/SEQC2011_for_3tests_wild-coorsorted-Picard-SamtAgain.bam /usr/local/work/liao/Rsubread/testsuit/SEQC2011-A-FCpaper-SAM1.4.junc" 8 | 9 | for inf in $infiles 10 | do 11 | intag="complex" 12 | if [[ $inf =~ FCpaper ]] 13 | then 14 | intag="simple" 15 | fi 16 | for level in GENE EXON 17 | do 18 | op_level= 19 | if [[ $level == "EXON" ]] 20 | then 21 | op_level=" -f " 22 | fi 23 | for moverlap in YES NO 24 | do 25 | op_moverlap= 26 | if [[ $moverlap == "YES" ]] 27 | then 28 | op_moverlap=" -O " 29 | fi 30 | for mmapping in YES NO 31 | do 32 | op_mmapping= 33 | if [[ $mmapping == "YES" ]] 34 | then 35 | op_mmapping=" -M " 36 | fi 37 | for fraction in YES NO 38 | do 39 | op_fraction= 40 | if [[ $fraction == "YES" ]] 41 | then 42 | if [[ $mmapping == "NO" ]] 43 | then 44 | continue 45 
| fi 46 | op_fraction=" --fraction " 47 | fi 48 | ora=data/commonusage-$intag-$level-$moverlap-$mmapping-$fraction.FC 49 | echo $ora 50 | ../../bin/featureCounts -a /usr/local/work/liao/Rsubread/testsuit/ensembl-for_3tests-shuf.GTF -o data/del4.FC $op_level $op_moverlap $op_mmapping $op_fraction -T7 -p $inf &>/dev/null 51 | cat $ora|grep -v ^#|grep -vi Geneid |md5sum 52 | cat data/del4.FC |grep -v ^#|grep -vi Geneid |md5sum 53 | 54 | cat $ora.summary|grep -v ^#|grep -vi Status |md5sum 55 | cat data/del4.FC.summary |grep -v ^#|grep -v Status |md5sum 56 | rm -f data/del4* 57 | done 58 | done 59 | done 60 | done 61 | done 62 | 63 | -------------------------------------------------------------------------------- /test/featureCounts/test_corner_cases.sh: -------------------------------------------------------------------------------- 1 | mkdir -p result 2 | echo 3 | echo "================================================================================" 4 | printf " FeatureCounts Corner Case Tests\n http://subread.sourceforge.net/\n" 5 | echo "================================================================================" 6 | echo 7 | 8 | 9 | 10 | SH_CMD=bash 11 | $SH_CMD data/compare.sh data/corner-INDEL.sam data/corner-INDEL.ora data/test-minimum.GTF "-p --countReadPairs" "indel reads" 12 | $SH_CMD data/compare.sh data/corner-JUNC.sam data/corner-JUNC.ora data/test-minimum.GTF "-p --countReadPairs" "junction reads" 13 | $SH_CMD data/compare.sh data/corner-ONEEND.sam data/corner-ONEEND.ora data/test-minimum.GTF "-p --countReadPairs" "paired-end reads (fragment counting)" 14 | $SH_CMD data/compare.sh data/corner-ONEEND.sam data/corner-ONEEND-BOTH.ora data/test-minimum.GTF "-p --countReadPairs -B " "paired-end reads (fragment counting, both ends mapped)" 15 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-O.ora data/test-minimum.GTF "-p --countReadPairs -O " "multi-overlapping reads" 16 | $SH_CMD data/compare.sh data/test-minimum.sam 
data/test-minimum-FL.ora data/test-minimum.GTF "-p --countReadPairs -f " "feature-level summarization" FL 17 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum.ora data/test-minimum.GTF "-p --countReadPairs " "gene-level summarization" 18 | $SH_CMD data/compare.sh data/corner-NH.sam data/corner-NH.ora data/test-minimum.GTF "-p --countReadPairs" "multi-mapping reads" 19 | $SH_CMD data/compare.sh data/corner-NH.sam data/corner-NH-PM.ora data/test-minimum.GTF "-p --countReadPairs --countReadPairs --primary -M " "multi-mapping reads (primary only)" 20 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-BothEnds.ora data/test-minimum.SAF "-p --countReadPairs -F SAF -B " "both ends mapped" 21 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Chimeric.ora data/test-minimum.SAF "-p --countReadPairs -F SAF -C " "disallowing chimeric fragments" 22 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-MultiMapping.ora data/test-minimum.SAF "-p --countReadPairs -F SAF -M " "Allowing multi-mapped reads" 23 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-DoNotSort.ora data/test-minimum.SAF " -p --countReadPairs -F SAF --donotsort " "not sorting input file" 24 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-MinOverlap.ora data/test-minimum.SAF " --minOverlap 125 -p --countReadPairs -F SAF " "minimum overlapping length" 25 | $SH_CMD data/compare.sh data/test-fracOverlap.sam data/test-fracOverlap.ora data/corner-fractions.SAF " --fracOverlap 0.62 -O -p --countReadPairs -F SAF " "minimum overlapping fraction" 26 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-LargestOverlap.ora data/test-minimum.SAF "-p --countReadPairs -F SAF --largestOverlap" "Largest Overlapping" 27 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-PEdist.ora data/test-minimum.SAF " -p --countReadPairs -F SAF -B -C -P -d 130 -D 770 " "paired-end distance" 28 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Read2Pos5.ora data/test-minimum.SAF 
" -p --countReadPairs -F SAF --read2pos 5 " "Read to position (5' end)" 29 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Read2Pos3.ora data/test-minimum.SAF " -p --countReadPairs -F SAF --read2pos 3 " "Read to position (3' end)" 30 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Extend3.ora data/test-minimum.SAF " -p --countReadPairs -F SAF --readExtension3 1000 " "Read extension to the 3' end" 31 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Extend5.ora data/test-minimum.SAF " -p --countReadPairs -F SAF --readExtension5 1000 " "Read extension to the 5' end" 32 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-MaxOPs.ora data/test-minimum.SAF " -p --countReadPairs -F SAF --maxMOp 2 " "Low maxOPs value" 33 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-MinMAPQ.ora data/test-minimum.SAF " -p --countReadPairs -F SAF -Q 58" "minimum mapping quality" 34 | $SH_CMD data/compare.sh data/test-dup.sam data/corner-IgnoreDup.ora data/test-minimum.SAF "-p --countReadPairs -F SAF --ignoreDup " "Ignoring duplicated reads" 35 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Fraction.ora data/test-minimum.SAF "-p --countReadPairs -F SAF --fraction -M " "Fraction counting" 36 | $SH_CMD data/compare.sh data/corner-fractions.sam data/corner-fractions.ora data/corner-fractions.SAF " -O -M -F SAF --fraction " "Advanced fractions" 37 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Jcounts.ora data/test-minimum.SAF "-p --countReadPairs -F SAF -J " "Junction counting" JC 38 | 39 | if test -f /usr/local/work/work/liao/subread/chromosomes/all_34_alt.fa 40 | then 41 | $SH_CMD data/compare.sh data/test-junc.sam data/corner-Jcounts-FA.ora data/test-minimum.SAF "-p --countReadPairs -F SAF -J --genome /usr/local/work/work/liao/subread/chromosomes/all_34_alt.fa " "Junction counting (with genome) " JC 42 | else 43 | echo "Skipping Junction counting (with genome)." 44 | fi 45 | 46 | 47 | # by default it is in GTF. 
48 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum.ora data/test-minimum.GTF "-p --countReadPairs " "GTF format annotations" 49 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum.ora data/test-minimum.SAF "-p --countReadPairs -F SAF " "SAF format annotations" 50 | 51 | # by default it is in SAM. 52 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum.ora data/test-minimum.GTF "-p --countReadPairs " "SAM format input" 53 | $SH_CMD data/compare.sh data/test-minimum.bam data/test-minimum.ora data/test-minimum.GTF "-p --countReadPairs " "BAM format input" 54 | 55 | # by default it is non-strand specific. 56 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum.ora data/test-minimum.GTF "-p --countReadPairs -s 0 " "unstranded read summarization" 57 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-STR.ora data/test-minimum.GTF "-p --countReadPairs -s 1 " "stranded read summarization" 58 | $SH_CMD data/compare.sh data/test-minimum.sam data/test-minimum-UNSTR.ora data/test-minimum.GTF "-p --countReadPairs -s 2 " "reversely stranded read summarization" 59 | 60 | # test 5' and 3' end extension 61 | $SH_CMD data/compare.sh data/test-chrname.sam data/test-minimum-dup.ora data/test-minimum.GTF " -p --countReadPairs --ignoreDup " "Ignoring duplicate fragments" 62 | $SH_CMD data/compare.sh data/corner-JUNC.sam data/corner-JUNC-ONLY.ora data/test-minimum.GTF " -p --splitOnly -O -f " "Junction reads only" FL 63 | $SH_CMD data/compare.sh data/corner-JUNC.sam data/corner-EXON-ONLY.ora data/test-minimum.GTF " --nonSplitOnly -p --countReadPairs " "Exonic reads only" 64 | 65 | echo 66 | -------------------------------------------------------------------------------- /test/featureCounts/test_featurelevel.sh: -------------------------------------------------------------------------------- 1 | SH_CMD=bash 2 | 3 | 4 | for test in intron_between across_genes across_intron 5 | do 6 | for ams in data/$test*am 7 | do 8 | 
$SH_CMD data/compare.sh $ams $ams.ora data/$test*gtf ' -p -f --countReadPairs -s 2 ' $test FL 9 | done 10 | done 11 | 12 | -------------------------------------------------------------------------------- /test/featureCounts/test_minimal_example.sh: -------------------------------------------------------------------------------- 1 | mkdir -p result 2 | cat << EOF > /dev/null 3 | The options in the command lines below are: 4 | -a : annotation file (GTF by default) 5 | -F : annotation format (GTF by default) 6 | -A : chromosome alias file 7 | -i : input file for reads (SAM by default) 8 | -b : input file is in the BAM format 9 | -o : output file 10 | -p : paired-end assignment 11 | -f : feature-level (exon level) assignment 12 | -O : allowing a read to overlap with multi features 13 | -S : resorting the input SAM or BAM file 14 | -T : threads for assignment 15 | -d and -D : minimum and maximum allowed template lengths 16 | -B : both ends must be mapped in a paired-end fragment 17 | -C : no chimeric fragments are allowed 18 | -M : multi-mapping reads reported by the aligner are allowed 19 | 20 | More options are available. Reference to the user guide for the full option list. 21 | 22 | EOF 23 | 24 | 25 | echo 26 | echo "================================================================================" 27 | printf " FeatureCounts Basic Test\n http://subread.sourceforge.net/\n" 28 | echo "================================================================================" 29 | echo 30 | 31 | 32 | 33 | rm -f data/test-minimum.log 34 | # ================================================================================ 35 | # The minimum runnable test 36 | ../../bin/featureCounts -p -a data/test-minimum.GTF -o result/test-minimum.FC data/test-minimum.sam 37 | 38 | echo "================================================================================" 39 | echo "Basic Test finished." 
40 | echo "The results are in result/test-minimum.FC" 41 | echo "================================================================================" 42 | echo 43 | echo 44 | echo 45 | -------------------------------------------------------------------------------- /test/subjunc/subjunc-test.sh: -------------------------------------------------------------------------------- 1 | mkdir result 2 | 3 | ../../bin/subread-buildindex -o ../small1 ../chr901.fa 4 | 5 | ../../bin/subjunc --SAMoutput -i ../small1 -o result/junctions.sam -r data/junction-reads-A.fq -R data/junction-reads-B.fq 6 | 7 | ../../bin/subjunc -i ../small1 -o result/junctionsNfusions.bam -r data/junction-reads-A.fq -R data/junction-reads-B.fq --allJunctions 8 | -------------------------------------------------------------------------------- /test/subread-align/data/indel-test1.fq: -------------------------------------------------------------------------------- 1 | >a1 2 | CACAGCTTTGGGAGGCCAAGGCGGGTGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGACCAACATG 3 | -------------------------------------------------------------------------------- /test/subread-align/data/indel-test2.fq: -------------------------------------------------------------------------------- 1 | >a1 2 | CACTGCACTTCGGCCTGGGCAACAGAGCAAGACTCTGTCTCAAAAGAAAAAAAAGAATATCCAATTGTTC 3 | -------------------------------------------------------------------------------- /test/subread-align/data/subfusion_test.fa: -------------------------------------------------------------------------------- 1 | >x1 2 | CTTATTCTCATGAAAGTTTGTAATCTTTGGAGAGTTGCTTAAACACTTAAAACCATCTTTCCTCTTTCTAATTACAATAAGTATTATCACTAAGTTTATGCAAACACGTGAGGGAATGCTGATTTAGAGCTGGTGGCGTA 3 | >x2 4 | CCTCATAATGTACTTCCCATGGACTAAAGTTGTTCACTTTCTTTAGAGGATATTAGCTAAAGATGTGGTACTGGAATTAGAAGCACAATTCTCCCTTCGCATCCCTCTTGCATCAACCAAAAGTTGAGTTGCGAAGTGGA 5 | >x3 6 | GTAATATAGTTTTCCTTTTTACTTTTTCCCTTGGCCTCGATATGTACCTACACAGCACAGCAAAGCACGCCTTTGCCTGTGATTCTACCACACCTCACCAACGCGTTT 7 | >x4 8 | 
TCTGCTCTTGGGTGCCAAGGCCACCGTCAGCCAGGAGAGGCCTGCAGAATCAGCCTGTGCACCTCTCTGATACTGTCCAAAACTTTCATTTTAGAAAGGAAAGAA 9 | >x5 10 | AGCCCTGCCCACACCTTGATCTGGGACTTCAGGTTAGCTTGCTGGAGCTGCCATAACAAAGCACCATA 11 | >x6 12 | GTTTGAAGAATGGTGCAGGACTCCCTGGTGAGCCAGGTGGACATGACCTCAAGCTAGCGTGTCACCTTGGGCAAAGCTCTCA 13 | >y1 14 | GCTGTTGCCCACTTCTCTTGGACCCTGGTTAAAGGTGAGAGGAATTGCCCATATC 15 | >y2 16 | AGGTCCCTTTTACATCTGTACAGCAGTTTAGGATATGCTCCTGGGTGGGGA 17 | >y3 18 | GCACTGAGTAGGTGGCACCGACGGGGGAGGGGAAGGCGGGAAACCCCTGCAGACCCAGAGCTGGCTCAGCGGAGGTGCCAGCCCA 19 | >y4 20 | GGGAGAAGACAAGTGACTTCTGTTTCTAAGTAGGCACTCCAAGCAAGCTTGGGATGGCTTAACCA 21 | >z1 22 | TCCATGTGGCTCTGCTGTTGCCCACTTCTCTTGGACCCTGGTTAAAGGTGAGAGGAATTGCCCATATCACACAGCTTATAGAGCTGCAACCACCATAACTGAAGCATCAAAAGTTCCTCCCTCCCCCATCCCTCCACCCCCTGTTCACCCACCCATTCA 23 | >z2 24 | ACAAAGGGAATCCAAAAAGCCCACCCGGTGCACTGAGTAGGTGGCACCGACGGGGGAGGGGAAGGCGGGAAACCCCTGCAGACCCAGAGCTGGCTCAGCGGAGGTGCCAGCCCAGCCTTGAGAACTCCCTTCGCTCCACACGCAGCGGAGGCAGCTGCAGGGCGCCGACCTTCCCTGCCTC 25 | >a1 26 | AGTGCTGGGCTCCGTTTGCTCTGCACTCAGCCTGCCCAGGGCACCGTGCTGTGCGATAGCTCAGGC 27 | >a2 28 | TACCAATCATTTTTTACAAAATAAATTGCTATTTGCGGAGATGAAGTGGTGAAGGCCTGGTTTCCACCGA 29 | >a3 30 | AACTGCATACTATAAAAGTGCTTTAAAATGCAGCAGGAGAATCCCTTCTACATAGGGTTCAGTGCATGTGAGTATACACCGGGCTT 31 | >a4 32 | GACAGCCGGGGCCGGGCGCCCCTGCACCACGCCCGCTGGACTCCTCCGACTCTGCACCCTCTGTGGGGAG 33 | >b1 34 | TGGCAGTGAAGGGACACGGCTCTGGAGTGCTGGGCTCCGTTTGCTCTGCACTCAGCCTGCCCAGGGCACCGTGCTGTGCGATAGCTCAGGCATCGGGTGCTTCAGCTGTTGCAAGTTCCGGATACCAATCATTTTTTACAA 35 | >b2 36 | TGAAAATATTAGGCAAAACTGCATACTATAAAAGTGCTTTAAAATGCAGCAGGAGAATCCCTTCTACATAGGGTTCAGTGCATGTGAGTATACACCGGGCTTGATTCCTGCCCCCGAGTGGAGGTCTGACCTGCCCCACTCTGGAGGGCTGATGCTCTCTGGAAAGGTGGGTGATGTGCAGT 37 | >p1 38 | TCCTCACCAAGGGAGGTCGGCTGTTACATGTAACATTTCTACCTAAAAATACAAAAATTAGCTGGGTGTGGTGGTGCACACCTGTA 39 | >p2 40 | ACCACCAGGAGTGGGGCCCCCTAGTCCCCAAAACCAAGCCTGCCCTTCTGCCGAAGGCTTGTGAGGGGCCTGAGTCCTC 41 | >pp1 42 | 
TTCTCCCTCCTCACCAAGGGAGGTCGGCTGTTACATGTAACATTTCTACCTAAAAATACAAAAATTAGCTGGGTGTGGTGGTGCACACCTGTAATGGGATTACAGGTGCCCGCCACCACGCCTGGCTAATTTTTTTTT 43 | >d1 44 | CGCTGTGGGCTGCTCCTGGCATCACTCTCACCCAGCTTTTGTGTGAAAAACACAGCATTTTTGGCCAGGTGTGGAG 45 | >d2 46 | CCCCATGCGGGAGTGTGTGCCTGAGGACCCATGTGTAGCCCCAGCAGGCCTCGCCCACTGCACTGGTAGC 47 | >dd1 48 | ACGCCGCTGTGGGCTGCTCCTGGCATCACTCTCACCCAGCTTTTGTGTGAAAAACACAGCATTTTTGGCCAGGTGTGGAGGCTCACGCCTGTAAGAGACCCTGTCTTAAAATAATAACTGTATGGCTGGGGGTGGTGGATCA 49 | >r1 50 | GGGTCTGGAAGCTGGAGCATCCCCCCACCCAGGTGGAGCAGGGTCCCCAAGGCACATACACACTCATATTCTGCCCCG 51 | >r2 52 | CAGGGCCGGACAGGAAGGCGCAAGGCTCAGATGGGGCCCAGCCCCAGGCACTTCCGGACTCCTCCAGT 53 | >rr1 54 | TGCCCCCTCCCCAAGGGTCTGGAAGCTGGAGCATCCCCCCACCCAGGTGGAGCAGGGTCCCCAAGGCACATACACACTCATATTCTGCCCCGGAGCGTGAAACCCGTCTCTACTAAAAATACAAAAAATTAGCCGGGCGTGGTGGCGGGTGCCTGTAGTCCC 55 | >q1 56 | TGAGGGCGGGGTCGGGGCTGTGGGGCCAGAGGACGGTGGTGACGCCACTGTGTGCACACCCACGCAGCACCCGAGTC 57 | >q2 58 | CCTTGCCTTTGTGACAAGCTTTGGCCAGCCGCGTCTACTATGGGGGCCTGGGCCGGGGGCAGACGCGGGTCAGCCGCCTCC 59 | >qq1 60 | CCTGAGGGCGGGGTCGGGGCTGTGGGGCCAGAGGACGGTGGTGACGCCACTGTGTGCACACCCACGCAGCACCCGAGTCAAAAACGCACGCACTCCCGCAGCGCACGCATGACTGGTCCCGCCTCCTAGGGCTCCTGGAC 61 | -------------------------------------------------------------------------------- /test/subread-align/data/subfusion_test2.fa: -------------------------------------------------------------------------------- 1 | >x1 2 | CCAAGGCAGCAGCTCAGGTGCAGCCACCATGGAGGCAGTTGGATCTAAAATGATACTTAAACTGGTTCAGTCTGATTCTTAAACTGGTTCAGTGGAGCTAGGCCAGGCTC 3 | >x2 4 | GTCTCTCGGGACAAGAAATGCTTCTTTAGAAATGCTTCTTTCCTTTCTAAAATGAAAGTTTTGGACAGTACAATGAAGATTTTGATTCAGTCCGTCTGGGTTGGGGCCTG 5 | >x3 6 | CAGGATGGGAGAAGGTGATAACAGCTGAGCTGGGTGTCCAGAAGCCAGCCCCACCTGTCGGGTGCTGCTGAGGATGGTGACTGAGTGAATCAT 7 | >x4 8 | CACTGAGGGGCCCTCTGAGGCCCCTACTCCCAAGGCAGCAGCTCAGGTGCAGCCACCATGGAGGCAGTTT 9 | >x5 10 | GTGATTTGAACATCTCGGTATGGATCTGTGTCGGGTGCTGCTGAGGATGGTGACTGAGTGAATCATGGGCGGTGGGAACAGGCAGCAGGAGG 11 | >x6 12 | 
GACCGTGGAGGGGTGCAGTTGGGAGAAAGGACCGGGCGGAGGGTCCCACTCCACACAGTGGACCCAGTGGGCCACTTAGAGTTGCCTGG 13 | >y1 14 | TTGCCCATATCACACAGCTTATAGAGCTGCAACCACCATAACTGAAGCATCAAAAGTTCCTCCCTCCCCCATCCCTCCACCCCCTGTTCACCCACCCATTCA 15 | >y2 16 | CCTGGGTGGGGACTGGGCTGTGCCCAGGGCCTCTGTCCCCATTAGGTAAACAGACCCCAGCTCCAGCCACAGGCTTGGACCGGCC 17 | >y3 18 | CCCAGAGCTGGCTCAGCGGAGGTGCCAGCCCAGCCTTGAGAACTCCCTTCGCTCCACACGCAGCGGAGGCAGCTGCAGGGCGCCGACC 19 | >y4 20 | GACTCCTTTCAATCAAAAATGAAAAAATCCTGCAAGAAAAAAATAGAGAAGCCTCTGCACGTGACCCT 21 | >z1 22 | GGGCCCAGGGCTGCTGTCCAGTCCCGCCGGCCCGGGCGTCCACGAGGTCCCTTTTACATCTGTACAGCAGTTTAGGATATGCTCCTGGGTGGGGACTGGGCTGTGCCCAGGGCCTCTGTCCCCATTAGGTAAACAGACCCCAGCTCCAGCCACAGGCTTGGACCGGCC 23 | >z2 24 | TTGGGGAGAAGACAAGTGACTTCTGTTTCTAAGTAGGCACTCCAAGCAAGCTTGGGATGGCTTAACCAGACTCCTTTCAATCAAAAATGAAAAAATCCTGCAAGAAAAAAATAGAGAAGCCTCTGCACGTGACCCT 25 | >a1 26 | GGCACCGTGCTGTGCGATAGCTCAGGCATCGGGTGCTTCAGCTGTTGCAAGTTCCGGATACCAATCATTTTTTA 27 | >a2 28 | GATGAAGTGGTGAAGGCCTGGTTTCCACCGAAGCTCTCACAGCCCAGCATTTCCTTCCTGATCAGCTCTGTCCAGCAGCAACAATAATCCACGTAGA 29 | >a3 30 | CATAGGGTTCAGTGCATGTGAGTATACACCGGGCTTGATTCCTGCCCCCGAGTGGAGGTCTGACCTGCCCCACTCTGGAGGGCTGATGCTCTCTGG 31 | >a4 32 | GACTCCTCCGACTCTGCACCCTCTGTGGGGAGGGGGCCACCTGGGGAACCTCTGTCTTCAGGTCACCCCTTTTCA 33 | >b1 34 | CTTCAAGCCACACACAGCTGTTGCAAGTTCCGGATACCAATCATTTTTTACAAAATAAATTGCTATTTGCGGAGATGAAGTGGTGAAGGCCTGGTTTCCACCGAAGCTCTCACAGCCCAGCATTTCCTTCCTGATCAGCTCTGTCCAGCAGCAACAATAATCCACGTAGA 35 | >b2 36 | CAAAACGGAGCGGACGTGAACCAAAGAGACAGCCGGGGCCGGGCGCCCCTGCACCACGCCCGCTGGACTCCTCCGACTCTGCACCCTCTGTGGGGAGGGGGCCACCTGGGGAACCTCTGTCTTCAGGTCACCCCTTTTCAGGGGCCTGGGTTGCTCAT 37 | >p1 38 | AAAATACAAAAATTAGCTGGGTGTGGTGGTGCACACCTGTAATGGGATTACAGGTGCCCGCCACCACGCCTGGCTAATTTTTTT 39 | >p2 40 | CTTCTGCCGAAGGCTTGTGAGGGGCCTGAGTCCTCTTGCGGGGGGCCTGTCTCGGTGGGGAGTCACTCCTGCTCCGGGAGCGGCT 41 | >pp1 42 | TCCAAACGGTACCACCAGGAGTGGGGCCCCCTAGTCCCCAAAACCAAGCCTGCCCTTCTGCCGAAGGCTTGTGAGGGGCCTGAGTCCTCTTGCGGGGGGCCTGTCTCGGTGGGGAGTCACTCCTGCTCCGGGAGCGGCTCTGCGACTTGG 43 | >d1 44 | 
AAAACACAGCATTTTTGGCCAGGTGTGGAGGCTCACGCCTGTAAGAGACCCTGTCTTAAAATAATAACTGTATGGCTGGGGGTGGT 45 | >d2 46 | CACTGGTAGCTATGAGGGGCCGGGCGGCAGAACACATCTGTTCCCTGGTCCTTAGGGACCGTCACCTTCAG 47 | >dd1 48 | AGGTGCGTGCATGCCCCATGCGGGAGTGTGTGCCTGAGGACCCATGTGTAGCCCCAGCAGGCCTCGCCCACTGCACTGGTAGCTATGAGGGGCCGGGCGGCAGAACACATCTGTTCCCTGGTCCTTAGGGACCGTCACCTTCAGTC 49 | >r1 50 | GTCCCCAAGGCACATACACACTCATATTCTGCCCCGGAGCGTGAAACCCGTCTCTACTAAAAATACAAAAAATTAGCCG 51 | >r2 52 | GCCCAGCCCCAGGCACTTCCGGACTCCTCCAGTGTTTCGCCTCTCGGAAAGAGATGTTCACGTCCCAGTGGGTGT 53 | >rr1 54 | GTATCATGAAGAGAAGGTACTTGGACCAGGGCCGGACAGGAAGGCGCAAGGCTCAGATGGGGCCCAGCCCCAGGCACTTCCGGACTCCTCCAGTGTTTCGCCTCTCGGAAAGAGATGTTCACGTCCCAGTGGGTGTGGACTCTGCAGG 55 | >q1 56 | CACTGTGTGCACACCCACGCAGCACCCGAGTCAAAAACGCACGCACTCCCGCAGCGCACGCATGACTGGTCCCGCC 57 | >q2 58 | GACGCGGGTCAGCCGCCTCCCGGGCCGCGCGGGAAGCAGGGCCTGGCTCAGCACCGGGAGGGCGCCGCCC 59 | >qq1 60 | CCTTGCCTTTGTGACAAGCTTTGGCCAGCCGCGTCTACTATGGGGGCCTGGGCCGGGGGCAGACGCGGGTCAGCCGCCTCCCGGGCCGCGCGGGAAGCAGGGCCTGGCTCAGCACCGGGAGGGCGCCGCCCCAGCTGCCGCCCCGTCCTT 61 | -------------------------------------------------------------------------------- /test/subread-align/data/test-err-mut-r1.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/subread-align/data/test-err-mut-r1.fq.gz -------------------------------------------------------------------------------- /test/subread-align/data/test-err-mut-r2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ShiLab-Bioinformatics/subread/55dc154f6e5a6813998d2f81039ad0e906bd2c02/test/subread-align/data/test-err-mut-r2.fq.gz -------------------------------------------------------------------------------- /test/subread-align/readname_cigar_match.py: -------------------------------------------------------------------------------- 1 | from sys import stdin 2 | 3 | def 
def cigar_method(cigar, method):
    """Return the summed length of every ``method`` operation in a CIGAR string.

    Args:
        cigar: CIGAR text such as ``"49M1I50M"``.
        method: Single operation letter to total up (e.g. ``'I'``, ``'D'``, ``'N'``).

    Returns:
        The sum of the run lengths attached to ``method``; 0 when the
        operation does not occur (including for ``"*"`` or an empty string).
    """
    total = 0
    run_length = 0
    for cc in cigar:
        if cc.isdigit():
            # Accumulate a multi-digit run length.
            run_length = run_length * 10 + int(cc)
        else:
            if cc == method:
                total += run_length
            # Any operation letter terminates the current run length.
            run_length = 0
    return total


def score_alignments(stream=None):
    """Compare tab-separated alignment records against the truth encoded in read names.

    Each record is expected to look like a SAM alignment line: column 1 is the
    read name, column 3 the chromosome, column 4 the position and column 6 the
    CIGAR string.  The read name carries the expected result in the form
    ``<strand>_<chromosome>_<position>_<cigar>``, e.g.
    ``R_chr12_110788490_49M1I50M`` (the chromosome part may itself contain
    underscores).

    A record counts as correct when the chromosome matches, the position is
    within 5 bases of the expected one, and the total I, D and N lengths of the
    reported CIGAR equal those of the expected CIGAR.  Mismatches are printed as
    they are found and a ``RES:`` summary line is printed at the end.

    Args:
        stream: Iterable of text lines; defaults to standard input.

    Returns:
        Tuple ``(reads, unmapped, error_pos, error_cigar, correct_indel)``.
    """
    if stream is None:
        stream = stdin

    all_r = 0
    unmapped = 0
    error_pos = 0
    error_cig = 0
    correct_indel = 0

    for fl in stream:
        fl = fl.strip()
        # Header lines start with '@' and carry no alignment to score.
        if fl.startswith('@'):
            continue
        fls = fl.split('\t')
        # The CIGAR is the sixth column, so at least six fields are required.
        if len(fls) < 6:
            continue

        read_name = fls[0]
        chro = fls[2]
        chro_pos = int(fls[3])
        cigar = fls[5]

        # R_chr12_110788490_49M1I50M
        oracle_info = read_name.split('_')
        oracle_cigar = oracle_info[-1]
        oracle_pos = int(oracle_info[-2])
        oracle_chro = '_'.join(oracle_info[1:-2])

        is_correct = True
        is_cigar_problem = False

        all_r += 1

        # A chromosome field shorter than two characters (e.g. "*") is
        # treated as an unmapped read.
        if len(chro) < 2:
            unmapped += 1
            continue

        if chro != oracle_chro:
            is_correct = False

        if is_correct:
            if abs(oracle_pos - chro_pos) > 5:
                is_correct = False

        if is_correct:
            # Only the total length per operation is compared, not the layout.
            for op in ('I', 'D', 'N'):
                if cigar_method(oracle_cigar, op) != cigar_method(cigar, op):
                    is_correct = False
                    is_cigar_problem = True

        if cigar_method(oracle_cigar, 'I') > 0 or cigar_method(oracle_cigar, 'D') > 0:
            if is_correct:
                correct_indel += 1

        if not is_correct:
            if is_cigar_problem:
                print("%s\t%s" % (cigar, oracle_cigar))
                error_cig += 1
            else:
                error_pos += 1
                print("%s,%d\t%s,%d\t\t%s" % (chro, chro_pos, oracle_chro, oracle_pos, read_name))

    print("RES: %d reads\t%d unmapped\t%d error_pos\t%d error_cigar\t%d correct_indel" % (all_r, unmapped, error_pos, error_cig, correct_indel))
    return (all_r, unmapped, error_pos, error_cig, correct_indel)


if __name__ == "__main__":
    score_alignments()
-------------------------------------------------------------------------------- /test/subread-align/readname_ora_match.py: -------------------------------------------------------------------------------- 1 | from sys import stdin 2 | 3 | def main(): 4 | unmatched = 0 5 | unmapped= 0 6 | matched = 0 7 | paired_match =0 8 | NN = 0 9 | 10 | line = 0 11 | old_read_name = None 12 | old_pos = 0 13 | 14 | while True: 15 | fl = stdin.readline() 16 | if not fl: break 17 | if fl[0]=='@':continue 18 | linfo = fl.split('\t') 19 | read_name = linfo[0] 20 | if len(linfo[2])>30: 21 | sam_chro = linfo[7] 22 | sam_offset = int(linfo[8]) 23 | elif linfo[3]=='+' or linfo[3]=='-': 24 | sam_chro = linfo[1] 25 | sam_offset = int(linfo[2]) 26 | else: 27 | sam_chro = linfo[2] 28 | sam_offset = int(linfo[3]) 29 | line +=1 30 | 31 | if line % 100000 == 0: 32 | print "L=",line 33 | 34 | if fl.find("NNNNNNNN")>0: 35 | NN+=1 36 | continue 37 | 38 | if len(sam_chro)<2: 39 | unmapped +=1 40 | continue 41 | 42 | if read_name.find ('/')>0: 43 | name_info = read_name.split('/') 44 | pair_number = int(name_info[1]) 45 | 46 | if read_name.find ('/')>0: 47 | name_info = read_name.rsplit('_',5) 48 | else: 49 | name_info = read_name.rsplit('_',5) 50 | 51 | if len(name_info)<3: continue 52 | 53 | if name_info[2].count(':')>0: 54 | ora_chro = sam_chro 55 | ora_pos1 = int(name_info[1]) 56 | ora_pos2 = int(name_info[1]) 57 | else: 58 | ora_chro = name_info[0] 59 | ora_pos1 = int(name_info[1]) 60 | ora_pos2 = int(name_info[2]) 61 | 62 | t_name = ora_chro+name_info[1]+name_info[2] 63 | if t_name != old_read_name: 64 | old_pos = sam_offset 65 | 66 | if (not sam_chro.endswith( ora_chro)) or not (abs(ora_pos1 - sam_offset) < 1200 or abs(ora_pos2 - sam_offset)<1200): 67 | unmatched +=1 68 | # if sam_chro != ora_chro: 69 | # print read_name,"\t", sam_chro, ora_chro, ora_pos1 , sam_offset, ora_pos2, fl.strip() 70 | else: 71 | matched +=1 72 | if t_name == old_read_name and (abs(old_pos - ora_pos1) < 1200): 73 | 
paired_match +=2 74 | # else: 75 | # print "Unpired_match: old=", old_pos," new1", sam_chro, sam_offset, "ora1", ora_chro, ora_pos1 76 | 77 | old_read_name = t_name 78 | 79 | print "unmatched=",unmatched, "; matched=",matched, "; unmapped=", unmapped, "; reads=", line, " ;NN=", NN 80 | print "accuracy=",(matched*1.)/(matched+unmatched)," ; sensitivity=",(matched+unmatched)*1./(line-NN) 81 | print "paired_match=", paired_match," ; paired=", (paired_match*1./(matched)) 82 | 83 | main() 84 | -------------------------------------------------------------------------------- /test/subread-align/subread-align-test.sh: -------------------------------------------------------------------------------- 1 | SUBREAD_HOME=../../bin/ 2 | PYTHON_EXEC=python 3 | 4 | rm test-tmp.log 5 | mkdir -p result 6 | 7 | $SUBREAD_HOME/subread-buildindex -B -F -o ../small1 -M100 ../chr901.fa 8 | md5sum ../small1.00.b.array >> test-tmp.log 9 | md5sum ../small1.00.b.tab >> test-tmp.log 10 | 11 | echo "*************************************************" >> test-tmp.log 12 | echo "*** SINGLE-END READS NO ERROR ******" >> test-tmp.log 13 | echo "*************************************************" >> test-tmp.log 14 | echo >>test-tmp.log 15 | 16 | $SUBREAD_HOME/subread-align --SAMoutput -t0 -P6 -i ../small1 -r data/test-noerror-r1.fq -o result/test-tmp.sam -H -J 17 | cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log 18 | 19 | 20 | echo "*************************************************" >> test-tmp.log 21 | echo "*** SINGLE-END READS NO ERROR NO DUP ******" >> test-tmp.log 22 | echo "*************************************************" >> test-tmp.log 23 | echo >>test-tmp.log 24 | 25 | $SUBREAD_HOME/subread-align --SAMoutput -t0 -P6 -i ../small1 -r data/test-noerror-r1.fq -o result/test-tmp.sam -H -J 26 | cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log 27 | 28 | 29 | 30 | echo >>test-tmp.log 31 | echo "*************************************************" 
>> test-tmp.log 32 | echo "*** READS WITH NO ERROR ******" >> test-tmp.log 33 | echo "*************************************************" >> test-tmp.log 34 | echo >>test-tmp.log 35 | 36 | $SUBREAD_HOME/subread-align --SAMoutput -t0 -P6 -i ../small1 -r data/test-noerror-r1.fq -R data/test-noerror-r2.fq -o result/test-tmp.sam -H -J 37 | cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >> test-tmp.log 38 | echo >>test-tmp.log 39 | 40 | echo "*************************************************" >> test-tmp.log 41 | echo "*** READS NO ERROR, NO DUPLICATED REPORT ******" >> test-tmp.log 42 | echo "*************************************************" >> test-tmp.log 43 | echo >>test-tmp.log 44 | 45 | $SUBREAD_HOME/subread-align --SAMoutput -t0 -P6 -i ../small1 -r data/test-noerror-r1.fq -R data/test-noerror-r2.fq -o result/test-tmp.sam -Q -J 46 | cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log 47 | 48 | 49 | echo >>test-tmp.log 50 | 51 | echo "*************************************************" >> test-tmp.log 52 | echo "*** READS WITH ONLY SEQUENCING ERROR ******" >> test-tmp.log 53 | echo "*************************************************" >> test-tmp.log 54 | echo >>test-tmp.log 55 | 56 | $SUBREAD_HOME/subread-align --SAMoutput -t0 -P6 -i ../small1 -r data/test-error-r1.fq -R data/test-error-r2.fq -o result/test-tmp.sam -H -J 57 | cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log 58 | 59 | echo >>test-tmp.log 60 | echo "*************************************************" >> test-tmp.log 61 | echo "*** READS WITH SEQUENCING ERROR AND MUTATION ***" >> test-tmp.log 62 | echo "*** SUBREAD IS RUN WITH LONG INDEL DETECTION ***" >> test-tmp.log 63 | echo "*************************************************" >> test-tmp.log 64 | echo >>test-tmp.log 65 | 66 | $SUBREAD_HOME/subread-align --SAMoutput -t0 -P6 -i ../small1 --gzFASTQinput -r data/test-err-mut-r1.fq.gz -R data/test-err-mut-r2.fq.gz -o result/test-tmp.sam -H -J 
--rg-id MyTestGroup --rg SM:sample1 --rg TP:1 --rg XX:YY 67 | cat result/test-tmp.sam | $PYTHON_EXEC readname_ora_match.py >>test-tmp.log 68 | 69 | cat test-tmp.log 70 | -------------------------------------------------------------------------------- /test/test_all.sh: -------------------------------------------------------------------------------- 1 | #!bash 2 | echo |awk '{printf("%c[2J%c[0;0H%c[30;47m", 27,27,27)}' 3 | echo 4 | echo 5 | echo 6 | echo 7 | echo " ************************************************** " 8 | echo " ************************************************** " 9 | echo " *** *** " 10 | echo " *** This script will test the major functions *** " 11 | echo " *** in our package, including the index build- *** " 12 | echo " *** er, subread-align, subjunc, featureCounts *** " 13 | echo " *** and exactSNP. *** " 14 | echo " *** *** " 15 | echo " *** Test will start in seconds. *** " 16 | echo " *** *** " 17 | echo " ************************************************** " 18 | echo " ************************************************** " 19 | echo 20 | 21 | echo |awk -v secs=9 '{printf("%c[13;26H%c[33;44m%s",27,27,secs)}' 22 | for secs in {0..8} 23 | do 24 | sleep 1 25 | sec0=` echo 8-$secs | bc ` 26 | echo |awk -v secs=$sec0 '{printf("%c[13;26H%s",27,secs)}' 27 | done 28 | 29 | echo |awk '{printf("%c[0m%c[2J%c[0,0H",27,27, 27)}' 30 | #test subread-align 31 | cd subread-align 32 | sh subread-align-test.sh 33 | 34 | #test subjunc 35 | cd ../subjunc 36 | sh subjunc-test.sh 37 | 38 | #test featureCounts 39 | cd ../featureCounts 40 | sh featureCounts-test.sh 41 | 42 | #test exactSNP 43 | cd ../exactSNP 44 | sh exactSNP-test.sh 45 | 46 | echo |awk '{printf("%c[30;47m", 27)}' 47 | echo 48 | echo 49 | echo " ************************************************** " 50 | echo " ************************************************** " 51 | echo " *** *** " 52 | echo " *** Test finished. 
*** " 53 | echo " *** *** " 54 | echo " *** Should there be any error, please visit *** " 55 | echo | awk '{printf(" *** %c[34mhttp://subread.sourceforge.net/%c[0;30;47m for more *** \n", 27,27);}' 56 | echo " *** information. *** " 57 | echo " *** *** " 58 | echo " ************************************************** " 59 | echo " ************************************************** " 60 | echo 61 | echo |awk '{printf("%c[0m", 27)}' 62 | echo 63 | echo 64 | -------------------------------------------------------------------------------- /tutorial/Aligning.md: -------------------------------------------------------------------------------- 1 | # Tutorial of using the Subread aligner for read mapping 2 | 3 | Rsubread provides a highly accurate and efficient read aligner called ``subread-align``. This tutorial demonstrates a workflow for mapping HTS (high-throughput sequencing) data using our read aligner. 4 | 5 | ## Prerequisites 6 | 7 | You need an R environment installed on your computer. It can be a Windows PC, a Linux server or a macOS computer, but it must be a 64-bit system. A computer built in or after 2012 is almost certainly a 64-bit system. Your computer also needs at least 100GB of disk space and 16GB of RAM. 8 | 9 | If you are using Windows or macOS, RStudio is recommended because it has everything ready for installing our package. If you run Linux, you need to have a modern version of gcc installed with libz-devel. 10 | 11 | ## Installing Rsubread 12 | 13 | After entering the R environment, you can enter commands. The commands for installing the latest version of the R package are: 14 | 15 | ```R 16 | if (!requireNamespace("BiocManager", quietly=TRUE)) 17 | install.packages("BiocManager") 18 | 19 | BiocManager::install("Rsubread") 20 | ``` 21 | You will be asked a few questions about where to download the related files and (sometimes) whether you want to upgrade related packages. 
After you select the best server for downloading and upgrading "all" related packages, it takes a while to get everything ready. No errors should be reported during the installation process, but it is not uncommon to encounter some. You may report the error through [the supporting site of Bioconductor](https://support.bioconductor.org/). 22 | 23 | After successful installation of Rsubread, you will be able to load the package into R: 24 | 25 | ```R 26 | library(Rsubread) 27 | sessionInfo() 28 | ``` 29 | You will be able to see Rsubread in the output from sessionInfo(). 30 | 31 | ## Downloading the data 32 | We provide a set of publicly available data. 33 | 34 | 35 | ## Building an index 36 | Read aligners need to pre-process the reference genome sequences to efficiently map reads to them. This step is usually called index building. A function is provided in Rsubread for building an index on the given reference genome. 37 | ```R 38 | buildindex("hg38-index", "hg38.fasta.gz", indexSplit=FALSE, gappedIndex=TRUE) 39 | ``` 40 | 41 | If your computer has 32GB or more memory, you can set ``gappedIndex=FALSE`` to build a full index. A full index uses around 3 times more memory than a gapped index, but can greatly accelerate read mapping and deliver higher mapping quality. 42 | 43 | ## Read mapping 44 | The Rsubread package provides two functions for read mapping: ``align()`` and ``subjunc()``. The ``align()`` function can map both DNA-seq and RNA-seq data while the ``subjunc()`` function is dedicated to mapping RNA-seq reads, but with all exons detected in reads that contain exon-exon junctions. 45 | 46 | Although it only maps at most one exon in each read, the ``align()`` function is usually good enough for gene Differential-Expression (DE) analyses. The ``subjunc()`` function is for specific analyses that involve calling of known/de novo exon-exon junctions. 
47 | 48 | ```R 49 | align.sum <- align( 50 | "hg38-index", "SEQC-A_R1.fastq.gz", readfile2="SEQC-A_R2.fastq.gz", output_file="SEQC-A-align.bam", 51 | nthreads=4, useAnnotation=TRUE, annot.inbuilt="hg38" 52 | ) 53 | 54 | junc.sum <- subjunc( 55 | "hg38-index", "SEQC-A_R1.fastq.gz", readfile2="SEQC-A_R2.fastq.gz", output_file="SEQC-A-subjunc.bam", 56 | nthreads=4, useAnnotation=TRUE, annot.inbuilt="hg38" 57 | ) 58 | 59 | print(align.sum) 60 | print(junc.sum) 61 | ``` 62 | The mapping summary will be printed on screen, and you can find two new files named ``SEQC-A-align.bam`` and ``SEQC-A-subjunc.bam`` in the current working directory. The files contain the mapping results of the SEQC-A sample. You can also find some other files with names that have the same prefixes: the ``indel.vcf`` files give the short insertions and deletions (indels) detected in the samples, and the ``junction.bed`` file gives the list of exon-exon junction points called in reads, with the numbers of reads supporting each junction. 63 | 64 | ## Next step 65 | The next step after you have the mapping results depends on the purpose of your study. If you are calling differentially expressed genes, you may proceed to our tutorial for counting reads using featureCounts. 66 | --------------------------------------------------------------------------------