├── .gitignore ├── LICENSE.txt ├── Makefile ├── README.md ├── demo ├── abra_demo.bam ├── abra_demo.bam.bai ├── demo.bash └── demo.bed ├── docker └── Dockerfile ├── lib └── libIntelDeflater.so ├── misc └── example.png ├── paper └── paper.txt ├── pom.xml ├── poster ├── AACR17_ABRA2.pdf └── AGBT14_ABRA.pdf ├── src ├── main │ ├── c │ │ ├── Makefile │ │ ├── abra_NativeAssembler.h │ │ ├── abra_NativeSemiGlobalAligner.h │ │ ├── assembler.cpp │ │ ├── sg_aligner.cpp │ │ ├── sparsehash │ │ │ ├── dense_hash_map │ │ │ ├── dense_hash_set │ │ │ ├── internal │ │ │ │ ├── densehashtable.h │ │ │ │ ├── hashtable-common.h │ │ │ │ ├── libc_allocator_with_realloc.h │ │ │ │ ├── sparseconfig.h │ │ │ │ └── sparsehashtable.h │ │ │ ├── sparse_hash_map │ │ │ ├── sparse_hash_set │ │ │ ├── sparsetable │ │ │ ├── template_util.h │ │ │ └── type_traits.h │ │ ├── test.c │ │ └── test2.c │ └── java │ │ ├── abra │ │ ├── Abra.java │ │ ├── AbraRunnable.java │ │ ├── AltContigGenerator.java │ │ ├── AssemblerSettings.java │ │ ├── ChromosomeChunker.java │ │ ├── ChromosomeRegex.java │ │ ├── CigarUtils.java │ │ ├── Clock.java │ │ ├── CompareToReference2.java │ │ ├── ConsensusSequence.java │ │ ├── ContigAligner.java │ │ ├── DownsampledReadList.java │ │ ├── Feature.java │ │ ├── IndelShifter.java │ │ ├── JunctionUtils.java │ │ ├── KmerSizeEvaluator.java │ │ ├── Logger.java │ │ ├── MultiSamReader.java │ │ ├── NativeAssembler.java │ │ ├── NativeLibraryLoader.java │ │ ├── NativeSemiGlobalAligner.java │ │ ├── Options.java │ │ ├── Pair.java │ │ ├── ReAligner.java │ │ ├── ReAlignerOptions.java │ │ ├── ReAlignerRunnable.java │ │ ├── ReadEvaluator.java │ │ ├── ReadPair.java │ │ ├── RealignmentWriter.java │ │ ├── ReferenceEvaluator.java │ │ ├── RegionLoader.java │ │ ├── ReverseComplementor.java │ │ ├── SAMRecordUtils.java │ │ ├── SAMRecordWrapper.java │ │ ├── ScoredContig.java │ │ ├── SemiGlobalAligner.java │ │ ├── Sequence.java │ │ ├── SimpleMapper.java │ │ ├── SortedSAMWriter.java │ │ ├── SortedSAMWriterRunnable.java │ 
│ ├── SortingSAMRecordCollection.java │ │ ├── ThreadManager.java │ │ ├── Variant.java │ │ └── cadabra │ │ │ ├── Allele.java │ │ │ ├── AlleleCounts.java │ │ │ ├── BetaBinomial.java │ │ │ ├── Cadabra.java │ │ │ ├── CadabraOptions.java │ │ │ ├── CadabraProcessor.java │ │ │ ├── CadabraRunnable.java │ │ │ ├── FishersExactTest.java │ │ │ ├── ForwardShiftInsertIterator.java │ │ │ ├── HomopolymerRun.java │ │ │ ├── IndelInfo.java │ │ │ ├── ReadLocusReader.java │ │ │ ├── ReadsAtLocus.java │ │ │ ├── RepeatUtils.java │ │ │ ├── SimpleAlleleCounter.java │ │ │ ├── SimpleCaller.java │ │ │ ├── SomaticLocusCaller.java │ │ │ └── SpliceJunctionCounter.java │ │ └── htsjdk │ │ └── samtools │ │ └── util │ │ └── SortingCollection2.java ├── python │ ├── assign_unmapped_pos.py │ └── filter_intron_adjacent_indels.py └── test │ └── java │ └── abra │ ├── ChromosomeRegexTest.java │ ├── CigarUtilsTest.java │ ├── FeatureTest.java │ ├── IndelShifterTest.java │ ├── JunctionUtilsTest.java │ ├── MultiSamReaderTest.java │ ├── ReAlignerOptionsTest.java │ ├── ReadEvaluatorTest.java │ ├── RealignerTest.java │ ├── ReverseComplementorTest.java │ ├── SAMRecordUtilsTest.java │ ├── ScoredContigTest.java │ ├── SemiGlobalAlignerTest.java │ ├── SimpleMapperTest.java │ └── cadabra │ ├── HomopolymerRunTest.java │ └── RepeatUtilsTest.java ├── temp ├── abra-0.94b.jar ├── abra-0.94c.jar └── temp.txt ├── test-data ├── annotation1.gtf ├── junctions1.tab ├── sample1.bam ├── sample1.bam.bai ├── sample2.bam ├── sample2.bam.bai └── test.fa └── test2 /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Package Files # 4 | *.jar 5 | *.war 6 | *.ear 7 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2012-2014 Lineberger Comprehensive Cancer Center, University of North Carolina at Chapel 
Hill 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Make file for ABRA
2 | # libAbra is invoked from the ABRA java code
3 | # JAVA_HOME must point at a JDK so the JNI headers can be found.
4 |
5 | SRCDIR=src/main/c
6 |
7 | # Keep in sync with the project version in pom.xml; used to locate the jar.
8 | ABRA_VERSION ?= 2.24
9 |
10 | # Include paths shared by every native target.
11 | JNI_INCLUDES=-I$(SRCDIR) -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux
12 |
13 | # Command-style targets: always run, even if a file of the same name exists.
14 | .PHONY: all java mktargetdir native standalone sga clean javah
15 |
16 | all: clean native java
17 |
18 | java:
19 | 	mvn package
20 |
21 | # -p: do not fail when target/ already exists (e.g. "make native" run twice).
22 | mktargetdir:
23 | 	mkdir -p target
24 |
25 | native: mktargetdir
26 | 	g++ -g -O2 $(JNI_INCLUDES) -shared -fPIC $(SRCDIR)/assembler.cpp $(SRCDIR)/sg_aligner.cpp -o target/libAbra.so
27 |
28 | # The assembler source is assembler.cpp; there is no assembler.c in SRCDIR.
29 | standalone:
30 | 	g++ -g $(JNI_INCLUDES) $(SRCDIR)/assembler.cpp -o abra
31 |
32 | sga:
33 | 	g++ -g -O2 $(JNI_INCLUDES) $(SRCDIR)/sg_aligner.cpp -o sga
34 |
35 | clean:
36 | 	rm -rf target
37 | 	mvn clean
38 |
39 | # Regenerates the JNI header for abra.NativeAssembler from the packaged jar.
40 | javah: java
41 | 	javah -classpath target/abra2-$(ABRA_VERSION).jar abra.NativeAssembler
42 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ABRA2
2 |
3 | ABRA2 is an updated implementation of [ABRA](https://github.com/mozack/abra) featuring:
4 | * RNA support
5 | * Improved scalability (Human whole genomes now supported)
6 | * Improved accuracy
7 | * Improved stability and usability (BWA is no longer required to run ABRA although we do recommend BWA as the initial aligner for DNA)
8 |
9 | Manuscript: https://doi.org/10.1093/bioinformatics/btz033
10 |
11 | ## Running
12 |
13 | ABRA2 requires Java 8.
14 |
15 | We recommend running from a pre-compiled release.
16 | Go to the Releases tab to download a recent version.
17 |
18 | ### DNA
19 |
20 | Sample command for DNA:
21 |
22 | ```java -Xmx16G -jar abra2.jar --in normal.bam,tumor.bam --out normal.abra.bam,tumor.abra.bam --ref hg38.fa --threads 8 --targets targets.bed --tmpdir /your/tmpdir > abra.log```
23 |
24 | The above accepts normal.bam and tumor.bam as input and outputs sorted realigned BAM files named normal.abra.bam and tumor.abra.bam.
25 |
26 | * Input files must be sorted by coordinate and indexed
27 | * Output files are sorted
28 | * The tmpdir may grow large. Be sure you have sufficient space there (at least equal to the input file size)
29 | * The targets argument is not required. When omitted, the entire genome will be eligible for realignment.
30 |
31 | ### RNA
32 |
33 | ABRA2 is capable of utilizing junction information to aid in assembly and realignment. It has been tested only on STAR output to date.
34 |
35 | Sample command for RNA:
36 |
37 | ```java -Xmx16G -jar abra2.jar --in star.bam --out star.abra.bam --ref hg38.fa --junctions bam --threads 8 --gtf gencode.v26.annotation.gtf --dist 500000 --sua --tmpdir /your/tmpdir > abra2.log 2>&1```
38 |
39 | Here, star.bam is the input bam file and star.abra.bam is the output bam file.
40 |
41 | Junctions observed during alignment can be passed in using the ```--junctions``` param. The input file format is similar to the SJ.out.tab file output by STAR. If ```bam``` is specified, ABRA2 will identify splice junctions from the BAM file on the fly. Note that the SJ.out.tab file contains only junctions deemed "high quality" by STAR. The complete set of all splice junctions can be identified using the program ```abra.cadabra.SpliceJunctionCounter```.
42 |
43 | Annotated junctions can be passed in using the ```--gtf``` param. See: https://www.gencodegenes.org/releases/current.html
44 | It is beneficial to use both of the junction-related options.
45 |
46 | Known indels can be passed in using the --in-vcf argument.
Unannotated junctions originally identified as splices by the aligner may be converted to deletions if a known deletion is matched. Consider this option if you have indels detected from DNA for the same sample / subject. It is not recommended to use large datasets when using this option (i.e. don't pass in dbSNP).
47 |
48 |
49 |
--------------------------------------------------------------------------------
/demo/abra_demo.bam:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/demo/abra_demo.bam
--------------------------------------------------------------------------------
/demo/abra_demo.bam.bai:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/demo/abra_demo.bam.bai
--------------------------------------------------------------------------------
/demo/demo.bash:
--------------------------------------------------------------------------------
1 | # Demonstration command line for ABRA.
2 | # a recent version of java must be in your command path
3 | # Assumes your current working directory is the demo directory
4 |
5 | # Path to ABRA jar file (with dependencies)
6 | # If you've downloaded the jar, set this to the appropriate location
7 | # Compiled jars will be under the target directory
8 | # The glob is expanded where $JAR is used unquoted below.
9 | JAR=../target/abra2-*-jar-with-dependencies.jar
10 |
11 | # Path to hg19 reference
12 | # Set it here, or export REF before running this script.
13 | #REF=/datastore/rclbg/nextgenout3/MOSE_TEST/abra/brca/ref/GRCh37-lite.fa
14 | REF=${REF:-}
15 |
16 | # An empty REF would leave --ref without a value on the java command line.
17 | if [ -z "$REF" ]; then
18 |     echo "REF is not set: edit demo.bash or export REF=/path/to/reference.fa" >&2
19 |     exit 1
20 | fi
21 |
22 | echo "ABRA demo starting..."
23 |
24 | # Report success only when java exits with status 0.
25 | if java -Xmx4G -jar $JAR --ref "$REF" --in abra_demo.bam --out abra_demo_realigned.bam --targets demo.bed > abra_demo.log 2>&1; then
26 |     echo "ABRA demo done. Realigned BAM: abra_demo_realigned.bam."
27 | else
28 |     echo "ABRA demo failed. See abra_demo.log for details." >&2
29 |     exit 1
30 | fi
--------------------------------------------------------------------------------
/demo/demo.bed:
--------------------------------------------------------------------------------
1 | 10 3208376 3208616
2 |
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM alpine
2 | # NOTE(review): both labels use the key MAINTAINER, so the second value
3 | # replaces the first in the built image.
4 | LABEL MAINTAINER "Lisle Mose "
5 | LABEL MAINTAINER "Alan Hoyle "
6 |
7 | # --no-cache: use a fresh package index and leave no apk cache in the layer.
8 | RUN apk add --no-cache \
9 |   libc6-compat \
10 |   openjdk8
11 |
12 | ARG ABRA2_VERSION=2.20
13 | ENV ABRA2_VERSION ${ABRA2_VERSION}
14 | # JVM options applied by the entrypoint; override with "docker run -e JAVA_OPTS=...".
15 | ENV JAVA_OPTS "-Xmx16G"
16 |
17 | ADD https://github.com/mozack/abra2/releases/download/v${ABRA2_VERSION}/abra2-${ABRA2_VERSION}.jar /
18 |
19 | RUN chmod 755 /abra2-${ABRA2_VERSION}.jar && \
20 |   ln -s /abra2-${ABRA2_VERSION}.jar /abra2.jar
21 |
22 | # The JVM does not read JAVA_OPTS on its own, so java is started through a
23 | # shell that expands it. "exec" replaces the shell with java; "$@" forwards
24 | # the arguments given to "docker run".
25 | ENTRYPOINT [ "/bin/sh", "-c", "exec java $JAVA_OPTS -jar /abra2.jar \"$@\"", "abra2" ]
26 | # CMD [ --help ]
27 |
--------------------------------------------------------------------------------
/lib/libIntelDeflater.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/lib/libIntelDeflater.so
--------------------------------------------------------------------------------
/misc/example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/misc/example.png
--------------------------------------------------------------------------------
/paper/paper.txt:
--------------------------------------------------------------------------------
1 | Software
2 | --------
3 | FreeBayes version 0.9.9 check out recursively from revision: cc993c5c07e7673
4 | bwa version 0.7.5a-r405
5 | GATK version 2.7-4-g6f46d11
6 | SnpEff version 3.3c
7 | Abra version 0.69
8 |
9 | HapMap Trio
10 | -----------
11 | Reference: hg19 (hs37d5.fa downloaded from 1000
genomes + alternate haplotypes) 12 | Illumina Platinum genomes 50x CEPH trio fastq files downloaded from: http://www.ebi.ac.uk/ena/data/view/ERP001960 13 | Individuals: NA12891, NA12892, NA12878 14 | 15 | Initial alignments done with bwa mem. 16 | 17 | ABRA command line: 18 | java -Xss8M -Xmx32G -XX:MaxPermSize=256M -jar $JAR --in $BAM --kmer 43,53,63,73,83 --mc-mapq 25 --mcl 102 --mcr -1.0 --mnf 2 --umnf 2 --mpc 50000 --out $ABRA_BAM --ref $REF --targets wxs.gtf --threads 16 --working $WORK --mur 50000000 --paired --no-unalign --mbq 5 19 | 20 | FreeBayes command line: 21 | freebayes -t -= -f 22 | Complex variants are decomposed into primitives using the CIGAR string recorded in the FreeBayes produced VCF. 23 | FreeBayes was run against ABRA output and without realignment 24 | 25 | UnifiedGenotyper command line: 26 | java -Xmx4G -jar GenomeAnalysisTK.jar -R -T UnifiedGenotyper --genotype_likelihoods_model BOTH -I -o -L -rf BadCigar 27 | UnifiedGenotyper was run against output from ABRA, output from GATK's local realignment around indels and without realignment. 28 | 29 | GATK Local Realignment around indels command lines: 30 | java -Xmx2G -jar GenomeAnalysisTK.jar -T RealignerTargetCreator -R -I -o 31 | java -Xmx2G -jar GenomeAnalysisTK.jar -T IndelRealigner -R -I -o --targetIntervals 32 | 33 | HaplotypeCaller command line: 34 | java -Xmx4G -jar GenomeAnalysisTK.jar -R -T HaplotypeCaller -I -o -rf BadCigar -L --bamOutput 35 | HaplotypeCaller was run without realignment 36 | 37 | All samples processed / called independently. 38 | Only variants called within exome capture target regions are considered. 39 | Variants within 100 bases of one another are collapsed into a single event. 40 | SnpEff was applied to filter variants not annotated as HIGH or MODERATE impact. 
41 | 42 | TCGA data 43 | --------- 44 | 750 Breast (BRCA) tumor/normal paired BAMs downloaded from the Cancer Genomics Hub: https://cghub.ucsc.edu/ 45 | 46 | ABRA command line: 47 | java -Xss8M -Xmx20G -XX:MaxPermSize=256M -jar $JAR --in normal.bam --in2 tumor.bam --kmer 43,53,63,73,83 --mc-mapq 25 --mcl 101 --mcr -1.0 --mnf 2 --umnf 2 --mpc 50000 --out normal.abra69.bam --out2 tumor.abra69.bam --ref $REF --targets $TARGETS --threads 8 --working working69 --mur 50000000 --paired --no-unalign --mbq 20 48 | 49 | FreeBayes was used for germline calling. Strelka was used for somatic calling. 50 | 51 | Special Strelka options: 52 | isSkipDepthFilters = 1 53 | isWriteRealignedBam = 1 54 | extraStrelkaArguments = -min-small-candidate-indel-read-frac 0.01 --ignore-conflicting-read-names 55 | 56 | For FreeBayes, variants with qual < 30 are filtered. 57 | For Strelka, variants with qual < 30 or normal genotype = 'het' are filtered. 58 | 59 | Only variants called within exome capture target regions are considered. 60 | SnpEff was applied to filter variants not annotated as HIGH or MODERATE impact. 
61 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | abra2 7 | abra2 8 | 2.24 9 | jar 10 | 11 | abra 12 | http://maven.apache.org 13 | 14 | 15 | UTF-8 16 | 17 | 18 | 26 | 27 | 28 | 29 | net.sf.jopt-simple 30 | jopt-simple 31 | 4.3 32 | 33 | 34 | commons-lang 35 | commons-lang 36 | 2.4 37 | 38 | 39 | org.testng 40 | testng 41 | 6.8 42 | test 43 | 44 | 45 | com.beust 46 | jcommander 47 | 1.48 48 | 49 | 50 | 51 | org.easymock 52 | easymock 53 | 3.0 54 | test 55 | 56 | 57 | 58 | 65 | 66 | 67 | 68 | com.github.samtools 69 | htsjdk 70 | 2.8.1 71 | 72 | 73 | 74 | 75 | com.intel.gkl 76 | gkl 77 | 0.5.8 78 | 79 | 80 | 81 | 82 | org.apache.commons 83 | commons-math3 84 | 3.2 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | maven-compiler-plugin 94 | 3.1 95 | 96 | 1.6 97 | 1.6 98 | 99 | 100 | 101 | maven-assembly-plugin 102 | 2.3 103 | 104 | 105 | jar-with-dependencies 106 | 107 | 108 | 109 | abra.Abra 110 | 111 | 112 | 113 | 114 | 115 | make-assembly 116 | package 117 | 118 | single 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | target 127 | 128 | libAbra.so 129 | 130 | 131 | 132 | target 133 | 134 | libssw.so 135 | 136 | 137 | 138 | target 139 | 140 | libsswjni.so 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /poster/AACR17_ABRA2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/poster/AACR17_ABRA2.pdf -------------------------------------------------------------------------------- /poster/AGBT14_ABRA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/poster/AGBT14_ABRA.pdf 
--------------------------------------------------------------------------------
/src/main/c/Makefile:
--------------------------------------------------------------------------------
1 | # Make file for ABRA
2 | # libAbra is invoked from the ABRA java code
3 | # Run from src/main/c. JAVA_HOME must point at a JDK so the JNI headers can be found.
4 |
5 | # Command-style targets: always run, even if a file of the same name exists.
6 | .PHONY: all standalone clean
7 |
8 | # Builds the assembler and the semi-global aligner into one shared library,
9 | # with the same sources and -fPIC flag as the top-level Makefile's native target.
10 | all:
11 | 	g++ -g -I. -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux -shared -fPIC assembler.cpp sg_aligner.cpp -o libAbra.so
12 | #	g++ -g -I. -I/usr/lib/jvm/java-6-openjdk/include -shared assembler.c -o libAbra.so
13 |
14 | # The assembler source is assembler.cpp; there is no assembler.c in this directory.
15 | standalone:
16 | 	g++ -g -I. -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux assembler.cpp -o abra
17 |
18 | # -f: succeed even when the library has not been built yet.
19 | clean:
20 | 	rm -f libAbra.so
21 |
--------------------------------------------------------------------------------
/src/main/c/abra_NativeAssembler.h:
--------------------------------------------------------------------------------
1 | /* DO NOT EDIT THIS FILE - it is machine generated */
2 | #include <jni.h>
3 | /* Header for class abra_NativeAssembler */
4 |
5 | #ifndef _Included_abra_NativeAssembler
6 | #define _Included_abra_NativeAssembler
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 | #undef abra_NativeAssembler_MAX_READ_LENGTHS_PER_REGION
11 | #define abra_NativeAssembler_MAX_READ_LENGTHS_PER_REGION 6L
12 | #undef abra_NativeAssembler_CYCLE_KMER_LENGTH_THRESHOLD
13 | #define abra_NativeAssembler_CYCLE_KMER_LENGTH_THRESHOLD 43L
14 | #undef abra_NativeAssembler_MIN_CANDIDATE_BASE_QUALITY
15 | #define abra_NativeAssembler_MIN_CANDIDATE_BASE_QUALITY 10L
16 | /*
17 |  * Class: abra_NativeAssembler
18 |  * Method: assemble
19 |  * Signature: (Ljava/lang/String;Ljava/lang/String;Ljava/lang/String;IIIIIIIDII)Ljava/lang/String;
20 |  */
21 | JNIEXPORT jstring JNICALL Java_abra_NativeAssembler_assemble
22 |   (JNIEnv *, jobject, jstring, jstring, jstring, jint, jint, jint, jint, jint, jint, jint, jdouble, jint, jint);
23 |
24 | #ifdef __cplusplus
25 | }
26 | #endif
27 | #endif
28 |
--------------------------------------------------------------------------------
/src/main/c/abra_NativeSemiGlobalAligner.h:
-------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include 3 | /* Header for class abra_NativeSemiGlobalAligner */ 4 | 5 | #ifndef _Included_abra_NativeSemiGlobalAligner 6 | #define _Included_abra_NativeSemiGlobalAligner 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: abra_NativeSemiGlobalAligner 12 | * Method: align 13 | * Signature: (Ljava/lang/String;Ljava/lang/String;IIII)Ljava/lang/String; 14 | */ 15 | JNIEXPORT jstring JNICALL Java_abra_NativeSemiGlobalAligner_align 16 | (JNIEnv *, jobject, jstring, jstring, jint, jint, jint, jint); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | #endif 22 | -------------------------------------------------------------------------------- /src/main/c/sg_aligner.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "abra_NativeSemiGlobalAligner.h" 6 | 7 | using namespace std; 8 | 9 | #define DIR_UP 1 10 | #define DIR_DIAG 2 11 | #define DIR_LEFT 3 12 | 13 | #define I 0 14 | #define M 1 15 | #define D 2 16 | 17 | #define MAX_REF_LEN 5000 18 | #define MAX_CONTIG_LEN 2000 19 | 20 | __thread int matrix[MAX_CONTIG_LEN][MAX_REF_LEN][3]; 21 | __thread char bt[MAX_CONTIG_LEN][MAX_REF_LEN][3]; 22 | 23 | __thread int match = 8; 24 | __thread int mismatch_pen = -32; 25 | __thread int gap_open = -48; 26 | __thread int gap_extend = -1; 27 | 28 | void populate(const char* seq1, const char* seq2, int seq1_len, int seq2_len) { 29 | 30 | for (int r=1; r<=seq1_len; r++) { 31 | matrix[r][0][I] = gap_open + (r*gap_extend); 32 | matrix[r][0][M] = gap_open + (r*gap_extend); 33 | matrix[r][0][D] = gap_open + (r*gap_extend); 34 | } 35 | 36 | for (int c=0; c<=seq2_len; c++) { 37 | matrix[0][c][I] = gap_open + (c*gap_extend); 38 | matrix[0][c][M] = 0; 39 | matrix[0][c][D] = gap_open + (c*gap_extend); 40 | } 41 | 42 | for (int r=2; 
r<=seq1_len; r++) { 43 | bt[r][0][I] = DIR_UP; 44 | bt[r][0][D] = DIR_UP; 45 | bt[r][0][M] = DIR_UP; 46 | } 47 | 48 | for (int r=1; r<=seq1_len; r++) { 49 | for (int c=1; c<=seq2_len; c++) { 50 | 51 | // 52 | // Insertion (lower) matrix 53 | int insertExt = matrix[r-1][c][I] + gap_extend; 54 | int insertOpen = matrix[r-1][c][M] + gap_open; 55 | 56 | if (insertExt >= insertOpen) { 57 | matrix[r][c][I] = insertExt; 58 | bt[r][c][I] = DIR_UP; 59 | } else { 60 | matrix[r][c][I] = insertOpen; 61 | bt[r][c][I] = DIR_DIAG; 62 | } 63 | 64 | // Deletion (upper) matrix 65 | int deleteExt = matrix[r][c-1][D] + gap_extend; 66 | int deleteOpen = matrix[r][c-1][M] + gap_open; 67 | 68 | if (deleteExt >= deleteOpen) { 69 | matrix[r][c][D] = deleteExt; 70 | bt[r][c][D] = DIR_LEFT; 71 | } else { 72 | matrix[r][c][D] = deleteOpen; 73 | bt[r][c][D] = DIR_DIAG; 74 | } 75 | 76 | // 77 | // Match/mismatch (middle) matrix 78 | int insertClose = matrix[r][c][I]; 79 | int baseMatch = seq1[r-1] == seq2[c-1] ? (matrix[r-1][c-1][M] + match) : (matrix[r-1][c-1][M] + mismatch_pen); 80 | int deleteClose = matrix[r][c][D]; 81 | 82 | if (baseMatch >=insertClose && baseMatch >= deleteClose) { 83 | matrix[r][c][M] = baseMatch; 84 | bt[r][c][M] = DIR_DIAG; 85 | } else if (insertClose >= deleteClose) { 86 | matrix[r][c][M] = insertClose; 87 | bt[r][c][M] = DIR_UP; 88 | } else { 89 | matrix[r][c][M] = deleteClose; 90 | bt[r][c][M] = DIR_LEFT; 91 | } 92 | } 93 | } 94 | } 95 | 96 | struct cigar_elem { 97 | char op; 98 | int len; 99 | }; 100 | 101 | void update_curr_elem(char op, vector & elems) { 102 | 103 | if (elems.size() == 0 || elems.back().op != op) { 104 | cigar_elem new_elem; 105 | new_elem.op = op; 106 | new_elem.len = 1; 107 | elems.push_back(new_elem); 108 | } else { 109 | elems.back().len += 1; 110 | } 111 | } 112 | 113 | 114 | void backtrack(const char* seq1, const char* seq2, int seq1_len, int seq2_len, char* result) { 115 | int best_idx = -1; 116 | int best_score = -300000000; 117 | int 
second_best_score = -300000000; 118 | int row = seq1_len; 119 | 120 | for (int c=1; c<=seq2_len; c++) { 121 | if (matrix[row][c][M] > best_score) { 122 | best_idx = c; 123 | best_score = matrix[row][c][M]; 124 | } else if (matrix[row][c][M] > second_best_score) { 125 | second_best_score = matrix[row][c][M]; 126 | } 127 | } 128 | 129 | int r = seq1_len; 130 | int c = best_idx; 131 | int ref_end_idx = c; 132 | 133 | vector elems; 134 | 135 | int level = M; 136 | 137 | while (r > 0) { 138 | char curr_bt = bt[r][c][level]; 139 | 140 | if (curr_bt == DIR_DIAG) { 141 | if (level == M) { 142 | r -= 1; 143 | c -= 1; 144 | } else if (level == I) { 145 | r -= 1; 146 | } else if (level == D) { 147 | c -= 1; 148 | } 149 | 150 | if (level == M) { 151 | // If moving back to M level from I or D, skip update. 152 | update_curr_elem('M', elems); 153 | } 154 | 155 | level = M; 156 | 157 | } else if (curr_bt == DIR_LEFT) { 158 | if (level == D) { 159 | c -= 1; 160 | } else if (level == M) { 161 | // noop 162 | } 163 | 164 | level = D; 165 | 166 | update_curr_elem('D', elems); 167 | } else if (curr_bt == DIR_UP) { 168 | if (level == I) { 169 | r -= 1; 170 | } else if (level == M) { 171 | // noop 172 | } 173 | 174 | level = I; 175 | 176 | update_curr_elem('I', elems); 177 | } else { 178 | break; 179 | } 180 | } 181 | 182 | int ref_idx = c; 183 | 184 | char cigar[2056]; 185 | int idx = 0; 186 | for (int i=elems.size()-1; i>=0; i--) { 187 | snprintf(cigar+idx, 2056-idx, "%d%c", elems[i].len, elems[i].op); 188 | idx = strlen(cigar); 189 | } 190 | 191 | sprintf(result, "%d:%d:%d:%d:%s", best_score, second_best_score, ref_idx, ref_end_idx, cigar); 192 | } 193 | 194 | void align(const char* seq1, const char* seq2, char* result) { 195 | 196 | int seq1_len = strlen(seq1); 197 | int seq2_len = strlen(seq2); 198 | 199 | populate(seq1, seq2, seq1_len, seq2_len); 200 | backtrack(seq1, seq2, seq1_len, seq2_len, result); 201 | } 202 | 203 | extern "C" 204 | JNIEXPORT jstring JNICALL 
Java_abra_NativeSemiGlobalAligner_align 205 | (JNIEnv *env, jobject obj, jstring j_seq1, jstring j_seq2, jint j_match, jint j_mismatch, 206 | jint j_gap_open, jint j_gap_extend) { 207 | 208 | match = j_match; 209 | mismatch_pen = j_mismatch; 210 | gap_open = j_gap_open; 211 | gap_extend = j_gap_extend; 212 | 213 | const char* seq1 = env->GetStringUTFChars(j_seq1, 0); 214 | const char* seq2 = env->GetStringUTFChars(j_seq2, 0); 215 | 216 | char result[4098]; 217 | align(seq1, seq2, result); 218 | 219 | // fprintf(stderr, "SGA result: %s\n", result); 220 | 221 | jstring ret = env->NewStringUTF(result); 222 | 223 | env->ReleaseStringUTFChars(j_seq1, seq1); 224 | env->ReleaseStringUTFChars(j_seq2, seq2); 225 | 226 | return ret; 227 | } 228 | 229 | /* 230 | int main(int argc, char* argv[]) { 231 | 232 | const char* ref = "CCAGATCAGCCTAGGCAACATGGTGAAACCCCGTCTCTACCAAAAATAAAAAACTTAGCTGAGCGTGGTGGTGCACGCCTGTAGCCCCAGCTGCTGAGGAGCCTGAGCCCAGGGGGTGGAGGCTGCAGTGAGCCATGATCACACTACTGTACTCCAGCCTAGGTGACAGAGTGAGACCCTGTCTCAAAAAAATAAAAGAAAATAAAAATAAACAAAGAGAGAAGTGGAAGAAGAGGTGGAGTTTTGTATTTATGACTTGAATTTTGTATTCATGACTGGGTTGACACCCCAATCCACTCCATTTTTAGCCTTGAAACATGGCAAACAGTAACCATTAAAAGGATGGAAAAGAGAAGAAGGCATGGGTGGGAAACTGTGCCTCCCATTTTTGTGCATCTTTGTTGCTGTCCTTCCACTATACTGTACCTTTCAGCATTTTGACGGCAACCTGGATTGAGACTCCTGTTTTGCTAATTCCATAAGCTGTTGCGTTCATCACTTTTCCAAAAGCACCTGATCCTAGTACCTTCCCTGCAAAGACAAATGGTGAGTACGTGCATTTTAAAGATTTTCCAATGGAAAAGAAATGCTGCAGAAACATTTGGCACATTCCATTCTTACCAAACTCTAAATTTTCTCTTGGAAACTCCCATTTGAGATCATATTCATATTCTCTGAAATCAACGTAGAAGTACTCATTATCTGAGGAGCCGGTCACCTGTACCATCTGTAGCTGGCTTTCATACCTAAATTGCTTCAGAGATGAAATGATGAGTCAGTTAGGAATAGGCAGTTCTGCAGATAGAGGAAAGAATAATGAATTTTTACCTTTGCTTTTACCTTTTTGTACTTGTGACAAATTAGCAGGGTTAAAACGACAATGAAGAGGAGACAAACACCAATTGTTGCATAGAATGAGATGTTGTCTTGGATGAAAGGGAAGGGGCCTGCAACAAAAGAGTGTCACTCAGCGATGAAACAGAATTCCTGTGTGACATTATAAATAGTGGACAACTCATTATAATCTCTCACATCCTGTTTCAGTAATAATCATTTTCAGTCCTAACAACCACTCTACATATACTCTACTCCCCACAGACAATCAGGCAATGTCCCTGTAAAGGATACATTTCCTCCCTAGAAAATTGCGGATTATTCTCAATCCATTCTTT
AAAACCATTTACTAGGGTAAATTTACAAGAATTACATCTGGTCCAGGCACGATGGCTCACGCCTGTAGTCCCAGCACTTTGGGAGGCCAAGATGGGAGGATCACTTGAGTCCAAGAATTAGACACCAGCCCAGGCAACACAGTGAAATCCCGTCTCTAAAAAAATTCAAAAATTAGCTGGGCGTGGTGGCAGGTGCCTGTAATCCCAGCTGCTCGGGAGGCTGAGGCAGGAG"; 233 | const char* seq = "CCATTCTTACCAAACTCTAAATTTTCTCTTGGAAACTCCCATTTGAGATCATATTCATATTCTCTGAAATCAACGTAGAAGTACTCATTATCTGAGGAGCCGCACATTCCATTCTTGCCAAACTCTAGATTTTCTCTTGGAAACTCCCATTTGAGATCACATTCATATTCTCTGAAATCAACGTAGAAGTACTCATTATCTGAGGAGCCGGTCACCCGTACCATCTGTAGC"; 234 | 235 | 236 | for (int i=0; i<1000; i++) { 237 | char result[4098]; 238 | align(seq, ref, result); 239 | 240 | printf("result: %s\n", result); 241 | } 242 | } 243 | */ 244 | -------------------------------------------------------------------------------- /src/main/c/sparsehash/internal/libc_allocator_with_realloc.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2010, Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc. nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 
17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // --- 31 | 32 | #ifndef UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_ 33 | #define UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_ 34 | 35 | #include 36 | #include // for malloc/realloc/free 37 | #include // for ptrdiff_t 38 | #include // for placement new 39 | 40 | _START_GOOGLE_NAMESPACE_ 41 | 42 | template 43 | class libc_allocator_with_realloc { 44 | public: 45 | typedef T value_type; 46 | typedef size_t size_type; 47 | typedef ptrdiff_t difference_type; 48 | 49 | typedef T* pointer; 50 | typedef const T* const_pointer; 51 | typedef T& reference; 52 | typedef const T& const_reference; 53 | 54 | libc_allocator_with_realloc() {} 55 | libc_allocator_with_realloc(const libc_allocator_with_realloc&) {} 56 | ~libc_allocator_with_realloc() {} 57 | 58 | pointer address(reference r) const { return &r; } 59 | const_pointer address(const_reference r) const { return &r; } 60 | 61 | pointer allocate(size_type n, const_pointer = 0) { 62 | return static_cast(malloc(n * sizeof(value_type))); 63 | } 64 | void deallocate(pointer p, size_type) { 65 | free(p); 66 | } 67 | pointer reallocate(pointer p, size_type n) { 68 | return 
static_cast(realloc(p, n * sizeof(value_type))); 69 | } 70 | 71 | size_type max_size() const { 72 | return static_cast(-1) / sizeof(value_type); 73 | } 74 | 75 | void construct(pointer p, const value_type& val) { 76 | new(p) value_type(val); 77 | } 78 | void destroy(pointer p) { p->~value_type(); } 79 | 80 | template 81 | libc_allocator_with_realloc(const libc_allocator_with_realloc&) {} 82 | 83 | template 84 | struct rebind { 85 | typedef libc_allocator_with_realloc other; 86 | }; 87 | }; 88 | 89 | // libc_allocator_with_realloc specialization. 90 | template<> 91 | class libc_allocator_with_realloc { 92 | public: 93 | typedef void value_type; 94 | typedef size_t size_type; 95 | typedef ptrdiff_t difference_type; 96 | typedef void* pointer; 97 | typedef const void* const_pointer; 98 | 99 | template 100 | struct rebind { 101 | typedef libc_allocator_with_realloc other; 102 | }; 103 | }; 104 | 105 | template 106 | inline bool operator==(const libc_allocator_with_realloc&, 107 | const libc_allocator_with_realloc&) { 108 | return true; 109 | } 110 | 111 | template 112 | inline bool operator!=(const libc_allocator_with_realloc&, 113 | const libc_allocator_with_realloc&) { 114 | return false; 115 | } 116 | 117 | _END_GOOGLE_NAMESPACE_ 118 | 119 | #endif // UTIL_GTL_LIBC_ALLOCATOR_WITH_REALLOC_H_ 120 | -------------------------------------------------------------------------------- /src/main/c/sparsehash/internal/sparseconfig.h: -------------------------------------------------------------------------------- 1 | /* 2 | * NOTE: This file is for internal use only. 3 | * Do not use these #defines in your own program! 4 | */ 5 | 6 | /* Namespace for Google classes */ 7 | #define GOOGLE_NAMESPACE ::google 8 | 9 | /* the location of the header defining hash functions */ 10 | #define HASH_FUN_H 11 | 12 | /* the namespace of the hash<> function */ 13 | #define HASH_NAMESPACE std::tr1 14 | 15 | /* Define to 1 if you have the header file. 
*/ 16 | #define HAVE_INTTYPES_H 1 17 | 18 | /* Define to 1 if the system has the type `long long'. */ 19 | #define HAVE_LONG_LONG 1 20 | 21 | /* Define to 1 if you have the `memcpy' function. */ 22 | #define HAVE_MEMCPY 1 23 | 24 | /* Define to 1 if you have the <stdint.h> header file. */ 25 | #define HAVE_STDINT_H 1 26 | 27 | /* Define to 1 if you have the <sys/types.h> header file. */ 28 | #define HAVE_SYS_TYPES_H 1 29 | 30 | /* Define to 1 if the system has the type `uint16_t'. */ 31 | #define HAVE_UINT16_T 1 32 | 33 | /* Define to 1 if the system has the type `u_int16_t'. */ 34 | #define HAVE_U_INT16_T 1 35 | 36 | /* Define to 1 if the system has the type `__uint16'. */ 37 | /* #undef HAVE___UINT16 */ 38 | 39 | /* The system-provided hash function including the namespace. */ 40 | #define SPARSEHASH_HASH HASH_NAMESPACE::hash 41 | 42 | /* Stops putting the code inside the Google namespace */ 43 | #define _END_GOOGLE_NAMESPACE_ } 44 | 45 | /* Puts following code inside the Google namespace */ 46 | #define _START_GOOGLE_NAMESPACE_ namespace google { 47 | -------------------------------------------------------------------------------- /src/main/c/sparsehash/template_util.h: -------------------------------------------------------------------------------- 1 | // Copyright 2005 Google Inc. 2 | // All rights reserved. 3 | // 4 | // Redistribution and use in source and binary forms, with or without 5 | // modification, are permitted provided that the following conditions are 6 | // met: 7 | // 8 | // * Redistributions of source code must retain the above copyright 9 | // notice, this list of conditions and the following disclaimer. 10 | // * Redistributions in binary form must reproduce the above 11 | // copyright notice, this list of conditions and the following disclaimer 12 | // in the documentation and/or other materials provided with the 13 | // distribution. 14 | // * Neither the name of Google Inc.
nor the names of its 15 | // contributors may be used to endorse or promote products derived from 16 | // this software without specific prior written permission. 17 | // 18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | // ---- 31 | // 32 | // Template metaprogramming utility functions. 33 | // 34 | // This code is compiled directly on many platforms, including client 35 | // platforms like Windows, Mac, and embedded systems. Before making 36 | // any changes here, make sure that you're not breaking any platforms. 37 | // 38 | // 39 | // The names chosen here reflect those used in tr1 and the boost::mpl 40 | // library; there are similar operations used in the Loki library as 41 | // well. I prefer the boost names for 2 reasons: 42 | // 1. I think that portions of the Boost libraries are more likely to 43 | // be included in the C++ standard. 44 | // 2. It is not impossible that some of the boost libraries will be 45 | // included in our own build in the future. 46 | // Both of these outcomes mean that we may be able to directly replace 47 | // some of these with boost equivalents.
// Types small_ and big_ are guaranteed such that sizeof(small_) <
// sizeof(big_).  They are meant to be used as return types of overloaded
// probe functions so that the chosen overload can be read back with sizeof.
typedef char small_;

struct big_ {
  char dummy[2];
};

// Identity metafunction: identity_<T>::type is T.
template <class T>
struct identity_ {
  typedef T type;
};

// integral_constant, defined in tr1, is a wrapper for an integer
// value. We don't really need this generality; we could get away
// with hardcoding the integer type to bool. We use the fully
// general integral_constant for compatibility with tr1.
template<class T, T v>
struct integral_constant {
  static const T value = v;
  typedef T value_type;
  typedef integral_constant<T, v> type;
};

// Out-of-class definition so that `value` can also be odr-used
// (e.g. bound to a const reference).
template <class T, T v> const T integral_constant<T, v>::value;


// Abbreviations: true_type and false_type are structs that represent boolean
// true and false values. Also define the boost::mpl versions of those names,
// true_ and false_.
typedef integral_constant<bool, true>  true_type;
typedef integral_constant<bool, false> false_type;
typedef true_type  true_;
typedef false_type false_;

// if_ is a templatized conditional statement.
// if_<cond, A, B> is a compile time evaluation of cond.
// if_<>::type contains A if cond is true, B otherwise.
template<bool cond, typename A, typename B>
struct if_ {
  typedef A type;
};

template<typename A, typename B>
struct if_<false, A, B> {
  typedef B type;
};


// type_equals_ is a template type comparator, similar to Loki IsSameType.
// type_equals_<A, B>::value is true iff "A" is the same type as "B".
//
// New code should prefer base::is_same, defined in base/type_traits.h.
// It is functionally identical, but is_same is the standard spelling.
template<typename A, typename B>
struct type_equals_ : public false_ {
};

template<typename A>
struct type_equals_<A, A> : public true_ {
};

// and_ is a template && operator.
// and_<A, B>::value evaluates "A::value && B::value".
template<typename A, typename B>
struct and_ : public integral_constant<bool, (A::value && B::value)> {
};

// or_ is a template || operator.
// or_<A, B>::value evaluates "A::value || B::value".
template<typename A, typename B>
struct or_ : public integral_constant<bool, (A::value || B::value)> {
};
// Hash functor for NUL-terminated C strings ("times 33, xor" string hash),
// intended to be paired with eqstr as the hasher of a sparse_hash_map.
//
// The accumulator is unsigned so that the shift/add wraps with well-defined
// modular arithmetic; the previous signed accumulator overflowed (undefined
// behaviour) once the key was long enough.  For keys that never overflowed
// the result is unchanged.  A null key hashes to 0 instead of being
// dereferenced, matching eqstr, which also accepts null pointers.
struct my_hash
{
	long operator()(const char* s1) const
	{
		if (!s1)
		{
			return 0;
		}

		unsigned long hash = 0;
		int c;

		while ((c = *s1++))
		{
			/* hash = hash * 33 ^ c */
			hash = ((hash << 5) + hash) ^ static_cast<unsigned long>(c);
		}

		// Return type kept as long so existing users of the functor still compile.
		return static_cast<long>(hash);
	}
};
months["july"] = 31; 53 | // months["august"] = 31; 54 | // months["september"] = 30; 55 | // months["october"] = 31; 56 | // months["november"] = 30; 57 | // months["december"] = 31; 58 | // 59 | // cout << "september -> " << months["september"] << endl; 60 | // cout << "april -> " << months["april"] << endl; 61 | // cout << "june -> " << months["june"] << endl; 62 | // cout << "november -> " << months["november"] << endl; 63 | // 64 | // char* foo = (char*) malloc(50); 65 | // char* foo2 = (char*) malloc(50); 66 | // memset(foo, 0, 50); 67 | // memset(foo2, 0, 50); 68 | //// memcpy(foo, "september", 9); 69 | // 70 | // strcpy(foo, "september"); 71 | // strcpy(foo2, foo); 72 | // 73 | // my_hash H; 74 | // 75 | // cout << "const hash: " << H("september") << endl; 76 | // cout << "foo hash: " << H(foo) << endl; 77 | // cout << "foo2 hash: " << H(foo2) << endl; 78 | // 79 | // cout << "foo txt: [" << foo << "]" << endl; 80 | // cout << "foo2 txt: [" << foo2 << "]" << endl; 81 | // 82 | // cout << "map: " << months[foo] << endl; 83 | // cout << "map2: " << months["september"] << endl; 84 | 85 | sparse_hash_set Set; 86 | 87 | char* foo = (char*) malloc(sizeof(char) * 100); 88 | 89 | printf("1: %x\n", foo); 90 | sprintf(foo, "bar"); 91 | printf("2: %x\n", foo); 92 | months[foo] = "rec"; 93 | months["bar2"] = "rec2"; 94 | 95 | 96 | sparse_hash_map::const_iterator it1 = months.find("bar2"); 97 | 98 | const char* val = it1->second; 99 | 100 | printf("Val: %s\n", val); 101 | 102 | if (it1 == months.end()) { 103 | printf("it1 @ end\n"); 104 | } 105 | 106 | sparse_hash_map::const_iterator it2 = months.find("bar3"); 107 | 108 | printf("it2: %x\n", it2); 109 | 110 | if (it2 == months.end()) { 111 | printf("it2 @ end\n"); 112 | } 113 | 114 | // for (sparse_hash_map::const_iterator it = months.begin(); 115 | // it != months.end(); ++it) { 116 | // 117 | // const char* key = it->first; 118 | // const char* value = it->second; 119 | // 120 | // printf("key: %s - %x\n", key, key); 
121 | // printf("value: %s\n", value); 122 | // 123 | // } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/java/abra/Abra.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | /** 5 | * Main entry point for Abra. 6 | * 7 | * @author Lisle E. Mose (lmose at unc dot edu) 8 | */ 9 | public class Abra { 10 | 11 | public static void main(String[] args) throws Exception { 12 | ReAligner.run(args); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/abra/AbraRunnable.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | /** 4 | * Abstract base class for ABRA threads. 5 | * 6 | * @author Lisle E. Mose (lmose at unc dot edu) 7 | */ 8 | public abstract class AbraRunnable implements Runnable { 9 | 10 | long spawnStartTime; 11 | private ThreadManager threadManager; 12 | 13 | public AbraRunnable(ThreadManager threadManager) { 14 | this.threadManager = threadManager; 15 | } 16 | 17 | @Override 18 | public void run() { 19 | try { 20 | go(); 21 | } catch (Throwable t) { 22 | t.printStackTrace(); 23 | System.exit(-1); 24 | } finally { 25 | threadManager.removeThread(this); 26 | } 27 | } 28 | 29 | public void setSpawnStartTime(long spawnStartTime) { 30 | this.spawnStartTime = spawnStartTime; 31 | } 32 | 33 | public abstract void go() throws Exception; 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/abra/AssemblerSettings.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | /** 5 | * Settings for Contig Assembly 6 | * @author Lisle E. 
Mose (lmose at unc dot edu) 7 | */ 8 | public class AssemblerSettings { 9 | 10 | private int[] kmerSize; 11 | private int minEdgeFrequency; 12 | private int minNodeFrequncy; 13 | private int minUnalignedNodeFrequency; 14 | private int minContigLength; 15 | private int minBaseQuality; 16 | private double minReadCandidateFraction; 17 | private int maxAverageDepth; 18 | private int averageDepthCeiling; 19 | private double minEdgeRatio; 20 | private int maxNodes; 21 | 22 | public int getMaxNodes() { 23 | return maxNodes; 24 | } 25 | 26 | public void setMaxNodes(int maxNodes) { 27 | this.maxNodes = maxNodes; 28 | } 29 | 30 | public double getMinEdgeRatio() { 31 | return minEdgeRatio; 32 | } 33 | 34 | public void setMinEdgeRatio(double minEdgeRatio) { 35 | this.minEdgeRatio = minEdgeRatio; 36 | } 37 | 38 | public int getAverageDepthCeiling() { 39 | return averageDepthCeiling; 40 | } 41 | 42 | public void setAverageDepthCeiling(int averageDepthCeiling) { 43 | this.averageDepthCeiling = averageDepthCeiling; 44 | } 45 | 46 | public int getMaxAverageDepth() { 47 | return maxAverageDepth; 48 | } 49 | 50 | public void setMaxAverageDepth(int maxAverageDepth) { 51 | this.maxAverageDepth = maxAverageDepth; 52 | } 53 | 54 | public double getMinReadCandidateFraction() { 55 | return minReadCandidateFraction; 56 | } 57 | 58 | public void setMinReadCandidateFraction(double minReadCandidateFraction) { 59 | this.minReadCandidateFraction = minReadCandidateFraction; 60 | } 61 | 62 | public int getMinBaseQuality() { 63 | return minBaseQuality; 64 | } 65 | 66 | public void setMinBaseQuality(int minBaseQuality) { 67 | this.minBaseQuality = minBaseQuality; 68 | } 69 | 70 | public int[] getKmerSize() { 71 | return kmerSize; 72 | } 73 | 74 | public void setKmerSize(int[] kmerSize) { 75 | this.kmerSize = kmerSize; 76 | } 77 | 78 | public void setMinUnalignedNodeFrequency(int minUnalignedNodeFrequency) { 79 | this.minUnalignedNodeFrequency = minUnalignedNodeFrequency; 80 | } 81 | 82 | public int 
getMinUnalignedNodeFrequency() { 83 | return minUnalignedNodeFrequency; 84 | } 85 | 86 | public int getMinNodeFrequncy() { 87 | return minNodeFrequncy; 88 | } 89 | 90 | public void setMinNodeFrequncy(int minNodeFrequncy) { 91 | this.minNodeFrequncy = minNodeFrequncy; 92 | } 93 | 94 | public int getMinContigLength() { 95 | return minContigLength; 96 | } 97 | 98 | public void setMinContigLength(int minContigLength) { 99 | this.minContigLength = minContigLength; 100 | } 101 | 102 | public String getDescription() { 103 | StringBuffer str = new StringBuffer(); 104 | 105 | for (int i=0; i chunks; 20 | 21 | // Chromosome chunks grouped by chromosome 22 | private Map> chunkGroups; 23 | 24 | public ChromosomeChunker(CompareToReference2 c2r) { 25 | this.c2r = c2r; 26 | } 27 | 28 | // Identify chunks for processing 29 | // Split chunks at N regions 30 | public void init() { 31 | 32 | chunks = new ArrayList(); 33 | chunkGroups = new HashMap>(); 34 | 35 | int chunkIdx = 0; 36 | for (String chromosome : c2r.getChromosomes()) { 37 | long currStart = 1; 38 | int chromosomeLength = c2r.getChromosomeLength(chromosome); 39 | 40 | while (currStart < chromosomeLength) { 41 | Feature chunk = new Feature(chromosome, currStart, Math.min(currStart+MIN_CHUNK_SIZE-1, chromosomeLength)); 42 | currStart = chunk.getEnd()+1; 43 | chunks.add(chunk); 44 | 45 | if (!chunkGroups.containsKey(chromosome)) { 46 | chunkGroups.put(chromosome, new ArrayList()); 47 | } 48 | 49 | chunkGroups.get(chromosome).add(chunkIdx); 50 | chunkIdx += 1; 51 | } 52 | 53 | /* 54 | List nRegions = c2r.getUndefinedRegions().get(chromosome); 55 | for (Feature nRegion : nRegions) { 56 | // Only consider N regions of reasonable size 57 | if (nRegion.getEnd() - nRegion.getStart() > 1000) { 58 | if (nRegion.getStart() - currStart > MIN_CHUNK_SIZE && nRegion.getStart()+MIN_CHUNK_SIZE < chromosomeLength) { 59 | Feature chunk = new Feature(chromosome, currStart, nRegion.getStart()); 60 | chunks.add(chunk); 61 | currStart = 
/**
 * Matches chromosome / contig names against a regular expression.
 *
 * The special expression {@code "none"} disables matching: such an instance
 * never matches anything.
 *
 * The compiled pattern is per-instance state. It used to be a static field
 * assigned in the constructor, so a later instance overwrote the pattern of
 * every earlier one, and a "none" instance kept using whatever pattern had
 * been compiled before it.
 */
public class ChromosomeRegex {

	public static final String DEFAULT_SKIP_REGEX = "GL.*|hs37d5|chr.*random|chrUn.*|chrEBV|CMV|HBV|HCV.*|HIV.*|KSHV|HTLV.*|MCV|SV40|HPV.*";

	// null means "match nothing" (regex was "none").
	private final Pattern p;

	/**
	 * @param regex expression the whole name must match, or "none" to disable matching
	 * @throws java.util.regex.PatternSyntaxException if regex is not a valid expression
	 */
	public ChromosomeRegex(String regex) {
		this.p = "none".equals(regex) ? null : Pattern.compile(regex);
	}

	/**
	 * @param chrom chromosome / contig name
	 * @return true when the entire name matches the configured expression;
	 *         always false when matching is disabled
	 */
	public boolean matches(String chrom) {
		return p != null && p.matcher(chrom).matches();
	}
}
8 | // Assumes all input contigs same length 9 | public String buildConsensus(List contigs) { 10 | 11 | StringBuffer consensus = new StringBuffer(); 12 | // Subset to the contigs with highest qual scores 13 | contigs = ScoredContig.filter(contigs, 31); 14 | 15 | int maxLen = 0; 16 | for (ScoredContig contig : contigs) { 17 | maxLen = Math.max(maxLen, contig.getContig().length()); 18 | } 19 | 20 | for (int i=0; i reads = new ArrayList(); 19 | private int maxReads; 20 | private int totalReads = 0; 21 | 22 | public DownsampledReadList(int maxReads) { 23 | this.maxReads = maxReads; 24 | } 25 | 26 | public void add(SAMRecord read) { 27 | totalReads += 1; 28 | if (reads.size() < maxReads) { 29 | reads.add(read); 30 | } else { 31 | int slot = random.nextInt(totalReads); 32 | if (slot < maxReads) { 33 | reads.set(slot, read); 34 | } 35 | } 36 | } 37 | 38 | public List getReads() { 39 | return reads; 40 | } 41 | 42 | public int getTotalReadCount() { 43 | return totalReads; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/abra/Feature.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import abra.SAMRecordWrapper.Span; 8 | import htsjdk.samtools.SAMFileHeader; 9 | import htsjdk.samtools.SAMRecord; 10 | 11 | /** 12 | * Representation of a Feature (i.e. a line in a GTF file) 13 | * 14 | * @author Lisle E. Mose (lmose at unc dot edu) 15 | */ 16 | public class Feature { 17 | private String seqname; 18 | private long start; // 1 based 19 | private long end; // inclusive 20 | private String additionalInfo; 21 | 22 | // Optional kmerSize value specific to ABRA assembly. 
23 | private int kmerSize; 24 | 25 | public Feature(String seqname, long start, long end) { 26 | this.seqname = seqname; 27 | this.start = start; 28 | this.end = end; 29 | } 30 | 31 | public String getSeqname() { 32 | return seqname; 33 | } 34 | 35 | public long getStart() { 36 | return start; 37 | } 38 | 39 | public long getEnd() { 40 | return end; 41 | } 42 | 43 | public void setEnd(long end) { 44 | this.end = end; 45 | } 46 | 47 | public String getDescriptor() { 48 | return seqname + "_" + start + "_" + end; 49 | } 50 | 51 | public long getLength() { 52 | return end-start; 53 | } 54 | 55 | public String toString() { 56 | return getDescriptor(); 57 | } 58 | 59 | public void setAdditionalInfo(String info) { 60 | this.additionalInfo = info; 61 | } 62 | 63 | public String getAdditionalInfo() { 64 | return additionalInfo; 65 | } 66 | 67 | 68 | private boolean isWithin(long coord, long start, long stop) { 69 | return coord >= start && coord <= stop; 70 | } 71 | 72 | private boolean overlaps(long start1, long stop1, long start2, long stop2) { 73 | return 74 | isWithin(start1, start2, stop2) || 75 | isWithin(stop1, start2, stop2) || 76 | isWithin(start2, start1, stop1) || 77 | isWithin(stop2, start1, stop1); 78 | } 79 | 80 | public boolean overlapsRead(SAMRecord read) { 81 | int alignmentEnd = Math.max(read.getAlignmentEnd(), read.getAlignmentStart() + read.getReadLength()); 82 | return overlaps(read.getReferenceName(), read.getAlignmentStart(), alignmentEnd); 83 | } 84 | 85 | public boolean overlaps(Feature that) { 86 | return this.overlaps(that.seqname, (int) that.start, (int) that.end); 87 | } 88 | 89 | public boolean overlaps(String chromosome, int startPos, int stopPos) { 90 | return ((this.seqname.equals(chromosome)) && overlaps(start, end, startPos, stopPos)); 91 | } 92 | 93 | public int getKmer() { 94 | return kmerSize; 95 | } 96 | 97 | public void setKmer(int kmer) { 98 | this.kmerSize = kmer; 99 | } 100 | 101 | public static int 
findFirstOverlappingRegion(SAMFileHeader samHeader, SAMRecordWrapper read, int readStart, int readEnd, List regions, int start) { 102 | if (start < 0) { 103 | start = 0; 104 | } 105 | 106 | for (int idx=start; idx findAllOverlappingRegions(SAMFileHeader samHeader, SAMRecordWrapper read, List regions, int start) { 124 | List overlappingRegions = new ArrayList(); 125 | 126 | for (Span span : read.getSpanningRegions()) { 127 | 128 | int idx = findFirstOverlappingRegion(samHeader, read, span.start, span.end, regions, start); 129 | if (idx > -1) { 130 | overlappingRegions.add(idx); 131 | boolean isOverlap = true; 132 | idx += 1; 133 | 134 | while (isOverlap && idx < regions.size()) { 135 | Feature region = regions.get(idx); 136 | if (region.overlaps(read.getSamRecord().getReferenceName(), span.start, span.end)) { 137 | overlappingRegions.add(idx); 138 | } else { 139 | isOverlap = false; 140 | } 141 | idx += 1; 142 | } 143 | } 144 | } 145 | 146 | return overlappingRegions; 147 | } 148 | 149 | public boolean containsEitherEnd(Feature feature, int fudge) { 150 | long fudge_start = Math.max(1, this.start-fudge); 151 | long fudge_end = this.end + fudge; 152 | 153 | return (feature.getStart() >= fudge_start && feature.getStart() <= fudge_end) || 154 | (feature.getEnd() >= fudge_start && feature.getEnd() <= fudge_end); 155 | } 156 | 157 | @Override 158 | public int hashCode() { 159 | final int prime = 31; 160 | int result = 1; 161 | result = prime * result + (int) (end ^ (end >>> 32)); 162 | result = prime * result + ((seqname == null) ? 
/**
 * Simple logging class.
 *
 * Messages are written to System.err as
 * {@code <LEVEL> TAB <date> TAB <message>}. A message is emitted when its
 * level is at or above the current threshold {@link #LEVEL}; error messages
 * are always emitted.
 *
 * @author Lisle E. Mose (lmose at unc dot edu)
 */
public class Logger {

	enum Level { TRACE, DEBUG, INFO, WARN, ERROR };

	/** Current threshold. Public and mutable because callers assign it directly. */
	public static Level LEVEL = Level.INFO;

	// Upper-case level name -> Level, used by setLevel.
	private static final Map<String, Level> stringToLevel = new HashMap<String, Level>();

	static {
		for (Level level : Level.values()) {
			stringToLevel.put(level.name(), level);
		}
	}

	/** True when messages of the given level pass the current threshold. */
	private static boolean isEnabled(Level level) {
		Level threshold = LEVEL;
		// A null threshold enables nothing, as with the former == comparisons.
		return threshold != null && threshold.compareTo(level) <= 0;
	}

	/**
	 * Applies String.format, falling back to the raw text when the text is not
	 * a valid format for the given arguments. Callers routinely pass
	 * already-built messages (paths, descriptors) that may contain '%';
	 * logging such a message must not throw.
	 */
	private static String render(String format, Object... args) {
		try {
			return String.format(format, args);
		} catch (java.util.IllegalFormatException e) {
			return format;
		}
	}

	// For trace or debug messages, use varargs to avoid string concatenation unless enabled

	public static void trace(String format, Object... args) {
		if (isEnabled(Level.TRACE)) {
			log(render(format, args), Level.TRACE);
		}
	}

	public static void debug(String format, Object... args) {
		if (isEnabled(Level.DEBUG)) {
			log(render(format, args), Level.DEBUG);
		}
	}

	public static void info(String format, Object... args) {
		if (isEnabled(Level.INFO)) {
			log(render(format, args), Level.INFO);
		}
	}

	public static void warn(String message) {
		if (isEnabled(Level.WARN)) {
			log(message, Level.WARN);
		}
	}

	/** Errors are logged regardless of the threshold. */
	public static void error(String message) {
		log(message, Level.ERROR);
	}

	/**
	 * Sets the threshold from its name (case-insensitive).
	 *
	 * @throws IllegalArgumentException if str is null or not a known level name
	 */
	public static void setLevel(String str) {
		// Locale.ROOT: the default-locale upper-casing of "info" is not "INFO"
		// in every locale (e.g. Turkish dotted I).
		Level level = (str == null) ? null : stringToLevel.get(str.toUpperCase(java.util.Locale.ROOT));
		if (level == null) {
			throw new IllegalArgumentException("Log level must be one of trace, debug, info, warn or error.");
		}

		Logger.LEVEL = level;
	}

	/** Writes one line to System.err unconditionally (no threshold check). */
	public static void log(String message, Level level) {

		String levelStr = "UNKNOWN";

		switch (level) {
			case ERROR:
				levelStr = "ERROR";
				break;
			case WARN:
				levelStr = "WARNING";
				break;
			case INFO:
				levelStr = "INFO";
				break;
			case DEBUG:
				levelStr = "DEBUG";
				break;
			case TRACE:
				levelStr = "TRACE";
				break;
		}

		System.err.println(levelStr + "\t" + new Date() + "\t" + message);
	}
}
newer implementation 15 | private SamReader[] readers; 16 | private Iterator[] iterators; 17 | private SAMRecordWrapper[] nextRecord; 18 | private int minMapqForAssembly; 19 | private boolean isPairedEnd; 20 | 21 | // Iterator used by clients 22 | private Iterator clientIterator; 23 | 24 | public MultiSamReader(String[] inputBams, int minMapqForAssembly, boolean isPairedEnd, Feature region) { 25 | 26 | //TODO: Assert all SAM Headers have same sequence dict 27 | readers = new SamReader[inputBams.length]; 28 | nextRecord = new SAMRecordWrapper[inputBams.length]; 29 | iterators = new Iterator[inputBams.length]; 30 | this.minMapqForAssembly = minMapqForAssembly; 31 | this.isPairedEnd = isPairedEnd; 32 | 33 | int idx = 0; 34 | for (String bamFileName : inputBams) { 35 | SamReader reader = SAMRecordUtils.getSamReader(bamFileName); 36 | 37 | readers[idx] = reader; 38 | 39 | // TODO: Pad by region size? 40 | iterators[idx] = readers[idx].queryOverlapping(region.getSeqname(), (int) region.getStart(), (int) region.getEnd()); 41 | 42 | // cache next record 43 | cacheNextRecord(idx); 44 | 45 | idx += 1; 46 | } 47 | 48 | clientIterator = new MultiSamReaderIterator(this); 49 | } 50 | 51 | private void cacheNextRecord(int sampleIdx) { 52 | nextRecord[sampleIdx] = getNext(sampleIdx); 53 | } 54 | 55 | public SAMFileHeader getSAMFileHeader() { 56 | return readers[0].getFileHeader(); 57 | } 58 | 59 | public void close() throws IOException { 60 | for (SamReader reader : readers) { 61 | reader.close(); 62 | } 63 | } 64 | 65 | private boolean isFiltered(SAMRecord read) { 66 | return SAMRecordUtils.isFiltered(isPairedEnd, read); 67 | } 68 | 69 | private SAMRecordWrapper getNext(int idx) { 70 | SAMRecordWrapper record = null; 71 | if (iterators[idx].hasNext()) { 72 | SAMRecord read = iterators[idx].next(); 73 | // If no genomic location is assigned, we've reached the unmapped read pairs. Do not continue... 
74 | // TODO: Need to include these in final bam files 75 | if (read.getReferenceIndex() >= 0) { 76 | record = new SAMRecordWrapper(read, isFiltered(read), shouldAssemble(read), idx); 77 | } 78 | } 79 | 80 | return record; 81 | } 82 | 83 | private boolean shouldAssemble(SAMRecord read) { 84 | return ( 85 | (!read.getDuplicateReadFlag()) && 86 | (!read.getReadFailsVendorQualityCheckFlag()) && 87 | read.getReadLength() > 0 && 88 | (read.getMappingQuality() >= this.minMapqForAssembly || read.getReadUnmappedFlag()) && 89 | SAMRecordUtils.isPrimary(read)); // Was previously an id check, so supplemental / secondary alignments could be included 90 | } 91 | 92 | @Override 93 | public Iterator iterator() { 94 | return clientIterator; 95 | } 96 | 97 | static class MultiSamReaderIterator implements Iterator { 98 | 99 | private MultiSamReader multiSamReader; 100 | 101 | MultiSamReaderIterator(MultiSamReader multiSamReader) { 102 | this.multiSamReader = multiSamReader; 103 | } 104 | 105 | @Override 106 | public boolean hasNext() { 107 | // Return true if any sample has another read 108 | for (SAMRecordWrapper record : multiSamReader.nextRecord) { 109 | if (record != null) { 110 | return true; 111 | } 112 | } 113 | 114 | return false; 115 | } 116 | 117 | @Override 118 | public SAMRecordWrapper next() { 119 | // Return the first read across samples by genomic coordinate 120 | SAMRecordWrapper nextRecord = null; 121 | int bestChr = Integer.MAX_VALUE; 122 | int bestPos = Integer.MAX_VALUE; 123 | int bestSampleIdx = -1; 124 | 125 | for (int i=0; i -1) { 38 | out.write(buffer, 0, len); 39 | } 40 | 41 | out.close(); 42 | in.close(); 43 | 44 | Logger.info("Loading native library from: " + file.getAbsolutePath()); 45 | System.load(file.getAbsolutePath()); 46 | 47 | file.deleteOnExit(); 48 | 49 | } else if (!isLenient) { 50 | throw new RuntimeException("Unable to load library: " + library + " from path [" + urlPath + "] into tempdir: [" + tempDir + "]"); 51 | } 52 | } catch (Throwable t) 
{ 53 | Logger.error("Error loading: " + library + " from : " + tempDir); 54 | t.printStackTrace(); 55 | 56 | if (!isLenient) { 57 | throw new RuntimeException(t); 58 | } 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/abra/NativeSemiGlobalAligner.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import abra.SemiGlobalAligner.Result; 4 | 5 | public class NativeSemiGlobalAligner { 6 | 7 | private native String align(String seq1, String seq2, int match, int mismatch, int gapOpen, int gapExtend); 8 | 9 | private int match = 8; 10 | private int mismatch = -32; 11 | private int gapOpen = -48; 12 | private int gapExtend = -1; 13 | 14 | public static final int MAX_CONTIG_LEN = 1998; 15 | public static final int MAX_REF_LEN = 4998; 16 | 17 | public NativeSemiGlobalAligner(int match, int mismatch, int gapOpen, int gapExtend) { 18 | this.match = match; 19 | this.mismatch = mismatch; 20 | this.gapOpen = gapOpen; 21 | this.gapExtend = gapExtend; 22 | } 23 | 24 | public Result align(String seq1, String seq2) { 25 | 26 | if (seq1.length() > MAX_CONTIG_LEN) { 27 | throw new IllegalArgumentException("Contig too long"); 28 | } 29 | 30 | if (seq2.length() > MAX_REF_LEN) { 31 | throw new IllegalArgumentException("Ref too long"); 32 | } 33 | 34 | // Result returned in format score:secondBest:pos:endPos:cigar 35 | // 789:741:611:734:52M108I71M 36 | String res = align(seq1, seq2, match, mismatch, gapOpen, gapExtend); 37 | 38 | String[] results = res.split(":"); 39 | Result result = new Result(Integer.valueOf(results[0]), Integer.valueOf(results[1]), 40 | Integer.valueOf(results[2]), Integer.valueOf(results[3]), results[4]); 41 | 42 | return result; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/abra/Options.java: 
/**
 * Immutable holder for two values of independent types.
 *
 * @param <T> type of the first value
 * @param <Y> type of the second value
 */
public class Pair<T, Y> {
    // Assigned once in the constructor and never modified afterwards.
    private final T t;
    private final Y y;

    public Pair(T t, Y y) {
        this.t = t;
        this.y = y;
    }

    /** Returns the first value (may be null if null was supplied). */
    public T getFirst() {
        return t;
    }

    /** Returns the second value (may be null if null was supplied). */
    public Y getSecond() {
        return y;
    }

}
/src/main/java/abra/ReAlignerRunnable.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | /** 5 | * Thread entry point for chromsome / reference sequence specific processing. 6 | * 7 | * @author Lisle E. Mose (lmose at unc dot edu) 8 | */ 9 | public class ReAlignerRunnable extends AbraRunnable { 10 | private int chromosomeChunkIdx; 11 | private ReAligner reAligner; 12 | 13 | public ReAlignerRunnable(ThreadManager threadManager, ReAligner reAligner, int chromosomeChunkIdx) { 14 | super(threadManager); 15 | this.chromosomeChunkIdx = chromosomeChunkIdx; 16 | this.reAligner = reAligner; 17 | } 18 | 19 | @Override 20 | public void go() throws Exception { 21 | reAligner.processChromosomeChunk(chromosomeChunkIdx); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/abra/ReadPair.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | import htsjdk.samtools.SAMRecord; 5 | 6 | /** 7 | * A Pair of SAMRecord's 8 | * 9 | * @author Lisle E. Mose (lmose at unc dot edu) 10 | */ 11 | public class ReadPair { 12 | private SAMRecord read1; 13 | private SAMRecord read2; 14 | 15 | private String hashString = null; 16 | 17 | ReadPair(SAMRecord read1, SAMRecord read2) { 18 | this.read1 = read1; 19 | this.read2 = read2; 20 | } 21 | 22 | public SAMRecord getRead1() { 23 | return read1; 24 | } 25 | 26 | public SAMRecord getRead2() { 27 | return read2; 28 | } 29 | 30 | public String toString() { 31 | String r1 = read1 != null ? read1.getReadName() : "null"; 32 | String r2 = read2 != null ? 
read2.getReadName() : "null"; 33 | return "read1: " + r1 + ", read2: " + r2; 34 | } 35 | 36 | private synchronized String getHashString() { 37 | if (hashString == null) { 38 | hashString = read1 != null ? read1.getSAMString() : "null" + 39 | read2 != null ? read2.getSAMString() : "null"; 40 | } 41 | 42 | return hashString; 43 | } 44 | 45 | @Override 46 | public int hashCode() { 47 | return getHashString().hashCode(); 48 | } 49 | 50 | @Override 51 | public boolean equals(Object obj) { 52 | ReadPair that = (ReadPair) obj; 53 | return this.getHashString().equals(that.getHashString()); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/abra/RealignmentWriter.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | import htsjdk.samtools.SAMRecord; 5 | 6 | /** 7 | * Concrete implementations of this interface are responsible for outputting 8 | * realigned reads to the output BAM file. 9 | * 10 | * @author Lisle E. Mose (lmose at unc dot edu) 11 | */ 12 | public interface RealignmentWriter { 13 | 14 | public void addAlignment(SAMRecord updatedRead, SAMRecord origRead); 15 | 16 | public int flush(); 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/abra/ReferenceEvaluator.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.FileWriter; 5 | import java.io.IOException; 6 | import java.util.Collection; 7 | import java.util.Collections; 8 | import java.util.Comparator; 9 | import java.util.List; 10 | import java.util.Set; 11 | import java.util.TreeSet; 12 | 13 | /** 14 | * Produces BED files indicating genomic windows that lend themselves to assembly. 15 | * 16 | * @author Lisle E. 
Mose (lmose at unc dot edu) 17 | */ 18 | public class ReferenceEvaluator { 19 | 20 | private int readLength; 21 | private String reference; 22 | private int[] kmers; 23 | private String in; 24 | private String out; 25 | private String qualities; 26 | private BufferedWriter include; 27 | private BufferedWriter exclude; 28 | private Set includeRegions; 29 | private Set excludeRegions; 30 | private ThreadManager threadManager; 31 | 32 | private static final int MAX_NODES = 9000; 33 | 34 | public ReferenceEvaluator(int readLength, String reference, int[] kmers, String in, String out, int numThreads) { 35 | this.readLength = readLength; 36 | this.reference = reference; 37 | this.kmers = kmers; 38 | this.in = in; 39 | this.out = out; 40 | 41 | this.qualities = new String(); 42 | for (int i=0; i(new RegionComparator())); 59 | excludeRegions = Collections.synchronizedSortedSet(new TreeSet(new RegionComparator())); 60 | int i = 0; 61 | int chromosomeLength = c2r.getReferenceLength(chr); 62 | while (i < chromosomeLength - ReAligner.MAX_REGION_LENGTH) { 63 | int regionStart = i; 64 | int regionStop = i + ReAligner.MAX_REGION_LENGTH; 65 | int start = Math.max(regionStart - readLength, 0); 66 | int stop = Math.min(regionStop + readLength, chromosomeLength-1); 67 | String regionBases = c2r.getSequence(chr, start+1, stop-start); 68 | Feature region = new Feature(chr, regionStart, regionStop); 69 | 70 | //TODO: Handle other ambiguous bases 71 | if (!regionBases.contains("N")) { 72 | threadManager.spawnThread(new EvalRunnable(threadManager, this, region, regionBases)); 73 | } else { 74 | 75 | excludeRegions.add(region); 76 | } 77 | 78 | i += ReAligner.REGION_OVERLAP; 79 | } 80 | 81 | threadManager.waitForAllThreadsToComplete(); 82 | 83 | //TODO: Because assembly regions are overlapped, there is overlap between final include/exclude output 84 | outputRegions(include, includeRegions); 85 | outputRegions(exclude, excludeRegions); 86 | } 87 | 88 | include.close(); 89 | exclude.close(); 
90 | 91 | System.err.println("Done."); 92 | } 93 | 94 | private void evalRegion(Feature region, String regionBases) { 95 | boolean shouldInclude = false; 96 | NativeAssembler assembler = new NativeAssembler(); 97 | StringBuffer readBuf = new StringBuffer((ReAligner.MAX_REGION_LENGTH + 2*readLength) * readLength); 98 | for (int j=0; j<=regionBases.length() - readLength; j++) { 99 | readBuf.append("0"); // forward strand only 100 | String read = regionBases.substring(j, j+readLength); 101 | readBuf.append(read); 102 | readBuf.append(qualities); 103 | } 104 | 105 | String contig = assembler.nativeAssemble(readBuf.toString(), region.getDescriptor(), "eval", 0, 1, (ReAligner.MAX_REGION_LENGTH + 2*readLength)*2, readLength, kmers, 1, 0, .01, 1, MAX_NODES); 106 | int basesIdx = contig.indexOf('\n') + 1; 107 | if (basesIdx < contig.length()) { 108 | String contigBases = contig.substring(basesIdx, contig.length()-1); 109 | if (regionBases.equals(contigBases)) { 110 | shouldInclude = true; 111 | } 112 | } 113 | 114 | if (shouldInclude) { 115 | includeRegions.add(region); 116 | } else { 117 | excludeRegions.add(region); 118 | } 119 | } 120 | 121 | private void outputRegions(BufferedWriter writer, Collection regions) throws IOException { 122 | List mergedRegions = RegionLoader.collapseRegions(regions, 0); 123 | 124 | for (Feature region : mergedRegions) { 125 | writer.write(region.getSeqname() + "\t" + region.getStart() + "\t" + region.getEnd() + "\n"); 126 | } 127 | } 128 | 129 | // Basic Region comparator. Only considers start position, so must not 130 | // be used across chromosomes. 
131 | static class RegionComparator implements Comparator { 132 | 133 | @Override 134 | public int compare(Feature region1, Feature region2) { 135 | return (int) (region1.getStart() - region2.getStart()); 136 | } 137 | } 138 | 139 | static class EvalRunnable extends AbraRunnable { 140 | private ReferenceEvaluator evaluator; 141 | private Feature region; 142 | private String regionBases; 143 | 144 | public EvalRunnable(ThreadManager threadManager, ReferenceEvaluator evaluator, Feature region, String regionBases) { 145 | super(threadManager); 146 | this.evaluator = evaluator; 147 | this.region = region; 148 | this.regionBases = regionBases; 149 | } 150 | 151 | @Override 152 | public void go() throws Exception { 153 | evaluator.evalRegion(region, regionBases); 154 | } 155 | } 156 | 157 | static int[] getKmers(String str) { 158 | String[] strings = str.split(","); 159 | int[] kmers = new int[strings.length]; 160 | for (int i=0; i "); 181 | } 182 | int readLength = Integer.parseInt(args[0]); 183 | String reference = args[1]; 184 | int kmers[] = getKmers(args[2]); 185 | String includeBed = args[3]; 186 | String excludeBed = args[4]; 187 | int threads = Integer.parseInt(args[5]); 188 | ReferenceEvaluator re = new ReferenceEvaluator(readLength, reference, kmers, includeBed, excludeBed, threads); 189 | re.run(); 190 | } 191 | } 192 | -------------------------------------------------------------------------------- /src/main/java/abra/RegionLoader.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | import java.io.BufferedReader; 5 | import java.io.FileNotFoundException; 6 | import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.Collection; 10 | import java.util.List; 11 | 12 | /** 13 | * Loads region info into memory. 14 | * 15 | * @author Lisle E. 
Mose (lmose at unc dot edu) 16 | */ 17 | public class RegionLoader { 18 | 19 | private static final int SEQNAME_IDX = 0; 20 | private static final int BED_START_IDX = 1; 21 | private static final int BED_END_IDX = 2; 22 | private static final int KMER_SIZE_IDX = 3; 23 | 24 | public List load(String regionFile, boolean hasPresetKmers) throws FileNotFoundException, IOException { 25 | List features = new ArrayList(); 26 | 27 | int start = BED_START_IDX; 28 | int end = BED_END_IDX; 29 | 30 | BufferedReader reader = new BufferedReader(new FileReader(regionFile)); 31 | 32 | try { 33 | 34 | String lastChr = ""; 35 | long lastStart = -1; 36 | 37 | String line = reader.readLine(); 38 | 39 | int cnt = 0; 40 | 41 | while (line != null) { 42 | if(line.startsWith("#") || line.trim().isEmpty() || line.startsWith("track") || line.startsWith("browser")) { 43 | line = reader.readLine(); 44 | continue; 45 | } 46 | String[] fields = line.split("\t"); 47 | 48 | String chromosome = fields[SEQNAME_IDX]; 49 | long startPos = Long.valueOf(fields[start]); 50 | long endPos = Long.valueOf(fields[end]); 51 | 52 | if (startPos > endPos) { 53 | throw new IllegalArgumentException("Region end must be greater than region start in target BED file: " + line); 54 | } 55 | 56 | if (lastChr.equals(chromosome) && startPos < lastStart) { 57 | throw new IllegalArgumentException("Target BED file must be sorted in increasing coordinate order (grouped by chromosome): " + line); 58 | } 59 | 60 | Feature feature = new Feature(chromosome, startPos, endPos); 61 | 62 | if (fields.length >= KMER_SIZE_IDX+1 && hasPresetKmers) { 63 | int kmerSize = Integer.parseInt(fields[KMER_SIZE_IDX]); 64 | feature.setKmer(kmerSize); 65 | } 66 | 67 | features.add(feature); 68 | 69 | line = reader.readLine(); 70 | cnt++; 71 | 72 | lastChr = chromosome; 73 | lastStart = startPos; 74 | } 75 | } finally { 76 | reader.close(); 77 | } 78 | 79 | return features; 80 | } 81 | 82 | public static List collapseRegions(Collection regions, int 
maxGap) { 83 | List collapsedRegions = new ArrayList(); 84 | 85 | Feature currentRegion = null; 86 | 87 | for (Feature region : regions) { 88 | if (currentRegion != null) { 89 | if ((currentRegion.getSeqname().equals(region.getSeqname())) && 90 | (currentRegion.getEnd() + (maxGap) >= region.getStart())) { 91 | 92 | currentRegion.setEnd(region.getEnd()); 93 | } else { 94 | collapsedRegions.add(currentRegion); 95 | currentRegion = region; 96 | } 97 | } else { 98 | currentRegion = region; 99 | } 100 | } 101 | 102 | if (currentRegion != null) { 103 | collapsedRegions.add(currentRegion); 104 | } 105 | 106 | Logger.info("Collapsed regions from " + regions.size() + " to " + collapsedRegions.size()); 107 | 108 | return collapsedRegions; 109 | } 110 | 111 | public static void main(String[] args) throws Exception { 112 | RegionLoader loader = new RegionLoader(); 113 | // List regions = loader.load("/home/lmose/dev/abra/issue12/test.bed"); 114 | List regions = loader.load("/home/lmose/dev/abra/issue12/test2.bed", false); 115 | 116 | regions = RegionLoader.collapseRegions(regions, 100); 117 | 118 | /* 119 | for (Feature region : regions) { 120 | if (region.getLength() <= 0) { 121 | System.out.println(region + " - " + region.getLength()); 122 | } 123 | 124 | } 125 | */ 126 | 127 | regions = ReAligner.splitRegions(regions); 128 | 129 | for (Feature region : regions) { 130 | if (region.getLength() <= 0) { 131 | System.err.println(region + " - " + region.getLength()); 132 | } 133 | 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/main/java/abra/ReverseComplementor.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | import org.apache.commons.lang.ArrayUtils; 5 | 6 | /** 7 | * Utility class for reversing and complementing bases. 8 | * 9 | * @author Lisle E. 
Mose (lmose at unc dot edu) 10 | */ 11 | public class ReverseComplementor { 12 | 13 | /** 14 | * Returns a new byte array containing the contents of the input byte array 15 | * reversed. The input byte array is not modified. 16 | */ 17 | public byte[] reverse(byte[] input) { 18 | byte[] bytes = ArrayUtils.clone(input); 19 | ArrayUtils.reverse(bytes); 20 | 21 | return bytes; 22 | } 23 | 24 | /** 25 | * Returns the reverse of the input string. 26 | */ 27 | public String reverse(String input) { 28 | return new String(reverse(input.getBytes())); 29 | } 30 | 31 | /** 32 | * Returns the reverse complement of the input string, non-DNA characters are allowed and just reversed. 33 | */ 34 | public static String reverseComplement(String s) { 35 | char[] reverse = new char[s.length()]; 36 | for (int i = 0; i < reverse.length; i++) { 37 | switch (s.charAt(i)) { 38 | case 'A': reverse[reverse.length-i-1] = 'T';break; 39 | case 'T': reverse[reverse.length-i-1] = 'A';break; 40 | case 'C': reverse[reverse.length-i-1] = 'G';break; 41 | case 'G': reverse[reverse.length-i-1] = 'C';break; 42 | default: //non-DNA input, just reverse char that was there 43 | reverse[reverse.length-i-1] = s.charAt(i); 44 | } 45 | } 46 | return new String(reverse); 47 | } 48 | } -------------------------------------------------------------------------------- /src/main/java/abra/SAMRecordWrapper.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import htsjdk.samtools.CigarElement; 7 | import htsjdk.samtools.CigarOperator; 8 | import htsjdk.samtools.SAMRecord; 9 | 10 | public class SAMRecordWrapper { 11 | 12 | private SAMRecord samRecord; 13 | private boolean shouldAssemble; 14 | private boolean shouldFilter; 15 | private int sampleIdx; 16 | private boolean isUnalignedRc = false; 17 | private String mergedSeq = null; 18 | private String mergedQual = null; 19 | private int 
adjustedAlignmentStart = -1; 20 | private int adjustedAlignmentEnd = -1; 21 | 22 | private int bqSum = -1; 23 | 24 | public SAMRecordWrapper(SAMRecord record, boolean shouldFilter, boolean shouldAssemble, int sampleIdx) { 25 | this.samRecord = record; 26 | this.shouldFilter = true; 27 | this.shouldAssemble = shouldAssemble; 28 | this.sampleIdx = sampleIdx; 29 | } 30 | 31 | public SAMRecord getSamRecord() { 32 | return samRecord; 33 | } 34 | 35 | public boolean shouldAssemble() { 36 | return shouldAssemble; 37 | } 38 | 39 | public boolean shouldFilter() { 40 | return shouldFilter; 41 | } 42 | 43 | public int getSampleIdx() { 44 | return sampleIdx; 45 | } 46 | 47 | public boolean isUnalignedRc() { 48 | return isUnalignedRc; 49 | } 50 | 51 | public void setUnalignedRc(boolean isUnalignedRc) { 52 | this.isUnalignedRc = isUnalignedRc; 53 | } 54 | 55 | public void setShouldAssemble(boolean shouldAssemble) { 56 | this.shouldAssemble = false; 57 | } 58 | public int getAdjustedAlignmentStart() { 59 | 60 | int start = 0; 61 | 62 | if (adjustedAlignmentStart > -1) { 63 | start = adjustedAlignmentStart; 64 | } else { 65 | 66 | start = samRecord.getAlignmentStart(); 67 | 68 | if (samRecord.getCigar().numCigarElements() > 0) { 69 | CigarElement elem = samRecord.getCigar().getCigarElement(0); 70 | if (elem.getOperator() == CigarOperator.S) { 71 | start -= elem.getLength(); 72 | if (start < 1) { 73 | start = 1; 74 | } 75 | } 76 | } 77 | } 78 | 79 | return start; 80 | } 81 | 82 | public int getAdjustedAlignmentEnd() { 83 | int end = -1; 84 | 85 | if (adjustedAlignmentEnd > -1) { 86 | end = adjustedAlignmentEnd; 87 | } else { 88 | 89 | if (samRecord.getReadUnmappedFlag()) { 90 | end = samRecord.getAlignmentStart() + samRecord.getReadLength(); 91 | } else { 92 | // Use standard alignment end and pad for soft clipping if necessary 93 | end = samRecord.getAlignmentEnd(); 94 | 95 | if (samRecord.getCigar().numCigarElements() > 0) { 96 | CigarElement elem = 
samRecord.getCigar().getCigarElement(samRecord.getCigar().numCigarElements()-1); 97 | if (elem.getOperator() == CigarOperator.S) { 98 | end += elem.getLength(); 99 | } 100 | } 101 | } 102 | } 103 | 104 | return end; 105 | } 106 | 107 | public int getReadLength() { 108 | int length = this.samRecord.getReadLength(); 109 | if (hasMergedSeq()) { 110 | length = mergedSeq.length(); 111 | } 112 | 113 | return length; 114 | } 115 | 116 | public String getMergedSeq() { 117 | return mergedSeq; 118 | } 119 | 120 | public String getMergedQual() { 121 | return mergedQual; 122 | } 123 | 124 | public String getSeq() { 125 | String seq; 126 | if (mergedSeq != null) { 127 | seq = mergedSeq; 128 | } else { 129 | seq = samRecord.getReadString(); 130 | } 131 | 132 | return seq; 133 | } 134 | 135 | public String getQual() { 136 | String qual; 137 | if (mergedQual != null) { 138 | qual = mergedQual; 139 | } else { 140 | qual = samRecord.getBaseQualityString(); 141 | } 142 | 143 | return qual; 144 | } 145 | 146 | public void setMerged(String mergedSeq, String mergedQual, int adjustedAlignmentStart, int adjustedAlignmentEnd) { 147 | this.mergedSeq = mergedSeq; 148 | this.mergedQual = mergedQual; 149 | } 150 | 151 | public void setMergedSeqAndQual(String mergedSeq, String mergedQual) { 152 | this.mergedSeq = mergedSeq; 153 | this.mergedQual = mergedQual; 154 | 155 | // Result qual sum 156 | this.bqSum = -1; 157 | } 158 | 159 | public boolean hasMergedSeq() { 160 | return this.mergedSeq != null; 161 | } 162 | 163 | public int baseQualSum() { 164 | if (bqSum < 0) { 165 | if (hasMergedSeq()) { 166 | bqSum = SAMRecordUtils.sumBaseQuals(mergedQual); 167 | } else { 168 | bqSum = SAMRecordUtils.sumBaseQuals(samRecord); 169 | } 170 | } 171 | 172 | return bqSum; 173 | } 174 | 175 | public List getSpanningRegions() { 176 | 177 | List spans = new ArrayList(); 178 | 179 | int start = getAdjustedAlignmentStart(); 180 | 181 | if (samRecord.getReadUnmappedFlag()) { 182 | spans.add(new Span(start, 
getAdjustedAlignmentEnd())); 183 | } else { 184 | int end = start; 185 | for (CigarElement elem : samRecord.getCigar().getCigarElements()) { 186 | switch (elem.getOperator()) { 187 | case M: 188 | case S: 189 | case D: 190 | end += elem.getLength(); 191 | break; 192 | case I: 193 | break; 194 | case H: 195 | break; 196 | case N: 197 | spans.add(new Span(start, end)); 198 | start = end + elem.getLength(); 199 | end = start; 200 | break; 201 | default: 202 | throw new UnsupportedOperationException("Unhandled cigar operator: " + elem.getOperator() + " in: " + 203 | samRecord.getReadName() + " : " + samRecord.getCigarString()); 204 | } 205 | } 206 | 207 | spans.add(new Span(start, end)); 208 | } 209 | 210 | return spans; 211 | } 212 | 213 | static class Span { 214 | int start; 215 | int end; 216 | 217 | public Span(int start, int end) { 218 | this.start = start; 219 | this.end = end; 220 | } 221 | 222 | @Override 223 | public int hashCode() { 224 | final int prime = 31; 225 | int result = 1; 226 | result = prime * result + end; 227 | result = prime * result + start; 228 | return result; 229 | } 230 | 231 | @Override 232 | public boolean equals(Object obj) { 233 | if (this == obj) 234 | return true; 235 | if (obj == null) 236 | return false; 237 | if (getClass() != obj.getClass()) 238 | return false; 239 | Span other = (Span) obj; 240 | if (end != other.end) 241 | return false; 242 | if (start != other.start) 243 | return false; 244 | return true; 245 | } 246 | } 247 | } 248 | -------------------------------------------------------------------------------- /src/main/java/abra/ScoredContig.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.List; 6 | 7 | public class ScoredContig implements Comparable { 8 | 9 | private double score; 10 | private String contig; 11 | 12 | public ScoredContig(double score, String contig) { 13 | 
this.score = score; 14 | this.contig = contig; 15 | } 16 | 17 | public double getScore() { 18 | return score; 19 | } 20 | 21 | public String getContig() { 22 | return contig; 23 | } 24 | 25 | public String toString() { 26 | return String.valueOf(score); 27 | } 28 | 29 | @Override 30 | public int compareTo(ScoredContig o) { 31 | if (this.score < o.score) { 32 | return 1; 33 | } else if (this.score > o.score) { 34 | return -1; 35 | } else { 36 | return this.contig.compareTo(o.contig); 37 | } 38 | } 39 | 40 | // public static List convertAndFilter(String contigStrings) { 41 | // return convertAndFilter(contigStrings, MAX_CONTIGS); 42 | // } 43 | 44 | public static List convertAndFilter(String contigStrings, int maxContigs, StringBuffer readBuffer) { 45 | List contigs = new ArrayList(); 46 | 47 | double score = Double.NEGATIVE_INFINITY; 48 | String[] contigSeq = contigStrings.split("\n"); 49 | for (String str : contigSeq) { 50 | if (str.startsWith(">")) { 51 | String[] fields = str.split("_"); 52 | try { 53 | score = Double.parseDouble(fields[4]); 54 | } catch (ArrayIndexOutOfBoundsException e) { 55 | Logger.error("Error parsing assembled contigs. 
Line: [" + str + "]\n\nContigs: [\n" + contigStrings + "\n]"); 56 | Logger.error("Read buffer: [\n" + readBuffer.toString() + "\n]"); 57 | throw e; 58 | } 59 | } else { 60 | contigs.add(new ScoredContig(score, str)); 61 | } 62 | } 63 | 64 | return filter(contigs, maxContigs); 65 | } 66 | 67 | public static List filter(List contigs, int maxContigs) { 68 | if (contigs.size() > maxContigs) { 69 | Logger.debug("Shrinking eligible contigs from %d to %d", contigs.size(), maxContigs); 70 | Collections.shuffle(contigs); 71 | Collections.sort(contigs); 72 | // Subset to only the first MAX_CONTIGS 73 | contigs = contigs.subList(0, maxContigs); 74 | } 75 | 76 | return contigs; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/abra/Sequence.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | import java.util.Arrays; 5 | 6 | public class Sequence { 7 | private short length; 8 | private byte[] sequence; 9 | // private int hashCode; 10 | 11 | private static final byte A = 0; 12 | private static final byte C = 1; 13 | private static final byte T = 2; 14 | private static final byte G = 3; 15 | 16 | public Sequence(String str) { 17 | length = (short) str.length(); 18 | 19 | int numBits = str.length() * 2; 20 | int numBytes = numBits / 8; 21 | if ((numBits % 8) > 0) { 22 | numBytes += 1; 23 | } 24 | 25 | sequence = new byte[numBytes]; 26 | Arrays.fill(sequence, (byte) 0); 27 | 28 | for (short i=0; i> posInBucket); 63 | 64 | // (filter all but last 2 bits) i.e. 
bucket & 0x00000011 65 | byte base = (byte) (bucket & 3); 66 | 67 | char ch = getChar(base); 68 | 69 | seq.append(ch); 70 | } 71 | 72 | return seq.toString(); 73 | } 74 | 75 | public int hashCode() { 76 | return Arrays.hashCode(sequence); 77 | } 78 | 79 | public boolean equals(Object obj) { 80 | Sequence that = (Sequence) obj; 81 | return Arrays.equals(this.sequence, that.sequence); 82 | } 83 | 84 | private char getChar(byte base) { 85 | switch (base) { 86 | case A: 87 | return 'A'; 88 | case C: 89 | return 'C'; 90 | case T: 91 | return 'T'; 92 | case G: 93 | return 'G'; 94 | default: 95 | throw new IllegalArgumentException("Invalid base: " + base); 96 | } 97 | } 98 | 99 | private byte getBase(String str, char ch) { 100 | switch (ch) { 101 | case 'A': 102 | return A; 103 | case 'C': 104 | return C; 105 | case 'T': 106 | return T; 107 | case 'G': 108 | return G; 109 | default: 110 | throw new IllegalArgumentException("Invalid base: " + ch + " for sequence: " + str); 111 | } 112 | } 113 | 114 | public static void main(String[] args) { 115 | 116 | /* 117 | int i = 2; 118 | 119 | i = i >> 1; 120 | 121 | System.out.println(Integer.toBinaryString(i)); 122 | 123 | // int i = 2 >> 2; 124 | 125 | System.out.println(i); 126 | */ 127 | 128 | Sequence s1 = new Sequence("ATCGATCG"); 129 | Sequence s2 = new Sequence("ATCGATCC"); 130 | Sequence s3 = new Sequence("ATCGATCG"); 131 | Sequence s4 = new Sequence("ATCGATCGG"); 132 | Sequence s5 = new Sequence("TCGATCGG"); 133 | 134 | System.out.println(s1.getSequenceAsString()); 135 | System.out.println(s2.getSequenceAsString()); 136 | System.out.println(s3.getSequenceAsString()); 137 | System.out.println(s4.getSequenceAsString()); 138 | System.out.println(s5.getSequenceAsString()); 139 | 140 | System.out.println(s1.equals(s2)); 141 | System.out.println(s2.equals(s1)); 142 | System.out.println(s1.equals(s3)); 143 | System.out.println(s3.equals(s1)); 144 | System.out.println(s3.equals(s4)); 145 | System.out.println(s3.equals(s5)); 146 | 
System.out.println(s5.equals(s5)); 147 | 148 | 149 | System.out.println(s1.getFirstCharacter()); 150 | System.out.println(s5.getFirstCharacter()); 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/main/java/abra/SortedSAMWriterRunnable.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | public class SortedSAMWriterRunnable extends AbraRunnable { 4 | 5 | private SortedSAMWriter writer; 6 | private int sampleIdx; 7 | private String inputBam; 8 | 9 | public SortedSAMWriterRunnable(ThreadManager threadManager, SortedSAMWriter writer, int sampleIdx, String inputBam) { 10 | super(threadManager); 11 | this.writer = writer; 12 | this.sampleIdx = sampleIdx; 13 | this.inputBam = inputBam; 14 | } 15 | 16 | @Override 17 | public void go() throws Exception { 18 | writer.outputFinal(sampleIdx, inputBam); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/abra/SortingSAMRecordCollection.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.io.File; 4 | import java.util.Iterator; 5 | 6 | import htsjdk.samtools.BAMRecordCodec; 7 | import htsjdk.samtools.SAMFileHeader; 8 | import htsjdk.samtools.SAMRecord; 9 | import htsjdk.samtools.SAMRecordCoordinateComparator; 10 | import htsjdk.samtools.SAMRecordQueryNameComparator; 11 | import htsjdk.samtools.util.SortingCollection2; 12 | 13 | /** 14 | * Wrapper class for HTSJDK's SortingCollection specific to SAMRecord and adds size tracking 15 | * 16 | * @author lmose 17 | */ 18 | public class SortingSAMRecordCollection implements Iterable { 19 | 20 | private SortingCollection2 reads; 21 | private int size = 0; 22 | 23 | // Array backing the SortingCollection. 
24 | // Re-using this avoids the cost of reallocating the large array each time the SortingCollection is flushed 25 | // private SAMRecord[] records; 26 | 27 | public static SortingSAMRecordCollection newSortByCoordinateInstance(SAMRecord[] recordArray, SAMFileHeader header, int maxRecordsInRAM, String tempDir) { 28 | return new SortingSAMRecordCollection(recordArray, header, new SAMRecordCoordinateComparator(), maxRecordsInRAM, tempDir); 29 | } 30 | 31 | public static SortingSAMRecordCollection newSortByNameInstance(SAMRecord[] recordArray, SAMFileHeader header, int maxRecordsInRAM, String tempDir) { 32 | return new SortingSAMRecordCollection(recordArray, header, new SAMRecordQueryNameComparator(), maxRecordsInRAM, tempDir); 33 | } 34 | 35 | private SortingSAMRecordCollection(SAMRecord[] recordArray, SAMFileHeader header, java.util.Comparator comparator, int maxRecordsInRAM, String tempDir) { 36 | reads = SortingCollection2.newInstance(recordArray, SAMRecord.class, new BAMRecordCodec(header), comparator, maxRecordsInRAM, new File(tempDir)); 37 | } 38 | 39 | @Override 40 | public Iterator iterator() { 41 | return reads.iterator(); 42 | } 43 | 44 | public void add(SAMRecord read) { 45 | reads.add(read); 46 | size += 1; 47 | } 48 | 49 | public int size() { 50 | return size; 51 | } 52 | 53 | public void cleanup() { 54 | reads.cleanup(); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/abra/ThreadManager.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | import java.util.concurrent.ExecutorService; 7 | import java.util.concurrent.Executors; 8 | import java.util.concurrent.TimeUnit; 9 | 10 | /** 11 | * Manages threading 12 | * 13 | * @author Lisle E. 
Mose (lmose at unc dot edu) 14 | */ 15 | public class ThreadManager { 16 | 17 | private static final int MAX_PENDING = 100; 18 | 19 | private int numThreads; 20 | private List threads = new ArrayList(); 21 | private ExecutorService executor; 22 | 23 | public ThreadManager(int numThreads) { 24 | this.numThreads = numThreads; 25 | executor = Executors.newFixedThreadPool(numThreads); 26 | } 27 | 28 | public void spawnThread(AbraRunnable runnable) { 29 | 30 | try { 31 | waitForAvailableThread(); 32 | } catch (InterruptedException e) {} 33 | 34 | addThread(runnable); 35 | 36 | executor.submit(runnable); 37 | } 38 | 39 | private synchronized void addThread(AbraRunnable thread) { 40 | threads.add(thread); 41 | } 42 | 43 | public synchronized void removeThread(AbraRunnable thread) { 44 | threads.remove(thread); 45 | } 46 | 47 | private synchronized int activeThreads() { 48 | return threads.size(); 49 | } 50 | 51 | private void waitForAvailableThread() throws InterruptedException { 52 | while (activeThreads() >= MAX_PENDING) { 53 | Thread.sleep(50); 54 | } 55 | } 56 | 57 | public void waitForAllThreadsToComplete() throws InterruptedException, IOException { 58 | executor.shutdown(); 59 | while (!executor.awaitTermination(300, TimeUnit.SECONDS)) { 60 | Runtime runtime = Runtime.getRuntime(); 61 | 62 | Logger.info("Waiting on %d queued threads.\tmax_mem\t%d\ttotal_mem\t%d\tfree_mem\t%d", threads.size(), 63 | runtime.maxMemory()/1024, runtime.totalMemory()/1024, runtime.freeMemory()/1024); 64 | } 65 | } 66 | 67 | public int getNumThreads() { 68 | return numThreads; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/abra/Variant.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileNotFoundException; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import 
java.util.Collections; 9 | import java.util.HashMap; 10 | import java.util.HashSet; 11 | import java.util.List; 12 | import java.util.Map; 13 | import java.util.Set; 14 | 15 | public class Variant implements Comparable { 16 | 17 | private String chr; 18 | private int position; 19 | private String ref; 20 | private String alt; 21 | 22 | Variant(String chr, int position, String ref, String alt) { 23 | this.chr = chr; 24 | this.position = position; 25 | this.ref = ref; 26 | this.alt = alt; 27 | } 28 | 29 | public String getChr() { 30 | return chr; 31 | } 32 | 33 | public int getPosition() { 34 | return position; 35 | } 36 | 37 | public String getRef() { 38 | return ref; 39 | } 40 | 41 | public String getAlt() { 42 | return alt; 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "Variant [chr=" + chr + ", position=" + position + ", ref=" + ref + ", alt=" + alt + "]"; 48 | } 49 | 50 | @Override 51 | public int hashCode() { 52 | final int prime = 31; 53 | int result = 1; 54 | result = prime * result + ((alt == null) ? 0 : alt.hashCode()); 55 | result = prime * result + ((chr == null) ? 0 : chr.hashCode()); 56 | result = prime * result + position; 57 | result = prime * result + ((ref == null) ? 
0 : ref.hashCode()); 58 | return result; 59 | } 60 | 61 | @Override 62 | public boolean equals(Object obj) { 63 | if (this == obj) 64 | return true; 65 | if (obj == null) 66 | return false; 67 | if (getClass() != obj.getClass()) 68 | return false; 69 | Variant other = (Variant) obj; 70 | if (alt == null) { 71 | if (other.alt != null) 72 | return false; 73 | } else if (!alt.equals(other.alt)) 74 | return false; 75 | if (chr == null) { 76 | if (other.chr != null) 77 | return false; 78 | } else if (!chr.equals(other.chr)) 79 | return false; 80 | if (position != other.position) 81 | return false; 82 | if (ref == null) { 83 | if (other.ref != null) 84 | return false; 85 | } else if (!ref.equals(other.ref)) 86 | return false; 87 | return true; 88 | } 89 | 90 | public int getRefSpan() { 91 | return ref.length() - alt.length() + 1; 92 | } 93 | 94 | @Override 95 | public int compareTo(Variant that) { 96 | return this.position - that.position; 97 | } 98 | 99 | // Assumes all variants on same chromosome. 100 | static Map> groupByRegion(List regions, List variants) { 101 | 102 | // Key = region, value = variants with start in region 103 | Map> regionVariants = new HashMap>(); 104 | 105 | // Key = position, value = list of variants 106 | Map> posVariants = new HashMap>(); 107 | 108 | // Track assigned variants, so that we process only once. 
109 | Set assignedVariants = new HashSet(); 110 | 111 | for (Variant variant : variants) { 112 | if (!posVariants.containsKey(variant.getPosition())) { 113 | posVariants.put(variant.getPosition(), new ArrayList()); 114 | } 115 | posVariants.get(variant.getPosition()).add(variant); 116 | } 117 | 118 | // For each region 119 | for (Feature region : regions) { 120 | // For each position in region 121 | for (int i= (int) region.getStart(); i < (int) region.getEnd(); i++) { 122 | List currVariants = posVariants.get(i); 123 | if (currVariants != null) { 124 | // For each variant at current position 125 | for (Variant variant : currVariants) { 126 | 127 | // If variant not already assigned, assign to curr region. 128 | if (!assignedVariants.contains(variant)) { 129 | if (!regionVariants.containsKey(region)) { 130 | regionVariants.put(region, new ArrayList()); 131 | } 132 | 133 | regionVariants.get(region).add(variant); 134 | assignedVariants.add(variant); 135 | } 136 | } 137 | } 138 | } 139 | } 140 | 141 | return regionVariants; 142 | } 143 | 144 | /** 145 | * Load variants from VCF return map with key = chromosome, value = variant list sorted by position 146 | */ 147 | static Map> loadFromFile(String vcfFile) throws FileNotFoundException, IOException { 148 | 149 | Map> variants = new HashMap>(); 150 | 151 | BufferedReader reader = new BufferedReader(new FileReader(vcfFile)); 152 | 153 | String line = reader.readLine(); 154 | while (line != null) { 155 | 156 | if (!line.startsWith("#") && !line.trim().isEmpty()) { 157 | String[] fields = line.split("\\t"); 158 | String chr = fields[0]; 159 | int pos = Integer.parseInt(fields[1]); 160 | String ref = fields[3]; 161 | String alt = fields[4]; 162 | String filt = fields[6]; 163 | 164 | if (filt.equalsIgnoreCase("PASS") && (ref.length() > 1 || alt.length() > 1)) { 165 | Variant variant = new Variant(chr,pos,ref,alt); 166 | 167 | if (!variants.containsKey(chr)) { 168 | variants.put(chr, new ArrayList()); 169 | } 170 | 171 | 
variants.get(chr).add(variant); 172 | } 173 | } 174 | 175 | line = reader.readLine(); 176 | } 177 | 178 | reader.close(); 179 | 180 | for (String chr : variants.keySet()) { 181 | Collections.sort(variants.get(chr)); 182 | } 183 | 184 | return variants; 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/Allele.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | public class Allele { 4 | public enum Type { 5 | A, T, C, G, INS, DEL, MNP, UNK 6 | } 7 | 8 | private Type type; 9 | private int length; 10 | 11 | private String seq; 12 | 13 | public static final Allele UNK = new Allele(Type.UNK, 1); 14 | public static final Allele A = new Allele(Type.A, 1); 15 | public static final Allele T = new Allele(Type.T, 1); 16 | public static final Allele C = new Allele(Type.C, 1); 17 | public static final Allele G = new Allele(Type.G, 1); 18 | 19 | public Allele(Type type, int length) { 20 | this.type = type; 21 | this.length = length; 22 | } 23 | 24 | public static Allele getMnpAllele(String seq) { 25 | Allele allele = new Allele(Type.MNP, seq.length()); 26 | allele.seq = seq; 27 | 28 | return allele; 29 | } 30 | 31 | public static Allele getAllele (char base) { 32 | switch (base) { 33 | case 'A': 34 | return A; 35 | case 'T': 36 | return T; 37 | case 'C': 38 | return C; 39 | case 'G': 40 | return G; 41 | default: 42 | return UNK; 43 | } 44 | } 45 | 46 | public Type getType() { 47 | return type; 48 | } 49 | 50 | public int getLength() { 51 | return length; 52 | } 53 | 54 | public String toString() { 55 | String str = ""; 56 | switch (type) { 57 | case A: 58 | str = "A"; 59 | break; 60 | case C: 61 | str = "C"; 62 | break; 63 | case T: 64 | str = "T"; 65 | break; 66 | case G: 67 | str = "G"; 68 | break; 69 | case INS: 70 | str = "INS"; 71 | break; 72 | case DEL: 73 | str = "DEL"; 74 | break; 75 | case MNP: 76 | str = "MNP"; 77 | break; 
78 | case UNK: 79 | str = "UNK"; 80 | break; 81 | } 82 | 83 | return str; 84 | } 85 | 86 | @Override 87 | public int hashCode() { 88 | final int prime = 31; 89 | int result = 1; 90 | result = prime * result + length; 91 | result = prime * result + ((seq == null) ? 0 : seq.hashCode()); 92 | result = prime * result + ((type == null) ? 0 : type.hashCode()); 93 | return result; 94 | } 95 | 96 | @Override 97 | public boolean equals(Object obj) { 98 | if (this == obj) 99 | return true; 100 | if (obj == null) 101 | return false; 102 | if (getClass() != obj.getClass()) 103 | return false; 104 | Allele other = (Allele) obj; 105 | if (length != other.length) 106 | return false; 107 | if (seq == null) { 108 | if (other.seq != null) 109 | return false; 110 | } else if (!seq.equals(other.seq)) 111 | return false; 112 | if (type != other.type) 113 | return false; 114 | return true; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/AlleleCounts.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collection; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Set; 10 | 11 | import htsjdk.samtools.SAMRecord; 12 | 13 | public class AlleleCounts { 14 | 15 | private int count; 16 | private int totalCount; // Includes overlapping pairs 17 | private int fwd; 18 | private int rev; 19 | private int minReadIdx = Integer.MAX_VALUE; 20 | private int maxReadIdx = Integer.MIN_VALUE; 21 | private Map insertBaseCounts = new HashMap(); 22 | private Set readIds = new HashSet(); 23 | private Map> alignmentEnds = new HashMap>(); 24 | private int spanEnd = -1; 25 | 26 | private int spanningCount = -1; 27 | 28 | public static final AlleleCounts EMPTY_COUNTS; 29 | 30 | static { 31 | EMPTY_COUNTS = new AlleleCounts(); 32 | 
EMPTY_COUNTS.count = 0; 33 | EMPTY_COUNTS.fwd = 0; 34 | EMPTY_COUNTS.rev = 0; 35 | EMPTY_COUNTS.minReadIdx = 0; 36 | EMPTY_COUNTS.maxReadIdx = 0; 37 | EMPTY_COUNTS.totalCount = 0; 38 | } 39 | 40 | public void setSpanEnd(int spanEnd) { 41 | this.spanEnd = spanEnd; 42 | } 43 | 44 | public int getCount() { 45 | if (spanEnd <= 0) { 46 | return count; 47 | } 48 | 49 | if (spanningCount < 0) { 50 | // Count number of distinct reads with an alignment end that reaches the spanEnd. 51 | // Distinct read ids are used to not double count overlapping fragments 52 | Set readIds = new HashSet(); 53 | 54 | for (Integer alignmentEnd : alignmentEnds.keySet()) { 55 | if (alignmentEnd > spanEnd) { 56 | readIds.addAll(alignmentEnds.get(alignmentEnd)); 57 | } 58 | } 59 | 60 | spanningCount = readIds.size(); 61 | } 62 | 63 | return spanningCount; 64 | } 65 | 66 | public int getFwd() { 67 | return fwd; 68 | } 69 | 70 | public int getRev() { 71 | return rev; 72 | } 73 | 74 | public int getMinReadIdx() { 75 | return minReadIdx; 76 | } 77 | 78 | public int getMaxReadIdx() { 79 | return maxReadIdx; 80 | } 81 | 82 | public void incrementCount(SAMRecord read) { 83 | if (!readIds.contains(read.getReadName())) { 84 | // Don't allow multiple ends of fragment to be double counted 85 | count += 1; 86 | } 87 | 88 | totalCount += 1; 89 | 90 | if (read.getReadNegativeStrandFlag()) { 91 | incrementRev(); 92 | } else { 93 | incrementFwd(); 94 | } 95 | 96 | readIds.add(read.getReadName()); 97 | if (!alignmentEnds.containsKey(read.getAlignmentEnd())) { 98 | alignmentEnds.put(read.getAlignmentEnd(), new ArrayList()); 99 | } 100 | 101 | alignmentEnds.get(read.getAlignmentEnd()).add(read.getReadName()); 102 | } 103 | 104 | public int getTotalCount() { 105 | return totalCount; 106 | } 107 | 108 | private void incrementFwd() { 109 | fwd += 1; 110 | } 111 | 112 | private void incrementRev() { 113 | rev += 1; 114 | } 115 | 116 | public void clearReadIds() { 117 | readIds.clear(); 118 | } 119 | 120 | public void 
updateReadIdx(int idx) { 121 | if (idx < minReadIdx) { 122 | minReadIdx = idx; 123 | } 124 | 125 | if (idx > maxReadIdx) { 126 | maxReadIdx = idx; 127 | } 128 | } 129 | 130 | public void updateInsertBases(String bases) { 131 | if (bases != null) { 132 | if (insertBaseCounts.containsKey(bases)) { 133 | insertBaseCounts.put(bases, insertBaseCounts.get(bases)+1); 134 | } else { 135 | insertBaseCounts.put(bases, 1); 136 | } 137 | } 138 | } 139 | 140 | public String getPreferredInsertBases() { 141 | int max = 0; 142 | String maxBases = ""; 143 | for (String bases : insertBaseCounts.keySet()) { 144 | int count = insertBaseCounts.get(bases); 145 | if (count > max) { 146 | max = count; 147 | maxBases = bases; 148 | } 149 | } 150 | 151 | return maxBases; 152 | } 153 | 154 | public static int sum(Collection counts) { 155 | int sum = 0; 156 | 157 | for (AlleleCounts ac : counts) { 158 | sum += ac.getCount(); 159 | } 160 | 161 | return sum; 162 | } 163 | 164 | public static void setSpanEnd(int spanEnd, Map counts) { 165 | for (Allele allele : counts.keySet()) { 166 | if (allele.getType() != Allele.Type.DEL && allele.getType() != Allele.Type.INS) { 167 | AlleleCounts ac = counts.get(allele); 168 | ac.setSpanEnd(spanEnd); 169 | } 170 | } 171 | } 172 | 173 | public String toString() { 174 | return "count: " + count; 175 | } 176 | } 177 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/BetaBinomial.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import org.apache.commons.math3.special.Gamma; 4 | 5 | public class BetaBinomial { 6 | 7 | private static double fita = 0.4657371; 8 | private static double fitb = 1494.0276936; 9 | private static double minVal = 1e-16; 10 | 11 | public static double betabinCDF(int depth, double maxVal) { 12 | double v = 0; 13 | for (int i = depth; i >= maxVal; i--) { 14 | double x = betabinPMFG(depth, i, fita, fitb); 15 | v 
+= x; 16 | } 17 | return pinch(v); 18 | } 19 | 20 | private static double pinch(double n) { 21 | double r = n; 22 | if (r <= minVal) { 23 | return minVal; 24 | } 25 | if (r >= 1 - minVal) { 26 | r = 1 - minVal; 27 | return r; 28 | } 29 | return r; 30 | } 31 | 32 | private static double betabinPMFG(int n, int k, double a, double b) { 33 | double b1 = Gamma.logGamma(n + 1); 34 | double b2 = Gamma.logGamma(k + 1) + Gamma.logGamma(n - k + 1); 35 | double b3 = Gamma.logGamma(k + a) + Gamma.logGamma(n - k + b); 36 | double b4 = Gamma.logGamma(n + a + b); 37 | double b5 = Gamma.logGamma(a + b); 38 | double b6 = Gamma.logGamma(a) + Gamma.logGamma(b); 39 | double v = b1 - b2 + b3 - b4 + b5 - b6; 40 | return Math.exp(v); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/Cadabra.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | import abra.CompareToReference2; 9 | import abra.Feature; 10 | import abra.Logger; 11 | import abra.SAMRecordUtils; 12 | import abra.ThreadManager; 13 | 14 | import abra.cadabra.CadabraProcessor.SampleCall; 15 | import abra.cadabra.CadabraProcessor.SomaticCall; 16 | import htsjdk.samtools.SAMFileHeader; 17 | import htsjdk.samtools.SAMSequenceRecord; 18 | import htsjdk.samtools.SamReader; 19 | 20 | public class Cadabra { 21 | 22 | private CompareToReference2 c2r; 23 | 24 | private Map> chromosomeCalls = new HashMap>(); 25 | private Map> chromosomeSomaticCalls = new HashMap>(); 26 | 27 | public void call(CadabraOptions options) throws IOException, InterruptedException { 28 | c2r = new CompareToReference2(); 29 | c2r.init(options.getReference()); 30 | 31 | outputHeader(options); 32 | 33 | ThreadManager threadManager = new ThreadManager(options.getNumThreads()); 34 | 35 | for (String 
chromosome : c2r.getChromosomes()) { 36 | Feature region = new Feature(chromosome, 1, c2r.getChromosomeLength(chromosome)); 37 | CadabraRunnable thread = new CadabraRunnable(threadManager, this, options, c2r, region); 38 | threadManager.spawnThread(thread); 39 | } 40 | 41 | threadManager.waitForAllThreadsToComplete(); 42 | 43 | // Output calls. 44 | if (options.getNormal() == null) { 45 | // Simple calling 46 | for (String chromosome : c2r.getChromosomes()) { 47 | for (SampleCall call : chromosomeCalls.get(chromosome)) { 48 | System.out.println(call); 49 | } 50 | } 51 | } else { 52 | // Somatic calling 53 | for (String chromosome : c2r.getChromosomes()) { 54 | for (SomaticCall call : chromosomeSomaticCalls.get(chromosome)) { 55 | System.out.println(call); 56 | } 57 | } 58 | } 59 | 60 | Logger.info("Cadabra done."); 61 | } 62 | 63 | void addCalls(String chromosome, List calls) { 64 | Logger.info("Choromosome: %s done.", chromosome); 65 | synchronized(chromosomeCalls) { 66 | chromosomeCalls.put(chromosome, calls); 67 | } 68 | } 69 | 70 | void addSomaticCalls(String chromosome, List calls) { 71 | Logger.info("Choromosome: %s done.", chromosome); 72 | synchronized(chromosomeSomaticCalls) { 73 | chromosomeSomaticCalls.put(chromosome, calls); 74 | } 75 | } 76 | 77 | private void outputHeader(CadabraOptions options) throws IOException { 78 | 79 | SAMFileHeader header; 80 | String vcfColumns; 81 | 82 | if (options.getNormal() == null) { 83 | // Single sample 84 | SamReader reader = SAMRecordUtils.getSamReader(options.getTumor()); 85 | header = reader.getFileHeader(); 86 | reader.close(); 87 | vcfColumns = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE"; 88 | } else { 89 | // Somatic 90 | SamReader reader = SAMRecordUtils.getSamReader(options.getNormal()); 91 | SAMFileHeader normalHeader = reader.getFileHeader(); 92 | reader.close(); 93 | header = normalHeader; 94 | 95 | reader = SAMRecordUtils.getSamReader(options.getTumor()); 96 | SAMFileHeader tumorHeader = 
reader.getFileHeader(); 97 | reader.close(); 98 | 99 | //TODO: Double check against specified reference? 100 | if (!normalHeader.getSequenceDictionary().equals(tumorHeader.getSequenceDictionary())) { 101 | Logger.error("Reference Sequences for tumor and normal do not match. Check the VCF headers."); 102 | } 103 | 104 | vcfColumns = "#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NORMAL TUMOR"; 105 | } 106 | 107 | System.out.println("##fileformat=VCFv4.2"); 108 | System.out.println("##reference=file://" + options.getReference()); 109 | 110 | for (SAMSequenceRecord seq : header.getSequenceDictionary().getSequences()) { 111 | System.out.println(String.format("##contig=", seq.getSequenceName(), seq.getSequenceLength())); 112 | } 113 | 114 | System.out.println("##INFO="); 115 | System.out.println("##INFO="); 116 | System.out.println("##INFO="); 117 | System.out.println("##INFO="); 118 | System.out.println("##FORMAT="); 119 | System.out.println("##FORMAT="); 120 | System.out.println("##FORMAT="); 121 | System.out.println("##FORMAT="); 122 | System.out.println("##FORMAT="); 123 | System.out.println("##FORMAT="); 124 | System.out.println("##FORMAT="); 125 | System.out.println("##FORMAT="); 126 | System.out.println("##FORMAT="); 127 | System.out.println("##FORMAT="); 128 | System.out.println("##FORMAT="); 129 | System.out.println("##FORMAT="); 130 | System.out.println(vcfColumns); 131 | } 132 | 133 | public static void main(String[] args) throws Exception { 134 | // String normal = "/home/lmose/dev/abra/cadabra/normal_test2.bam"; 135 | // String tumor = "/home/lmose/dev/abra/cadabra/tumor_test2.bam"; 136 | 137 | // String reference = "/home/lmose/reference/chr1/1.fa"; 138 | // String normal = "/home/lmose/dev/abra/cadabra/normal1.bam"; 139 | // String tumor = "/home/lmose/dev/abra/cadabra/tumor1.bam"; 140 | 141 | 142 | // String normal = "/home/lmose/dev/abra/cadabra/normal.abra4.sort.bam"; 143 | // String tumor = "/home/lmose/dev/abra/cadabra/tumor.abra4.sort.bam"; 144 | 
145 | // String reference = "/home/lmose/reference/chr1/chr1.fa"; 146 | // String normal = "/home/lmose/dev/abra/cadabra/t2/ntest.bam"; 147 | // String tumor = "/home/lmose/dev/abra/cadabra/t2/ttest.bam"; 148 | 149 | 150 | // String reference = "/home/lmose/reference/chr1/chr1.fa"; 151 | // String normal = "/home/lmose/dev/abra/cadabra/ins/ntest.bam"; 152 | // String tumor = "/home/lmose/dev/abra/cadabra/ins/ttest.bam"; 153 | 154 | CadabraOptions options = new CadabraOptions(); 155 | options.parseOptions(args); 156 | 157 | if (options.isValid()) { 158 | new Cadabra().call(options); 159 | } 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/CadabraOptions.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import abra.Logger; 4 | import abra.Options; 5 | import joptsimple.OptionParser; 6 | 7 | public class CadabraOptions extends Options { 8 | 9 | private static final String NUM_THREADS = "threads"; 10 | private static final String TUMOR = "tumor"; 11 | private static final String NORMAL = "normal"; 12 | private static final String SAMPLE = "sample"; 13 | private static final String REFERENCE = "ref"; 14 | private static final String STRP_THRESHOLD = "strp"; 15 | private static final String HRUN_THRESHOLD = "hrun"; 16 | private static final String ISPAN_FILTER = "ispan"; 17 | // Filter variants below this qual threshold 18 | private static final String QUAL_FILTER = "qual"; 19 | private static final String FS_FILTER = "fs"; 20 | private static final String LOW_MQ_FILTER = "lmq"; 21 | // Do not output variants below this qual threshold 22 | private static final String MIN_QUAL = "mq"; 23 | private static final String MIN_MAPQ = "mapq"; 24 | private static final String MIN_VAF = "mf"; 25 | private static final String PCR_PENALTY = "pen"; 26 | private static final String ODDS_RATIO = "oddsr"; 27 | 28 | 29 | private 
OptionParser parser; 30 | private boolean isValid; 31 | 32 | private int numThreads; 33 | private String tumor; 34 | private String normal; 35 | private String reference; 36 | private int strpThreshold; 37 | private int hrunThreshold; 38 | private int pcrPenalty; 39 | private int ispanFilter; 40 | private float qualFilter; 41 | private int fsFilter; 42 | private float lowMQFilter; 43 | private float minQual; 44 | private float minMapq; 45 | private float minVaf; 46 | private float oddsr; 47 | 48 | @Override 49 | protected OptionParser getOptionParser() { 50 | if (parser == null) { 51 | parser = new OptionParser(); 52 | parser.accepts(NUM_THREADS, "Processing threads").withRequiredArg().ofType(Integer.class).defaultsTo(4); 53 | parser.accepts(TUMOR, "Tumor BAM file").withRequiredArg().ofType(String.class); 54 | parser.accepts(NORMAL, "Normal BAM file").withRequiredArg().ofType(String.class); 55 | parser.accepts(SAMPLE, "BAM file to be used for single sample calling").withRequiredArg().ofType(String.class); 56 | parser.accepts(REFERENCE, "Reference fasta").withRequiredArg().ofType(String.class); 57 | parser.accepts(STRP_THRESHOLD, "Filter variants with short tandem repeat period at or above this threshold(-1 to disable)").withRequiredArg().ofType(Integer.class).defaultsTo(5); 58 | parser.accepts(HRUN_THRESHOLD, "Filter short indels with a nearby homopolymer run of this length or greater - only neighboring 20 bases searched (-1 to disable)").withRequiredArg().ofType(Integer.class).defaultsTo(6); 59 | parser.accepts(PCR_PENALTY, "Penalize quality score for variants reaching strp or hrun thresholds by specified amount.").withRequiredArg().ofType(Integer.class).defaultsTo(30); 60 | parser.accepts(ISPAN_FILTER, "Filter variants with max index span less than specified value").withRequiredArg().ofType(Integer.class).defaultsTo(19); 61 | parser.accepts(QUAL_FILTER, "Filter variants with quality score less than specified 
value").withRequiredArg().ofType(Float.class).defaultsTo(5f); 62 | parser.accepts(FS_FILTER, "Filter variants with FS score greater than specified value").withRequiredArg().ofType(Integer.class).defaultsTo(70); 63 | parser.accepts(LOW_MQ_FILTER, "Filter variants with fraction of low quality reads greater than specified value").withRequiredArg().ofType(Float.class).defaultsTo(.5f); 64 | parser.accepts(MIN_QUAL, "Variants with quality below specified threshold are not output").withRequiredArg().ofType(Float.class).defaultsTo(5.0f); 65 | parser.accepts(MIN_MAPQ, "Reads with mapping quality below specified value are excluded from processing (except unmapped reads)").withRequiredArg().ofType(Integer.class).defaultsTo(20); 66 | parser.accepts(MIN_VAF, "Do not output variants with frequency below specified value").withRequiredArg().ofType(Float.class).defaultsTo(0.0f); 67 | parser.accepts(ODDS_RATIO, "Filter variants with odds ratio below specified value").withRequiredArg().ofType(Float.class).defaultsTo(5.0f); 68 | } 69 | 70 | return parser; 71 | } 72 | 73 | public void init() { 74 | this.numThreads = (Integer) getOptions().valueOf(NUM_THREADS); 75 | this.tumor = (String) getOptions().valueOf(TUMOR); 76 | if (tumor == null) { 77 | this.tumor = (String) getOptions().valueOf(SAMPLE); 78 | } 79 | this.normal = (String) getOptions().valueOf(NORMAL); 80 | this.reference = (String) getOptions().valueOf(REFERENCE); 81 | this.strpThreshold = (Integer) getOptions().valueOf(STRP_THRESHOLD); 82 | this.hrunThreshold = (Integer) getOptions().valueOf(HRUN_THRESHOLD); 83 | this.ispanFilter = (Integer) getOptions().valueOf(ISPAN_FILTER); 84 | this.qualFilter = (Float) getOptions().valueOf(QUAL_FILTER); 85 | this.fsFilter = (Integer) getOptions().valueOf(FS_FILTER); 86 | this.lowMQFilter = (Float) getOptions().valueOf(LOW_MQ_FILTER); 87 | this.minQual = (Float) getOptions().valueOf(MIN_QUAL); 88 | this.minMapq = (Integer) getOptions().valueOf(MIN_MAPQ); 89 | this.minVaf = (Float) 
getOptions().valueOf(MIN_VAF); 90 | this.pcrPenalty = (Integer) getOptions().valueOf(PCR_PENALTY); 91 | this.oddsr = (Float) getOptions().valueOf(ODDS_RATIO); 92 | } 93 | 94 | @Override 95 | protected void validate() { 96 | 97 | isValid = true; 98 | 99 | if (tumor == null) { 100 | Logger.error("Please specify a " + TUMOR + " or a " + SAMPLE); 101 | isValid = false; 102 | } 103 | 104 | if (reference == null) { 105 | Logger.error("Please specify a reference"); 106 | isValid = false; 107 | } 108 | } 109 | 110 | public boolean isValid() { 111 | return isValid; 112 | } 113 | 114 | public int getNumThreads() { 115 | return numThreads; 116 | } 117 | 118 | public String getTumor() { 119 | return tumor; 120 | } 121 | 122 | public String getNormal() { 123 | return normal; 124 | } 125 | 126 | public String getReference() { 127 | return reference; 128 | } 129 | 130 | public int getStrpThreshold() { 131 | return strpThreshold; 132 | } 133 | 134 | public int getHrunThreshold() { 135 | return hrunThreshold; 136 | } 137 | 138 | public int getPcrPenalty() { 139 | return pcrPenalty; 140 | } 141 | 142 | public int getIspanFilter() { 143 | return ispanFilter; 144 | } 145 | 146 | public float getQualFilter() { 147 | return qualFilter; 148 | } 149 | 150 | public int getFsFilter() { 151 | return fsFilter; 152 | } 153 | 154 | public float getLowMQFilter() { 155 | return lowMQFilter; 156 | } 157 | 158 | public float getMinQual() { 159 | return minQual; 160 | } 161 | 162 | public float getMinMapq() { 163 | return minMapq; 164 | } 165 | 166 | public float getMinVaf() { 167 | return minVaf; 168 | } 169 | 170 | public float getOddsRatio() { return oddsr; } 171 | } 172 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/CadabraRunnable.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import abra.AbraRunnable; 4 | import abra.CompareToReference2; 5 | import 
abra.Feature; 6 | import abra.ThreadManager; 7 | 8 | public class CadabraRunnable extends AbraRunnable { 9 | 10 | private CadabraProcessor processor; 11 | private Feature region; 12 | 13 | public CadabraRunnable(ThreadManager threadManager, Cadabra cadabra, CadabraOptions options, 14 | CompareToReference2 c2r, Feature region) { 15 | super(threadManager); 16 | this.processor = new CadabraProcessor(cadabra, options, c2r); 17 | this.region = region; 18 | } 19 | 20 | @Override 21 | public void go() throws Exception { 22 | processor.process(region); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/FishersExactTest.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import java.util.Random; 4 | 5 | /** 6 | * Basic Fisher's exact test implementation. 7 | * 8 | * Caches factorial values in log space across calculations, thus speeding things up (a bit). 
/**
 * Basic Fisher's exact test implementation for 2x2 contingency tables.
 *
 * Caches factorial values in log space across calculations, thus speeding things up (a bit).
 * Tables whose total exceeds {@code MAX_SIZE} are scaled down proportionally
 * (with truncation) before testing, so results for very deep tables are approximate.
 *
 * @author lmose
 */
public class FishersExactTest {

    // Largest table total that is evaluated without scaling.
    private static final int MAX_SIZE = 5000;

    // Cache of factorial values in log space: factorialCache[i] == ln(i!)
    private static final double[] factorialCache = new double[MAX_SIZE+1];

    static {
        init();
    }

    private static void init() {
        // factorialCache[0] is already 0.0 == ln(0!)
        for (int i=1; i<=MAX_SIZE; i++) {
            factorialCache[i] = factorialCache[i-1] + Math.log(i);
        }
    }

    /**
     * One-directional test: starting from the observed table, repeatedly moves one
     * observation from normalAlt to normalRef (and from tumorRef to tumorAlt) and adds
     * the probability of every resulting table that is no more likely than the observed one.
     *
     * NOTE(review): tables in this direction that are MORE likely than the observed table
     * are skipped, which differs from a textbook one-tailed test when the observed table
     * lies on the opposite side of the mode. Behavior is preserved as-is; confirm intent.
     *
     * @return p-value in [0, 1]
     * @throws IllegalArgumentException if any count is negative
     */
    public double oneTailedTest(int normalRef, int normalAlt, int tumorRef, int tumorAlt) {
        int[] t = toScaledTable(normalRef, normalAlt, tumorRef, tumorAlt);
        int n = t[0] + t[1] + t[2] + t[3];
        double numerator = logMarginals(t);

        double pObserved = getPForTable(t[0], t[1], t[2], t[3], n, numerator);
        double pValue = accumulateTail(t, 1, n, numerator, pObserved, pObserved);

        // Cap p-value at 1 to guard against rounding errors
        return Math.min(pValue, 1.0);
    }

    /**
     * Two-directional test: sums the observed table's probability and the probability of
     * every table with the same marginals (in both directions) that is no more likely
     * than the observed one.
     *
     * @return p-value in [0, 1]
     * @throws IllegalArgumentException if any count is negative
     */
    public double twoTailedTest(int normalRef, int normalAlt, int tumorRef, int tumorAlt) {
        int[] t = toScaledTable(normalRef, normalAlt, tumorRef, tumorAlt);
        int n = t[0] + t[1] + t[2] + t[3];
        double numerator = logMarginals(t);

        double pObserved = getPForTable(t[0], t[1], t[2], t[3], n, numerator);
        double pValue = accumulateTail(t, 1, n, numerator, pObserved, pObserved);

        // Now the other way...
        pValue = accumulateTail(t, -1, n, numerator, pObserved, pValue);

        // Cap p-value at 1 to guard against rounding errors
        return Math.min(pValue, 1.0);
    }

    /**
     * Validates the counts and returns them as {row1Col1, row1Col2, row2Col1, row2Col2},
     * scaled down so the total fits the factorial cache.
     */
    private static int[] toScaledTable(int normalRef, int normalAlt, int tumorRef, int tumorAlt) {
        if (normalRef < 0 || normalAlt < 0 || tumorRef < 0 || tumorAlt < 0) {
            throw new IllegalArgumentException("Contingency table counts must be non-negative: "
                    + normalRef + ", " + normalAlt + ", " + tumorRef + ", " + tumorAlt);
        }

        int[] t = new int[] { normalRef, normalAlt, tumorRef, tumorAlt };

        // Sum as long: four large int counts can overflow an int total.
        long total = (long) normalRef + normalAlt + tumorRef + tumorAlt;
        if (total > MAX_SIZE) {
            double scale = (double) MAX_SIZE / (double) total;
            for (int i=0; i<t.length; i++) {
                t[i] = (int) (t[i] * scale);
            }
        }

        return t;
    }

    /** Returns ln(row1Sum!) + ln(row2Sum!) + ln(col1Sum!) + ln(col2Sum!) for the table. */
    private static double logMarginals(int[] t) {
        int row1Sum = t[0] + t[1];
        int row2Sum = t[2] + t[3];
        int col1Sum = t[0] + t[2];
        int col2Sum = t[1] + t[3];

        return factorialCache[row1Sum] + factorialCache[row2Sum] + factorialCache[col1Sum] + factorialCache[col2Sum];
    }

    /**
     * Walks away from the start table one observation at a time and adds to pValue the
     * probability of each visited table that is no more likely than the observed one.
     *
     * @param step +1 grows row1Col1/row2Col2; -1 grows row1Col2/row2Col1
     * @return the updated running p-value (the start table itself is not modified)
     */
    private double accumulateTail(int[] start, int step, int n, double numerator, double pObserved, double pValue) {
        int r1c1 = start[0];
        int r1c2 = start[1];
        int r2c1 = start[2];
        int r2c2 = start[3];

        // Stop as soon as one of the two shrinking cells is exhausted.
        while (step > 0 ? (r1c2 > 0 && r2c1 > 0) : (r1c1 > 0 && r2c2 > 0)) {
            r1c1 += step;
            r1c2 -= step;
            r2c1 -= step;
            r2c2 += step;

            double nextP = getPForTable(r1c1, r1c2, r2c1, r2c2, n, numerator);

            if (nextP <= pObserved) {
                pValue += nextP;
            }
        }

        return pValue;
    }

    /** Hypergeometric probability of a single table, computed in log space. */
    private double getPForTable(int r1c1, int r1c2, int r2c1, int r2c2, int n, double numerator) {
        //TODO: Remove this as optimization
        if ((r1c1 + r1c2 + r2c1 + r2c2) != n) throw new IllegalArgumentException("Invalid contingency table");

        double denominator = factorialCache[r1c1] + factorialCache[r1c2] + factorialCache[r2c1] + factorialCache[r2c2] + factorialCache[n];
        return Math.exp(numerator - denominator);
    }

    /** Ad-hoc smoke test: prints one- and two-tailed results for a fixed table. */
    public static void main(String[] args) {
        int nr = 709; int na = 20; int tr = 711; int ta = 85;

        FishersExactTest t = new FishersExactTest();

        double p = t.oneTailedTest(nr, na, tr, ta);

        System.out.println("p: " + p);
        System.out.println("phred: " + (-10 * Math.log10(p)));

        p = t.twoTailedTest(nr, na, tr, ta);

        System.out.println("2 tailed: " + p);
    }
}
/**
 * Describes the longest homopolymer run (stretch of one repeated character)
 * found in a sequence: its length, its base and its 0-based start offset.
 */
public class HomopolymerRun {

    private int length;
    private char base;
    private int pos;

    /**
     * Scans seq for its longest run of identical adjacent characters.
     * When several runs share the maximum length, the first one wins.
     *
     * @return the run, or null when the longest run is shorter than 4
     */
    public static HomopolymerRun find(String seq) {

        int bestLen = 0;
        char bestBase = '0';
        int bestStart = -1;

        int runLen = 1;
        // '0' is the "no previous character" sentinel.
        char last = '0';

        for (int idx = 0; idx < seq.length(); idx++) {
            char current = seq.charAt(idx);

            if (current != last) {
                // A new run begins at this character.
                runLen = 1;
            } else {
                runLen++;

                // Strictly greater: an equally long later run never replaces an earlier one.
                if (runLen > bestLen) {
                    bestLen = runLen;
                    bestBase = current;
                    bestStart = idx - (runLen - 1);
                }
            }

            last = current;
        }

        return (bestLen >= 4) ? new HomopolymerRun(bestLen, bestBase, bestStart) : null;
    }

    public HomopolymerRun(int length, char base, int pos) {
        this.length = length;
        this.base = base;
        this.pos = pos;
    }

    /** Number of repeated characters in the run. */
    public int getLength() {
        return length;
    }

    /** The repeated character. */
    public char getBase() {
        return base;
    }

    /** 0-based offset of the run's first character within the searched sequence. */
    public int getPos() {
        return pos;
    }
}
insertBases) { 33 | this.insertBases = insertBases; 34 | } 35 | 36 | @Override 37 | public int hashCode() { 38 | final int prime = 31; 39 | int result = 1; 40 | result = prime * result 41 | + ((cigarElement == null) ? 0 : cigarElement.hashCode()); 42 | result = prime * result 43 | + ((insertBases == null) ? 0 : insertBases.hashCode()); 44 | return result; 45 | } 46 | 47 | @Override 48 | public boolean equals(Object obj) { 49 | if (this == obj) 50 | return true; 51 | if (obj == null) 52 | return false; 53 | if (getClass() != obj.getClass()) 54 | return false; 55 | IndelInfo other = (IndelInfo) obj; 56 | if (cigarElement == null) { 57 | if (other.cigarElement != null) 58 | return false; 59 | } else if (!cigarElement.equals(other.cigarElement)) 60 | return false; 61 | if (insertBases == null) { 62 | if (other.insertBases != null) 63 | return false; 64 | } else if (!insertBases.equals(other.insertBases)) 65 | return false; 66 | return true; 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/ReadLocusReader.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | import java.util.ArrayList; 6 | import java.util.Iterator; 7 | import java.util.List; 8 | 9 | import abra.Feature; 10 | import abra.Logger; 11 | import abra.SAMRecordUtils; 12 | import htsjdk.samtools.CigarOperator; 13 | import htsjdk.samtools.SAMFileHeader; 14 | import htsjdk.samtools.SAMRecord; 15 | import htsjdk.samtools.SamReader; 16 | import htsjdk.samtools.ValidationStringency; 17 | 18 | public class ReadLocusReader implements Iterable { 19 | 20 | private SamReader samReader; 21 | private Feature region; 22 | private int maxDepth = 100000; 23 | 24 | public ReadLocusReader(String samFile) { 25 | this(samFile, null); 26 | } 27 | 28 | public ReadLocusReader(String samFile, Feature region) { 29 | samReader = 
SAMRecordUtils.getSamReader(samFile); 30 | this.region = region; 31 | } 32 | 33 | public ReadLocusReader(String samFile, Feature region, int maxDepth) { 34 | this(samFile, region); 35 | this.maxDepth = maxDepth; 36 | } 37 | 38 | @Override 39 | public Iterator iterator() { 40 | return new ReadLocusIterator(samReader, region, maxDepth); 41 | } 42 | 43 | public SAMFileHeader getSamHeader() { 44 | return samReader.getFileHeader(); 45 | } 46 | 47 | public void close() throws IOException { 48 | samReader.close(); 49 | } 50 | 51 | private static class ReadLocusIterator implements Iterator { 52 | 53 | private Iterator samIter; 54 | private String currentChr = ""; 55 | private int currentPos = -1; 56 | private List readCache = new ArrayList(); 57 | private ReadsAtLocus nextCache; 58 | private int maxDepth; 59 | 60 | public ReadLocusIterator(SamReader samReader, Feature region, int maxDepth) { 61 | 62 | this.maxDepth = maxDepth; 63 | 64 | if (region != null) { 65 | samIter = new ForwardShiftInsertIterator(samReader.queryOverlapping(region.getSeqname(), (int) region.getStart(), (int) region.getEnd())); 66 | } else { 67 | samIter = new ForwardShiftInsertIterator(samReader.iterator()); 68 | } 69 | } 70 | 71 | @Override 72 | public boolean hasNext() { 73 | nextCache = next(); 74 | return nextCache != null; 75 | } 76 | 77 | @Override 78 | public ReadsAtLocus next() { 79 | 80 | if (nextCache != null) { 81 | // Return the cached value 82 | ReadsAtLocus ret = nextCache; 83 | nextCache = null; 84 | return ret; 85 | } 86 | 87 | List reads = new ArrayList(); 88 | 89 | loadReadsIntoCache(); 90 | 91 | boolean isLocusAdvanced = getCachedReadsAtCurrentLocus(reads); 92 | 93 | if (isLocusAdvanced) { 94 | getCachedReadsAtCurrentLocus(reads); 95 | } 96 | ReadsAtLocus readsAtLocus = null; 97 | if (!reads.isEmpty()) { 98 | readsAtLocus = new ReadsAtLocus(currentChr, currentPos, reads); 99 | } 100 | 101 | currentPos += 1; 102 | 103 | return readsAtLocus; 104 | } 105 | 106 | private void 
loadReadsIntoCache() { 107 | boolean shouldReadFromFile = false; 108 | 109 | if (readCache.isEmpty()) { 110 | shouldReadFromFile = true; 111 | } 112 | else { 113 | SAMRecord last = readCache.get(readCache.size()-1); 114 | if (getAlignmentStart(last) <= currentPos && last.getReferenceName().equals(currentChr)) { 115 | shouldReadFromFile = true; 116 | } 117 | } 118 | 119 | while (shouldReadFromFile && samIter.hasNext()) { 120 | SAMRecord read = samIter.next(); 121 | 122 | // Skip over unmapped reads 123 | if (!read.getReadUnmappedFlag()) { 124 | 125 | if (readCache.isEmpty() && !read.getReferenceName().equals(currentChr)) { 126 | currentChr = read.getReferenceName(); 127 | currentPos = getAlignmentStart(read); 128 | } 129 | 130 | readCache.add(read); 131 | 132 | if (getAlignmentStart(read) > currentPos || !read.getReferenceName().equals(currentChr)) { 133 | shouldReadFromFile = false; 134 | } 135 | } 136 | } 137 | 138 | // Skip huge pileups! 139 | if (readCache.size() > maxDepth) { 140 | Logger.warn("Depth too high, clearing read cache " + currentChr + ":" + currentPos); 141 | for (int i=readCache.size()-2; i>=0; i--) { 142 | readCache.remove(i); 143 | } 144 | } 145 | } 146 | 147 | private int getAlignmentStart(SAMRecord read) { 148 | int start = read.getAlignmentStart(); 149 | 150 | if (read.getCigarLength() > 0 && read.getCigar().getCigarElement(0).getOperator() == CigarOperator.I) { 151 | start = start - 1; 152 | } 153 | 154 | return start; 155 | } 156 | 157 | // Returns true if current position is advanced to new locus 158 | private boolean getCachedReadsAtCurrentLocus(List reads) { 159 | 160 | reads.clear(); 161 | 162 | Iterator cacheIter = readCache.iterator(); 163 | 164 | String nextChr = null; 165 | int nextPos = -1; 166 | 167 | while (cacheIter.hasNext()) { 168 | SAMRecord read = cacheIter.next(); 169 | 170 | if (read.getAlignmentEnd() < currentPos && read.getReferenceName().equals(currentChr)) { 171 | // We've gone past the end of this read, so remove from 
cache. 172 | cacheIter.remove(); 173 | } else if (getAlignmentStart(read) <= currentPos && read.getAlignmentEnd() >= currentPos) { 174 | // This read spans the current locus of interest. 175 | reads.add(read); 176 | } else { 177 | // This read is beyond the current locus. 178 | if (nextChr == null) { 179 | nextChr = read.getReferenceName(); 180 | nextPos = getAlignmentStart(read); 181 | } 182 | } 183 | } 184 | 185 | if (reads.isEmpty() && nextChr != null) { 186 | currentChr = nextChr; 187 | currentPos = nextPos; 188 | 189 | return true; 190 | } 191 | 192 | return false; 193 | } 194 | 195 | @Override 196 | public void remove() { 197 | throw new UnsupportedOperationException(); 198 | } 199 | } 200 | 201 | public static void main(String[] args) { 202 | String file = "/home/lmose/dev/abra/dream/small.sort.bam"; 203 | 204 | ReadLocusReader r = new ReadLocusReader(file, null); 205 | 206 | for (ReadsAtLocus readsAtLocus : r) { 207 | System.out.println(readsAtLocus); 208 | } 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/ReadsAtLocus.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import java.util.List; 4 | 5 | import htsjdk.samtools.SAMRecord; 6 | import htsjdk.samtools.SAMSequenceDictionary; 7 | 8 | public class ReadsAtLocus { 9 | 10 | private String chr; 11 | private int position; 12 | private List reads; 13 | 14 | public ReadsAtLocus(String chr, int position, List reads) { 15 | this.chr = chr; 16 | this.position = position; 17 | this.reads = reads; 18 | } 19 | 20 | public String getChromosome() { 21 | return chr; 22 | } 23 | 24 | public int getPosition() { 25 | return position; 26 | } 27 | 28 | public List getReads() { 29 | return reads; 30 | } 31 | 32 | public String toString() { 33 | String s = chr + ":" + position; 34 | 35 | for (SAMRecord read : reads) { 36 | s += "," + read.getReadName(); 37 | } 38 | 39 | 
return s; 40 | } 41 | 42 | public int compareLoci(ReadsAtLocus that, SAMSequenceDictionary dict) { 43 | int compare = dict.getSequenceIndex(this.getChromosome()) - dict.getSequenceIndex(that.getChromosome()); 44 | if (compare == 0) { 45 | compare = this.getPosition() - that.getPosition(); 46 | } 47 | 48 | return compare; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/RepeatUtils.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | public class RepeatUtils { 4 | 5 | /** 6 | * Return smallest repeat unit in input string. 7 | * i.e. ATATATAT returns AT. ATCTAGC returns ATCTAGC 8 | */ 9 | public static String getRepeatUnit(String seq) { 10 | String unit = seq; 11 | boolean isRepeat = false; 12 | 13 | for (int length = 1; length <= seq.length()/2; length++) { 14 | 15 | unit = seq.substring(0, length); 16 | isRepeat = true; 17 | 18 | for (int i=length; i 0) { 47 | int index = 0; 48 | while ((index < maxLen-bases.length()+1) && (bases.equals(ref.substring(index, index+bases.length())))) { 49 | period += 1; 50 | index += bases.length(); 51 | } 52 | } 53 | 54 | return period; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/abra/cadabra/SpliceJunctionCounter.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.Collections; 6 | import java.util.Comparator; 7 | import java.util.HashMap; 8 | import java.util.HashSet; 9 | import java.util.List; 10 | import java.util.Map; 11 | import java.util.Set; 12 | 13 | import abra.Feature; 14 | import abra.JunctionUtils; 15 | import abra.SAMRecordUtils; 16 | import htsjdk.samtools.CigarElement; 17 | import htsjdk.samtools.SAMFileHeader; 18 | import htsjdk.samtools.SAMRecord; 19 | 
import htsjdk.samtools.SamReader; 20 | 21 | public class SpliceJunctionCounter { 22 | 23 | Map uniqueReads = new HashMap(); 24 | Map multiMapReads = new HashMap(); 25 | 26 | public void countSplices(String input, Set annotatedJunctions) throws IOException { 27 | 28 | 29 | SamReader reader = SAMRecordUtils.getSamReader(input); 30 | 31 | updateCounts(reader); 32 | 33 | List junctions = new ArrayList(uniqueReads.keySet()); 34 | Collections.sort(junctions, new SpliceJunctionComparator(reader.getFileHeader())); 35 | 36 | reader.close(); 37 | 38 | for (SpliceJunction junction : junctions) { 39 | 40 | int annotated = annotatedJunctions.contains(junction) ? 1 : 0; 41 | 42 | String rec = String.format("%s\t%d\t%d\t.\t.\t%d\t%d\t%d\t.", junction.chrom, junction.start, junction.stop, 43 | annotated, uniqueReads.get(junction), multiMapReads.get(junction)); 44 | 45 | System.out.println(rec); 46 | } 47 | } 48 | 49 | public List getJunctions(String[] inputs) { 50 | SAMFileHeader header = null; 51 | for (String input : inputs) { 52 | SamReader reader = SAMRecordUtils.getSamReader(input); 53 | updateCounts(reader); 54 | 55 | if (header == null) { 56 | header = reader.getFileHeader(); 57 | } 58 | } 59 | 60 | List junctions = new ArrayList(uniqueReads.keySet()); 61 | Collections.sort(junctions, new SpliceJunctionComparator(header)); 62 | 63 | List splices = new ArrayList(); 64 | for (SpliceJunction junction : junctions) { 65 | splices.add(new Feature(junction.chrom, junction.start, junction.stop)); 66 | } 67 | 68 | return splices; 69 | } 70 | 71 | private void updateCounts(SamReader reader) { 72 | for (SAMRecord read : reader) { 73 | if (read.getCigarString().contains("N")) { 74 | for (SpliceJunction junc : getJunctions(read)) { 75 | incrementCount(junc, read); 76 | } 77 | } 78 | } 79 | 80 | } 81 | 82 | private void incrementCount(SpliceJunction junction, SAMRecord read) { 83 | 84 | if (!uniqueReads.containsKey(junction)) { 85 | uniqueReads.put(junction, 0); 86 | } 87 | 88 | if 
(!multiMapReads.containsKey(junction)) { 89 | multiMapReads.put(junction, 0); 90 | } 91 | 92 | // TODO: Hardcoded to STAR values here. 93 | if (read.getMappingQuality() == 255) { 94 | uniqueReads.put(junction, uniqueReads.get(junction)+1); 95 | } else { 96 | multiMapReads.put(junction, multiMapReads.get(junction)+1); 97 | } 98 | } 99 | 100 | private List getJunctions(SAMRecord read) { 101 | List junctions = new ArrayList(); 102 | 103 | int pos = read.getAlignmentStart(); 104 | 105 | for (CigarElement elem : read.getCigar()) { 106 | switch (elem.getOperator()) { 107 | case D: 108 | case M: 109 | pos += elem.getLength(); 110 | break; 111 | case N: 112 | junctions.add(new SpliceJunction(read.getReferenceName(), pos, pos+elem.getLength()-1)); 113 | pos += elem.getLength(); 114 | break; 115 | case S: 116 | case I: 117 | case H: 118 | // NOOP 119 | break; 120 | default: 121 | throw new UnsupportedOperationException("Unsupported Cigar Operator: " + elem.getOperator()); 122 | } 123 | } 124 | 125 | return junctions; 126 | } 127 | 128 | static class SpliceJunctionComparator implements Comparator { 129 | 130 | private SAMFileHeader header; 131 | 132 | public SpliceJunctionComparator(SAMFileHeader header) { 133 | this.header = header; 134 | } 135 | 136 | @Override 137 | public int compare(SpliceJunction j1, SpliceJunction j2) { 138 | 139 | int idx1 = header.getSequenceIndex(j1.chrom); 140 | int idx2 = header.getSequenceIndex(j2.chrom); 141 | 142 | if (idx1 != idx2) { 143 | return idx1-idx2; 144 | } 145 | 146 | if (j1.start != j2.start) { 147 | return j1.start - j2.start; 148 | } 149 | 150 | return j1.stop - j2.stop; 151 | } 152 | 153 | } 154 | 155 | static class SpliceJunction { 156 | String chrom; 157 | int start; 158 | int stop; 159 | 160 | public SpliceJunction(String chrom, int start, int stop) { 161 | this.chrom = chrom; 162 | this.start = start; 163 | this.stop = stop; 164 | } 165 | 166 | @Override 167 | public int hashCode() { 168 | final int prime = 31; 169 | int 
result = 1; 170 | result = prime * result + ((chrom == null) ? 0 : chrom.hashCode()); 171 | result = prime * result + start; 172 | result = prime * result + stop; 173 | return result; 174 | } 175 | @Override 176 | public boolean equals(Object obj) { 177 | if (this == obj) 178 | return true; 179 | if (obj == null) 180 | return false; 181 | if (getClass() != obj.getClass()) 182 | return false; 183 | SpliceJunction other = (SpliceJunction) obj; 184 | if (chrom == null) { 185 | if (other.chrom != null) 186 | return false; 187 | } else if (!chrom.equals(other.chrom)) 188 | return false; 189 | if (start != other.start) 190 | return false; 191 | if (stop != other.stop) 192 | return false; 193 | return true; 194 | } 195 | } 196 | 197 | public static void main(String[] args) throws Exception { 198 | 199 | if (args.length == 0) { 200 | System.err.println("SpliceJunctionCounter "); 201 | System.exit(-1); 202 | } 203 | 204 | String input = args[0]; 205 | 206 | Set annotatedJunctions = new HashSet(); 207 | 208 | if (args.length > 1) { 209 | String gtf = args[1]; 210 | Set junctions = JunctionUtils.loadJunctionsFromGtf(gtf); 211 | for (Feature junc : junctions) { 212 | annotatedJunctions.add(new SpliceJunction(junc.getSeqname(), (int)junc.getStart(), (int)junc.getEnd())); 213 | } 214 | } 215 | 216 | SpliceJunctionCounter counter = new SpliceJunctionCounter(); 217 | counter.countSplices(input, annotatedJunctions); 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /src/python/assign_unmapped_pos.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | for line in sys.stdin: 4 | line = line.rstrip() 5 | if line.startswith('@'): 6 | print line 7 | else: 8 | fields = line.split() 9 | flag = int(fields[1]) 10 | # 4 = read unmapped. 
import re
import sys

# Filter inserts at alignment start or end that are adjacent to intron
# Filter deletions adjacent to intron
#p = re.compile('^[0-9]*I[0-9]*N.*|^[0-9]*S[0-9]*I[0-9]*N.*|.*N[0-9]*I$|.*N[0-9]*I[0-9]*S$|.*N[0-9]*[D].*|.*D[0-9]*N.*')

# More aggressive filter.  Do not require inserts to be at read start or end
# Filter inserts adjacent to intron
# Filter deletions adjacent to intron
p = re.compile('.*N[0-9]*[ID].*|.*[ID][0-9]*N.*')


def should_filter(line):
    """Return True when a SAM alignment line has an indel directly adjacent to an intron.

    Both the read's own CIGAR (column 6) and the mate CIGAR carried in an
    optional MC:Z tag are checked against the module-level pattern ``p``.

    ``line`` must be a non-header SAM record with at least 6
    whitespace-separated fields; shorter lines raise IndexError.
    """
    fields = line.split()

    # Column 6 (index 5) is the read's CIGAR string.
    if p.match(fields[5]):
        return True

    # Optional tags start at column 12 (index 11). The pattern is matched against
    # the whole tag; the "MC:Z:" prefix contains none of N/I/D.
    for tag in fields[11:]:
        if tag.startswith('MC:Z') and p.match(tag):
            return True

    return False


def main():
    """Copy SAM from stdin to stdout, dropping records flagged by should_filter."""
    for line in sys.stdin:
        line = line.rstrip()
        # Header lines are always passed through.
        if line.startswith('@') or not should_filter(line):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print(line)


if __name__ == '__main__':
    main()
org.testng.Assert.assertTrue; 5 | 6 | import org.testng.annotations.Test; 7 | 8 | public class ChromosomeRegexTest { 9 | 10 | private ChromosomeRegex cr = new ChromosomeRegex(ChromosomeRegex.DEFAULT_SKIP_REGEX); 11 | 12 | @Test (groups = "unit") 13 | public void testChr1() { 14 | assertFalse(cr.matches("chr1")); 15 | } 16 | 17 | @Test (groups = "unit") 18 | public void testChrX() { 19 | assertFalse(cr.matches("chrX")); 20 | } 21 | 22 | @Test (groups = "unit") 23 | public void testHg19Decoy() { 24 | assertTrue(cr.matches("hs37d5")); 25 | } 26 | 27 | @Test (groups = "unit") 28 | public void testHg19Gl() { 29 | assertTrue(cr.matches("GL000238.1")); 30 | } 31 | 32 | @Test (groups = "unit") 33 | public void testRandom() { 34 | assertTrue(cr.matches("chr1_KI270706v1_random")); 35 | } 36 | 37 | @Test (groups = "unit") 38 | public void testUnplaced() { 39 | assertTrue(cr.matches("chrUn_KI270320v1")); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/abra/FeatureTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import org.testng.Assert; 4 | import org.testng.annotations.Test; 5 | 6 | public class FeatureTest { 7 | 8 | @Test (groups = "unit") 9 | public void testOverlaps() { 10 | 11 | Feature feature = new Feature("chr1", 100, 200); 12 | 13 | Assert.assertTrue(feature.overlaps("chr1", 50, 150)); 14 | Assert.assertTrue(feature.overlaps("chr1", 1, 100)); 15 | Assert.assertTrue(feature.overlaps("chr1", 200, 300)); 16 | Assert.assertTrue(feature.overlaps("chr1", 150, 250)); 17 | Assert.assertTrue(feature.overlaps("chr1", 1, 300)); 18 | Assert.assertTrue(feature.overlaps("chr1", 100, 200)); 19 | Assert.assertTrue(feature.overlaps("chr1", 100, 201)); 20 | Assert.assertTrue(feature.overlaps("chr1", 99, 200)); 21 | Assert.assertTrue(feature.overlaps("chr1", 2, 101)); 22 | Assert.assertTrue(feature.overlaps("chr1", 199, 299)); 23 | 24 | 
Assert.assertFalse(feature.overlaps("chr2", 50, 150)); 25 | Assert.assertFalse(feature.overlaps("chr1", 1, 99)); 26 | Assert.assertFalse(feature.overlaps("chr2", 101, 200)); 27 | Assert.assertFalse(feature.overlaps("chr1", 1000, 1001)); 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/java/abra/IndelShifterTest.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. */ 2 | package abra; 3 | 4 | 5 | import htsjdk.samtools.Cigar; 6 | import htsjdk.samtools.CigarElement; 7 | import htsjdk.samtools.CigarOperator; 8 | import htsjdk.samtools.TextCigarCodec; 9 | 10 | import static org.testng.Assert.assertEquals; 11 | 12 | import org.testng.Assert; 13 | import org.testng.annotations.Test; 14 | 15 | public class IndelShifterTest { 16 | 17 | private IndelShifter indelShifter = new IndelShifter(); 18 | 19 | @Test (groups = "unit" ) 20 | public void testShiftCigarLeft_basic() { 21 | Cigar cigar = new Cigar(); 22 | 23 | cigar.add(new CigarElement(10, CigarOperator.M)); 24 | cigar.add(new CigarElement(3, CigarOperator.D)); 25 | cigar.add(new CigarElement(40, CigarOperator.M)); 26 | 27 | Cigar newCigar; 28 | 29 | newCigar = indelShifter.shiftCigarLeft(cigar, 10); 30 | Assert.assertEquals(newCigar.toString(), "3D50M"); 31 | 32 | newCigar = indelShifter.shiftCigarLeft(cigar, 9); 33 | Assert.assertEquals(newCigar.toString(), "1M3D49M"); 34 | 35 | newCigar = indelShifter.shiftCigarLeft(cigar, 8); 36 | Assert.assertEquals(newCigar.toString(), "2M3D48M"); 37 | 38 | newCigar = indelShifter.shiftCigarLeft(cigar, 4); 39 | Assert.assertEquals(newCigar.toString(), "6M3D44M"); 40 | 41 | newCigar = indelShifter.shiftCigarLeft(cigar, 2); 42 | Assert.assertEquals(newCigar.toString(), "8M3D42M"); 43 | 44 | newCigar = indelShifter.shiftCigarLeft(cigar, 1); 45 | Assert.assertEquals(newCigar.toString(), "9M3D41M"); 46 | } 
47 | 48 | @Test (groups = "unit" ) 49 | public void testShiftCigarLeft_softClipping() { 50 | Cigar cigar = new Cigar(); 51 | 52 | cigar.add(new CigarElement(2, CigarOperator.S)); 53 | cigar.add(new CigarElement(6, CigarOperator.M)); 54 | cigar.add(new CigarElement(2, CigarOperator.I)); 55 | cigar.add(new CigarElement(30, CigarOperator.M)); 56 | cigar.add(new CigarElement(10, CigarOperator.S)); 57 | 58 | Cigar newCigar; 59 | 60 | newCigar = indelShifter.shiftCigarLeft(cigar, 6); 61 | Assert.assertEquals(newCigar.toString(), "2S2I36M10S"); 62 | 63 | newCigar = indelShifter.shiftCigarLeft(cigar, 5); 64 | Assert.assertEquals(newCigar.toString(), "2S1M2I35M10S"); 65 | 66 | newCigar = indelShifter.shiftCigarLeft(cigar, 4); 67 | Assert.assertEquals(newCigar.toString(), "2S2M2I34M10S"); 68 | 69 | newCigar = indelShifter.shiftCigarLeft(cigar, 3); 70 | Assert.assertEquals(newCigar.toString(), "2S3M2I33M10S"); 71 | 72 | newCigar = indelShifter.shiftCigarLeft(cigar, 2); 73 | Assert.assertEquals(newCigar.toString(), "2S4M2I32M10S"); 74 | 75 | newCigar = indelShifter.shiftCigarLeft(cigar, 1); 76 | Assert.assertEquals(newCigar.toString(), "2S5M2I31M10S"); 77 | } 78 | 79 | @Test (groups = "unit" ) 80 | public void testShiftCigarLeft_insertAtTail() { 81 | Cigar cigar = new Cigar(); 82 | 83 | cigar.add(new CigarElement(40, CigarOperator.M)); 84 | cigar.add(new CigarElement(10, CigarOperator.I)); 85 | 86 | Cigar newCigar; 87 | 88 | newCigar = indelShifter.shiftCigarLeft(cigar, 40); 89 | Assert.assertEquals(newCigar.toString(), "10I40M"); 90 | 91 | newCigar = indelShifter.shiftCigarLeft(cigar, 39); 92 | Assert.assertEquals(newCigar.toString(), "1M10I39M"); 93 | 94 | newCigar = indelShifter.shiftCigarLeft(cigar, 30); 95 | Assert.assertEquals(newCigar.toString(), "10M10I30M"); 96 | 97 | newCigar = indelShifter.shiftCigarLeft(cigar, 1); 98 | Assert.assertEquals(newCigar.toString(), "39M10I1M"); 99 | } 100 | 101 | @Test (groups = "unit" ) 102 | public void 
testShiftCigarLeft_multipleIndels() { 103 | Cigar cigar = new Cigar(); 104 | 105 | cigar.add(new CigarElement(20, CigarOperator.M)); 106 | cigar.add(new CigarElement(1, CigarOperator.I)); 107 | cigar.add(new CigarElement(5, CigarOperator.M)); 108 | cigar.add(new CigarElement(3, CigarOperator.D)); 109 | cigar.add(new CigarElement(24, CigarOperator.M)); 110 | 111 | Cigar newCigar; 112 | 113 | newCigar = indelShifter.shiftCigarLeft(cigar, 20); 114 | Assert.assertEquals(newCigar.toString(), "1I5M3D44M"); 115 | 116 | newCigar = indelShifter.shiftCigarLeft(cigar, 10); 117 | Assert.assertEquals(newCigar.toString(), "10M1I5M3D34M"); 118 | 119 | newCigar = indelShifter.shiftCigarLeft(cigar, 1); 120 | Assert.assertEquals(newCigar.toString(), "19M1I5M3D25M"); 121 | } 122 | 123 | @Test (groups = "unit" ) 124 | public void testShiftCigarLeft_complex() { 125 | //3S69M1I18M1D9M 126 | Cigar cigar = new Cigar(); 127 | 128 | cigar.add(new CigarElement(3, CigarOperator.S)); 129 | cigar.add(new CigarElement(69, CigarOperator.M)); 130 | cigar.add(new CigarElement(1, CigarOperator.I)); 131 | cigar.add(new CigarElement(18, CigarOperator.M)); 132 | cigar.add(new CigarElement(1, CigarOperator.D)); 133 | cigar.add(new CigarElement(9, CigarOperator.M)); 134 | 135 | Cigar newCigar; 136 | 137 | newCigar = indelShifter.shiftCigarLeft(cigar, 1); 138 | assertEquals(newCigar.toString(), "3S68M1I18M1D10M"); 139 | } 140 | 141 | @Test (groups = "unit" ) 142 | public void testShiftDelLeft() throws Exception { 143 | Cigar cigar = TextCigarCodec.decode("6M2D8M"); 144 | Cigar newCigar = indelShifter.shiftCigarLeft(cigar, 4); 145 | assertEquals(TextCigarCodec.encode(newCigar), "2M2D12M"); 146 | } 147 | 148 | @Test (groups = "unit" ) 149 | public void testShiftIndelsLeft() throws Exception { 150 | 151 | CompareToReference2 c2r = new CompareToReference2(); 152 | c2r.init("test-data/test.fa"); 153 | /* 154 | TCGAATCGATATATTTCCGGAACAGACTCAG 155 | ------CGATAT--TTCCGGAA--------- <-- orig 156 | 
------CG--ATATTTCCGGAA--------- <-- new 157 | 1234567890123456789012 158 | */ 159 | 160 | int refStart = 7; 161 | int refEnd = 22; 162 | Cigar cigar = TextCigarCodec.decode("6M2D8M"); 163 | String seq = "CGATATTTCCGGAA"; 164 | 165 | // 1 based input 166 | Cigar newCigar = indelShifter.shiftIndelsLeft(refStart, refEnd, "seq1", cigar, seq, c2r); 167 | assertEquals(TextCigarCodec.encode(newCigar), "2M2D12M"); 168 | } 169 | 170 | @Test (groups = "unit" ) 171 | public void testShiftIndelsLeft_LocalRef() throws Exception { 172 | 173 | CompareToReference2 c2r = new CompareToReference2(); 174 | c2r.initLocal("seq1", "TCGAATCGATATATTTCCGGAACAGACTCAG"); 175 | //c2r.init("test-data/test.fa"); 176 | /* 177 | TCGAATCGATATATTTCCGGAACAGACTCAG 178 | ------CGATAT--TTCCGGAA--------- <-- orig 179 | ------CG--ATATTTCCGGAA--------- <-- new 180 | 1234567890123456789012 181 | */ 182 | 183 | int refStart = 7; 184 | int refEnd = 22; 185 | Cigar cigar = TextCigarCodec.decode("6M2D8M"); 186 | String seq = "CGATATTTCCGGAA"; 187 | 188 | // 1 based input 189 | Cigar newCigar = indelShifter.shiftIndelsLeft(refStart, refEnd, "seq1", cigar, seq, c2r); 190 | assertEquals(TextCigarCodec.encode(newCigar), "2M2D12M"); 191 | } 192 | } 193 | -------------------------------------------------------------------------------- /src/test/java/abra/JunctionUtilsTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.util.Arrays; 4 | import java.util.Collections; 5 | import java.util.HashSet; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | import static org.testng.Assert.assertEquals; 11 | import static org.testng.Assert.assertTrue; 12 | import org.testng.annotations.Test; 13 | 14 | public class JunctionUtilsTest { 15 | 16 | private void assertJunctionEquals(Feature actual, String chr, int start, int end) { 17 | assertEquals(actual.getSeqname(), chr, "Junction: " + chr + ":" + start + "-" + end); 18 
| assertEquals(actual.getStart(), start, "Junction: " + chr + ":" + start + "-" + end); 19 | assertEquals(actual.getEnd(), end, "Junction: " + chr + ":" + start + "-" + end); 20 | } 21 | 22 | @Test (groups = "unit") 23 | public void testLoadJunctionsFromGtf() throws Exception { 24 | Set junctions = JunctionUtils.loadJunctionsFromGtf("test-data/annotation1.gtf"); 25 | assertEquals(junctions.size(), 4); 26 | assertTrue(junctions.contains(new Feature("chr7", 55087059, 55209978))); 27 | assertTrue(junctions.contains(new Feature("chr7", 55210131, 55210997))); 28 | assertTrue(junctions.contains(new Feature("chr7", 55211182, 55218986))); 29 | assertTrue(junctions.contains(new Feature("chr7", 55211182, 55214298))); 30 | } 31 | 32 | @Test (groups = "unit") 33 | public void testGetRegionJunctions() throws Exception { 34 | 35 | RegionLoader loader = new RegionLoader(); 36 | List junctions = loader.load("test-data/junctions1.tab", false); 37 | assertEquals(junctions.size(), 29); 38 | 39 | Feature region = new Feature("chr4", 1803001, 1803401); 40 | List regions = Arrays.asList(region); 41 | int readLength = 50; 42 | int maxRegionLength = 400; 43 | Map> regionJunctionMap = JunctionUtils.getRegionJunctions(regions, junctions, readLength, maxRegionLength); 44 | 45 | List regionJunctions = regionJunctionMap.get(region); 46 | 47 | // TODO: Revisit this. 
48 | //assertEquals(regionJunctions.size(), 12); 49 | 50 | assertEquals(regionJunctions.size(), 8); 51 | assertJunctionEquals(regionJunctions.get(0), "chr4", 1800000, 1801530); 52 | assertJunctionEquals(regionJunctions.get(1), "chr4", 1801251, 1801473); 53 | assertJunctionEquals(regionJunctions.get(2), "chr4", 1801540, 1803093); 54 | assertJunctionEquals(regionJunctions.get(3), "chr4", 1803264, 1803346); 55 | assertJunctionEquals(regionJunctions.get(4), "chr4", 1803471, 1803561); 56 | assertJunctionEquals(regionJunctions.get(5), "chr4", 1803471, 1803590); 57 | assertJunctionEquals(regionJunctions.get(6), "chr4", 1803651, 1808025); 58 | 59 | // assertJunctionEquals(regionJunctions.get(6), "chr4", 1803714, 1805418); 60 | // assertJunctionEquals(regionJunctions.get(7), "chr4", 1803753, 1804640); 61 | // assertJunctionEquals(regionJunctions.get(8), "chr4", 1803753, 1805418); 62 | // assertJunctionEquals(regionJunctions.get(9), "chr4", 1803753, 1806056); 63 | // assertJunctionEquals(regionJunctions.get(10), "chr4", 1803753, 1806550); 64 | 65 | assertJunctionEquals(regionJunctions.get(7), "chr4", 1808055, 1808272); 66 | 67 | 68 | // List> junctionPerms = JunctionUtils.combineJunctions(regionJunctions, readLength); 69 | // System.out.println(junctionPerms.size()); 70 | } 71 | 72 | @Test (groups = "unit") 73 | public void testCombineJunctions() throws Exception { 74 | 75 | int readLength = 50; 76 | 77 | Feature j1 = new Feature("chr1", 10000, 10100); 78 | Feature j2 = new Feature("chr1", 10110, 10200); 79 | Feature j3 = new Feature("chr1", 10110, 10300); 80 | Feature j4 = new Feature("chr1", 10330, 10500); 81 | 82 | List inputJunctions = Arrays.asList(j1, j2, j3, j4); 83 | List> junctionPerms = JunctionUtils.combineJunctions(new Feature("chr1", 10000, 10400), inputJunctions, new HashSet(), readLength, readLength); 84 | assertEquals(junctionPerms.size(), 8); 85 | 86 | // Expected permutations 87 | List p1 = Arrays.asList(j1); 88 | List p2 = Arrays.asList(j2); 89 | List p3 = 
Arrays.asList(j3); 90 | List p4 = Arrays.asList(j4); 91 | List p5 = Arrays.asList(j1,j2); 92 | List p6 = Arrays.asList(j1,j3); 93 | List p7 = Arrays.asList(j3,j4); 94 | List p8 = Arrays.asList(j1,j3,j4); 95 | 96 | assertTrue(junctionPerms.contains(p1)); 97 | assertTrue(junctionPerms.contains(p2)); 98 | assertTrue(junctionPerms.contains(p3)); 99 | assertTrue(junctionPerms.contains(p4)); 100 | assertTrue(junctionPerms.contains(p5)); 101 | assertTrue(junctionPerms.contains(p6)); 102 | assertTrue(junctionPerms.contains(p7)); 103 | assertTrue(junctionPerms.contains(p8)); 104 | } 105 | 106 | public void testCombineVariantWithJunctions() { 107 | // TODO: not implemented - the body is empty and the method has no @Test annotation 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/test/java/abra/MultiSamReaderTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import static org.testng.Assert.assertEquals; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.testng.annotations.Test; 9 | 10 | // TODO: Test more thoroughly including multiple samples 11 | public class MultiSamReaderTest { 12 | 13 | @Test (groups = "unit") 14 | public void testReadTwoBams() { 15 | String[] bams = new String[] { "test-data/sample1.bam", "test-data/sample2.bam" }; 16 | int minMapq = 20; 17 | boolean isPairedEnd = false; 18 | String chromosome = "10"; 19 | Feature chromosomeChunk = new Feature(chromosome, 3000000, 4000000); 20 | 21 | MultiSamReader rdr = new MultiSamReader(bams, minMapq, isPairedEnd, chromosomeChunk); 22 | 23 | List reads = new ArrayList(); 24 | for (SAMRecordWrapper read : rdr) { 25 | reads.add(read); 26 | } 27 | 28 | assertEquals(reads.size(), 9); 29 | assertEquals(reads.get(0).getSamRecord().getReadName(), "ERR194161.458962555"); 30 | assertEquals(reads.get(0).getSampleIdx(), 0); 31 | 32 | assertEquals(reads.get(1).getSamRecord().getReadName(), "ERR194161.458962561"); 33 | 
assertEquals(reads.get(1).getSampleIdx(), 1); 34 | 35 | assertEquals(reads.get(2).getSamRecord().getReadName(), "ERR194161.458962547"); 36 | assertEquals(reads.get(2).getSampleIdx(), 1); 37 | 38 | assertEquals(reads.get(3).getSamRecord().getReadName(), "ERR194161.458962586"); 39 | assertEquals(reads.get(3).getSampleIdx(), 0); 40 | 41 | assertEquals(reads.get(4).getSamRecord().getReadName(), "ERR194161.458962577"); 42 | assertEquals(reads.get(4).getSampleIdx(), 0); 43 | 44 | assertEquals(reads.get(5).getSamRecord().getReadName(), "ERR194161.458962575"); 45 | assertEquals(reads.get(5).getSampleIdx(), 1); 46 | 47 | assertEquals(reads.get(6).getSamRecord().getReadName(), "ERR194161.458962567"); 48 | assertEquals(reads.get(6).getSampleIdx(), 0); 49 | 50 | // Same read in both samples here. Order doesn't matter. 51 | assertEquals(reads.get(7).getSamRecord().getReadName(), "ERR194161.458962591"); 52 | assertEquals(reads.get(8).getSamRecord().getReadName(), "ERR194161.458962591"); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/test/java/abra/ReAlignerOptionsTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import static org.testng.Assert.assertEquals; 4 | 5 | import org.testng.annotations.Test; 6 | 7 | /** 8 | * Unit tests for ReAlignerOptions. 9 | * 10 | * @author Lisle E. 
Mose (lmose at unc dot edu) 11 | */ 12 | public class ReAlignerOptionsTest { 13 | 14 | @Test (groups = "unit") 15 | public void testNoParams() { 16 | ReAlignerOptions options = new ReAlignerOptions(); 17 | options.parseOptions(new String[0]); 18 | } 19 | 20 | @Test (groups = "unit") 21 | public void testBamParams() { 22 | ReAlignerOptions options = new ReAlignerOptions(); 23 | options.parseOptions("--in in1.bam,in2.bam --out out1.bam,out2.bam".split("\\s")); 24 | String[] input = options.getInputFiles(); 25 | String[] output = options.getOutputFiles(); 26 | 27 | assertEquals(input.length, 2); 28 | assertEquals(input[0], "in1.bam"); 29 | assertEquals(input[1], "in2.bam"); 30 | assertEquals(output.length, 2); 31 | assertEquals(output[0], "out1.bam"); 32 | assertEquals(output[1], "out2.bam"); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/abra/ReadEvaluatorTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | import static org.testng.Assert.assertEquals; 7 | 8 | import org.testng.annotations.BeforeMethod; 9 | import org.testng.annotations.Test; 10 | 11 | import abra.ReadEvaluator.Alignment; 12 | import abra.ContigAligner.ContigAlignerResult; 13 | import abra.SimpleMapper.Orientation; 14 | 15 | public class ReadEvaluatorTest { 16 | 17 | private Map> regionContigs; 18 | private Map mappedContigs; 19 | 20 | @BeforeMethod () 21 | public void setUp() { 22 | mappedContigs = new HashMap(); 23 | regionContigs = new HashMap>(); 24 | regionContigs.put(new Feature("foo", 1, 1000), mappedContigs); 25 | } 26 | 27 | @Test (groups="unit") 28 | public void testSingleAlignmentSingleContig() { 29 | String contig1 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 30 | String read = "ATAAAATTTTTTCCCCCCGGGGGGATCG"; // matches contig at 0 based position 4 with 1 mismatch 31 | 32 | SimpleMapper sm1 = new 
SimpleMapper(contig1, .05); 33 | ContigAlignerResult swc1 = new ContigAlignerResult(10, "10M1D30M", "chr1", 0, contig1, (short) 1); 34 | 35 | mappedContigs.put(sm1, swc1); 36 | 37 | ReadEvaluator re = new ReadEvaluator(regionContigs); 38 | 39 | // 1 mismatch in alignment to contig versus edit distance 2 in original read 40 | // should result in an improved alignment 41 | Alignment alignment = re.getImprovedAlignment(2, read); 42 | assertEquals(alignment.pos, 14); // Alignment pos = 10 + 4 43 | assertEquals(alignment.cigar, "6M1D22M"); 44 | assertEquals(alignment.numMismatches, 1); 45 | assertEquals(alignment.orientation, Orientation.FORWARD); 46 | } 47 | 48 | @Test (groups="unit") 49 | public void testSingleAlignmentSingleContig_noImprovement() { 50 | String contig1 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 51 | String read = "ATAAAATTTTTTCCCCCCGGGGGGATCG"; // matches contig at 0 based position 4 with 1 mismatch 52 | 53 | SimpleMapper sm1 = new SimpleMapper(contig1); 54 | ContigAlignerResult swc1 = new ContigAlignerResult(10, "10M1D30M", "chr1", 0, contig1, (short) 1); 55 | 56 | mappedContigs.put(sm1, swc1); 57 | 58 | ReadEvaluator re = new ReadEvaluator(regionContigs); 59 | 60 | Alignment alignment = re.getImprovedAlignment(1, read); 61 | 62 | // Alignment with mismatches == orig edit distance should still return. 
63 | assertEquals(alignment.numMismatches, 1); 64 | } 65 | 66 | @Test (groups="unit") 67 | public void testSelectBestAlignment() { 68 | String contig1 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 69 | String contig2 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTTATCG"; 70 | String contig3 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTCATCG"; 71 | String contig4 = "ATCGATAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 72 | String read = "ATAAAATTTTTTCCCCCCGGGGGGATCG"; // matches contig at 0 based position 4 with 0 mismatches 73 | 74 | SimpleMapper sm1 = new SimpleMapper(contig1); 75 | ContigAlignerResult swc1 = new ContigAlignerResult(10, "10M1D30M", "chr1", 0, contig1, (short) 1); 76 | 77 | SimpleMapper sm2 = new SimpleMapper(contig2); 78 | ContigAlignerResult swc2 = new ContigAlignerResult(20, "10M1D30M", "chr1", 0, contig2, (short) 1); 79 | 80 | SimpleMapper sm3 = new SimpleMapper(contig3); 81 | ContigAlignerResult swc3 = new ContigAlignerResult(30, "10M1D30M", "chr1", 0, contig3, (short) 1); 82 | 83 | SimpleMapper sm4 = new SimpleMapper(contig4); 84 | ContigAlignerResult swc4 = new ContigAlignerResult(40, "10M1D30M", "chr1", 0, contig4, (short) 1); 85 | 86 | mappedContigs.put(sm1, swc1); 87 | mappedContigs.put(sm2, swc2); 88 | mappedContigs.put(sm3, swc3); 89 | mappedContigs.put(sm4, swc4); 90 | 91 | ReadEvaluator re = new ReadEvaluator(regionContigs); 92 | 93 | // Exact match to contig 4 94 | Alignment alignment = re.getImprovedAlignment(2, read); 95 | assertEquals(alignment.pos, 44); // Alignment pos = 40 + 4 96 | assertEquals(alignment.cigar, "6M1D22M"); 97 | assertEquals(alignment.orientation, Orientation.FORWARD); 98 | } 99 | 100 | @Test (groups="unit") 101 | public void testMapToMultipleContigsSynonymously() { 102 | String contig1 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 103 | String contig2 = "AATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTTATCG"; 104 | String contig3 = "TCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTCATCG"; 105 | String contig4 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 106 | 
String read = "ATAAAATTTTTTCCCCCCGGGGGGATCG"; // matches contig1 at 0 based position 4 with 1 mismatch 107 | 108 | SimpleMapper sm1 = new SimpleMapper(contig1); 109 | ContigAlignerResult swc1 = new ContigAlignerResult(10, "10M1D30M", "chr1", 0, contig1, (short) 1); 110 | 111 | SimpleMapper sm2 = new SimpleMapper(contig2); 112 | ContigAlignerResult swc2 = new ContigAlignerResult(9, "11M1D31M", "chr1", 0, contig2, (short) 1); 113 | 114 | SimpleMapper sm3 = new SimpleMapper(contig3); 115 | ContigAlignerResult swc3 = new ContigAlignerResult(11, "9M1D29M", "chr1", 0, contig3, (short) 1); 116 | 117 | SimpleMapper sm4 = new SimpleMapper(contig4); 118 | ContigAlignerResult swc4 = new ContigAlignerResult(10, "10M1D30M", "chr1", 0, contig4, (short) 1); 119 | 120 | mappedContigs.put(sm1, swc1); 121 | mappedContigs.put(sm2, swc2); 122 | mappedContigs.put(sm3, swc3); 123 | mappedContigs.put(sm4, swc4); 124 | 125 | ReadEvaluator re = new ReadEvaluator(regionContigs); 126 | 127 | // Maps to multiple contigs with a single mismatch, with each contig's 128 | // alignment result identical in the context of the reference 129 | Alignment alignment = re.getImprovedAlignment(2, read); 130 | assertEquals(alignment.pos, 14); // Alignment pos = 10 + 4 131 | assertEquals(alignment.cigar, "6M1D22M"); 132 | assertEquals(alignment.orientation, Orientation.FORWARD); 133 | } 134 | 135 | @Test (groups="unit") 136 | public void testMultimapWithinContig() { 137 | String contig1 = "ATCGATCGATCGATCGATCGATCGATCGATCGATCG"; 138 | String read = "ACCGATCGATCGATCGATCGATCGATCGATCG"; // matches 2 locations with single mismatch 139 | 140 | SimpleMapper sm1 = new SimpleMapper(contig1); 141 | ContigAlignerResult swc1 = new ContigAlignerResult(100, "36M", "chr1", 0, read, (short) 1); 142 | 143 | mappedContigs.put(sm1, swc1); 144 | 145 | ReadEvaluator re = new ReadEvaluator(regionContigs); 146 | 147 | // Read matches 2 locations in the contig with a single mismatch each; 148 | // expect no improved alignment (null), 
presumably because the mapping is ambiguous 149 | Alignment alignment = re.getImprovedAlignment(2, read); 150 | assertEquals(alignment, null); 151 | } 152 | 153 | @Test (groups="unit") 154 | public void testSingleAlignmentSingleContig_reverseComplement() { 155 | String contig1 = "ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG"; 156 | String read = "CGATCCCCCCGGGGGGAAAAAATTTTAT"; // reverse complement matches contig at 0 based position 4 with 1 mismatch 157 | 158 | SimpleMapper sm1 = new SimpleMapper(contig1); 159 | ContigAlignerResult swc1 = new ContigAlignerResult(10, "10M1D30M", "chr1", 0, read, (short) 1); 160 | 161 | mappedContigs.put(sm1, swc1); 162 | 163 | ReadEvaluator re = new ReadEvaluator(regionContigs); 164 | 165 | // 1 mismatch in alignment to contig versus edit distance 2 in original read 166 | // should result in an improved alignment 167 | Alignment alignment = re.getImprovedAlignment(2, read); 168 | assertEquals(alignment.pos, 14); // Alignment pos = 10 + 4 169 | assertEquals(alignment.cigar, "6M1D22M"); 170 | assertEquals(alignment.numMismatches, 1); 171 | assertEquals(alignment.orientation, Orientation.REVERSE); 172 | } 173 | } 174 | -------------------------------------------------------------------------------- /src/test/java/abra/RealignerTest.java: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 University of North Carolina at Chapel Hill. All rights reserved. 
*/ 2 | package abra; 3 | 4 | import static org.testng.Assert.assertEquals; 5 | 6 | import java.util.ArrayList; 7 | import java.util.Arrays; 8 | import java.util.List; 9 | 10 | import htsjdk.samtools.SAMRecord; 11 | 12 | import org.testng.Assert; 13 | import org.testng.annotations.Test; 14 | 15 | public class RealignerTest { 16 | 17 | /* 18 | @Test (groups = "unit") 19 | public void testUpdateReadAlignment() { 20 | SAMRecord contigRead = new SAMRecord(null); 21 | contigRead.setReferenceName("chr21"); 22 | contigRead.setCigarString("201M50I1105M2D563M"); 23 | contigRead.setAlignmentStart(36205060); 24 | 25 | SAMRecord origRead = new SAMRecord(null); 26 | 27 | List blocks = ReadBlock.getReadBlocks(contigRead); 28 | Assert.assertEquals(blocks.size(), 5); 29 | 30 | ReadPosition readPosition = new ReadPosition(origRead, 1266, 0); 31 | } 32 | */ 33 | 34 | // TODO: Move to RegionLoaderTest 35 | @Test (groups = "unit") 36 | public void testCollapseRegions() { 37 | List input = new ArrayList(); 38 | input.add(new Feature("chr20", 1, 2000)); 39 | input.add(new Feature("chr20", 2050, 10000)); 40 | input.add(new Feature("chr20", 10020, 20000)); 41 | input.add(new Feature("chr20", 20100, 20200)); 42 | input.add(new Feature("chr21", 20201, 20300)); 43 | 44 | List features = RegionLoader.collapseRegions(input, 70); 45 | assertEquals(features.size(), 3); 46 | validateFeature(features.get(0), 1, 20000); 47 | validateFeature(features.get(1), 20100, 20200); 48 | validateFeature(features.get(2), 20201, 20300); 49 | } 50 | 51 | @Test (groups = "unit") 52 | public void testPairJunctions() { 53 | Feature j1 = new Feature("chr8", 27303437, 27308265); 54 | Feature j2 = new Feature("chr8", 27308413, 27308559); 55 | Feature j3 = new Feature("chr8", 27308596, 27309001); 56 | Feature j4 = new Feature("chr8", 27309027, 27310630); 57 | Feature j5 = new Feature("chr8", 27309227, 27314630); 58 | 59 | List junctions = Arrays.asList(j1, j2, j3, j4, j5); 60 | 61 | ReAligner r = new ReAligner(); 62 | 
List> junctionPairs = r.pairJunctions(junctions, 50); 63 | assertEquals(junctionPairs.size(), 2); 64 | 65 | // 1st pair 66 | assertEquals(j2, junctionPairs.get(0).getFirst()); 67 | assertEquals(j3, junctionPairs.get(0).getSecond()); 68 | 69 | // 2nd pair 70 | assertEquals(j3, junctionPairs.get(1).getFirst()); 71 | assertEquals(j4, junctionPairs.get(1).getSecond()); 72 | } 73 | 74 | @Test (groups = "unit") 75 | public void testPairJunctions_cannotAppearInSameContig() { 76 | Feature j1 = new Feature("chr1", 755395, 755674); 77 | Feature j2 = new Feature("chr1", 755430, 755674); 78 | Feature j3 = new Feature("chr1", 755535, 755674); 79 | 80 | List junctions = Arrays.asList(j1, j2, j3); 81 | 82 | ReAligner r = new ReAligner(); 83 | List> junctionPairs = r.pairJunctions(junctions, 5000); 84 | assertEquals(junctionPairs.size(), 0); 85 | } 86 | 87 | /* 88 | @Test (groups = "unit") 89 | public void testSplitRegions() { 90 | List input = new ArrayList(); 91 | input.add(new Feature("chr20", 1, 2000)); 92 | input.add(new Feature("chr20", 2001, 10000)); 93 | input.add(new Feature("chr20", 9523233, 9523367)); 94 | input.add(new Feature("chr21", 36205258, 36206898)); 95 | input.add(new Feature("chr22", 21303999, 21308037)); 96 | 97 | ReAligner realigner = new ReAligner(); 98 | List features = realigner.splitRegions(input, 2000, 500, 200); 99 | 100 | assertEquals(features.size(), 9); 101 | // first 102 | validateFeature(features.get(0), 1, 2000); 103 | // second 104 | validateFeature(features.get(1), 2001, 4201); 105 | validateFeature(features.get(2), 4001, 6201); 106 | validateFeature(features.get(3), 6001, 8201); 107 | validateFeature(features.get(4), 8001, 10000); 108 | // third 109 | validateFeature(features.get(5), 9523233, 9523367); 110 | // fourth 111 | validateFeature(features.get(6), 36205258, 36206898); 112 | // fifth 113 | validateFeature(features.get(7), 21303999, 21306199); 114 | validateFeature(features.get(8), 21305999, 21308037); 115 | } 116 | */ 117 | 118 | 
private void validateFeature(Feature feature, int expectedStart, int expectedEnd) { 119 | assertEquals(feature.getStart(), expectedStart); 120 | assertEquals(feature.getEnd(), expectedEnd); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/test/java/abra/ReverseComplementorTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Random; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class ReverseComplementorTest { 11 | //C->G, G->C, T->A, A->T 12 | 13 | // Shared fixed-seed generator: runs stay reproducible, but successive 14 | // makeRandomDNA() calls continue one sequence instead of repeating it. 15 | private static final Random GENERATOR = new Random(0); 16 | 17 | @Test 18 | public void reverseComplement() { 19 | ReverseComplementor r = new ReverseComplementor(); 20 | String x = r.reverseComplement("AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT"); 21 | assertEquals("AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT", x); 22 | } 23 | 24 | @Test 25 | public void reverseComplementNotAllDNA() { 26 | ReverseComplementor r = new ReverseComplementor(); 27 | String x = r.reverseComplement("NAATGANNN"); 28 | assertEquals("NNNTCATTN", x); 29 | System.out.println("REVERSE:" + x); 30 | } 31 | 32 | @Test 33 | public void reverseComplementTimeTest() { 34 | ReverseComplementor r = new ReverseComplementor(); 35 | ArrayList dnaList = new ArrayList(); 36 | int nStringToReverse = 1000000; 37 | 38 | for (int i=0; i < nStringToReverse; i++) { 39 | String s = makeRandomDNA(); 40 | dnaList.add(s); 41 | } 42 | 43 | long start = System.currentTimeMillis(); 44 | 45 | for (int i=0; i < nStringToReverse; i++) 46 | r.reverseComplement(dnaList.get(i)); 47 | 48 | long stop = System.currentTimeMillis(); 49 | 50 | System.out.println("Elapsed time to reverse (ms):" + (stop-start)); 51 | } 52 | 53 | /** 54 | * Builds a pseudo-random DNA string of 40 to 80 symbols drawn from "ACTG". 55 | * 56 | * @return the generated sequence; successive calls continue one seeded pseudo-random sequence 57 | */ 58 | public static String makeRandomDNA() { 59 | Random generator = GENERATOR; 60 | int numSymbols = (int)(generator.nextDouble() * 41) + 40; // random integer between 40 and 
80 inclusive 61 | 62 | // Make a random DNA sequence with numSymbols symbols 63 | String DNAletters = "ACTG"; 64 | StringBuilder result = new StringBuilder(); 65 | 66 | for (int i = 1; i <= numSymbols; i++) { 67 | // Pick a random letter from "ACTG" string 68 | char symbol = DNAletters.charAt((int)(generator.nextDouble()*4)); 69 | // concatenate next random symbol on to result 70 | result.append(symbol); 71 | } 72 | 73 | return result.toString(); 74 | } 75 | } -------------------------------------------------------------------------------- /src/test/java/abra/ScoredContigTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import java.util.List; 4 | 5 | import org.testng.annotations.Test; 6 | 7 | import static org.testng.Assert.assertEquals; 8 | 9 | public class ScoredContigTest { 10 | 11 | @Test (groups = "unit") 12 | public void testConvertAndFilter() { 13 | String contigStrings = 14 | ">chr7_55242125_55242525_53_-3.583754\n" + 15 | "contig1\n" + 16 | ">chr7_55242125_55242525_54_-3.333877\n" + 17 | "contig2\n" + 18 | ">chr7_55242125_55242525_55_-3.525407\n" + 19 | "contig3\n" + 20 | ">chr7_55242125_55242525_56_-3.370506\n" + 21 | "contig4\n" + 22 | ">chr7_55242125_55242525_57_-3.008790\n" + 23 | "contig5\n" + 24 | ">chr7_55242125_55242525_58_-2.607389\n" + 25 | "contig6\n" + 26 | ">chr7_55242125_55242525_59_-2.357512\n" + 27 | "contig7\n" + 28 | ">chr7_55242125_55242525_60_-2.549043\n" + 29 | "contig8\n" + 30 | ">chr7_55242125_55242525_61_-2.394141\n" + 31 | "contig9\n" + 32 | ">chr7_55242125_55242525_62_-2.911880\n" + 33 | "contig10\n" + 34 | ">chr7_55242125_55242525_63_-2.707760\n" + 35 | "contig11\n" + 36 | ">chr7_55242125_55242525_64_-5.462782\n" + 37 | "contig12\n"; 38 | 39 | int maxContigs = 3; 40 | List contigs = ScoredContig.convertAndFilter(contigStrings, maxContigs, new StringBuffer()); 41 | assertEquals(contigs.size(), 3); 42 | assertEquals(contigs.get(0).getScore(), 
-2.357512); 43 | assertEquals(contigs.get(0).getContig(), "contig7"); 44 | assertEquals(contigs.get(1).getScore(), -2.394141); 45 | assertEquals(contigs.get(1).getContig(), "contig9"); 46 | assertEquals(contigs.get(2).getScore(), -2.549043); 47 | assertEquals(contigs.get(2).getContig(), "contig8"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/abra/SemiGlobalAlignerTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import static org.testng.Assert.assertEquals; 4 | 5 | import org.testng.annotations.Test; 6 | 7 | public class SemiGlobalAlignerTest { 8 | 9 | private SemiGlobalAligner sg = new SemiGlobalAligner(8,-32,-48,-1); 10 | 11 | @Test (groups = "unit") 12 | public void testInsert() { 13 | String ref = "ATCGAATTCCGGGCTA"; 14 | String seq = "GAACCCCTTCCG"; 15 | 16 | SemiGlobalAligner.Result res = sg.align(seq, ref); 17 | assertEquals(res.position, 3); 18 | assertEquals(res.cigar, "3M4I5M"); 19 | } 20 | 21 | @Test (groups = "unit") 22 | public void testDelete() { 23 | String ref = "ATCGAATTCCGGGCTA"; 24 | String seq = "AATTCTA"; 25 | SemiGlobalAligner.Result res = sg.align(seq, ref); 26 | assertEquals(res.position, 4); 27 | assertEquals(res.cigar, "4M5D3M"); 28 | // assertEquals(res.cigar, "5M5D2M"); 29 | assertEquals(res.score, 4); 30 | System.out.println("score: " + res.score); 31 | } 32 | 33 | @Test (groups = "unit") 34 | public void testExactMatch() { 35 | String ref = "ATCGAATTCCGGGCTA"; 36 | String seq = "ATCGAATT"; 37 | SemiGlobalAligner.Result res = sg.align(seq, ref); 38 | assertEquals(res.position, 0); 39 | assertEquals(res.cigar, "8M"); 40 | } 41 | 42 | @Test (groups = "unit") 43 | public void testMismatches() { 44 | String ref = "ATCGAATTCCGGGCTA"; 45 | String seq = "ATTGACTT"; 46 | SemiGlobalAligner.Result res = sg.align(seq, ref); 47 | assertEquals(res.position, 0); 48 | assertEquals(res.cigar, "8M"); 49 | } 50 | 51 | 
@Test (groups = "unit") 52 | public void testEndToEnd() { 53 | String ref = "ATCGAATTCCGGGCTA"; 54 | String seq = "ATCGAATTCCGGGCTA"; 55 | SemiGlobalAligner.Result res = sg.align(seq, ref); 56 | assertEquals(res.position, 0); 57 | assertEquals(res.cigar, "16M"); 58 | } 59 | 60 | @Test (groups = "unit") 61 | public void testBigDel() { 62 | String seq = "GGGCTGCCGTTTTTCCATTACGGCTTTCGTAATGTGACCACGTGCTTTTGA"; 63 | String ref = "GGGCTGCCGTTTTTCCATTACGGCTTTCCTTTGAAGTATATTTTAGGACATGACAGTCTTGTACCTGAAGTAATGTGACCACGTGCTTTTGA"; 64 | 65 | // SemiGlobalAligner sg2 = new SemiGlobalAligner(1,-4,-8,0); 66 | SemiGlobalAligner.Result res = sg.align(seq, ref); 67 | assertEquals(res.position, 0); 68 | assertEquals(res.cigar, "28M41D23M"); 69 | // assertEquals(res.cigar, "23M41D28M"); 70 | assertEquals(res.score, 320); 71 | // assertEquals(res.score, 200); 72 | } 73 | 74 | @Test (groups = "unit") 75 | public void testUnderflowBug() { 76 | String seq = "ATCCAACGTGATTAAGGATAGGAATCGGTAGTTTGGTTTTTTTGTTTGTTTTGTTTTTTTAACCACAGATAATTGCCAAAGTTTCCACCTGAGGACGGTGTTTGGAGGTTGCCTTTTGGACCTACCACTTTGCTCATTCTTGCTAACCTAGTCTAGGTGACCTACAGTGCCGTGCATTTAAGTCAATGGTTGTTAAAAGAAGTTTCCCGTGTTGTAAATCATGTTTCCCTTATCAGATCATTTGCAAATACATTTAAATGATCTCATGGTAAATGTTGATGTATTTTTTGGTTTATTTTGTGTACTAACATAATAGAGAGAGACTCAGCTCCTTTTATTTATTTTGTTGATTTATGGATCAAATTCTAAAATAAAGTTGCCTGTTGTGACTTTTGTCCCATCTACTGCATACTTAGTGCTGAGATCCCTGTAAAATGTTTTGATGAAAATATGTATGTAGAGTCCAGTCGCATTATACATACATTTCATAAATAATTTTATGAACGTGCTGAACCTTCTTAAATGCCTACTCATTCAGCTTAAACAGGCTGAAGCCAAGTATGACAAAGAGGGGAAGGGCCAAAAACATAATCAAAGAATAATTTTAAAGAGAATTCTTGTCTCTCTTGCAAAAATAAAAAAATAAAAATACAAAAAATTTAACCTGCAATTTAGAAGCAGCAATGTTTAAAGCTTTTCTTGGCTTTGAAAGCAAAATTACCTTTGTGTCAGAT"; 77 | String ref = 
"TTTGATGTACAGAATGGAAGAGTCTGTTGTTCTGTGGATTTTGAGTGTCTTCAAAATAATTGAAGTATTTACAGTGGACTCAACGCAGAAGAATGGACGAAATGACCATCCAACGTGATTAAGGATAGGAATCGGTAGTTTGGTTTTTTTGTTTGTTTTGTTTTTTTAACCACAGATAATTGCCAAAGTTTCCACCTGAGGACGGTGTTTGGAGGTTGCCTTTTGGACCTACCACTTTGCTCATTCTTGCTAACCTAGTCTAGGTGACCTACAGTGCCGTGCATTTAAGTCAATGGTTGTTAAAAGAAGTTTCCCGTGTTGTAAATCATGTTTCCCTTATCAGATCATTTGCAAATACATTTAAATGATCTCATGGTAAATGTTGATGTATTTTTTGGTTTATTTTGTGTACTAACATAATAGAGAGAGACTCAGCTCCTTTTATTTATTTTGTTGATTTATGGATCAAATTCTAAAATAAAGTTGCCTGTTGTGACTTTTGTCCCATCTACTGCATACTTAGTGCTGAGATCCCTGTAAAATGTTTTGATGAAAATATGTATGTAGAGTCCAGTCGCATTATACATACATTTCATAGTGCTGAACCTTCTTAAATGCCTACTCATTCAGCTTAAACAGGCTGAAGCCAAGTATGACAAAGAGGGGAAGGGCCAAAAACATAATCAAAGAATAATTTTAAAGAGAATTCTTGTCTCTCTTGCAAAAATAAAAAAATAAAAATACAAAAAATTTAACCTGCAATTTAGAAGCAGCAATGTTTAAAGCTTTTCTTGGCTTTGAAAGCAAAATTACCTTTGTGTCAGATTTATGTAGTACTTGATCCTTTATAAAGCATCCCAGTTAGTTACTAATAAAGTTATAATCATAGTACATGTCAGAAGTATTGTCCCTATATTACAGAAGTGTGAATTA"; 78 | // String seq = "TATGTAGAGTCCAGTCGCATTATACATACATTTCATAAATAATTTTATGAACGTGCTGAACCTTCTTAAATGCCTACTCATTCAGCTTAAACAGG"; 79 | // String ref = "TATGTATGTAGAGTCCAGTCGCATTATACATACATTTCATAGTGCTGAACCTTCTTAAATGCCTACTCATTCAGCTTAAACAGGCTGAA"; 80 | 81 | // String seq = "TTTGGAGGTTGCCTTTTGGACCTACCACTTTGCTCATTCTTGCTAACCTAGTCTAGGTGACCTACAGTGCCGTGCATTTAAGTCAATGGTTGTTAAAAGAAGTTTCCCGTGTTGTAAATCATGTTTCCCTTATCAGATCATTTGCAAATACATTTAAATGATCTCATGGTAAATGTTGATGTATTTTTTGGTTTATTTTGTGTACTAACATAATAGAGAGAGACTCAGCTCCTTTTATTTATTTTGTTGATTTATGGATCAAATTCTAAAATAAAGTTGCCTGTTGTGACTTTTGTCCCATCTACTGCATACTTAGTGCTGAGATCCCTGTAAAATGTTTTGATGAAAATATGTATGTAGAGTCCAGTCGCATTATACATACATTTCATAAATAATTTTATGAACGTGCTGAACCTTCTTAAATGCCTACTCATTCAGCTTAAACAGGCTGAAGCCAAGTATGACAAAGAGGGGAAGGGCCAAAAACATAATCAAAGAATAATTTTAAAGAGAATTCTTGTCTCTCTTGCAAAA"; 82 | // String ref = 
"TTTGATGTACAGAATGGAAGAGTCTGTTGTTCTGTGGATTTTGAGTGTCTTCAAAATAATTGAAGTATTTACAGTGGACTCAACGCAGAAGAATGGACGAAATGACCATCCAACGTGATTAAGGATAGGAATCGGTAGTTTGGTTTTTTTGTTTGTTTTGTTTTTTTAACCACAGATAATTGCCAAAGTTTCCACCTGAGGACGGTGTTTGGAGGTTGCCTTTTGGACCTACCACTTTGCTCATTCTTGCTAACCTAGTCTAGGTGACCTACAGTGCCGTGCATTTAAGTCAATGGTTGTTAAAAGAAGTTTCCCGTGTTGTAAATCATGTTTCCCTTATCAGATCATTTGCAAATACATTTAAATGATCTCATGGTAAATGTTGATGTATTTTTTGGTTTATTTTGTGTACTAACATAATAGAGAGAGACTCAGCTCCTTTTATTTATTTTGTTGATTTATGGATCAAATTCTAAAATAAAGTTGCCTGTTGTGACTTTTGTCCCATCTACTGCATACTTAGTGCTGAGATCCCTGTAAAATGTTTTGATGAAAATATGTATGTAGAGTCCAGTCGCATTATACATACATTTCATAGTGCTGAACCTTCTTAAATGCCTACTCATTCAGCTTAAACAGGCTGAAGCCAAGTATGACAAAGAGGGGAAGGGCCAAAAACATAATCAAAGAATAATTTTAAAGAGAATTCTTGTCTCTCTTGCAAAAATAAAAAAATAAAAATACAAAAAATTTAACCTGCAATTTAGAAGCAGCAATGTTTAAAGCTTTTCTTGGCTTTGAAAGCAAAATTACCTTTGTGTCAGATTTATGTAGTACTTGATCCTTTATAAAGCATCCCAGTTAGTTACTAATAAAGTTATAATCATAGTACATGTCAGAAGTATTGTCCCTATATTACAGAAGTGTGAATTA"; 83 | 84 | SemiGlobalAligner.Result res = sg.align(seq, ref); 85 | assertEquals(res.cigar, "490M15I229M"); 86 | // assertEquals(res.cigar, "487M15I232M"); 87 | // assertEquals(res.cigar, "489M15I230M"); 88 | assertEquals(res.position, 107); 89 | } 90 | 91 | @Test (groups = "unit") 92 | public void testInsert2() { 93 | // String ref = "TCCTAGGGCCTAGCTGCCATCAGCCTGATGTGCAGCAGCCTCTGCAGCGGTGCTGACACCAGTTTACAAGGCCCTGAGAGGG"; 94 | // String seq = "TCCTAGGGCCTAGCTGCCATCAGCCTGATGTGCAGCAGCCTCTGCAGGCTTCAGTTAGATTCATTTCGGTGCTGACACCAGTTTACAAGGCCCTGAGAGGG"; 95 | String ref = "CAGCCTGATGTGCAGCAGCCTCTGCAGCGGTGCTGACACCA"; 96 | String seq = "CAGCCTGATGTGCAGCAGCCTCTGCAGGCTTCAGTTAGATTCATTTCGGTGCTGACACCA"; 97 | 98 | SemiGlobalAligner.Result res = sg.align(seq, ref); 99 | System.out.println(res); 100 | } 101 | 102 | @Test (groups = "unit") 103 | public void testLeadingInsert() { 104 | String ref = "CATGCATGCATGCATGGGGGGGGGGGG"; 105 | String seq = "TTTTTTTCATGCATGCATGCATG"; 106 | 107 | SemiGlobalAligner.Result res = sg.align(seq, ref); 108 | System.out.println(res); 109 | 
assertEquals(res.cigar, "7I16M"); 110 | assertEquals(res.position, 0); 111 | } 112 | 113 | @Test (groups = "unit") 114 | public void testTrailingInsert() { 115 | String ref = "CATGCATGCATGCATGGGGGGGGGGGG"; 116 | String seq = "CATGCATGCATGCATGTTTTTTT"; 117 | 118 | SemiGlobalAligner.Result res = sg.align(seq, ref); 119 | System.out.println(res); 120 | assertEquals(res.cigar, "16M7I"); 121 | assertEquals(res.position, 0); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/test/java/abra/SimpleMapperTest.java: -------------------------------------------------------------------------------- 1 | package abra; 2 | 3 | import static org.testng.Assert.assertEquals; 4 | import org.testng.annotations.Test; 5 | 6 | import abra.SimpleMapper.Orientation; 7 | import abra.SimpleMapper.SimpleMapperResult; 8 | 9 | 10 | public class SimpleMapperTest { 11 | 12 | private String contig1 = "TTCAACTAGAGAGAGGTAAAAATTTTTCTAGAACATGAATTGCCCACTCCCCTCATTCCTTCTCAGAAACTAACTGAATTCCAGTGGGTGTGCCTGGCAAACCCAAAAGCAGTTTCTGTTCAGGATGCTGGTCTTACCTGTGAAGGCGTTCATGAACGTGGAGAGGGACCGGTTCAACATTTTGAAGAAAGGGTCTCTGCACGGATATTTCTGAGACCCACAAAGGACGGTATGCTCAAGAATGTGAGGAACACCAGTACTGTCCATGGGAGTGGTACGGAACTGCACGCTAGGGAAGAGAGAGGAATGGCACGCTAGGGAAGGCGAATGACCAGAACGCAAAAGGTTCAGCTTAGTGCTGCGGACACAGTTCCCAGATGCATCATCACCTCAGGCTACTAGAAATCATCATTCTGACACCACAATCCTCCAGCACAGGGTTTTCCAACTATA"; 13 | private String contig2 = "ATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGAT"; 14 | 15 | private static final double DEFAULT_MISMATCH_RATE = .05; 16 | 17 | @Test (groups = "unit" ) 18 | public void testMapExact() { 19 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 20 | String read = "TACTGTCCATGGGAGTGGTACGGAACTGCACGCTAGGGAAGAGAGAGGAATGGCACGCTAGGGAAGGCGAATGACCAGAACGCAAAAGGTTCAGCTTAGTG"; 21 | SimpleMapperResult smr = sm.map(read); 22 | 
assertEquals(0, smr.getMismatches()); 23 | assertEquals(257, smr.getPos()); 24 | assertEquals(Orientation.FORWARD, smr.getOrientation()); 25 | } 26 | 27 | @Test (groups = "unit" ) 28 | public void testMapOneMismatch() { 29 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 30 | String read = "TACTGTCCATGGGAGTGCTACGGAACTGCACGCTAGGGAAGAGAGAGGAATGGCACGCTAGGGAAGGCGAATGACCAGAACGCAAAAGGTTCAGCTTAGTG"; 31 | SimpleMapperResult smr = sm.map(read); 32 | assertEquals(1, smr.getMismatches()); 33 | assertEquals(257, smr.getPos()); 34 | assertEquals(Orientation.FORWARD, smr.getOrientation()); 35 | } 36 | 37 | @Test (groups = "unit" ) 38 | public void testMapFiveMismatches() { 39 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 40 | String read = "AACTGTCCATGGGAGTGGTACGTTTCTGCACGCTAGGGAAGAGAGAGGAATGGCACGCTAGGGAAGGCGAATGACCAGAACGCAAAAGGTTCAGCTTAGTC"; 41 | SimpleMapperResult smr = sm.map(read); 42 | assertEquals(5, smr.getMismatches()); 43 | assertEquals(257, smr.getPos()); 44 | assertEquals(Orientation.FORWARD, smr.getOrientation()); 45 | } 46 | 47 | @Test (groups = "unit" ) 48 | public void testMapSixMismatches() { 49 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 50 | String read = "AACTGTCCATGGGAGTGGTACGTTTCTGCACGCTAGGGAAGAGAGAGGAAAGGCACGCTAGGGAAGGCGAATGACCAGAACGCAAAAGGTTCAGCTTAGTC"; 51 | SimpleMapperResult smr = sm.map(read); 52 | assertEquals(SimpleMapper.UNMAPPED, smr.getPos()); 53 | } 54 | 55 | @Test (groups = "unit" ) 56 | public void testMapSixMismatchesIncreasedMismatchRate() { 57 | SimpleMapper sm = new SimpleMapper(contig1, .06); 58 | String read = "AACTGTCCATGGGAGTGGTACGTTTCTGCACGCTAGGGAAGAGAGAGGAAAGGCACGCTAGGGAAGGCGAATGACCAGAACGCAAAAGGTTCAGCTTAGTC"; 59 | SimpleMapperResult smr = sm.map(read); 60 | assertEquals(6, smr.getMismatches()); 61 | assertEquals(257, smr.getPos()); 62 | } 63 | 64 | 65 | @Test (groups = "unit" ) 66 | public void testMapNoSeedMatch() { 67 | SimpleMapper sm = new SimpleMapper(contig1, 
DEFAULT_MISMATCH_RATE); 68 | String read = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; 69 | SimpleMapperResult smr = sm.map(read); 70 | assertEquals(SimpleMapper.UNMAPPED, smr.getPos()); 71 | } 72 | 73 | @Test (groups = "unit" ) 74 | public void testMapAmbiguousMatch() { 75 | SimpleMapper sm = new SimpleMapper(contig2, DEFAULT_MISMATCH_RATE); 76 | String read = "CGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATA"; 77 | SimpleMapperResult smr = sm.map(read); 78 | assertEquals(SimpleMapper.HOMOLOGOUS_MAPPING, smr.getPos()); 79 | assertEquals(0, smr.getMismatches()); 80 | } 81 | 82 | @Test (groups = "unit" ) 83 | public void testMapAmbiguousMatchWithMismatches() { 84 | SimpleMapper sm = new SimpleMapper(contig2, DEFAULT_MISMATCH_RATE); 85 | String read = "CGATCGATATCGATCGATAACGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATATCGATCGATTACGATCGATA"; 86 | SimpleMapperResult smr = sm.map(read); 87 | assertEquals(SimpleMapper.HOMOLOGOUS_MAPPING, smr.getPos()); 88 | assertEquals(3, smr.getMismatches()); 89 | } 90 | 91 | @Test (groups = "unit" ) 92 | public void testSimple1Mismatch() { 93 | SimpleMapper sm = new SimpleMapper("ATCGAAAAAATTTTTTCCCCCCGGGGGGATCGGCTAATCG", DEFAULT_MISMATCH_RATE); 94 | String read = "ATAAAATTTTTTCCCCCCGGGGGGATCG"; 95 | 96 | SimpleMapperResult smr = sm.map(read); 97 | assertEquals(4, smr.getPos()); 98 | assertEquals(1, smr.getMismatches()); 99 | assertEquals(Orientation.FORWARD, smr.getOrientation()); 100 | } 101 | 102 | @Test (groups = "unit") 103 | public void testShortAmbiguousMatch() { 104 | String contig1 = "ATCGATCGATCGATCGATCGATCGATCGATCGATCG"; 105 | String read = "ACCGATCGATCGATCGATCGATCGATCGATCG"; 106 | 107 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 108 | SimpleMapperResult smr = sm.map(read); 109 | assertEquals(smr.getPos(), SimpleMapper.HOMOLOGOUS_MAPPING); 110 | assertEquals(1, 
smr.getMismatches()); 111 | } 112 | 113 | @Test (groups = "unit" ) 114 | public void testReverseComplementExact() { 115 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 116 | String read = "CCTGAACAGAAACTGCTTTTGGGTTTGCCAGGCACACCCACTGGAATTCAGTTAGTTTCTGAGAAGGAATGAGGGGAGTGGGCAATTCATGTTCTAGAAA"; 117 | SimpleMapperResult smr = sm.map(read); 118 | assertEquals(0, smr.getMismatches()); 119 | assertEquals(24, smr.getPos()); 120 | assertEquals(Orientation.REVERSE, smr.getOrientation()); 121 | } 122 | 123 | @Test (groups = "unit" ) 124 | public void testReverseComplement2Mismatches() { 125 | SimpleMapper sm = new SimpleMapper(contig1, DEFAULT_MISMATCH_RATE); 126 | String read = "CCTGAACAGAAACTGCTTTTGGGAATGCCAGGCACACCCACTGGAATTCAGTTAGTTTCTGAGAAGGAATGAGGGGAGTGGGCAATTCATGTTCTAGAAA"; 127 | SimpleMapperResult smr = sm.map(read); 128 | assertEquals(2, smr.getMismatches()); 129 | assertEquals(24, smr.getPos()); 130 | assertEquals(Orientation.REVERSE, smr.getOrientation()); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/test/java/abra/cadabra/HomopolymerRunTest.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import static org.testng.Assert.assertEquals; 4 | import org.testng.annotations.Test; 5 | 6 | public class HomopolymerRunTest { 7 | 8 | @Test (groups = "unit") 9 | public void testBasic() { 10 | String seq = "ATCGATCGTATTTTTTTTTT"; 11 | 12 | HomopolymerRun hrun = HomopolymerRun.find(seq); 13 | assertEquals(10, hrun.getLength()); 14 | assertEquals('T', hrun.getBase()); 15 | assertEquals(10, hrun.getPos()); 16 | } 17 | 18 | @Test (groups = "unit") 19 | public void testLeading() { 20 | String seq = "AAAAAAATAAAATACGACTA"; 21 | 22 | HomopolymerRun hrun = HomopolymerRun.find(seq); 23 | assertEquals(7, hrun.getLength()); 24 | assertEquals('A', hrun.getBase()); 25 | assertEquals(0, hrun.getPos()); 26 | } 27 | 28 | @Test (groups = 
"unit") 29 | public void testTrailing() { 30 | String seq = "AAATAAATAAAGTACGCCCC"; 31 | 32 | HomopolymerRun hrun = HomopolymerRun.find(seq); 33 | assertEquals(4, hrun.getLength()); 34 | assertEquals('C', hrun.getBase()); 35 | assertEquals(16, hrun.getPos()); 36 | } 37 | 38 | @Test (groups = "unit") 39 | public void testNone() { 40 | String seq = "AAATAAATAAAGTACGCGCC"; 41 | 42 | HomopolymerRun hrun = HomopolymerRun.find(seq); 43 | assertEquals(null, hrun); 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/test/java/abra/cadabra/RepeatUtilsTest.java: -------------------------------------------------------------------------------- 1 | package abra.cadabra; 2 | 3 | import org.testng.Assert; 4 | import org.testng.annotations.Test; 5 | 6 | public class RepeatUtilsTest { 7 | 8 | @Test (groups = "unit") 9 | public void testGetRepeatUnit_Str1() { 10 | String rp = RepeatUtils.getRepeatUnit("ATATAT"); 11 | Assert.assertEquals(rp, "AT"); 12 | } 13 | 14 | @Test (groups = "unit") 15 | public void testGetRepeatUnit_Str2() { 16 | String rp = RepeatUtils.getRepeatUnit("ATCATC"); 17 | Assert.assertEquals(rp, "ATC"); 18 | } 19 | 20 | @Test (groups = "unit") 21 | public void testGetRepeatUnit_Str3() { 22 | String rp = RepeatUtils.getRepeatUnit("ATCGATCGATCGATCG"); 23 | Assert.assertEquals(rp, "ATCG"); 24 | } 25 | 26 | @Test (groups = "unit") 27 | public void testGetRepeatUnit_NearStr1() { 28 | String rp = RepeatUtils.getRepeatUnit("ATATA"); 29 | Assert.assertEquals(rp, "ATATA"); 30 | } 31 | 32 | @Test (groups = "unit") 33 | public void testGetRepeatUnit_Homopolymer() { 34 | String rp = RepeatUtils.getRepeatUnit("GGG"); 35 | Assert.assertEquals(rp, "G"); 36 | } 37 | 38 | @Test (groups = "unit") 39 | public void testGetRepeatUnit_NearHomopolymer1() { 40 | String rp = RepeatUtils.getRepeatUnit("GGGGC"); 41 | Assert.assertEquals(rp, "GGGGC"); 42 | } 43 | 44 | @Test (groups = "unit") 45 | public void 
testGetRepeatUnit_NearHomopolymer2() { 46 | String rp = RepeatUtils.getRepeatUnit("CGGGG"); 47 | Assert.assertEquals(rp, "CGGGG"); 48 | } 49 | 50 | @Test (groups = "unit") 51 | public void testGetRepeatUnit_NearHomopolymer3() { 52 | String rp = RepeatUtils.getRepeatUnit("GGCGG"); 53 | Assert.assertEquals(rp, "GGCGG"); 54 | } 55 | 56 | @Test (groups = "unit") 57 | public void testGetRepeatUnit_SingleNt() { 58 | String rp = RepeatUtils.getRepeatUnit("T"); 59 | Assert.assertEquals(rp, "T"); 60 | } 61 | 62 | @Test (groups = "unit") 63 | public void testGetRepeatPeriod_HpRun() { 64 | int period = RepeatUtils.getRepeatPeriod("T", "TTTTTATATAT"); 65 | Assert.assertEquals(period, 5); 66 | } 67 | 68 | @Test (groups = "unit") 69 | public void testGetRepeatPeriod_NoRepeat1() { 70 | int period = RepeatUtils.getRepeatPeriod("T", "ATTTTATATAT"); 71 | Assert.assertEquals(period, 0); 72 | } 73 | 74 | @Test (groups = "unit") 75 | public void testGetRepeatPeriod_Str1() { 76 | int period = RepeatUtils.getRepeatPeriod("TA", "TATATCTA"); 77 | Assert.assertEquals(period, 2); 78 | } 79 | 80 | @Test (groups = "unit") 81 | public void testGetRepeatPeriod_Str2() { 82 | int period = RepeatUtils.getRepeatPeriod("TAC", "TACTAC"); 83 | Assert.assertEquals(period, 2); 84 | } 85 | 86 | @Test (groups = "unit") 87 | public void testGetRepeatPeriod_Str3() { 88 | int period = RepeatUtils.getRepeatPeriod("ATT", "ATTATTATTATTATTATTATTATT"); 89 | Assert.assertEquals(period, 8); 90 | } 91 | 92 | @Test (groups = "unit") 93 | public void testGetRepeatPeriod_NoRepeat() { 94 | int period = RepeatUtils.getRepeatPeriod("A", "GGGG"); 95 | Assert.assertEquals(period, 0); 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /temp/abra-0.94b.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/temp/abra-0.94b.jar 
-------------------------------------------------------------------------------- /temp/abra-0.94c.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/temp/abra-0.94c.jar -------------------------------------------------------------------------------- /temp/temp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/temp/temp.txt -------------------------------------------------------------------------------- /test-data/junctions1.tab: -------------------------------------------------------------------------------- 1 | chr4 1017890 1018098 1 1 1 3 0 17 2 | chr4 1795771 1800980 1 1 1 137 5 23 3 | chr4 1800000 1801530 1 1 1 0 0 0 4 | chr4 1801251 1801473 1 1 1 507 0 23 5 | chr4 1801540 1803093 1 1 1 432 2 24 6 | chr4 1803264 1803346 1 1 1 375 2 22 7 | chr4 1803471 1803561 1 1 1 679 0 23 8 | chr4 1803471 1803590 1 1 1 44 0 23 9 | chr4 1803651 1808025 1 1 1 0 0 0 10 | chr4 1803714 1805418 1 1 1 8 4 18 11 | chr4 1803753 1804640 1 1 1 111 0 22 12 | chr4 1803753 1805418 1 1 1 21 4 24 13 | chr4 1803753 1806056 1 1 1 24 0 13 14 | chr4 1803753 1806550 1 1 1 1 0 21 15 | chr4 1804792 1805418 1 1 1 32 0 17 16 | chr4 1804792 1806056 1 1 1 168 1 24 17 | chr4 1805564 1806056 1 1 1 106 5 14 18 | chr4 1806248 1806547 1 1 1 98 0 23 19 | chr4 1806248 1806550 1 1 1 209 0 24 20 | chr4 1806697 1807081 1 1 1 86 0 24 21 | chr4 1807204 1807285 1 1 1 390 0 24 22 | chr4 1807397 1807476 1 1 1 318 0 24 23 | chr4 1807401 1807476 1 1 1 2 0 18 24 | chr4 1807668 1807777 1 1 1 295 0 23 25 | chr4 1807901 1807983 1 1 1 524 0 24 26 | chr4 1808055 1808272 1 1 1 430 0 24 27 | chr4 1808411 1808555 1 1 1 589 0 24 28 | chr4 1808662 1808839 1 1 1 6 0 17 29 | chr4 1808662 1808842 1 1 1 265 0 23 30 | 
-------------------------------------------------------------------------------- /test-data/sample1.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/test-data/sample1.bam -------------------------------------------------------------------------------- /test-data/sample1.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/test-data/sample1.bam.bai -------------------------------------------------------------------------------- /test-data/sample2.bam: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/test-data/sample2.bam -------------------------------------------------------------------------------- /test-data/sample2.bam.bai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mozack/abra2/39e6297e578d0407fd687958c32cadc2dfc5845d/test-data/sample2.bam.bai -------------------------------------------------------------------------------- /test-data/test.fa: -------------------------------------------------------------------------------- 1 | >seq1 2 | TCGAATCGATATATTTCCGGAACAGACTCAG -------------------------------------------------------------------------------- /test2: -------------------------------------------------------------------------------- 1 | asdfadsa 2 | --------------------------------------------------------------------------------