├── .gitignore ├── DEVELOPING.md ├── LICENSE ├── README.md ├── build.gradle ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── gradlew ├── gradlew.bat ├── settings.gradle └── src ├── main ├── java │ └── mapper │ │ ├── AlignedBlock.java │ │ ├── AlignerWorker.java │ │ ├── AlignmentAnalysis.java │ │ ├── AlignmentCache.java │ │ ├── AlignmentCounter.java │ │ ├── AlignmentListener.java │ │ ├── AlignmentNode.java │ │ ├── AlignmentParameters.java │ │ ├── AlignmentPosition.java │ │ ├── AlignmentPosition_DirectionCounts.java │ │ ├── AlignmentStatistics.java │ │ ├── Alignments.java │ │ ├── AlignmentsSection.java │ │ ├── AncestryDetector.java │ │ ├── Api.java │ │ ├── Basepairs.java │ │ ├── BlockAligner.java │ │ ├── BufferedWriter.java │ │ ├── ByteArrayList.java │ │ ├── ByteKeyStore.java │ │ ├── BytesSlice.java │ │ ├── BytesView.java │ │ ├── ConditionalHashBlock.java │ │ ├── CountMap.java │ │ ├── Counting_HashBlockPath.java │ │ ├── DataLoader.java │ │ ├── Deserializer.java │ │ ├── DirCache.java │ │ ├── DirectionalAlignments.java │ │ ├── DisplayTable.java │ │ ├── Distribution.java │ │ ├── Duplication.java │ │ ├── DuplicationDetector.java │ │ ├── FastaParser.java │ │ ├── FastaWriter.java │ │ ├── FastqParser.java │ │ ├── FastqWriter.java │ │ ├── Filesystem.java │ │ ├── FilteredAlignments.java │ │ ├── FourInt.java │ │ ├── Gapped_HashBlock.java │ │ ├── HashBlock.java │ │ ├── HashBlockMatch_Counter.java │ │ ├── HashBlockPath.java │ │ ├── HashBlockPaths_Counter.java │ │ ├── HashBlock_Aligner.java │ │ ├── HashBlock_BaseRow.java │ │ ├── HashBlock_Buffer.java │ │ ├── HashBlock_Compiler.java │ │ ├── HashBlock_CompilerCache.java │ │ ├── HashBlock_CompilerNode.java │ │ ├── HashBlock_Database.java │ │ ├── HashBlock_Match.java │ │ ├── HashBlock_Matcher.java │ │ ├── HashBlock_ParentRow.java │ │ ├── HashBlock_Pyramid.java │ │ ├── HashBlock_Row.java │ │ ├── HashBlock_Stream.java │ │ ├── HashJob.java │ │ ├── Histogram.java │ │ ├── IMultiHashBlock.java │ │ ├── 
IndelSummarizer.java │ │ ├── LocalAligner.java │ │ ├── Logger.java │ │ ├── Main.java │ │ ├── MapperMetadata.java │ │ ├── MatchDatabase.java │ │ ├── MultiHashBlock.java │ │ ├── MutationDetectionParameters.java │ │ ├── MutationsFormatRequest.java │ │ ├── MutationsFormatterWorker.java │ │ ├── MutationsWriter.java │ │ ├── OrderingUtils.java │ │ ├── OverriddenSequence.java │ │ ├── PackJob.java │ │ ├── PackedMap.java │ │ ├── PairedEndQueryProvider.java │ │ ├── PathAligner.java │ │ ├── PathAligner_Runner.java │ │ ├── PenaltyAnalysis.java │ │ ├── PenaltySummarizer.java │ │ ├── QueriesIterator.java │ │ ├── Query.java │ │ ├── QueryAlignment.java │ │ ├── QueryAlignments.java │ │ ├── QueryBuilder.java │ │ ├── QueryMatch.java │ │ ├── QueryMatch_Aligner.java │ │ ├── QueryProvider.java │ │ ├── RandomMomentSelector.java │ │ ├── ReadSequence.java │ │ ├── Readable_DuplicationDetector.java │ │ ├── Readable_HashBlock_Database.java │ │ ├── ReferenceAlignmentCounter.java │ │ ├── ReferenceDatabase.java │ │ ├── ReferenceProvider.java │ │ ├── RegionAlignments.java │ │ ├── ReverseComplementSequence.java │ │ ├── SamWriter.java │ │ ├── Sequence.java │ │ ├── SequenceAlignment.java │ │ ├── SequenceBuilder.java │ │ ├── SequenceCondition.java │ │ ├── SequenceDatabase.java │ │ ├── SequenceMatch.java │ │ ├── SequencePosition.java │ │ ├── SequenceProvider.java │ │ ├── SequenceSection.java │ │ ├── SequenceSplitter.java │ │ ├── SequenceWriter.java │ │ ├── SequencesIterator.java │ │ ├── Serializer.java │ │ ├── SimilarityAnalysis.java │ │ ├── SimpleQueryProvider.java │ │ ├── SkipHighAmbiguity_Aligner.java │ │ ├── StatusLogger.java │ │ ├── StderrWriter.java │ │ ├── StdoutWriter.java │ │ ├── StorageFilesystem.java │ │ ├── StraightAligner.java │ │ ├── StringWriter.java │ │ ├── Subsequence.java │ │ ├── TextWriter.java │ │ ├── UnalignedQuery_Writer.java │ │ ├── Variant.java │ │ ├── Variants.java │ │ ├── VariantsInsertions.java │ │ ├── VcfFormatRequest.java │ │ ├── VcfFormatterWorker.java │ │ ├── 
VcfWriter.java │ │ └── WeightedAlignment.java └── resources │ └── mapper.properties └── test └── java ├── AlignerWorker_Test.java ├── AncestryDetector_Test.java ├── ApiTest.java ├── BasepairsTest.java ├── Counting_HashBlockPath_Test.java ├── DirCache_Test.java ├── FastaParser_Test.java ├── HashBlockAligner_Test.java ├── HashBlockCompiler_Test.java ├── HashBlockDatabase_Test.java ├── HashBlockPaths_Counter_Test.java ├── HashBlock_Test.java ├── HistogramTest.java ├── MapperMetadata_Test.java ├── MatchDatabase_Test.java ├── MemoryFilesystem.java ├── MemoryFilesystem_Test.java ├── MultiHashBlock_Test.java ├── MutationsWriter_Test.java ├── OrderingUtils_Test.java ├── PackedMap_Test.java ├── PathAligner_Test.java ├── RepeatingSequence.java ├── SequenceDatabase_Test.java └── VcfWriter_Test.java /.gitignore: -------------------------------------------------------------------------------- 1 | .gradle 2 | *~ 3 | *.swp 4 | build 5 | data/human 6 | .DS_Store 7 | */.DS_Store 8 | src/Users:.fileloc 9 | .idea 10 | -------------------------------------------------------------------------------- /DEVELOPING.md: -------------------------------------------------------------------------------- 1 | X-Mapper is a [Gradle](https://gradle.org/) project 2 | 3 | You can see the available tasks to run via `./gradlew tasks` 4 | 5 | To build a .jar at build/libs/x-mapper.jar, run `./gradlew assemble` 6 | 7 | To run tests, run `./gradlew test` 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Jeff Gaston and Anni Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, 
and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /build.gradle: -------------------------------------------------------------------------------- 1 | apply plugin: "java" 2 | 3 | // get git version 4 | def describeOutput = new ByteArrayOutputStream() 5 | project.exec({ 6 | commandLine(["git", "describe", "HEAD", "--tags"]) 7 | standardOutput = describeOutput 8 | }) 9 | def gitCommit = describeOutput.toString().replace("\n", "") 10 | def suffixOutput = new ByteArrayOutputStream() 11 | project.exec({ 12 | commandLine(["git", "status", "--porcelain"]) 13 | standardOutput = suffixOutput 14 | }) 15 | def modificationSuffix = "" 16 | if (suffixOutput.toString().trim().replace("\n", "") != "") { 17 | modificationSuffix = "-dev" 18 | } 19 | def gitVersion = gitCommit + modificationSuffix 20 | 21 | compileJava { 22 | sourceCompatibility = 1.8 23 | targetCompatibility = 1.8 24 | } 25 | 26 | String mainClass = "mapper.Main" 27 | 28 | jar { 29 | manifest { 30 | attributes 'Main-Class': mainClass 31 | } 32 | } 33 | 34 | dependencies { 35 | test { 36 | testImplementation "junit:junit:4.13" 37 | } 38 | } 39 | 40 | task release(type: Copy) { 41 | from "build/libs" 42 | 
include "x-mapper.jar" 43 | into "build/libs" 44 | rename("x-mapper.jar", "x-mapper-${gitVersion}.jar") 45 | dependsOn("build") 46 | } 47 | 48 | def expandProperties = new HashMap() 49 | expandProperties["mapperVersion"] = gitVersion 50 | project.tasks["processResources"].configure { processTask -> 51 | processTask.expand(expandProperties) 52 | processTask.inputs.property("properties", expandProperties) 53 | } 54 | 55 | repositories { 56 | mavenCentral() 57 | } 58 | 59 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mathjeff/Mapper/d20cfb949ef4f8550d4d616cdac3c9984bfbdc48/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-8.5-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /gradlew.bat: -------------------------------------------------------------------------------- 1 | @rem 2 | @rem Copyright 2015 the original author or authors. 3 | @rem 4 | @rem Licensed under the Apache License, Version 2.0 (the "License"); 5 | @rem you may not use this file except in compliance with the License. 6 | @rem You may obtain a copy of the License at 7 | @rem 8 | @rem https://www.apache.org/licenses/LICENSE-2.0 9 | @rem 10 | @rem Unless required by applicable law or agreed to in writing, software 11 | @rem distributed under the License is distributed on an "AS IS" BASIS, 12 | @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | @rem See the License for the specific language governing permissions and 14 | @rem limitations under the License. 15 | @rem 16 | 17 | @if "%DEBUG%" == "" @echo off 18 | @rem ########################################################################## 19 | @rem 20 | @rem Gradle startup script for Windows 21 | @rem 22 | @rem ########################################################################## 23 | 24 | @rem Set local scope for the variables with windows NT shell 25 | if "%OS%"=="Windows_NT" setlocal 26 | 27 | set DIRNAME=%~dp0 28 | if "%DIRNAME%" == "" set DIRNAME=. 29 | set APP_BASE_NAME=%~n0 30 | set APP_HOME=%DIRNAME% 31 | 32 | @rem Resolve any "." and ".." in APP_HOME to make it shorter. 33 | for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi 34 | 35 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 36 | set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" 37 | 38 | @rem Find java.exe 39 | if defined JAVA_HOME goto findJavaFromJavaHome 40 | 41 | set JAVA_EXE=java.exe 42 | %JAVA_EXE% -version >NUL 2>&1 43 | if "%ERRORLEVEL%" == "0" goto execute 44 | 45 | echo. 46 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 47 | echo. 48 | echo Please set the JAVA_HOME variable in your environment to match the 49 | echo location of your Java installation. 50 | 51 | goto fail 52 | 53 | :findJavaFromJavaHome 54 | set JAVA_HOME=%JAVA_HOME:"=% 55 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 56 | 57 | if exist "%JAVA_EXE%" goto execute 58 | 59 | echo. 60 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 61 | echo. 62 | echo Please set the JAVA_HOME variable in your environment to match the 63 | echo location of your Java installation. 
64 | 65 | goto fail 66 | 67 | :execute 68 | @rem Setup the command line 69 | 70 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 71 | 72 | 73 | @rem Execute Gradle 74 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* 75 | 76 | :end 77 | @rem End local scope for the variables with windows NT shell 78 | if "%ERRORLEVEL%"=="0" goto mainEnd 79 | 80 | :fail 81 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 82 | rem the _cmd.exe /c_ return code! 83 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 84 | exit /b 1 85 | 86 | :mainEnd 87 | if "%OS%"=="Windows_NT" endlocal 88 | 89 | :omega 90 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | project(":").name = "x-mapper" 2 | -------------------------------------------------------------------------------- /src/main/java/mapper/AlignmentAnalysis.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // AlignmentAnalysis keeps track of what we know about an alignment 6 | public class AlignmentAnalysis { 7 | private static double large = 1000000; 8 | 9 | public HashBlock_Matcher hashBlock_matcher; 10 | 11 | // we believe that the best alignment intersects this alignment 12 | public int predictedBestOffset; 13 | // the last value of predictedBestOffset that was checked by StraightAligner 14 | public int lastCheckedOffset; 15 | 16 | public boolean confidentAboutBestOffset; 17 | 18 | public double maxInsertionExtensionPenalty = large; 19 | public double maxDeletionExtensionPenalty = large; 20 | 21 | public AlignmentAnalysis child() { 22 | AlignmentAnalysis result = new AlignmentAnalysis(); 23 | result.predictedBestOffset = this.predictedBestOffset; 
24 | result.confidentAboutBestOffset = this.confidentAboutBestOffset; 25 | result.hashBlock_matcher = this.hashBlock_matcher; 26 | result.maxInsertionExtensionPenalty = this.maxInsertionExtensionPenalty; 27 | result.maxDeletionExtensionPenalty = this.maxDeletionExtensionPenalty; 28 | result.lastCheckedOffset = this.lastCheckedOffset; 29 | return result; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/mapper/AlignmentCache.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | import java.util.concurrent.ConcurrentHashMap; 5 | 6 | class AlignmentCache { 7 | public AlignmentCache() { 8 | } 9 | 10 | public QueryAlignments get(Query query) { 11 | QueryAlignments result = this.cache.get(query); 12 | return result; 13 | } 14 | 15 | public void addAlignment(Query query, QueryAlignments alignments) { 16 | this.cache.put(query, alignments); 17 | } 18 | 19 | public int getUsage() { 20 | return this.cache.size(); 21 | } 22 | 23 | public void addHitsAndSkips(int numHits, int numSkips) { 24 | synchronized(this.statsLock) { 25 | this.numHits += numHits; 26 | this.numSkips += numSkips; 27 | } 28 | } 29 | 30 | public long getNumHits() { 31 | return this.numHits; 32 | } 33 | 34 | public long getNumSkips() { 35 | return this.numSkips; 36 | } 37 | 38 | private ConcurrentHashMap cache = new ConcurrentHashMap(); 39 | private Object statsLock = new Object(); 40 | private long numHits; 41 | private long numSkips; 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/mapper/AlignmentCounter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | import java.util.Map; 5 | 6 | public class AlignmentCounter implements AlignmentListener { 7 | public void addAlignments(List alignments) { 8 | int 
newNumMatchingSequences = 0; 9 | int newNumMatchingQueries = 0; 10 | double newTotalAlignedPenalty = 0; 11 | long newTotalAlignedQueryLength = 0; 12 | int numNewUnalignedQuerySequences = 0; 13 | Distribution newTotalDistanceBetweenComponents = new Distribution(); 14 | for (QueryAlignments queryAlignments : alignments) { 15 | for (Map.Entry> foundAlignments : queryAlignments.getAlignments().entrySet()) { 16 | List alignment = foundAlignments.getValue(); 17 | Query query = foundAlignments.getKey(); 18 | if (alignment.size() > 0) { 19 | newNumMatchingQueries++; 20 | newNumMatchingSequences += query.getNumSequences(); 21 | 22 | newTotalAlignedPenalty += alignment.get(0).getPenalty(); 23 | newTotalAlignedQueryLength += alignment.get(0).getALength(); 24 | 25 | double currentTotalDistanceBetweenComponents = 0; 26 | for (QueryAlignment choice: alignment) { 27 | newTotalDistanceBetweenComponents.add(choice.getTotalDistanceBetweenComponents(), (double)1.0 / (double)alignment.size()); 28 | } 29 | } else { 30 | numNewUnalignedQuerySequences += query.getNumSequences(); 31 | } 32 | 33 | } 34 | } 35 | synchronized (this) { 36 | this.numMatchingSequences += newNumMatchingSequences; 37 | this.numMatchingQueries += newNumMatchingQueries; 38 | this.totalAlignedPenalty += newTotalAlignedPenalty; 39 | this.totalAlignedQueryLength += newTotalAlignedQueryLength; 40 | this.distanceBetweenQueryComponents = this.distanceBetweenQueryComponents.plus(newTotalDistanceBetweenComponents); 41 | this.numUnmatchedSequences += numNewUnalignedQuerySequences; 42 | } 43 | } 44 | 45 | public long getNumMatchingSequences() { 46 | return numMatchingSequences; 47 | } 48 | 49 | public long getNumSequences() { 50 | return numUnmatchedSequences + numMatchingSequences; 51 | } 52 | 53 | public long getTotalAlignedQueryLength() { 54 | return this.totalAlignedQueryLength; 55 | } 56 | 57 | public double getTotalAlignedPenalty() { 58 | return this.totalAlignedPenalty; 59 | } 60 | 61 | public long 
getNumAlignedQueries() { 62 | return numMatchingQueries; 63 | } 64 | 65 | public Distribution getDistanceBetweenQueryComponents() { 66 | return distanceBetweenQueryComponents; 67 | } 68 | 69 | long numMatchingSequences = 0; 70 | long numMatchingQueries = 0; 71 | long numUnmatchedSequences = 0; 72 | double totalAlignedPenalty; 73 | long totalAlignedQueryLength; 74 | Distribution distanceBetweenQueryComponents = new Distribution(); 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/mapper/AlignmentListener.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // an AlignmentListener listens for Alignments 6 | public interface AlignmentListener { 7 | void addAlignments(List alignments); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/mapper/AlignmentNode.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // An AlignmentNode is used by a PathAligner when aligning two sequences 4 | // An AlignmentNode refers to the current best known penalty of a position in the alignment path 5 | public class AlignmentNode implements Comparable { 6 | public AlignmentNode(int x, int y, double penalty, double insertXPenalty, double insertYPenalty, boolean reachedMainDiagonal, boolean reachedOtherDiagonal) { 7 | this.x = x; 8 | this.y = y; 9 | this.penalty = penalty; 10 | this.insertXPenalty = insertXPenalty; 11 | this.insertYPenalty = insertYPenalty; 12 | this.reachedMainDiagonal = reachedMainDiagonal; 13 | this.reachedOtherDiagonal = reachedOtherDiagonal; 14 | } 15 | 16 | public int getX() { 17 | return x; 18 | } 19 | public int getY() { 20 | return y; 21 | } 22 | public double getPenalty() { 23 | return this.penalty; 24 | } 25 | public double getInsertXPenalty() { 26 | return this.insertXPenalty; 27 | } 28 | public 
double getInsertYPenalty() { 29 | return this.insertYPenalty; 30 | } 31 | public boolean getReachedMainDiagonal() { 32 | return this.reachedMainDiagonal; 33 | } 34 | public boolean getReachedOtherDiagonal() { 35 | return this.reachedOtherDiagonal; 36 | } 37 | 38 | public int compareTo(Object other) { 39 | AlignmentNode converted = (AlignmentNode)other; 40 | int a = Double.compare(this.penalty, converted.penalty); 41 | if (a != 0) { 42 | return a; 43 | } 44 | return Integer.compare(converted.x, this.x); 45 | } 46 | 47 | int x; 48 | int y; 49 | double penalty; 50 | double insertXPenalty; 51 | double insertYPenalty; 52 | boolean reachedMainDiagonal; 53 | boolean reachedOtherDiagonal; 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/mapper/AlignmentParameters.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class AlignmentParameters { 4 | // the penalty we apply for a point mutation 5 | public double MutationPenalty; 6 | 7 | // the penalty we apply for starting an insertion (base pairs in the query that don't match the reference) 8 | public double InsertionStart_Penalty; 9 | // the penalty we apply for extending an insertion by 1 base pair 10 | public double InsertionExtension_Penalty; 11 | 12 | // the penalty we apply for starting a deletion (base pairs in the reference that don't match the query) 13 | public double DeletionStart_Penalty; 14 | // the penalty we apply for extending a deletion by 1 base pair 15 | public double DeletionExtension_Penalty; 16 | 17 | // The cutoff for how different the sequences can be: the penalty divided by sequence length 18 | public double MaxErrorRate; 19 | 20 | // The amount of penalty added by an unaligned base pair 21 | public double UnalignedPenalty; 22 | 23 | // The amount of penalty added by an ambiguous base pair ('N') 24 | public double AmbiguityPenalty; 25 | 26 | // The maximum number of places 
// An AlignmentPosition_DirectionCounts tells how many reads aligned to a specific position in a specific direction
// Weights are stored as ints. The get...Count() accessors divide the stored ints by listScale;
// the get/putScaled...() accessors expose the stored ints unchanged.
public class AlignmentPosition_DirectionCounts {
  // divisor applied by the get...Count() accessors
  private static final int listScale = 100;

  // The alternate values that can be counted; a key's index here is its index in this.counts.
  // Initialized eagerly so that concurrent first callers of getAllKeys() cannot race on a lazy init.
  private static final char[] allKeys = new char[]{'A', 'C', 'G', 'T', 'N', '-'};

  // Stores the scaled weight of reads matching the reference, truncating any fractional part
  public void putScaledReference(float weight) {
    this.referenceCount = (int)weight;
  }

  public int getScaledReference() {
    return this.referenceCount;
  }

  // Stores the scaled weight of reads having this alternate value.
  // Storage for alternates is only allocated once a nonzero weight is stored.
  // Note: a value that is not one of getAllKeys() has index -1, which fails the array access.
  public void putScaledAlternate(char value, int scaledWeight) {
    if (this.counts == null) {
      if (scaledWeight == 0)
        return;
      // a new int[] is already filled with zeros
      this.counts = new int[allKeys.length];
    }
    this.counts[indexForKey(value)] = scaledWeight;
  }

  // Returns the scaled weight stored for this alternate value, or 0 if no alternates are stored
  public int getScaledAlternate(char value) {
    if (this.counts == null)
      return 0;
    return this.counts[indexForKey(value)];
  }

  // Returns the scaled weight stored at this key index, or 0 if no alternates are stored
  public int getScaledAlternate(int index) {
    if (this.counts == null)
      return 0;
    return this.counts[index];
  }

  // Zeroes the weight of this alternate and adds that weight to the ignored total
  public void ignoreAlternate(char value) {
    int alternateCount = this.getScaledAlternate(value);
    putScaledAlternate(value, 0);
    this.ignoredAlternateCount += alternateCount;
  }

  // Tells whether any alternate has a nonzero stored weight
  public boolean hasAlternates() {
    if (this.counts == null)
      return false;
    for (int scaledWeight : this.counts) {
      if (scaledWeight != 0)
        return true;
    }
    return false;
  }

  public float getAlternateCount(int index) {
    return ((float)getScaledAlternate(index)) / listScale;
  }
  public float getAlternateCount(char value) {
    return this.getAlternateCount(indexForKey(value));
  }

  public boolean hasAlternate(int index) {
    return this.getAlternateCount(index) > 0;
  }

  public float getReferenceCount() {
    return (float)(this.referenceCount) / listScale;
  }

  public float getIgnoredAlternateCount() {
    return (float)(this.ignoredAlternateCount) / listScale;
  }

  // Returns the shared key array itself (not a copy), so callers must not modify it
  public static char[] getAllKeys() {
    return allKeys;
  }
  public static int getNumKeys() {
    return allKeys.length;
  }

  // Returns the index of this key in getAllKeys(), or -1 if it is not a key
  public static int indexForKey(char key) {
    for (int i = 0; i < allKeys.length; i++) {
      if (allKeys[i] == key) {
        return i;
      }
    }
    return -1;
  }
  public static char keyForIndex(int index) {
    return allKeys[index];
  }

  // The number of items we have that match the reference
  private int referenceCount;
  // The number of items we have that don't match the reference but we've been asked to ignore
  private int ignoredAlternateCount;

  // Scaled weights of items that don't match the reference, or null if none have been stored
  // Indices match getAllKeys()
  // A primitive int[] is used so that each element isn't a separately boxed Integer
  private int[] counts;
}
java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | // An AlignmentsSection tells how various Sequences align to a particular section of a reference contig 9 | public class AlignmentsSection { 10 | public AlignmentsSection(Sequence sequence) { 11 | this.sequence = sequence; 12 | } 13 | 14 | public void addForward(int referenceIndex, byte encodedValue, float weight, Sequence querySequence, int queryPosition, boolean nearQueryEnd) { 15 | if (nearQueryEnd) 16 | this.ensureEnd().addForward(referenceIndex, encodedValue, weight, querySequence, queryPosition); 17 | else 18 | this.ensureMiddle().addForward(referenceIndex, encodedValue, weight, querySequence, queryPosition); 19 | } 20 | public void addReverse(int referenceIndex, byte encodedValue, float weight, Sequence querySequence, int queryPosition, boolean nearQueryEnd) { 21 | if (nearQueryEnd) 22 | this.ensureEnd().addReverse(referenceIndex, encodedValue, weight, querySequence, queryPosition); 23 | else 24 | this.ensureMiddle().addReverse(referenceIndex, encodedValue, weight, querySequence, queryPosition); 25 | } 26 | public void insertForward(int referenceIndex, String value, float weight, Sequence querySequence, int queryPosition, boolean nearQueryEnd) { 27 | if (nearQueryEnd) 28 | this.ensureEnd().insertForward(referenceIndex, value, weight, querySequence, queryPosition); 29 | else 30 | this.ensureMiddle().insertForward(referenceIndex, value, weight, querySequence, queryPosition); 31 | } 32 | public void insertReverse(int referenceIndex, String value, float weight, Sequence querySequence, int queryPosition, boolean nearQueryEnd) { 33 | if (nearQueryEnd) 34 | this.ensureEnd().insertReverse(referenceIndex, value, weight, querySequence, queryPosition); 35 | else 36 | this.ensureMiddle().insertReverse(referenceIndex, value, weight, querySequence, queryPosition); 37 | } 38 | 39 | public AlignmentPosition getPosition(int referenceIndex) { 40 | AlignmentPosition 
position = new AlignmentPosition(this.sequence.charAt(referenceIndex)); 41 | if (this.endAlignments != null) 42 | endAlignments.updateCount(position, referenceIndex, true); 43 | if (this.middleAlignments != null) 44 | middleAlignments.updateCount(position, referenceIndex, false); 45 | return position; 46 | } 47 | 48 | public AlignmentPosition getInsertion(int referenceIndex, int insertionIndex) { 49 | AlignmentPosition position = new AlignmentPosition('-'); 50 | if (this.endAlignments != null) 51 | endAlignments.updateInsertionCount(position, referenceIndex, insertionIndex, true); 52 | if (middleAlignments != null) 53 | middleAlignments.updateInsertionCount(position, referenceIndex, insertionIndex, false); 54 | return position; 55 | } 56 | 57 | // returns a RegionAlignments that keeps track of alignments in the middle of queries 58 | private RegionAlignments ensureMiddle() { 59 | if (this.middleAlignments == null) { 60 | this.middleAlignments = new RegionAlignments(this.sequence); 61 | } 62 | return this.middleAlignments; 63 | } 64 | 65 | // returns a RegionAlignments that keeps track of alignments in the ends of queries 66 | private RegionAlignments ensureEnd() { 67 | if (this.endAlignments == null) { 68 | this.endAlignments = new RegionAlignments(this.sequence); 69 | } 70 | return this.endAlignments; 71 | } 72 | 73 | private Sequence sequence; 74 | private RegionAlignments middleAlignments; 75 | private RegionAlignments endAlignments; 76 | 77 | private static char[] allBases = new char[]{'A', 'C', 'G', 'T'}; 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/mapper/BufferedWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | public class BufferedWriter implements TextWriter { 7 | public BufferedWriter(TextWriter writer, String title, int approximateMaxNumMessages) { 8 | 
this.title = title; 9 | this.writer = writer; 10 | this.approximateMaxNumMessages = approximateMaxNumMessages; 11 | } 12 | 13 | public void write(String message) { 14 | this.ensureTitle(); 15 | this.components.add(message); 16 | this.flushIfManyMessages(); 17 | } 18 | 19 | public void write(List messages) { 20 | this.ensureTitle(); 21 | this.components.addAll(messages); 22 | this.flushIfManyMessages(); 23 | } 24 | 25 | public void flush() { 26 | this.writer.write(this.components); 27 | this.components.clear(); 28 | } 29 | 30 | private void ensureTitle() { 31 | if (this.components.size() < 1) 32 | this.components.add(this.title); 33 | } 34 | 35 | private void flushIfManyMessages() { 36 | if (this.components.size() >= this.approximateMaxNumMessages) 37 | this.flush(); 38 | } 39 | 40 | List components = new ArrayList(); 41 | TextWriter writer; 42 | String title; 43 | int approximateMaxNumMessages; 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/mapper/ByteArrayList.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // a ByteArrayList is just an ArrayList without the overhead of an extra pointer in each location 4 | 5 | public class ByteArrayList implements BytesView { 6 | 7 | public ByteArrayList() { 8 | this.content = new byte[8]; 9 | } 10 | 11 | public ByteArrayList(int initialCapacity) { 12 | this.content = new byte[initialCapacity]; 13 | } 14 | 15 | public ByteArrayList(BytesView content) { 16 | int size = content.size(); 17 | this.content = new byte[size]; 18 | for (int i = 0; i < size; i++) { 19 | this.content[i] = content.get(i); 20 | } 21 | this.count = size; 22 | } 23 | 24 | public void ensureCapacity(int capacity) { 25 | if (this.content.length < capacity) { 26 | //System.err.println("ByteArrayList growing to capacity " + capacity); 27 | byte[] newContent = new byte[capacity]; 28 | for (int i = 0; i < this.content.length; i++) { 29 | 
newContent[i] = this.content[i]; 30 | } 31 | this.content = newContent; 32 | } 33 | } 34 | 35 | private void ensureBeyondCapacity(int capacity) { 36 | int newCapacity = capacity * 11 / 10 + 1; 37 | if (newCapacity < 0) { 38 | throw new IllegalArgumentException("Cannot increase beyond capacity " + capacity); 39 | } 40 | this.ensureCapacity(newCapacity); 41 | } 42 | 43 | public void add(byte item) { 44 | if (this.count >= this.content.length) { 45 | this.ensureBeyondCapacity(this.content.length); 46 | } 47 | this.content[this.count] = item; 48 | this.count++; 49 | } 50 | 51 | public byte get(int index) { 52 | return this.content[index]; 53 | } 54 | 55 | public void put(int index, byte value) { 56 | if (this.content.length <= index) { 57 | this.ensureBeyondCapacity(index); 58 | } 59 | if (this.count <= index) { 60 | this.count = index + 1; 61 | } 62 | this.content[index] = value; 63 | } 64 | 65 | public int size() { 66 | return count; 67 | } 68 | 69 | public int getCapacity() { 70 | return this.content.length; 71 | } 72 | 73 | public ByteArrayList writable() { 74 | return this; 75 | } 76 | 77 | private byte[] content; 78 | private int count; 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/mapper/BytesSlice.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | class BytesSlice implements BytesView { 4 | public BytesSlice(byte[] data, int start, int length) { 5 | this.data = data; 6 | this.start = start; 7 | this.length = length; 8 | } 9 | 10 | public int size() { 11 | return this.length; 12 | } 13 | 14 | public byte get(int index) { 15 | return this.data[this.start + index]; 16 | } 17 | 18 | public ByteArrayList writable() { 19 | return new ByteArrayList(this); 20 | } 21 | 22 | private byte[] data; 23 | private int start; 24 | private int length; 25 | } 26 | -------------------------------------------------------------------------------- 
/src/main/java/mapper/BytesView.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public interface BytesView { 4 | int size(); 5 | byte get(int index); 6 | ByteArrayList writable(); 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/mapper/ConditionalHashBlock.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // A ConditionalHashBlock says that a HashBlock exists at a certain location if a certain condition is met 6 | // A ConditionalHashBlock gets created from an ambiguous sequence (containing, for example, 'N') 7 | public class ConditionalHashBlock { 8 | public ConditionalHashBlock(HashBlock hashBlock, SequenceCondition condition) { 9 | this.hashBlock = hashBlock; 10 | this.condition = condition; 11 | /*if (this.hashBlock == null) { 12 | throw new IllegalArgumentException("null hashblock for " + this.condition.toString()); 13 | }*/ 14 | } 15 | 16 | public HashBlock getHashBlock() { 17 | return this.hashBlock; 18 | } 19 | 20 | public SequenceCondition getCondition() { 21 | return this.condition; 22 | } 23 | 24 | public ConditionalHashBlock shifted(int shift) { 25 | if (shift == 0) 26 | return this; 27 | HashBlock shiftedBlock = null; 28 | if (hashBlock != null) { 29 | shiftedBlock = (HashBlock)hashBlock.withEnd(hashBlock.getEndIndex() + shift); 30 | } 31 | 32 | SequenceCondition shiftedCondition = condition.shifted(shift); 33 | return new ConditionalHashBlock(shiftedBlock, shiftedCondition); 34 | } 35 | 36 | public String toString(Sequence sequence) { 37 | String hashBlockText = null; 38 | if (this.hashBlock != null) 39 | hashBlockText = this.hashBlock.toString(sequence); 40 | return "(" + hashBlockText + " with " + this.condition.toString() + ")"; 41 | } 42 | 43 | private HashBlock hashBlock; 44 | private SequenceCondition condition; 45 | } 46 | 
/**
 * Accumulates a count per int key and tracks which key has the highest count.
 *
 * While only one key has been seen, no map is allocated; the HashMap is created
 * lazily when a second distinct key arrives. That is why this is usually faster
 * than using a HashMap directly.
 */
public class CountMap {
    public CountMap() {
    }

    /**
     * Adds {@code value} to the count stored for {@code key}.
     *
     * @param key the item being counted
     * @param value the amount to add to that item's count
     */
    public void add(int key, int value) {
        if (key == mostPopularKey || mostPopularKey_count == 0) {
            // fast path: the key is already the leader, or there is no leader yet
            mostPopularKey_count += value;
            mostPopularKey = key;
            if (counts != null)
                counts.put(mostPopularKey, mostPopularKey_count);
        } else {
            if (counts == null) {
                // second distinct key: start tracking every key, beginning with the leader
                counts = new HashMap<Integer, Integer>();
                counts.put(mostPopularKey, mostPopularKey_count);
            }
            Integer previous = counts.get(key);
            int count = (previous == null) ? value : previous + value;
            counts.put(key, count);
            if (count > mostPopularKey_count) {
                mostPopularKey = key;
                mostPopularKey_count = count;
            }
        }
    }

    /** Returns the count of the most popular key (0 if nothing has been added). */
    public int getMaxPopularity() {
        return mostPopularKey_count;
    }

    /** Returns the key having the highest count (0 if nothing has been added). */
    public int getMostPopularKey() {
        return mostPopularKey;
    }

    private int mostPopularKey;
    private int mostPopularKey_count;

    // null until a second distinct key is seen
    private Map<Integer, Integer> counts;
}
corresponding representation of them 15 | public class DataLoader { 16 | public static SequenceProvider LoadFrom(List paths, boolean keepQualityData) throws IllegalArgumentException, IOException, FileNotFoundException { 17 | List providers = new ArrayList(); 18 | for (String path : paths) { 19 | providers.add(readSequencesFrom(path, keepQualityData)); 20 | } 21 | return new SequencesIterator(providers); 22 | } 23 | 24 | public static SequenceProvider LoadFrom(String path, boolean keepQualityData) throws IllegalArgumentException, IOException, FileNotFoundException { 25 | return readSequencesFrom(path, keepQualityData); 26 | } 27 | 28 | private static SequenceProvider readSequencesFrom(String path, boolean keepQualityData) throws IllegalArgumentException, IOException, FileNotFoundException { 29 | String effectivePath = path; 30 | 31 | InputStream inputStream = new FileInputStream(path); 32 | String gzipSuffix = ".gz"; 33 | if (path.endsWith(gzipSuffix)) { 34 | inputStream = new GZIPInputStream(inputStream); 35 | effectivePath = effectivePath.substring(0, effectivePath.length() - gzipSuffix.length()); 36 | } 37 | 38 | BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); 39 | 40 | if (effectivePath.endsWith(".fasta") || effectivePath.endsWith(".fa") || effectivePath.endsWith(".fna")) { 41 | return new FastaParser(reader, path); 42 | } 43 | if (effectivePath.endsWith(".fastq") || effectivePath.endsWith(".fq") || effectivePath.endsWith(".ca")) { 44 | return new FastqParser(reader, path, keepQualityData); 45 | } 46 | 47 | throw new IllegalArgumentException("Unrecognized file extension: " + effectivePath + ", not .fasta/.fa or .fastq/.fq/.ca"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/mapper/Deserializer.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedInputStream; 4 | import 
// A Deserializer reads information from a file
// It doesn't worry about supporting reading information written by a previous version of the code, because we don't need that
public class Deserializer {
    /**
     * Opens a gzip-compressed file for reading.
     *
     * @throws IOException if the file cannot be opened or does not start with a gzip header
     */
    public Deserializer(File file) throws IOException {
        FileInputStream fileStream = new FileInputStream(file);
        try {
            this.inputStream = new BufferedInputStream(new GZIPInputStream(fileStream));
        } catch (IOException | RuntimeException e) {
            // the gzip wrapper was never created, so nothing else will close the file
            try {
                fileStream.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Reads text of the form {@code name:value,} and returns the value.
     *
     * @throws InvalidObjectException if the input does not start with {@code name:} or ends early
     */
    public String readProperty(String name) throws IOException, InvalidObjectException {
        readText(name);
        readText(":");
        return readUntil(',');
    }

    /** Same as {@link #readProperty(String)} but parses the value as an int. */
    public int readIntProperty(String name) throws IOException, InvalidObjectException {
        return Integer.parseInt(readProperty(name));
    }

    /**
     * Reads text of the form {@code length:bytes} and returns the bytes.
     *
     * @throws InvalidObjectException if the length is negative or the input ends early
     */
    public byte[] readLengthPrefixedByteArray() throws IOException, InvalidObjectException {
        String lengthText = readUntil(':');
        int length = Integer.parseInt(lengthText);
        if (length < 0) {
            throw new InvalidObjectException("length = " + length + " < 0");
        }
        return readBytes(length);
    }

    /**
     * Consumes exactly the given text from the input.
     *
     * @throws InvalidObjectException if the next bytes differ from the expected text or the input ends early
     */
    public void readText(String expectedText) throws IOException, InvalidObjectException {
        byte[] expectedBytes = expectedText.getBytes();
        byte[] actualBytes = readBytes(expectedBytes.length);
        if (!Arrays.equals(expectedBytes, actualBytes)) {
            String actualText = new String(actualBytes);
            throw new InvalidObjectException("Expected '" + expectedText + "' but got '" + actualText + "'");
        }
    }

    // Reads characters until the given delimiter is encountered
    // Returns a string of the characters encountered before that delimiter
    // Throws InvalidObjectException if the input ends before the delimiter is found
    public String readUntil(char delimiter) throws IOException {
        StringBuilder stringBuilder = new StringBuilder();
        while (true) {
            int current = this.inputStream.read();
            if (current < 0) {
                // without this check, the loop would append (char)-1 forever at end of input
                throw new InvalidObjectException("Reached end of input before delimiter '" + delimiter + "' after reading '" + stringBuilder + "'");
            }
            if (current == delimiter)
                return stringBuilder.toString();
            stringBuilder.append((char)current);
        }
    }

    /**
     * Reads exactly {@code length} bytes.
     *
     * @throws InvalidObjectException if the input ends before {@code length} bytes were read
     */
    public byte[] readBytes(int length) throws IOException {
        byte[] result = new byte[length];
        int numRead = 0;
        // a single read() may return fewer bytes than requested, so keep going until full
        while (numRead < length) {
            int count = this.inputStream.read(result, numRead, length - numRead);
            if (count < 0) {
                throw new InvalidObjectException("Reached end of input after " + numRead + " of " + length + " bytes");
            }
            numRead += count;
        }
        return result;
    }

    /** Closes the underlying file. */
    public void close() throws IOException {
        this.inputStream.close();
    }

    private BufferedInputStream inputStream;
}
/**
 * A running summary of weighted values from which the mean, variance and
 * standard deviation can be computed without storing the individual values.
 */
public class Distribution {
    private double sumValue;
    private double sumSquaredValue;
    private double sumWeight;

    public Distribution() {
    }

    /** Records a value with weight 1. */
    public void add(double newValue) {
        this.add(newValue, 1);
    }

    /** Records a value with the given weight. */
    public void add(double newValue, double weight) {
        double weightedValue = newValue * weight;
        double weightedSquare = newValue * newValue * weight;
        this.sumValue += weightedValue;
        this.sumSquaredValue += weightedSquare;
        this.sumWeight += weight;
    }

    /** Returns a new Distribution summarizing the data of both this and {@code other}. */
    public Distribution plus(Distribution other) {
        Distribution combined = new Distribution();
        combined.sumWeight = this.sumWeight + other.sumWeight;
        combined.sumValue = this.sumValue + other.sumValue;
        combined.sumSquaredValue = this.sumSquaredValue + other.sumSquaredValue;
        return combined;
    }

    /** Returns the total weight recorded. */
    public double getWeight() {
        return this.sumWeight;
    }

    /** Returns the weighted mean, or 0 when no weight has been recorded. */
    public double getMean() {
        return (this.sumWeight != 0) ? this.sumValue / this.sumWeight : 0;
    }

    /** Returns the weighted variance, or 0 when no weight has been recorded. */
    public double getVariance() {
        if (this.sumWeight == 0) {
            // no data
            return 0;
        }
        double squaredSumOverWeight = this.sumValue * this.sumValue / this.sumWeight;
        double variance = (this.sumSquaredValue - squaredSumOverWeight) / this.sumWeight;
        // a slightly negative result can only come from rounding error
        return (variance < 0) ? 0 : variance;
    }

    /** Returns the square root of {@link #getVariance()}. */
    public double getStdDev() {
        double variance = this.getVariance();
        return Math.sqrt(variance);
    }
}
| package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashSet; 5 | import java.lang.Iterable; 6 | import java.util.List; 7 | 8 | class Duplication implements Comparable { 9 | public Duplication(int length) { 10 | this.length = length; 11 | // disallow duplicate positions, which can happen occasionally if two gapped hashblocks have exactly the same start and end position 12 | this.startPositions = new ArrayList(); 13 | } 14 | 15 | public void addPosition(SequencePosition startPosition) { 16 | this.startPositions.add(startPosition); 17 | } 18 | 19 | public List getStartPositions() { 20 | return this.startPositions; 21 | } 22 | 23 | public void removeDuplicatePositions() { 24 | this.startPositions = new ArrayList(new HashSet(this.startPositions)); 25 | } 26 | 27 | public int getLength() { 28 | return length; 29 | } 30 | 31 | public int getNumInstances() { 32 | return this.startPositions.size(); 33 | } 34 | 35 | @Override 36 | public String toString() { 37 | StringBuilder stringBuilder = new StringBuilder(); 38 | stringBuilder.append("Duplication length " + this.length + " at "); 39 | for (SequencePosition position : this.startPositions) { 40 | stringBuilder.append(position.toString()); 41 | stringBuilder.append(","); 42 | } 43 | return stringBuilder.toString(); 44 | } 45 | 46 | @Override 47 | public int compareTo(Duplication other) { 48 | if (this.length != other.length) { 49 | return this.length - other.length; 50 | } 51 | if (this.startPositions.size() != other.startPositions.size()) { 52 | return this.startPositions.size() - other.startPositions.size(); 53 | } 54 | for (int i = 0; i < this.startPositions.size(); i++) { 55 | int comparison = this.startPositions.get(i).compareTo(other.startPositions.get(i)); 56 | if (comparison != 0) 57 | return comparison; 58 | } 59 | return 0; 60 | } 61 | 62 | private int length; 63 | private List startPositions; 64 | } 65 | -------------------------------------------------------------------------------- 
/src/main/java/mapper/FastaParser.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | 6 | // A FastaParser parses .fasta files 7 | public class FastaParser implements SequenceProvider { 8 | public FastaParser(BufferedReader reader, String path) { 9 | this.reader = reader; 10 | this.path = path; 11 | } 12 | 13 | public SequenceBuilder getNextSequence() { 14 | try { 15 | return doGetNextSequence(); 16 | } catch(IOException e) { 17 | throw new RuntimeException(e); 18 | } 19 | } 20 | 21 | public boolean get_allReadsContainQualityInformation() { 22 | return false; 23 | } 24 | 25 | private SequenceBuilder doGetNextSequence() throws IOException { 26 | // find sequence start marker 27 | if (!hasReadASequence) { 28 | int first = reader.read(); 29 | if (first < 0) 30 | return null; 31 | char firstChar = (char)first; 32 | if (firstChar != '>') { 33 | throw new IllegalArgumentException("Not in .fasta format: expected first character to be '>', not '" + firstChar + "'"); 34 | } 35 | hasReadASequence = true; 36 | } 37 | 38 | // read sequence name 39 | String name = reader.readLine(); 40 | if (name == null) 41 | return null; 42 | int spaceIndex = name.indexOf(' '); 43 | if (spaceIndex > 0) 44 | name = name.substring(0, spaceIndex); 45 | SequenceBuilder builder = new SequenceBuilder(); 46 | builder.setName(name); 47 | builder.setPath(path); 48 | 49 | // read sequence content until next sequence start marker 50 | while (true) { 51 | int firstInLine = reader.read(); 52 | if (firstInLine < 0) { 53 | break; 54 | } 55 | char firstCharInLine = (char)firstInLine; 56 | if (firstCharInLine == '>') { 57 | break; 58 | } 59 | if (firstCharInLine == '\n') { 60 | // blank line: skip 61 | continue; 62 | } 63 | builder.add(firstCharInLine); 64 | String line = this.reader.readLine(); 65 | if (line == null) { 66 | // remainder of line is empty: skip 67 | continue; 68 | } 
69 | builder.add(line); 70 | } 71 | if (builder.getLength() < 1) { 72 | throw new RuntimeException("Sequence " + name + " in " + this.path + " has length " + builder.getLength()); 73 | } 74 | return builder; 75 | } 76 | 77 | @Override 78 | public String toString() { 79 | return this.path; 80 | } 81 | 82 | BufferedReader reader; 83 | String path; 84 | boolean hasReadASequence = false; 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/mapper/FastaWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedOutputStream; 4 | import java.io.File; 5 | import java.io.FileOutputStream; 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | 9 | public class FastaWriter implements SequenceWriter { 10 | public FastaWriter(String path) throws FileNotFoundException { 11 | File file = new File(path); 12 | this.fileStream = new FileOutputStream(file); 13 | this.bufferedStream = new BufferedOutputStream(fileStream); 14 | } 15 | 16 | public void write(Sequence sequence) { 17 | this.write(">" + sequence.getName()); 18 | this.write(sequence.getText()); 19 | } 20 | 21 | public void close() { 22 | try { 23 | this.bufferedStream.close(); 24 | } catch (IOException e) { 25 | } 26 | try { 27 | this.fileStream.close(); 28 | } catch (IOException e) { 29 | } 30 | } 31 | 32 | private void write(String text) { 33 | try { 34 | this.bufferedStream.write((text + "\n").getBytes()); 35 | } catch (IOException e) { 36 | throw new RuntimeException(e); 37 | } 38 | } 39 | 40 | FileOutputStream fileStream; 41 | BufferedOutputStream bufferedStream; 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/mapper/FastqParser.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedReader; 4 | import 
java.io.IOException; 5 | 6 | // A FastqParser parses .fastq files 7 | public class FastqParser implements SequenceProvider { 8 | public FastqParser(BufferedReader reader, String path, boolean keepQualityData) { 9 | this.reader = reader; 10 | this.path = path; 11 | this.keepQualityData = keepQualityData; 12 | } 13 | 14 | public SequenceBuilder getNextSequence() { 15 | try { 16 | return doGetNextSequence(); 17 | } catch (IOException e) { 18 | throw new RuntimeException(e); 19 | } 20 | } 21 | 22 | public boolean get_allReadsContainQualityInformation() { 23 | return true; 24 | } 25 | 26 | private SequenceBuilder doGetNextSequence() throws IOException { 27 | int first = this.reader.read(); 28 | if (first < 0) { 29 | // End of file: no more entries 30 | return null; 31 | } 32 | char firstChar = (char)first; 33 | if (firstChar != '@') { 34 | // This is not the start of a new sequence 35 | // Could this be a blank line at the end of the file? 36 | if (firstChar == '\n') { 37 | int second = this.reader.read(); 38 | if (second < 0) { 39 | return null; 40 | } 41 | char secondChar = (char)second; 42 | throw new IllegalArgumentException("Not in .fastq format: " + path + ". After newline expected end of file, not '" + secondChar + "'"); 43 | } 44 | throw new IllegalArgumentException("Not in .fastq format: " + path + ". 
Expected '@', not '" + firstChar + "'"); 45 | } 46 | String name = this.reader.readLine(); 47 | String nameSuffix; 48 | int spaceIndex = name.indexOf(' '); 49 | if (spaceIndex != -1) { 50 | nameSuffix = name.substring(spaceIndex); 51 | name = name.substring(0, spaceIndex); 52 | } else { 53 | nameSuffix = ""; 54 | } 55 | 56 | // read the sequence text and compress it 57 | String contentLine = this.reader.readLine(); 58 | SequenceBuilder builder = new SequenceBuilder(); 59 | builder.setName(name); 60 | builder.add(contentLine); 61 | 62 | // read the other lines 63 | String commentString = this.reader.readLine(); 64 | String qualityString = this.reader.readLine(); 65 | // build result and return it 66 | if (this.keepQualityData) { 67 | builder.asRead(nameSuffix, qualityString, commentString); 68 | } 69 | if (builder.getLength() < 1) { 70 | throw new RuntimeException("Sequence " + name + " in " + this.path + " has length " + builder.getLength()); 71 | } 72 | return builder; 73 | } 74 | 75 | @Override 76 | public String toString() { 77 | return this.path; 78 | } 79 | 80 | BufferedReader reader; 81 | String path; 82 | boolean keepQualityData; 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/mapper/FastqWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedOutputStream; 4 | import java.io.File; 5 | import java.io.FileOutputStream; 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | 9 | public class FastqWriter implements SequenceWriter { 10 | public FastqWriter(String path) throws FileNotFoundException { 11 | File file = new File(path); 12 | this.fileStream = new FileOutputStream(file); 13 | this.bufferedStream = new BufferedOutputStream(fileStream); 14 | } 15 | 16 | public void write(Sequence sequence) { 17 | ReadSequence read = (ReadSequence)sequence; 18 | this.write("@" + read.getName() + 
read.nameSuffix); 19 | this.write(read.getText()); 20 | this.write(read.commentString); 21 | this.write(read.qualityString); 22 | } 23 | 24 | private void write(String text) { 25 | try { 26 | this.bufferedStream.write((text + "\n").getBytes()); 27 | } catch (IOException e) { 28 | throw new RuntimeException(e); 29 | } 30 | } 31 | 32 | public void close() { 33 | try { 34 | this.bufferedStream.close(); 35 | } catch (IOException e) { 36 | } 37 | try { 38 | this.fileStream.close(); 39 | } catch (IOException e) { 40 | } 41 | } 42 | 43 | FileOutputStream fileStream; 44 | BufferedOutputStream bufferedStream; 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/mapper/Filesystem.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.File; 4 | import java.io.IOException; 5 | 6 | public interface Filesystem { 7 | boolean createNewFile(File file) throws IOException; 8 | void write(File file, byte[] content) throws IOException; 9 | byte[] readFile(File file) throws IOException; 10 | void mkdirs(File dir) throws IOException; 11 | } 12 | -------------------------------------------------------------------------------- /src/main/java/mapper/FilteredAlignments.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | // A FilteredAlignments tells how some subset of Sequences align to a particular reference contig 9 | public class FilteredAlignments { 10 | public FilteredAlignments(Alignments alignments, MutationDetectionParameters filter) { 11 | this.alignments = alignments; 12 | this.filter = filter; 13 | } 14 | 15 | public int size() { 16 | return this.alignments.size(); 17 | } 18 | 19 | public AlignmentPosition getPosition(int referenceIndex) { 20 | AlignmentPosition result = 
this.alignments.getPosition(referenceIndex); 21 | result = filterSNPs(result, referenceIndex); 22 | result = filterDeletions(result, referenceIndex); 23 | return result; 24 | } 25 | 26 | // TODO: make this more efficient rather than calling it separately for each position in the deletion 27 | private boolean couldBeDeletion(int referenceIndex) { 28 | int index = referenceIndex; 29 | while (index >= 0) { 30 | AlignmentPosition position = this.alignments.getPosition(index); 31 | if (this.filter.supportsIndelStart(position)) 32 | return true; 33 | if (!this.filter.supportsIndelContinuation(position)) 34 | return false; 35 | index--; 36 | } 37 | return true; 38 | } 39 | 40 | public AlignmentPosition getInsertion(int referenceIndex, int insertionIndex) { 41 | AlignmentPosition result = this.alignments.getInsertion(referenceIndex, insertionIndex); 42 | result = filterInsertions(result, referenceIndex, insertionIndex); 43 | return result; 44 | } 45 | 46 | private AlignmentPosition filterSNPs(AlignmentPosition position, int referenceIndex) { 47 | float totalCount = position.getCount(); 48 | char[] nonzeroAlternates = position.getNonzeroAlternates(); 49 | for (char alternate: nonzeroAlternates) { 50 | if (alternate != '-') { 51 | float alternateCount = position.getAlternateCount(alternate); 52 | if (!filter.supportsSNP(alternateCount, totalCount)) { 53 | position.ignoreAlternate(alternate); 54 | } 55 | } 56 | } 57 | return position; 58 | } 59 | 60 | private AlignmentPosition filterDeletions(AlignmentPosition position, int referenceIndex) { 61 | // fast path for positions without alternates 62 | if (!position.hasAlternates()) 63 | return position; 64 | // check whether this position satisfies the filter 65 | if (couldBeDeletion(referenceIndex)) 66 | return position; 67 | // filter out deletions 68 | position.ignoreAlternate('-'); 69 | return position; 70 | } 71 | 72 | private AlignmentPosition filterInsertions(AlignmentPosition position, int referenceIndex, int 
// a FourInt is just four ints. It can be used as a key in a HashMap
public class FourInt {
    public FourInt(int a, int b, int c, int d) {
        this.a = a;
        this.b = b;
        this.c = c;
        this.d = d;
    }

    /**
     * Returns true only for another FourInt holding the same four values.
     * Null and objects of other types compare unequal instead of throwing.
     */
    @Override
    public boolean equals(Object otherObject) {
        if (this == otherObject)
            return true;
        if (!(otherObject instanceof FourInt))
            return false; // also covers null
        FourInt other = (FourInt)otherObject;
        return this.a == other.a
            && this.b == other.b
            && this.c == other.c
            && this.d == other.d;
    }

    /** Combines the four values; equal FourInts always produce equal hash codes. */
    @Override
    public int hashCode() {
        return a + b * 11 + c * 101 + d * 1063;
    }

    // final because a key must not change while it is stored in a HashMap
    private final int a, b, c, d;
}
Gapped_HashBlock(int block1Start, int block1Length, int gapLength, int block2Length) { 8 | super(block1Start, block1Length + gapLength + block2Length); 9 | this.block1Length = block1Length; 10 | this.gapLength = gapLength; 11 | } 12 | 13 | @Override 14 | public String getText(Sequence sequence) { 15 | String prefix = sequence.getRange(this.getStartIndex(), this.block1Length); 16 | String gap = repeat("_", this.gapLength); 17 | String suffix = sequence.getRange(this.getStartIndex() + this.block1Length + this.gapLength, this.getSuffixLength()); 18 | return prefix + gap + suffix; 19 | } 20 | 21 | private String repeat(String text, int count) { 22 | StringBuilder builder = new StringBuilder(); 23 | for (int i = 0; i < count; i++) { 24 | builder.append(text); 25 | } 26 | return builder.toString(); 27 | } 28 | 29 | private int getSuffixLength() { 30 | return this.getLength() - this.block1Length - this.gapLength; 31 | } 32 | 33 | private int block1Length; 34 | private int gapLength; 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlockMatch_Counter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | public class HashBlockMatch_Counter { 6 | public HashBlockMatch_Counter(SequenceMatch match, List matchHistory, int initialNumDistinctMismatches, int lastMismatchedPosition) { 7 | this.match = match; 8 | this.matchHistory = matchHistory; 9 | this.numDistinctMismatches = initialNumDistinctMismatches; 10 | this.lastMismatchedPosition = lastMismatchedPosition; 11 | this.historyProcessed_index = matchHistory.size() - 1; 12 | } 13 | 14 | public int getNumMatches() { 15 | return numMatches; 16 | } 17 | 18 | public int getNumDistinctMismatches() { 19 | this.update(); 20 | return numDistinctMismatches; 21 | } 22 | 23 | public void addMatch(SequenceMatch match, HashBlock block) { 24 | // this.match = match; 25 | 
numMatches++; 26 | this.lastMatchedBlock = block; 27 | } 28 | 29 | public int getLastMismatchedPosition() { 30 | return this.lastMismatchedPosition; 31 | } 32 | 33 | public HashBlock getLastMatchedBlock() { 34 | return this.lastMatchedBlock; 35 | } 36 | 37 | public SequenceMatch getMatch() { 38 | return this.match; 39 | } 40 | 41 | public void update() { 42 | while (this.historyProcessed_index < this.matchHistory.size()) { 43 | this.update(matchHistory.get(this.historyProcessed_index)); 44 | this.historyProcessed_index++; 45 | } 46 | } 47 | 48 | public void setGood() { 49 | this.good = true; 50 | this.priority = this.getNumDistinctMismatches(); 51 | } 52 | 53 | public boolean isGood() { 54 | return this.good; 55 | } 56 | 57 | public int getPriority() { 58 | return this.priority; 59 | } 60 | 61 | public void setNextCounter(HashBlockMatch_Counter next) { 62 | this.nextCounter = next; 63 | } 64 | public HashBlockMatch_Counter getNextCounter() { 65 | return this.nextCounter; 66 | } 67 | public void setPreviousCounter(HashBlockMatch_Counter prev) { 68 | this.previousCounter = prev; 69 | } 70 | public HashBlockMatch_Counter getPreviousCounter() { 71 | return this.previousCounter; 72 | } 73 | 74 | private void update(HashBlock block) { 75 | // make sure it's a mismatch 76 | if (block != this.lastMatchedBlock) { 77 | int blockStart = block.getStartIndex(); 78 | int blockEnd = block.getEndIndex(); 79 | // make sure it's a new mismatch 80 | if (blockStart >= this.lastMismatchedPosition) { 81 | // make sure the reference exists here 82 | if (this.match.getOffset() + blockEnd <= this.match.getSequenceB().getLength()) { 83 | this.numDistinctMismatches++; 84 | this.lastMismatchedPosition = blockEnd; 85 | } 86 | } 87 | } 88 | } 89 | 90 | int numMatches; 91 | int numDistinctMismatches; 92 | int lastMismatchedPosition; 93 | HashBlock lastMatchedBlock; 94 | SequenceMatch match; 95 | 96 | List matchHistory; 97 | int historyProcessed_index; 98 | boolean good; 99 | 
HashBlockMatch_Counter nextCounter; 100 | HashBlockMatch_Counter previousCounter; 101 | 102 | 103 | // Tells how early we want to consider this counter 104 | // Lower priority is better. 0 is the best. 105 | // This value equals the number of distinct mismatched hashblocks found when setGood() is called 106 | int priority; 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_BaseRow.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | public class HashBlock_BaseRow implements HashBlock_Row { 9 | public HashBlock_BaseRow(Sequence sequence, HashBlock_Buffer blockListener) { 10 | this.sequence = sequence; 11 | this.blockListener = blockListener; 12 | } 13 | 14 | private static byte[] encodedChars; 15 | private static byte[] getEncodedChars() { 16 | if (encodedChars == null) { 17 | byte[] newChars = new byte[4]; 18 | newChars[0] = Basepairs.encode('A'); 19 | newChars[1] = Basepairs.encode('C'); 20 | newChars[2] = Basepairs.encode('G'); 21 | newChars[3] = Basepairs.encode('T'); 22 | encodedChars = newChars; 23 | } 24 | return encodedChars; 25 | } 26 | 27 | public IMultiHashBlock get(int index) { 28 | if (index >= this.sequence.getLength()) 29 | return null; 30 | IMultiHashBlock block = this.blocks.get(index); 31 | if (block == null) { 32 | byte encodedItemHere = this.sequence.encodedCharAt(index); 33 | if (Basepairs.isAmbiguous(encodedItemHere)) { 34 | List possibleBlocks = new ArrayList(4); 35 | for (byte encodedOptionHere : getEncodedChars()) { 36 | if (Basepairs.canMatch(encodedItemHere, encodedOptionHere)) { 37 | char optionHere = Basepairs.decode(encodedOptionHere); 38 | HashBlock possibleBlock = new HashBlock(optionHere, index); 39 | SequenceCondition condition = new SequenceCondition(index, optionHere); 40 | 
possibleBlocks.add(new ConditionalHashBlock(possibleBlock, condition)); 41 | } 42 | } 43 | block = new MultiHashBlock(possibleBlocks); 44 | } else { 45 | // This sequence knows which character exists here 46 | char itemHere = Basepairs.decode(encodedItemHere); 47 | block = new HashBlock(itemHere, index); 48 | } 49 | if (block != null) { 50 | if (this.blockListener != null) 51 | this.blockListener.addHashblock(block); 52 | this.blocks.put(index, block); 53 | } 54 | } 55 | return block; 56 | } 57 | 58 | public void skipTo(int index) { 59 | } 60 | 61 | private int charToIndex(char c) { 62 | if (c == 'A') 63 | return 0; 64 | if (c == 'C') 65 | return 1; 66 | if (c == 'G') 67 | return 2; 68 | return 3; 69 | } 70 | 71 | public IMultiHashBlock getAfter(int index) { 72 | return get(index + 1); 73 | } 74 | 75 | public Sequence getSequence() { 76 | return this.sequence; 77 | } 78 | 79 | public void garbageCollect(int index) { 80 | this.blocks.remove(index); 81 | } 82 | 83 | public int getLevel() { 84 | return 0; 85 | } 86 | 87 | private Sequence sequence; 88 | private HashBlock_Buffer blockListener; 89 | private Map blocks = new HashMap(); 90 | } 91 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_Buffer.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // a HashBlock_Buffer listens for hashblocks in a certain area, stores them, and emits a lot of them at once to a HashBlock_Database 7 | public class HashBlock_Buffer { 8 | public HashBlock_Buffer(HashJob section, HashBlock_Database database, int minInterestingSize) { 9 | this.section = section; 10 | this.database = database; 11 | this.minInterestingSize = minInterestingSize; 12 | } 13 | 14 | public void addHashblock(IMultiHashBlock block) { 15 | int startIndex = block.getStartIndex(); 16 | if (startIndex < this.section.minStartIndex) 
17 | return; // outside the interesting range 18 | if (startIndex >= this.section.maxStartIndexExclusive) 19 | return; // outside the interesting range 20 | 21 | HashBlock single = block.getSingle(); 22 | if (single == null) { 23 | this.multiBlocks.add(block); 24 | if (this.multiBlocks.size() >= 65536) { 25 | this.flush(); 26 | } 27 | } else { 28 | this.singleBlocks.add(block); 29 | if (this.singleBlocks.size() >= 8096) { 30 | this.flush(); 31 | } 32 | } 33 | } 34 | 35 | public void flush() { 36 | this.database.addHashblocks(this.section.sequence, this.multiBlocks); 37 | this.multiBlocks.clear(); 38 | this.database.addHashblocks(this.section.sequence, this.singleBlocks); 39 | this.singleBlocks.clear(); 40 | } 41 | 42 | public int getMinInterestingSize() { 43 | return this.minInterestingSize; 44 | } 45 | 46 | private List multiBlocks = new ArrayList(); 47 | private List singleBlocks = new ArrayList(); 48 | private HashBlock_Database database; 49 | private HashJob section; 50 | private int minInterestingSize; // we don't have to save any hashblocks shorter than this because the HashBlock_Database won't be interested in them 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_Compiler.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class HashBlock_Compiler implements HashBlock_Row { 4 | 5 | public HashBlock_Compiler(HashBlock_Row sourceRow) { 6 | this.sourceRow = sourceRow; 7 | this.cache = HashBlock_CompilerCache.getInstance(sourceRow.getLevel()); 8 | } 9 | 10 | public HashBlock_Compiler(HashBlock_Row sourceRow, HashBlock_CompilerCache cache) { 11 | this.sourceRow = sourceRow; 12 | this.cache = cache; 13 | } 14 | 15 | public IMultiHashBlock get(int index) { 16 | IMultiHashBlock next = this.getAfter(index - 1); 17 | if (next != null && next.getStartIndex() == index) { 18 | return next; 19 | } 20 | return null; 21 | } 22 | 
23 | public IMultiHashBlock getAfter(int index) { 24 | if (index == this.previousRequestIndex) { 25 | return this.previousResult; 26 | } 27 | this.previousResult = this.computeAfter(index); 28 | this.previousRequestIndex = index; 29 | return this.previousResult; 30 | } 31 | 32 | private IMultiHashBlock computeAfter(int index) { 33 | Sequence sequence = this.getSequence(); 34 | HashBlock_CompilerNode node = this.cache.rootNode; 35 | int endIndex = index + 1; 36 | int last = sequence.getLength(); 37 | IMultiHashBlock pendingBlock = null; 38 | int newRequestShift = -1; 39 | int prevEnd = -1; 40 | if (this.previousResultNode != null) { 41 | newRequestShift = index - this.previousRequestIndex; 42 | } 43 | if (newRequestShift > 0) { 44 | // the previous request was at an earlier index so we might be able to reuse some information 45 | HashBlock_CompilerNode prevNode = this.previousResultNode.getPrevious(newRequestShift); 46 | if (prevNode != null && this.previousResult != null) { 47 | endIndex = this.previousResult.getEndIndex(); 48 | node = prevNode; 49 | if (node.getBlock() != null) { 50 | this.previousResultNode = node; 51 | return node.getBlock().withEnd(endIndex); 52 | } 53 | } 54 | prevEnd = this.previousResult.getEndIndex(); 55 | } 56 | while (true) { 57 | boolean ambiguous = false; 58 | if (endIndex == prevEnd) { 59 | this.previousResultNode.putPrevious(newRequestShift, node); 60 | } 61 | if (endIndex >= last) { 62 | this.previousResultNode = null; 63 | return null; 64 | } 65 | char c = sequence.charAt(endIndex); 66 | if (Basepairs.isAmbiguous(c)) { 67 | ambiguous = true; 68 | } 69 | HashBlock_CompilerNode nextNode = node.getNext(c); 70 | endIndex++; 71 | if (nextNode == null) { 72 | if (pendingBlock == null) { 73 | this.sourceRow.skipTo(index); 74 | pendingBlock = this.sourceRow.getAfter(index); 75 | if (pendingBlock != null && pendingBlock.getSingle() == null) { 76 | // If this is a MultiHashBlock, then we don't know how much area around it can affect its state 
77 | // So, if we see a MultiHashBlock, we don't cache it here 78 | this.previousResultNode = null; 79 | return pendingBlock; 80 | } 81 | } 82 | if (ambiguous) { 83 | // If we encounter ambiguity, then we don't know how much area around this can affect the state 84 | // So, if we see ambiguity, we don't cache it here 85 | this.previousResultNode = null; 86 | return pendingBlock; 87 | } 88 | IMultiHashBlock blockHere; 89 | if (pendingBlock != null && pendingBlock.getEndIndex() == endIndex) { 90 | blockHere = pendingBlock; 91 | } else { 92 | blockHere = null; 93 | } 94 | nextNode = new HashBlock_CompilerNode(blockHere); 95 | node.put(c, nextNode); // it is possible for this to overwrite another node added by another thread, but this is just a cache so it's ok if we lose some nodes occasionally 96 | } 97 | node = nextNode; 98 | if (node.getBlock() != null) { 99 | this.previousResultNode = node; 100 | return node.getBlock().withEnd(endIndex); 101 | } 102 | } 103 | } 104 | 105 | public Sequence getSequence() { 106 | return this.sourceRow.getSequence(); 107 | } 108 | 109 | public void garbageCollect(int index) { 110 | this.sourceRow.garbageCollect(index); 111 | } 112 | 113 | public int getLevel() { 114 | return sourceRow.getLevel(); 115 | } 116 | 117 | public void skipTo(int index) { 118 | this.sourceRow.skipTo(index); 119 | } 120 | 121 | HashBlock_Row sourceRow; 122 | HashBlock_CompilerCache cache; 123 | int previousRequestIndex = -2; 124 | IMultiHashBlock previousResult; 125 | HashBlock_CompilerNode previousResultNode; 126 | } 127 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_CompilerCache.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class HashBlock_CompilerCache { 4 | 5 | public static HashBlock_CompilerCache getInstance(int level) { 6 | HashBlock_CompilerCache cache = instances[level]; 7 | if (cache == null) { 8 | 
synchronized(instances) { 9 | cache = instances[level]; 10 | if (cache == null) { 11 | cache = new HashBlock_CompilerCache(); 12 | instances[level] = cache; 13 | } 14 | } 15 | } 16 | return cache; 17 | } 18 | private static HashBlock_CompilerCache[] instances = new HashBlock_CompilerCache[100]; 19 | 20 | public HashBlock_CompilerNode rootNode = new HashBlock_CompilerNode(null); 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_CompilerNode.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class HashBlock_CompilerNode { 4 | public HashBlock_CompilerNode(IMultiHashBlock block) { 5 | if (block == null) 6 | this.nexts = new HashBlock_CompilerNode[4]; 7 | this.block = block; 8 | } 9 | 10 | public HashBlock_CompilerNode getNext(char item) { 11 | if (this.block != null) 12 | throw new IllegalArgumentException("called getExisting(" + item + ") on compiler node having nonempty hashblock: " + this.block); 13 | int index = itemToIndex(item); 14 | if (index < 0) 15 | return null; 16 | return this.nexts[index]; 17 | } 18 | 19 | // Saves the next node in the tree 20 | // Only allowed if this.block is null 21 | public void put(char item, HashBlock_CompilerNode next) { 22 | this.nexts[itemToIndex(item)] = next; 23 | } 24 | 25 | public IMultiHashBlock getBlock() { 26 | return this.block; 27 | } 28 | 29 | public HashBlock_CompilerNode getPrevious(int prefixLength) { 30 | if (this.block == null) 31 | throw new IllegalArgumentException("called getPrevious(" + prefixLength + ") on compiler node having empty hashblock"); 32 | // guard against concurrent modification from another thread 33 | HashBlock_CompilerNode[] currentNexts = this.nexts; 34 | if (currentNexts == null || prefixLength > currentNexts.length) 35 | return null; 36 | return currentNexts[prefixLength - 1]; 37 | } 38 | 39 | // Specifies the a node in a related tree having a 
different start 40 | // Only allowed if this.block is not null 41 | public void putPrevious(int prefixLength, HashBlock_CompilerNode prev) { 42 | // guard against concurrent modification from another thread 43 | HashBlock_CompilerNode[] currentNexts = this.nexts; 44 | boolean overwrite = false; 45 | if (currentNexts == null || prefixLength > currentNexts.length) { 46 | currentNexts = new HashBlock_CompilerNode[prefixLength + 1]; 47 | overwrite = true; 48 | } 49 | currentNexts[prefixLength - 1] = prev; 50 | if (overwrite) 51 | this.nexts = currentNexts; 52 | } 53 | 54 | private int itemToIndex(char item) { 55 | if (item == 'A') 56 | return 0; 57 | if (item == 'C') 58 | return 1; 59 | if (item == 'G') 60 | return 2; 61 | if (item == 'T') 62 | return 3; 63 | return -1; 64 | } 65 | 66 | private HashBlock_CompilerNode[] nexts; 67 | private IMultiHashBlock block; 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_Match.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | public class HashBlock_Match { 6 | public HashBlock_Match(HashBlock block, SequencePosition[] matches) { 7 | this.block = block; 8 | this.matches = matches; 9 | } 10 | public HashBlock getBlock() { 11 | return this.block; 12 | } 13 | public SequencePosition[] getMatches() { 14 | return this.matches; 15 | } 16 | 17 | public HashBlock block; 18 | public SequencePosition[] matches; 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_Pyramid.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // A HashBlock_Pyramid is essentially a List>. 
7 | // See HashBlock_Stream for details 8 | public class HashBlock_Pyramid { 9 | public HashBlock_Pyramid(HashBlock_Stream stream) { 10 | this.stream = stream; 11 | this.blocks = new ArrayList(); 12 | } 13 | 14 | 15 | public HashBlock_Row get(int index) { 16 | while(true) { 17 | if (this.blocks.size() > index) { 18 | return this.blocks.get(index); 19 | } 20 | if (!this.addBatch()) { 21 | return null; 22 | } 23 | } 24 | } 25 | 26 | private boolean addBatch() { 27 | HashBlock_Row next = this.stream.getNextBatch(); 28 | if (next == null) 29 | return false; 30 | this.blocks.add(next); 31 | return true; 32 | } 33 | 34 | private HashBlock_Stream stream; 35 | private List blocks; 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_Row.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public interface HashBlock_Row { 4 | // gets the HashBlock at this position 5 | IMultiHashBlock get(int index); 6 | 7 | // returns the next HashBlock after this position 8 | IMultiHashBlock getAfter(int index); 9 | 10 | Sequence getSequence(); 11 | 12 | // specifies that we're not planning to use the block at anymore 13 | void garbageCollect(int index); 14 | 15 | int getLevel(); 16 | 17 | void skipTo(int index); 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashBlock_Stream.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // A HashBlock_Stream is essentially a lazy List>. 7 | // A HashBlock_Stream produces collections of HashBlock, one at a time. 8 | // It does this by starting with lots of individual HashBlocks and gradually combining them with their 9 | // neighbors until there are none left. 
10 | // The way in which HashBlocks are combined in deterministic based on the contents of those blocks. 11 | // So, when given the same substring in various positions, the pattern of blocks produced for that 12 | // substring will always be the same, to facilitate matching via hashing. 13 | // Additionally, the number of HashBlocks with any given size decays exponentially with respect to 14 | // their size, so the total number of HashBlocks produced for a given sequence is a constant times the 15 | // length of that sequence. 16 | public class HashBlock_Stream { 17 | public HashBlock_Stream(Sequence sequence, boolean assumeOnlyUsedOnce, HashBlock_Buffer blockListener) { 18 | this.blocks = new HashBlock_BaseRow(sequence, blockListener); 19 | this.sequence = sequence; 20 | this.assumeOnlyUsedOnce = assumeOnlyUsedOnce; 21 | this.blockListener = blockListener; 22 | } 23 | 24 | // combines the current batch of blocks into a new batch, and returns the previous batch 25 | public HashBlock_Row getNextBatch() { 26 | this.ensureFreshBlocks(); 27 | this.emittedCurrentBlocks = true; 28 | return this.blocks; 29 | } 30 | 31 | // ensures that this.blocks refers to a batch that we haven't yet given to the user 32 | private void ensureFreshBlocks() { 33 | if (this.emittedCurrentBlocks) { 34 | this.advance(); 35 | } 36 | } 37 | 38 | // advances this.blocks to the next batch 39 | private void advance() { 40 | this.blocks = new HashBlock_ParentRow(this.blocks, this.assumeOnlyUsedOnce, this.blockListener); 41 | // Check whether we can use the HashBlock_Compiler 42 | // The HashBlock_Compiler can skip generating some hashblocks for parent rows, so if we are listening for all parent hashblocks then we can't use the compiler 43 | boolean blockListenerAllowsCompiler; 44 | if (this.blockListener == null) { 45 | // if there is no block listener then we don't need to worry about generating all hashblocks for parent rows 46 | blockListenerAllowsCompiler = true; 47 | } else { 48 | int 
maxBlockLength = ((int)Math.pow(2, this.blocks.getLevel())); 49 | int maxGapmerLength = HashBlock.getMaxGapmerNumBasepairsUsed(maxBlockLength); 50 | if (this.blockListener.getMinInterestingSize() > maxGapmerLength) { 51 | // if this.blockListener is not interested in blocks from this row, then it's ok to use a compiler 52 | blockListenerAllowsCompiler = true; 53 | } else { 54 | // block listener might be interested in blocks from parent rows 55 | blockListenerAllowsCompiler = false; 56 | } 57 | blockListenerAllowsCompiler = false; 58 | } 59 | if (blockListenerAllowsCompiler) { 60 | // Check whether it is more efficient to use the HashBlock_Compiler 61 | // For rows at higher levels, the number of combinations to save is too many for our compiler to be faster 62 | // For the first row, the existing logic is already as fast as a compiler 63 | // For rows just above the bottom row, the compiler should help a little bit 64 | if (this.blocks.getLevel() <= 3 && this.blocks.getLevel() > 0) { 65 | this.blocks = new HashBlock_Compiler(this.blocks); 66 | } 67 | } 68 | this.emittedCurrentBlocks = false; 69 | } 70 | 71 | private Sequence sequence; 72 | private HashBlock_Row blocks; 73 | // whether this.blocks has been given to a caller already 74 | private boolean emittedCurrentBlocks = false; 75 | private boolean assumeOnlyUsedOnce; 76 | private HashBlock_Buffer blockListener; 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/mapper/HashJob.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // a HashJob specifies that a certain part of the reference genome needs to be analyzed to generate HashBlocks 6 | public class HashJob { 7 | public HashJob(Sequence sequence, int minStartIndex, int maxStartIndexExclusive) { 8 | this.sequence = sequence; 9 | this.minStartIndex = minStartIndex; 10 | this.maxStartIndexExclusive = 
/** Renders an array of bin counts as a small text-mode column chart. */
public class Histogram {
    // number of text rows used for the bars
    private static final int CHART_HEIGHT = 10;
    // size of indent of body compared to title
    private static final int BODY_INDENT_SIZE = 1;

    /**
     * Formats {@code counts} as a column chart made of text rows.
     *
     * <p>The result holds the title, then {@value #CHART_HEIGHT} chart rows from top to bottom,
     * then the x-axis line, the line with the formatted {@code xMin} and {@code xMax}, and the
     * x-axis name. Each column is two characters wide. The tallest bin is scaled to the full
     * chart height; the character at the top of a bar depends on how far the bar reaches into
     * that row.
     *
     * @param title first row of the output
     * @param yName label printed to the left of the y-axis on the row below the middle
     * @param xName label printed below the x-axis
     * @param xMin value shown at the left end of the x-axis, with two decimals
     * @param xMax value shown at the right end of the x-axis, with two decimals
     * @param desiredNumBins if smaller than {@code counts.length}, the counts are first merged
     *     into this many bins with {@link #squash}
     * @param counts one value per bin
     * @return the rows of the chart, top to bottom
     */
    public static List<String> formatColumn(String title, String yName, String xName, double xMin, double xMax, int desiredNumBins, double[] counts) {
        double[] binCounts;
        if (desiredNumBins < counts.length)
            binCounts = squash(counts, desiredNumBins);
        else
            binCounts = counts;

        List<String> rows = new ArrayList<String>();
        String leftSpace = repeat(" ", yName.length() + BODY_INDENT_SIZE); // empty space to the left of the y-axis

        rows.add(title);
        double maxCount = getMax(binCounts);
        int numColumns = binCounts.length;

        double[] scaledValues;
        if (maxCount > 0)
            scaledValues = rescale(binCounts, CHART_HEIGHT / maxCount);
        else
            scaledValues = binCounts;

        // Plot the graph contents
        int middleRowIndex = CHART_HEIGHT / 2 - 1;
        for (int y = CHART_HEIGHT - 1; y >= 0; y--) {
            StringBuilder rowBuilder = new StringBuilder();
            if (y == middleRowIndex) {
                rowBuilder.append(repeat(" ", BODY_INDENT_SIZE)).append(yName);
            } else {
                rowBuilder.append(leftSpace);
            }
            rowBuilder.append("|");
            for (int x = 0; x < numColumns; x++) {
                rowBuilder.append(chooseMarker(scaledValues, x, y));
                rowBuilder.append(" ");
            }
            rows.add(rowBuilder.toString());
        }
        rows.add(leftSpace + "." + repeat("--", numColumns));
        String lowerBoundFormatted = String.format("%.2f", xMin);
        String upperBoundFormatted = String.format("%.2f", xMax);
        // keep at least one space between the two bounds even if the chart is narrower than they are
        int numSpacesBetween = Math.max(1, numColumns * 2 - lowerBoundFormatted.length() - upperBoundFormatted.length());
        String rangeContents = lowerBoundFormatted + repeat(" ", numSpacesBetween) + upperBoundFormatted;
        rows.add(leftSpace + " " + rangeContents);
        rows.add(leftSpace + " " + xName);
        return rows;
    }

    /**
     * Chooses the character drawn for column {@code x} in chart row {@code y}.
     * Uses a different character at the top of the bar based on how far the bar reaches into the row.
     */
    private static String chooseMarker(double[] scaledValues, int x, int y) {
        double difference = scaledValues[x] - y;
        if (difference >= 0.8)
            return "#";
        if (difference >= 0.6)
            return "^";
        if (difference >= 0.4) {
            String marker = "-";
            // for interior columns, slant the marker when the neighbors are strictly rising or falling
            if (x > 0 && x + 1 < scaledValues.length) {
                if (scaledValues[x - 1] < scaledValues[x] && scaledValues[x] < scaledValues[x + 1])
                    marker = "/";
                if (scaledValues[x - 1] > scaledValues[x] && scaledValues[x] > scaledValues[x + 1])
                    marker = "\\";
            }
            return marker;
        }
        if (difference >= 0.2)
            return "_";
        return " ";
    }

    /** Returns the largest value in {@code binCounts}, or 0 if there is none above 0. */
    private static double getMax(double[] binCounts) {
        double maxCount = 0;
        for (int i = 0; i < binCounts.length; i++) {
            maxCount = Math.max(maxCount, binCounts[i]);
        }
        return maxCount;
    }

    /** Returns a new array holding every value of {@code binCounts} times {@code multiplier}. */
    private static double[] rescale(double[] binCounts, double multiplier) {
        double[] results = new double[binCounts.length];
        for (int i = 0; i < binCounts.length; i++) {
            results[i] = binCounts[i] * multiplier;
        }
        return results;
    }

    private static String repeat(String text, int count) {
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < count; i++) {
            builder.append(text);
        }
        return builder.toString();
    }

    /**
     * Merges {@code counts} into {@code desiredNumBins} bins of equal width.
     *
     * <p>An input bin that falls entirely inside one output bin is added to it. An input bin that
     * straddles a boundary is split between the two output bins in proportion to the overlap.
     * Only those two output bins are ever written, so this is meant for
     * {@code desiredNumBins <= counts.length}; with more output bins than input bins, an input
     * bin that covers more than two output bins leaves the ones in between untouched.
     *
     * @param counts the input bins
     * @param desiredNumBins number of output bins
     * @return a new array of length {@code desiredNumBins}
     * @throws IllegalArgumentException if {@code desiredNumBins} is negative, or is zero while
     *     {@code counts} is not empty
     */
    public static double[] squash(double[] counts, int desiredNumBins) {
        if (desiredNumBins < 0 || (desiredNumBins == 0 && counts.length > 0)) {
            throw new IllegalArgumentException("Cannot squash " + counts.length + " bins into " + desiredNumBins + " bins");
        }
        double[] results = new double[desiredNumBins];
        double start = 0;
        for (int readIndex = 0; readIndex < counts.length; readIndex++) {
            // position, in output-bin units, at which this input bin ends
            double end = (double)(readIndex + 1) / (double)counts.length * (double)results.length;
            if ((int)start == (int)end) {
                results[(int)start] += counts[readIndex];
            } else {
                int lowIndex = (int)start;
                double lowerWeight = (lowIndex + 1 - start);
                double upperWeight = end - (int)(lowIndex + 1);
                double totalWeight = lowerWeight + upperWeight;
                int highIndex = lowIndex + 1;
                results[lowIndex] += counts[readIndex] * lowerWeight / totalWeight;
                // the last input bin ends exactly at results.length, so there is no upper bin for it
                if (highIndex < results.length)
                    results[highIndex] += counts[readIndex] * upperWeight / totalWeight;
            }
            start = end;
        }
        return results;
    }
}
implements AlignmentListener { 9 | public IndelSummarizer() { 10 | this.extensionCounts = new ArrayList(); 11 | } 12 | 13 | public void addAlignments(List queryAlignments) { 14 | ArrayList additions = new ArrayList(); 15 | 16 | for (QueryAlignments alignments: queryAlignments) { 17 | for (Map.Entry> foundAlignments: alignments.getAlignments().entrySet()) { 18 | List alignment = foundAlignments.getValue(); 19 | Query query = foundAlignments.getKey(); 20 | if (alignment.size() > 0) { 21 | QueryAlignment firstAlignment = alignment.get(0); 22 | for (SequenceAlignment component: firstAlignment.getComponents()) { 23 | for (AlignedBlock block: component.getSections()) { 24 | int indelLength = block.getIndelLength(); 25 | if (indelLength > 0) { 26 | while (additions.size() <= indelLength) { 27 | additions.add(0); 28 | } 29 | additions.set(indelLength, additions.get(indelLength) + 1); 30 | } 31 | } 32 | } 33 | } 34 | } 35 | } 36 | addIndels(additions); 37 | } 38 | 39 | private void addIndels(ArrayList indelLengths) { 40 | synchronized(this) { 41 | while (this.extensionCounts.size() < indelLengths.size()) { 42 | this.extensionCounts.add(0); 43 | } 44 | for (int i = 0; i < indelLengths.size(); i++) { 45 | this.extensionCounts.set(i, this.extensionCounts.get(i) + indelLengths.get(i)); 46 | } 47 | } 48 | } 49 | 50 | public double[] getInterestingIndelLengthCounts() { 51 | // compute total 52 | double total = 0; 53 | for (int i = 0; i < this.extensionCounts.size(); i++) { 54 | total += this.extensionCounts.get(i); 55 | } 56 | // find max length that is a significant fraction of the total 57 | int maxInterestingLength = 0; 58 | for (int i = 0; i < this.extensionCounts.size(); i++) { 59 | if (this.extensionCounts.get(i) * 100 >= total) 60 | maxInterestingLength = i + 1; 61 | } 62 | // also show the next length so the caller can know that we have more data 63 | if (maxInterestingLength + 1 < this.extensionCounts.size()) { 64 | maxInterestingLength++; 65 | } 66 | 67 | // return 
results 68 | double[] results = new double[maxInterestingLength]; 69 | for (int i = 0; i < results.length; i++) { 70 | results[i] = this.extensionCounts.get(i); 71 | } 72 | return results; 73 | } 74 | 75 | ArrayList extensionCounts; 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/mapper/LocalAligner.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // A LocalAligner aligns a query to a small section of the reference, including finding indels 4 | interface LocalAligner { 5 | SequenceAlignment align(SequenceSection querySection, SequenceSection referenceSection, AlignmentParameters parameters, AlignmentAnalysis alignmentAnalysis); 6 | void setLogger(Logger logger); 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/mapper/Logger.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class Logger { 4 | public static Logger NoOpLogger = new Logger(null, 1, 0); 5 | 6 | public Logger(TextWriter writer) { 7 | this.writer = writer; 8 | this.maxEnabledDepth = Integer.MAX_VALUE; 9 | this.initialize(); 10 | } 11 | 12 | public Logger(TextWriter writer, int depth, int maxEnabledDepth) { 13 | this.writer = writer; 14 | this.depth = depth; 15 | this.maxEnabledDepth = maxEnabledDepth; 16 | this.initialize(); 17 | } 18 | 19 | private void initialize() { 20 | this.prefix = this.computePrefix(this.depth); 21 | this.enabled = (this.depth <= this.maxEnabledDepth); 22 | } 23 | 24 | public void log(String message) { 25 | if (this.enabled) { 26 | this.writer.write(this.prefix + message.replace("\n", "\n" + this.prefix)); 27 | } else { 28 | throw new IllegalArgumentException("Called log() on a disabled (depth = " + depth + ", maxEnabledDepth = " + maxEnabledDepth + ") with message '" + message + "'"); 29 | } 30 | } 31 | 32 | public boolean 
/** Returns metadata about Mapper. */
public class MapperMetadata {

    /**
     * Returns the version of Mapper that is running, read from the {@code mapper.version} entry
     * of the {@code /mapper.properties} resource.
     *
     * @return the version, or {@code "unknown"} if the resource or the entry is missing
     * @throws RuntimeException if the resource exists but cannot be read
     */
    public static String getVersion() {
        Properties properties = new Properties();
        // try-with-resources closes the stream; the type is fully qualified so that no new import is needed
        try (java.io.InputStream stream = MapperMetadata.class.getResourceAsStream("/mapper.properties")) {
            if (stream == null) {
                // getResourceAsStream returns null when the resource is absent; loading from null
                // would fail with a NullPointerException that the catch below does not handle
                return "unknown";
            }
            properties.load(stream);
        } catch (IOException e) {
            throw new RuntimeException("Failed to get X-Mapper version", e);
        }
        return properties.getProperty("mapper.version", "unknown");
    }

    /**
     * A guess of the command line used to run X-Mapper.
     * This might not get the correct filepath for java.
     * This might not get exactly the correct filepath of the X-Mapper jar.
     *
     * @return the guessed command line, or {@code "unknown"} if {@link #setMainArguments} has not
     *     been given the arguments
     */
    public static String guessCommandLine() {
        String[] mainArguments = getMainArguments();
        if (mainArguments == null)
            return "unknown";

        String javaArgumentsString = String.join(" ", getJavaArguments());
        Path mapperJarPath = getXMapperPath();
        Path workingDirPath = new File(".").toPath();

        Path simplifiedXMapperPath = simplifyPath(mapperJarPath, workingDirPath);

        String mainArgumentsString = String.join(" ", mainArguments);
        return "java " + javaArgumentsString + " -jar " + simplifiedXMapperPath + " " + mainArgumentsString;
    }

    /** Returns the location that this class was loaded from. */
    private static Path getXMapperPath() {
        try {
            File mapperJar = new File(MapperMetadata.class.getProtectionDomain().getCodeSource().getLocation().toURI());
            return mapperJar.toPath();
        } catch (URISyntaxException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Returns a simpler path for destPath given a working directory of workingDir.
     *
     * @return destPath relative to workingDir if that text is no longer than destPath's own text,
     *     otherwise destPath unchanged
     */
    public static Path simplifyPath(Path destPath, Path workingDir) {
        Path relativePath = workingDir.toAbsolutePath().relativize(destPath.toAbsolutePath());
        // Return relative path if it's shorter
        if (relativePath.toString().length() <= destPath.toString().length()) {
            return relativePath;
        }
        return destPath;
    }

    /** Returns the input arguments reported by the runtime MXBean. */
    public static List<String> getJavaArguments() {
        return ManagementFactory.getRuntimeMXBean().getInputArguments();
    }

    /** Returns the array last given to {@link #setMainArguments}, or null if there was none. */
    public static String[] getMainArguments() {
        return mainArguments;
    }

    /** Stores the arguments for later use by {@link #guessCommandLine()}; the array is not copied. */
    public static void setMainArguments(String[] arguments) {
        mainArguments = arguments;
    }

    private static String[] mainArguments;

}
Stores a collection of alignedBlocks 9 | public class MatchDatabase implements AlignmentListener { 10 | public MatchDatabase(double queryEndFraction) { 11 | this.queryEndFraction = queryEndFraction; 12 | this.alignmentsBySequence = new HashMap(); 13 | } 14 | 15 | // Adds the given QueryAlignment's to this 16 | public void addAlignments(List alignments) { 17 | Map> alignmentsByReference = this.groupByReference(alignments); 18 | 19 | List recipients = new ArrayList(); 20 | for (Map.Entry> job: alignmentsByReference.entrySet()) { 21 | Sequence reference = job.getKey(); 22 | Alignments alignmentsHere; 23 | synchronized (this.alignmentsBySequence) { 24 | alignmentsHere = this.getOrCreateAlignments(reference); 25 | } 26 | recipients.add(alignmentsHere); 27 | alignmentsHere.add(job.getValue()); 28 | } 29 | for (Alignments recipient: recipients) { 30 | recipient.offerProcess(); 31 | } 32 | } 33 | 34 | private Map> groupByReference(List allAlignments) { 35 | Map> alignmentsByReference = new HashMap>(); 36 | 37 | for (QueryAlignments alignments: allAlignments) { 38 | for (Map.Entry> entry : alignments.getAlignments().entrySet()) { 39 | List queryAlignments = entry.getValue(); 40 | if (queryAlignments.size() > 0) { 41 | float weight = (float)1.0 / (float)queryAlignments.size(); 42 | for (QueryAlignment queryAlignment: queryAlignments) { 43 | for (SequenceAlignment alignment: queryAlignment.getComponents()) { 44 | List blocks = alignment.getSections(); 45 | if (blocks.size() > 0) { 46 | Sequence reference = blocks.get(0).getSequenceB(); 47 | List alignmentsOnThisRef = alignmentsByReference.get(reference); 48 | if (alignmentsOnThisRef == null) { 49 | alignmentsOnThisRef = new ArrayList(); 50 | alignmentsByReference.put(reference, alignmentsOnThisRef); 51 | } 52 | alignmentsOnThisRef.add(new WeightedAlignment(alignment, queryAlignment, weight)); 53 | } 54 | } 55 | } 56 | } 57 | } 58 | } 59 | return alignmentsByReference; 60 | } 61 | 62 | 63 | // Map from name of sequence to 
Alignments on that sequence 64 | public Map groupByPosition() { 65 | return alignmentsBySequence; 66 | } 67 | 68 | private Alignments getOrCreateAlignments(Sequence reference) { 69 | Alignments alignments = alignmentsBySequence.get(reference); 70 | if (alignments == null) { 71 | alignments = new Alignments(reference, this.queryEndFraction); 72 | alignmentsBySequence.put(reference, alignments); 73 | } 74 | return alignments; 75 | } 76 | 77 | private Map alignmentsBySequence; 78 | private double queryEndFraction; 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/mapper/MultiHashBlock.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // A MultiHashBlock represents several alternate HashBlocks 7 | // The reason for this is if a sequence contains ambiguities then the sequence really refers to multiple possible series of base pairs 8 | // A MultiHashBlock refers to a section of HashBlocks for each possible set of underlying base pairs 9 | public class MultiHashBlock implements IMultiHashBlock { 10 | public MultiHashBlock(List possibilities) { 11 | this.possibilities = possibilities; 12 | } 13 | 14 | // used by IMultiHashBlock 15 | public HashBlock getSingle() { 16 | return null; 17 | } 18 | public List getPossibilities() { 19 | return this.possibilities; 20 | } 21 | public int getStartIndex() { 22 | int min = -1; 23 | for (ConditionalHashBlock possibility : this.possibilities) { 24 | HashBlock block = possibility.getHashBlock(); 25 | if (block != null) { 26 | int value = block.getStartIndex(); 27 | if (min < 0 || min > value) 28 | min = value; 29 | } 30 | } 31 | return min; 32 | } 33 | 34 | public int getEndIndex() { 35 | int max = -1; 36 | for (ConditionalHashBlock possibility : this.possibilities) { 37 | HashBlock block = possibility.getHashBlock(); 38 | if (block != null) { 39 | int 
value = block.getEndIndex(); 40 | if (max < value) 41 | max = value; 42 | } 43 | } 44 | return max; 45 | } 46 | 47 | public int getMinLength() { 48 | int min = -1; 49 | for (ConditionalHashBlock possibility : this.possibilities) { 50 | HashBlock block = possibility.getHashBlock(); 51 | if (block != null) { 52 | int value = block.getLength(); 53 | if (min < 0 || min > value) 54 | min = value; 55 | } 56 | } 57 | return min; 58 | } 59 | 60 | public IMultiHashBlock withEnd(int index) { 61 | int shift = index - this.getEndIndex(); 62 | List shiftedPossibilities = new ArrayList(); 63 | for (ConditionalHashBlock possibility : possibilities) { 64 | shiftedPossibilities.add(possibility.shifted(shift)); 65 | } 66 | return new MultiHashBlock(shiftedPossibilities); 67 | } 68 | 69 | public String toString(Sequence sequence) { 70 | StringBuilder builder = new StringBuilder(); 71 | builder.append("("); 72 | boolean first = true; 73 | for (ConditionalHashBlock conditional : this.possibilities) { 74 | if (first) { 75 | first = false; 76 | } else { 77 | builder.append("|"); 78 | } 79 | builder.append(conditional.toString(sequence)); 80 | } 81 | builder.append(")"); 82 | return builder.toString(); 83 | } 84 | private List possibilities; 85 | } 86 | -------------------------------------------------------------------------------- /src/main/java/mapper/MutationDetectionParameters.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class MutationDetectionParameters { 4 | public static MutationDetectionParameters defaultFilter() { 5 | MutationDetectionParameters result = new MutationDetectionParameters(); 6 | result.minSNPTotalDepth = 5; 7 | result.minSNPDepthFraction = (float)0.9; 8 | 9 | result.minIndelTotalStartDepth = 1; 10 | result.minIndelStartDepthFraction = (float)0.8; 11 | 12 | result.minIndelContinuationTotalDepth = 1; 13 | result.minIndelContinuationDepthFraction = (float)0.7; 14 | 15 | return result; 16 | } 
17 | 18 | public static MutationDetectionParameters emptyFilter() { 19 | return new MutationDetectionParameters(); 20 | } 21 | 22 | public float minSNPTotalDepth; 23 | public float minSNPDepthFraction; 24 | 25 | public float minIndelTotalStartDepth; 26 | public float minIndelStartDepthFraction; 27 | 28 | public float minIndelContinuationTotalDepth; 29 | public float minIndelContinuationDepthFraction; 30 | 31 | public boolean supportsSNP(float mutationDepth, float totalDepth) { 32 | if (totalDepth < this.minSNPTotalDepth) 33 | return false; 34 | if (mutationDepth <= 0) 35 | return false; 36 | float mutationFraction = mutationDepth / totalDepth; 37 | if (mutationFraction < this.minSNPDepthFraction) 38 | return false; 39 | return true; 40 | } 41 | 42 | public boolean supportsIndelStart(AlignmentPosition frequencies) { 43 | float middleDepth = frequencies.getMiddleCount(); 44 | if (middleDepth < this.minIndelTotalStartDepth) 45 | return false; 46 | float middleIndelDepth; 47 | float endIndelDepth; 48 | if (frequencies.getReference() == '-') { 49 | middleIndelDepth = middleDepth - frequencies.getMiddleReferenceCount(); 50 | endIndelDepth = frequencies.getEndCount() - frequencies.getEndReferenceCount(); 51 | } else { 52 | middleIndelDepth = frequencies.getMiddleAlternateCount('-'); 53 | endIndelDepth = frequencies.getEndAlternateCount('-'); 54 | } 55 | if (middleIndelDepth <= 0 && endIndelDepth <= 0) 56 | return false; 57 | float indelFraction = middleIndelDepth / middleDepth; 58 | if (indelFraction < this.minIndelStartDepthFraction) 59 | return false; 60 | return true; 61 | } 62 | 63 | public boolean supportsIndelContinuation(AlignmentPosition frequencies) { 64 | float middleDepth = frequencies.getMiddleCount(); 65 | if (middleDepth < this.minIndelContinuationTotalDepth) 66 | return false; 67 | 68 | float middleIndelDepth; 69 | float endIndelDepth; 70 | if (frequencies.getReference() == '-') { 71 | middleIndelDepth = middleDepth - frequencies.getMiddleReferenceCount(); 
72 | endIndelDepth = frequencies.getEndCount() - frequencies.getEndReferenceCount(); 73 | } else { 74 | middleIndelDepth = frequencies.getMiddleAlternateCount('-'); 75 | endIndelDepth = frequencies.getEndAlternateCount('-'); 76 | } 77 | 78 | if (middleIndelDepth <= 0 && endIndelDepth <= 0) 79 | return false; 80 | 81 | float indelFraction = middleIndelDepth / middleDepth; 82 | if (indelFraction < this.minIndelContinuationDepthFraction) 83 | return false; 84 | return true; 85 | } 86 | 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/mapper/MutationsFormatRequest.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | class MutationsFormatRequest { 4 | public MutationsFormatRequest(Sequence sequence, int startIndex, int length, FilteredAlignments alignments, int jobIndex) { 5 | this.sequence = sequence; 6 | this.startIndex = startIndex; 7 | this.length = length; 8 | this.alignments = alignments; 9 | this.jobIndex = jobIndex; 10 | } 11 | 12 | public Sequence sequence; 13 | public int startIndex; 14 | public int length; 15 | public FilteredAlignments alignments; 16 | public int jobIndex; 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/mapper/OrderingUtils.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | class OrderingUtils { 4 | static SequencePosition[] orderDeterministically(SequencePosition[] items) { 5 | // handle base cases 6 | if (items == null) 7 | return null; 8 | if (items.length < 2) 9 | return items; 10 | // allocate results array 11 | SequencePosition[] results = new SequencePosition[items.length]; 12 | 13 | // hash items into bins and count collisions 14 | int numCollisions = 0; 15 | for (int i = 0; i < items.length; i++) { 16 | SequencePosition item = items[i]; 17 | int index = chooseIndex(items[i], 
items.length); 18 | SequencePosition existing = results[index]; 19 | if (existing == null) { 20 | results[index] = item; 21 | } else { 22 | if (item.compareTo(existing) > 0) { 23 | results[index] = item; 24 | } 25 | numCollisions++; 26 | } 27 | } 28 | 29 | if (numCollisions < 1) 30 | return results; 31 | 32 | // identify items that we didn't have space for 33 | int collisionIndex = 0; 34 | SequencePosition[] collisions = new SequencePosition[numCollisions]; 35 | for (int i = 0; i < items.length; i++) { 36 | SequencePosition item = items[i]; 37 | int index = chooseIndex(items[i], items.length); 38 | if (results[index] != item) { 39 | collisions[collisionIndex] = item; 40 | collisionIndex++; 41 | } 42 | } 43 | SequencePosition[] deterministicCollisions = orderDeterministically(collisions); 44 | 45 | // put leftovers back into the results array 46 | int writeIndex = 0; 47 | for (int i = 0; i < deterministicCollisions.length; i++) { 48 | while (results[writeIndex] != null) { 49 | writeIndex++; 50 | } 51 | results[writeIndex] = deterministicCollisions[i]; 52 | } 53 | 54 | return results; 55 | } 56 | 57 | static int chooseIndex(SequencePosition item, int numItems) { 58 | return item.getStartIndex() % numItems; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/mapper/OverriddenSequence.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.HashMap; 4 | import java.util.Map; 5 | 6 | // An OverriddenSequence is a sequence that is based on another Sequence plus some changes 7 | public class OverriddenSequence extends Sequence { 8 | public OverriddenSequence(Sequence original, String name) { 9 | super(name, null, original.getLength(), original.getPath()); 10 | this.original = original; 11 | } 12 | 13 | /*public void put(Integer offset, char value) { 14 | byte encoded = Basepairs.encode(value); 15 | this.putEncoded(offset, encoded); 
16 | }*/ 17 | 18 | public void putEncoded(Integer offset, Byte value) { 19 | Byte existingOverride = this.overrides.get(offset); 20 | if (existingOverride != null) { 21 | throw new IllegalArgumentException("Cannot override " + this.getName() + "[" + offset + "] to " + Basepairs.decode(value) + " because it is already overridden to " + Basepairs.decode(existingOverride)); 22 | } 23 | //System.err.println("overriding " + this.getName() + "[" + offset + "] = " + Basepairs.decode(value)); 24 | 25 | this.overrides.put(offset, value); 26 | } 27 | 28 | @Override 29 | protected byte computeEncodedCharAt(int index) { 30 | Byte overridden = this.overrides.get(index); 31 | if (overridden != null) 32 | return overridden; 33 | return this.original.encodedCharAt(index); 34 | } 35 | 36 | private Sequence original; 37 | private Map overrides = new HashMap(); 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/mapper/PackJob.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // a PackJob specifies that there are some HashBlocks to add to a PackedMap 6 | public class PackJob { 7 | public PackJob(Sequence sequence, List blocks, boolean preventDuplicates) { 8 | this.sequence = sequence; 9 | this.blocks = blocks; 10 | this.preventDuplicates = preventDuplicates; 11 | } 12 | 13 | public Sequence sequence; 14 | public List blocks; 15 | public boolean preventDuplicates; 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/mapper/PairedEndQueryProvider.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // A PairedEndQueryProvider generates Query objects from Illumina-style paired-end reads 7 | public class PairedEndQueryProvider implements QueryProvider { 8 | 
public PairedEndQueryProvider(SequenceProvider leftsProvider, SequenceProvider rightsProvider, double expectedInnerDistance, double spacingDeviationPerUnitPenalty) { 9 | this.sequenceProviders = new ArrayList(1); 10 | this.sequenceProviders.add(leftsProvider); 11 | this.sequenceProviders.add(rightsProvider); 12 | this.expectedInnerDistance = expectedInnerDistance; 13 | this.spacingDeviationPerUnitPenalty = spacingDeviationPerUnitPenalty; 14 | } 15 | 16 | public QueryBuilder getNextQueryBuilder() { 17 | List components = new ArrayList(this.sequenceProviders.size()); 18 | boolean anyNull = false; 19 | for (SequenceProvider provider : this.sequenceProviders) { 20 | SequenceBuilder builder = provider.getNextSequence(); 21 | if (builder == null) { 22 | anyNull = true; 23 | } 24 | components.add(builder); 25 | } 26 | if (anyNull) { 27 | if (components.get(0) == null && components.get(1) == null) { 28 | // both query providers ended after the same number of queries 29 | return null; 30 | } 31 | // one query provider ended and the other didn't 32 | int nullIndex; 33 | if (components.get(0) == null) 34 | nullIndex = 0; 35 | else 36 | nullIndex = 1; 37 | int nonNullIndex = 1 - nullIndex; 38 | SequenceProvider completedQueries = sequenceProviders.get(nullIndex); 39 | SequenceProvider remainingQueries = sequenceProviders.get(nonNullIndex); 40 | 41 | throw new IllegalArgumentException("" + remainingQueries + " has more queries than " + completedQueries + "!"); 42 | } 43 | // Choose an upper bound on the maximum possible offset between sequences 44 | return new QueryBuilder(components, this.expectedInnerDistance, this.spacingDeviationPerUnitPenalty); 45 | } 46 | 47 | @Override 48 | public String toString() { 49 | return "paired queries: " + this.sequenceProviders.get(0).toString() + ", " + this.sequenceProviders.get(1).toString(); 50 | } 51 | 52 | public boolean get_allReadsContainQualityInformation() { 53 | for (SequenceProvider sequenceProvider : this.sequenceProviders) { 54 | 
if (!sequenceProvider.get_allReadsContainQualityInformation()) { 55 | return false; 56 | } 57 | } 58 | return true; 59 | } 60 | 61 | public boolean get_containsPairedEndReads() { 62 | return true; 63 | } 64 | 65 | private List sequenceProviders; 66 | private double expectedInnerDistance; 67 | private double spacingDeviationPerUnitPenalty; 68 | } 69 | -------------------------------------------------------------------------------- /src/main/java/mapper/PathAligner_Runner.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.PriorityQueue; 8 | import java.util.TreeSet; 9 | 10 | // A PathAligner aligns two sequences by exploring possible alignments mostly along a path 11 | public class PathAligner_Runner implements LocalAligner { 12 | public PathAligner_Runner() { 13 | } 14 | 15 | public void setLogger(Logger logger) { 16 | this.logger = logger; 17 | } 18 | 19 | public SequenceAlignment align(SequenceSection querySection, SequenceSection referenceSection, AlignmentParameters parameters, AlignmentAnalysis alignmentAnalysis) { 20 | // make a PathAligner_impl so this can be stateless 21 | return new PathAligner(logger).align(querySection, referenceSection, parameters, alignmentAnalysis); 22 | } 23 | private Logger logger; 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/mapper/PenaltyAnalysis.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class PenaltyAnalysis { 4 | public double minPossiblePenalty; 5 | 6 | public double maxInsertionExtensionPenalty; 7 | public double maxDeletionExtensionPenalty; 8 | 9 | public int offsetWithMostHashblockMatches; 10 | public int numHashBlockMatchesWithBestOffset; 11 | } 12 | 
--------------------------------------------------------------------------------
/src/main/java/mapper/PenaltySummarizer.java:
--------------------------------------------------------------------------------
package mapper;

import java.util.List;
import java.util.Map;

// Summarizes the penalties of alignments into a histogram
// Each bin covers an equal share of the range from 0 to the maximum allowed penalty for a query
public class PenaltySummarizer implements AlignmentListener {
  public PenaltySummarizer(AlignmentParameters alignmentParameters) {
    this.counts = new int[20];
    this.alignmentParameters = alignmentParameters;
  }

  // Counts the penalty of the first alignment of each query that has any alignments
  // Penalties that fall outside the histogram's range are not counted
  public void addAlignments(List<QueryAlignments> queryAlignments) {
    // accumulate locally first so that the lock is taken only once per call
    int[] additions = new int[this.counts.length];
    for (QueryAlignments alignments: queryAlignments) {
      for (Map.Entry<Query, List<QueryAlignment>> foundAlignments: alignments.getAlignments().entrySet()) {
        List<QueryAlignment> alignment = foundAlignments.getValue();
        Query query = foundAlignments.getKey();
        if (alignment.size() > 0) {
          QueryAlignment firstAlignment = alignment.get(0);
          double penalty = firstAlignment.getPenalty();
          double maxAllowedPenalty = query.getLength() * this.alignmentParameters.MaxErrorRate;
          // avoid dividing by zero
          if (maxAllowedPenalty == 0)
            maxAllowedPenalty = 1;
          double penaltyFraction = penalty / maxAllowedPenalty;
          int binIndex = (int)((double)penaltyFraction * (double)this.counts.length);
          // a negative penalty would otherwise index before the start of the array
          if (binIndex >= 0 && binIndex < additions.length)
            additions[binIndex]++;
        }
      }
    }
    this.add(additions);
  }

  // Returns a copy of the histogram
  public double[] getCounts() {
    double[] results = new double[this.counts.length];
    // take the same lock as add() so that a concurrent update isn't observed halfway through
    synchronized(this) {
      for (int i = 0; i < this.counts.length; i++) {
        results[i] = this.counts[i];
      }
    }
    return results;
  }

  private void add(int[] additions) {
    synchronized(this) {
      for (int i = 0; i < additions.length; i++) {
        this.counts[i] += additions[i];
      }
    }
  }

  int[] counts;
  AlignmentParameters alignmentParameters;
}

--------------------------------------------------------------------------------
/src/main/java/mapper/QueriesIterator.java:
--------------------------------------------------------------------------------
package mapper;

import java.util.List;

// A QueriesIterator provides a list of queries
// It reads from each of its providers in order, moving to the next when the current one is exhausted
public class QueriesIterator implements QueryProvider {
  public QueriesIterator(List<QueryProvider> providers) {
    this.providers = providers;
  }

  // Returns the next QueryBuilder, or null once every provider has returned null
  public QueryBuilder getNextQueryBuilder() {
    while (this.nextIndex < this.providers.size()) {
      QueryBuilder next = this.providers.get(this.nextIndex).getNextQueryBuilder();
      if (next != null) {
        return next;
      }
      this.nextIndex++;
    }
    return null;
  }

  // True only if every provider reports quality information for all reads
  public boolean get_allReadsContainQualityInformation() {
    for (QueryProvider provider : this.providers) {
      if (!provider.get_allReadsContainQualityInformation()) {
        return false;
      }
    }
    return true;
  }

  // True if any provider contains paired-end reads
  public boolean get_containsPairedEndReads() {
    for (QueryProvider provider : this.providers) {
      if (provider.get_containsPairedEndReads()) {
        return true;
      }
    }
    return false;
  }

  int nextIndex;
  List<QueryProvider> providers;
}

--------------------------------------------------------------------------------
/src/main/java/mapper/Query.java:
--------------------------------------------------------------------------------
package mapper;

import java.util.ArrayList;
import java.util.List;

// A Query is a question that can be asked about where some sequences align
// It can model a single sequence or can model Illumina-style paired-end reads
public class Query {
  public Query(Sequence sequence) {
    this.sequences = new ArrayList<Sequence>(1);
    this.sequences.add(sequence);
    this.spacingDeviationPerUnitPenalty = 1;
  }

  public Query(Sequence forward, Sequence reverse, double expectedInnerDistance, double spacingDeviationPerUnitPenalty) {
    this.sequences = new ArrayList<Sequence>(2);
    this.sequences.add(forward);
    this.sequences.add(reverse);
    this.expectedInnerDistance = expectedInnerDistance;
    this.spacingDeviationPerUnitPenalty = spacingDeviationPerUnitPenalty;
  }

  // The given list is stored directly, not copied
  public Query(List<Sequence> sequences, double expectedInnerDistance, double spacingDeviationPerUnitPenalty) {
    this.sequences = sequences;
    this.expectedInnerDistance = expectedInnerDistance;
    this.spacingDeviationPerUnitPenalty = spacingDeviationPerUnitPenalty;
  }

  // Returns a single-sequence Query for the sequence at the given index, keeping this query's spacing parameters
  public Query subquery(int index) {
    Query subquery = new Query(this.sequences.get(index));
    subquery.expectedInnerDistance = this.expectedInnerDistance;
    subquery.spacingDeviationPerUnitPenalty = this.spacingDeviationPerUnitPenalty;
    return subquery;
  }

  public List<Sequence> getSequences() {
    return this.sequences;
  }

  public Sequence getSequence(int index) {
    return this.sequences.get(index);
  }

  public int getNumSequences() {
    return this.sequences.size();
  }

  public double getExpectedInnerDistance() {
    return this.expectedInnerDistance;
  }

  public double getSpacingDeviationPerUnitPenalty() {
    return this.spacingDeviationPerUnitPenalty;
  }

  // The id of a query is the id of its first sequence
  public long getId() {
    return this.sequences.get(0).getId();
  }

  // Returns the sum of the lengths of all sequences in this query
  public int getLength() {
    int total = 0;
    for (Sequence sequence: this.sequences) {
      total += sequence.getLength();
    }
    return total;
  }

  // Returns the text of the sequences separated by " / ", or a short summary if there are more than 1000 base pairs in total
  public String format() {
    int totalSize = this.getLength();
    if (totalSize > 1000) {
      return "[" + this.sequences.size() + " sequences totalling " + totalSize + " base pairs]";
    }

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < this.sequences.size(); i++) {
      Sequence sequence = this.sequences.get(i);
      builder.append(sequence.getText());
      if (i < this.sequences.size() - 1) {
        builder.append(" / ");
      }
    }
    return builder.toString();
  }

  public void compress() {
    for (Sequence sequence: this.sequences) {
      sequence.compress();
    }
  }

  public void decompress() {
    for (Sequence sequence: this.sequences) {
      sequence.decompress();
    }
  }

  @Override
  public String toString() {
    StringBuilder builder = new StringBuilder();
    for (Sequence sequence : this.sequences) {
      builder.append(sequence.getText() + " ");
    }
    return builder.toString();
  }

  @Override
  public int hashCode() {
    int hash = 0;
    for (Sequence sequence: sequences) {
      hash *= 13;
      hash += sequence.getContentHash();
    }
    return hash;
  }

  // Two queries are equal if their sequences have equal text and their spacing parameters match
  @Override
  public boolean equals(Object otherObject) {
    if (this == otherObject)
      return true;
    // equals must return false rather than throw for null or for objects of another type
    if (!(otherObject instanceof Query))
      return false;
    Query other = (Query)otherObject;
    if (other.sequences.size() != sequences.size())
      return false;
    for (int i = 0; i < sequences.size(); i++) {
      if (!sequences.get(i).textEquals(other.sequences.get(i)))
        return false;
    }
    if (maxOffset != other.maxOffset)
      return false;
    if (expectedInnerDistance != other.expectedInnerDistance)
      return false;
    if (spacingDeviationPerUnitPenalty != other.spacingDeviationPerUnitPenalty)
      return false;
    //System.err.println("Equal queries: " + this.toString() + " and " + other.toString());
    return true;
  }

  private List<Sequence> sequences;
  // max distance between where the sequences can be aligned
  // No constructor sets this, so it currently always holds its default of 0
  private int maxOffset;
  private double expectedInnerDistance;
  private double spacingDeviationPerUnitPenalty;
}

--------------------------------------------------------------------------------
/src/main/java/mapper/QueryAlignments.java:
--------------------------------------------------------------------------------
package mapper;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// A QueryAlignments lists the places where a Query may align
// It's mostly a List with some extra metadata about whether we were able to align each Sequence
public class QueryAlignments {

  public QueryAlignments(Query query, QueryAlignment alignment) {
    this.alignments = new LinkedHashMap<Query, List<QueryAlignment>>(1);
    List<QueryAlignment> queryAlignments = new ArrayList<QueryAlignment>(1);
    queryAlignments.add(alignment);
    this.alignments.put(query, queryAlignments);
  }

  // The given map is stored directly, not copied
  public QueryAlignments(Map<Query, List<QueryAlignment>> alignments) {
    this.alignments = alignments;
  }

  public QueryAlignments(Query query, List<QueryAlignment> alignments) {
    this.alignments = new LinkedHashMap<Query, List<QueryAlignment>>(1);
    this.alignments.put(query, alignments);
  }

  // Creates a QueryAlignments recording that the given query has no alignments
  public QueryAlignments(Query query) {
    this.alignments = new LinkedHashMap<Query, List<QueryAlignment>>(1);
    this.alignments.put(query, new ArrayList<QueryAlignment>(0));
  }

  public Map<Query, List<QueryAlignment>> getAlignments() {
    return this.alignments;
  }

  // Returns the alignments for the given query, or an empty list if the query isn't present
  public List<QueryAlignment> getAlignmentsForQuery(Query query) {
    List<QueryAlignment> result = this.alignments.get(query);
    if (result == null)
      result = new ArrayList<QueryAlignment>(0);
    return result;
  }

  // Returns the total number of alignments across all queries
  public int getTotalOfAllComponents() {
    int total = 0;
    for (List<QueryAlignment> value: this.alignments.values()) {
      total += value.size();
    }
    return total;
  }

  // Returns the number of subqueries that this alignment represents
  // If our query wasn't a paired-end read, this number should be 1
  // If our query was a paired-end read:
  //   If neither mate aligned, this number should be 1
  //   If both mates aligned together, this number should be 1
  //   If one query aligned and one didn't, this number should be 2
  //   If both aligned to different places, this number should be 2
  public int getNumQueries() {
    return alignments.size();
  }

  // Returns the number of queries for which we found an alignment.
  // If our query wasn't a paired-end read:
  //   this number should be 0 (unaligned) or 1 (aligned)
  // If our query was a paired-end read:
  //   If neither mate aligned anywhere, this should return 0
  //   If both mates aligned together, this should return 1
  //   If one mate aligned and one didn't, this should return 1
  //   If both mates aligned but not together, this should return 2
  public int getNumQueriesHavingAlignments() {
    int count = 0;
    for (Map.Entry<Query, List<QueryAlignment>> entry: this.alignments.entrySet()) {
      if (entry.getValue().size() > 0) {
        count++;
      }
    }
    return count;
  }

  // Returns the first query in iteration order, or null if there are none
  public Query getFirstQuery() {
    for (Query query: this.alignments.keySet()) {
      return query;
    }
    return null;
  }

  // Returns the alignments of the first query in iteration order, or null if there are no queries
  public List<QueryAlignment> getFirstAlignments() {
    for (List<QueryAlignment> components: this.alignments.values()) {
      return components;
    }
    return null;
  }

  private Map<Query, List<QueryAlignment>> alignments;
}

--------------------------------------------------------------------------------
/src/main/java/mapper/QueryBuilder.java:
--------------------------------------------------------------------------------
package mapper;

import java.util.ArrayList;
import java.util.List;

// A QueryBuilder holds one SequenceBuilder per sequence and builds a Query from them on request
public class QueryBuilder {

  public QueryBuilder(SequenceBuilder sequenceProvider) {
    this.sequenceProviders = new ArrayList<SequenceBuilder>(1);
    this.sequenceProviders.add(sequenceProvider);
  }

  // The given list is stored directly, not copied
  public QueryBuilder(List<SequenceBuilder> sequenceProviders, double expectedInnerDistance, double spacingDeviationPerUnitPenalty) {
    this.sequenceProviders = sequenceProviders;
    this.expectedInnerDistance = expectedInnerDistance;
    this.spacingDeviationPerUnitPenalty = spacingDeviationPerUnitPenalty;
  }

  // Builds each sequence and collects them into a Query
  public Query build() {
    List<Sequence> sequences = new ArrayList<Sequence>(this.sequenceProviders.size());
    for (SequenceBuilder sequenceProvider : this.sequenceProviders) {
      sequences.add(sequenceProvider.build());
    }
    return new Query(sequences, this.expectedInnerDistance, this.spacingDeviationPerUnitPenalty);
  }

  // Assigns the same id to every sequence in this query
  public void setId(long id) {
    for (SequenceBuilder builder : this.sequenceProviders) {
      builder.setId(id);
    }
  }

  // Returns the sum of the lengths of all of the sequence builders
  public int getLength() {
    int total = 0;
    for (SequenceBuilder builder : this.sequenceProviders) {
      total += builder.getLength();
    }
    return total;
  }

  private List<SequenceBuilder> sequenceProviders;
  private double expectedInnerDistance;
  private double spacingDeviationPerUnitPenalty;
}

--------------------------------------------------------------------------------
/src/main/java/mapper/QueryMatch.java:
--------------------------------------------------------------------------------
package mapper;

import java.util.ArrayList;
import java.util.List;

// A QueryMatch groups one SequenceMatch per sequence of a query, together with a priority
public class QueryMatch {
  public QueryMatch(SequenceMatch component, int priority) {
    this.components = new ArrayList<SequenceMatch>(1);
    this.components.add(component);
    this.priority = priority;
  }

  // The given list is stored directly, not copied
  public QueryMatch(List<SequenceMatch> components, int priority, boolean hintCheckComponentsInForwardOrder) {
    this.components = components;
    this.priority = priority;
    this.hintCheckComponentsInForwardOrder = hintCheckComponentsInForwardOrder;
  }

  public QueryMatch(SequenceMatch component1, SequenceMatch component2, int priority, boolean hintCheckComponentsInForwardOrder) {
    this.components = new ArrayList<SequenceMatch>(2);
    this.components.add(component1);
    this.components.add(component2);
    this.priority = priority;
    this.hintCheckComponentsInForwardOrder = hintCheckComponentsInForwardOrder;
  }

  public List<SequenceMatch> getComponents() {
    return this.components;
  }
  public SequenceMatch getComponent(int index) {
    return this.components.get(index);
  }
  public int getNumSequences() {
    return this.components.size();
  }
  public int getPriority() {
    return this.priority;
  }

  // Returns the sum of the lengths of sequence A of every component
  public int getQueryTotalLength() {
    int total = 0;
    for (SequenceMatch match: this.components) {
      total += match.getSequenceA().getLength();
    }
    return total;
  }

  // Returns the smaller of the B start indices of the first and last components
  public int getStartIndexB() {
    SequenceMatch last = this.components.get(this.components.size() - 1);
    SequenceMatch first = this.components.get(0);
    return Math.min(first.getStartIndexB(), last.getStartIndexB());
  }

  // Returns the larger of the B end indices of the first and last components
  public int getEndIndexB() {
    SequenceMatch last = this.components.get(this.components.size() - 1);
    SequenceMatch first = this.components.get(0);
    // compare end indices here: comparing start indices would report a start position as the end
    return Math.max(first.getEndIndexB(), last.getEndIndexB());
  }

  // Returns the distance on sequence B from the start of the earliest component to the end of the latest,
  // taking into account whether this match is reversed
  public int getTotalDistanceAcross() {
    SequenceMatch last = this.components.get(this.components.size() - 1);
    SequenceMatch first = this.components.get(0);
    if (this.getReversed())
      return first.getEndIndexB() - last.getStartIndexB();
    else
      return last.getEndIndexB() - first.getStartIndexB();
  }

  // Returns the total inner distance between subsequent pairs of components
  public int getTotalDistanceBetweenComponents() {
    int totalDistance = 0;
    SequenceMatch previousComponent = this.components.get(0);
    for (int i = 1; i < this.components.size(); i++) {
      SequenceMatch currentComponent = this.components.get(i);
      totalDistance += getDistance(previousComponent, currentComponent);
      previousComponent = currentComponent;
    }
    return totalDistance;
  }

  // Tells whether the other match has the same orientation and equal components in the same order
  public boolean samePosition(QueryMatch other) {
    if (this.components.size() != other.components.size()) {
      return false;
    }
    // the orientation is read from the first component, so it can only be compared if there is one
    if (!this.components.isEmpty() && this.getReversed() != other.getReversed()) {
      return false;
    }
    for (int i = 0; i < components.size(); i++) {
      if (!components.get(i).equals(other.components.get(i))) {
        return false;
      }
    }
    return true;
  }

  // Returns the position summaries of the components separated by " / ", or null if there are no components
  public String summarizePositionB() {
    String result = null;
    for (SequenceMatch component: this.components) {
      String append = component.summarizePositionB();
      if (result == null)
        result = append;
      else
        result = result + " / " + append;
    }
    return result;
  }

  public boolean get_hintCheckComponentsInForwardOrder() {
    return hintCheckComponentsInForwardOrder;
  }

  // Returns the distance between the two blocks
  // Can return a negative number if they overlap
  // Returns Integer.MAX_VALUE if the two blocks are on different B sequences
  private int getDistance(SequenceMatch a, SequenceMatch b) {
    if (a.getSequenceB() != b.getSequenceB())
      return Integer.MAX_VALUE;
    int difference;
    if (this.getReversed())
      difference = a.getStartIndexB() - b.getEndIndexB();
    else
      difference = b.getStartIndexB() - a.getEndIndexB();
    return difference;
  }

  // The orientation of a QueryMatch is the orientation of its first component
  private boolean getReversed() {
    return this.components.get(0).getReversed();
  }

  private List<SequenceMatch> components;
  private int priority;
  private boolean hintCheckComponentsInForwardOrder;
}

--------------------------------------------------------------------------------
/src/main/java/mapper/QueryProvider.java:
--------------------------------------------------------------------------------
package mapper;

// A QueryProvider is a source of queries
public interface QueryProvider {
  // Implementations in this package return null when there are no more queries
  QueryBuilder getNextQueryBuilder();
  boolean get_allReadsContainQualityInformation();
  boolean get_containsPairedEndReads();
}

--------------------------------------------------------------------------------
/src/main/java/mapper/RandomMomentSelector.java:
--------------------------------------------------------------------------------
// Picks one random moment in time out of all of the moments at which select() is called
public class RandomMomentSelector {
  public RandomMomentSelector() {
    this.startTime = System.currentTimeMillis();
    this.random = new Random();
  }

  // Reports whether the given moment becomes the selected moment.
  // A result of true means that this moment is now the selected one and that every
  // previously selected moment is no longer selected.
  public boolean select(long currentTimeMillis) {
    long sinceStart = currentTimeMillis - this.startTime;
    if (sinceStart < this.targetDuration) {
      // the current threshold has not been reached, so this moment is not selected
      return false;
    }
    // Draw a fraction strictly between 0 and 1; fall back to 0.5 if the draw is unusable
    double fraction = this.random.nextFloat();
    boolean usable = fraction > 0 && fraction < 1;
    if (!usable) {
      fraction = 0.5;
    }
    // The next selection cannot happen before the elapsed time reaches (elapsed / fraction)
    this.targetDuration = sinceStart / fraction;
    return true;
  }


  private Random random;
  private double targetDuration;
  private long startTime;
}
java.util.Map; 6 | import java.util.Set; 7 | import java.util.TreeMap; 8 | 9 | // detects duplication in a genome 10 | public class Readable_DuplicationDetector { 11 | public Readable_DuplicationDetector(DuplicationDetector source, Logger logger) { 12 | this.duplicationDetector = source; 13 | this.logger = logger; 14 | } 15 | 16 | // Returns interesting duplications on this sequence 17 | // A duplication is considered interesting if it doesn't contain another duplication 18 | public TreeMap getInterestingDuplicationsOnSequence(Sequence sequence) { 19 | this.ensureSetup(); 20 | TreeMap duplicationsHere = this.interestingDuplicationsBySequence.get(sequence); 21 | return duplicationsHere; 22 | } 23 | 24 | // Determines whether it's possible for there to be an interesting duplication in this range 25 | // If so, returns a non-null Duplication 26 | // Usually, the returned Duplication will be in the given range, but not necessarily 27 | // (It's possible that we didn't save the Duplication that is in this range) 28 | public Integer mayContainDuplicationInRange(Sequence sequence, int startIndex, int endIndex) { 29 | int windowStart = duplicationDetector.getWindowNumber(startIndex); 30 | int windowEnd = duplicationDetector.getWindowNumber(endIndex); 31 | TreeMap entriesHere = getInterestingDuplicationsOnSequence(sequence); 32 | if (entriesHere == null) 33 | return null; // no duplications on this sequence 34 | Map.Entry previous = entriesHere.floorEntry(endIndex); 35 | if (previous != null) { 36 | int previousWindow = duplicationDetector.getWindowNumber(previous.getKey()); 37 | if (previousWindow >= windowStart && previousWindow <= windowEnd) 38 | return previous.getKey(); 39 | } 40 | Map.Entry next = entriesHere.ceilingEntry(startIndex); 41 | if (next != null) { 42 | int nextWindow = duplicationDetector.getWindowNumber(next.getKey()); 43 | if (nextWindow >= windowStart && nextWindow <= windowEnd) 44 | return next.getKey(); 45 | } 46 | return null; 47 | } 48 | 49 | public 
Set getAll() { 50 | this.ensureSetup(); 51 | return this.duplicationDetector.getAll(); 52 | } 53 | 54 | public double getDetectionGranularity() { 55 | return duplicationDetector.getDetectionGranularity(); 56 | } 57 | 58 | public void helpSetup() { 59 | this.ensureSetup(); 60 | } 61 | 62 | private void ensureSetup() { 63 | if (this.interestingDuplicationsBySequence == null) { 64 | this.duplicationDetector.setup(this, this.logger); 65 | } 66 | } 67 | 68 | public void setup(Map> interestingDuplicationsBySequence) { 69 | this.interestingDuplicationsBySequence = interestingDuplicationsBySequence; 70 | } 71 | 72 | DuplicationDetector duplicationDetector; 73 | Map> interestingDuplicationsBySequence; 74 | Logger logger; 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/mapper/ReferenceAlignmentCounter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | import java.util.TreeSet; 8 | import java.io.BufferedOutputStream; 9 | import java.io.File; 10 | import java.io.FileOutputStream; 11 | import java.io.FileNotFoundException; 12 | import java.io.IOException; 13 | 14 | // Count the number of reads aligned to each reference/genome 15 | public class ReferenceAlignmentCounter implements AlignmentListener { 16 | public ReferenceAlignmentCounter() { 17 | } 18 | 19 | public void addAlignments(List alignments) { 20 | synchronized (this) { 21 | for (QueryAlignments queryAlignments: alignments) { 22 | for (Map.Entry> alignment: queryAlignments.getAlignments().entrySet()) { 23 | List subqueryAlignments = alignment.getValue(); 24 | if (subqueryAlignments.size() > 0) 25 | this.addAlignmentsForQuery(subqueryAlignments); 26 | } 27 | } 28 | } 29 | } 30 | 31 | // Get reference path/filename of each reference sequence that mapped to this query 32 | private void 
addAlignmentsForQuery(List alignments) { 33 | TreeSet referenceList = new TreeSet(); 34 | for (QueryAlignment queryAlignment: alignments) { 35 | for (SequenceAlignment alignment: queryAlignment.getComponents()) { 36 | for (AlignedBlock block: alignment.getSections()) { 37 | referenceList.add(block.getSequenceB().getPath()); 38 | } 39 | } 40 | } 41 | if (!this.referenceAlignmentCount.containsKey(referenceList)) { 42 | this.referenceAlignmentCount.put(referenceList, 0); 43 | } 44 | this.referenceAlignmentCount.put(referenceList, this.referenceAlignmentCount.get(referenceList) + 1); 45 | } 46 | // Summarize all references/genomes that mapped to this query 47 | public void sumAlignments(String outputPath) throws FileNotFoundException, IOException { 48 | File file = new File(outputPath); 49 | FileOutputStream fileStream = new FileOutputStream(file); 50 | BufferedOutputStream bufferedStream = new BufferedOutputStream(fileStream); 51 | bufferedStream.write("Genome_set\tNo.reads\n".getBytes()); 52 | ArrayList alloutput = new ArrayList(referenceAlignmentCount.size()); 53 | for (TreeSet genomeset : referenceAlignmentCount.keySet()) { 54 | StringBuilder templine = new StringBuilder(); 55 | for (String genome : genomeset){ 56 | templine.append(splitPath(genome) + "-"); 57 | } 58 | templine.append("\t" + referenceAlignmentCount.get(genomeset) + "\n"); 59 | bufferedStream.write(templine.toString().getBytes()); 60 | } 61 | bufferedStream.close(); 62 | fileStream.close(); 63 | } 64 | // Split path and get file filename 65 | public static String splitPath(String pathString) { 66 | return new File(pathString).getName(); 67 | } 68 | private HashMap,Integer> referenceAlignmentCount = new HashMap,Integer>();//genome set, how many reads map to that genome set 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/mapper/ReferenceDatabase.java: -------------------------------------------------------------------------------- 1 | package 
mapper; 2 | 3 | // A ReferenceDatabase stores analyses of a set of reference genomes 4 | // A ReferenceDatabase can be created by callers in other projects via the Api class 5 | public class ReferenceDatabase { 6 | 7 | // The functions in this class are not intended to be called by code from other projects. 8 | ReferenceDatabase(HashBlock_Database hashblockDatabase, DuplicationDetector duplicationDetector, AlignmentCache alignmentCache) { 9 | this.hashblockDatabase = hashblockDatabase; 10 | this.duplicationDetector = duplicationDetector; 11 | this.alignmentCache = alignmentCache; 12 | } 13 | 14 | HashBlock_Database hashblockDatabase; 15 | DuplicationDetector duplicationDetector; 16 | AlignmentCache alignmentCache; 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/mapper/ReferenceProvider.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | interface ReferenceProvider { 4 | HashBlock_Database get_HashBlock_database(Logger logger); 5 | boolean getCanUseHelp(); 6 | Sequence getOriginalSequence(Sequence modified); 7 | boolean getEnableGapmers(); 8 | } 9 | -------------------------------------------------------------------------------- /src/main/java/mapper/RegionAlignments.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | // A RegionAlignments tells how parts of Sequences align 9 | // One RegionAlignments handles the middles of the queries and one RegionAlignments handles the ends of the queries 10 | public class RegionAlignments { 11 | public RegionAlignments(Sequence sequence) { 12 | this.forwardAlignments = new DirectionalAlignments(sequence); 13 | this.reverseAlignments = new DirectionalAlignments(sequence); 14 | } 15 | public void addForward(int referenceIndex, byte 
encodedValue, float weight, Sequence querySequence, int queryPosition) { 16 | this.forwardAlignments.add(referenceIndex, encodedValue, weight, querySequence, queryPosition); 17 | } 18 | public void addReverse(int referenceIndex, byte encodedValue, float weight, Sequence querySequence, int queryPosition) { 19 | this.reverseAlignments.add(referenceIndex, encodedValue, weight, querySequence, queryPosition); 20 | } 21 | public void insertForward(int referenceIndex, String value, float weight, Sequence querySequence, int queryPosition) { 22 | this.forwardAlignments.insert(referenceIndex, value, weight, querySequence, queryPosition); 23 | } 24 | public void insertReverse(int referenceIndex, String value, float weight, Sequence querySequence, int queryPosition) { 25 | this.reverseAlignments.insert(referenceIndex, value, weight, querySequence, queryPosition); 26 | } 27 | 28 | public void updateCount(AlignmentPosition position, int referenceIndex, boolean nearQueryEnd) { 29 | reverseAlignments.updateCount(position, referenceIndex, false, nearQueryEnd); 30 | forwardAlignments.updateCount(position, referenceIndex, true, nearQueryEnd); 31 | } 32 | 33 | public void updateInsertionCount(AlignmentPosition position, int referenceIndex, int insertionIndex, boolean nearQueryEnd) { 34 | forwardAlignments.updateInsertionCount(position, referenceIndex, insertionIndex, true, nearQueryEnd); 35 | reverseAlignments.updateInsertionCount(position, referenceIndex, insertionIndex, false, nearQueryEnd); 36 | } 37 | 38 | private DirectionalAlignments forwardAlignments; 39 | private DirectionalAlignments reverseAlignments; 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mapper/ReverseComplementSequence.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | class ReverseComplementSequence extends Sequence { 4 | public ReverseComplementSequence(Sequence forward) { 5 | 
super(forward.getName() + "-rev", null, forward.getLength(), forward.getPath()); 6 | this.complementedFrom = forward; 7 | // The only thing we use the ID for is to break ties between two different queries that were both read from a file, to make sure that the order we process the queries doesn't affect the output 8 | // Also, the reverse complement sequence is essentially a different view of the same sequence anyway 9 | // We give this reverse complement sequence the same ID as the forward sequence because it's convenient 10 | this.setId(forward.getId()); 11 | } 12 | 13 | @Override 14 | protected byte computeEncodedCharAt(int index) { 15 | byte other = complementedFrom.encodedCharAt(this.getLength() - index - 1); 16 | return Basepairs.complement(other); 17 | } 18 | 19 | @Override 20 | public String getSourceName() { 21 | return this.getComplementedFrom().getName(); 22 | } 23 | 24 | @Override 25 | public Sequence getComplementedFrom() { 26 | return this.complementedFrom; 27 | } 28 | 29 | @Override 30 | public Sequence reverseComplement() { 31 | return this.getComplementedFrom(); 32 | } 33 | 34 | private Sequence complementedFrom; 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/mapper/Sequence.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | 6 | // A Sequence is a list of genomic base pairs: A,C,G,T 7 | public class Sequence { 8 | public Sequence(String name, String packedContents, int length, String path) { 9 | this.name = name; 10 | this.packedContents = packedContents; 11 | this.length = length; 12 | this.path = path; 13 | } 14 | 15 | // returns the name of this sequence 16 | public String getName() { 17 | return this.name; 18 | } 19 | 20 | // returns the name of the sequence that this sequence came from, or its own name if it didn't come from another sequence 21 | public String 
getSourceName() { 22 | return this.getName(); 23 | } 24 | 25 | public String getPath() { 26 | return this.path; 27 | } 28 | 29 | // an internal unique identifier that is shorter than getName 30 | public long getId() { 31 | return this.identifier; 32 | } 33 | 34 | public void setId(long identifier) { 35 | this.identifier = identifier; 36 | } 37 | 38 | public String getRange(int startIndex, int count) { 39 | StringBuilder builder = new StringBuilder(); 40 | int endIndex = startIndex + count; 41 | for (int i = startIndex; i < endIndex; i++) { 42 | builder.append(this.charAt(i)); 43 | } 44 | return builder.toString(); 45 | } 46 | 47 | public char charAt(int index) { 48 | byte basePair = encodedCharAt(index); 49 | return Basepairs.decode(basePair); 50 | } 51 | 52 | public byte encodedCharAt(int index) { 53 | byte[] decompressedContents = this.decompressedContents; 54 | if (decompressedContents != null) 55 | return decompressedContents[index]; 56 | return this.computeEncodedCharAt(index); 57 | } 58 | 59 | protected byte computeEncodedCharAt(int index) { 60 | // character index (16 bits per character and 4 bits per basepair, so 4 basepairs per character) 61 | int characterIndex = index >> 2; 62 | if (characterIndex < 0) { 63 | throw new IllegalArgumentException("computeEncodedCharAt(" + index + ") attempting to access encoded character at " + characterIndex); 64 | } 65 | // offset within character (4 bits per basepair and 4 basepairs per character) 66 | int offsetInCharacter = (index & 3) << 2; 67 | 68 | // the character to extract bits from 69 | char character = this.packedContents.charAt(characterIndex); 70 | 71 | // result 72 | byte result = (byte)((character >> offsetInCharacter) & 15); 73 | return result; 74 | } 75 | 76 | public void decompress() { 77 | byte[] decompressed = new byte[this.length]; 78 | for (int i = 0; i < this.length; i++) { 79 | decompressed[i] = this.computeEncodedCharAt(i); 80 | } 81 | this.decompressedContents = decompressed; 82 | } 83 | public 
void compress() { 84 | this.decompressedContents = null; 85 | Sequence complementedFrom = this.getComplementedFrom(); 86 | if (complementedFrom != null) 87 | complementedFrom.compress(); 88 | } 89 | 90 | public Sequence getSubsequence(int startIndex, int count) { 91 | if (startIndex == 0 && count == this.getLength()) { 92 | return this; 93 | } 94 | return new Subsequence(this, startIndex, count); 95 | } 96 | 97 | public String getText() { 98 | return this.getRange(0, this.getLength()); 99 | } 100 | 101 | public int getLength() { 102 | return this.length; 103 | } 104 | 105 | public Sequence reverseComplement() { 106 | Sequence reverseComplement = new ReverseComplementSequence(this); 107 | if (this.decompressedContents != null) 108 | reverseComplement.decompress(); 109 | return reverseComplement; 110 | } 111 | 112 | // returns the Sequence that this one was created as the reverseComplement of, if any 113 | public Sequence getComplementedFrom() { 114 | return null; 115 | } 116 | 117 | public int compareTo(Sequence other) { 118 | return Long.compare(this.identifier, other.identifier); 119 | } 120 | 121 | public int getContentHash() { 122 | return this.packedContents.hashCode(); 123 | } 124 | 125 | public boolean textEquals(Sequence other) { 126 | if (this.getLength() != other.getLength()) 127 | return false; 128 | for (int i = 0; i < this.getLength(); i++) { 129 | if (this.encodedCharAt(i) != other.encodedCharAt(i)) 130 | return false; 131 | } 132 | return true; 133 | } 134 | 135 | private String name; 136 | private String packedContents; 137 | private byte[] decompressedContents; 138 | private long identifier; 139 | private int length; 140 | private String path; 141 | } 142 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceBuilder.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | 5 | public class 
SequenceBuilder { 6 | public SequenceBuilder() { 7 | this.stringBuilder = new StringBuilder(); 8 | } 9 | public SequenceBuilder setName(String name) { 10 | this.name = name; 11 | return this; 12 | } 13 | public void setPath(String path) { 14 | this.path = path; 15 | } 16 | 17 | private String compress(String text) { 18 | this.currentValue = 0; 19 | this.currentCount = 0; 20 | StringBuilder compressed = new StringBuilder(); 21 | for (int i = 0; i < text.length(); i++) { 22 | char basePair = text.charAt(i); 23 | byte newValue = Basepairs.encode(basePair); 24 | 25 | this.currentValue += ((int)newValue << this.currentCount); 26 | this.currentCount += 4; 27 | if (this.currentCount >= 16) { 28 | compressed.append(this.emitChar()); 29 | } 30 | } 31 | if (currentCount > 0) { 32 | compressed.append(this.emitChar()); 33 | } 34 | return compressed.toString(); 35 | } 36 | 37 | public SequenceBuilder add(char text) { 38 | stringBuilder.append(text); 39 | this.length++; 40 | return this; 41 | } 42 | 43 | public SequenceBuilder add(String text) { 44 | stringBuilder.append(text); 45 | this.length += text.length(); 46 | return this; 47 | } 48 | 49 | public void setId(long identifier) { 50 | this.identifier = identifier; 51 | } 52 | 53 | public Sequence build() { 54 | String compressed = this.compress(this.stringBuilder.toString().toUpperCase()); 55 | if (this.buildRead) { 56 | ReadSequence result = new ReadSequence(name, compressed, length, path); 57 | result.nameSuffix = this.nameSuffix; 58 | result.qualityString = this.qualityString; 59 | result.commentString = this.commentString; 60 | result.setId(this.identifier); 61 | return result; 62 | } else { 63 | Sequence result = new Sequence(name, compressed, length, path); 64 | result.setId(this.identifier); 65 | return result; 66 | } 67 | } 68 | 69 | public SequenceBuilder asRead(String nameSuffix, String qualityString, String commentString) { 70 | this.buildRead = true; 71 | this.nameSuffix = nameSuffix; 72 | this.qualityString = 
qualityString; 73 | this.commentString = commentString; 74 | return this; 75 | } 76 | 77 | public int getLength() { 78 | return this.length; 79 | } 80 | 81 | private char emitChar() { 82 | char newValue = (char)(this.currentValue % 65536); 83 | this.currentCount -= 16; 84 | this.currentValue = this.currentValue >> 16; 85 | return newValue; 86 | } 87 | 88 | private String name; 89 | private String path; 90 | StringBuilder stringBuilder; 91 | int currentValue; 92 | int currentCount; 93 | int length; 94 | boolean buildRead = false; 95 | String nameSuffix; 96 | String qualityString; 97 | String commentString; 98 | long identifier; 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceCondition.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.TreeMap; 4 | 5 | // A SequenceCondition refers to the concept of a Sequence having a certain base pair at a certain position 6 | // A SequenceCondition can evaluate to True or False 7 | // A SequenceCondition might be useful in the case of ambiguous sequences (containing 'N') 8 | public class SequenceCondition { 9 | public static SequenceCondition ALWAYS = new SequenceCondition(); 10 | 11 | public SequenceCondition() { 12 | this.keys = new int[0]; 13 | this.values = new char[0]; 14 | } 15 | public SequenceCondition(int position, char value) { 16 | this.keys = new int[1]; 17 | this.keys[0] = position; 18 | this.values = new char[1]; 19 | this.values[0] = value; 20 | } 21 | private SequenceCondition(int[] keys, char[] values) { 22 | this.keys = keys; 23 | this.values = values; 24 | } 25 | 26 | // returns a SequenceCondition that returns if and only if and return 27 | public SequenceCondition intersect(SequenceCondition other) { 28 | // Check some simple cases 29 | if (other.values.length < 1) 30 | return this; 31 | if (this.values.length < 1) 32 | return other; 33 | if (this == 
other) 34 | return this; 35 | 36 | // check for conflicts 37 | int i = 0; 38 | int j = 0; 39 | int numMatchingKeys = 0; 40 | while (i < this.keys.length && j < other.keys.length) { 41 | int ourKey = this.keys[i]; 42 | int theirKey = other.keys[j]; 43 | if (ourKey < theirKey) { 44 | i++; 45 | } else { 46 | if (theirKey < ourKey) { 47 | j++; 48 | } else { 49 | char ourValue = this.values[i]; 50 | char theirValue = other.values[j]; 51 | if (ourValue != theirValue) { 52 | return null; // conflict 53 | } 54 | numMatchingKeys++; 55 | i++; 56 | j++; 57 | } 58 | } 59 | } 60 | // now compute the merge 61 | if (numMatchingKeys == this.keys.length) 62 | return other; 63 | if (numMatchingKeys == other.keys.length) 64 | return this; 65 | int mergedCapacity = this.keys.length + other.keys.length - numMatchingKeys; 66 | int[] mergedKeys = new int[mergedCapacity]; 67 | char[] mergedValues = new char[mergedCapacity]; 68 | 69 | int writeIndex = 0; 70 | i = j = 0; 71 | while (i < this.keys.length && j < other.keys.length) { 72 | int ourKey = this.keys[i]; 73 | int theirKey = other.keys[j]; 74 | if (ourKey < theirKey) { 75 | mergedKeys[writeIndex] = this.keys[i]; 76 | mergedValues[writeIndex] = this.values[i]; 77 | i++; 78 | } else { 79 | if (theirKey < ourKey) { 80 | mergedKeys[writeIndex] = other.keys[j]; 81 | mergedValues[writeIndex] = other.values[j]; 82 | j++; 83 | } else { 84 | mergedKeys[writeIndex] = this.keys[i]; 85 | mergedValues[writeIndex] = this.values[i]; 86 | i++; 87 | j++; 88 | } 89 | } 90 | writeIndex++; 91 | } 92 | while (i < this.keys.length) { 93 | mergedKeys[writeIndex] = this.keys[i]; 94 | mergedValues[writeIndex] = this.values[i]; 95 | i++; 96 | writeIndex++; 97 | } 98 | while (j < other.keys.length) { 99 | mergedKeys[writeIndex] = other.keys[j]; 100 | mergedValues[writeIndex] = other.values[j]; 101 | j++; 102 | writeIndex++; 103 | } 104 | 105 | return new SequenceCondition(mergedKeys, mergedValues); 106 | } 107 | 108 | // Returns the number of positions that 
each must have a specific value in order to satisfy this condition 109 | // For, example, if this condition says "position 0 is 'A' and position 5 is 'C' " then complexity = 2 110 | public int getComplexity() { 111 | return this.values.length; 112 | } 113 | 114 | public SequenceCondition shifted(int shift) { 115 | SequenceCondition copy = new SequenceCondition(); 116 | copy.values = this.values; 117 | copy.keys = new int[this.keys.length]; 118 | for (int i = 0; i < this.keys.length; i++) { 119 | copy.keys[i] = this.keys[i] + shift; 120 | } 121 | return copy; 122 | } 123 | 124 | public String toString() { 125 | String result = ""; 126 | for (int i = 0; i < this.keys.length; i++) { 127 | if (result.length() > 0) 128 | result += ","; 129 | result += "seq[" + this.keys[i] + "]=" + this.values[i]; 130 | } 131 | return result; 132 | } 133 | 134 | private int[] keys; 135 | private char[] values; 136 | } 137 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceMatch.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // A SequenceMatch says that if one sequence if shifted by a certain amount then it resembles another one. 
4 | public class SequenceMatch { 5 | public SequenceMatch(Sequence sequenceA, Sequence sequenceB, int offset) { 6 | this.sequenceA = sequenceA; 7 | this.sequenceB = sequenceB; 8 | this.offset = offset; 9 | } 10 | public Sequence getSequenceA() { 11 | return this.sequenceA; 12 | } 13 | public Sequence getSequenceB() { 14 | return this.sequenceB; 15 | } 16 | public int getStartIndexB() { 17 | return Math.max(0, offset); 18 | } 19 | public int getEndIndexB() { 20 | return Math.min(offset + sequenceA.getLength(), sequenceB.getLength()); 21 | } 22 | public int getStartIndexA() { 23 | return getStartIndexB() - offset; 24 | } 25 | public int getEndIndexA() { 26 | return getEndIndexB() - offset; 27 | } 28 | public int getLength() { 29 | return getEndIndexB() - getStartIndexB(); 30 | } 31 | public String getTextA() { 32 | return this.sequenceA.getRange(getStartIndexA(), this.getLength()); 33 | } 34 | public String getTextB() { 35 | return this.sequenceB.getRange(getStartIndexB(), this.getLength()); 36 | } 37 | public int getOffset() { 38 | return offset; 39 | } 40 | 41 | @Override 42 | public boolean equals(Object otherObject) { 43 | SequenceMatch other = (SequenceMatch)otherObject; 44 | if (other == null) 45 | return false; 46 | if (this.offset != other.offset) 47 | return false; 48 | if (sequenceA != other.sequenceA) 49 | return false; 50 | if (sequenceB != other.sequenceB) 51 | return false; 52 | return true; 53 | } 54 | 55 | @Override 56 | public int hashCode() { 57 | return offset; 58 | } 59 | 60 | public boolean getReversed() { 61 | return (this.sequenceA.getComplementedFrom() != null); 62 | } 63 | 64 | public String summarizePositionB() { 65 | return this.sequenceB.getName() + " offset " + this.offset; 66 | } 67 | 68 | public String summarize() { 69 | return this.sequenceA.getName() + " at " + this.summarizePositionB(); 70 | } 71 | 72 | public Sequence sequenceA; 73 | public Sequence sequenceB; 74 | public int offset; 75 | // Whether this was generated from a 
matching subsequence 76 | // If false, this was guessed in another way (probably from the paired-end sequence mate's offset) 77 | public boolean fromHashblockMatch = true; 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequencePosition.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // A SequencePosition is a position on a Sequence 4 | public class SequencePosition implements Comparable { 5 | public SequencePosition(Sequence sequence, int startIndex) { 6 | this.sequence = sequence; 7 | this.startIndex = startIndex; 8 | } 9 | 10 | public Sequence getSequence() { 11 | return this.sequence; 12 | } 13 | 14 | public int getStartIndex() { 15 | return this.startIndex; 16 | } 17 | 18 | public int compareTo(SequencePosition other) { 19 | int sequenceComparison = this.sequence.compareTo(other.sequence); 20 | if (sequenceComparison != 0) 21 | return sequenceComparison; 22 | return Integer.compare(this.startIndex, other.startIndex); 23 | } 24 | 25 | @Override 26 | public int hashCode() { 27 | return this.startIndex; 28 | } 29 | 30 | @Override 31 | public boolean equals(Object otherObject) { 32 | SequencePosition other = (SequencePosition)otherObject; 33 | if (other == null) 34 | return false; 35 | if (this.startIndex != other.startIndex) 36 | return false; 37 | if (this.sequence != other.sequence) 38 | return false; 39 | return true; 40 | } 41 | 42 | @Override 43 | public int compareTo(Object otherObject) { 44 | SequencePosition other = (SequencePosition)otherObject; 45 | if (this.sequence != other.sequence) 46 | return Long.compare(this.sequence.getId(), other.sequence.getId()); 47 | if (this.startIndex != other.startIndex) 48 | return Integer.compare(this.startIndex, other.startIndex); 49 | return 0; 50 | } 51 | 52 | @Override 53 | public String toString() { 54 | return this.sequence.getName() + "[" + this.startIndex + "]"; 55 | } 56 | 57 | 
private Sequence sequence; 58 | private int startIndex; 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceProvider.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // A SequenceProvider returns Sequence objects 4 | public interface SequenceProvider { 5 | SequenceBuilder getNextSequence(); 6 | boolean get_allReadsContainQualityInformation(); 7 | } 8 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceSection.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class SequenceSection { 4 | public SequenceSection(Sequence sequence, int startIndex, int endIndex) { 5 | this.sequence = sequence; 6 | this.startIndex = startIndex; 7 | this.endIndex = endIndex; 8 | } 9 | 10 | public Sequence getSequence() { 11 | return this.sequence; 12 | } 13 | 14 | public int getStartIndex() { 15 | return this.startIndex; 16 | } 17 | 18 | public int getEndIndex() { 19 | return this.endIndex; 20 | } 21 | 22 | public int getLength() { 23 | return endIndex - startIndex; 24 | } 25 | 26 | public String format() { 27 | return sequence.getName() + this.formatRange(); 28 | } 29 | 30 | public String formatRange() { 31 | if (this.startIndex != 0 || this.endIndex != this.sequence.getLength()) 32 | return "[" + this.startIndex + ":" + this.endIndex + "]"; 33 | else 34 | return ""; 35 | } 36 | 37 | private Sequence sequence; 38 | private int startIndex; 39 | private int endIndex; 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceSplitter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class SequenceSplitter implements SequenceProvider { 4 | public SequenceSplitter(int maxLength, 
SequenceProvider provider) { 5 | this.maxLength = maxLength; 6 | this.provider = provider; 7 | } 8 | 9 | public SequenceBuilder getNextSequence() { 10 | if (this.numSectionsConsumed >= this.numSections) { 11 | SequenceBuilder sequenceBuilder = this.provider.getNextSequence(); 12 | if (sequenceBuilder == null) { 13 | this.pendingSequence = null; 14 | return null; 15 | } 16 | this.pendingSequence = sequenceBuilder.build(); 17 | this.numSections = (this.pendingSequence.getLength() - 1) / this.maxLength + 1; 18 | this.numSectionsConsumed = 0; 19 | } 20 | int startIndex = this.getStartIndex(); 21 | this.numSectionsConsumed++; 22 | int endIndex = this.getStartIndex(); 23 | Sequence nextSequence = this.pendingSequence.getSubsequence((int)startIndex, (int)(endIndex - startIndex)); 24 | 25 | // convert from Sequence back to SequenceBuilder 26 | // TODO: make this faster if it's important 27 | SequenceBuilder builder = new SequenceBuilder(); 28 | builder.setName(nextSequence.getName()); 29 | builder.setPath(nextSequence.getPath()); 30 | builder.add(nextSequence.getText()); 31 | return builder; 32 | } 33 | 34 | // gets the start index of the next subsequence 35 | private int getStartIndex() { 36 | // use longs for computation to avoid overflow 37 | return (int)((long)this.pendingSequence.getLength() * (long)this.numSectionsConsumed / (long)this.numSections); 38 | } 39 | 40 | public boolean get_allReadsContainQualityInformation() { 41 | // not implemented 42 | return false; 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "" + this.provider + " split to size <= " + this.maxLength; 48 | } 49 | 50 | Sequence pendingSequence; 51 | int numSections; 52 | int numSectionsConsumed; 53 | 54 | SequenceProvider provider; 55 | int maxLength; 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequenceWriter.java: -------------------------------------------------------------------------------- 1 | package 
mapper; 2 | 3 | public interface SequenceWriter { 4 | public void write(Sequence sequence); 5 | public void close(); 6 | } 7 | -------------------------------------------------------------------------------- /src/main/java/mapper/SequencesIterator.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // A SequencesIterator provides a list of sequences 6 | public class SequencesIterator implements SequenceProvider { 7 | public SequencesIterator(List providers) { 8 | this.providers = providers; 9 | } 10 | 11 | public SequenceBuilder getNextSequence() { 12 | while (this.nextIndex < this.providers.size()) { 13 | SequenceBuilder next = this.providers.get(this.nextIndex).getNextSequence(); 14 | if (next != null) { 15 | return next; 16 | } 17 | this.nextIndex++; 18 | } 19 | return null; 20 | } 21 | 22 | public boolean get_allReadsContainQualityInformation() { 23 | for (SequenceProvider provider : this.providers) { 24 | if (!provider.get_allReadsContainQualityInformation()) { 25 | return false; 26 | } 27 | } 28 | return true; 29 | } 30 | 31 | int nextIndex; 32 | List providers; 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/mapper/Serializer.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedOutputStream; 4 | import java.io.File; 5 | import java.io.FileOutputStream; 6 | import java.io.IOException; 7 | import java.util.zip.GZIPOutputStream; 8 | 9 | // A Serializer writes information to a file 10 | // It doesn't worry about making sure the written data can still be read by a future version of the code, because we don't need that 11 | public class Serializer { 12 | public Serializer(File file) throws IOException { 13 | this.outputStream = new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(file))); 14 | } 15 | 16 | public 
void writeBytesAndLength(byte[] data) throws IOException { 17 | int length; 18 | if (data != null) 19 | length = data.length; 20 | else 21 | length = 0; 22 | this.writeString("" + length + ":"); 23 | if (data != null) 24 | this.writeBytes(data); 25 | } 26 | public void writeBytes(byte[] data) throws IOException { 27 | this.outputStream.write(data); 28 | } 29 | 30 | public void writeString(String text) throws IOException { 31 | this.writeBytes(text.getBytes()); 32 | } 33 | 34 | public void writeProperty(String key, String value) throws IOException { 35 | this.writeString(key); 36 | this.writeString(":"); 37 | this.writeString(value); 38 | this.writeString(","); 39 | } 40 | 41 | public void close() throws IOException { 42 | this.outputStream.close(); 43 | } 44 | 45 | private BufferedOutputStream outputStream; 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/mapper/SimilarityAnalysis.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // a SimilarityAnalysis is used by AncestryDetector when looking for similar parts of a reference genome 4 | public class SimilarityAnalysis { 5 | public SimilarityAnalysis(Sequence sequence, int startIndex, int boundIndex, double initialScore) { 6 | this.sequence = sequence; 7 | 8 | this.startIndex = startIndex; 9 | this.boundIndex = boundIndex; 10 | this.currentIndex = startIndex; 11 | this.bestIndex = startIndex; 12 | 13 | this.cumulativeScore = initialScore; 14 | this.bestScore = initialScore; 15 | } 16 | 17 | public void addScore(double scoreHere) { 18 | this.cumulativeScore += scoreHere; 19 | 20 | if (this.cumulativeScore > this.bestScore) { 21 | this.bestScore = this.cumulativeScore; 22 | this.bestIndex = this.currentIndex; 23 | } 24 | } 25 | 26 | public boolean getReachedEndOfSequence() { 27 | return this.currentIndex < 0 || this.currentIndex >= this.sequence.getLength(); 28 | } 29 | 30 | public Sequence 
sequence; 31 | 32 | public int startIndex; 33 | public int boundIndex; 34 | 35 | public double cumulativeScore; 36 | public int currentIndex; 37 | 38 | public double bestScore; 39 | public int bestIndex; 40 | 41 | @Override 42 | public String toString() { 43 | return "SimilarityAnalysis on " + this.sequence.getName() + " from " + this.startIndex + " to " + this.boundIndex; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/mapper/SimpleQueryProvider.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // a SimpleQueryProvider just reads sequences and treats each one as a query 4 | public class SimpleQueryProvider implements QueryProvider { 5 | public SimpleQueryProvider(SequenceProvider sequenceProvider) { 6 | this.sequenceProvider = sequenceProvider; 7 | } 8 | 9 | public QueryBuilder getNextQueryBuilder() { 10 | SequenceBuilder builder = this.sequenceProvider.getNextSequence(); 11 | if (builder == null) { 12 | return null; 13 | } 14 | return new QueryBuilder(builder); 15 | } 16 | 17 | public boolean get_allReadsContainQualityInformation() { 18 | return this.sequenceProvider.get_allReadsContainQualityInformation(); 19 | } 20 | 21 | public boolean get_containsPairedEndReads() { 22 | return false; 23 | } 24 | 25 | @Override 26 | public String toString() { 27 | return this.sequenceProvider.toString(); 28 | } 29 | 30 | private SequenceProvider sequenceProvider; 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/mapper/SkipHighAmbiguity_Aligner.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class SkipHighAmbiguity_Aligner implements LocalAligner { 4 | public SkipHighAmbiguity_Aligner(LocalAligner nextAligner) { 5 | this.nextAligner = nextAligner; 6 | } 7 | 8 | public void setLogger(Logger logger) { 9 | this.logger = 
logger; 10 | this.nextAligner.setLogger(logger); 11 | } 12 | 13 | public SequenceAlignment align(SequenceSection querySection, SequenceSection referenceSection, AlignmentParameters parameters, AlignmentAnalysis alignmentAnalysis) { 14 | int numAmbiguities = 0; 15 | Sequence reference = referenceSection.getSequence(); 16 | for (int i = referenceSection.getStartIndex(); i < referenceSection.getEndIndex(); i++) { 17 | if (Basepairs.isAmbiguous(reference.charAt(i))) { 18 | numAmbiguities++; 19 | } 20 | } 21 | if (numAmbiguities >= referenceSection.getLength() / 4) { 22 | if (this.logger.getEnabled()) { 23 | this.logger.log("Skipping checking for indels due to high number of ambiguities (" + numAmbiguities + ") among " + referenceSection.getLength() + " basepairs"); 24 | } 25 | return null; 26 | } 27 | return this.nextAligner.align(querySection, referenceSection, parameters, alignmentAnalysis); 28 | } 29 | 30 | Logger logger; 31 | LocalAligner nextAligner; 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/mapper/StatusLogger.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | // A StatusLogger is used for logging status messages 6 | // A StatusLogger might choose not to log all messages if there are too many status updates consecutively 7 | public class StatusLogger { 8 | public StatusLogger(Logger logger, long startMillis) { 9 | this.logger = logger; 10 | this.lastLoggedAt = 0; 11 | this.startMillis = startMillis; 12 | } 13 | 14 | // If no message was received recently or if this message is important, then this method will log it 15 | public void log(String message, boolean important) { 16 | if (this.logger.getEnabled()) { 17 | long now = System.currentTimeMillis(); 18 | if (now - lastLoggedAt > 1000 || important) { 19 | this.lastLoggedAt = now; 20 | double elapsedSeconds = (now - this.startMillis) / 1000.0; 21 | 
this.logger.log(message + " at " + elapsedSeconds + "s"); 22 | } 23 | } 24 | } 25 | 26 | public Logger getLogger() { 27 | return this.logger; 28 | } 29 | 30 | Logger logger; 31 | long lastLoggedAt; 32 | long startMillis; 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/mapper/StderrWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | public class StderrWriter implements TextWriter { 6 | public StderrWriter() { 7 | } 8 | 9 | public void write(String message) { 10 | synchronized(this) { 11 | System.err.println(message); 12 | } 13 | } 14 | 15 | public void write(List messages) { 16 | synchronized(this) { 17 | for (String message: messages) { 18 | this.write(message); 19 | } 20 | } 21 | } 22 | 23 | public void flush() { 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/mapper/StdoutWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | public class StdoutWriter implements TextWriter { 6 | public StdoutWriter() { 7 | } 8 | 9 | public void write(String message) { 10 | synchronized(this) { 11 | System.out.println(message); 12 | } 13 | } 14 | 15 | public void write(List messages) { 16 | synchronized(this) { 17 | for (String message: messages) { 18 | this.write(message); 19 | } 20 | } 21 | } 22 | 23 | public void flush() { 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/mapper/StorageFilesystem.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.File; 4 | import java.io.FileOutputStream; 5 | import java.io.IOException; 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | import java.nio.file.Paths; 9 | 
10 | // A StorageFilesystem passes requests along to the actual filesystem 11 | public class StorageFilesystem implements Filesystem { 12 | public boolean createNewFile(File file) throws IOException { 13 | if (file.getParentFile() != null) { file.getParentFile().mkdirs(); } // getParentFile() is null for a path with no parent component 14 | return file.createNewFile(); 15 | } 16 | 17 | public void write(File file, byte[] content) throws IOException { 18 | try (FileOutputStream fileStream = new FileOutputStream(file)) { // the stream is closed even if write() throws 19 | fileStream.write(content); 20 | } 21 | } 22 | 23 | public byte[] readFile(File file) throws IOException { 24 | Path path = Paths.get(file.getAbsolutePath()); 25 | return Files.readAllBytes(path); 26 | } 27 | 28 | public void mkdirs(File dir) throws IOException { 29 | dir.mkdirs(); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/mapper/StringWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | public class StringWriter implements TextWriter { 6 | public StringWriter() { 7 | } 8 | 9 | public void write(String message) { 10 | synchronized(this) { 11 | this.builder.append(message); 12 | this.builder.append("\n"); 13 | } 14 | } 15 | 16 | public void write(List messages) { 17 | synchronized(this) { 18 | for (String message: messages) { 19 | this.write(message); 20 | } 21 | } 22 | } 23 | 24 | public void flush() { 25 | } 26 | 27 | public String getText() { 28 | return this.builder.toString(); 29 | } 30 | 31 | private StringBuilder builder = new StringBuilder(); 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/mapper/Subsequence.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | class Subsequence extends Sequence { 4 | public Subsequence(Sequence original, int start, int length) { 5 | super(original.getName() + "[" + start + ":" + (start + length) + "]",
null, length, original.getPath()); 6 | this.startIndex = start; 7 | this.original = original; 8 | // The only thing we use the ID for is to break ties between two different queries that were both read from a file, to make sure that the order we process the queries doesn't affect the output 9 | // We give this subsequence the same ID as the original sequence because it's convenient 10 | this.setId(original.getId()); 11 | } 12 | 13 | @Override 14 | protected byte computeEncodedCharAt(int index) { 15 | return this.original.encodedCharAt(index + this.startIndex); 16 | } 17 | 18 | private Sequence original; 19 | private int startIndex; 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/mapper/TextWriter.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | 5 | public interface TextWriter { 6 | 7 | void write(String message); 8 | 9 | void write(List messages); 10 | 11 | void flush(); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/mapper/UnalignedQuery_Writer.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedOutputStream; 4 | import java.io.File; 5 | import java.io.FileOutputStream; 6 | import java.io.FileNotFoundException; 7 | import java.io.IOException; 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | import java.util.Map; 11 | 12 | // An UnalignedQuery_Writer writes queries that did not align 13 | public class UnalignedQuery_Writer implements AlignmentListener { 14 | public UnalignedQuery_Writer(String path, boolean allReadsContainQualityInformation) throws FileNotFoundException { 15 | if (path.endsWith(".fastq")) { 16 | if (!allReadsContainQualityInformation) { 17 | throw new IllegalArgumentException("Cannot write a .fastq file (" + path + ") when
not all input reads are in .fastq format"); 18 | } 19 | this.initialize(new FastqWriter(path)); 20 | } else { 21 | if (path.endsWith(".fasta")) { 22 | this.initialize(new FastaWriter(path)); 23 | } else { 24 | throw new IllegalArgumentException("Unsupported output type (must be .fastq or .fasta): " + path); 25 | } 26 | } 27 | } 28 | 29 | private void initialize(SequenceWriter writer) { 30 | this.writer = writer; 31 | } 32 | 33 | public void addAlignments(List alignments) { 34 | synchronized(this) { 35 | for (QueryAlignments queryAlignments: alignments) { 36 | for (Map.Entry> subqueries: queryAlignments.getAlignments().entrySet()) { 37 | if (subqueries.getValue().size() < 1) { 38 | Query query = subqueries.getKey(); 39 | for (Sequence sequence: query.getSequences()) { 40 | this.writer.write(sequence); 41 | } 42 | } 43 | } 44 | } 45 | } 46 | } 47 | 48 | public void close() { 49 | this.writer.close(); 50 | } 51 | 52 | SequenceWriter writer; 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/mapper/Variant.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class Variant { 4 | public Variant(char allele) { 5 | this.allele = allele; 6 | } 7 | 8 | public void setCount(int newCount) { 9 | this.count = newCount; 10 | } 11 | 12 | public int getCount() { 13 | return this.count; 14 | } 15 | 16 | public void addCount(int extraCount) { 17 | this.count += extraCount; 18 | } 19 | 20 | public void setExample(Sequence sequence, int index) { 21 | this.exampleSequence = sequence; 22 | this.exampleIndex = index; 23 | } 24 | 25 | public Sequence getExampleSequence() { 26 | return this.exampleSequence; 27 | } 28 | 29 | public int getExampleIndex() { 30 | return this.exampleIndex; 31 | } 32 | 33 | public char getAllele() { 34 | return this.allele; 35 | } 36 | 37 | private int count; 38 | private Sequence exampleSequence; 39 | private int exampleIndex; 40 | 
private char allele; 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/mapper/Variants.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // A Variants is essentially a Map 4 | public class Variants { 5 | public Variants() { 6 | } 7 | 8 | public Variant get(char allele) { 9 | if (this.variants != null) { 10 | for (Variant variant: this.variants) { 11 | if (variant.getAllele() == allele) 12 | return variant; 13 | } 14 | } 15 | return null; 16 | } 17 | 18 | public Variant getOrCreate(char allele) { 19 | Variant variant = this.get(allele); 20 | if (variant != null) 21 | return variant; 22 | int numVariants = this.getNumVariants(); 23 | Variant[] newVariants = new Variant[numVariants + 1]; 24 | for (int i = 0; i < numVariants; i++) { 25 | newVariants[i] = this.variants[i]; 26 | } 27 | Variant result = new Variant(allele); 28 | newVariants[numVariants] = result; 29 | this.variants = newVariants; 30 | return result; 31 | } 32 | 33 | public Variant[] getAll() { 34 | return this.variants; 35 | } 36 | 37 | private int getNumVariants() { 38 | if (this.variants == null) 39 | return 0; 40 | return this.variants.length; 41 | } 42 | 43 | private Variant[] variants; 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/mapper/VariantsInsertions.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | // A VariantsInsertion is essentially a Variants and a List 7 | public class VariantsInsertions extends Variants { 8 | public VariantsInsertions() { 9 | } 10 | 11 | public Variants getInsertions(int index) { 12 | if (this.insertions == null) 13 | return null; 14 | if (this.insertions.size() <= index) 15 | return null; 16 | return this.insertions.get(index); 17 | } 18 | 19 | public Variants 
getOrCreateInsertions(int index) { 20 | if (this.insertions == null) 21 | this.insertions = new ArrayList(); 22 | while (this.insertions.size() <= index) { 23 | this.insertions.add(new Variants()); 24 | } 25 | return this.insertions.get(index); 26 | } 27 | 28 | private List insertions; 29 | } 30 | -------------------------------------------------------------------------------- /src/main/java/mapper/VcfFormatRequest.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | class VcfFormatRequest { 4 | public VcfFormatRequest(Sequence sequence, int startIndex, int length, FilteredAlignments alignments, int jobIndex) { 5 | this.sequence = sequence; 6 | this.startIndex = startIndex; 7 | this.length = length; 8 | this.alignments = alignments; 9 | this.jobIndex = jobIndex; 10 | } 11 | 12 | public Sequence sequence; 13 | public int startIndex; 14 | public int length; 15 | public FilteredAlignments alignments; 16 | public int jobIndex; 17 | 18 | @Override 19 | public String toString() { 20 | return "VcfFormatRequest " + jobIndex + " on " + sequence.getName() + " at " + startIndex + " length " + length; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/mapper/WeightedAlignment.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | // A WeightedAlignment represents a SequenceAlignment plus a weight at each position 4 | class WeightedAlignment { 5 | public WeightedAlignment(SequenceAlignment sequenceAlignment, QueryAlignment queryAlignment, float weight) { 6 | this.sequenceAlignment = sequenceAlignment; 7 | this.queryAlignment = queryAlignment; 8 | this.overallWeight = weight; 9 | } 10 | 11 | public SequenceAlignment getAlignment() { 12 | return this.sequenceAlignment; 13 | } 14 | 15 | public QueryAlignment getQueryAlignment() { 16 | return this.queryAlignment; 17 | } 18 | 19 | public float 
getWeight(int referenceIndex) { 20 | float numAlignmentsHere = queryAlignment.getNumAlignmentsCoveringIndexB(referenceIndex); 21 | float positionWeight; 22 | if (numAlignmentsHere != 0) { 23 | positionWeight = (float)1.0 / numAlignmentsHere; 24 | } else { 25 | positionWeight = 0; 26 | } 27 | return this.overallWeight * positionWeight; 28 | } 29 | 30 | private SequenceAlignment sequenceAlignment; 31 | private QueryAlignment queryAlignment; 32 | private float overallWeight; 33 | } 34 | -------------------------------------------------------------------------------- /src/main/resources/mapper.properties: -------------------------------------------------------------------------------- 1 | mapper.version=${mapperVersion} 2 | -------------------------------------------------------------------------------- /src/test/java/ApiTest.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | // The ApiTest class checks that all of the expected functions exist and accept the expected parameters 7 | public class ApiTest { 8 | @Test 9 | public void testAlignOnce() { 10 | Api.alignOnce("ACGT", "ACGT", new AlignmentParameters(), Logger.NoOpLogger); 11 | Sequence querySequence = new SequenceBuilder().setName("query").add("ACGT").build(); 12 | Api.alignOnce(new Query(querySequence), "ACGT", new AlignmentParameters(), Logger.NoOpLogger); 13 | } 14 | 15 | @Test 16 | public void testReusingDatabase() { 17 | String genome = "AACGTCGT"; 18 | ReferenceDatabase referenceDatabase = Api.newDatabase(genome, Logger.NoOpLogger); 19 | Api.align("AACG", referenceDatabase, new AlignmentParameters(), Logger.NoOpLogger); 20 | Api.align("ACGT", referenceDatabase, new AlignmentParameters(), Logger.NoOpLogger); 21 | } 22 | 23 | @Test 24 | public void testCanUseCache() { 25 | StringWriter writer = new StringWriter(); 26 | Logger logger = new Logger(writer); 27 | String genome = "AACCGT"; 28 
| ReferenceDatabase referenceDatabase = Api.newDatabase(genome, logger); 29 | Api.align("AACC", referenceDatabase, new AlignmentParameters(), logger); 30 | String marker = "reusing cached result"; 31 | if (writer.getText().contains(marker)) { 32 | fail("First lookup reused cache result. Output: " + writer.getText()); 33 | } 34 | 35 | // The cache isn't always enabled for all queries because we don't know how often it helps 36 | // So, we have to run this alignment a few times to ensure the cache should be enabled 37 | for (int i = 0; i < 3; i++) { 38 | Api.align("AACC", referenceDatabase, new AlignmentParameters(), logger); 39 | } 40 | boolean usedCache = writer.getText().contains(marker); 41 | if (!usedCache) { 42 | fail("Didn't use cache on subsequent lookup. Output: " + writer.getText()); 43 | } 44 | } 45 | 46 | private void fail(String message) { 47 | Assert.fail(message); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/BasepairsTest.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class BasepairsTest { 7 | 8 | @Test 9 | public void penaltyTest() { 10 | byte A = Basepairs.encode('A'); 11 | byte C = Basepairs.encode('C'); 12 | byte N = Basepairs.encode('N'); 13 | byte AOrC = Basepairs.union(A, C); 14 | 15 | AlignmentParameters alignmentParameters = new AlignmentParameters(); 16 | double ambiguityPenalty = 3; 17 | alignmentParameters.AmbiguityPenalty = ambiguityPenalty; 18 | double mutationPenalty = 100; 19 | alignmentParameters.MutationPenalty = mutationPenalty; 20 | 21 | double AToCPenalty = Basepairs.getPenalty(A, C, alignmentParameters); 22 | if (AToCPenalty != mutationPenalty) { 23 | fail("Expected A to C penalty to be " + mutationPenalty + ", not " + AToCPenalty); 24 | } 25 | 26 | double AToNPenalty = Basepairs.getPenalty(A, N, alignmentParameters); 27 | if 
(AToNPenalty != ambiguityPenalty) { 28 | fail("Expected A to N penalty to be " + ambiguityPenalty + ", not " + AToNPenalty); 29 | } 30 | double NToAPenalty = Basepairs.getPenalty(N, A, alignmentParameters); 31 | if (NToAPenalty != ambiguityPenalty) { // check the N-to-A result itself, not the A-to-N result again 32 | fail("Expected N to A penalty to be " + ambiguityPenalty + ", not " + NToAPenalty); 33 | } 34 | 35 | double expectedPartialAmbiguityPenalty = ambiguityPenalty / 3; 36 | double AToAOrCPenalty = Basepairs.getPenalty(A, AOrC, alignmentParameters); 37 | if (AToAOrCPenalty != expectedPartialAmbiguityPenalty) { 38 | fail("Expected A to (A or C) penalty to be " + expectedPartialAmbiguityPenalty + ", not " + AToAOrCPenalty); 39 | } 40 | 41 | double AOrCToAPenalty = Basepairs.getPenalty(AOrC, A, alignmentParameters); 42 | if (AOrCToAPenalty != expectedPartialAmbiguityPenalty) { 43 | fail("Expected (A or C) to A penalty to be " + expectedPartialAmbiguityPenalty + ", not " + AOrCToAPenalty); 44 | } 45 | 46 | } 47 | 48 | private void fail(String message) { 49 | Assert.fail(message); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/test/java/Counting_HashBlockPath_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.List; 4 | import org.junit.Assert; 5 | import org.junit.Test; 6 | 7 | public class Counting_HashBlockPath_Test { 8 | public Counting_HashBlockPath_Test() { 9 | } 10 | 11 | @Test 12 | public void checkEfficientlyHandlesRepetitionInQuery() { 13 | String query = "GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG"; 14 | String reference = "GGGGGGGGACGTTGCAAACCGGTTATGCTGCAAATTGGCC"; 15 | 16 | Counting_HashBlockPath path = makePath(query, reference); 17 | 18 | int numOffsets = path.findGoodPositionsHavingPriorityUpTo(query.length()).size(); 19 | if (numOffsets != 0) { 20 | fail("number of offsets checked is " + numOffsets + ".
Does repetition in the query cause us to check for alignments at an inefficiently large number of offsets?"); 21 | } 22 | } 23 | 24 | @Test 25 | public void checkOneHashblockMatchSufficientNearEndOfReference() { 26 | String query = "CCCTTAAGGACCGTGTGAGAACGAC"; 27 | String reference = "ACGTAAGTACGAGCCGTAAGGTCCC"; 28 | // ! ! ! ! 29 | 30 | Counting_HashBlockPath path = makePath(query, reference); 31 | 32 | int expectedOffset = 12; 33 | List matchingCounters = path.findGoodPositionsHavingPriorityUpTo(query.length()); 34 | if (!containsOffset(matchingCounters, expectedOffset)) { 35 | fail("didn't find offset " + expectedOffset); 36 | } 37 | } 38 | 39 | @Test 40 | public void checkRepeatedHashblockMatchInsufficientEvenNearEndOfReference() { 41 | String query = "ACCC"; 42 | String reference = "ACCCACCCACCCACCCACCC"; 43 | 44 | Counting_HashBlockPath path = makePath(query, reference); 45 | 46 | List matchingCounters = path.findGoodPositionsHavingPriorityUpTo(query.length()); 47 | if (matchingCounters.size() > 0) { 48 | String message = "Expected 0 interesting offsets but found " + matchingCounters.size() + ":"; 49 | for (HashBlockMatch_Counter counter: matchingCounters) { 50 | message += " offset " + counter.getMatch().getOffset(); 51 | } 52 | fail(message); 53 | } 54 | } 55 | 56 | private boolean containsOffset(List counters, int offset) { 57 | for (HashBlockMatch_Counter counter: counters) { 58 | if (counter.getMatch().getOffset() == offset) 59 | return true; 60 | } 61 | return false; 62 | } 63 | 64 | private Counting_HashBlockPath makePath(String queryText, String referenceText) { 65 | Sequence query = new SequenceBuilder().setName("query").add(queryText).build(); 66 | Sequence reference = new SequenceBuilder().setName("reference").add(referenceText).build(); 67 | SequenceDatabase sequenceDatabase = new SequenceDatabase(reference, true); 68 | Logger logger = new Logger(new StderrWriter()); 69 | HashBlock_Pyramid queryPyramid = new HashBlock_Pyramid(new 
HashBlock_Stream(query, false, null)); 70 | HashBlock_Database database = new HashBlock_Database(sequenceDatabase); 71 | AlignmentParameters parameters = new AlignmentParameters(); 72 | parameters.DeletionExtension_Penalty = 0.1; 73 | Counting_HashBlockPath path = new Counting_HashBlockPath(queryPyramid, database.getView(), sequenceDatabase, query, "query", logger, parameters); 74 | return path; 75 | } 76 | 77 | private void fail(String message) { 78 | Assert.fail(message); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/test/java/DirCache_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | import java.io.File; 6 | import java.io.IOException; 7 | import java.util.Set; 8 | import java.util.HashMap; 9 | import java.util.Map; 10 | import java.util.TreeMap; 11 | 12 | public class DirCache_Test { 13 | @Test 14 | public void consistencyTest() throws IOException { 15 | DirCache cache = new DirCache(new File("/tmp/cache"), new MemoryFilesystem()); 16 | Map> cachePaths = new HashMap>(); 17 | int numEntries = 1000; 18 | 19 | // save a bunch of cache entries and make sure they're all different 20 | for (int i = 0; i < numEntries; i++) { 21 | TreeMap properties = makeProperties(i); 22 | File dir = cache.getOrCreateDir(properties); 23 | String path = dir.getAbsolutePath(); 24 | TreeMap previousProperties = cachePaths.get(path); 25 | if (previousProperties != null) 26 | fail("Properties " + properties + " and " + previousProperties + " were both saved at " + path); 27 | cachePaths.put(path, properties); 28 | } 29 | // check the same cache entries and make sure they all already exist 30 | for (int i = 0; i < numEntries; i++) { 31 | TreeMap properties = makeProperties(i); 32 | File dir = cache.getOrCreateDir(properties); 33 | String path = dir.getAbsolutePath(); 34 | if (!cachePaths.containsKey(path)) 
35 | fail("Searched at " + path + " for properties " + properties + " but nothing was previously saved there"); 36 | TreeMap previousProperties = cachePaths.get(path); 37 | if (!properties.equals(previousProperties)) 38 | fail("Searched at " + path + " for properties " + properties + ", but previously the properties saved there were different: " + previousProperties); 39 | } 40 | } 41 | 42 | private TreeMap makeProperties(int i) { 43 | TreeMap map = new TreeMap(); 44 | map.put("zeros", "" + (i % 10)); 45 | map.put("tens", "" + ((i / 10) % 10)); 46 | map.put("hundreds", "" + ((i / 100) % 10)); 47 | return map; 48 | } 49 | 50 | private void fail(String message) { 51 | Assert.fail(message); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/FastaParser_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.StringReader; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import org.junit.Assert; 9 | import org.junit.Test; 10 | 11 | public class FastaParser_Test { 12 | @Test 13 | public void testRemovesSpacesInContigName() { 14 | String text = ">sequence details\nACGT"; 15 | 16 | List sequences = parse(text); 17 | if (sequences.size() != 1) { 18 | fail("Expected 1 sequence, not " + sequences.size()); 19 | } 20 | Sequence sequence = sequences.get(0); 21 | String expectedName = "sequence"; 22 | if (!sequence.getName().equals(expectedName)) { 23 | fail("Expected sequence to have name '" + expectedName + "', not '" + sequence.getName() + "'"); 24 | } 25 | } 26 | 27 | private List parse(String text) { 28 | FastaParser reader = buildReader(text); 29 | List sequences = new ArrayList(); 30 | while (true) { 31 | SequenceBuilder builder = reader.getNextSequence(); 32 | if (builder == null) 33 | break; 34 | sequences.add(builder.build()); 35 | } 36 | return 
sequences; 37 | } 38 | 39 | private FastaParser buildReader(String text) { 40 | StringReader stringReader = new StringReader(text); 41 | BufferedReader bufferedReader = new BufferedReader(stringReader); 42 | String path = "test"; 43 | return new FastaParser(bufferedReader, path); 44 | } 45 | 46 | 47 | private void fail(String message) { 48 | Assert.fail(message); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/test/java/HashBlockCompiler_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class HashBlockCompiler_Test { 7 | @Test 8 | public void shortTest() { 9 | check("AACGACGT"); 10 | } 11 | 12 | @Test 13 | public void ambiguityTest() { 14 | // Ambiguity here: . 15 | check("GGGGGAGCGACCAAACGGCAGSTTCACTCA"); 16 | } 17 | 18 | // build a pyramid for this text, compile each row, and check that the compilation gives the same results 19 | private void check(String text) { 20 | Sequence sequence = new SequenceBuilder().setName("seq").add(text).build(); 21 | HashBlock_Row row = new HashBlock_BaseRow(sequence, null); 22 | check(row); 23 | } 24 | 25 | // build a pyramid from this row, compile each resulting row, and check that the compilation gives the same results 26 | private void check(HashBlock_Row row) { 27 | while (true) { 28 | HashBlock_Row compiled = new HashBlock_Compiler(row, new HashBlock_CompilerCache()); 29 | compare(row, compiled); 30 | compare(row, compiled); 31 | if (row.getAfter(-1) == null) 32 | break; 33 | row = new HashBlock_ParentRow(compiled, false, null); 34 | } 35 | } 36 | 37 | // checks that two rows are the same 38 | private void compare(HashBlock_Row rowA, HashBlock_Row rowB) { 39 | Sequence sequence = rowA.getSequence(); 40 | for (int i = -1; i < sequence.getLength(); i++) { 41 | IMultiHashBlock blockA = rowA.getAfter(i); 42 | IMultiHashBlock blockB = 
rowB.getAfter(i); 43 | String stringA = null; 44 | if (blockA != null) { 45 | stringA = blockA.toString(sequence); 46 | } 47 | String stringB = null; 48 | if (blockB != null) { 49 | stringB = blockB.toString(sequence); 50 | } 51 | boolean equal = false; 52 | if ((stringA == null) != (stringB == null)) { 53 | equal = false; 54 | } else { 55 | if (stringA == null && stringB == null) { 56 | equal = true; 57 | } else { 58 | equal = stringA.equals(stringB); 59 | } 60 | } 61 | if (!equal) { 62 | fail("rowA.getAfter(" + i + ") = " + stringA + " but rowB.getAfter(" + i + ") = " + stringB); 63 | } 64 | } 65 | } 66 | 67 | private void fail(String message) { 68 | Assert.fail(message); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/test/java/HashBlockDatabase_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | 9 | public class HashBlockDatabase_Test { 10 | public HashBlockDatabase_Test() { 11 | } 12 | 13 | @Test 14 | public void testConsistency() { 15 | Sequence a = new SequenceBuilder().setName("contig1").add("ACCCCCCC").build(); 16 | Sequence b = new SequenceBuilder().setName("contig2").add("CTTTTTTT").build(); 17 | List list1 = new ArrayList(); 18 | list1.add(a); 19 | list1.add(b); 20 | SequenceDatabase s = new SequenceDatabase(list1, true); 21 | StatusLogger statusLogger = new StatusLogger(new Logger(new StderrWriter()), 0); 22 | HashBlock_Database db1 = new HashBlock_Database(s, 1, 1, -1, true, null, statusLogger, false); 23 | HashBlock_Database db2 = new HashBlock_Database(s, 1, 1, -1, true, null, statusLogger, true); 24 | db1.requireSetUpThroughSize(100); 25 | db2.requireSetUpThroughSize(100); 26 | db1.verifyMatches(db2); 27 | } 28 | 29 | private void fail(String message) { 30 | Assert.fail(message); 31 | } 32 | } 33 | 
-------------------------------------------------------------------------------- /src/test/java/HashBlockPaths_Counter_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | public class HashBlockPaths_Counter_Test { 9 | public HashBlockPaths_Counter_Test() { 10 | } 11 | 12 | @Test 13 | public void checkComputesDistanceCorrectly() { 14 | String refText = "GGGGGACGTGGGGGGAACTAAGGGG"; 15 | String seq1Text = "GACGTG"; 16 | String seq2Text = "AACTAAG"; 17 | checkDistance(refText, seq1Text, seq2Text, 5, 18); 18 | } 19 | 20 | @Test 21 | public void checkReverseComplementAlignment() { 22 | String refText = "GGGGGACGTGGGGGGAACTAAGGGG"; 23 | String seq1Text = "GACGTG"; 24 | String seq2Text = "AACTAAG"; 25 | checkDistance(reverseComplement(refText), seq1Text, seq2Text, 5, 18); 26 | } 27 | 28 | @Test 29 | public void checkOverlappingDistance() { 30 | String refText = "GGGGAACCACTGGGGG"; 31 | String seq1Text = "GAACCACTG"; 32 | String seq2Text = "CCACTGGGG"; 33 | checkDistance(refText, seq1Text, seq2Text, -6, 12); 34 | } 35 | 36 | @Test 37 | public void checkMultipleMatches() { 38 | String refText = "GGGGGAACAGTGGGGGGAACTAAGGGGAATTGTATATAGCG"; 39 | String seq1Text = "GAACAGTG"; 40 | String seq2Text = "AACTAAGGGGAA"; 41 | List matches = getMatches(refText + refText, seq1Text, seq2Text); 42 | if (matches.size() != 2) { 43 | fail("Expected 2 matches, got " + matches.size()); 44 | } 45 | 46 | } 47 | 48 | private void checkDistance(String refText, String seq1Text, String seq2Text, int innerDistance, int outerDistance) { 49 | System.err.println("checkDistance ref = '" + refText + "' seq1 = '" + seq1Text + "' seq2 = '" + seq2Text + "'"); 50 | List matches = getMatches(refText, seq1Text, seq2Text); 51 | if (matches.size() != 1) { 52 | fail("Expected 1 match, got " + matches.size()); 53 | } 54 | 
QueryMatch match = matches.get(0); 55 | if (match.getTotalDistanceBetweenComponents() != innerDistance) { 56 | fail("Expected total distance between components of " + innerDistance + ", not " + match.getTotalDistanceBetweenComponents()); 57 | } 58 | if (match.getTotalDistanceAcross() != outerDistance) { 59 | fail("Expected total distance across components of " + outerDistance + ", not " + match.getTotalDistanceAcross()); 60 | } 61 | } 62 | 63 | private List getMatches(String refText, String seq1Text, String seq2Text) { 64 | Logger logger = new Logger(new StderrWriter()); 65 | Sequence query1 = new SequenceBuilder().setName("seq1").add(seq1Text).build(); 66 | seq2Text = reverseComplement(seq2Text); 67 | Sequence query2 = new SequenceBuilder().setName("seq2").add(seq2Text).build(); 68 | Sequence reference = new SequenceBuilder().setName("ref").add(refText).build(); 69 | 70 | List components = new ArrayList(); 71 | 72 | List referenceSequences = new ArrayList(); 73 | referenceSequences.add(reference); 74 | referenceSequences.add(reference.reverseComplement()); 75 | SequenceDatabase referenceDatabase = new SequenceDatabase(referenceSequences); 76 | 77 | components.add(makePath(query1, referenceDatabase, "fwd-query")); 78 | components.add(makePath(query2, referenceDatabase, "rev-query")); 79 | int expectedInnerDistance = 10; 80 | int maxInnerDistance = 20; 81 | HashBlockPaths_Counter counter = new HashBlockPaths_Counter(components, expectedInnerDistance, maxInnerDistance, logger); 82 | List matches = counter.findGoodPositionsHavingPriority(0); 83 | return matches; 84 | } 85 | 86 | private Counting_HashBlockPath makePath(Sequence query, SequenceDatabase sequenceDatabase, String name) { 87 | Logger logger = new Logger(new StderrWriter()); 88 | StatusLogger statusLogger = new StatusLogger(logger, 0); 89 | HashBlock_Pyramid queryPyramid = new HashBlock_Pyramid(new HashBlock_Stream(query, false, null)); 90 | HashBlock_Database database = new
HashBlock_Database(sequenceDatabase, statusLogger); 91 | AlignmentParameters parameters = new AlignmentParameters(); 92 | parameters.DeletionExtension_Penalty = 0.1; 93 | Counting_HashBlockPath path = new Counting_HashBlockPath(queryPyramid, database.getView(), sequenceDatabase, query, name, logger, parameters); 94 | return path; 95 | } 96 | 97 | private String reverseComplement(String input) { 98 | return new SequenceBuilder().setName("temp").add(input).build().reverseComplement().getText(); 99 | } 100 | 101 | private void fail(String message) { 102 | Assert.fail(message); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/test/java/HistogramTest.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | 7 | public class HistogramTest { 8 | public HistogramTest() { 9 | } 10 | 11 | @Test 12 | public void testSquash_unchanged() { 13 | checkSquash(new double[]{1, 2, 3, 4, 5}, new double[]{1, 2, 3, 4, 5}); 14 | } 15 | 16 | @Test 17 | public void testSquash_squash6To3() { 18 | checkSquash(new double[]{4, 1, 6, 5, 3, 4}, new double[]{5, 11, 7}); 19 | } 20 | 21 | @Test 22 | public void testSquash_squash3To2() { 23 | checkSquash(new double[]{1, 2, 4}, new double[]{2, 5}); 24 | } 25 | 26 | @Test 27 | public void testSquash_squash4To3() { 28 | checkSquash(new double[]{3, 6, 9, 3}, new double[]{5, 10, 6}); 29 | } 30 | 31 | @Test 32 | public void testSquash_squash0to1() { 33 | checkSquash(new double[]{}, new double[]{0}); 34 | } 35 | 36 | @Test 37 | public void testSquash_stretch() { 38 | checkSquash(new double[]{4, 4, 4}, new double[]{3, 3, 3, 3}); 39 | } 40 | 41 | private void checkSquash(double[] data, double[] expectedSquashed) { 42 | int desiredNumBins = expectedSquashed.length; 43 | double[] squashed = Histogram.squash(data, desiredNumBins); 44 | if (!equivalent(squashed, expectedSquashed)) { 45 | 
fail("Squashed " + toString(data) + " into " + desiredNumBins + " bins and got " + toString(squashed) + " instead of " + toString(expectedSquashed)); 46 | } 47 | } 48 | 49 | private boolean equivalent(double[] a, double[] b) { 50 | if (a.length != b.length) { 51 | return false; 52 | } 53 | for (int i = 0; i < a.length; i++) { 54 | if (Math.abs(a[i] - b[i]) > 0.001) // ignore rounding error 55 | return false; 56 | } 57 | return true; 58 | } 59 | 60 | private String toString(double[] a) { 61 | StringBuilder result = new StringBuilder(); 62 | result.append("["); 63 | for (int i = 0; i < a.length; i++) { 64 | if (i > 0) 65 | result.append(","); 66 | result.append("" + a[i]); 67 | } 68 | result.append("]"); 69 | return result.toString(); 70 | } 71 | 72 | private void fail(String message) { 73 | Assert.fail(message); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/test/java/MapperMetadata_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.nio.file.Path; 4 | import java.nio.file.Paths; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | public class MapperMetadata_Test { 9 | @Test 10 | public void simplificationOfSubdir() { 11 | Path a = Paths.get("/a"); 12 | Path abc = Paths.get("/a/b/c"); 13 | Path abcFromA = MapperMetadata.simplifyPath(abc, a); 14 | // "b/c" should be simpler than "/a/b/c" 15 | checkPathsEqual(abcFromA, Paths.get("b/c")); 16 | } 17 | 18 | @Test 19 | public void simplificationOfNearParentDir() { 20 | Path ab = Paths.get("/a/b"); 21 | Path abc = Paths.get("/a/b/c"); 22 | Path abFromAbc = MapperMetadata.simplifyPath(ab, abc); 23 | // ".." 
should be simpler than "/a/b" 24 | checkPathsEqual(abFromAbc, Paths.get("..")); 25 | } 26 | 27 | @Test 28 | public void simplificationOfFarParentDir() { 29 | Path a = Paths.get("/a"); 30 | Path abc = Paths.get("/a/b/c"); 31 | Path aFromAbc = MapperMetadata.simplifyPath(a, abc); 32 | // "/a" should be simpler than "../.." 33 | checkPathsEqual(aFromAbc, a); 34 | } 35 | 36 | private void checkPathsEqual(Path a, Path b) { 37 | Assert.assertEquals(a, b); // we want a comparison that works across platforms 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/test/java/MatchDatabase_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | public class MatchDatabase_Test { 9 | public MatchDatabase_Test() { 10 | } 11 | 12 | @Test 13 | public void testQueryEndingWithMismatch() { 14 | String queryText = "AACCACGT"; 15 | String refText = "AACCACGA"; 16 | 17 | Sequence a = new SequenceBuilder().setName("a").add(queryText).build(); 18 | Query query = new Query(a); 19 | Sequence b = new SequenceBuilder().setName("b").add(refText).build(); 20 | SequenceAlignment sequenceAlignment = new SequenceAlignment(new AlignedBlock(a, b, 0, 0, queryText.length(), refText.length()), new AlignmentParameters(), false); 21 | QueryAlignment alignment = new QueryAlignment(sequenceAlignment); 22 | MatchDatabase database = new MatchDatabase(0); 23 | List queryAlignments = new ArrayList(); 24 | queryAlignments.add(new QueryAlignments(query, alignment)); 25 | database.addAlignments(queryAlignments); 26 | Alignments alignments = database.groupByPosition().get(b); 27 | for (int i = 0; i < refText.length(); i++) { 28 | AlignmentPosition position = alignments.getPosition(i); 29 | float count = position.getCount(); 30 | if (count != 1) { 31 | fail("number of bases aligned 
at position " + i + " in reference equals " + count + " rather than 1"); 32 | } 33 | } 34 | } 35 | 36 | @Test 37 | public void testOverlappingPairedEndQueries() { 38 | String refText = "AACCACGATTAC"; 39 | String query1Text = "AACCACGA"; 40 | String query2Text = "CACGATTAC"; 41 | Sequence query1 = new SequenceBuilder().setName("q1").add(query1Text).build(); 42 | Sequence query2 = new SequenceBuilder().setName("q2").add(query2Text).build(); 43 | Sequence ref = new SequenceBuilder().setName("ref").add(refText).build(); 44 | 45 | SequenceAlignment sequence1Alignment = new SequenceAlignment(new AlignedBlock(query1, ref, 0, 0, query1Text.length(), query1Text.length()), new AlignmentParameters(), false); 46 | SequenceAlignment sequence2Alignment = new SequenceAlignment(new AlignedBlock(query2, ref, 0, 3, query2Text.length(), query2Text.length()), new AlignmentParameters(), false); 47 | 48 | List components = new ArrayList(); 49 | components.add(sequence1Alignment); 50 | components.add(sequence2Alignment); 51 | QueryAlignment alignment = new QueryAlignment(components, 0, 0, 0, 0, -5); 52 | 53 | MatchDatabase database = new MatchDatabase(0); 54 | Query query = new Query(query1, query2, 0, 1); 55 | List queryAlignments = new ArrayList(); 56 | queryAlignments.add(new QueryAlignments(query, alignment)); 57 | database.addAlignments(queryAlignments); 58 | 59 | Alignments alignments = database.groupByPosition().get(ref); 60 | for (int i = 0; i < refText.length(); i++) { 61 | AlignmentPosition position = alignments.getPosition(i); 62 | float count = position.getCount(); 63 | if (count != 1) { 64 | fail("number of bases aligned at position " + i + " in reference equals " + count + " rather than 1"); 65 | } 66 | } 67 | 68 | } 69 | 70 | private void fail(String message) { 71 | Assert.fail(message); 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /src/test/java/MemoryFilesystem.java: 
-------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.File; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | /** Filesystem implementation that keeps file contents in a map keyed by absolute path; mkdirs is a no-op. */ public class MemoryFilesystem implements Filesystem { 8 | public boolean createNewFile(File file) { 9 | String key = getKey(file); 10 | if (this.fileContents.containsKey(key)) 11 | return false; 12 | this.fileContents.put(key, new byte[0]); 13 | return true; 14 | } 15 | 16 | public void write(File file, byte[] contents) { 17 | String key = getKey(file); 18 | this.fileContents.put(key, contents); 19 | } 20 | 21 | public byte[] readFile(File file) { 22 | return this.fileContents.get(getKey(file)); 23 | } 24 | 25 | public void mkdirs(File dir) { 26 | } 27 | 28 | private String getKey(File file) { 29 | return file.getAbsolutePath(); 30 | } 31 | 32 | private Map<String, byte[]> fileContents = new HashMap<String, byte[]>(); 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/MemoryFilesystem_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.io.File; 4 | import org.junit.Assert; 5 | import org.junit.Test; 6 | 7 | public class MemoryFilesystem_Test { 8 | @Test 9 | public void simpleTest() { 10 | MemoryFilesystem filesystem = new MemoryFilesystem(); 11 | File f1 = new File("a"); 12 | File f2 = new File("b"); 13 | 14 | // files missing 15 | Assert.assertEquals(filesystem.readFile(f1), null); 16 | Assert.assertEquals(filesystem.readFile(f2), null); 17 | 18 | // make new file 19 | Assert.assertEquals(filesystem.createNewFile(f1), true); 20 | 21 | // one empty file should exist 22 | Assert.assertEquals(filesystem.readFile(f1).length, 0); 23 | Assert.assertEquals(filesystem.readFile(f2), null); 24 | 25 | // write text 26 | byte[] contents = "sample text".getBytes(); 27 | filesystem.write(f1, contents); 28 | 29 | // one file should have the same contents 30 |
Assert.assertEquals(filesystem.readFile(f1), contents); 31 | Assert.assertEquals(filesystem.readFile(f2), null); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/test/java/OrderingUtils_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class OrderingUtils_Test { 7 | @Test 8 | public void test1() { 9 | // make some positions 10 | int length = 20; 11 | Sequence sequence = new RepeatingSequence("test", 'A', length); 12 | SequencePosition[] positions1 = makePositions(sequence, length, 1); 13 | SequencePosition[] positions3 = makePositions(sequence, length, 3); 14 | SequencePosition[] positions7 = makePositions(sequence, length, 7); 15 | SequencePosition[] positions9 = makePositions(sequence, length, 9); 16 | SequencePosition[] positions11 = makePositions(sequence, length, 11); 17 | SequencePosition[] positions13 = makePositions(sequence, length, 13); 18 | SequencePosition[] positions17 = makePositions(sequence, length, 17); 19 | SequencePosition[] positions19 = makePositions(sequence, length, 19); 20 | test(positions1, positions3); 21 | test(positions1, positions7); 22 | test(positions1, positions9); 23 | test(positions1, positions11); 24 | test(positions1, positions13); 25 | test(positions1, positions17); 26 | test(positions1, positions19); 27 | } 28 | 29 | private SequencePosition[] makePositions(Sequence sequence, int length, int orderMultiplier) { 30 | SequencePosition[] positions = new SequencePosition[length]; 31 | for (int i = 0; i < positions.length; i++) { 32 | int index = i * orderMultiplier % length; 33 | if (positions[index] != null) { 34 | throw new IllegalArgumentException("length " + length + " does not support orderMultiplier " + orderMultiplier + ": multiple positions attempting to go to index " + index + ". 
length and orderMultiplier shouldn't share any factors."); 35 | } 36 | int position = i * 6; 37 | positions[index] = new SequencePosition(sequence, position); 38 | } 39 | return positions; 40 | } 41 | 42 | private void test(SequencePosition[] a, SequencePosition[] b) { 43 | if (a.length != b.length) { 44 | throw new IllegalArgumentException("a.length = " + a.length + ", b.length = " + b.length); 45 | } 46 | a = OrderingUtils.orderDeterministically(a); 47 | b = OrderingUtils.orderDeterministically(b); 48 | for (int i = 0; i < a.length; i++) { 49 | if (!(a[i].equals(b[i]))) { 50 | String error = "a[" + i + "] = " + a[i] + ", b[" + i + "] = " + b[i] + ". Full contents:\n"; 51 | error += "a = "; 52 | for (int j = 0; j < a.length; j++) { 53 | error += a[j] + ","; 54 | } 55 | error += "\n"; 56 | error += "b = "; 57 | for (int j = 0; j < b.length; j++) { 58 | error += b[j] + ","; 59 | } 60 | error += "\n"; 61 | fail(error); 62 | } 63 | } 64 | } 65 | 66 | private void fail(String message) { 67 | Assert.fail(message); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/test/java/PackedMap_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import org.junit.Assert; 6 | import org.junit.Test; 7 | 8 | 9 | public class PackedMap_Test { 10 | public PackedMap_Test() { 11 | } 12 | 13 | @Test 14 | public void testLargeReferenceSize() { 15 | List sequences = makeSequences(8, (int)Math.pow(2, 31)); 16 | Sequence firstSequence = sequences.get(0); 17 | SequenceDatabase sequenceDatabase = new SequenceDatabase(sequences, true); 18 | int keyCapacity = 10; 19 | PackedMap packedMap = new PackedMap(5, keyCapacity, sequenceDatabase, 1); 20 | List blocks = new ArrayList(); 21 | for (int i = 0; i < keyCapacity * 2; i++) { 22 | int forwardHash = i % keyCapacity; 23 | int reverseHash = -forwardHash - 1; 24 | 
blocks.add(new HashBlock(i, 1, forwardHash, reverseHash)); 25 | } 26 | packedMap.add(firstSequence, blocks, false); 27 | for (int i = 0; i < keyCapacity; i++) { 28 | SequencePosition[] lookupResults = packedMap.get(i); 29 | if (lookupResults.length != 2) { 30 | fail("Looked up key " + i + " and expected 2 results, not " + lookupResults.length); 31 | } 32 | int expected0 = (i + 0 * keyCapacity); 33 | int expected1 = (i + 1 * keyCapacity); 34 | int actual0 = lookupResults[0].getStartIndex(); 35 | int actual1 = lookupResults[1].getStartIndex(); 36 | if (actual1 < actual0) { 37 | int temp = actual1; 38 | actual1 = actual0; 39 | actual0 = temp; 40 | } 41 | if (expected0 != actual0) { 42 | fail("Looked up key " + i + " and got lookup result " + actual0 + ", not " + expected0); 43 | } 44 | if (expected1 != actual1) { 45 | fail("Looked up key " + i + " and got lookup result " + actual1 + ", not " + expected1); 46 | } 47 | } 48 | } 49 | 50 | private List makeSequences(int numSequences, int sequenceLength) { 51 | // Make SequenceDatabase 52 | List reference = new ArrayList(); 53 | for (int i = 0; i < numSequences; i++) { 54 | reference.add(makeSequence(i, sequenceLength)); 55 | } 56 | return reference; 57 | } 58 | 59 | private Sequence makeSequence(int identifier, int length) { 60 | return new RepeatingSequence("" + identifier, 'A', length); 61 | } 62 | 63 | private void fail(String message) { 64 | Assert.fail(message); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/java/PathAligner_Test.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | import org.junit.Assert; 4 | import org.junit.Test; 5 | 6 | public class PathAligner_Test { 7 | public PathAligner_Test() { 8 | } 9 | 10 | @Test 11 | public void testQueryEndingWithMismatchAndExtension() { 12 | check("AACCGGTT", "AAT", "AAC", "AAT", 1.5); 13 | } 14 | 15 | @Test 16 | public void 
testQueryStartingWithShortExtension() { 17 | String query = "AAACCGGTTACGTACGTACGT"; 18 | String ref = "AACCGGTTACGTTACGTACGT"; 19 | String alignedQuery = "AACCGGTTACG-TACGTACGT"; 20 | String alignedRef = "AACCGGTTACGTTACGTACGT"; 21 | check(query, ref, alignedQuery, alignedRef, 3.1); 22 | } 23 | 24 | private void check(String textA, String textB, String alignedTextA, String alignedTextB, double expectedPenalty) { 25 | Sequence a = new SequenceBuilder().setName("a").add(textA).build(); 26 | Sequence b = new SequenceBuilder().setName("b").add(textB).build(); 27 | SequenceMatch match = new SequenceMatch(a, b, 0); 28 | 29 | PathAligner aligner = new PathAligner(new Logger(new StderrWriter())); 30 | AlignmentAnalysis analysis = new AlignmentAnalysis(); 31 | analysis.maxInsertionExtensionPenalty = expectedPenalty; 32 | analysis.maxDeletionExtensionPenalty = expectedPenalty; 33 | SequenceAlignment result = aligner.align(new SequenceSection(a, 0, a.getLength()), new SequenceSection(b, 0, b.getLength()), makeParameters(), analysis); 34 | check(result, alignedTextA, alignedTextB, expectedPenalty); 35 | } 36 | 37 | private void check(SequenceAlignment alignment, String alignedTextA, String alignedTextB, double expectedPenalty) { 38 | if (alignment == null) { 39 | fail("Expected alignment of " + alignedTextA + " / " + alignedTextB + ", not null"); 40 | } 41 | if (alignment.getPenalty() != expectedPenalty) { 42 | fail("Expected alignment penalty of " + expectedPenalty + " for alignment\n" + alignedTextA + "\n" + alignedTextB + "\nbut got " + alignment.getPenalty() + " for alignment:\n" + alignment.format()); 43 | } 44 | if (!alignedTextA.equals(alignment.getAlignedTextA())) { 45 | fail("Expected alignment text a of " + alignedTextA + "\nbut got alignment (with same penalty) of \n" + alignment.format()); 46 | } 47 | if (!alignedTextB.equals(alignment.getAlignedTextB())) { 48 | fail("Expected alignment text b of " + alignedTextB + "\nbut got alignment (with same penalty) of \n" + 
alignment.format()); 49 | } 50 | } 51 | 52 | private void fail(String message) { 53 | Assert.fail(message); 54 | } 55 | private AlignmentParameters makeParameters() { 56 | AlignmentParameters parameters = new AlignmentParameters(); 57 | parameters.MutationPenalty = 1; 58 | parameters.InsertionStart_Penalty = 2; 59 | parameters.InsertionExtension_Penalty = 0.5; 60 | parameters.DeletionStart_Penalty = 2; 61 | parameters.DeletionExtension_Penalty = 1; 62 | parameters.MaxErrorRate = 1; 63 | parameters.AmbiguityPenalty = 0.1; 64 | return parameters; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /src/test/java/RepeatingSequence.java: -------------------------------------------------------------------------------- 1 | package mapper; 2 | 3 | public class RepeatingSequence extends Sequence { 4 | public RepeatingSequence(String name, char repeatingContent, int length) { 5 | super(name, "", length, ""); 6 | this.repeatingContent = Basepairs.encode(repeatingContent); 7 | } 8 | 9 | @Override 10 | public byte computeEncodedCharAt(int i) { 11 | return repeatingContent; 12 | } 13 | 14 | private byte repeatingContent; 15 | } 16 | --------------------------------------------------------------------------------