├── .gitignore ├── BB_User-Meeting-2014-poster-FINAL.pdf ├── README.md ├── bbmap-help-Biostar.md ├── bitbucket-pipelines.yml ├── build.xml ├── config ├── cluster16s.txt ├── filter16s.txt ├── histograms.txt ├── recalibrate.txt ├── rnaseq.txt └── trimadapters.txt ├── current ├── align2 │ ├── AbstractIndex.java │ ├── AbstractMapThread.java │ ├── AbstractMapper.java │ ├── BBIndex.java │ ├── BBIndex5.java │ ├── BBIndexAcc.java │ ├── BBIndexPacBio.java │ ├── BBIndexPacBioSkimmer.java │ ├── BBMap.java │ ├── BBMap5.java │ ├── BBMapAcc.java │ ├── BBMapPacBio.java │ ├── BBMapPacBioSkimmer.java │ ├── BBMapThread.java │ ├── BBMapThread5.java │ ├── BBMapThreadAcc.java │ ├── BBMapThreadPacBio.java │ ├── BBMapThreadPacBioSkimmer.java │ ├── BBSplitter.java │ ├── BBWrap.java │ ├── BandedAligner.java │ ├── BandedAlignerConcrete.java │ ├── BandedAlignerJNI.java │ ├── Blacklist.java │ ├── Block.java │ ├── ChromLoadThread.java │ ├── CompareSamFiles.java │ ├── CompressString.java │ ├── Evaluate.java │ ├── GapTools.java │ ├── GradeSamFile.java │ ├── Index.java │ ├── IndexMaker4.java │ ├── IndexMaker5.java │ ├── KeyRing.java │ ├── MSA.java │ ├── MakeQualityHistogram.java │ ├── MakeRocCurve.java │ ├── MultiStateAligner10ts.java │ ├── MultiStateAligner11ts.java │ ├── MultiStateAligner11tsJNI.java │ ├── MultiStateAligner9Flat.java │ ├── MultiStateAligner9PacBio.java │ ├── MultiStateAligner9PacBioAdapter.java │ ├── MultiStateAligner9PacBioAdapter2.java │ ├── MultiStateAligner9PacBioAdapter_WithBarriers.java │ ├── MultiStateAligner9XFlat.java │ ├── MultiStateAligner9ts.java │ ├── NeedlemanWunsch.java │ ├── PackedHeap.java │ ├── Pointer.java │ ├── PrintTime.java │ ├── Quad.java │ ├── Quad64.java │ ├── Quad64Heap.java │ ├── QuadHeap.java │ ├── QualityTools.java │ ├── RandomReads3.java │ ├── ReadComparatorID.java │ ├── ReadComparatorMapping.java │ ├── ReadComparatorName.java │ ├── ReadComparatorTopological.java │ ├── ReadErrorComparator.java │ ├── ReadLengthComparator.java │ ├── ReadStats.java │ 
├── RefToIndex.java │ ├── ReformatBatchOutput.java │ ├── ReformatBatchOutput2.java │ ├── Shared.java │ ├── Solver.java │ ├── SortReadsByID.java │ ├── SortReadsByMapping.java │ ├── SortReadsTopologically.java │ ├── SplitMappedReads.java │ ├── Tools.java │ ├── TranslateColorspaceRead.java │ └── TrimRead.java ├── aligner │ ├── Aligner.java │ ├── Alignment.java │ ├── AlignmentResult.java │ ├── AllToAll.java │ ├── FlatAligner.java │ ├── FlatAligner2.java │ ├── MultiStateAligner9PacBioAdapter.java │ ├── MultiStateAligner9PacBioAdapter2.java │ ├── MultiStateAligner9PacBioAdapter3.java │ ├── MultiStateAligner9PacBioAdapter_WithBarriers.java │ ├── SingleStateAlignerFlat.java │ ├── SingleStateAlignerFlat2.java │ ├── SingleStateAlignerFlat2Amino.java │ ├── SingleStateAlignerFlat2_1D.java │ ├── SingleStateAlignerFlat3.java │ ├── SingleStateAlignerFlatFloat.java │ └── SingleStateAlignerPacBioAdapter.java ├── assemble │ ├── AbstractBuildThread.java │ ├── AbstractExploreThread.java │ ├── AbstractProcessContigThread.java │ ├── AbstractRemoveThread.java │ ├── AbstractShaveThread.java │ ├── BubblePopper.java │ ├── Contig.java │ ├── Edge.java │ ├── ErrorTracker.java │ ├── KmerCompressor.java │ ├── Postfilter.java │ ├── Rollback.java │ ├── ShaveObject.java │ ├── Shaver.java │ ├── Shaver1.java │ ├── Shaver2.java │ ├── TadPipe.java │ ├── Tadpole.java │ ├── Tadpole1.java │ ├── Tadpole2.java │ └── TadpoleWrapper.java ├── bloom │ ├── BloomFilter.java │ ├── BloomFilterCorrector.java │ ├── BloomFilterCorrectorWrapper.java │ ├── BloomFilterWrapper.java │ ├── ErrorCorrect.java │ ├── KCountArray.java │ ├── KCountArray2.java │ ├── KCountArray3.java │ ├── KCountArray4.java │ ├── KCountArray4MT.java │ ├── KCountArray5MT.java │ ├── KCountArray6MT.java │ ├── KCountArray7MT.java │ ├── KCountArray7MTA.java │ ├── KCountArray8MT.java │ ├── KmerCount3.java │ ├── KmerCount4.java │ ├── KmerCount5.java │ ├── KmerCount6.java │ ├── KmerCount6MT.java │ ├── KmerCount7MT.java │ ├── KmerCount7MTA.java │ ├── 
KmerCountAbstract.java │ ├── LargeKmerCount.java │ ├── LargeKmerCount2.java │ └── TestLargeKmer.java ├── cardinality │ ├── BBLog.java │ ├── BBLog_simple.java │ ├── CardinalityTracker.java │ ├── LogLog.java │ ├── LogLog16.java │ ├── LogLog2.java │ ├── LogLog8.java │ ├── LogLog8_simple.java │ ├── LogLogWrapper.java │ ├── LogLog_old.java │ └── MultiLogLog.java ├── clump │ ├── Clump.java │ ├── ClumpList.java │ ├── ClumpTools.java │ ├── Clumpify.java │ ├── Condensor.java │ ├── KmerComparator.java │ ├── KmerComparator2.java │ ├── KmerComparatorX.java │ ├── KmerComparatorY.java │ ├── KmerComparator_original.java │ ├── KmerReduce.java │ ├── KmerSort.java │ ├── KmerSort1.java │ ├── KmerSort2.java │ ├── KmerSort3.java │ ├── KmerSplit.java │ ├── PivotSet.java │ ├── ReadKey.java │ ├── Splitter.java │ └── StreamToOutput.java ├── cluster │ ├── Cluster.java │ ├── ClusterTools.java │ ├── MergeReadHeaders.java │ ├── ReadTag.java │ └── ReclusterByKmer.java ├── consensus │ ├── BaseGraph.java │ ├── BaseGraphPart.java │ ├── BaseNode.java │ ├── ConsensusMaker.java │ ├── ConsensusObject.java │ ├── FixScaffoldGaps.java │ └── Lilypad.java ├── covid │ └── SummarizeCoverage.java ├── dna │ ├── AminoAcid.java │ ├── ChromArrayMaker.java │ ├── ChromToFasta.java │ ├── ChromosomeArray.java │ ├── Coverage.java │ ├── CoverageArray.java │ ├── CoverageArray2.java │ ├── CoverageArray3.java │ ├── Data.java │ ├── Exon.java │ ├── FastaToChromArrays2.java │ ├── Gene.java │ ├── GeneSet.java │ ├── IntMap.java │ ├── IntMapFlex.java │ ├── Matrix.java │ ├── Motif.java │ ├── MotifMulti.java │ ├── MotifProbsN.java │ ├── MotifSimple.java │ ├── Parser.java │ ├── Range.java │ ├── ScafLoc.java │ ├── Scaffold.java │ └── Timer.java ├── driver │ ├── A_Sample_Textfile.java │ ├── BBVersion.java │ ├── ClearRam.java │ ├── CollateSpikeIn.java │ ├── CompareReferenceGenomes.java │ ├── CompareSequences.java │ ├── ConcatenateFiles.java │ ├── ConcatenateTextFiles.java │ ├── Concatenator.java │ ├── ConvertSamToAln.java │ ├── 
CorrelateIdentity.java │ ├── CountRNAs.java │ ├── CountSharedLines.java │ ├── EstherFilter.java │ ├── FilterAssemblySummary.java │ ├── FilterLines.java │ ├── FilterReadsByName.java │ ├── FindMotifs.java │ ├── FixChr.java │ ├── FixDumbFile.java │ ├── GenerateNoCallsFromCoverage.java │ ├── GetSequence.java │ ├── GetUniquePrefixes.java │ ├── Grep.java │ ├── Life.java │ ├── LineCount.java │ ├── LoadReads.java │ ├── LookAtID.java │ ├── MakeTestScript.java │ ├── MakeTestScriptScoreOnly.java │ ├── MeasureGene.java │ ├── MergeBigelow.java │ ├── MergeCoverageOTU.java │ ├── MergeTextFiles.java │ ├── MergeTextFiles2.java │ ├── MoveFiles.java │ ├── ParseCrossblockResults.java │ ├── PlotGC.java │ ├── PrintEnv.java │ ├── ProcessFragMerging.java │ ├── ProcessSpeed.java │ ├── ProcessSpeed2.java │ ├── ProcessWebcheck.java │ ├── ReduceSilva.java │ ├── RenameAndMux.java │ ├── RenameByHeader.java │ ├── RenameNcbiToTid.java │ ├── RenameRefseqFiles.java │ ├── Sample.java │ ├── Search.java │ ├── SelectReads.java │ ├── SniffSplices.java │ ├── SortByQuality.java │ ├── SummarizeContamReport.java │ ├── SummarizeCoverage.java │ ├── SummarizeCrossblock.java │ ├── SummarizeMSDIN.java │ ├── SummarizeQuast.java │ ├── SummarizeSealStats.java │ ├── TestCompressionSpeed.java │ ├── TestLockSpeed.java │ ├── Translator.java │ ├── Translator2.java │ ├── TransposeTextFile.java │ └── TrimSamFile.java ├── fileIO │ ├── ArrayFile.java │ ├── ByteFile.java │ ├── ByteFile1.java │ ├── ByteFile2.java │ ├── ByteStreamWriter.java │ ├── ChainBlock.java │ ├── ChainLine.java │ ├── CompressFiles.java │ ├── CopyFile.java │ ├── CopyFiles.java │ ├── CopyFiles2.java │ ├── FileFormat.java │ ├── FindFiles.java │ ├── GenericTextFile.java │ ├── LoadThread.java │ ├── MatrixFile.java │ ├── OpenFile.java │ ├── PipeThread.java │ ├── QuickFile.java │ ├── ReadWrite.java │ ├── SummaryFile.java │ ├── TextFile.java │ └── TextStreamWriter.java ├── fun │ ├── Calc.java │ ├── Chance.java │ ├── DiskBench.java │ ├── FindPath.java │ ├── 
Genetic.java │ ├── Life.java │ ├── MakeAdjacencyList.java │ ├── Palindrome.java │ ├── ProbShared.java │ ├── ProbShared2.java │ └── ProbShared3.java ├── gff │ ├── CompareGff.java │ ├── CompareGff_old.java │ ├── CutGff.java │ ├── CutGff_ST.java │ ├── GbffFeature.java │ ├── GbffFile.java │ ├── GbffLocus.java │ ├── GffLine.java │ └── VcfToGff.java ├── hiseq │ ├── AnalyzeFlowCell.java │ ├── CycleTracker.java │ ├── FlowCell.java │ ├── FlowcellCoordinate.java │ ├── IlluminaHeaderParser.java │ ├── Lane.java │ ├── MicroTile.java │ ├── PlotFlowCell.java │ └── Tile.java ├── hmm │ ├── HMMSearchLine.java │ ├── HMMSearchReport.java │ └── ProteinSummary.java ├── icecream │ ├── IceCreamAligner.java │ ├── IceCreamAlignerJNI.java │ ├── IceCreamAlignerJava.java │ ├── IceCreamFinder.java │ ├── IceCreamGrader.java │ ├── IceCreamMaker.java │ ├── PBHeader.java │ ├── PolymerTrimmer.java │ ├── ReadBuilder.java │ ├── ReformatPacBio.java │ ├── ZMW.java │ └── ZMWStreamer.java ├── jasper │ ├── Comparison.java │ ├── DenseSimilarityMatrix.java │ ├── DenseTree.java │ ├── DenseTreeValidate.java │ ├── KmerPosition.java │ ├── KmerPosition3.java │ ├── NCBIComparison.java │ ├── NCBISparseSimilarityMatrix.java │ ├── NCBISparseTree.java │ ├── NCBISparseTreeValidate.java │ ├── NCBITreeNode.java │ ├── Organism.java │ ├── SimilarityMatrix.java │ ├── SparseSimilarityMatrix.java │ ├── SparseTree.java │ ├── SparseTreeValidate.java │ ├── TaxCompare.java │ └── TreeNode.java ├── jgi │ ├── A_Sample.java │ ├── A_Sample2.java │ ├── A_SampleByteFile.java │ ├── A_SampleD.java │ ├── A_SampleMT.java │ ├── A_Sample_Unpaired.java │ ├── AddAdapters.java │ ├── AdjustHomopolymers.java │ ├── AssemblyStats2.java │ ├── AssemblyStatsWrapper.java │ ├── BBDuk.java │ ├── BBDuk2.java │ ├── BBDukF.java │ ├── BBMask.java │ ├── BBMask_noSam.java │ ├── BBMerge.java │ ├── BBMergeOverlapper.java │ ├── BBQC.java │ ├── BBTool_ST.java │ ├── CalcTrueQuality.java │ ├── CalcTrueQuality_single.java │ ├── CalcUniqueness.java │ ├── CallPeaks.java 
│ ├── Consect.java │ ├── CorrelateBarcodes.java │ ├── CountBarcodes.java │ ├── CountGC.java │ ├── CountUniqueness.java │ ├── CovStatsLine.java │ ├── CoveragePileup.java │ ├── CrossContaminate.java │ ├── CutPrimers.java │ ├── DecontaminateByNormalization.java │ ├── Dedupe.java │ ├── Dedupe2.java │ ├── DedupeByMapping.java │ ├── DedupeProtein.java │ ├── DemuxByName.java │ ├── DemuxByName2.java │ ├── Difference.java │ ├── ErrorCorrect.java │ ├── FakeReads.java │ ├── FilterByCoverage.java │ ├── FilterBySequence.java │ ├── FilterReadsWithSubs.java │ ├── FindHiCJunctions.java │ ├── FindPrimers.java │ ├── FindPrimers_old.java │ ├── FindString.java │ ├── FungalRelease.java │ ├── FuseSequence.java │ ├── GatherKapaStats.java │ ├── GetReads.java │ ├── GradeMergedReads.java │ ├── GreedyBarCodeFinder.java │ ├── IdentityMatrix.java │ ├── Info.java │ ├── KExpand.java │ ├── KeepBestCopy.java │ ├── KmerCountExact.java │ ├── KmerCountMulti.java │ ├── KmerCoverage.java │ ├── KmerFilterSetMaker.java │ ├── KmerNormalize.java │ ├── KmerSample.java │ ├── LogLog.java │ ├── MakeChimeras.java │ ├── MakeContaminatedGenomes.java │ ├── MakeCoverageHistogram.java │ ├── MakeLengthHistogram.java │ ├── MakePolymers.java │ ├── MateReadsMT.java │ ├── MergeBarcodes.java │ ├── MergeSam.java │ ├── MutateGenome.java │ ├── MutateGenome2.java │ ├── NormAndCorrectWrapper.java │ ├── Orf.java │ ├── PartitionReads.java │ ├── PhylipToFasta.java │ ├── RQCFilter.java │ ├── RQCFilter2.java │ ├── RQCFilterStats.java │ ├── RandomGenome.java │ ├── ReadKmerDepthDistribution.java │ ├── RedirectTest.java │ ├── ReformatReads.java │ ├── RemapQuality.java │ ├── RemoveBadBarcodes.java │ ├── RenameReads.java │ ├── ReplaceHeaders.java │ ├── RepresentativeSet.java │ ├── SamToEst.java │ ├── Seal.java │ ├── Shred.java │ ├── Shuffle.java │ ├── SmallKmerFrequency.java │ ├── SplitNexteraLMP.java │ ├── SplitPairsAndSingles.java │ ├── SplitSam4Way.java │ ├── SplitSam6Way.java │ ├── SplitSamFile.java │ ├── StringCount.java │ ├── 
SynthMDA.java │ ├── TestFilesystem.java │ ├── TestFormat.java │ ├── TetramerFrequencies.java │ ├── TranslateSixFrames.java │ ├── UnicodeToAscii.java │ └── Unzip.java ├── json │ ├── JsonLiteral.java │ ├── JsonObject.java │ └── JsonParser.java ├── kmer │ ├── AbstractKmerTable.java │ ├── AbstractKmerTableSet.java │ ├── AtomicShortArray.java │ ├── DumpThread.java │ ├── HashArray.java │ ├── HashArray1D.java │ ├── HashArray2D.java │ ├── HashArrayHybrid.java │ ├── HashArrayHybridFast.java │ ├── HashBuffer.java │ ├── HashForest.java │ ├── HistogramMaker.java │ ├── KmerBuffer.java │ ├── KmerLink.java │ ├── KmerNode.java │ ├── KmerNode1D.java │ ├── KmerNode2D.java │ ├── KmerTable.java │ ├── KmerTableSet.java │ ├── OwnershipThread.java │ ├── Primes.java │ ├── ScheduleMaker.java │ ├── TableLoaderLockFree.java │ ├── TableReader.java │ └── Walker.java ├── ml │ └── ProcessBBMergeHeaders.java ├── pacbio │ ├── CalcCoverageFromSites.java │ ├── GenerateMultiChrom.java │ ├── MakePacBioScript.java │ ├── MergeFastaContigs.java │ ├── MergeReadsAndGenome.java │ ├── PartitionFastaFile.java │ ├── PartitionReads.java │ ├── ProcessStackedSitesNormalized.java │ ├── RemoveAdapters2.java │ ├── RemoveAdapters3.java │ ├── RemoveNFromChromosome.java │ ├── SiteR.java │ ├── SortSites.java │ ├── SplitOffPerfectContigs.java │ ├── StackSites.java │ └── StackSites2.java ├── prok │ ├── AnalyzeGenes.java │ ├── CallGenes.java │ ├── Feature.java │ ├── FetchProks.java │ ├── FilterSilva.java │ ├── FrameStats.java │ ├── GeneCaller.java │ ├── GeneModel.java │ ├── GeneModelParser.java │ ├── MergeRibo.java │ ├── MergeRibo_Fast.java │ ├── Orf.java │ ├── PGMTools.java │ ├── ProkObject.java │ ├── RiboMaker.java │ ├── ScafData.java │ ├── ScoreTracker.java │ ├── SplitRibo.java │ └── StatsContainer.java ├── server │ ├── PercentEncoding.java │ ├── ServerTools.java │ └── SimpleHttpServer.java ├── shared │ ├── Colors.java │ ├── KillSwitch.java │ ├── MetadataWriter.java │ ├── Parse.java │ ├── Parser.java │ ├── 
PreParser.java │ ├── Primes.java │ ├── ReadStats.java │ ├── Shared.java │ ├── Timer.java │ ├── Tools.java │ └── TrimRead.java ├── sketch │ ├── AddSSU.java │ ├── AlignmentJob.java │ ├── AlignmentThreadPool.java │ ├── AnalyzeSketchResults.java │ ├── Blacklist.java │ ├── BlacklistMaker.java │ ├── BlacklistMaker2.java │ ├── CompareBuffer.java │ ├── CompareSSU.java │ ├── CompareSketch.java │ ├── Comparison.java │ ├── DisplayParams.java │ ├── GlocalAligner.java │ ├── InvertKey.java │ ├── KmerLimit.java │ ├── KmerLimit2.java │ ├── MergeSketch.java │ ├── Record.java │ ├── RecordSet.java │ ├── ResultLineParser.java │ ├── SSUMap.java │ ├── SendSketch.java │ ├── Sketch.java │ ├── SketchHeap.java │ ├── SketchIdComparator.java │ ├── SketchIndex.java │ ├── SketchMaker.java │ ├── SketchMakerMini.java │ ├── SketchObject.java │ ├── SketchResults.java │ ├── SketchSearcher.java │ ├── SketchTool.java │ ├── SubSketch.java │ ├── SummarizeSketchStats.java │ └── Whitelist.java ├── sort │ ├── ContigLengthComparator.java │ ├── MergeSorted.java │ ├── ReadComparator.java │ ├── ReadComparatorFlowcell.java │ ├── ReadComparatorID.java │ ├── ReadComparatorList.java │ ├── ReadComparatorMapping.java │ ├── ReadComparatorName.java │ ├── ReadComparatorPosition.java │ ├── ReadComparatorRandom.java │ ├── ReadComparatorTaxa.java │ ├── ReadComparatorTopological.java │ ├── ReadErrorComparator.java │ ├── ReadLengthComparator.java │ ├── ReadQualityComparator.java │ ├── Shuffle.java │ ├── Shuffle2.java │ ├── SortByName.java │ └── SortReadsByID.java ├── stream │ ├── ArrayListSet.java │ ├── BufferedMultiCros.java │ ├── ByteBuilder.java │ ├── ConcurrentCollectionReadInputStream.java │ ├── ConcurrentDepot.java │ ├── ConcurrentGenericReadInputStream.java │ ├── ConcurrentGenericReadOutputStream.java │ ├── ConcurrentLegacyReadInputStream.java │ ├── ConcurrentReadInputStream.java │ ├── ConcurrentReadInputStreamD.java │ ├── ConcurrentReadListDepot.java │ ├── ConcurrentReadOutputStream.java │ ├── 
ConcurrentReadOutputStreamD.java │ ├── ConcurrentReadStreamInterface.java │ ├── CrisContainer.java │ ├── CrisWrapper.java │ ├── DualCris.java │ ├── EmblReadInputStream.java │ ├── FASTQ.java │ ├── FastaQualReadInputStream.java │ ├── FastaReadInputStream.java │ ├── FastaReadInputStream2.java │ ├── FastaShredInputStream.java │ ├── FastqReadInputStream.java │ ├── GbkReadInputStream.java │ ├── Header.java │ ├── HeaderInputStream.java │ ├── KillSwitch.java │ ├── MDWalker.java │ ├── MultiCros.java │ ├── MultiCros2.java │ ├── MultiCros3.java │ ├── NullOutputStream.java │ ├── OnelineReadInputStream.java │ ├── RTextInputStream.java │ ├── RandomReadInputStream3.java │ ├── Read.java │ ├── ReadInputStream.java │ ├── ReadStreamByteWriter.java │ ├── ReadStreamStringWriter.java │ ├── ReadStreamWriter.java │ ├── SamHeader.java │ ├── SamLine.java │ ├── SamLineStreamer.java │ ├── SamReadInputStream.java │ ├── SamReadStreamer.java │ ├── SamStreamer.java │ ├── SamStreamerMF.java │ ├── SamStreamerWrapper.java │ ├── ScaffoldCoordinates.java │ ├── ScarfReadInputStream.java │ ├── SequentialReadInputStream.java │ ├── SiteScore.java │ └── SiteScoreR.java ├── structures │ ├── AbstractBitSet.java │ ├── AbstractIntHashMap.java │ ├── AtomicBitSet.java │ ├── ByteBuilder.java │ ├── CoverageArray.java │ ├── CoverageArray2.java │ ├── CoverageArray3.java │ ├── CoverageArray3A.java │ ├── DoubleList.java │ ├── EntropyTracker.java │ ├── FloatList.java │ ├── Heap.java │ ├── IntHashMap.java │ ├── IntHashMapBinary.java │ ├── IntHashSet.java │ ├── IntHashSetList.java │ ├── IntList.java │ ├── IntList2.java │ ├── IntList3.java │ ├── IntListCompressor.java │ ├── IntLongHashMap.java │ ├── IntMap.java │ ├── ListNum.java │ ├── LongHashMap.java │ ├── LongHashSet.java │ ├── LongHeap.java │ ├── LongHeapMap.java │ ├── LongHeapSet.java │ ├── LongHeapSetInterface.java │ ├── LongList.java │ ├── LongListSet.java │ ├── LongLongHashMap.java │ ├── LongM.java │ ├── LongPair.java │ ├── MultiBitSet.java │ ├── 
PolymerTracker.java │ ├── Quantizer.java │ ├── Range.java │ ├── RawBitSet.java │ ├── StringNum.java │ ├── StringPair.java │ └── SuperLongList.java ├── tax │ ├── AccessionToTaxid.java │ ├── AccessionToTaxid_old.java │ ├── AnalyzeAccession.java │ ├── AnalyzeAccession_ST.java │ ├── ExplodeTree.java │ ├── FilterByTaxa.java │ ├── FindAncestor.java │ ├── GiToNcbi.java │ ├── GiToTaxid.java │ ├── IDNode.java │ ├── IDTree.java │ ├── ImgRecord.java │ ├── ImgRecord2.java │ ├── PrintTaxonomy.java │ ├── Query.java │ ├── RenameGiToNcbi.java │ ├── RenameGiToTaxid.java │ ├── RenameIMG.java │ ├── ShrinkAccession.java │ ├── SortByTaxa.java │ ├── SplitByTaxa.java │ ├── TaxApp.java │ ├── TaxClient.java │ ├── TaxFilter.java │ ├── TaxNode.java │ ├── TaxServer.java │ ├── TaxSize.java │ └── TaxTree.java ├── template │ ├── A_Sample.java │ ├── A_Sample2.java │ ├── A_SampleBasic.java │ ├── A_SampleBasic2.java │ ├── A_SampleByteFile.java │ ├── A_SampleD.java │ ├── A_SampleMT.java │ ├── A_SampleSamStreamer.java │ ├── A_SampleSummary.java │ ├── A_Sample_Generator.java │ ├── A_Sample_Unpaired.java │ ├── Accumulator.java │ ├── BBTool_ST.java │ ├── DoWorker.java │ ├── ThreadPoolJob.java │ └── ThreadWaiter.java ├── ukmer │ ├── AbstractKmerTableU.java │ ├── DumpThreadU.java │ ├── HashArrayU.java │ ├── HashArrayU1D.java │ ├── HashArrayU2D.java │ ├── HashArrayUHybrid.java │ ├── HashBufferU.java │ ├── HashForestU.java │ ├── HistogramMakerU.java │ ├── Kmer.java │ ├── KmerBufferU.java │ ├── KmerNodeU.java │ ├── KmerNodeU1D.java │ ├── KmerNodeU2D.java │ ├── KmerTableSetU.java │ ├── OwnershipThread.java │ └── WalkerU.java ├── var │ ├── ApplyVarsToReference.java │ ├── GenerateConsensusVariations.java │ ├── GenerateVarlets.java │ ├── GenerateVarlets2.java │ ├── GenerateVarlets3.java │ ├── StackVariations.java │ ├── StackVariations2.java │ ├── VarLine.java │ ├── Variation.java │ └── Varlet.java └── var2 │ ├── ApplyVariants.java │ ├── CallVariants.java │ ├── CallVariants2.java │ ├── CompareVCF.java │ ├── 
FilterSam.java │ ├── FilterVCF.java │ ├── MergeSamples.java │ ├── Realign.java │ ├── Realigner.java │ ├── SamFilter.java │ ├── ScafMap.java │ ├── Scaffold.java │ ├── SoftClipper.java │ ├── VCFFile.java │ ├── VCFLine.java │ ├── Var.java │ ├── VarFilter.java │ ├── VarKey.java │ ├── VarMap.java │ ├── VcfLoader.java │ └── VcfWriter.java ├── docs ├── Legal.txt ├── Legal_Illumina.txt ├── RestartingServers.txt ├── TableOfContents.txt ├── ToolDescriptions.txt ├── UsageGuide.txt ├── bbcms.txt ├── changelog.txt ├── citation.txt ├── compiling.txt ├── git.txt ├── guides │ ├── A_SampleGuide.txt │ ├── AddAdaptersGuide.txt │ ├── BBDukGuide.txt │ ├── BBMapGuide.txt │ ├── BBMap_old_readme.txt │ ├── BBMaskGuide.txt │ ├── BBMergeGuide.txt │ ├── BBNormGuide.txt │ ├── BBSketchGuide.txt │ ├── CalcUniquenessGuide.txt │ ├── CallVariantsGuide.txt │ ├── ClumpifyGuide.txt │ ├── DedupeGuide.txt │ ├── FilterByTileGuide.txt │ ├── PreprocessingGuide.txt │ ├── ReformatGuide.txt │ ├── RepairGuide.txt │ ├── SealGuide.txt │ ├── SplitNexteraGuide.txt │ ├── StatsGuide.txt │ ├── TadpoleGuide.txt │ └── TaxonomyGuide.txt ├── readme.txt ├── readme_config.txt └── readme_filetypes.txt ├── jni ├── BBMergeOverlapper.c ├── BBMergeOverlapper.o ├── BandedAlignerJNI.c ├── BandedAlignerJNI.o ├── CMakeLists.txt ├── IceCreamAlignerJNI.c ├── IceCreamAlignerJNI.o ├── MultiStateAligner11tsJNI.c ├── MultiStateAligner11tsJNI.o ├── README.txt ├── align2_BandedAlignerJNI.h ├── align2_MultiStateAligner11tsJNI.h ├── icecream_IceCreamAlignerJNI.h ├── jgi_BBMergeOverlapper.h ├── libbbtoolsjni.dylib ├── libbbtoolsjni.so ├── makefile.linux ├── makefile.osx └── run.sh ├── license.txt ├── pipelines ├── assembleMito_Illumina.sh ├── assembleMito_PacBio.sh ├── assemblyPipeline.sh ├── callInsertions.sh ├── covid │ ├── makeSummary.sh │ ├── processCorona.sh │ ├── processCoronaWrapper.sh │ ├── readme.txt │ └── recal.sh ├── cutRna.sh ├── fetch │ ├── fetchMito.sh │ ├── fetchNt.sh │ ├── fetchNtOuter.sh │ ├── fetchPlasmid.sh │ ├── 
fetchPlastid.sh │ ├── fetchProkByGenus.sh │ ├── fetchRefSeq.sh │ ├── fetchRefSeqClades.sh │ ├── fetchRefSeqCladesOuter.sh │ ├── fetchSilva.sh │ ├── fetchTaxonomy.sh │ ├── runRefSeqProtein.sh │ └── sketchRefSeq.sh ├── makeRiboKmers.sh ├── processIMG.sh ├── server │ ├── startNtServerVM.sh │ ├── startProteinServerVM.sh │ ├── startRefseqServerVM.sh │ ├── startSilvaServerVM.sh │ └── startTaxServerVM.sh ├── silva │ ├── fetchSilva.sh │ ├── make15mers.sh │ ├── makeCoveringSetLsu.sh │ ├── makeCoveringSetSsu.sh │ └── makeRep.sh ├── testPlatformQuality.sh ├── testsketch.sh └── variantPipeline.sh ├── pytools ├── .gitignore ├── ReadMe ├── css │ └── report.css ├── filter.py ├── filter_desc │ ├── 3primerna.txt │ ├── bisulphite.txt │ ├── cell-enrichment.txt │ ├── chip-seq.txt │ ├── clip-pe.txt │ ├── clrs.txt │ ├── dna.txt │ ├── filter.json │ ├── filter_mt.json │ ├── filter_nextera.json │ ├── filter_report.txt │ ├── fungal.txt │ ├── iso.txt │ ├── itag.txt │ ├── lfpe.txt │ ├── metagenome.txt │ ├── microtrans.txt │ ├── mtaa.txt │ ├── nextera-lmp.txt │ ├── plant-2x150.txt │ ├── plant-2x250.txt │ ├── rna.txt │ ├── rnawohuman.txt │ ├── sag.txt │ ├── smrna.txt │ ├── sop_list.txt │ └── viral-metagenome.txt ├── filter_param │ ├── 3primerna.config │ ├── bisulphite.config │ ├── cell-enrichment.config │ ├── chip-seq.config │ ├── clip-pe.config │ ├── clrs.config │ ├── dna.config │ ├── fungal.config │ ├── iso.config │ ├── itag.config │ ├── lfpe.config │ ├── metagenome.config │ ├── microtrans.config │ ├── mtaa.config │ ├── nextera-lmp.config │ ├── nextseq.config │ ├── plant-2x150.config │ ├── plant-2x250.config │ ├── rna.config │ ├── rnawohuman.config │ ├── sag.config │ ├── smrna.config │ └── viral-metagenome.config ├── images │ ├── JGI_logo.jpg │ ├── favicon.ico │ └── jgi_log_transparent.png ├── lib │ ├── common.py │ ├── html_utility.py │ ├── kmercount_pos.py │ ├── os_utility.py │ ├── readSeq.py │ ├── readqc_constants.py │ ├── readqc_report.py │ ├── readqc_utils.py │ ├── rqc_constants.py │ ├── 
rqc_fastq.py │ └── rqc_utility.py ├── readqc.py ├── template │ ├── filter_body_template.html │ ├── readqc_artifacts.html │ ├── readqc_body_template.html │ └── template.html └── tools │ └── patterN_fastq.pl ├── resources ├── 16S_15mers.fa ├── 16S_consensus_sequence.fa ├── 18S_15mers.fa ├── 18S_consensus_sequence.fa ├── 23S_15mers.fa ├── 23S_consensus_sequence.fa ├── 5S_10mers.fa ├── 5S_11mers.fa ├── 5S_12mers.fa ├── 5S_15mers.fa ├── 5S_9mers.fa ├── 5S_consensus_sequence.fa ├── Covid19_ref.fa ├── ITS_animal_consensus_sequence.fq ├── ITS_consensus_sequence.fq ├── ITS_fungi_consensus_sequence.fq ├── ITS_nonfungi_consensus_sequence.fq ├── ITS_other_consensus_sequence.fq ├── ITS_plant_consensus_sequence.fq ├── ITS_universal_consensus_sequence.fq ├── adapters.fa ├── adapters_no_transposase.fa.gz ├── artic3.fasta ├── blacklist_nt_merged.sketch ├── blacklist_prokprot_merged.sketch ├── blacklist_refseq_merged.sketch ├── blacklist_silva_merged.sketch ├── contents.txt ├── crelox.fa.gz ├── favicon.ico ├── kapatags.L40.fa ├── lambda.fa.gz ├── lfpe.linker.fa.gz ├── lsu_15mers.fa ├── m16S_consensus_sequence.fa ├── m18S_consensus_sequence.fa ├── model.pgm ├── mtst.fa ├── nextera.fa.gz ├── nextera_LMP_adapter.fa.gz ├── nextera_LMP_linker.fa.gz ├── p16S_consensus_sequence.fa ├── pJET1.2.fa ├── phix174_ill.ref.fa.gz ├── phix_adapters.fa.gz ├── polyA.fa.gz ├── primes.txt.gz ├── remote_files.txt ├── remote_files_old.txt ├── sample1.fq.gz ├── sample2.fq.gz ├── sequencing_artifacts.fa.gz ├── short.fa ├── ssu_15mers.fa ├── tRNA_10mers.fa ├── tRNA_11mers.fa ├── tRNA_15mers.fa ├── tRNA_16mers.fa ├── tRNA_consensus_sequence.fa ├── tax_server.html ├── truseq.fa.gz └── truseq_rna.fa.gz └── sh ├── a_sample_mt.sh ├── addadapters.sh ├── addssu.sh ├── adjusthomopolymers.sh ├── alltoall.sh ├── analyzeaccession.sh ├── analyzegenes.sh ├── analyzesketchresults.sh ├── applyvariants.sh ├── bbcms.sh ├── bbcountunique.sh ├── bbduk.sh ├── bbduk2.sh ├── bbest.sh ├── bbfakereads.sh ├── bbmap.sh ├── 
bbmapskimmer.sh ├── bbmask.sh ├── bbmerge-auto.sh ├── bbmerge.sh ├── bbnorm.sh ├── bbrealign.sh ├── bbrename.sh ├── bbsketch.sh ├── bbsplit.sh ├── bbsplitpairs.sh ├── bbstats.sh ├── bbversion.sh ├── bbwrap.sh ├── bloomfilter.sh ├── calcmem.sh ├── calctruequality.sh ├── callgenes.sh ├── callpeaks.sh ├── callvariants.sh ├── callvariants2.sh ├── clumpify.sh ├── commonkmers.sh ├── comparegff.sh ├── comparesketch.sh ├── comparessu.sh ├── comparevcf.sh ├── consect.sh ├── consensus.sh ├── countbarcodes.sh ├── countgc.sh ├── countsharedlines.sh ├── crossblock.sh ├── crosscontaminate.sh ├── current ├── cutgff.sh ├── cutprimers.sh ├── decontaminate.sh ├── dedupe.sh ├── dedupe2.sh ├── dedupebymapping.sh ├── demuxbyname.sh ├── diskbench.sh ├── ecc.sh ├── estherfilter.sh ├── explodetree.sh ├── fetchproks.sh ├── filterassemblysummary.sh ├── filterbarcodes.sh ├── filterbycoverage.sh ├── filterbyname.sh ├── filterbysequence.sh ├── filterbytaxa.sh ├── filterbytile.sh ├── filterlines.sh ├── filterqc.sh ├── filtersam.sh ├── filtersilva.sh ├── filtersubs.sh ├── filtervcf.sh ├── fixgaps.sh ├── fungalrelease.sh ├── fuse.sh ├── gbff2gff.sh ├── getreads.sh ├── gi2ancestors.sh ├── gi2taxid.sh ├── gitable.sh ├── grademerge.sh ├── gradesam.sh ├── icecreamfinder.sh ├── icecreamgrader.sh ├── icecreammaker.sh ├── idmatrix.sh ├── idtree.sh ├── invertkey.sh ├── kapastats.sh ├── kcompress.sh ├── keepbestcopy.sh ├── khist.sh ├── kmercountexact.sh ├── kmercountmulti.sh ├── kmercoverage.sh ├── kmerfilterset.sh ├── kmerlimit.sh ├── kmerlimit2.sh ├── kmerposition.sh ├── kmutate.sh ├── lilypad.sh ├── loadreads.sh ├── loglog.sh ├── makechimeras.sh ├── makecontaminatedgenomes.sh ├── makepolymers.sh ├── mapPacBio.sh ├── matrixtocolumns.sh ├── mergeOTUs.sh ├── mergebarcodes.sh ├── mergepgm.sh ├── mergeribo.sh ├── mergesam.sh ├── mergesketch.sh ├── mergesorted.sh ├── msa.sh ├── mutate.sh ├── muxbyname.sh ├── partition.sh ├── phylip2fasta.sh ├── pileup.sh ├── plotflowcell.sh ├── plotgc.sh ├── postfilter.sh 
├── printtime.sh ├── processfrag.sh ├── processhi-c.sh ├── processspeed.sh ├── randomgenome.sh ├── randomreads.sh ├── readlength.sh ├── readqc.sh ├── reducesilva.sh ├── reformat.sh ├── reformatpb.sh ├── removebadbarcodes.sh ├── removecatdogmousehuman.sh ├── removehuman.sh ├── removehuman2.sh ├── removemicrobes.sh ├── removesmartbell.sh ├── rename.sh ├── renameimg.sh ├── repair.sh ├── replaceheaders.sh ├── representative.sh ├── rqcfilter.sh ├── rqcfilter2.sh ├── runhmm.sh ├── samtoroc.sh ├── seal.sh ├── sendsketch.sh ├── shred.sh ├── shrinkaccession.sh ├── shuffle.sh ├── shuffle2.sh ├── sketch.sh ├── sketchblacklist.sh ├── sketchblacklist2.sh ├── sortbyname.sh ├── sortbytaxa.sh ├── splitbytaxa.sh ├── splitnextera.sh ├── splitribo.sh ├── splitsam.sh ├── splitsam4way.sh ├── splitsam6way.sh ├── stats.sh ├── statswrapper.sh ├── streamsam.sh ├── subsketch.sh ├── summarizecontam.sh ├── summarizecoverage.sh ├── summarizecrossblock.sh ├── summarizemerge.sh ├── summarizequast.sh ├── summarizescafstats.sh ├── summarizeseal.sh ├── summarizesketch.sh ├── synthmda.sh ├── tadpipe.sh ├── tadpole.sh ├── tadwrapper.sh ├── taxonomy.sh ├── taxserver.sh ├── taxsize.sh ├── taxtree.sh ├── testfilesystem.sh ├── testformat.sh ├── testformat2.sh ├── tetramerfreq.sh ├── textfile.sh ├── translate6frames.sh ├── unicode2ascii.sh ├── unzip.sh ├── upd.sh ├── vcf2gff.sh └── webcheck.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | 3 | # Mobile Tools for Java (J2ME) 4 | .mtj.tmp/ 5 | 6 | # Package Files # 7 | *.jar 8 | *.war 9 | *.ear 10 | 11 | # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml 12 | hs_err_pid* 13 | -------------------------------------------------------------------------------- /BB_User-Meeting-2014-poster-FINAL.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/BB_User-Meeting-2014-poster-FINAL.pdf -------------------------------------------------------------------------------- /bitbucket-pipelines.yml: -------------------------------------------------------------------------------- 1 | image: python:2.7.14 2 | 3 | pipelines: 4 | default: 5 | - step: 6 | caches: 7 | - pip 8 | script: 9 | - pipe: atlassian/slack-notify:0.3.2 10 | variables: 11 | WEBHOOK_URL: $slack_webhook 12 | MESSAGE: ':skull_and_crossbones: BBTools bitbucket commit -- Branch:$BITBUCKET_BRANCH -- Commit id:$BITBUCKET_COMMIT -- Tag:$BITBUCKET_TAG' 13 | - pipe: atlassian/ssh-run:0.2.2 14 | variables: 15 | SSH_USER: 'brycef' 16 | SERVER: 'cori.nersc.gov' 17 | COMMAND: 'bash ~/jgi-bbtools-test.sh' 18 | -------------------------------------------------------------------------------- /config/cluster16s.txt: -------------------------------------------------------------------------------- 1 | #This is for clustering PacBio 16s reads of insert with Dedupe 2 | csf=stats_e26.txt 3 | outbest=best_e26.fq 4 | qin=33 5 | usejni=t 6 | am=f 7 | ac=f 8 | fo 9 | c 10 | rnc=f 11 | mcs=3 12 | k=27 13 | mo=1420 14 | ow 15 | unpigz 16 | cc 17 | pto 18 | nam=4 19 | e=26 20 | -------------------------------------------------------------------------------- /config/filter16s.txt: -------------------------------------------------------------------------------- 1 | #Filter PacBio 16s Reads of Insert with reformat.sh before clustering 2 | minlen=1420 3 | maxlen=1640 4 | maq=20 5 | qin=33 6 | -------------------------------------------------------------------------------- /config/histograms.txt: -------------------------------------------------------------------------------- 1 | #Histograms, for BBMap 2 | bhist.txt 3 | qhist=qhist.txt 4 | aqhist=aqhist.txt 5 | qahist=qahist.txt 6 | bqhist=bqhist.txt 7 | qchist=qchist.txt 8 | lhist=lhist.txt 9 | ihist=ihist.txt 10 | ehist=ehist.txt 11 | 
indelhist=indelhist.txt 12 | mhist=mhist.txt 13 | gchist=gchist.txt 14 | gcbins=auto 15 | idhist=idhist.txt 16 | idbins=auto 17 | -------------------------------------------------------------------------------- /config/recalibrate.txt: -------------------------------------------------------------------------------- 1 | #Quality recalibration parameters for CalcTrueQuality and BBDuk 2 | recalpasses=2 3 | observationcutoff_p1=100 4 | observationcutoff_p2=200 5 | recalqmax=41 6 | recalqmin=2 7 | 8 | #first pass matrices 9 | loadq102_p1=f 10 | loadqbp_p1=t 11 | loadq10_p1=f 12 | loadq12_p1=f 13 | loadqb12_p1=f 14 | loadqb012_p1=f 15 | loadqb123_p1=t 16 | loadqb234_p1=f 17 | loadq12b12_p1=f 18 | loadqp_p1=f 19 | loadq_p1=f 20 | 21 | #second pass matrices 22 | loadq102_p2=f 23 | loadqbp_p2=t 24 | loadq10_p2=f 25 | loadq12_p2=f 26 | loadqb12_p2=f 27 | loadqb012_p2=f 28 | loadqb123_p2=f 29 | loadqb234_p2=f 30 | loadq12b12_p2=f 31 | loadqp_p2=f 32 | loadq_p2=f 33 | -------------------------------------------------------------------------------- /config/rnaseq.txt: -------------------------------------------------------------------------------- 1 | #Vertebrate RNA-seq settings for BBMap 2 | maxindel=200000 3 | intronlen=10 4 | xs=us 5 | -------------------------------------------------------------------------------- /config/trimadapters.txt: -------------------------------------------------------------------------------- 1 | k=23 2 | mink=11 3 | hdist=1 4 | tbo 5 | tpe 6 | -------------------------------------------------------------------------------- /current/align2/Index.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | /** 4 | * @author Brian Bushnell 5 | * @date Dec 19, 2012 6 | * 7 | */ 8 | public abstract class Index { 9 | 10 | //TODO: Put static methods here. 
11 | 12 | } 13 | -------------------------------------------------------------------------------- /current/align2/Pointer.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | public class Pointer implements Comparable{ 4 | 5 | public static Pointer[] loadMatrix(int[][] matrix){ 6 | Pointer[] out=new Pointer[matrix.length]; 7 | for(int i=0; i0){ 19 | File f=new File(args[0]); 20 | if(f.exists()){ 21 | String s=ReadWrite.readString(args[0]); 22 | // TextFile tf=new TextFile(args[0], false, false); 23 | // String s=tf.nextLine(); 24 | // tf.close(); 25 | long old=Long.parseLong(s); 26 | long elapsed=millis-old; 27 | if(args.length<2 || Parse.parseBoolean(args[1])){ 28 | System.out.println("Elapsed:\t"+String.format(Locale.ROOT, "%.2f", elapsed/1000d)); 29 | if(true){ 30 | System.err.println("Elapsed:\t"+String.format(Locale.ROOT, "%.2f", elapsed/1000d)); 31 | } 32 | } 33 | } 34 | f=null; 35 | ReadWrite.writeString(millis+"", args[0]); 36 | } 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /current/align2/Quad.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | public class Quad implements Comparable{ 4 | 5 | public Quad(int col_, int row_, int val_){ 6 | column=col_; 7 | row=row_; 8 | site=val_; 9 | } 10 | 11 | @Override 12 | public boolean equals(Object other){ 13 | return site==((Quad)other).site; 14 | } 15 | 16 | @Override 17 | public int hashCode(){return site;} 18 | 19 | @Override 20 | public int compareTo(Quad other) { 21 | int x=site-other.site; 22 | return(x==0 ? 
column-other.column : x); 23 | } 24 | 25 | @Override 26 | public String toString(){ 27 | return("("+column+","+row+","+site+")"); 28 | } 29 | 30 | public final int column; 31 | public int row; 32 | public int site; 33 | public int list[]; 34 | 35 | } 36 | -------------------------------------------------------------------------------- /current/align2/Quad64.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | public class Quad64 implements Comparable{ 4 | 5 | public Quad64(int col_, int row_, int val_){ 6 | column=col_; 7 | row=row_; 8 | site=val_; 9 | } 10 | 11 | @Override 12 | public boolean equals(Object other){ 13 | assert(false); 14 | return site==((Quad64)other).site; 15 | } 16 | 17 | @Override 18 | public int hashCode(){return (int)site;} 19 | 20 | @Override 21 | public int compareTo(Quad64 other) { 22 | return site>other.site ? 1 : site0 ? 1 : x<0 ? -1 : column-other.column); 25 | } 26 | 27 | @Override 28 | public String toString(){ 29 | return("("+column+","+row+","+site+")"); 30 | } 31 | 32 | public final int column; 33 | public int row; 34 | public long site; 35 | public int list[]; 36 | 37 | } 38 | -------------------------------------------------------------------------------- /current/align2/ReadComparatorID.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | import java.util.Comparator; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date Oct 27, 2014 10 | * 11 | */ 12 | 13 | public final class ReadComparatorID implements Comparator{ 14 | 15 | @Override 16 | public int compare(Read r1, Read r2) { 17 | if(r1.numericIDr2.numericID){return 1;} 19 | 20 | if(!r1.id.equals(r2.id)){return r1.id.compareTo(r2.id);} 21 | return 0; 22 | } 23 | 24 | public static final ReadComparatorID comparator=new ReadComparatorID(); 25 | 26 | } 27 | 
-------------------------------------------------------------------------------- /current/align2/ReadComparatorName.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | import java.util.Comparator; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date Oct 27, 2014 10 | * 11 | */ 12 | 13 | public final class ReadComparatorName implements Comparator{ 14 | 15 | @Override 16 | public int compare(Read r1, Read r2) { 17 | 18 | if(r1.id==null && r2.id==null){return r1.pairnum()-r2.pairnum();} 19 | if(r1.id==null){return -1;} 20 | if(r2.id==null){return 1;} 21 | int x=r1.id.compareTo(r2.id); 22 | if(x==0){return r1.pairnum()-r2.pairnum();} 23 | return x; 24 | } 25 | 26 | public static final ReadComparatorName comparator=new ReadComparatorName(); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /current/align2/ReadErrorComparator.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | import java.util.Comparator; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date May 30, 2013 10 | * 11 | */ 12 | public final class ReadErrorComparator implements Comparator{ 13 | 14 | @Override 15 | public int compare(Read r1, Read r2) { 16 | 17 | int a=(r1.errors+(r1.mate==null ? 0 : r1.mate.errors)); 18 | int b=(r2.errors+(r2.mate==null ? 0 : r2.mate.errors)); 19 | if(a!=b){return a-b;} 20 | 21 | a=(r1.length()+(r1.mate==null ? 0 : r1.mateLength())); 22 | b=(r2.length()+(r2.mate==null ? 0 : r2.mateLength())); 23 | if(a!=b){return b-a;} 24 | 25 | float a2=(r1.expectedErrors(true, 0)+(r1.mate==null ? 0 : r1.mate.expectedErrors(true, 0))); 26 | float b2=(r2.expectedErrors(true, 0)+(r2.mate==null ? 0 : r2.mate.expectedErrors(true, 0))); 27 | if(a2!=b2){return a2>b2 ? 
1 : -1;} 28 | 29 | if(r1.numericIDr2.numericID){return 1;} 31 | 32 | if(!r1.id.equals(r2.id)){return r1.id.compareTo(r2.id);} 33 | return 0; 34 | } 35 | 36 | public static final ReadErrorComparator comparator=new ReadErrorComparator(); 37 | 38 | } 39 | -------------------------------------------------------------------------------- /current/align2/ReadLengthComparator.java: -------------------------------------------------------------------------------- 1 | package align2; 2 | 3 | import java.util.Comparator; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * Sorts longest reads first 9 | * @author Brian Bushnell 10 | * @date Jul 19, 2013 11 | * 12 | */ 13 | public final class ReadLengthComparator implements Comparator { 14 | 15 | private ReadLengthComparator(){} 16 | 17 | @Override 18 | public int compare(Read a, Read b) { 19 | int x=compare2(a, b); 20 | if(x==0){x=compare2(a.mate, b.mate);} 21 | if(x==0){x=a.id.compareTo(b.id);} 22 | if(x==0){x=a.numericID>b.numericID ? 1 : a.numericID{ 7 | 8 | public Alignment(Read r_){ 9 | r=r_; 10 | } 11 | 12 | @Override 13 | public int compareTo(Alignment o) { 14 | return id>o.id ? 1 : ido.r.length() ? 1 : r.length() contigs=new ArrayList(); 37 | 38 | long readsInT=0; 39 | long basesInT=0; 40 | long lowqReadsT=0; 41 | long lowqBasesT=0; 42 | final int id; 43 | 44 | } 45 | -------------------------------------------------------------------------------- /current/assemble/AbstractShaveThread.java: -------------------------------------------------------------------------------- 1 | package assemble; 2 | 3 | /** 4 | * @author Brian Bushnell 5 | * @date Jul 20, 2015 6 | * 7 | */ 8 | /** 9 | * Removes dead-end kmers. 
10 | */ 11 | abstract class AbstractShaveThread extends Thread{ 12 | 13 | /** 14 | * Constructor 15 | */ 16 | public AbstractShaveThread(int id_){ 17 | id=id_; 18 | } 19 | 20 | @Override 21 | public final void run(){ 22 | while(processNextTable()){} 23 | } 24 | 25 | abstract boolean processNextTable(); 26 | 27 | /*--------------------------------------------------------------*/ 28 | 29 | long kmersRemovedT=0; 30 | 31 | final int id; 32 | 33 | } -------------------------------------------------------------------------------- /current/assemble/Rollback.java: -------------------------------------------------------------------------------- 1 | package assemble; 2 | 3 | import stream.Read; 4 | import structures.IntList; 5 | 6 | public class Rollback { 7 | 8 | public Rollback(Read r){ 9 | this(r, null); 10 | } 11 | 12 | public Rollback(Read r, IntList counts){ 13 | id0=r.id; 14 | flags0=r.flags; 15 | bases0=r.bases.clone(); 16 | quals0=(r.quality==null ? null : r.quality.clone()); 17 | counts0=(counts==null ? 
null : counts.copy()); 18 | } 19 | 20 | public void rollback(Read r){ 21 | rollback(r, null); 22 | } 23 | 24 | public void rollback(Read r, IntList counts){ 25 | r.id=id0; 26 | r.flags=flags0; 27 | if(r.length()==bases0.length){ 28 | System.arraycopy(bases0, 0, r.bases, 0, bases0.length); 29 | if(quals0!=null){System.arraycopy(quals0, 0, r.quality, 0, quals0.length);} 30 | if(counts!=null){System.arraycopy(counts0.array, 0, counts.array, 0, counts0.size);} 31 | }else{ 32 | r.bases=bases0; 33 | r.quality=quals0; 34 | if(counts!=null){ 35 | counts.clear(); 36 | counts.addAll(counts0); 37 | } 38 | } 39 | } 40 | 41 | final String id0; 42 | final int flags0; 43 | final byte[] bases0, quals0; 44 | public final IntList counts0; 45 | 46 | } 47 | -------------------------------------------------------------------------------- /current/bloom/KmerCountAbstract.java: -------------------------------------------------------------------------------- 1 | package bloom; 2 | 3 | import shared.Shared; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date Dec 2, 2014 8 | * 9 | */ 10 | public abstract class KmerCountAbstract { 11 | 12 | protected static final long[] transformToFrequency(int[] count){ 13 | long[] freq=new long[2000]; 14 | int max=freq.length-1; 15 | for(int i=0; iy ? 
x : y;} 37 | 38 | public static byte minQuality=6; 39 | public static long readsProcessed=0; 40 | public static long maxReads=-1; 41 | public static int BUFFERLEN=500; 42 | 43 | public static float minProb=0.5f; 44 | 45 | public static long keysCounted=0; 46 | 47 | public static int THREADS=Shared.threads(); 48 | public static final boolean verbose=false; 49 | public static boolean PREJOIN=false; 50 | public static boolean CANONICAL=false; 51 | public static boolean KEEP_DUPLICATE_KMERS=true; 52 | public static boolean SKETCH_MODE=false; 53 | public static boolean STORE_HASHED=false; 54 | 55 | } 56 | -------------------------------------------------------------------------------- /current/cardinality/MultiLogLog.java: -------------------------------------------------------------------------------- 1 | package cardinality; 2 | 3 | import shared.Parser; 4 | import stream.Read; 5 | import structures.IntList; 6 | import ukmer.Kmer; 7 | 8 | public class MultiLogLog { 9 | 10 | public MultiLogLog(Parser p){ 11 | this(p.loglogbuckets, p.loglogseed, p.loglogMinprob, p.loglogKlist); 12 | } 13 | 14 | public MultiLogLog(int buckets, long seed, float minProb, IntList klist0){ 15 | assert(klist0.size>0) : "No valid kmer lengths specified."; 16 | IntList klist=new IntList(klist0.size); 17 | for(int i=0; i0){ 21 | klist.add(k); 22 | } 23 | } 24 | klist.sort(); 25 | klist.shrinkToUnique(); 26 | assert(klist.size>0) : "No valid kmer lengths specified."; 27 | kArray=klist.toArray(); 28 | counters=new LogLog[kArray.length]; 29 | for(int i=0; i reads, int k, int minCount){ 23 | fname1=fname2=null; 24 | table=null; 25 | ConcurrentCollectionReadInputStream cris=new ConcurrentCollectionReadInputStream(reads, null, -1); 26 | cris.start(); 27 | table=PivotSet.makeKcaStatic(cris, k, minCount, Shared.AMINO_IN); 28 | ReadWrite.closeStream(cris); 29 | return table; 30 | } 31 | 32 | public static synchronized KCountArray getTable(String fname1_, String fname2_, int k_, int minCount_){ 33 | 
if(fname1==null || !fname1.equals(fname1_) || table==null){ 34 | fname1=fname1_; 35 | fname2=fname2_; 36 | String[] args=new String[] {"in1="+fname1, "in2="+fname2, "k="+k_, "minCount="+minCount_}; 37 | table=PivotSet.makeSet(args); 38 | } 39 | return table; 40 | } 41 | 42 | public static synchronized void clearTable() { 43 | fname1=fname2=null; 44 | table=null; 45 | } 46 | 47 | private static String fname1=null, fname2=null; 48 | private static KCountArray table=null; 49 | 50 | } 51 | -------------------------------------------------------------------------------- /current/clump/Condensor.java: -------------------------------------------------------------------------------- 1 | package clump; 2 | 3 | import java.util.ArrayList; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date Nov 7, 2015 10 | * 11 | */ 12 | public class Condensor { 13 | 14 | public Condensor(){ 15 | 16 | } 17 | 18 | public ArrayList makeClumps(ArrayList list){ 19 | throw new RuntimeException(); 20 | } 21 | 22 | public ArrayList condense(ArrayList list){ 23 | 24 | throw new RuntimeException(); 25 | } 26 | 27 | } 28 | -------------------------------------------------------------------------------- /current/clump/KmerComparatorX.java: -------------------------------------------------------------------------------- 1 | package clump; 2 | 3 | public class KmerComparatorX extends KmerComparator2 { 4 | 5 | private KmerComparatorX(){} 6 | 7 | @Override 8 | public int compare(ReadKey a, ReadKey b){ 9 | // assert(FlowcellCoordinate.spanTiles || Clump.forceSortXY); 10 | if(a.kmer!=b.kmer){return a.kmer>b.kmer ? -1 : 1;} //Bigger kmers first... 11 | if(a.kmerMinusStrand!=b.kmerMinusStrand){return a.kmerMinusStrand ? 1 : -1;} 12 | if(a.position!=b.position){return a.positionb.kmer ? -1 : 1;} //Bigger kmers first... 11 | if(a.kmerMinusStrand!=b.kmerMinusStrand){return a.kmerMinusStrand ? 
1 : -1;} 12 | if(a.position!=b.position){return a.position varSet; //TODO: Could change these to arrays and sort them. 15 | public int min=Integer.MAX_VALUE; 16 | public int max=0; 17 | public int covered=0; 18 | public int uncovered=0; 19 | public long sum=0; 20 | public float avg; 21 | public float covRatio; 22 | 23 | public int[] missingChromRelative; 24 | public int[] missingGeneRelative; 25 | 26 | } -------------------------------------------------------------------------------- /current/dna/MotifMulti.java: -------------------------------------------------------------------------------- 1 | package dna; 2 | import java.util.Arrays; 3 | 4 | 5 | public class MotifMulti extends Motif { 6 | 7 | public MotifMulti(String name_, Motif...args){ 8 | super(name_, args[0].length, args[0].center); 9 | commonLetters=Arrays.toString(args); 10 | sub=args; 11 | } 12 | 13 | 14 | @Override 15 | public boolean matchesExactly(byte[] source, int a){ 16 | for(int i=0; i0){System.out.println(Shared.BBMAP_VERSION_NAME);} 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /current/driver/ClearRam.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import java.util.ArrayList; 4 | 5 | import fileIO.ReadWrite; 6 | 7 | public class ClearRam { 8 | 9 | public static void main(String[] args){ 10 | 11 | for(int i=0; i<2; i++){ 12 | 13 | try { 14 | System.gc(); 15 | attempt(); 16 | } catch(final java.lang.OutOfMemoryError e) { 17 | // e.printStackTrace(); 18 | System.err.println("Out of memory at "+((current*8)/(1<<20))+" MB"); 19 | } 20 | } 21 | } 22 | 23 | public static void attempt(){ 24 | ArrayList list=new ArrayList(8000); 25 | current=0; 26 | 27 | while(true){ 28 | long[] array=null; 29 | 30 | array=new long[1<<20]; 31 | list.add(array); 32 | 33 | // for(int i=0; i set=new HashSet(); 17 | 18 | String header=null; 19 | StringBuilder sequence=new StringBuilder(); 20 | 
for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){ 21 | if(line.startsWith(">")){ 22 | if(sequence.length()>0){ 23 | if(sequence.length()>prefix){sequence.setLength(prefix);} 24 | String s=sequence.toString(); 25 | if(set.contains(s)){} 26 | else{ 27 | set.add(s); 28 | System.out.println(header+"\n"+s); 29 | } 30 | } 31 | sequence.setLength(0); 32 | header=line; 33 | }else{ 34 | sequence.append(line); 35 | } 36 | } 37 | 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /current/driver/Grep.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import fileIO.TextFile; 4 | 5 | public class Grep { 6 | 7 | public static void main(String[] args){ 8 | 9 | TextFile tf=new TextFile(args[0], true); 10 | 11 | String s=null; 12 | 13 | for(s=tf.nextLine(); s!=null; s=tf.nextLine()){ 14 | if(s.contains(args[1])){System.out.println(s);} 15 | } 16 | tf.close(); 17 | 18 | } 19 | 20 | } 21 | -------------------------------------------------------------------------------- /current/driver/LineCount.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import fileIO.TextFile; 4 | 5 | public class LineCount { 6 | 7 | public static void main(String[] args){ 8 | 9 | TextFile tf=new TextFile(args[0], false); 10 | long lines=tf.countLines(); 11 | tf.close(); 12 | System.out.println(args[0]+" has "+lines+" non-blank lines."); 13 | 14 | } 15 | 16 | } 17 | -------------------------------------------------------------------------------- /current/driver/LookAtID.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import java.util.Arrays; 4 | 5 | import fileIO.TextFile; 6 | import shared.Tools; 7 | import stream.SiteScoreR; 8 | 9 | /** 10 | * @author Brian Bushnell 11 | * @date Dec 3, 2012 12 | * 13 | */ 14 | public class LookAtID { 15 | 16 | 
public static void main(String[] args){ 17 | 18 | TextFile tf=new TextFile(args[0], true); 19 | 20 | long max=0; 21 | 22 | long line=0; 23 | 24 | for(String s=tf.nextLine(); s!=null; s=tf.nextLine()){ 25 | SiteScoreR[] array=SiteScoreR.fromTextArray(s); 26 | String[] split=s.split("\t"); 27 | for(int i=0; i=Integer.MAX_VALUE){ 32 | System.out.println("Found overflow ID "+ssr.numericID+" at line "+line); 33 | System.out.println("ssr="+ssr.toText()); 34 | System.out.println("raw="+s2); 35 | System.out.println("All:\n"+Arrays.toString(split)); 36 | System.out.println(); 37 | break; 38 | } 39 | } 40 | line++; 41 | } 42 | tf.close(); 43 | System.out.println("Max ID was "+max); 44 | 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /current/driver/PrintEnv.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import java.net.UnknownHostException; 4 | import java.util.ArrayList; 5 | import java.util.Date; 6 | import java.util.Map; 7 | 8 | import shared.Shared; 9 | 10 | /** 11 | * @author Brian Bushnell 12 | * @date Apr 4, 2013 13 | * 14 | */ 15 | public class PrintEnv { 16 | 17 | public static void main(String[] args){ 18 | 19 | Date d=new Date(); 20 | System.out.println("Time: "+d.getTime()+" = "+d+"\n"); 21 | 22 | Map env=System.getenv(); 23 | ArrayList keys=new ArrayList(env.keySet()); 24 | Shared.sort(keys); 25 | for(String s : keys){ 26 | System.out.println(s+"\t"+env.get(s)); 27 | } 28 | try { 29 | java.net.InetAddress localMachine = java.net.InetAddress.getLocalHost(); 30 | System.out.println("Hostname of local machine: " + localMachine.getHostName()); 31 | } catch (UnknownHostException e) { 32 | // TODO Auto-generated catch block 33 | e.printStackTrace(); 34 | } 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /current/driver/ProcessSpeed2.java: 
-------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import java.util.Locale; 4 | 5 | import fileIO.TextFile; 6 | 7 | /** 8 | * For generic data collation 9 | * @author Brian Bushnell 10 | * @date December 6, 2016 11 | * 12 | */ 13 | public class ProcessSpeed2 { 14 | 15 | public static void main(String[] args){ 16 | 17 | System.out.println("#real\tuser\tsys"); 18 | 19 | String fname=args[0].replace("in=", ""); 20 | TextFile tf=new TextFile(fname); 21 | for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){ 22 | if(line.startsWith("real\t")){ 23 | String time=line.split("\t")[1]; 24 | double seconds=toSeconds(time); 25 | System.out.print(String.format(Locale.ROOT, "%.3f\t", seconds)); 26 | }else if(line.startsWith("user\t")){ 27 | String time=line.split("\t")[1]; 28 | double seconds=toSeconds(time); 29 | System.out.print(String.format(Locale.ROOT, "%.3f\t", seconds)); 30 | }else if(line.startsWith("sys\t")){ 31 | String time=line.split("\t")[1]; 32 | double seconds=toSeconds(time); 33 | System.out.print(String.format(Locale.ROOT, "%.3f\n", seconds)); 34 | } 35 | 36 | } 37 | 38 | } 39 | 40 | public static double toSeconds(String s){ 41 | s=s.replaceAll("s", ""); 42 | String[] split=s.split("m"); 43 | String seconds=split[1], minutes=split[0]; 44 | return 60*Double.parseDouble(minutes)+Double.parseDouble(seconds); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /current/driver/RenameRefseqFiles.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import java.io.File; 4 | 5 | import tax.TaxNode; 6 | import tax.TaxTree; 7 | 8 | public class RenameRefseqFiles { 9 | 10 | public static void main(String[] args){ 11 | TaxTree tree=TaxTree.loadTaxTree(TaxTree.defaultTreeFile(), System.err, false, false); 12 | for(TaxNode tn : tree.nodes){ 13 | if(tn!=null){ 14 | String dir=tree.toDir(tn, 
args[0]); 15 | String path=dir+tn.id+".fa.gz"; 16 | File f=new File(path); 17 | if(f.exists()){ 18 | f.renameTo(new File(dir+"refseq_"+tn.id+".fa.gz")); 19 | } 20 | } 21 | } 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /current/driver/TransposeTextFile.java: -------------------------------------------------------------------------------- 1 | package driver; 2 | 3 | import fileIO.ReadWrite; 4 | import fileIO.TextFile; 5 | 6 | public class TransposeTextFile { 7 | 8 | public static void main(String[] args){ 9 | 10 | int skipLines=args.length>1 ? Integer.parseInt(args[1]) : 0; 11 | 12 | int minChrom=1; 13 | int maxChrom=22; 14 | 15 | for(int i=minChrom; i<=maxChrom; i++){ 16 | if(args[0].contains("#")){ 17 | process(args[0].replace("#", ""+i), skipLines); 18 | }else{ 19 | process(args[0], skipLines); 20 | break; 21 | } 22 | } 23 | 24 | } 25 | 26 | public static void process(String fname, int skipLines){ 27 | TextFile tf=new TextFile(fname, false); 28 | String[] lines=tf.toStringLines(); 29 | tf.close(); 30 | String[][] lines2=TextFile.doublesplitWhitespace(lines, true); 31 | 32 | StringBuilder sb=new StringBuilder(4096); 33 | 34 | int columns=lines2[skipLines].length; 35 | 36 | for(int column=0; column list=new ArrayList(4096); 18 | 19 | for(s=nextLine(); s!=null; s=nextLine()){ 20 | list.add(s); 21 | } 22 | 23 | return list.toArray(new String[list.size()]); 24 | 25 | } 26 | 27 | @Override 28 | public String nextLine(){ 29 | String line=readLine(); 30 | while(line!=null && false){ 31 | line=readLine(); 32 | } 33 | return line; 34 | } 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /current/fileIO/OpenFile.java: -------------------------------------------------------------------------------- 1 | package fileIO; 2 | 3 | import java.io.IOException; 4 | import java.io.InputStream; 5 | 6 | public class OpenFile { 7 | 8 | public static void main(String[] args){ 
9 | InputStream is=ReadWrite.getRawInputStream(args[0], false); 10 | byte[] line=new byte[100]; 11 | try { 12 | int r=is.read(line, 0, 100); 13 | if(r>0){ 14 | System.err.println("'"+new String(line, 0, r)+"'"); 15 | } 16 | } catch (IOException e) { 17 | // TODO Auto-generated catch block 18 | e.printStackTrace(); 19 | } 20 | try { 21 | is.close(); 22 | } catch (IOException e) { 23 | // TODO Auto-generated catch block 24 | e.printStackTrace(); 25 | } 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /current/fun/Chance.java: -------------------------------------------------------------------------------- 1 | package fun; 2 | 3 | import java.util.Locale; 4 | import java.util.Random; 5 | 6 | import shared.Parse; 7 | import shared.Shared; 8 | 9 | public class Chance { 10 | 11 | //Probability of something with a chance of X happening at least Y times in Z chances 12 | public static void main(String[] args){ 13 | 14 | int draws; 15 | int minSuccess; 16 | float prob; 17 | long rounds; 18 | try { 19 | draws = Parse.parseIntKMG(args[0]); 20 | minSuccess = Parse.parseIntKMG(args[1]); 21 | prob = Float.parseFloat(args[2]); 22 | rounds = Parse.parseKMG(args[3]); 23 | } catch (Exception e) { 24 | System.err.println("Chance (int)draws (int)minSuccess (float)prob (int)rounds"); 25 | System.exit(1); 26 | throw new RuntimeException(); 27 | } 28 | 29 | Random randy=Shared.threadLocalRandom(); 30 | 31 | long passes=0; 32 | for(long i=0; i=minSuccess ? 
1 : 0); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /current/fun/ProbShared.java: -------------------------------------------------------------------------------- 1 | package fun; 2 | 3 | public class ProbShared { 4 | 5 | public static void main(String args[]){ 6 | int k=Integer.parseInt(args[0]); 7 | int len1=Integer.parseInt(args[1]); 8 | int len2=Integer.parseInt(args[2]); 9 | 10 | System.out.println("Cardinality 1: "+cardinality(k, len1)); 11 | System.out.println("Cardinality 2: "+cardinality(k, len2)); 12 | System.out.println("Probability: "+probIntersect(k, len1, len2)); 13 | 14 | } 15 | 16 | static int cardinality(int k, int seqLength){ 17 | double space=Math.pow(4, k); 18 | int kmers=seqLength-k+1; 19 | double unique=0; 20 | for(int i=0; i set=new HashSet(); 20 | for(int i=0; i set){ 27 | fillRandomSet(k, len2, set); 28 | final long space=(long)Math.pow(4, k); 29 | final int kmers=len1-k+1; 30 | for(int i=0; i set){ 38 | set.clear(); 39 | final long space=(long)Math.pow(4, k); 40 | final int kmers=len-k+1; 41 | for(int i=0; i tiles=new ArrayList(); 26 | 27 | public int lane; 28 | 29 | } 30 | -------------------------------------------------------------------------------- /current/hiseq/Tile.java: -------------------------------------------------------------------------------- 1 | package hiseq; 2 | 3 | import java.util.ArrayList; 4 | 5 | import structures.ByteBuilder; 6 | 7 | public class Tile { 8 | 9 | public Tile(int lane_, int tile_){ 10 | lane=lane_; 11 | tile=tile_; 12 | } 13 | 14 | public MicroTile get(int x, int y){ 15 | final int xindex=x/xSize, yindex=y/ySize; 16 | ArrayList ylist=getIndex(xindex); 17 | while(yindex>=ylist.size()){ylist.add(null);} 18 | MicroTile mt=ylist.get(yindex); 19 | if(mt==null){ 20 | mt=new MicroTile(lane, tile, xindex*xSize, (xindex+1)*xSize-1, yindex*ySize, (yindex+1)*ySize-1); 21 | ylist.set(yindex, mt); 22 | } 23 | assert(mt.contains(x, y)) : x+", "+y+", 
"+xindex+", "+yindex+", "+mt; 24 | return mt; 25 | } 26 | 27 | private ArrayList getIndex(int xindex){ 28 | while(xindex>=xlist.size()){xlist.add(new ArrayList());} 29 | ArrayList ylist=xlist.get(xindex); 30 | return ylist; 31 | } 32 | 33 | @Override 34 | public String toString(){ 35 | ByteBuilder bb=new ByteBuilder(); 36 | // sb.append(">lane="+lane+"\ttile="+tile); 37 | for(ArrayList ylist : xlist){ 38 | if(ylist!=null){ 39 | for(MicroTile mt : ylist){ 40 | if(mt!=null){ 41 | mt.toText(bb); 42 | } 43 | } 44 | } 45 | } 46 | return bb.toString(); 47 | } 48 | 49 | public ArrayList> xlist=new ArrayList>(); 50 | 51 | public int lane; 52 | public int tile; 53 | public static int xSize=500; 54 | public static int ySize=500; 55 | 56 | } 57 | -------------------------------------------------------------------------------- /current/hmm/ProteinSummary.java: -------------------------------------------------------------------------------- 1 | package hmm; 2 | 3 | import java.util.HashMap; 4 | 5 | public class ProteinSummary { 6 | 7 | public ProteinSummary(String name){ 8 | this.name=name; 9 | } 10 | 11 | /** Returns true if anything changed */ 12 | public boolean add(HMMSearchLine line){ 13 | Integer old=map.get(line.name); 14 | if(old==null || old map=new HashMap(); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /current/jasper/Comparison.java: -------------------------------------------------------------------------------- 1 | package jasper; 2 | 3 | public class Comparison { 4 | 5 | /** 6 | * Object for storing sequence similarity values between two nodes 7 | * @param queryID_ int Node ID of the primary query sequence. 8 | * @param refID_ int Node ID of the reference sequence. 9 | * @param identity_ double similarity value between both nodes. 
10 | */ 11 | public Comparison(int queryID_, int refID_, double identity_) { 12 | 13 | //Set the queryID as the input value 14 | this.queryID = queryID_; 15 | 16 | //Set the refID as the input value 17 | this.refID = refID_; 18 | 19 | //Set the identity as the input value 20 | this.identity = identity_; 21 | } 22 | /** 23 | * toString method to return the queryID, the refID and the similarity with some formatting. 24 | */ 25 | public String toString() { 26 | return "Query node ID = " + queryID + ", Reference node ID = " + refID + 27 | ", Similarity identity = " + identity; 28 | } 29 | 30 | /** 31 | * Current node/organism of focus. 32 | */ 33 | int queryID; 34 | 35 | /** 36 | * Node being compared to the queryID node. 37 | */ 38 | int refID; 39 | 40 | /** 41 | * Similarity between the queryID node and the refID node. 42 | */ 43 | double identity; 44 | 45 | } 46 | -------------------------------------------------------------------------------- /current/jasper/Organism.java: -------------------------------------------------------------------------------- 1 | package jasper; 2 | 3 | 4 | public class Organism { 5 | 6 | String orgName; 7 | int taxId; 8 | 9 | // This is the constructor of the class Organism 10 | public Organism(int id, String name) { 11 | this.taxId = id; 12 | this.orgName = name; 13 | } 14 | 15 | //public void addName(String addName) { 16 | // orgName = addName; 17 | // } 18 | 19 | 20 | public void printOrg() { 21 | System.out.println("ID:"+ taxId ); 22 | System.out.println("Name:" + orgName ); 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /current/jgi/Difference.java: -------------------------------------------------------------------------------- 1 | package jgi; 2 | 3 | import fileIO.TextFile; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date Oct 9, 2013 8 | * 9 | */ 10 | public class Difference { 11 | 12 | public static void main(String[] args){ 13 | 14 | TextFile tf1=new 
TextFile(args[0], false); 15 | TextFile tf2=new TextFile(args[1], false); 16 | 17 | String s1=tf1.readLine(false); 18 | String s2=tf2.readLine(false); 19 | 20 | int difs=0; 21 | int i=1; 22 | while(s1!=null && s2!=null){ 23 | if(!s1.equals(s2)){ 24 | difs++; 25 | System.err.println("Line "+i+":\n"+s1+"\n"+s2+"\n"); 26 | assert(difs<5); 27 | } 28 | i++; 29 | s1=tf1.readLine(false); 30 | s2=tf2.readLine(false); 31 | } 32 | 33 | assert(s1==null && s2==null) : "Line "+i+":\n"+s1+"\n"+s2+"\n"; 34 | 35 | tf1.close(); 36 | tf2.close(); 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /current/jgi/FindString.java: -------------------------------------------------------------------------------- 1 | package jgi; 2 | 3 | import fileIO.TextFile; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date Jun 18, 2013 8 | * 9 | */ 10 | public class FindString { 11 | 12 | public static void main(String[] args){ 13 | String fname=args[0]; 14 | TextFile tf=new TextFile(fname, true); 15 | for(String line=tf.nextLine(); line!=null; line=tf.nextLine()){ 16 | boolean b=false; 17 | for(int i=1; i{ 8 | 9 | public StringCount(String name_, int len_, long reads_, long bases_){ 10 | name=name_; 11 | length=len_; 12 | reads=reads_; 13 | bases=bases_; 14 | } 15 | @Override 16 | public final int compareTo(StringCount o){ 17 | if(bases!=o.bases){return o.bases>bases ? 1 : -1;} 18 | if(reads!=o.reads){return o.reads>reads ? 
1 : -1;} 19 | return name.compareTo(o.name); 20 | } 21 | public final boolean equals(StringCount o){ 22 | return compareTo(o)==0; 23 | } 24 | @Override 25 | public final int hashCode(){ 26 | return name.hashCode(); 27 | } 28 | @Override 29 | public final String toString(){ 30 | return name+"\t"+length+"\t"+reads+"\t"+bases; 31 | } 32 | 33 | /*--------------------------------------------------------------*/ 34 | 35 | public final String name; 36 | public final int length; 37 | public final long reads, bases; 38 | } -------------------------------------------------------------------------------- /current/json/JsonLiteral.java: -------------------------------------------------------------------------------- 1 | package json; 2 | 3 | import java.util.Locale; 4 | 5 | public class JsonLiteral { 6 | 7 | public JsonLiteral(String s_){ 8 | s=s_; 9 | } 10 | 11 | public JsonLiteral(double value, int decimals){ 12 | s=String.format(Locale.ROOT, "%."+decimals+"f", value); 13 | } 14 | 15 | @Override 16 | public String toString(){return s;} 17 | 18 | private final String s; 19 | 20 | } 21 | -------------------------------------------------------------------------------- /current/kmer/AtomicShortArray.java: -------------------------------------------------------------------------------- 1 | package kmer; 2 | 3 | import java.util.concurrent.atomic.AtomicIntegerArray; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date May 14, 2015 8 | * 9 | */ 10 | public class AtomicShortArray { 11 | 12 | public AtomicShortArray(int length_){ 13 | assert(length_>=0); 14 | length=length_; 15 | intArray=new AtomicIntegerArray((length+1)/2); 16 | assert(false) : "TODO"; 17 | } 18 | 19 | // public short set(int position, short value){ 20 | // in 21 | // intArray 22 | // } 23 | 24 | private AtomicIntegerArray intArray; 25 | private final int length; 26 | 27 | } 28 | -------------------------------------------------------------------------------- /current/kmer/KmerBuffer.java: 
-------------------------------------------------------------------------------- 1 | package kmer; 2 | 3 | import structures.ByteBuilder; 4 | import structures.IntList; 5 | import structures.LongList; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date Jul 30, 2015 10 | * 11 | */ 12 | public class KmerBuffer { 13 | 14 | public KmerBuffer(int buflen, int k_, boolean initValues){ 15 | k=k_; 16 | kmers=new LongList(buflen); 17 | values=(initValues ? new IntList(buflen) : null); 18 | } 19 | 20 | public int add(long kmer){ 21 | assert(values==null); 22 | kmers.add(kmer); 23 | assert(values==null); 24 | return kmers.size; 25 | } 26 | 27 | public int addMulti(long kmer, int times){ 28 | assert(values==null); 29 | for(int i=0; i0){bb.append(',');} 54 | bb.appendKmer(kmers.get(i), k); 55 | } 56 | return bb.toString(); 57 | } 58 | 59 | private final int k; 60 | final LongList kmers; 61 | final IntList values; 62 | 63 | } 64 | -------------------------------------------------------------------------------- /current/kmer/Walker.java: -------------------------------------------------------------------------------- 1 | package kmer; 2 | 3 | public abstract class Walker { 4 | 5 | 6 | /** 7 | * Allows iteration through a hash map. 8 | * Concurrent modification is not recommended. 
9 | */ 10 | public abstract boolean next(); 11 | 12 | /** Current object kmer (key) for kmer package */ 13 | public abstract long kmer(); 14 | 15 | /** Current value */ 16 | public abstract int value(); 17 | 18 | } 19 | -------------------------------------------------------------------------------- /current/pacbio/RemoveNFromChromosome.java: -------------------------------------------------------------------------------- 1 | package pacbio; 2 | 3 | import java.io.File; 4 | 5 | import dna.ChromosomeArray; 6 | import dna.Data; 7 | import dna.FastaToChromArrays2; 8 | import dna.Gene; 9 | import fileIO.ReadWrite; 10 | 11 | /** 12 | * @author Brian Bushnell 13 | * @date Jul 19, 2012 14 | * 15 | */ 16 | public class RemoveNFromChromosome { 17 | 18 | public static void main(String[] args){ 19 | int ingenome=Integer.parseInt(args[0]); 20 | int outgenome=Integer.parseInt(args[1]); 21 | int padding=Integer.parseInt(args[2]); 22 | 23 | String outRoot=Data.ROOT_GENOME+outgenome+"/"; 24 | File f=new File(outRoot); 25 | if(!f.exists()){ 26 | f.mkdirs(); 27 | } 28 | 29 | Data.setGenome(ingenome); 30 | for(int chrom=1; chrom<=Data.numChroms; chrom++){ 31 | ChromosomeArray cha=Data.getChromosome(chrom); 32 | Data.unload(chrom, true); 33 | ChromosomeArray chb=new ChromosomeArray(chrom, Gene.PLUS, 0, cha.countDefinedBases()+2*padding+1); 34 | chb.maxIndex=-1; 35 | for(int i=0; i dest_){ 8 | c=c_; 9 | dest=dest_; 10 | } 11 | 12 | void doWork(){ 13 | assert(!isPoison()); 14 | try { 15 | c.ssuIdentity(); 16 | }catch (Throwable t){ 17 | t.printStackTrace(); 18 | } 19 | put(); 20 | } 21 | 22 | private void put(){ 23 | boolean success=false; 24 | while(!success){ 25 | try { 26 | dest.put(c); 27 | success=true; 28 | } catch (InterruptedException e) { 29 | // TODO Auto-generated catch block 30 | e.printStackTrace(); 31 | } 32 | } 33 | } 34 | 35 | final boolean isPoison(){return c==null;} 36 | 37 | final Comparison c; 38 | final ArrayBlockingQueue dest; 39 | 40 | } 
-------------------------------------------------------------------------------- /current/sketch/SketchIdComparator.java: -------------------------------------------------------------------------------- 1 | package sketch; 2 | 3 | import java.util.Comparator; 4 | 5 | public class SketchIdComparator implements Comparator { 6 | 7 | private SketchIdComparator(){}; 8 | /** Orders sketches by ascending sketchID. Uses explicit comparisons because subtraction can overflow for IDs of opposite sign. */ 9 | @Override 10 | public int compare(Sketch a, Sketch b) { 11 | return a.sketchID<b.sketchID ? -1 : (a.sketchID>b.sketchID ? 1 : 0); 12 | } 13 | 14 | public static final SketchIdComparator comparator=new SketchIdComparator(); 15 | 16 | } 17 | -------------------------------------------------------------------------------- /current/sketch/Whitelist.java: -------------------------------------------------------------------------------- 1 | package sketch; 2 | 3 | import kmer.AbstractKmerTable; 4 | import structures.LongList; 5 | 6 | public class Whitelist { 7 | 8 | public static void initialize(AbstractKmerTable[] tableArray){ 9 | assert(keySets==null); 10 | keySets=tableArray; 11 | } 12 | 13 | public static void apply(Sketch s){ 14 | assert(exists()); 15 | LongList list=new LongList(s.keys.length); 16 | for(long key : s.keys){ 17 | if(contains(key)){ 18 | list.add(key); 19 | } 20 | } 21 | if(list.size()!=s.keys.length){ 22 | s.keys=list.toArray(); 23 | } 24 | } 25 | 26 | /** Hashed value from an actual sketch */ 27 | public static boolean contains(long key){ 28 | if(keySets==null){return true;} 29 | int way=(int)(key%ways); 30 | return keySets[way].getValue(key)>0; 31 | } 32 | 33 | /** Raw hashed value which has not yet been subtracted from Long.MAX_VALUE */ 34 | public static boolean containsRaw(long key){ 35 | return contains(Long.MAX_VALUE-key); 36 | } 37 | 38 | public static boolean exists(){ 39 | return keySets!=null; 40 | } 41 | 42 | /** Hold codes.
A code X such that X%WAYS=Y will be stored in keySets[Y] */ 43 | private static AbstractKmerTable[] keySets; 44 | private static final int ways=31; 45 | 46 | } 47 | -------------------------------------------------------------------------------- /current/sort/ReadComparator.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import java.util.Comparator; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date Nov 9, 2016 10 | * 11 | */ 12 | public abstract class ReadComparator implements Comparator { 13 | 14 | public abstract void setAscending(boolean asc); 15 | 16 | } 17 | -------------------------------------------------------------------------------- /current/sort/ReadComparatorFlowcell.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import hiseq.FlowcellCoordinate; 4 | import stream.Read; 5 | 6 | /** 7 | * @author Brian Bushnell 8 | * @date Oct 27, 2014 9 | * 10 | */ 11 | 12 | public final class ReadComparatorFlowcell extends ReadComparator { 13 | 14 | private ReadComparatorFlowcell(){} 15 | 16 | @Override 17 | public int compare(Read r1, Read r2) { 18 | int x=compareInner(r1, r2); 19 | return ascending*x; 20 | } 21 | 22 | public int compareInner(Read r1, Read r2) { 23 | if(r1.id==null && r2.id==null){return r1.pairnum()-r2.pairnum();} 24 | if(r1.id==null){return -1;} 25 | if(r2.id==null){return 1;} 26 | 27 | FlowcellCoordinate fc1=tlc1.get(), fc2=tlc2.get(); 28 | if(fc1==null){ 29 | fc1=new FlowcellCoordinate(); 30 | fc2=new FlowcellCoordinate(); 31 | tlc1.set(fc1); 32 | tlc2.set(fc2); 33 | } 34 | fc1.setFrom(r1.id); 35 | fc2.setFrom(r2.id); 36 | 37 | int x=fc1.compareTo(fc2); 38 | if(x==0){return r1.pairnum()-r2.pairnum();} 39 | return x; 40 | } 41 | 42 | private int ascending=1; 43 | 44 | @Override 45 | public void setAscending(boolean asc){ 46 | ascending=(asc ? 
1 : -1); 47 | } 48 | 49 | public ThreadLocal tlc1=new ThreadLocal(); 50 | public ThreadLocal tlc2=new ThreadLocal(); 51 | 52 | public static final ReadComparatorFlowcell comparator=new ReadComparatorFlowcell(); 53 | 54 | } 55 | -------------------------------------------------------------------------------- /current/sort/ReadComparatorID.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import stream.Read; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date Oct 27, 2014 8 | * 9 | */ 10 | 11 | public final class ReadComparatorID extends ReadComparator{ 12 | 13 | @Override 14 | public int compare(Read r1, Read r2) { 15 | return compareInner(r1, r2)*mult; 16 | } 17 | 18 | public static int compareInner(Read r1, Read r2) { 19 | if(r1.numericIDr2.numericID){return 1;} 21 | 22 | int p1=r1.pairnum(), p2=r2.pairnum(); 23 | if(p1p2){return 1;} 25 | 26 | return r1.id.compareTo(r2.id); 27 | } 28 | 29 | public static final ReadComparatorID comparator=new ReadComparatorID(); 30 | 31 | @Override 32 | public void setAscending(boolean asc) { 33 | mult=asc ? 
1 : -1; 34 | } 35 | 36 | private int mult=1; 37 | 38 | } 39 | -------------------------------------------------------------------------------- /current/sort/ReadComparatorList.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import java.io.File; 4 | import java.util.HashMap; 5 | 6 | import fileIO.TextFile; 7 | import shared.Shared; 8 | import shared.Tools; 9 | import stream.Read; 10 | 11 | /** 12 | * @author Brian Bushnell 13 | * @date Oct 27, 2014 14 | * 15 | */ 16 | 17 | public final class ReadComparatorList extends ReadComparator { 18 | 19 | public ReadComparatorList(String fname){ 20 | String[] array; 21 | if(new File(fname).exists()){ 22 | array=TextFile.toStringLines(fname); 23 | }else{ 24 | array=fname.split(","); 25 | } 26 | int mapSize=(int)Tools.min(Shared.MAX_ARRAY_LEN, (array.length*3L)/2); 27 | map=new HashMap(mapSize); 28 | for(int i=0; i map; 60 | 61 | } 62 | -------------------------------------------------------------------------------- /current/sort/ReadComparatorName.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import stream.Read; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date Oct 27, 2014 8 | * 9 | */ 10 | 11 | public final class ReadComparatorName extends ReadComparator { 12 | 13 | private ReadComparatorName(){} 14 | 15 | @Override 16 | public int compare(Read r1, Read r2) { 17 | int x=compareInner(r1, r2); 18 | return ascending*x; 19 | } 20 | 21 | public static int compareInner(Read r1, Read r2) { 22 | 23 | if(r1.id==null && r2.id==null){return r1.pairnum()-r2.pairnum();} 24 | if(r1.id==null){return -1;} 25 | if(r2.id==null){return 1;} 26 | int x=r1.id.compareTo(r2.id); 27 | if(x==0){return r1.pairnum()-r2.pairnum();} 28 | return x; 29 | } 30 | 31 | private int ascending=1; 32 | 33 | @Override 34 | public void setAscending(boolean asc){ 35 | ascending=(asc ? 
1 : -1); 36 | } 37 | 38 | public static final ReadComparatorName comparator=new ReadComparatorName(); 39 | 40 | } 41 | -------------------------------------------------------------------------------- /current/sort/ReadComparatorPosition.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import stream.Read; 4 | import stream.SamLine; 5 | import var2.ScafMap; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date November 20, 2016 10 | * 11 | */ 12 | 13 | public final class ReadComparatorPosition extends ReadComparator { 14 | 15 | private ReadComparatorPosition(){} 16 | 17 | @Override 18 | public int compare(Read r1, Read r2) { 19 | int x=compareInner(r1, r2); 20 | return ascending*x; 21 | } 22 | 23 | public static int compareInner(Read r1, Read r2) { 24 | int x=compareInner(r1.samline, r2.samline); 25 | if(x!=0){return x;} 26 | if(r1.id==null && r2.id==null){return 0;} 27 | if(r1.id==null){return -1;} 28 | if(r2.id==null){return 1;} 29 | return r1.id.compareTo(r2.id); 30 | } 31 | 32 | public static int compareInner(SamLine a, SamLine b) { 33 | if(a.scafnum<0){a.setScafnum(scafMap);} 34 | if(b.scafnum<0){b.setScafnum(scafMap);} 35 | if(a.scafnum!=b.scafnum){return a.scafnum-b.scafnum;} 36 | if(a.pos!=b.pos){return a.pos-b.pos;} 37 | if(a.strand()!=b.strand()){return a.strand()-b.strand();} 38 | if(a.pnext!=b.pnext){return a.pnext-b.pnext;} 39 | if(a.pairnum()!=b.pairnum()){return a.pairnum()-b.pairnum();} 40 | return 0; 41 | } 42 | 43 | private int ascending=1; 44 | 45 | @Override 46 | public void setAscending(boolean asc){ 47 | ascending=(asc ? 
1 : -1); 48 | } 49 | 50 | public static final ReadComparatorPosition comparator=new ReadComparatorPosition(); 51 | public static ScafMap scafMap=null; 52 | 53 | } 54 | -------------------------------------------------------------------------------- /current/sort/ReadComparatorRandom.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import stream.Read; 4 | 5 | /** 6 | * @author Brian Bushnell 7 | * @date Mar 6, 2017 8 | * 9 | */ 10 | 11 | public final class ReadComparatorRandom extends ReadComparator{ 12 | 13 | @Override 14 | public int compare(Read r1, Read r2) { 15 | return compareInner(r1, r2)*mult; 16 | } 17 | 18 | public static int compareInner(Read r1, Read r2) { 19 | if(r1.randr2.rand){return 1;} 21 | return 0; 22 | } 23 | 24 | public static final ReadComparatorRandom comparator=new ReadComparatorRandom(); 25 | 26 | @Override 27 | public void setAscending(boolean asc) { 28 | mult=asc ? 1 : -1; 29 | } 30 | 31 | private int mult=1; 32 | 33 | } 34 | -------------------------------------------------------------------------------- /current/sort/ReadErrorComparator.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import java.util.Comparator; 4 | 5 | import stream.Read; 6 | 7 | /** 8 | * @author Brian Bushnell 9 | * @date May 30, 2013 10 | * 11 | */ 12 | public final class ReadErrorComparator implements Comparator{ 13 | 14 | @Override 15 | public int compare(Read r1, Read r2) { 16 | 17 | int a=(r1.errors+(r1.mate==null ? 0 : r1.mate.errors)); 18 | int b=(r2.errors+(r2.mate==null ? 0 : r2.mate.errors)); 19 | if(a!=b){return a-b;} 20 | 21 | a=(r1.length()+(r1.mate==null ? 0 : r1.mateLength())); 22 | b=(r2.length()+(r2.mate==null ? 0 : r2.mateLength())); 23 | if(a!=b){return b-a;} 24 | 25 | float a2=(r1.expectedErrors(true, 0)+(r1.mate==null ? 0 : r1.mate.expectedErrors(true, 0))); 26 | float b2=(r2.expectedErrors(true, 0)+(r2.mate==null ? 
0 : r2.mate.expectedErrors(true, 0))); 27 | if(a2!=b2){return a2>b2 ? 1 : -1;} 28 | 29 | if(r1.numericIDr2.numericID){return 1;} 31 | 32 | if(!r1.id.equals(r2.id)){return r1.id.compareTo(r2.id);} 33 | return 0; 34 | } 35 | 36 | public static final ReadErrorComparator comparator=new ReadErrorComparator(); 37 | 38 | } 39 | -------------------------------------------------------------------------------- /current/sort/ReadLengthComparator.java: -------------------------------------------------------------------------------- 1 | package sort; 2 | 3 | import stream.Read; 4 | 5 | /** 6 | * Sorts longest reads first 7 | * @author Brian Bushnell 8 | * @date Jul 19, 2013 9 | * 10 | */ 11 | public final class ReadLengthComparator extends ReadComparator { 12 | 13 | private ReadLengthComparator(){} 14 | 15 | @Override 16 | public int compare(Read a, Read b) { 17 | int x=compareInner(a, b); 18 | if(x==0){x=compareInner(a.mate, b.mate);} 19 | if(x==0){x=a.id.compareTo(b.id);} 20 | if(x==0){x=a.numericID>b.numericID ? 
1 : a.numericID { 7 | 8 | @SuppressWarnings("unchecked") 9 | public ConcurrentDepot(int bufSize, int numBufs){ 10 | bufferSize=bufSize; 11 | bufferCount=numBufs; 12 | 13 | lists=new ArrayList[numBufs]; 14 | empty=new ArrayBlockingQueue>(numBufs+1, fair); 15 | full=new ArrayBlockingQueue>(numBufs+1, fair); 16 | 17 | for(int i=0; i(bufSize); 19 | empty.add(lists[i]); 20 | } 21 | 22 | } 23 | 24 | 25 | public final ArrayBlockingQueue> empty; 26 | public final ArrayBlockingQueue> full; 27 | 28 | public final int bufferSize; 29 | public final int bufferCount; 30 | 31 | public static boolean fair=false; 32 | 33 | private final ArrayList[] lists; 34 | 35 | } 36 | -------------------------------------------------------------------------------- /current/stream/ConcurrentReadListDepot.java: -------------------------------------------------------------------------------- 1 | package stream; 2 | 3 | import java.util.ArrayList; 4 | import java.util.concurrent.ArrayBlockingQueue; 5 | 6 | public class ConcurrentReadListDepot { 7 | 8 | 9 | 10 | public ConcurrentReadListDepot(int bufSize, int numBufs){ 11 | bufferSize=bufSize; 12 | bufferCount=numBufs; 13 | 14 | lists=new ArrayList[numBufs]; 15 | empty=new ArrayBlockingQueue>(numBufs+1); 16 | full=new ArrayBlockingQueue>(numBufs+1); 17 | 18 | for(int i=0; i(bufSize); 20 | empty.add(lists[i]); 21 | } 22 | 23 | } 24 | 25 | 26 | public final ArrayBlockingQueue> empty; 27 | public final ArrayBlockingQueue> full; 28 | 29 | public final int bufferSize; 30 | public final int bufferCount; 31 | 32 | 33 | private final ArrayList[] lists; 34 | 35 | } 36 | -------------------------------------------------------------------------------- /current/stream/NullOutputStream.java: -------------------------------------------------------------------------------- 1 | package stream; 2 | 3 | import java.io.IOException; 4 | import java.io.OutputStream; 5 | 6 | /** Writes to nowhere. 
7 | * Courtesy of https://stackoverflow.com/a/692580 and https://stackoverflow.com/a/691835 */ 8 | public class NullOutputStream extends OutputStream { 9 | 10 | @Override 11 | public void write(int b) throws IOException {} 12 | 13 | @Override 14 | public void write(byte[] b) throws IOException {} 15 | 16 | @Override 17 | public void write(byte[] b, int off, int len) throws IOException {} 18 | 19 | } 20 | -------------------------------------------------------------------------------- /current/structures/IntListCompressor.java: -------------------------------------------------------------------------------- 1 | package structures; 2 | 3 | public final class IntListCompressor { 4 | 5 | public void add(int value){ 6 | list.add(value); 7 | if(list.freeSpace()==0 && lastCompression<0.75f*list.size()){ 8 | sortAndShrink(); 9 | } 10 | } 11 | 12 | public void sortAndShrink(){ 13 | if(lastCompression>=list.size()){return;} 14 | list.sort(); 15 | list.shrinkToUnique(); 16 | lastCompression=list.size(); 17 | } 18 | 19 | public IntList list=new IntList(4); 20 | private int lastCompression=0; 21 | 22 | } 23 | -------------------------------------------------------------------------------- /current/structures/LongHeapSetInterface.java: -------------------------------------------------------------------------------- 1 | package structures; 2 | 3 | public interface LongHeapSetInterface { 4 | 5 | public boolean add(long key); 6 | 7 | public int increment(long key, int incr); 8 | 9 | public void clear(); 10 | 11 | public int size(); 12 | 13 | public int capacity(); 14 | 15 | public boolean hasRoom(); 16 | 17 | public LongHeap heap(); 18 | 19 | public long peek(); 20 | 21 | public boolean contains(long key); 22 | 23 | } 24 | 25 | -------------------------------------------------------------------------------- /current/structures/LongPair.java: -------------------------------------------------------------------------------- 1 | package structures; 2 | 3 | public class LongPair 
implements Comparable{ 4 | 5 | public LongPair(long a_, long b_){ 6 | a=a_; 7 | b=b_; 8 | } 9 | 10 | public LongPair(){} 11 | 12 | @Override 13 | public int compareTo(LongPair other) { 14 | if(a!=other.a){return a>other.a ? 1 : -1;} 15 | return b>other.b ? 1 : b { 4 | 5 | public StringNum(String s_, long n_){ 6 | s=s_; 7 | n=n_; 8 | } 9 | 10 | public long increment(){ 11 | return (n=n+1); 12 | } 13 | 14 | public long increment(long x){ 15 | return (n=n+x); 16 | } 17 | 18 | public void add(StringNum sn) { 19 | n+=sn.n; 20 | } 21 | 22 | /* (non-Javadoc) 23 | * @see java.lang.Comparable#compareTo(java.lang.Object) 24 | */ 25 | @Override 26 | public int compareTo(StringNum o) { 27 | if(no.n){return 1;} 29 | return s.compareTo(o.s); 30 | } 31 | 32 | @Override 33 | public String toString(){ 34 | return s+"\t"+n; 35 | } 36 | /** Consistent with equals: a null string contributes 0 instead of throwing NullPointerException. */ 37 | @Override 38 | public int hashCode(){ 39 | return ((int)(n&Integer.MAX_VALUE))^(s==null ? 0 : s.hashCode()); 40 | } 41 | /** Null-safe and type-safe: an argument that is not a StringNum compares unequal instead of throwing ClassCastException. */ 42 | @Override 43 | public boolean equals(Object other){ 44 | return (other instanceof StringNum) && equals((StringNum)other); 45 | } 46 | 47 | public boolean equals(StringNum other){ 48 | if(other==null){return false;} 49 | if(n!=other.n){return false;} 50 | if(s==other.s){return true;} 51 | if(s==null || other.s==null){return false;} 52 | return s.equals(other.s); 53 | } 54 | 55 | /*--------------------------------------------------------------*/ 56 | 57 | public final String s; 58 | public long n; 59 | 60 | } 61 | -------------------------------------------------------------------------------- /current/structures/StringPair.java: -------------------------------------------------------------------------------- 1 | package structures; 2 | 3 | public class StringPair { 4 | 5 | public StringPair(String a_, String b_){ 6 | a=a_; 7 | b=b_; 8 | } 9 | 10 | @Override 11 | public String toString(){return "("+a+", "+b+")";} 12 | 13 | public String a; 14 | public String b; 15 | 16 | } 17 | -------------------------------------------------------------------------------- 
/current/tax/TaxApp.java: -------------------------------------------------------------------------------- 1 | package tax; 2 | 3 | import javax.swing.JButton; 4 | import javax.swing.JFrame; 5 | import javax.swing.JOptionPane; 6 | 7 | public class TaxApp { 8 | 9 | public static void main(final String[] args) { 10 | final JFrame parent = new JFrame(); 11 | JButton button = new JButton(); 12 | 13 | button.setText("Button text"); 14 | parent.add(button); 15 | parent.pack(); 16 | parent.setVisible(true); 17 | 18 | button.addActionListener(new java.awt.event.ActionListener() { 19 | @Override 20 | public void actionPerformed(java.awt.event.ActionEvent evt) { 21 | String name = JOptionPane.showInputDialog(parent, 22 | "Prompt", null); 23 | } 24 | }); 25 | } 26 | 27 | } -------------------------------------------------------------------------------- /current/template/Accumulator.java: -------------------------------------------------------------------------------- 1 | package template; 2 | 3 | /** 4 | * Interface for accumulating statistics captured by threads. 
5 | * 6 | * @author Brian Bushnell 7 | * @date November 19, 2015 8 | * 9 | * @param 10 | */ 11 | public interface Accumulator { 12 | 13 | /** Accumulate personal variables */ 14 | public void accumulate(T t); 15 | 16 | /** True if it finished successfully */ 17 | public boolean success(); 18 | 19 | } 20 | -------------------------------------------------------------------------------- /current/template/DoWorker.java: -------------------------------------------------------------------------------- 1 | package template; 2 | 3 | public interface DoWorker { 4 | 5 | public void doWork(); 6 | 7 | } 8 | -------------------------------------------------------------------------------- /current/template/ThreadPoolJob.java: -------------------------------------------------------------------------------- 1 | package template; 2 | 3 | import java.util.concurrent.ArrayBlockingQueue; 4 | 5 | import shared.KillSwitch; 6 | 7 | /** 8 | * 9 | * @author Brian Bushnell 10 | * @date August 26, 2019 11 | * 12 | */ 13 | public class ThreadPoolJob { 14 | 15 | public ThreadPoolJob(X x_, ArrayBlockingQueue dest_){ 16 | x=x_; 17 | dest=dest_; 18 | } 19 | 20 | /** Process a job */ 21 | final void doJob(){ 22 | result=doWork(); 23 | cleanup(); 24 | } 25 | 26 | /** Do whatever specific work needs to be done for this job */ 27 | public Y doWork(){ 28 | KillSwitch.kill("Unimplemented Method"); 29 | return null; 30 | } 31 | 32 | /** Retire the job to the destination queue */ 33 | final void cleanup(){ 34 | boolean success=false; 35 | while(!success) { 36 | try { 37 | dest.put(x); 38 | success=true; 39 | } catch (InterruptedException e) { 40 | // TODO Auto-generated catch block 41 | e.printStackTrace(); 42 | } 43 | } 44 | } 45 | 46 | final boolean isPoison(){return x==null;} 47 | 48 | public final X x; 49 | final ArrayBlockingQueue dest; 50 | public Y result; 51 | 52 | } 53 | -------------------------------------------------------------------------------- /current/ukmer/WalkerU.java: 
-------------------------------------------------------------------------------- 1 | package ukmer; 2 | 3 | public abstract class WalkerU { 4 | 5 | /** 6 | * Allows iteration through a hash map. 7 | * Concurrent modification is not recommended. 8 | */ 9 | public abstract boolean next(); 10 | 11 | /** Current object kmer (key) for ukmer package */ 12 | public abstract Kmer kmer(); 13 | 14 | /** Current value */ 15 | public abstract int value(); 16 | 17 | } 18 | -------------------------------------------------------------------------------- /docs/Legal.txt: -------------------------------------------------------------------------------- 1 | BBTools Copyright (c) 2014, The Regents of the University of California, through Lawrence Berkeley National Laboratory (subject to receipt of any required approvals from the U.S. Dept. of Energy). All rights reserved. 2 | 3 | 4 | 5 | If you have questions about your rights to use or distribute this software, please contact Technology Transfer and IP Management at TTD@lbl.gov referring to " BB Tools (LBNL Ref 2014-042)." 6 | 7 | 8 | 9 | NOTICE. This software was developed under funding from the U.S. Department of Energy. As such, the U.S. Government has been granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, and perform publicly and display publicly. Beginning five (5) years after the date permission to assert copyright is obtained from the U.S. Department of Energy, and subject to any subsequent five (5) year renewals, the U.S. Government is granted for itself and others acting on its behalf a paid-up, nonexclusive, irrevocable, worldwide license in the Software to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. 
10 | -------------------------------------------------------------------------------- /docs/Legal_Illumina.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/docs/Legal_Illumina.txt -------------------------------------------------------------------------------- /docs/RestartingServers.txt: -------------------------------------------------------------------------------- 1 | These instructions are for JGI internal use. 2 | 3 | If a taxonomy server or Sketch server dies: 4 | 5 | Look at /global/projectb/sandbox/gaag/bbtools/server/start*Server.sh, which includes: 6 | 7 | startTaxServer.sh 8 | startRefseqServer.sh 9 | startProteinServer.sh 10 | startNtServer.sh 11 | startSilvaServer.sh 12 | 13 | Each file indicates where to run the script. So, for example, startTaxServer.sh says: #Run this on gpweb25 14 | ssh to the indicated machine, change directory to /global/projectb/sandbox/gaag/bbtools/server/, and run startTaxServer.sh. 15 | It does not matter which user you are logged in as. But if you run into permission problems, make a copy of the script somewhere else and run it there. 16 | 17 | To update taxonomy, go to /global/projectb/sandbox/gaag/bbtools/tax 18 | 1) Create a new directory 19 | 2) Copy the old shellscripts from "latest" 20 | 3) Execute the shellscripts 21 | 4) Point the "latest" symlink to the new directory after execution finishes 22 | 5) Restart the taxonomy server 23 | 24 | For example: 25 | cd /global/projectb/sandbox/gaag/bbtools/tax/ 26 | mkdir feb13_2019 27 | cp latest/*.sh feb13_2019 28 | cd feb13_2019 29 | sh fetchOuter.sh 1>fetch.o 2>&1 & 30 | #(wait until it finishes; it will create a file called "finished" when complete) 31 | cd .. 
32 | ln -sfn feb13_2019 latest 33 | #(restart the taxonomy server) 34 | -------------------------------------------------------------------------------- /docs/UsageGuide.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/docs/UsageGuide.txt -------------------------------------------------------------------------------- /docs/citation.txt: -------------------------------------------------------------------------------- 1 | Citation: 2 | 3 | All tools in the BBTools package are free to use. If you use BBTools in work leading to a publication, please cite it! 4 | BBMerge has been published in PloS One: http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0185056 5 | 6 | Please cite any of the unpublished tools using the Sourceforge website, something like this: 7 | BBMap - Bushnell B. - sourceforge.net/projects/bbmap/ 8 | -------------------------------------------------------------------------------- /docs/compiling.txt: -------------------------------------------------------------------------------- 1 | To install BBTools, unzip it (typically with the command "tar -xzf BBMap_38.20.tar.gz") and it will run as long as Java is installed. 2 | 3 | BBTools has 4 components: 4 | 1) Java code. This is the bulk of the code and all that is strictly necessary. It is already compiled for Java 6+ and does not need recompiling. Most BBTools can run in Java 6, and all of them can run in Java 7 or higher. No components will work with Java versions below 6. 5 | 2) Bash shellscripts. These are present to make it easier to invoke the Java code (by automatically detecting and setting memory limits, for example). If you are not using Bash and Linux, then the shellscripts probably won't work, but you can still invoke the Java code from the command line. Shellscripts are interpreted and do not require compiling. 
They should work in Windows 10 now, in addition to Linux and MacOS. 6 | 3) C code. This was developed by Jonathan Rood to accelerate BBMap, BBMerge, and Dedupe, but is currently disabled. 7 | -------------------------------------------------------------------------------- /docs/git.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/docs/git.txt -------------------------------------------------------------------------------- /docs/guides/A_SampleGuide.txt: -------------------------------------------------------------------------------- 1 | A_Sample Guide 2 | Written by Brian Bushnell 3 | Last updated December 22, 2015 4 | 5 | A_Sample and A_SampleMT are sample BBTools that do some kind of manipulation on reads. They are designed for rapid creation of custom high-performance tools, following BBTools best practices and taking advantage of its existing infrastructure such as read input streams. A_Sample is singlethreaded, and A_SampleMT is multithreaded. Neither currently does anything to the reads (they act as a "null transform"); but there is an empty function, "processReadPair", into which new code can be placed to modify or filter Read objects. 6 | 7 | 8 | *Notes* 9 | 10 | 11 | Java Code: 12 | 13 | These programs are not for end-users, only developers. The general idea is that you make a copy of A_Sample or A_SampleMT, rename it, and change the processReadPair function. Usually, you will need to add any additional fields at the bottom and additional parsing terms at the top. Be sure that a new parse keyword you add does not conflict with one already in the Parser class. 14 | 15 | 16 | Shellscript: 17 | 18 | After the java code is done, copy a_sample_mt.sh and rename that, too. Then modify the shellscript's usage function at the top, the execution function at the bottom (where the java command is called, e.g. 
"java $EA $z -cp $CP jgi.A_SampleMT $@"), and the default amounts of memory (default of variable "z" and the line with "freeRam"). 19 | -------------------------------------------------------------------------------- /docs/guides/AddAdaptersGuide.txt: -------------------------------------------------------------------------------- 1 | AddAdapters Guide 2 | Written by Brian Bushnell 3 | Last updated December 22, 2015 4 | 5 | AddAdapters is designed for grading the performance of adapter-trimming tools. It can add adapters to reads, and annotate the reads with their correct post-trimming length; and it can be run on trimmed reads, to calculate the rates of correct and incorrect trimming. However, it does not understand insert size, so for adding adapters to paired reads, it's better to use RandomReads. As such, this is deprecated for paired reads. 6 | 7 | 8 | *Usage Examples* 9 | 10 | 11 | To add adapters to reads: 12 | addadapters.sh in=a.fq out=b.fq adapters=adapters.fa 13 | 14 | 15 | To grade trimmed reads: 16 | addadapters.sh in=trimmed.fq grade 17 | 18 | 19 | To use RandomReads instead, to add adapters in the correct location according to insert size: 20 | randomreads.sh ref=ref.fa out=reads.fq len=150 paired reads=100k mininsert=50 maxinsert=350 fragadapter1=ACTG fragadapter2=ACTG 21 | rename.sh in=reads.fq out=renamed.fq renamebytrim interleaved 22 | 23 | The result of this will still be named correctly for grading by addadapters. "ACTG" would normally be a much longer adapter sequence. 
24 | -------------------------------------------------------------------------------- /docs/guides/BBSketchGuide.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/docs/guides/BBSketchGuide.txt -------------------------------------------------------------------------------- /docs/guides/FilterByTileGuide.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/docs/guides/FilterByTileGuide.txt -------------------------------------------------------------------------------- /docs/guides/SplitNexteraGuide.txt: -------------------------------------------------------------------------------- 1 | SplitNextera Guide 2 | Written by Brian Bushnell 3 | Last updated December 22, 2015 4 | 5 | SplitNextera splits Nextera LMP libraries into subsets based on linker orientation. It is designed strictly for Nextera LMP (long-mate-pair) reads, not for normal libraries using a Nextera kit. Nextera LMP libraries must be split prior to further processing; they are not usable raw. Adapter-trimming should still be done on Nextera LMP libraries prior to splitting. 6 | 7 | 8 | *Usage Examples* 9 | 10 | 11 | Processing a Nextera LMP library: 12 | bbduk.sh in=reads.fq out=trimmed.fq ref=adapters.fa ktrim=r k=23 mink=11 hdist=1 tpe tbo 13 | splitnextera.sh in=trimmed.fq out=lmp.fq outf=fragments.fq outu=unknown.fq outs=singletons.fq mask 14 | 15 | This will produce 4 output files - long-mate pairs, fragments (short pairs), singletons, and unknown. The unknown are typically long-mate pairs, but the linker was not found so they might be short pairs. The "mask" flag tells the program to look for the junction. It's possible to alternately look for the junction with BBDuk, instead (see below). 
16 | 17 | 18 | Processing a Nextera LMP library, but finding the junctions with BBDuk: 19 | bbduk.sh in=reads.fq out=trimmed.fq ref=adapters.fa ktrim=r k=23 mink=11 hdist=1 tpe tbo 20 | bbduk.sh in=trimmed.fq out=stdout.fq ktmask=J k=19 hdist=1 mink=11 hdist2=0 literal=CTGTCTCTTATACACATCTAGATGTGTATAAGAGACAG | splitnextera.sh in=stdin.fq out=lmp.fq outf=fragments.fq outu=unknown.fq outs=singletons.fq 21 | 22 | This is somewhat faster but will yield the same output. 23 | -------------------------------------------------------------------------------- /docs/guides/StatsGuide.txt: -------------------------------------------------------------------------------- 1 | Stats Guide 2 | Written by Brian Bushnell 3 | Last updated December 22, 2015 4 | 5 | Stats is designed to generate basic assembly statistics such as scaffold count, N50, L50, GC content, gap percent, etc. It can also generate per-sequence GC-content information. The reason for the existence of stats is to replace prior tools that had similar function, but could not scale to large metagenomes; Stats is capable of processing an assembly of practically unbounded size, with sequences of practically unbounded length. And it does this rapidly, in a small amount of memory. Stats can also estimate the memory requirements of BBMap for a given assembly and kmer length. 6 | 7 | 8 | *Notes* 9 | 10 | 11 | Memory: 12 | 13 | Stats uses 120MB of RAM regardless of the assembly size. 14 | 15 | 16 | Threads: 17 | 18 | Stats is singlethreaded; it does not do garbage-collection or even use independent threads for I/O streams, unlike other BBTools. 
19 | 20 | 21 | *Usage Examples* 22 | 23 | 24 | To get stats on an assembly: 25 | stats.sh in=contigs.fa 26 | 27 | 28 | To compare multiple assemblies: 29 | statswrapper.sh in=a.fa,b.fa,c.fa format=6 30 | 31 | 32 | To print GC and length information per sequence: 33 | stats.sh in=contigs.fa gc=gc.txt gcformat=4 34 | -------------------------------------------------------------------------------- /docs/readme_config.txt: -------------------------------------------------------------------------------- 1 | BBTools Config File Readme 2 | Written by Brian Bushnell 3 | Last updated May 12, 2015 4 | 5 | A config file is a text file with a set of parameters that will be added to the command line. 6 | The format is one parameter per line, with the # symbol indicating comments. 7 | To use a config file, use the config=file flag. For example, take BBDuk: 8 | 9 | bbduk.sh in=reads.fq out=trimmed.fq ref=ref.fa k=23 mink=11 hdist=1 tbo tpe 10 | 11 | That is equivalent to: 12 | 13 | bbduk.sh in=reads.fq out=trimmed.fq ref=ref.fa config=trimadapters.txt 14 | ...if trimadapters.txt contained these lines: 15 | k=23 16 | mink=11 17 | hdist=1 18 | tbo 19 | tpe 20 | 21 | 22 | Any parameter placed AFTER the config file will override the same parameter if it is in the config file. 23 | For example, in this case k=20 will be used: 24 | bbduk.sh in=reads.fq out=trimmed.fq ref=ref.fa config=trimadapters.txt k=20 25 | 26 | But in this case, k=23 will be used, from the config file: 27 | bbduk.sh in=reads.fq out=trimmed.fq ref=ref.fa k=20 config=trimadapters.txt 28 | 29 | What are config files for? Well, mainly, to overcome difficulties like whitespace in file paths, or command lines that are too long. 30 | There are some example config files in bbmap/config/. They are not used unless you specifically tell a program to use them. 
31 | -------------------------------------------------------------------------------- /jni/BBMergeOverlapper.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/jni/BBMergeOverlapper.o -------------------------------------------------------------------------------- /jni/BandedAlignerJNI.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/jni/BandedAlignerJNI.o -------------------------------------------------------------------------------- /jni/IceCreamAlignerJNI.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/jni/IceCreamAlignerJNI.o -------------------------------------------------------------------------------- /jni/MultiStateAligner11tsJNI.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/jni/MultiStateAligner11tsJNI.o -------------------------------------------------------------------------------- /jni/icecream_IceCreamAlignerJNI.h: -------------------------------------------------------------------------------- 1 | /* DO NOT EDIT THIS FILE - it is machine generated */ 2 | #include <jni.h> 3 | /* Header for class icecream_IceCreamAlignerJNI */ 4 | 5 | #ifndef _Included_icecream_IceCreamAlignerJNI 6 | #define _Included_icecream_IceCreamAlignerJNI 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | /* 11 | * Class: icecream_IceCreamAlignerJNI 12 | * Method: alignForwardJNI 13 | * Signature: ([I[I[IIIIF)V 14 | */ 15 | JNIEXPORT void JNICALL Java_icecream_IceCreamAlignerJNI_alignForwardJNI 16 | (JNIEnv *, jclass, jintArray, jintArray, jintArray, jint, jint, jint, 
jfloat); 17 | 18 | /* 19 | * Class: icecream_IceCreamAlignerJNI 20 | * Method: alignForward16JNI 21 | * Signature: ([S[S[ISSSF)V 22 | */ 23 | JNIEXPORT void JNICALL Java_icecream_IceCreamAlignerJNI_alignForward16JNI 24 | (JNIEnv *, jclass, jshortArray, jshortArray, jintArray, jshort, jshort, jshort, jfloat); 25 | 26 | /* 27 | * Class: icecream_IceCreamAlignerJNI 28 | * Method: alignForwardShortJNI 29 | * Signature: ([I[I[III)V 30 | */ 31 | JNIEXPORT void JNICALL Java_icecream_IceCreamAlignerJNI_alignForwardShortJNI 32 | (JNIEnv *, jclass, jintArray, jintArray, jintArray, jint, jint); 33 | 34 | /* 35 | * Class: icecream_IceCreamAlignerJNI 36 | * Method: alignForwardShort16JNI 37 | * Signature: ([S[S[ISS)V 38 | */ 39 | JNIEXPORT void JNICALL Java_icecream_IceCreamAlignerJNI_alignForwardShort16JNI 40 | (JNIEnv *, jclass, jshortArray, jshortArray, jintArray, jshort, jshort); 41 | 42 | #ifdef __cplusplus 43 | } 44 | #endif 45 | #endif 46 | -------------------------------------------------------------------------------- /jni/libbbtoolsjni.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/jni/libbbtoolsjni.dylib -------------------------------------------------------------------------------- /jni/libbbtoolsjni.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/jni/libbbtoolsjni.so -------------------------------------------------------------------------------- /jni/makefile.linux: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | INCS=-I"$(JAVA_HOME)/include" -I"$(JAVA_HOME)/include/linux" 3 | CFLAGS=-O3 -Wall -std=c99 -fPIC 4 | LDFLAGS=-O3 -Wall -std=c99 -fPIC -shared 5 | OBJ=BandedAlignerJNI.o MultiStateAligner11tsJNI.o BBMergeOverlapper.o IceCreamAlignerJNI.o 6 | 7 | 
%.o: %.c 8 | $(CC) $(CFLAGS) $(INCS) -c -o $@ $< 9 | 10 | libbbtoolsjni.so: $(OBJ) 11 | $(CC) -o $@ $^ $(LDFLAGS) 12 | 13 | .PHONY: clean 14 | 15 | clean: 16 | rm -f *.o *~ core libbbtoolsjni.so 17 | -------------------------------------------------------------------------------- /jni/makefile.osx: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | INCS=-I"$(JAVA_HOME)/include" -I"$(JAVA_HOME)/include/darwin" 3 | CFLAGS=-O3 -Wall -std=c99 -fPIC 4 | LDFLAGS=-O3 -Wall -std=c99 -fPIC -dynamiclib 5 | OBJ=BandedAlignerJNI.o MultiStateAligner11tsJNI.o BBMergeOverlapper.o IceCreamAlignerJNI.o 6 | 7 | %.o: %.c 8 | $(CC) $(CFLAGS) $(INCS) -c -o $@ $< 9 | 10 | libbbtoolsjni.dylib: $(OBJ) 11 | $(CC) -o $@ $^ $(LDFLAGS) 12 | 13 | .PHONY: clean 14 | 15 | clean: 16 | rm -f *.o *~ core libbbtoolsjni.dylib 17 | -------------------------------------------------------------------------------- /jni/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm I*.o *.so ; make -f makefile.linux 3 | -------------------------------------------------------------------------------- /pipelines/covid/makeSummary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Written by Brian Bushnell 4 | ## Last modified April 11, 2020 5 | ## Description: Summarizes all the runs of "processCorona.sh" in this directory. 6 | ## Then tars them if you want to send them somewhere. 7 | ## Should be run only after all samples are processed individually. 8 | 9 | ## Set minimum coverage for variant calls. 10 | MINCOV=5 11 | 12 | ## Specify the viral reference file. 13 | ## NC_045512.fasta contains the SARS-CoV-2 genome, equivalent to bbmap/resources/Covid19_ref.fa 14 | REF="NC_045512.fasta" 15 | 16 | ## Call variants in multisample mode to solve things. 
17 | ## This reports the genotype of *all* samples at any position at which a variant is called in *any* sample. 18 | callvariants.sh *_deduped_trimclip.sam.gz ref="$REF" multisample out=allVars.vcf ow -Xmx4g usebias=f strandedcov minstrandratio=0 maf=0.6 minreads="$MINCOV" mincov="$MINCOV" minedistmax=30 minedist=16 flagnearby 19 | 20 | ## Make a summary of coverage at various depth cutoffs for all libraries. 21 | summarizecoverage.sh *basecov_border5.txt out=coverageSummary.txt 22 | 23 | mkdir output 24 | cp *.sh output 25 | cp *.bam* output 26 | cp *.txt output 27 | cp *.vcf output 28 | cp *genome*.fa output 29 | 30 | rm results.tar 31 | tar -cf results.tar output 32 | -------------------------------------------------------------------------------- /pipelines/covid/processCoronaWrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Written by Brian Bushnell 4 | ## Last modified April 11, 2020 5 | ## Description: Outer wrapper for processing a bunch of Covid samples in a directory. 6 | ## This is just a template that needs to be modified before use, according to the input file names. 7 | 8 | echo "This template must be modifed before use according to your file names." 9 | exit 10 | 11 | ## Optionally, get rid of old files first, if the pipeline is being rerun. 12 | rm *.sam.gz *.bam *.bai *.txt *_genome.fa *_adapters.fa *.vcf *.vcf.gz 13 | 14 | ## Generate quality-score calibration matrices. 15 | ## This only needs to be run on one sample. 16 | sh ./recal.sh Sample1 17 | 18 | ## Add a line like this for each interleaved PE file named, for example, Sample1.fq.gz 19 | ## Alternately you could put some kind of loop here, depending on your naming convention. 20 | sh processCorona.sh Sample1 21 | sh processCorona.sh Sample2 22 | ## etc. 23 | 24 | ## Summarize the output if there are multiple libraries (optional). 
25 | sh makeSummary.sh 1>makeSummary.o 2>&1 26 | -------------------------------------------------------------------------------- /pipelines/cutRna.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | #Written by Brian Bushnell 5 | #Last updated August 8, 2019 6 | 7 | #This script is designed to cut out the ribosomal and tRNAs from annotated genomes, 8 | #which have paired fna.gz and gff.gz files. The output is then used to create 9 | #sets of kmers such that every sequence will contain at least one of the kmers in the set. 10 | #In other words, a sequence not sharing a kmer with the set is probably not 11 | #a sequence of that type. These are used by CallGenes, but Silva is used for the 12 | #ssu and lsu (16S and 23S). 13 | 14 | 15 | cutgff.sh fastawrap=10k types=rRNA out=23S.fa attributes=23S */*.fna.gz 16 | cutgff.sh fastawrap=10k types=rRNA out=16S.fa attributes=16S */*.fna.gz 17 | cutgff.sh fastawrap=10k types=rRNA out=5S.fa attributes=5S */*.fna.gz 18 | cutgff.sh fastawrap=10k types=tRNA out=tRNA.fa */*.fna.gz 19 | 20 | kmerfilterset.sh in=23S.fa k=15 out=23S_15mers.fa ow minkpp=1 maxkpp=1 rcomp=f 21 | kmerfilterset.sh in=16S.fa k=15 out=16S_15mers.fa ow minkpp=1 maxkpp=1 rcomp=f 22 | kmerfilterset.sh in=5S.fa k=9 out=5S_9mers.fa ow minkpp=1 maxkpp=1 rcomp=f 23 | kmerfilterset.sh in=tRNA.fa k=9 out=tRNA_9mers.fa ow minkpp=1 maxkpp=1 rcomp=f 24 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchMito.sh: -------------------------------------------------------------------------------- 1 | #Fetches mitochondrial genomes and annotations from NCBI. 
2 | 3 | wget -q -O - ftp://ftp.ncbi.nih.gov/genomes/refseq/mitochondrion/*genomic.gbff.gz > mito.genomic.gbff.gz 4 | wget -q -O - ftp://ftp.ncbi.nih.gov/genomes/refseq/mitochondrion/*genomic.fna.gz > mito.genomic.fna.gz 5 | gbff2gff.sh mito.genomic.gbff.gz mito.genomic.gff.gz 6 | 7 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchNtOuter.sh: -------------------------------------------------------------------------------- 1 | nohup time sh fetchNt.sh 2 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchPlasmid.sh: -------------------------------------------------------------------------------- 1 | #Fetches plasmid genomes and annotations from NCBI. 2 | 3 | wget -q -O - ftp://ftp.ncbi.nih.gov/genomes/refseq/plasmid/*genomic.gbff.gz > plasmid.genomic.gbff.gz 4 | wget -q -O - ftp://ftp.ncbi.nih.gov/genomes/refseq/plasmid/*genomic.fna.gz > plasmid.genomic.fna.gz 5 | gbff2gff.sh plasmid.genomic.gbff.gz plasmid.genomic.gff.gz 6 | 7 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchPlastid.sh: -------------------------------------------------------------------------------- 1 | #Fetches plastid genomes and annotations from NCBI. 
2 | 3 | wget -q -O - ftp://ftp.ncbi.nih.gov/genomes/refseq/plastid/*genomic.gbff.gz > plastid.genomic.gbff.gz 4 | wget -q -O - ftp://ftp.ncbi.nih.gov/genomes/refseq/plastid/*genomic.fna.gz > plastid.genomic.fna.gz 5 | gbff2gff.sh plastid.genomic.gbff.gz plastid.genomic.gff.gz 6 | 7 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchProkByGenus.sh: -------------------------------------------------------------------------------- 1 | time fetchproks.sh ftp://ftp.ncbi.nih.gov:21/genomes/refseq/archaea archaea.sh 1>fetchA.o 2>&1 2 | time fetchproks.sh ftp://ftp.ncbi.nih.gov:21/genomes/refseq/bacteria bacteria.sh 1>fetchB.o 2>&1 3 | 4 | mkdir archaea 5 | cp archaea.sh archaea 6 | cd archaea 7 | sh archaea.sh 8 | cd .. 9 | mkdir bacteria 10 | cp bacteria.sh bacteria 11 | cd bacteria 12 | sh bacteria.sh 13 | cd .. 14 | 15 | time nice analyzegenes.sh archaea/*.fna.gz out=archaea.pgm -Xmx1g 16 | time nice analyzegenes.sh bacteria/*.fna.gz out=bacteria.pgm -Xmx1g 17 | time nice analyzegenes.sh */*.fna.gz out=model.pgm -Xmx1g 18 | 19 | cutRna.sh 20 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchRefSeq.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | #Written by Brian Bushnell 5 | #Last updated August 7, 2019 6 | 7 | #Fetches and renames RefSeq. 8 | #Be sure the taxonomy server is updated first, or run with local taxonomy data! 9 | #To use this script outside of NERSC, modify $TAXPATH to point to your directory with the BBTools taxonomy data, 10 | #e.g. TAXPATH="/path/to/taxonomy_directory/" 11 | 12 | TAXPATH="auto" 13 | 14 | #Ensure necessary executables are in your path 15 | #module load pigz 16 | 17 | #Fetch RefSeq 18 | #time wget -nv ftp://ftp.ncbi.nlm.nih.gov/refseq/release/complete/*genomic.fna.gz 19 | #The line below requires pigz! 
20 | wget -q -O - ftp://ftp.ncbi.nlm.nih.gov/refseq/release/complete/*genomic.fna.gz | gi2taxid.sh -Xmx1g in=stdin.fa.gz out=renamed.fa.gz pigz=16 unpigz zl=9 server ow maxbadheaders=5000 badheaders=badHeaders.txt bgzip 21 | 22 | -------------------------------------------------------------------------------- /pipelines/fetch/fetchRefSeqCladesOuter.sh: -------------------------------------------------------------------------------- 1 | nohup sh fetchRefSeqClades.sh 2 | -------------------------------------------------------------------------------- /pipelines/server/startNtServerVM.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Run this on jgi-web-4 3 | 4 | LOG=ntlogVM_32.txt 5 | PASS=xxxxx 6 | DOMAIN=https://nt-sketch.jgi.doe.gov 7 | KILL=https://nt-sketch.jgi.doe.gov/kill/ 8 | PORT=3071 9 | DB=nt 10 | 11 | nohup /global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -da -Xmx9g port=$PORT verbose tree=auto sketchonly index domain=$DOMAIN killcode=$PASS oldcode=$PASS oldaddress=$KILL $DB k=32,24 1>>$LOG 2>&1 & 12 | 13 | #simple mode, for testing: 14 | #/global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -ea -Xmx9g port=3071 verbose tree=auto sketchonly nt k=32,24 index=f 15 | -------------------------------------------------------------------------------- /pipelines/server/startProteinServerVM.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Run this on jgi-web-5 3 | 4 | LOG=proteinlogVM_32.txt 5 | PASS=xxxxx 6 | DOMAIN=https://protein-sketch.jgi.doe.gov 7 | KILL=https://protein-sketch.jgi.doe.gov/kill/ 8 | PORT=3074 9 | DB=ProkProt 10 | 11 | nohup /global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -da -Xmx16g prealloc=0.9 port=$PORT verbose tree=auto sizemult=2 sketchonly index amino domain=$DOMAIN killcode=$PASS oldcode=$PASS oldaddress=$KILL $DB k=12,9 1>>$LOG 2>&1 & 12 | 13 | #simple mode, for testing: 14 | #nohup 
/global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -ea -Xmx28g port=$PORT verbose tree=auto sizemult=2 sketchonly $DB k=12,9 index=f 15 | -------------------------------------------------------------------------------- /pipelines/server/startRefseqServerVM.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Run this on jgi-web-4 3 | 4 | LOG=refseqlogVM_32.txt 5 | PASS=xxxxx 6 | DOMAIN=https://refseq-sketch.jgi.doe.gov 7 | KILL=https://refseq-sketch.jgi.doe.gov/kill/ 8 | PORT=3072 9 | DB=RefSeq 10 | 11 | nohup /global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -da -Xmx28g prealloc=0.9 port=$PORT verbose tree=auto sizemult=2 sketchonly index domain=$DOMAIN killcode=$PASS oldcode=$PASS oldaddress=$KILL $DB k=32,24 1>>$LOG 2>&1 & 12 | 13 | #simple mode, for testing: 14 | #nohup /global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -ea -Xmx28g port=3072 verbose tree=auto sizemult=2 sketchonly RefSeq k=32,24 index=t 15 | -------------------------------------------------------------------------------- /pipelines/server/startSilvaServerVM.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Run this on jgi-web-4 3 | 4 | LOG=ribologVM_32.txt 5 | PASS=xxxxx 6 | DOMAIN=https://ribo-sketch.jgi.doe.gov 7 | KILL=https://ribo-sketch.jgi.doe.gov/kill/ 8 | PORT=3073 9 | REF=/global/projectb/sandbox/gaag/bbtools/silva/latest/both_seq#.sketch 10 | DB=silva 11 | 12 | nohup /global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -da -Xmx10g port=$PORT verbose tree=auto sketchonly index whitelist domain=$DOMAIN killcode=$PASS oldcode=$PASS oldaddress=$KILL ref=$REF dbname=Silva blacklist=silva k=32,24 1>>$LOG 2>&1 & 13 | 14 | #simple mode, for testing: 15 | #/global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -ea -Xmx10g port=3073 verbose tree=auto sketchonly silva k=32,24 index=f domain=https://ribo-sketch.jgi-psf.org 16 | 
-------------------------------------------------------------------------------- /pipelines/server/startTaxServerVM.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #Run this on jgi-web-1 3 | 4 | LOG=taxlogVM_55.txt 5 | PASS=xxxxx 6 | DOMAIN=https://taxonomy.jgi.doe.gov 7 | KILL=https://taxonomy.jgi.doe.gov/kill/ 8 | PORT=3068 9 | 10 | nohup /global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -da -Xmx31g port=$PORT verbose accession=auto tree=auto table=auto size=auto img=auto pattern=auto prealloc domain=$DOMAIN killcode=$PASS oldcode=$PASS oldaddress=$KILL html 1>>$LOG 2>&1 & 11 | 12 | #simple mode, for testing: 13 | #/global/projectb/sandbox/gaag/bbtools/jgi-bbtools/taxserver.sh -ea -Xmx8g port=$PORT verbose accession=null tree=auto table=null 14 | -------------------------------------------------------------------------------- /pipelines/silva/makeCoveringSetLsu.sh: -------------------------------------------------------------------------------- 1 | #kmerfilterset.sh in=lsu_deduped100pct.fa.gz k=31 rcomp=f out=lsu_covering_31mers.fa maxkpp=1 2 | #reformat.sh in=lsu_deduped100pct.fa.gz out=lsu_deduped100pct_padded.fa.gz padleft=31 padright=31 ow 3 | #shred.sh in=lsu_deduped100pct_padded.fa.gz length=150 minlength=62 overlap=30 out=shreds.fa.gz ow 4 | #kmerfilterset.sh in=shreds.fa.gz initial=lsu_covering_31mers.fa k=31 rcomp=f out=lsu_shred_covering_31mers.fa maxkpp=1 5 | nohup time kmerfilterset.sh in=shreds.fa.gz initial=lsu_shred_covering_31mers_temp.fa k=31 rcomp=f out=lsu_shred_covering_31mers.fa maxkpp=1 maxpasses=40000 fastawrap=99999 -Xmx2g ow 6 | -------------------------------------------------------------------------------- /pipelines/silva/makeCoveringSetSsu.sh: -------------------------------------------------------------------------------- 1 | kmerfilterset.sh in=ssu_deduped100pct.fa.gz k=31 rcomp=f out=ssu_covering_31mers.fa maxkpp=1 -Xmx8g 2 | reformat.sh 
in=ssu_deduped100pct.fa.gz out=ssu_deduped100pct_padded.fa.gz padleft=31 padright=31 ow 3 | shred.sh in=ssu_deduped100pct_padded.fa.gz length=150 minlength=62 overlap=30 out=shredsSsu.fa.gz ow 4 | time kmerfilterset.sh in=shredsSsu.fa.gz initial=ssu_covering_31mers.fa k=31 rcomp=f out=ssu_shred_covering_31mers.fa maxkpp=1 maxpasses=200000 fastawrap=99999 -Xmx8g ow 5 | -------------------------------------------------------------------------------- /pipelines/silva/makeRep.sh: -------------------------------------------------------------------------------- 1 | representative.sh in=results_ani94_sr00_wkid.txt thresh=0.95 minratio=0.95 out=rep_95_95.txt ow -Xmx31g 2 | representative.sh in=results_ani94_sr00_wkid.txt thresh=0.97 minratio=0.97 out=rep_97_97.txt ow -Xmx31g 3 | representative.sh in=results_ani94_sr00_wkid.txt thresh=0.98 minratio=0.98 out=rep_98_98.txt ow -Xmx31g 4 | representative.sh in=results_ani94_sr00_wkid.txt thresh=0.99 minratio=0.99 out=rep_99_99.txt ow -Xmx31g 5 | representative.sh in=results_ani94_sr00_wkid.txt thresh=0.995 minratio=0.995 out=rep_995_995.txt ow -Xmx31g 6 | representative.sh in=results_ani94_sr00_wkid.txt thresh=0.999 minratio=0.999 out=rep_999_999.txt ow -Xmx31g 7 | -------------------------------------------------------------------------------- /pytools/.gitignore: -------------------------------------------------------------------------------- 1 | config 2 | *.pyc 3 | *.swp 4 | *.log 5 | ._*.py 6 | -------------------------------------------------------------------------------- /pytools/ReadMe: -------------------------------------------------------------------------------- 1 | The readqc standalone pipeline; 2 | The filter standalone pipeline; 3 | 4 | ex data: 5 | 12248.8.247376.CAGAGTG-ACACTCT.fastq.gz 6 | 7 | ex run syntax: 8 | % time filter.py -o OUTDIR -f FASTQ.gz --prod-type RNA --skip-blast 9 | % readqc.py -o OUTDIR -f FASTQ.gz --skip-blast -html 10 | 11 | 12 | 13 | 14 | 
-------------------------------------------------------------------------------- /pytools/filter_desc/bisulphite.txt: -------------------------------------------------------------------------------- 1 | Bisulphite-Seq: SOP 1055 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 2 or more 'N' bases, had an average quality score across the read less than 10 or had minimum length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Further, reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1055 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/cell-enrichment.txt: -------------------------------------------------------------------------------- 1 | Cell Enrichment: SOP 1056 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. 
BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, right quality trim reads where quality drops to 0. BBDuk was used to remove reads that contained 1 or more 'N' bases or had a minimum length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Further, reads aligned to common microbial contaminants [6] were placed into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1056 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /pytools/filter_desc/chip-seq.txt: -------------------------------------------------------------------------------- 1 | ChIP-Seq: SOP 1076 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had minimum length <= 49 bp. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. 
The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1076 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/clip-pe.txt: -------------------------------------------------------------------------------- 1 | Clip-PE: SOP 1057 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, right quality trim reads where quality drops below 6 and right trim where the CLIP-PE linker was found. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had a minimum length <= 31 bp. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1057 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. 
SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/clrs.txt: -------------------------------------------------------------------------------- 1 | CLRS: SOP 1058 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, right quality trim reads where quality drops below 6 and right trim reads where the CLRS linker was found. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had a minimum length <= 50 bp. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1058 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/dna.txt: -------------------------------------------------------------------------------- 1 | DNA: SOP 1059.1 2 | Updated: 2017-01-19 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. 
BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 2 or more 'N' bases, had an average quality score across the read less than 10 or had a minimum length <= 49 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Further, reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1059.1 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/filter_report.txt: -------------------------------------------------------------------------------- 1 | JGI RQC Filtering Report 2 | Filtering Date: 3 | 4 | Input Sequence Unit: 5 | Library: 6 | Filtered File Name: 7 | 8 | Input Reads: 9 | Input Bases: 10 | Percent Low Quality: (/) 11 | Percent Artifact: (/) 12 | Percent Ribosomal RNA: (/) 13 | Percent Microbial: (/) 14 | Percent Human Filtering: (/) 15 | Percent Dog Filtering: (/) 16 | Percent Cat Filtering: (/) 17 | Percent Mouse Filtering: (/) 18 | Percent Adapter Filtering (Reads): (/) 19 | Percent Adapter Filtering (Bases): (/) 20 | Remaining Reads: 21 | Remaining Bases: 22 | Percent Reads Removed: 23 | Percent Bases Removed: 24 | 25 | Filter command: 26 | 27 | -------------------------------------------------------------------------------- /pytools/filter_desc/fungal.txt: 
-------------------------------------------------------------------------------- 1 | Fungal: SOP 1060 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops to 0. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 13 or had a minimum length <= 41 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Further, reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1060 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/iso.txt: -------------------------------------------------------------------------------- 1 | Microbial Isolate: SOP 1061 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. 
BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, remove reads containing 1 or more 'N' bases or having length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Further, reads aligned to masked common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1061 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /pytools/filter_desc/itag.txt: -------------------------------------------------------------------------------- 1 | ITags: SOP 1062 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3] and trim reads that contained adapter sequence. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had a minimum length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [6] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [5] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1062 10 | 2. B. 
Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. SOP 1077 16 | 6. Filtering references file: 17 | filtering_references-20160921.tar 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/lfpe.txt: -------------------------------------------------------------------------------- 1 | LFPE: SOP 1063 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained DNA specific sequencing artifacts, right quality trim reads where quality drops below 6 and right trim reads where the LFPE linker was found. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had a minimum length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1063 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. 
SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/metagenome.txt: -------------------------------------------------------------------------------- 1 | Metagenome: SOP 1064 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops to 0. BBDuk was used to remove reads that contained 4 or more 'N' bases, had an average quality score across the read less than 3 or had a minimum length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1064 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/microtrans.txt: -------------------------------------------------------------------------------- 1 | Microbial Transcriptome: SOP 1065 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. 
An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, right quality trim reads where quality drops below 6, remove reads containing 1 or more 'N' bases, and remove reads with average quality score across the read less than 10 or having minimum length <= 49 bp or 33% of the full read length. Reads mapped with BBMap [2] to human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to masked common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1065 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/plant-2x150.txt: -------------------------------------------------------------------------------- 1 | Plant 2x150: SOP 1068 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had minimum length <= 50 bp.
Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1068 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/plant-2x250.txt: -------------------------------------------------------------------------------- 1 | Plant 2x250: SOP 1069 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10 or had minimum length <= 75 bp. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1069 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. 
Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/rna.txt: -------------------------------------------------------------------------------- 1 | RNA: SOP 1070 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 1 or more 'N' bases, had average quality score across the read less than 10 or had minimum length <= 49 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1070 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/rnawohuman.txt: -------------------------------------------------------------------------------- 1 | RNA: SOP 1070 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. 
An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 1 or more 'N' bases, had average quality score across the read less than 10 or had minimum length <= 49 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1070 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/sag.txt: -------------------------------------------------------------------------------- 1 | Microbial Single Cell: SOP 1071 2 | Updated: 2016-10-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence, remove reads containing 1 or more 'N' bases or having a minimum length <= 51 bp or 33% of the full read length. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. 
Further, reads aligned to masked common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1071 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /pytools/filter_desc/smrna.txt: -------------------------------------------------------------------------------- 1 | Small RNA: SOP 1072.2 2 | Updated: 2017-04-24 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops below 6. BBDuk was used to remove reads that contained 1 or more 'N' bases, had an average quality score across the read less than 10, had a minimum length <= 17 bp or mapped to the organism's chloroplast or ribosomal sequence. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 7 | 8 | 9 | 1. SOP 1072.2 10 | 2. B. Bushnell: BBTools software package, http://bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6.
SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_desc/sop_list.txt: -------------------------------------------------------------------------------- 1 | # file_name = SOP id 2 | # 2017-04-24 3 | 3primerna.txt = 0 4 | bisulphite.txt = 1055 5 | cell-enrichment.txt = 1056 6 | clip-pe.txt = 1057 7 | clrs.txt = 1058 8 | dna.txt = 1059.1 9 | fungal.txt = 1060 10 | iso.txt = 1061 11 | itag.txt = 1062 12 | lfpe.txt = 1063 13 | metagenome.txt = 1064 14 | mtaa.txt = 1066.1 15 | microtrans.txt = 1065 16 | nextera-lmp.txt = 1067 17 | plant-2x150.txt = 1068 18 | plant-2x250.txt = 1069 19 | rna.txt = 1070 20 | sag.txt = 1071 21 | smrna.txt = 1072.2 22 | chip-seq.txt = 1076 23 | viral-metagenome.txt = 1078 24 | -------------------------------------------------------------------------------- /pytools/filter_desc/viral-metagenome.txt: -------------------------------------------------------------------------------- 1 | Viral Metagenome: SOP 1078 2 | Updated: 2017-02-21 3 | RQC Read Filtering Methods 4 | 5 | 6 | Sequence data for library was generated at the DOE Joint Genome Institute (JGI) using Illumina technology [1]. An Illumina was constructed and sequenced using the Illumina platform which generated reads totaling bp. BBDuk (version ) [2] was used to remove contaminants [3], trim reads that contained adapter sequence and right quality trim reads where quality drops to 0. BBDuk was used to remove reads that contained 4 or more 'N' bases, had an average quality score across the read less than 3 or had a minimum length <= 51 bp or 33% of the full read length. The 10 bases on the left and right end of the reads are trimmed off. Reads mapped with BBMap [2] to masked [5] human, cat, dog and mouse references at 93% identity were separated into a chaff file [4]. Reads aligned to common microbial contaminants [6] were separated into a chaff file [4]. The final filtered fastq contained reads totalling bp. 
7 | 8 | 9 | 1. SOP 1078 10 | 2. B. Bushnell: BBTools software package, http:\\bbtools.jgi.doe.gov 11 | 3. BBDuk, BBMap and BBMerge commands used for filtering are placed in file: 12 | 13 | 4. All removed reads are placed in file: 14 | 15 | 5. Filtering references file: 16 | filtering_references-20160921.tar 17 | 6. SOP 1077 18 | 19 | 20 | -------------------------------------------------------------------------------- /pytools/filter_param/3primerna.config: -------------------------------------------------------------------------------- 1 | rna=t trimfragadapter=t qtrim=r trimq=6 maxns=1 maq=10 minlen=25 mlf=0.33 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/bisulphite.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=6 maxns=1 maq=10 minlen=49 mlf=0.33 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t bisulfite=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/cell-enrichment.config: -------------------------------------------------------------------------------- 1 | rna=f minlength=51 phix=t trimfragadapter=t maxns=1 mlf=0.33 qtrim=r trimq=6 maq=10 minlen=49 khist=t removehuman=t removedog=t removecat=t removemouse=t removemicrobes=t aggressive=t microbebuild=3 sketch -------------------------------------------------------------------------------- /pytools/filter_param/chip-seq.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=6 maxns=1 maq=10 minlen=49 phix=t khist=t removehuman=t removedog=t removecat=t removemouse=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/clip-pe.config: 
-------------------------------------------------------------------------------- 1 | rna=f minlength=31 library=clip cliplinker=CATG phix=t qtrim=r trimq=6 maxns=1 maq=10 removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/clrs.config: -------------------------------------------------------------------------------- 1 | rna=f minlength=50 library=clrs phix=t qtrim=r trimq=6 maxns=1 maq=10 removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/dna.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=6 maxns=1 maq=10 minlen=49 mlf=0.33 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/fungal.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=0 maxns=0 maq=13 minlen=41 mlf=0.33 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/iso.config: -------------------------------------------------------------------------------- 1 | rna=f minlength=51 phix=t trimfragadapter=t maxns=1 mlf=0.33 removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/itag.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t removehuman=f removedog=f removecat=f removemouse=f phix=t qtrim=f trimq=0 maxns=1 
maq=10 mlf=0.33 removeribo=f minlen=51 keephuman=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/lfpe.config: -------------------------------------------------------------------------------- 1 | rna=f minlength=31 library=lfpe phix=t qtrim=r trimq=6 maxns=1 maq=10 removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/metagenome.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=0 maxns=3 maq=3 minlen=51 mlf=0.33 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/microtrans.config: -------------------------------------------------------------------------------- 1 | rna=t minlength=49 qtrim=r maq=10 trimq=6 trimfragadapter=t phix=t maxns=1 mlf=0.33 removehuman=t removedog=t removecat=t removemouse=t khist=t removeribo=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/mtaa.config: -------------------------------------------------------------------------------- 1 | rna=t trimfragadapter=t qtrim=r trimq=0 maxns=1 maq=10 minlen=51 mlf=0.33 phix=t removeribo=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t mtst=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/nextera-lmp.config: -------------------------------------------------------------------------------- 1 | rna=f nextera=t phix=t minlength=31 qtrim=r trimq=6 maxns=1 maq=10 trimfragadapter=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t 
-------------------------------------------------------------------------------- /pytools/filter_param/nextseq.config: -------------------------------------------------------------------------------- 1 | k=23 mink=11 tbo tpe ktrim=r ftm=5 removemicrobes=t -------------------------------------------------------------------------------- /pytools/filter_param/plant-2x150.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=6 maxns=1 maq=10 minlen=50 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/plant-2x250.config: -------------------------------------------------------------------------------- 1 | rna=f trimfragadapter=t qtrim=r trimq=6 maxns=1 maq=10 minlen=75 phix=t removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/rna.config: -------------------------------------------------------------------------------- 1 | rna=t minlength=49 qtrim=r maq=10 trimq=6 trimfragadapter=t phix=t maxns=1 mlf=0.33 removehuman=t removedog=t removecat=t removemouse=t khist=t removemicrobes=t sketch -------------------------------------------------------------------------------- /pytools/filter_param/rnawohuman.config: -------------------------------------------------------------------------------- 1 | rna=t minlength=49 qtrim=r maq=10 trimq=6 trimfragadapter=t phix=t maxns=1 mlf=0.33 removehuman=f removedog=t removecat=t removemouse=t khist=t removemicrobes=t -------------------------------------------------------------------------------- /pytools/filter_param/sag.config: -------------------------------------------------------------------------------- 1 | rna=f minlength=51 phix=t trimfragadapter=t maxns=1 mlf=0.33 removehuman=t 
def html_tag(tag, txt, attrs=None):
    """Wrap ``txt`` in an HTML element and return the markup plus a newline.

    Args:
        tag:   Element name, e.g. ``'td'``.
        txt:   Inner content. It is inserted verbatim (NOT escaped) because
               the other helpers in this module pass already-built markup
               through this function (rows made of cells).
        attrs: Optional mapping of attribute name -> value, rendered as
               ``name="value"`` in the mapping's iteration order.

    Returns:
        ``'<tag attrs>txt</tag>\\n'`` as a string.
    """
    # Render the attributes separately instead of splicing them into a
    # format template: a value containing '%' (e.g. width="50%") would
    # otherwise be interpreted as a conversion specifier and fail.
    attr_txt = ''
    if attrs:
        for name in attrs:
            attr_txt += ' %s="%s"' % (name, attrs[name])

    # Opening tag, content and closing tag; three values for the tag/content
    # slots plus the pre-rendered attribute text.
    return '<%s%s>%s</%s>\n' % (tag, attr_txt, txt, tag)


def html_th(header, attrs=None):
    """Build a table header row: one ``<th>`` per item of ``header``.

    ``attrs`` (optional) is applied to every ``<th>`` cell, not to the
    enclosing ``<tr>``.
    """
    html = ''
    for d in header:
        html += html_tag('th', d, attrs=attrs)
    return html_tag('tr', html)


def html_tr(data):
    """Build a table body row: one ``<td>`` per item of ``data``."""
    html = ''
    for d in data:
        html += html_tag('td', d)
    return html_tag('tr', html)


def html_link(href, txt):
    """Return an anchor element pointing at ``href`` with link text ``txt``.

    Neither argument is escaped; callers must pass values that are safe to
    embed in HTML.
    """
    return '<a href="%s">%s</a>' % (href, txt)
-------------------------------------------------------------------------------- 1 | >7848 2 | TGGCACCTCGATGTCG 3 | >460 4 | ACCGTCGTGAGACAGG 5 | >76 6 | AGTGCGTAACAGCTCA 7 | >48 8 | GGCTCATTCTTCAACA 9 | >27 10 | AGTACCGTGAGGGAAA 11 | >7 12 | CTCTTAAGGTAGCGTA 13 | >6 14 | AACGTCGTGAGACAGT 15 | >2 16 | AAAGGGAAACAGCCCA 17 | -------------------------------------------------------------------------------- /resources/5S_10mers.fa: -------------------------------------------------------------------------------- 1 | >2414 2 | CGCCGATGGTA 3 | >1417 4 | CCATCCCGAAC 5 | >880 6 | CCATTCCGAAC 7 | >209 8 | CCATCTCGAAC 9 | >202 10 | CCATACCGAAC 11 | >167 12 | CCATGCCGAAC 13 | >121 14 | GAAGTTAAGCC 15 | >102 16 | CCTTCCCGAAC 17 | >51 18 | GGGAGAGTAGG 19 | >45 20 | CGAACACAGAA 21 | >38 22 | CCATTTCGAAC 23 | >35 24 | GAACCCGGAAG 25 | >30 26 | CGCCGATGATA 27 | >28 28 | CTCGTTTCGAT 29 | >15 30 | ATCCCGAACAC 31 | >15 32 | GCAGTTAAGCC 33 | >12 34 | GTGGGAGAGTA 35 | >10 36 | CAAGCCGGCCA 37 | >9 38 | CAGGGATACAC 39 | >8 40 | CGACCATAGCG 41 | >7 42 | ATCTCGGAAAT 43 | >6 44 | CAGAAGTTAAG 45 | >5 46 | CCGGCCATAGG 47 | >5 48 | CTCATTCCGAA 49 | >5 50 | GAAACACCCGT 51 | >5 52 | CACCTGATCCC 53 | >4 54 | ACCTGTTCCCA 55 | >3 56 | AAGTTAAGCTC 57 | >3 58 | GTAGTTAAGCC 59 | >3 60 | CACCCGTTCCC 61 | >3 62 | ACCACTCCGAT 63 | >2 64 | CCCTTTCCGAA 65 | >1 66 | GAACGTCGATT 67 | >1 68 | GAGTAGTGTAG 69 | >1 70 | AGATCCCGGAA 71 | >1 72 | ACCTCCTGGGA 73 | >1 74 | CCCTGGGAGAG 75 | >1 76 | TTTTTCCATGT 77 | >1 78 | TCTGAGTTCGG 79 | -------------------------------------------------------------------------------- /resources/5S_11mers.fa: -------------------------------------------------------------------------------- 1 | >2621 2 | CGCCGATGGTA 3 | >1565 4 | CCATCCCGAAC 5 | >967 6 | CCATTCCGAAC 7 | >222 8 | CCATCTCGAAC 9 | >215 10 | CCATACCGAAC 11 | >176 12 | CCATGCCGAAC 13 | >137 14 | GAAGTTAAGCC 15 | >109 16 | CCTTCCCGAAC 17 | >55 18 | GGGAGAGTAGG 19 | >49 20 | CGAACACAGAA 21 | >40 22 | CCATTTCGAAC 23 | >38 24 | 
GAACCCGGAAG 25 | >31 26 | CTCGTTTCGAT 27 | >31 28 | CGCCGATGATA 29 | >17 30 | ATCCCGAACAC 31 | >15 32 | GCAGTTAAGCC 33 | >13 34 | GTGGGAGAGTA 35 | >11 36 | GAAGTCAAACC 37 | >9 38 | CGACCATAGCG 39 | >9 40 | CAGGGATACAC 41 | >8 42 | CAGAAGTTAAG 43 | >7 44 | ATCTCGGAAAT 45 | >5 46 | CCGGCCATAGG 47 | >5 48 | CAGCAGTTAAG 49 | >5 50 | GAAACACCCGT 51 | >4 52 | GGGGGTTGCCC 53 | >4 54 | GGAACCACCTG 55 | >3 56 | TCCGAACACGG 57 | >3 58 | AAGTTAAGCTC 59 | >3 60 | GTAGTTAAGCC 61 | >3 62 | CACCCGTTCCC 63 | >2 64 | CCCTTTCCGAA 65 | >1 66 | ATAACGTGATG 67 | >1 68 | GAGTAGTGTAG 69 | >1 70 | AGATCCCGGAA 71 | >1 72 | AGAGTAAGTCA 73 | >1 74 | CGTATCACGTG 75 | >1 76 | TCTGAGTTCGG 77 | >1 78 | CGTGGTGGTAC 79 | -------------------------------------------------------------------------------- /resources/5S_9mers.fa: -------------------------------------------------------------------------------- 1 | >2761 2 | GCCGATGGTA 3 | >1304 4 | CATCCCGAAC 5 | >814 6 | CATTCCGAAC 7 | >295 8 | GAAGTTAAGC 9 | >176 10 | CATCTCGAAC 11 | >101 12 | CCTTCCCGAA 13 | >90 14 | CCGAACACAG 15 | >79 16 | GCCGATGATA 17 | >45 18 | CATTTCGAAC 19 | >32 20 | CATACCGAAC 21 | >28 22 | CTTTCCGAAC 23 | >28 24 | CTCGTTTCGA 25 | >22 26 | CATGCCGAAC 27 | >20 28 | GAACCCGGAA 29 | >12 30 | CCTGTTCCCA 31 | >12 32 | TGGGAGAGTA 33 | >9 34 | GGTCATAGCG 35 | >9 36 | GAAGTGAAAC 37 | >6 38 | CCCGTTCCGA 39 | >5 40 | CGACCATAGC 41 | >4 42 | CCCGTCCCGA 43 | >3 44 | AGATCCCGGA 45 | >3 46 | ACCACTCCGA 47 | >3 48 | CCCGTTCCCA 49 | >2 50 | GCGGCCATAT 51 | >1 52 | TGACGATTAC 53 | >1 54 | CGAGAGTAGT 55 | >1 56 | ATAGGTAGTT 57 | -------------------------------------------------------------------------------- /resources/5S_consensus_sequence.fa: -------------------------------------------------------------------------------- 1 | >universal 2 | TCCTGGTGGCCATAGCGGAGGGGAAACACCCGTTCCCATCCCGAACACGGAAGTTAAGCCCTTCAGCGCC 3 | GATGGTACTGTGGGGTGACCCCGTGGGAGAGTAGGTCGCCGCCAGGAT 4 | >bacteria 5 | 
TCCTGGTGGCCATAGCGAGGTGGAAACACCCGTTCCCATCCCGAACACGGAAGTTAAGCCCCTCAGCGCC 6 | GATGGTACTGTGGGGTCACCCCGTGGGAGAGTAGGTCGCCGCCAGGC 7 | >archaea 8 | TACAGGCGGCCATAGCGGCGGGGTAACACCCGTACCCATCCCGAACACGGAAGTTAAGCCCCCTAGCGTT 9 | CCTGGTAGTACTGGAGTGCGCGAGCCTCGGGAACTGCCAGGTTCGCCGCCTGCC 10 | >plastid 11 | AATATTCTGGTGTCCATGGCGTAGTGGAACCACACCAATCCATCCCGAACTTGGTGGTTAAACTCTACAG 12 | CGGTGACGATACTGTAGGGGAGCCCCTCGGGAAAATAGCTCGATGCCAGGAT 13 | -------------------------------------------------------------------------------- /resources/ITS_fungi_consensus_sequence.fq: -------------------------------------------------------------------------------- 1 | @ITS_fungi 2 | CTGCGCACATCTTATCAATCAAAGGTGTGCACGCGGGTGTAGCGGGCGTGTGGCGGGCCCCCTGCACCCTCTGTCCACGTACACGATTGTCGCTGGCTAGGATTGATCTTCTAGCATTTGTGCACATCTATCCATTACACACGCCTGTGCACCTATTGTAGATCGAAAGATCTATGTCAACTATCACTGTCAGCAATGTATGTCCAGAATGTTAATAAATATTAAAACTTTCAACAACGGATCTCTTGGCTCTCGCATCGATGAAGAACGCAGCGAAATGCGATAAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCCTTGGTATTCCGAGGGGCATGCCTGTTTGAGCGTCATTAAATTCTCAACCCATCAAGTTTCGGCTTGGAGTTGGGTTTGCTGGCTTTGAGTAGTCGGCTCCCCTGAAATGCATTAGCGAGTGACGTCTTGACATGCACGGCCTCTTGACGTGATTAATGATCGTCTAGTGGGCTGGAAGTGTCATTAGGAAGCTTGCTTTAACATTAACATTAACATTGATGAGATGTGAACTGAGTGAAGGT 3 | + 4 | *B@,=2<505;?53?905>60*;/*>1;38=50;.2)>;<49;,5*?(=-<,64/)46'3*6>13@80;:0?<=),=8.?7;4=+14=:7/6:>30?6.92+4.;0-=1595.7>)5*3<2?6/;)2=5)409*38(:)8,/>2=385>8-2<3,1+;20,4.?+3):-80<64=2=.7>3=49.;)B9@-90.;/A?72:=?6(46:2<+3->@77B>>A,AH/GHGGGGDG:HGHBHHHHHHHHGHH+HGB-FGHGHHHHHHHHHGHGFGHGFHHDFHGHHFHAEHGBHGEHHGHGHHHGGEGGEAHGHHFGHHHGGHDHFHHHHHHHHHHA/HHHHBH5C13>@H@GEGHA@/6D5HBHCCFFDGG@+GHH.DGGBE;7@<.1>FEE?>;B+B@35?*6B07?1=4A7-71-:0,A5=4@B99.6A@/ADC668B2=+D?<2A/5+?88;2=?-80A972;.;8*>2.9;2/:,377*/:95<-08292?-2.-70-35?-58A630:7<0-4(-<1:3)6.3<,;4:+>>)6/90/:2/7 5 | -------------------------------------------------------------------------------- /resources/ITS_nonfungi_consensus_sequence.fq: 
-------------------------------------------------------------------------------- 1 | @ITS_nonfungi 2 | AAGGATCATTGTCGAAACCTCATAGCAGAACGACCCGCGAACATGTTATAAAACAACGGGGGGCACGCGGGGGTGGCGGGCGTGTGGCGGGCCCCCTGCCCCCCCTGTCCACGAGCACGATTGTCGCTGGCTCGGATTAATCAACGAACCCCGGCGCGGAACGCGCCAAGGAACACAACTGTGCAACGATTGTAGCTCGAACGATCTCTGTCAGCTGCCACTGTCAGCGATGTATGTCCAGCATGTTAATAAATAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAAGCCATTCCGAGGGGCACGTCTGCCTGGGCGTCACGCATCGCTCCCCCCAACCACTCTCGCATTGGAGTTGGGTGTGCTGGCGGGGAGTAGTTGGCCTCCCGTGGCTGCATTGGCGCGGTTGGCCTGAAATGCGAGTCCTCTTGACGTGATCGATGATCGACTAGTGGGCTGGATGTGTCATTAGGACGCTTGCTGTAACGTTGGCGTTAGCATTGATGGGATGGGAAGTGAGTGAAGGTCATCATTGATTATTACTGAGTGCATCGAATCCGACCCCAGGTCAGGCG 3 | + 4 | =9>==AB;@=?CDD@53CB@=4*>:@A?<8;?B?<5D/C?BB0.CC3672)48>4*;37/:-=820=4:20)3:5/<5*/@-8.53*@0(49/72/:=60'-2(65?48.,6>+<<,::*6,?4;:;773=6(:>,-)5<0<-@?/?3:)@64=+6*63B7.*@7A-5A+15?8/8.773>+;33<8>6)2;5@G;?GFGBFC=AGAA;GGGG@DGG9GGEGG=9DCEBGGGGGGGGFBGG6FGF;GE@GGFGEEFB6EDBE>GGE4@8:-A26EAEEG?E5ECG?AFDBB@A*@80?3-.<45=>BC26ADA<@.DC;711?;B:>82=/+?2<@/?4.=?2,75,B@8>)<@?.>B.C=A0<=.7.<;-16,3.>5>/5:95=8,(=?:*:-A;6,);?+7);.24(4250)60*/810*349-+3.>+2>/4*@-.))3*0>3<1;.=B+;?>8*6*>B=?;4><7=;<9<7>; 5 | -------------------------------------------------------------------------------- /resources/ITS_plant_consensus_sequence.fq: -------------------------------------------------------------------------------- 1 | @ITS_plant 2 | 
TGTCGAAACCTGCAAAGCAGAACGACCCGCGAACATGTTATAAAACACAGGGGGGCACGCGGGGGTGGCGGGCGTGTGGCGTGCCCCCTGCCTCCCCTGTCCACGAGCACGATTGTCGCTGGCTAGGACTCATCGTCGAGCATCGGCGCACAACGAACCCCGGCGCGGACATGCGCCAAGGAATATAAATCGAAAGATCTGTGTCCGCTGCCGCTGTCAGCGGTGTATGTCCAGCATGTTAATAAATAAAACGACTCTCGGCAACGGATATCTCGGCTCTCGCATCGATGAAGAACGTAGCGAAATGCGATACTTGGTGTGAATTGCAGAATCCCGTGAACCATCGAGTCTTTGAACGCAAGTTGCGCCCGAAGCCATTAGGCCGAGGGCACGTCTGCCTGGGCGTCACGCATCGCTCCCCCCAACCACTCTCGCATTGGAGTTGGGTGTGCTGGCTGGGAGTAGTTGGCCTCCCGTGCCTGCATCGGCGCGGTTGGCCTGAAATGCGAGTCCTCTTGACGTGATGCACGATCGACTAGTGGGGTGGATGTGTCATTAGGACGCTTGCTGTAGCGTGCGCGTTAGCATCGATGGGATGGGACGTGAGTGACCCTCATCATCGATTATTACTGAGTGCATCGACTGCGACCCCAGGTC 3 | + 4 | ADFFEC74EECA@5.?=BCA?:=AEC>8E0EBDC0.DE5584)7<=3<0)/<8.>:60>5<,7(5;.8=8+4A.8/37*A398<6,90;>8237.;15?7=0-8?*;>+;=-7(>2>8>5.9=3);>,<2;-5;?2;>(:>-;8A5;1,>3CA@-AC:>=BEBD;D3C?-<3F4B--2B5)5->@1=-<5.7:=HHFHHHHHGHG)B:DC0.?C6>/A4@*B29,5<1?,5@2;2?1B20CA/*<@D6C-E36GGE=@:ABDDAB=79/@255@55?@EG25EFC??+GF=6/1@9C<@;3>,)?16-B:/A>5-DA9@(?A>3@D?E;C4=:+6+=:3(7+0*@4>.5:75=:*1=?>>=6:;6,);?(9?=1/3)7739,50B(8*702AA?BB6-;.8*1?>+1*(2)4>0>/=-=D(;A>=80;>D@@?:A>;B<> 5 | -------------------------------------------------------------------------------- /resources/ITS_universal_consensus_sequence.fq: -------------------------------------------------------------------------------- 1 | @ITS_universal 2 | TCGAAACCTCATAGCAGAACGACCCGCGAACATCTTATAAAACAAAGGGGGGCACGCGGGGGTGGGGGGCGTGTGGCGGGCCCCCTGCCCCCCCTGTCCACGAACACGATTGTCGCTGGCTAGGATTAATCTTCTAGCATTTGTGCACATCTATCCATTACACACACCTGTGCACCTATTGTAGATCGAAAGATCTATGTCAACTACCACTGTCAGCAATGTATGTCCAGAATGTTAATAAATATTACAACTTTCAGCAACGGATCTCTTGGCTCTCGCATCGATGAAGAACGCAGCGAAATGCGATAAGTAATGTGAATTGCAGAATTCAGTGAATCATCGAATCTTTGAACGCACATTGCGCCCGTTGGTATTCCGAGGGGCATGCCTGTTTGAGCGTCATTCAATTCTCAACCCAACCAGTTTCGGATTGGAGTTGGGTTTGCTGGCTTTGAGTAGTCGGCTCCCCTTAAATGCATTAGCGAGTGACGTCTTGACATGCACGGCCTCTTGACGTGATTAATGATCGTCTAGTGGGCTGGAAGTGTCATTAGGAAGCTTGCTTTAACATTAACATTAACATTGATGGGATGGGAAGTGAGTGAAGGTAATAATTGATTATTACTGAGTGCATCGAATCCGAC 3 | + 4 
| @>C>51?>A;4-:<;=@947>?=81A/A<:>38*6<2:)16-=1'.-3,8(:95/9550,'3<*3(-2*@+:09..9307-5+0:1/04/>30*571/=2/7;973-5:1*520;>>/2843+<@418>=.6-851=.6/:?89?88@1*G54GGG1GD3,GDE?HHHH1FHG3HH7GG@4EFFEHHHHHHHGGEHG5GHGAHFBFHGHGFG-4G3-GGCHHGGGHHHGFEF2F.GCHHE3GGGFFG0F?GGGHHHGHGH-)GFHGCG;C*)0C42@B@B9A25D;GCH/D6FDG1-7E4G0EGGA4.4@5)->@C71>9A0(>5051.=936)<9?>0796A<;/+9?:1?9<3)2:/<.3<85;=?96/:;:(8757:::/:*B><+?1:*9>-=/8;7.3>:743<09-:5,7>05>9=8/6.52228@7/=50;99-66<5<3,7392?200-/51+7?.5:=7+420=,4.(+=,81(/>.8.:173(128.'34,+9.3+547,,,5(/1).5+=/.,+4*3>5:380:A-;?<8+7,:@;< 5 | -------------------------------------------------------------------------------- /resources/adapters_no_transposase.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/adapters_no_transposase.fa.gz -------------------------------------------------------------------------------- /resources/crelox.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/crelox.fa.gz -------------------------------------------------------------------------------- /resources/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/favicon.ico -------------------------------------------------------------------------------- /resources/lambda.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/lambda.fa.gz -------------------------------------------------------------------------------- /resources/lfpe.linker.fa.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/lfpe.linker.fa.gz -------------------------------------------------------------------------------- /resources/nextera.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/nextera.fa.gz -------------------------------------------------------------------------------- /resources/nextera_LMP_adapter.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/nextera_LMP_adapter.fa.gz -------------------------------------------------------------------------------- /resources/nextera_LMP_linker.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/nextera_LMP_linker.fa.gz -------------------------------------------------------------------------------- /resources/phix174_ill.ref.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/phix174_ill.ref.fa.gz -------------------------------------------------------------------------------- /resources/phix_adapters.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/phix_adapters.fa.gz -------------------------------------------------------------------------------- /resources/polyA.fa.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/polyA.fa.gz -------------------------------------------------------------------------------- /resources/primes.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/primes.txt.gz -------------------------------------------------------------------------------- /resources/remote_files_old.txt: -------------------------------------------------------------------------------- 1 | Masked version of human HG19 reference, for removing human contaminant reads: 2 | https://drive.google.com/open?id=0B3llHR93L14wd0pSSnFULUlhcUk 3 | 4 | Masked version of mouse reference, for removing mouse contaminant reads: 5 | https://drive.google.com/open?id=0B3llHR93L14wYmJYNm9EbkhMVHM 6 | 7 | Masked version of cat reference, for removing cat contaminant reads: 8 | https://drive.google.com/open?id=0B3llHR93L14wOXJhWXRlZjBpVUU 9 | 10 | Masked version of dog reference, for removing dog contaminant reads: 11 | https://drive.google.com/open?id=0B3llHR93L14wTHdWRG55c2hPUXM 12 | 13 | Ribosomal kmers, for ribosomal (16S/18S/etc) read removal via BBDuk: 14 | https://drive.google.com/open?id=0B3llHR93L14wS2NqRXpXakhFaEk 15 | 16 | Masked version of common bacterial contaminants, for removing bacterial contamination from eukaryotes: 17 | https://drive.google.com/open?id=0B3llHR93L14wZ1N6akxrSW16Z0U 18 | 19 | Masked version of common bacterial contaminants, for removing bacterial contamination from bacteria: (These are the same organisms as above, but more heavily masked with other non-contaminant bacteria) 20 | https://drive.google.com/open?id=0B3llHR93L14wNkxnSk0wOUZubk0 21 | -------------------------------------------------------------------------------- /resources/sample1.fq.gz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/sample1.fq.gz -------------------------------------------------------------------------------- /resources/sample2.fq.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/sample2.fq.gz -------------------------------------------------------------------------------- /resources/sequencing_artifacts.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/sequencing_artifacts.fa.gz -------------------------------------------------------------------------------- /resources/short.fa: -------------------------------------------------------------------------------- 1 | >contam_56 2 | ATCTCGTATGCCGTCTTCTGCTTG 3 | >contam_63 4 | CAAGCAGAAGACGGCATACGA 5 | >contam_169 6 | GATCGGAAGAGCACACGTCT 7 | >contam_196 8 | GCCTTGGCACCCGAGAATTCCA 9 | >contam_223 10 | GTTCAGAGTTCTACAGTCCGACGATC 11 | >contam_236 12 | TCGTATGCCGTCTTCTGCTTGT 13 | >contam_241 14 | TGGAATTCTCGGGTGCCAAGG 15 | -------------------------------------------------------------------------------- /resources/tRNA_consensus_sequence.fa: -------------------------------------------------------------------------------- 1 | >universal 2 | GGTGGCGGGGCGGTGATTAGCTCAGCCTGGTAGAGCACTTCGTTCGGGACGAAGGGGTCGCGGGTTCGAA 3 | TCCTCTCTCGCCGACCA 4 | >bacteria 5 | GGGCGGTGAGTAGCTCAGACTGGTAGAGCACTTCGTTCGGGACGAAGGGGTCGCGGGTTCGAATCCTCTC 6 | TCGCCGACCA 7 | >archaea 8 | GGGGCGGTGATTGGCGCAGCCTGGTAGCGCACTTCGTTCGGGACGAAGGGGTCGCGGGTTCAAATCCGCG 9 | CTCGCCGACCA 10 | >plastid 11 | GGCGGTGATTAGCTCAGATTGGTAGAGCACTTCGTTCGGAACGAAGAGGTCGGAGGTTCGAATCCTCTAT 12 | CACCGA 13 | -------------------------------------------------------------------------------- /resources/truseq.fa.gz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/truseq.fa.gz -------------------------------------------------------------------------------- /resources/truseq_rna.fa.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BioInfoTools/BBMap/a9ceda047a7c918dc090de0fdbf6f924292d4a1f/resources/truseq_rna.fa.gz -------------------------------------------------------------------------------- /sh/bbmapskimmer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #This is a version of BBMap designed to find all sites above a given threshold, 4 | #rather than the single best site. Syntax is the same as BBMap. 5 | 6 | usage(){ 7 | bash "$DIR"bbmap.sh 8 | } 9 | 10 | #This block allows symlinked shellscripts to correctly set classpath. 11 | pushd . 
> /dev/null 12 | DIR="${BASH_SOURCE[0]}" 13 | while [ -h "$DIR" ]; do 14 | cd "$(dirname "$DIR")" 15 | DIR="$(readlink "$(basename "$DIR")")" 16 | done 17 | cd "$(dirname "$DIR")" 18 | DIR="$(pwd)/" 19 | popd > /dev/null 20 | 21 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 22 | CP="$DIR""current/" 23 | JNI="-Djava.library.path=""$DIR""jni/" 24 | JNI="" 25 | 26 | z="-Xmx1g" 27 | z2="-Xms1g" 28 | set=0 29 | 30 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 31 | usage 32 | exit 33 | fi 34 | 35 | calcXmx () { 36 | source "$DIR""/calcmem.sh" 37 | setEnvironment 38 | parseXmx "$@" 39 | if [[ $set == 1 ]]; then 40 | return 41 | fi 42 | freeRam 3200m 84 43 | z="-Xmx${RAM}m" 44 | z2="-Xms${RAM}m" 45 | } 46 | calcXmx "$@" 47 | 48 | mapPacBioSkimmer() { 49 | local CMD="java $EA $EOOM $z $z2 $JNI -cp $CP align2.BBMapPacBioSkimmer build=1 overwrite=true minratio=0.40 fastareadlen=6000 ambig=all minscaf=100 startpad=10000 stoppad=10000 midpad=6000 $@" 50 | echo $CMD >&2 51 | eval $CMD 52 | } 53 | 54 | mapPacBioSkimmer "$@" 55 | -------------------------------------------------------------------------------- /sh/bbmerge-auto.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | bbmerge-auto.sh is a wrapper for BBMerge that attempts to use all available 6 | memory, instead of a fixed amount. This is for use with the Tadpole options 7 | of error-correction (ecct) and extension, which require more memory. 8 | For merging by overlap only, please use bbmerge.sh. If you set memory 9 | manually with the -Xmx flag, bbmerge.sh and bbmerge-auto.sh are equivalent. 10 | 11 | For information about usage and parameters, please run bbmerge.sh. 12 | " 13 | } 14 | 15 | #This block allows symlinked shellscripts to correctly set classpath. 16 | pushd . 
#!/bin/bash

#For more information, please see rename.sh
#This exists for people who type bbrename.sh instead of rename.sh

#Resolve the directory that really contains this script, following any
#chain of symlinks, so the sibling rename.sh can be found.
#pushd/popd restore the caller's working directory afterwards.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  cd "$(dirname "$DIR")"
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")"
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"

#Forward every argument unchanged. "$@" must be quoted: unquoted $@ is
#word-split and glob-expanded, which corrupts arguments such as
#in="my reads.fq" or patterns containing * and ?.
"$DIR"rename.sh "$@"
#!/bin/bash

#For more information, please see stats.sh
#This exists for people who type bbstats.sh instead of stats.sh

#Resolve the directory that really contains this script, following any
#chain of symlinks, so the sibling stats.sh can be found.
#pushd/popd restore the caller's working directory afterwards.
pushd . > /dev/null
DIR="${BASH_SOURCE[0]}"
while [ -h "$DIR" ]; do
  cd "$(dirname "$DIR")"
  DIR="$(readlink "$(basename "$DIR")")"
done
cd "$(dirname "$DIR")"
DIR="$(pwd)/"
popd > /dev/null

#DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/"

#Forward every argument unchanged. "$@" must be quoted: unquoted $@ is
#word-split and glob-expanded, which corrupts arguments that contain
#spaces or wildcard characters.
"$DIR"stats.sh "$@"
> /dev/null 16 | DIR="${BASH_SOURCE[0]}" 17 | while [ -h "$DIR" ]; do 18 | cd "$(dirname "$DIR")" 19 | DIR="$(readlink "$(basename "$DIR")")" 20 | done 21 | cd "$(dirname "$DIR")" 22 | DIR="$(pwd)/" 23 | popd > /dev/null 24 | 25 | CP="$DIR""current/" 26 | 27 | bbversion() { 28 | local CMD="java -Xmx80m -cp $CP driver.BBVersion $@" 29 | eval $CMD 30 | } 31 | 32 | bbversion "$@" 33 | -------------------------------------------------------------------------------- /sh/comparegff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified August 12, 2019 7 | 8 | Description: Compares CDS, rRNA, and tRNA lines in gff files. 9 | 10 | Usage: comparegff.sh in= ref= 11 | 12 | Standard parameters: 13 | in= Query gff. 14 | ref= Reference gff. 15 | 16 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 17 | " 18 | } 19 | 20 | #This block allows symlinked shellscripts to correctly set classpath. 21 | pushd . 
> /dev/null 22 | DIR="${BASH_SOURCE[0]}" 23 | while [ -h "$DIR" ]; do 24 | cd "$(dirname "$DIR")" 25 | DIR="$(readlink "$(basename "$DIR")")" 26 | done 27 | cd "$(dirname "$DIR")" 28 | DIR="$(pwd)/" 29 | popd > /dev/null 30 | 31 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 32 | CP="$DIR""current/" 33 | 34 | z="-Xmx1g" 35 | z2="-Xms1g" 36 | set=0 37 | 38 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 39 | usage 40 | exit 41 | fi 42 | 43 | calcXmx () { 44 | source "$DIR""/calcmem.sh" 45 | setEnvironment 46 | parseXmx "$@" 47 | if [[ $set == 1 ]]; then 48 | return 49 | fi 50 | freeRam 1000m 42 51 | z="-Xmx${RAM}m" 52 | z2="-Xms${RAM}m" 53 | } 54 | calcXmx "$@" 55 | 56 | comparegff() { 57 | local CMD="java $EA $EOOM $z -cp $CP gff.CompareGff $@" 58 | echo $CMD >&2 59 | eval $CMD 60 | } 61 | 62 | comparegff "$@" 63 | -------------------------------------------------------------------------------- /sh/countgc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified January 21, 2015 7 | 8 | Description: Counts GC content of reads or scaffolds. 9 | 10 | Usage: countgc in= out= format= 11 | 12 | Input may be stdin or a fasta or fastq file, compressed or uncompressed. 13 | Output (which is optional) is tab-delimited. 14 | format=1: name length A C G T N 15 | format=2: name GC 16 | format=4: name length GC 17 | Note that in format 1, A+C+G+T=1 even when N is nonzero. 18 | 19 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 20 | " 21 | } 22 | 23 | #This block allows symlinked shellscripts to correctly set classpath. 24 | pushd . 
> /dev/null 25 | DIR="${BASH_SOURCE[0]}" 26 | while [ -h "$DIR" ]; do 27 | cd "$(dirname "$DIR")" 28 | DIR="$(readlink "$(basename "$DIR")")" 29 | done 30 | cd "$(dirname "$DIR")" 31 | DIR="$(pwd)/" 32 | popd > /dev/null 33 | 34 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 35 | CP="$DIR""current/" 36 | 37 | z="-Xmx120m" 38 | set=0 39 | 40 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 41 | usage 42 | exit 43 | fi 44 | 45 | calcXmx () { 46 | source "$DIR""/calcmem.sh" 47 | setEnvironment 48 | parseXmx "$@" 49 | } 50 | calcXmx "$@" 51 | 52 | countgc() { 53 | local CMD="java $EA $EOOM $z -cp $CP jgi.CountGC $@" 54 | echo $CMD >&2 55 | eval $CMD 56 | } 57 | 58 | countgc "$@" 59 | -------------------------------------------------------------------------------- /sh/crossblock.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #For usage information, please see decontaminate.sh 4 | 5 | function crossblock(){ 6 | CMD="decontaminate.sh $@" 7 | eval $CMD 8 | } 9 | 10 | crossblock "$@" 11 | 12 | -------------------------------------------------------------------------------- /sh/current: -------------------------------------------------------------------------------- 1 | ../current -------------------------------------------------------------------------------- /sh/ecc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #ecc.sh in= out= 3 | 4 | usage(){ 5 | echo " 6 | Description: Corrects substitution errors in reads using kmer depth information. 7 | Can also normalize and/or bin reads by kmer depth. 8 | 9 | Usage: ecc.sh in= out= outt= hist= 10 | 11 | Please see bbnorm.sh for more information. 12 | All the flags are the same, only the parameters (near the bottom of this file) differ. 13 | " 14 | } 15 | 16 | pushd . 
> /dev/null 17 | DIR="${BASH_SOURCE[0]}" 18 | while [ -h "$DIR" ]; do 19 | cd "$(dirname "$DIR")" 20 | DIR="$(readlink "$(basename "$DIR")")" 21 | done 22 | cd "$(dirname "$DIR")" 23 | DIR="$(pwd)/" 24 | popd > /dev/null 25 | 26 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 27 | CP="$DIR""current/" 28 | 29 | z="-Xmx31g" 30 | z2="-Xms31g" 31 | EA="-ea" 32 | set=0 33 | 34 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 35 | usage 36 | exit 37 | fi 38 | 39 | calcXmx () { 40 | source "$DIR""/calcmem.sh" 41 | parseXmx "$@" 42 | if [[ $set == 1 ]]; then 43 | return 44 | fi 45 | freeRam 31000m 84 46 | z="-Xmx${RAM}m" 47 | z2="-Xms${RAM}m" 48 | } 49 | calcXmx "$@" 50 | 51 | correct() { 52 | if [[ $NERSC_HOST == genepool ]]; then 53 | module unload oracle-jdk 54 | module load oracle-jdk/1.7_64bit 55 | module load pigz 56 | fi 57 | local CMD="java $EA $z $z2 -cp $CP jgi.KmerNormalize bits=16 ecc=t passes=1 keepall dr=f prefilter $@" 58 | echo $CMD >&2 59 | eval $CMD 60 | } 61 | 62 | correct "$@" 63 | -------------------------------------------------------------------------------- /sh/gbff2gff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified August 14, 2019 7 | 8 | Description: Generates a GFF3 from a GBFF. 9 | Only for features I care about though. 10 | 11 | Usage: gbff2gff.sh 12 | " 13 | } 14 | 15 | #This block allows symlinked shellscripts to correctly set classpath. 16 | pushd . 
> /dev/null 17 | DIR="${BASH_SOURCE[0]}" 18 | while [ -h "$DIR" ]; do 19 | cd "$(dirname "$DIR")" 20 | DIR="$(readlink "$(basename "$DIR")")" 21 | done 22 | cd "$(dirname "$DIR")" 23 | DIR="$(pwd)/" 24 | popd > /dev/null 25 | 26 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 27 | CP="$DIR""current/" 28 | 29 | z="-Xmx1g" 30 | set=0 31 | 32 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 33 | usage 34 | exit 35 | fi 36 | 37 | calcXmx () { 38 | source "$DIR""/calcmem.sh" 39 | setEnvironment 40 | parseXmx "$@" 41 | } 42 | calcXmx "$@" 43 | 44 | gff() { 45 | local CMD="java $EA $EOOM $z -cp $CP gff.GbffFile $@" 46 | echo $CMD >&2 47 | eval $CMD 48 | } 49 | 50 | gff "$@" 51 | -------------------------------------------------------------------------------- /sh/getreads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified February 17, 2015 7 | 8 | Description: Selects reads with designated numeric IDs. 9 | 10 | Usage: getreads.sh in= id= out= 11 | 12 | The first read (or pair) has ID 0, the second read (or pair) has ID 1, etc. 13 | 14 | Parameters: 15 | in= Specify the input file, or stdin. 16 | out= Specify the output file, or stdout. 17 | id= Comma delimited list of numbers or ranges, in any order. 18 | For example: id=5,93,17-31,8,0,12-13 19 | 20 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 21 | " 22 | } 23 | 24 | #This block allows symlinked shellscripts to correctly set classpath. 25 | pushd . 
> /dev/null 26 | DIR="${BASH_SOURCE[0]}" 27 | while [ -h "$DIR" ]; do 28 | cd "$(dirname "$DIR")" 29 | DIR="$(readlink "$(basename "$DIR")")" 30 | done 31 | cd "$(dirname "$DIR")" 32 | DIR="$(pwd)/" 33 | popd > /dev/null 34 | 35 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 36 | CP="$DIR""current/" 37 | 38 | z="-Xmx200m" 39 | set=0 40 | 41 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 42 | usage 43 | exit 44 | fi 45 | 46 | calcXmx () { 47 | source "$DIR""/calcmem.sh" 48 | setEnvironment 49 | parseXmx "$@" 50 | } 51 | calcXmx "$@" 52 | 53 | function tf() { 54 | local CMD="java $EA $EOOM $z -cp $CP jgi.GetReads $@" 55 | echo $CMD >&2 56 | eval $CMD 57 | } 58 | 59 | tf "$@" -------------------------------------------------------------------------------- /sh/grademerge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified February 17, 2015 7 | 8 | Description: Grades correctness of merging synthetic reads with headers 9 | generated by RandomReads and re-headered by RenameReads. 10 | 11 | Usage: grademerge.sh in= 12 | 13 | Parameters: 14 | in= Specify the input file, or 'stdin'. 15 | 16 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 17 | " 18 | } 19 | 20 | #This block allows symlinked shellscripts to correctly set classpath. 21 | pushd . 
> /dev/null 22 | DIR="${BASH_SOURCE[0]}" 23 | while [ -h "$DIR" ]; do 24 | cd "$(dirname "$DIR")" 25 | DIR="$(readlink "$(basename "$DIR")")" 26 | done 27 | cd "$(dirname "$DIR")" 28 | DIR="$(pwd)/" 29 | popd > /dev/null 30 | 31 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 32 | CP="$DIR""current/" 33 | set=0 34 | 35 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 36 | usage 37 | exit 38 | fi 39 | 40 | function grademerge() { 41 | local CMD="java $EA $EOOM -Xmx200m -cp $CP jgi.GradeMergedReads $@" 42 | # echo $CMD >&2 43 | eval $CMD 44 | } 45 | 46 | grademerge "$@" 47 | -------------------------------------------------------------------------------- /sh/icecreamgrader.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified January 21, 2020 7 | 8 | Description: Counts the rate of triangle reads in a file 9 | generated by IceCreamMaker with custom headers. 10 | 11 | Usage: icecreamgrader.sh in= 12 | 13 | Standard parameters: 14 | in= Reads to grade. 15 | 16 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 17 | " 18 | } 19 | 20 | #This block allows symlinked shellscripts to correctly set classpath. 21 | pushd . 
> /dev/null 22 | DIR="${BASH_SOURCE[0]}" 23 | while [ -h "$DIR" ]; do 24 | cd "$(dirname "$DIR")" 25 | DIR="$(readlink "$(basename "$DIR")")" 26 | done 27 | cd "$(dirname "$DIR")" 28 | DIR="$(pwd)/" 29 | popd > /dev/null 30 | 31 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 32 | CP="$DIR""current/" 33 | 34 | z="-Xmx200m" 35 | z2="-Xms200m" 36 | set=0 37 | 38 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 39 | usage 40 | exit 41 | fi 42 | 43 | calcXmx () { 44 | source "$DIR""/calcmem.sh" 45 | setEnvironment 46 | parseXmx "$@" 47 | } 48 | calcXmx "$@" 49 | 50 | icecreamgrader() { 51 | local CMD="java $EA $EOOM $z -cp $CP icecream.IceCreamGrader $@" 52 | echo $CMD >&2 53 | eval $CMD 54 | } 55 | 56 | icecreamgrader "$@" 57 | -------------------------------------------------------------------------------- /sh/khist.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Description: Generates a histogram of kmer counts for the input reads or assemblies. 6 | 7 | Usage: khist.sh in= hist= 8 | 9 | Please see bbnorm.sh for more information. 10 | All the flags are the same, only the parameters (near the bottom of this file) differ. 11 | " 12 | } 13 | 14 | #This block allows symlinked shellscripts to correctly set classpath. 15 | pushd . 
> /dev/null 16 | DIR="${BASH_SOURCE[0]}" 17 | while [ -h "$DIR" ]; do 18 | cd "$(dirname "$DIR")" 19 | DIR="$(readlink "$(basename "$DIR")")" 20 | done 21 | cd "$(dirname "$DIR")" 22 | DIR="$(pwd)/" 23 | popd > /dev/null 24 | 25 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 26 | CP="$DIR""current/" 27 | 28 | z="-Xmx31g" 29 | z2="-Xms31g" 30 | set=0 31 | 32 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 33 | usage 34 | exit 35 | fi 36 | 37 | calcXmx () { 38 | source "$DIR""/calcmem.sh" 39 | setEnvironment 40 | parseXmx "$@" 41 | if [[ $set == 1 ]]; then 42 | return 43 | fi 44 | freeRam 31000m 84 45 | z="-Xmx${RAM}m" 46 | z2="-Xms${RAM}m" 47 | } 48 | calcXmx "$@" 49 | 50 | khist() { 51 | local CMD="java $EA $EOOM $z $z2 -cp $CP jgi.KmerNormalize bits=32 ecc=f passes=1 keepall dr=f prefilter hist=stdout minprob=0 minqual=0 mindepth=0 minkmers=1 hashes=3 $@" 52 | echo $CMD >&2 53 | eval $CMD 54 | } 55 | 56 | khist "$@" 57 | -------------------------------------------------------------------------------- /sh/mergeOTUs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified January 21, 2015 7 | 8 | Description: Merges coverage stats lines (from pileup) for the same OTU, 9 | according to some custom naming scheme. 10 | 11 | Usage: mergeOTUs.sh in= out= 12 | 13 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 14 | " 15 | } 16 | 17 | #This block allows symlinked shellscripts to correctly set classpath. 18 | pushd . 
> /dev/null 19 | DIR="${BASH_SOURCE[0]}" 20 | while [ -h "$DIR" ]; do 21 | cd "$(dirname "$DIR")" 22 | DIR="$(readlink "$(basename "$DIR")")" 23 | done 24 | cd "$(dirname "$DIR")" 25 | DIR="$(pwd)/" 26 | popd > /dev/null 27 | 28 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 29 | CP="$DIR""current/" 30 | 31 | z="-Xmx1g" 32 | set=0 33 | 34 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 35 | usage 36 | exit 37 | fi 38 | 39 | calcXmx () { 40 | source "$DIR""/calcmem.sh" 41 | setEnvironment 42 | parseXmx "$@" 43 | } 44 | calcXmx "$@" 45 | 46 | function mergeOTUs() { 47 | local CMD="java $EA $EOOM $z -cp $CP driver.MergeCoverageOTU $@" 48 | echo $CMD >&2 49 | eval $CMD 50 | } 51 | 52 | mergeOTUs "$@" 53 | -------------------------------------------------------------------------------- /sh/mergesam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified March 8, 2017 7 | 8 | Description: Concatenates sam files, keeping only the header from the first. 9 | 10 | Usage: mergesam.sh out= 11 | 12 | Java Parameters: 13 | -da Disable assertions. 14 | 15 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 16 | " 17 | } 18 | 19 | #This block allows symlinked shellscripts to correctly set classpath. 20 | pushd . 
> /dev/null 21 | DIR="${BASH_SOURCE[0]}" 22 | while [ -h "$DIR" ]; do 23 | cd "$(dirname "$DIR")" 24 | DIR="$(readlink "$(basename "$DIR")")" 25 | done 26 | cd "$(dirname "$DIR")" 27 | DIR="$(pwd)/" 28 | popd > /dev/null 29 | 30 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 31 | CP="$DIR""current/" 32 | 33 | z="-Xmx400m" 34 | set=0 35 | 36 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 37 | usage 38 | exit 39 | fi 40 | 41 | calcXmx () { 42 | source "$DIR""/calcmem.sh" 43 | setEnvironment 44 | parseXmx "$@" 45 | } 46 | calcXmx "$@" 47 | 48 | function mergesam() { 49 | local CMD="java $EA $EOOM $z -cp $CP jgi.MergeSam $@" 50 | echo $CMD >&2 51 | eval $CMD 52 | } 53 | 54 | mergesam "$@" 55 | -------------------------------------------------------------------------------- /sh/printtime.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified January 21, 2015 7 | 8 | Description: Prints time elapsed since last called on the same file. 9 | 10 | Usage: printtime.sh 11 | 12 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 13 | " 14 | } 15 | 16 | #This block allows symlinked shellscripts to correctly set classpath. 17 | pushd . 
> /dev/null 18 | DIR="${BASH_SOURCE[0]}" 19 | while [ -h "$DIR" ]; do 20 | cd "$(dirname "$DIR")" 21 | DIR="$(readlink "$(basename "$DIR")")" 22 | done 23 | cd "$(dirname "$DIR")" 24 | DIR="$(pwd)/" 25 | popd > /dev/null 26 | 27 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 28 | CP="$DIR""current/" 29 | set=0 30 | 31 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 32 | usage 33 | exit 34 | fi 35 | 36 | calcXmx () { 37 | source "$DIR""/calcmem.sh" 38 | setEnvironment 39 | parseXmx "$@" 40 | } 41 | calcXmx "$@" 42 | 43 | function printtime() { 44 | local CMD="java $EA $EOOM -Xmx8m -cp $CP align2.PrintTime $@" 45 | echo $CMD >&2 46 | eval $CMD 47 | } 48 | 49 | printtime "$@" 50 | -------------------------------------------------------------------------------- /sh/processspeed.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified December 6, 2016 7 | 8 | Description: Summarizes results of Linux time command. 9 | 10 | Usage: processspeed.sh 11 | 12 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 13 | " 14 | } 15 | 16 | #This block allows symlinked shellscripts to correctly set classpath. 17 | pushd . 
> /dev/null 18 | DIR="${BASH_SOURCE[0]}" 19 | while [ -h "$DIR" ]; do 20 | cd "$(dirname "$DIR")" 21 | DIR="$(readlink "$(basename "$DIR")")" 22 | done 23 | cd "$(dirname "$DIR")" 24 | DIR="$(pwd)/" 25 | popd > /dev/null 26 | 27 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 28 | CP="$DIR""current/" 29 | 30 | z="-Xmx120m" 31 | set=0 32 | 33 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 34 | usage 35 | exit 36 | fi 37 | 38 | calcXmx () { 39 | source "$DIR""/calcmem.sh" 40 | setEnvironment 41 | parseXmx "$@" 42 | } 43 | calcXmx "$@" 44 | 45 | processspeed() { 46 | local CMD="java $EA $EOOM $z -cp $CP driver.ProcessSpeed2 $@" 47 | # echo $CMD >&2 48 | eval $CMD 49 | } 50 | 51 | processspeed "$@" 52 | -------------------------------------------------------------------------------- /sh/removesmartbell.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified May 2, 2017 7 | 8 | Description: Remove Smart Bell adapters from PacBio reads. 9 | 10 | Usage: removesmartbell in= out= split=t 11 | 12 | Input may be fasta or fastq, compressed or uncompressed (not H5 files). 13 | 14 | Parameters: 15 | in=file Specify the input file, or stdin. 16 | out=file Specify the output file, or stdout. 17 | adapter= Specify the adapter sequence (default is normal SmrtBell). 18 | split=t t: Splits reads at adapters. 19 | f: Masks adapters with X symbols. 20 | 21 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 22 | " 23 | } 24 | 25 | #This block allows symlinked shellscripts to correctly set classpath. 26 | pushd . 
> /dev/null 27 | DIR="${BASH_SOURCE[0]}" 28 | while [ -h "$DIR" ]; do 29 | cd "$(dirname "$DIR")" 30 | DIR="$(readlink "$(basename "$DIR")")" 31 | done 32 | cd "$(dirname "$DIR")" 33 | DIR="$(pwd)/" 34 | popd > /dev/null 35 | 36 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 37 | CP="$DIR""current/" 38 | 39 | z="-Xmx400m" 40 | set=0 41 | 42 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 43 | usage 44 | exit 45 | fi 46 | 47 | calcXmx () { 48 | source "$DIR""/calcmem.sh" 49 | setEnvironment 50 | parseXmx "$@" 51 | } 52 | calcXmx "$@" 53 | 54 | removesmartbell() { 55 | local CMD="java $EA $EOOM $z -cp $CP pacbio.RemoveAdapters2 $@" 56 | echo $CMD >&2 57 | eval $CMD 58 | } 59 | 60 | removesmartbell "$@" 61 | -------------------------------------------------------------------------------- /sh/rqcfilter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #For more information, please see rqcfilter2.sh 4 | #RQCFilter was deprecated and replaced with RQCFilter2 5 | 6 | pushd . > /dev/null 7 | DIR="${BASH_SOURCE[0]}" 8 | while [ -h "$DIR" ]; do 9 | cd "$(dirname "$DIR")" 10 | DIR="$(readlink "$(basename "$DIR")")" 11 | done 12 | cd "$(dirname "$DIR")" 13 | DIR="$(pwd)/" 14 | popd > /dev/null 15 | 16 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 17 | #Forward every argument to rqcfilter2.sh unchanged; quoting "$@" keeps arguments containing spaces intact. 18 | "$DIR"rqcfilter2.sh "$@" 19 | -------------------------------------------------------------------------------- /sh/runhmm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified August 5, 2020 7 | 8 | Description: Processes data. (WIP) 9 | 10 | Usage: runhmm.sh in= out= 11 | 12 | Parameters and their defaults: 13 | 14 | ow=f (overwrite) Overwrites files that already exist. 15 | 16 | Processing Parameters: 17 | 18 | None yet! 
19 | 20 | 21 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 22 | " 23 | } 24 | 25 | #This block allows symlinked shellscripts to correctly set classpath. 26 | pushd . > /dev/null 27 | DIR="${BASH_SOURCE[0]}" 28 | while [ -h "$DIR" ]; do 29 | cd "$(dirname "$DIR")" 30 | DIR="$(readlink "$(basename "$DIR")")" 31 | done 32 | cd "$(dirname "$DIR")" 33 | DIR="$(pwd)/" 34 | popd > /dev/null 35 | 36 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 37 | CP="$DIR""current/" 38 | #Default heap flags passed to java below: z is the maximum (-Xmx), z2 is the initial size (-Xms). 39 | z="-Xmx300m" 40 | z2="-Xms300m" 41 | set=0 42 | 43 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 44 | usage 45 | exit 46 | fi 47 | 48 | calcXmx () { 49 | source "$DIR""/calcmem.sh" 50 | setEnvironment 51 | parseXmx "$@" 52 | } 53 | calcXmx "$@" 54 | 55 | function runhmm() { 56 | local CMD="java $EA $EOOM $z $z2 -cp $CP hmm.HMMSearchReport $@" 57 | echo $CMD >&2 58 | eval $CMD 59 | } 60 | 61 | runhmm "$@" 62 | -------------------------------------------------------------------------------- /sh/splitsam.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified February 9, 2015 7 | 8 | Description: Splits a sam file into three files: 9 | Plus-mapped reads, Minus-mapped reads, and Unmapped. 10 | If 'header' is the 5th argument, header lines will be included. 11 | 12 | Usage: splitsam 13 | 14 | Input may be stdin or a sam file, raw or gzipped. 15 | Outputs must be sam files, and may be gzipped. 16 | " 17 | } 18 | 19 | #This block allows symlinked shellscripts to correctly set classpath. 20 | pushd . 
> /dev/null 21 | DIR="${BASH_SOURCE[0]}" 22 | while [ -h "$DIR" ]; do 23 | cd "$(dirname "$DIR")" 24 | DIR="$(readlink "$(basename "$DIR")")" 25 | done 26 | cd "$(dirname "$DIR")" 27 | DIR="$(pwd)/" 28 | popd > /dev/null 29 | 30 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 31 | CP="$DIR""current/" 32 | set=0 33 | 34 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 35 | usage 36 | exit 37 | fi 38 | 39 | calcXmx () { 40 | source "$DIR""/calcmem.sh" 41 | setEnvironment 42 | parseXmx "$@" 43 | } 44 | calcXmx "$@" 45 | #Launches jgi.SplitSamFile; the command line is echoed to stderr so it never mixes with stdout. 46 | function splitsam() { 47 | local CMD="java $EA $EOOM -Xmx128m -cp $CP jgi.SplitSamFile $@" 48 | echo $CMD >&2 49 | eval $CMD 50 | } 51 | 52 | splitsam "$@" 53 | -------------------------------------------------------------------------------- /sh/splitsam4way.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified January 21, 2015 7 | 8 | Description: Splits sam reads into 4 output files depending on mapping. 9 | 10 | Usage: splitsam4way.sh 11 | 12 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 13 | " 14 | } 15 | 16 | #This block allows symlinked shellscripts to correctly set classpath. 17 | pushd . 
> /dev/null 18 | DIR="${BASH_SOURCE[0]}" 19 | while [ -h "$DIR" ]; do 20 | cd "$(dirname "$DIR")" 21 | DIR="$(readlink "$(basename "$DIR")")" 22 | done 23 | cd "$(dirname "$DIR")" 24 | DIR="$(pwd)/" 25 | popd > /dev/null 26 | 27 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 28 | CP="$DIR""current/" 29 | set=0 30 | 31 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 32 | usage 33 | exit 34 | fi 35 | 36 | calcXmx () { 37 | source "$DIR""/calcmem.sh" 38 | setEnvironment 39 | parseXmx "$@" 40 | } 41 | calcXmx "$@" 42 | 43 | function split() { 44 | local CMD="java $EA $EOOM -Xmx128m -Xms128m -cp $CP jgi.SplitSam4Way $@" 45 | echo $CMD >&2 46 | eval $CMD 47 | } 48 | 49 | split "$@" 50 | -------------------------------------------------------------------------------- /sh/splitsam6way.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified June 15, 2017 7 | 8 | Description: Splits sam reads into 6 output files depending on mapping. 9 | 10 | Usage: splitsam6way.sh 11 | 12 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 13 | " 14 | } 15 | 16 | #This block allows symlinked shellscripts to correctly set classpath. 17 | pushd . 
> /dev/null 18 | DIR="${BASH_SOURCE[0]}" 19 | while [ -h "$DIR" ]; do 20 | cd "$(dirname "$DIR")" 21 | DIR="$(readlink "$(basename "$DIR")")" 22 | done 23 | cd "$(dirname "$DIR")" 24 | DIR="$(pwd)/" 25 | popd > /dev/null 26 | 27 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 28 | CP="$DIR""current/" 29 | set=0 30 | 31 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 32 | usage 33 | exit 34 | fi 35 | 36 | calcXmx () { 37 | source "$DIR""/calcmem.sh" 38 | setEnvironment 39 | parseXmx "$@" 40 | } 41 | calcXmx "$@" 42 | 43 | function split() { 44 | local CMD="java $EA $EOOM -Xmx128m -Xms128m -cp $CP jgi.SplitSam6Way $@" 45 | echo $CMD >&2 46 | eval $CMD 47 | } 48 | 49 | split "$@" 50 | -------------------------------------------------------------------------------- /sh/summarizemerge.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified June 6, 2016 7 | 8 | Description: Summarizes the output of GradeMerge for comparing 9 | read-merging performance. 10 | 11 | Usage: summarizemerge.sh in= 12 | 13 | Parameters: 14 | in= A file containing GradeMerge output. 15 | 16 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 17 | " 18 | } 19 | 20 | #This block allows symlinked shellscripts to correctly set classpath. 21 | pushd . 
> /dev/null 22 | DIR="${BASH_SOURCE[0]}" 23 | while [ -h "$DIR" ]; do 24 | cd "$(dirname "$DIR")" 25 | DIR="$(readlink "$(basename "$DIR")")" 26 | done 27 | cd "$(dirname "$DIR")" 28 | DIR="$(pwd)/" 29 | popd > /dev/null 30 | 31 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 32 | CP="$DIR""current/" 33 | 34 | z="-Xmx120m" 35 | set=0 36 | 37 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 38 | usage 39 | exit 40 | fi 41 | 42 | calcXmx () { 43 | source "$DIR""/calcmem.sh" 44 | setEnvironment 45 | parseXmx "$@" 46 | } 47 | calcXmx "$@" 48 | 49 | summarizemerge() { 50 | local CMD="java $EA $EOOM $z -cp $CP driver.ProcessSpeed $@" 51 | # echo $CMD >&2 52 | eval $CMD 53 | } 54 | 55 | summarizemerge "$@" 56 | -------------------------------------------------------------------------------- /sh/summarizequast.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified October 17, 2016 7 | 8 | Description: Summarizes the output of multiple Quast reports for 9 | making box plots. 10 | 11 | Usage: summarizequast.sh */quast/report.tsv 12 | 13 | Parameters: 14 | out=stdout Destination for summary. 15 | required= A required substring in assembly names for filtering. 16 | normalize=t Normalize each metric to the average per report. 17 | box=t Print only 5 points per metric for box plots. 18 | 19 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 20 | " 21 | } 22 | 23 | #This block allows symlinked shellscripts to correctly set classpath. 24 | pushd . 
> /dev/null 25 | DIR="${BASH_SOURCE[0]}" 26 | while [ -h "$DIR" ]; do 27 | cd "$(dirname "$DIR")" 28 | DIR="$(readlink "$(basename "$DIR")")" 29 | done 30 | cd "$(dirname "$DIR")" 31 | DIR="$(pwd)/" 32 | popd > /dev/null 33 | 34 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 35 | CP="$DIR""current/" 36 | 37 | z="-Xmx400m" 38 | set=0 39 | 40 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 41 | usage 42 | exit 43 | fi 44 | 45 | calcXmx () { 46 | source "$DIR""/calcmem.sh" 47 | setEnvironment 48 | parseXmx "$@" 49 | } 50 | calcXmx "$@" 51 | 52 | summarizequast() { 53 | local CMD="java $EA $EOOM $z -cp $CP driver.SummarizeQuast $@" 54 | # echo $CMD >&2 55 | eval $CMD 56 | } 57 | 58 | summarizequast "$@" 59 | -------------------------------------------------------------------------------- /sh/testfilesystem.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified December 11, 2017 7 | 8 | Description: Logs filesystem performance by creating, deleting, 9 | and copying files. 10 | 11 | Usage: testfilesystem.sh 12 | 13 | 'in' should contain the # symbol if ways>1. 14 | 15 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 16 | " 17 | } 18 | 19 | 20 | #This block allows symlinked shellscripts to correctly set classpath. 21 | pushd . 
> /dev/null 22 | DIR="${BASH_SOURCE[0]}" 23 | while [ -h "$DIR" ]; do 24 | cd "$(dirname "$DIR")" 25 | DIR="$(readlink "$(basename "$DIR")")" 26 | done 27 | cd "$(dirname "$DIR")" 28 | DIR="$(pwd)/" 29 | popd > /dev/null 30 | 31 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 32 | CP="$DIR""current/" 33 | 34 | z="-Xmx50m" 35 | set=0 36 | 37 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 38 | usage 39 | exit 40 | fi 41 | 42 | calcXmx () { 43 | source "$DIR""/calcmem.sh" 44 | setEnvironment 45 | parseXmx "$@" 46 | } 47 | calcXmx "$@" 48 | 49 | function testfs() { 50 | local CMD="java $EA $EOOM $z -cp $CP jgi.TestFilesystem $@" 51 | echo $CMD >&2 52 | eval $CMD 53 | } 54 | 55 | testfs "$@" 56 | -------------------------------------------------------------------------------- /sh/testformat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified August 4, 2016 7 | 8 | Description: Tests file extensions and contents to determine format, 9 | quality, compression, interleaving, and read length. More than one file 10 | may be specified. Note that ASCII-33 (sanger) and ASCII-64 11 | (old Illumina/Solexa) cannot always be differentiated. 12 | 13 | Usage: testformat.sh 14 | 15 | See also: testformat2.sh, stats.sh 16 | 17 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 18 | " 19 | } 20 | 21 | #This block allows symlinked shellscripts to correctly set classpath. 22 | pushd . 
> /dev/null 23 | DIR="${BASH_SOURCE[0]}" 24 | while [ -h "$DIR" ]; do 25 | cd "$(dirname "$DIR")" 26 | DIR="$(readlink "$(basename "$DIR")")" 27 | done 28 | cd "$(dirname "$DIR")" 29 | DIR="$(pwd)/" 30 | popd > /dev/null 31 | 32 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 33 | CP="$DIR""current/" 34 | 35 | z="-Xmx120m" 36 | set=0 37 | 38 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 39 | usage 40 | exit 41 | fi 42 | 43 | calcXmx () { 44 | source "$DIR""/calcmem.sh" 45 | setEnvironment 46 | parseXmx "$@" 47 | } 48 | calcXmx "$@" 49 | 50 | testformat() { 51 | local CMD="java $EA $EOOM $z -cp $CP fileIO.FileFormat $@" 52 | # echo $CMD >&2 53 | eval $CMD 54 | } 55 | 56 | testformat "$@" 57 | -------------------------------------------------------------------------------- /sh/textfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified February 17, 2015 7 | 8 | Description: Displays contents of a text file. 9 | Start line and stop line are zero-based. Start is inclusive, 10 | stop is exclusive. 11 | 12 | Usage: textfile.sh 13 | 14 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 15 | " 16 | } 17 | 18 | #This block allows symlinked shellscripts to correctly set classpath. 19 | pushd . 
> /dev/null 20 | DIR="${BASH_SOURCE[0]}" 21 | while [ -h "$DIR" ]; do 22 | cd "$(dirname "$DIR")" 23 | DIR="$(readlink "$(basename "$DIR")")" 24 | done 25 | cd "$(dirname "$DIR")" 26 | DIR="$(pwd)/" 27 | popd > /dev/null 28 | 29 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 30 | CP="$DIR""current/" 31 | set=0 32 | 33 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 34 | usage 35 | exit 36 | fi 37 | 38 | calcXmx () { 39 | source "$DIR""/calcmem.sh" 40 | setEnvironment 41 | parseXmx "$@" 42 | } 43 | calcXmx "$@" 44 | 45 | function tf() { 46 | local CMD="java $EA $EOOM -Xmx120m -cp $CP fileIO.TextFile $@" 47 | echo $CMD >&2 48 | eval $CMD 49 | } 50 | 51 | tf "$@" 52 | -------------------------------------------------------------------------------- /sh/unicode2ascii.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified October 17, 2017 7 | 8 | Description: Replaces unicode and control characters with printable ascii characters. 9 | WARNING - this does not work in many cases, and is not recommended! 10 | It is only retained because there is some situation in which it is needed. 11 | 12 | Usage: unicode2ascii.sh in= out= 13 | 14 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 15 | " 16 | } 17 | 18 | #This block allows symlinked shellscripts to correctly set classpath. 19 | pushd . 
> /dev/null 20 | DIR="${BASH_SOURCE[0]}" 21 | while [ -h "$DIR" ]; do 22 | cd "$(dirname "$DIR")" 23 | DIR="$(readlink "$(basename "$DIR")")" 24 | done 25 | cd "$(dirname "$DIR")" 26 | DIR="$(pwd)/" 27 | popd > /dev/null 28 | 29 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 30 | CP="$DIR""current/" 31 | 32 | z="-Xmx200m" 33 | set=0 34 | 35 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 36 | usage 37 | exit 38 | fi 39 | 40 | calcXmx () { 41 | source "$DIR""/calcmem.sh" 42 | setEnvironment 43 | parseXmx "$@" 44 | } 45 | calcXmx "$@" 46 | 47 | function unicode2ascii() { 48 | local CMD="java $EA $EOOM $z -cp $CP jgi.UnicodeToAscii $@" 49 | echo $CMD >&2 50 | eval $CMD 51 | } 52 | 53 | unicode2ascii "$@" 54 | -------------------------------------------------------------------------------- /sh/unzip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified April 25, 2019 7 | 8 | Description: Compresses or decompresses files based on extensions. 9 | This only exists because the syntax and default behavior of many 10 | compression utilities is unintuitive; it is just a wrapper, and 11 | relies on existing executables in the command line (pigz, lbzip, etc.) 12 | Does not delete the input file. 13 | Does not untar files. 14 | 15 | Usage: unzip.sh in= out= 16 | 17 | Parameters: 18 | in= Input file. 19 | out= Output file for good reads. 20 | zl= Set the compression level; 0-9 or 11. 21 | 22 | Please contact Brian Bushnell at bbushnell@lbl.gov if you encounter any problems. 23 | " 24 | } 25 | 26 | #This block allows symlinked shellscripts to correctly set classpath. 27 | pushd . 
> /dev/null 28 | DIR="${BASH_SOURCE[0]}" 29 | while [ -h "$DIR" ]; do 30 | cd "$(dirname "$DIR")" 31 | DIR="$(readlink "$(basename "$DIR")")" 32 | done 33 | cd "$(dirname "$DIR")" 34 | DIR="$(pwd)/" 35 | popd > /dev/null 36 | 37 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 38 | CP="$DIR""current/" 39 | 40 | z="-Xmx80m" 41 | set=0 42 | 43 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 44 | usage 45 | exit 46 | fi 47 | 48 | calcXmx () { 49 | source "$DIR""/calcmem.sh" 50 | setEnvironment 51 | parseXmx "$@" 52 | } 53 | calcXmx "$@" 54 | 55 | unzip() { 56 | local CMD="java $EA $EOOM $z -cp $CP jgi.Unzip $@" 57 | # echo $CMD >&2 58 | eval $CMD 59 | } 60 | 61 | unzip "$@" 62 | -------------------------------------------------------------------------------- /sh/upd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Replaces ./current, ./docs, ./config, ./jni, ./resources and ./sh with the contents of the BBMap tarball given as $1. 3 | mkdir t tt 2>/dev/null 4 | 5 | # extract BBMap tgz to ./t/ first; abort if extraction fails so the rm -fr steps below never run without a replacement. 6 | tar -xzv -C ./t --strip-components=1 -f "$1" || exit 1 7 | 8 | rm -fr current && mv t/current . 9 | rm -fr docs && mv t/docs . 10 | rm -fr config && mv t/config . 11 | rm -fr jni && mv t/jni . 12 | rm -fr resources && mv t/resources . 13 | rm -fr sh && mkdir sh && mv t/*.sh sh/ 14 | mv t/build.xml . && mv t/license.txt . 15 | mv t/README.md tt/ 16 | 17 | sed -i.bak 's/^# *//g' tt/README.md 18 | cat tt/README.md 19 | ln -s ../current sh/ 20 | ls -la t tt sh/current 21 | 22 | git add sh docs resources current config jni 23 | 24 | echo git commit . -m "'Extract Version 3?.?? from $(basename "$1")'" 25 | echo git tag -a v35.85 -F tt/README.md 26 | # git push && git push --tags 27 | 28 | -------------------------------------------------------------------------------- /sh/vcf2gff.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | usage(){ 4 | echo " 5 | Written by Brian Bushnell 6 | Last modified August 13, 2019 7 | 8 | Description: Generates a GFF3 from a VCF. 
9 | 10 | Usage: vcf2gff.sh in= out= 11 | 12 | Parameters: 13 | in= Input VCF file. 14 | out= Output GFF file. 15 | " 16 | } 17 | 18 | #This block allows symlinked shellscripts to correctly set classpath. 19 | pushd . > /dev/null 20 | DIR="${BASH_SOURCE[0]}" 21 | while [ -h "$DIR" ]; do 22 | cd "$(dirname "$DIR")" 23 | DIR="$(readlink "$(basename "$DIR")")" 24 | done 25 | cd "$(dirname "$DIR")" 26 | DIR="$(pwd)/" 27 | popd > /dev/null 28 | 29 | #DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/" 30 | CP="$DIR""current/" 31 | 32 | z="-Xmx200m" 33 | set=0 34 | 35 | if [ -z "$1" ] || [[ $1 == -h ]] || [[ $1 == --help ]]; then 36 | usage 37 | exit 38 | fi 39 | 40 | calcXmx () { 41 | source "$DIR""/calcmem.sh" 42 | setEnvironment 43 | parseXmx "$@" 44 | } 45 | calcXmx "$@" 46 | 47 | gff() { 48 | local CMD="java $EA $EOOM $z -cp $CP gff.GffLine $@" 49 | # echo $CMD >&2 50 | eval $CMD 51 | } 52 | 53 | gff "$@" 54 | --------------------------------------------------------------------------------