├── .classpath ├── .gitignore ├── .project ├── META-INF └── MANIFEST.MF ├── README.rdoc ├── build.xml ├── dist └── java-genomics-toolkit.jar ├── galaxy-conf ├── Add.xml ├── Autocorrelation.xml ├── Average.xml ├── BaseAlignCounts.xml ├── Correlate.xml ├── DNAPropertyCalculator.xml ├── Divide.xml ├── Downsample.xml ├── DynaPro.xml ├── ExtractDataFromRegion.xml ├── FastqIlluminaToSanger.xml ├── FindAbsoluteMaxima.xml ├── FindBoundaryNucleosomes.xml ├── FindNMers.xml ├── FindOutlierRegions.xml ├── GaussianSmooth.xml ├── GeneTrackToBedGraph.xml ├── GeneTrackToWig.xml ├── GreedyCaller.xml ├── InterpolateDiscontinuousData.xml ├── IntervalAverager.xml ├── IntervalLengthDistribution.xml ├── IntervalStats.xml ├── IntervalToBed.xml ├── IntervalToWig.xml ├── KMeans.xml ├── LogTransform.xml ├── MapDyads.xml ├── MatrixAligner.xml ├── MovingAverageSmooth.xml ├── Multiply.xml ├── PairOverlappingNucleosomes.xml ├── PercusDecomposition.xml ├── Phasogram.xml ├── PowerSpectrum.xml ├── ReadLengthDistributionMatrix.xml ├── RollingReadLength.xml ├── RomanNumeralize.xml ├── Scale.xml ├── Shift.xml ├── StripMatrix.xml ├── Subsample.xml ├── Subtract.xml ├── Summary.xml ├── ValueDistribution.xml ├── WaveletTransform.xml ├── ZScore.xml ├── galaxyToolRunner.sh ├── log4j.properties └── matrix2png.xml ├── galaxyToolConf.xml ├── lib ├── BigWig.jar ├── JLargeArrays-1.2.jar ├── JTransforms-3.0.jar ├── commons-lang3-3.1.jar ├── commons-math3-3.0.jar ├── dnaproperties-1732.jar ├── hamcrest-core-1.1.0.jar ├── java-genomics-io.jar ├── jcommander-1.27.jar ├── junit.jar ├── log4j-1.2.15.jar ├── picard-1.67.jar └── sam-1.67.jar ├── license.txt ├── log4j.properties ├── resources └── assemblies │ ├── ce10.len │ ├── ce2.len │ ├── ce3.len │ ├── ce4.len │ ├── ce5.len │ ├── ce6.len │ ├── ce7.len │ ├── ce8.len │ ├── ce9.len │ ├── dm1.len │ ├── dm2.len │ ├── dm3.len │ ├── hg15.len │ ├── hg16.len │ ├── hg17.len │ ├── hg18.len │ ├── hg19.len │ ├── hg19Haps.len │ ├── hg19Patch2.len │ ├── hg38.len │ ├── klac.len │ 
├── kwal.len │ ├── sacCer1.len │ ├── sacCer2.len │ └── sacCer3.len ├── sam_fa_indices.loc.sample ├── src ├── edu │ └── unc │ │ ├── genomics │ │ ├── AssemblyConverter.java │ │ ├── AssemblyFactory.java │ │ ├── CommandLineTool.java │ │ ├── CommandLineToolException.java │ │ ├── KMeansRow.java │ │ ├── NucleosomeCall.java │ │ ├── NucleosomeCallsFileReader.java │ │ ├── PathConverter.java │ │ ├── PathFactory.java │ │ ├── ReadMapperTool.java │ │ ├── ReadablePathValidator.java │ │ ├── WigAnalysisTool.java │ │ ├── WigMathTool.java │ │ ├── converters │ │ │ ├── FastqIlluminaToSanger.java │ │ │ ├── GeneTrackToBedGraph.java │ │ │ ├── GeneTrackToWig.java │ │ │ ├── InterpolateDiscontinuousData.java │ │ │ ├── IntervalToBed.java │ │ │ ├── IntervalToWig.java │ │ │ └── RomanNumeralize.java │ │ ├── dna │ │ │ ├── DNAPropertyCalculator.java │ │ │ └── FindNMers.java │ │ ├── ngs │ │ │ ├── Autocorrelation.java │ │ │ ├── BaseAlignCounts.java │ │ │ ├── ExtractDataFromRegion.java │ │ │ ├── FilterRegions.java │ │ │ ├── FindAbsoluteMaxima.java │ │ │ ├── FindOutlierRegions.java │ │ │ ├── FragmentLengthDistributionByGene.java │ │ │ ├── IntervalLengthDistribution.java │ │ │ ├── IntervalStats.java │ │ │ ├── PowerSpectrum.java │ │ │ ├── ReadLengthDistributionMatrix.java │ │ │ ├── RollingReadLength.java │ │ │ ├── SplitReads.java │ │ │ ├── SplitWigIntervals.java │ │ │ ├── Subsample.java │ │ │ └── WaveletTransform.java │ │ ├── nucleosomes │ │ │ ├── DynaPro.java │ │ │ ├── FindBoundaryNucleosomes.java │ │ │ ├── GreedyCaller.java │ │ │ ├── IntervalEntropy.java │ │ │ ├── MapDyads.java │ │ │ ├── PairOverlappingNucleosomes.java │ │ │ ├── PercusDecomposition.java │ │ │ ├── Phasogram.java │ │ │ └── PredictDinucleosomes.java │ │ ├── visualization │ │ │ ├── IntervalAverager.java │ │ │ ├── KMeans.java │ │ │ ├── MatrixAligner.java │ │ │ └── StripMatrix.java │ │ └── wigmath │ │ │ ├── Add.java │ │ │ ├── Average.java │ │ │ ├── Correlate.java │ │ │ ├── Divide.java │ │ │ ├── Downsample.java │ │ │ ├── ExtractRegion.java │ 
│ │ ├── GaussianSmooth.java │ │ │ ├── LogTransform.java │ │ │ ├── MovingAverageSmooth.java │ │ │ ├── MovingEntropy.java │ │ │ ├── Multiply.java │ │ │ ├── Root.java │ │ │ ├── Scale.java │ │ │ ├── Shift.java │ │ │ ├── StandardDeviation.java │ │ │ ├── Subtract.java │ │ │ ├── Summary.java │ │ │ ├── ValueDistribution.java │ │ │ └── ZScore.java │ │ └── utils │ │ ├── ArrayScaler.java │ │ ├── ArrayUtils.java │ │ ├── FFTUtils.java │ │ ├── FloatCorrelation.java │ │ ├── FloatHistogram.java │ │ ├── RomanNumeral.java │ │ ├── Samtools.java │ │ ├── SequenceUtils.java │ │ ├── SortUtils.java │ │ └── WigStatistic.java └── log4j.properties ├── test-data ├── baseAlignCounts1.wig ├── bedGraphToWig.input.bedGraph ├── divide1.wig ├── divide2.wig ├── divide3.wig ├── downsample1.wig ├── downsample2.wig ├── downsample3.wig ├── gaussian1.wig ├── gaussian2.wig ├── gaussian3.wig ├── geneTrackToBedGraph.bedGraph ├── geneTrackToWig1.wig ├── geneTrackToWig2.wig ├── intervallengthdistribution1.txt ├── intervallengthdistribution2.txt ├── intervallengthdistribution3.txt ├── logger1.wig ├── logger2.wig ├── logger3.wig ├── logger4.wig ├── logger5.wig ├── logger6.wig ├── romanNumeralize.input ├── romanNumeralize.output ├── stripMatrix.txt ├── test.bed ├── test.fasta ├── test.fasta.fai ├── test.fastqillumina ├── test.fastqsanger ├── test.genetrack ├── test.matrix2png.txt ├── test2.bed ├── twist.normalized.wig ├── twist.wig ├── wigmath1.bw ├── wigmath1.wig ├── wigmath2.bw ├── wigmath2.wig ├── wigmath3.bw ├── wigmath3.wig ├── wigsummary1.txt ├── wigsummary2.txt ├── wigsummary3.txt ├── zscorer1.wig ├── zscorer2.wig ├── zscorer3.wig ├── zscorer4.wig ├── zscorer5.wig └── zscorer6.wig ├── test └── edu │ └── unc │ └── utils │ ├── ArrayScalerTest.java │ ├── ArrayUtilsTest.java │ ├── FloatCorrelationTest.java │ ├── FloatHistogramTest.java │ ├── RomanNumeralTest.java │ └── SortUtilsTest.java ├── toolRunner.bat ├── toolRunner.sh └── tool_data_table_conf.xml.sample /.classpath: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | build 3 | reports 4 | docs 5 | *.class 6 | *.log 7 | .DS_Store 8 | *.wig.idx 9 | *.tbi 10 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | java-genomics-toolkit 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Main-Class: edu.unc.genomics.GenomicsToolkit 3 | Bundle-ManifestVersion: 2 4 | Bundle-Name: java-genomics-toolkit 5 | Bundle-SymbolicName: java-genomics-toolkit 6 | Bundle-Version: 1.0.0 7 | Bundle-RequiredExecutionEnvironment: JavaSE-1.7 8 | Bundle-Description: This project provides tools for common genomic data processing. 
9 | Bundle-DocURL: http://github.com/timpalpant/java-genomics-toolkit -------------------------------------------------------------------------------- /dist/java-genomics-toolkit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/dist/java-genomics-toolkit.jar -------------------------------------------------------------------------------- /galaxy-conf/Add.xml: -------------------------------------------------------------------------------- 1 | 2 | multiple (Big)Wig files 3 | 4 | galaxyToolRunner.sh wigmath.Add -o $output 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | This tool will add all values in the specified Wig files base pair by base pair. 21 | 22 | .. class:: infomark 23 | 24 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /galaxy-conf/Autocorrelation.xml: -------------------------------------------------------------------------------- 1 | 2 | of data in a Wiggle file 3 | galaxyToolRunner.sh ngs.Autocorrelation -i $input -l $windows -m $max -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | This tool computes the unnormalized autocovariance_ of intervals of data in a Wig file. 16 | 17 | .. _autocovariance: http://en.wikipedia.org/wiki/Autocorrelation 18 | 19 | ----- 20 | 21 | **Syntax** 22 | 23 | - **Input data** is the genomic data on which to compute the autocorrelation. 24 | - **List of intervals:** The autocorrelation will be computed for each genomic interval specified in this list. 
25 | - **Maximum shift:** In computing the autocorrelation, the data will be phase-shifted up to this limit. 26 | 27 | ----- 28 | 29 | .. class:: infomark 30 | 31 | **TIP:** For more information, see Wikipedia_ (right click to open this link in another window). 32 | 33 | .. _Wikipedia: http://en.wikipedia.org/wiki/Autocorrelation 34 | 35 | .. class:: infomark 36 | 37 | **TIP:** If your input data does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format. Similarly, the intervals must be in either Bed, BedGraph, or GFF format. 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /galaxy-conf/Average.xml: -------------------------------------------------------------------------------- 1 | 2 | multiple (Big)Wig files 3 | 4 | galaxyToolRunner.sh wigmath.Average -o $output $file1 $file2 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | This tool will average the values of the provided Wig files, base pair by base pair. 27 | 28 | .. class:: infomark 29 | 30 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /galaxy-conf/BaseAlignCounts.xml: -------------------------------------------------------------------------------- 1 | 2 | of sequencing reads 3 | galaxyToolRunner.sh ngs.BaseAlignCounts -i $input -a ${chromInfo} -x $X -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | This tool produces a new Wig file with the number of reads/intervals overlapping each base pair. Reads can be artificially extended to match known fragment lengths. 
If you wish to count the number of reads starting at each base pair, set the read extension to 1. If you wish to count the number of intervals overlapping each base pair, set the extension to -1. 16 | 17 | ----- 18 | 19 | .. class:: warningmark 20 | 21 | This tool requires sequencing reads in SAM, BAM, Bed, or BedGraph format. If you are artificially extending reads, ensure that the strand is set correctly in SAM, BAM, and Bed files. 22 | 23 | .. class:: warningmark 24 | 25 | Paired-end reads are considered to be the entire fragment (the distance from the 5' end of mate 1 to the 5' end of mate 2) if the extension is set to -1. 26 | 27 | .. class:: infomark 28 | 29 | If you would like to convert valued interval data (e.g. BedGraph files from microarrays) to Wig format, use the Converters -> Interval to Wig converter. 30 | 31 | .. class:: infomark 32 | 33 | **TIP:** If you are going to be using reads in SAM format for multiple analyses, it is often more efficient to first convert it into BAM format using NGS: SAM Tools -> SAM-to-BAM. 34 | 35 | ----- 36 | 37 | **Syntax** 38 | 39 | - **Sequencing reads** are mapped reads from a high-throughput sequencing experiment. 40 | - **In silico extension:** Reads will be artificially extended from their 5' end to be this length. 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /galaxy-conf/Divide.xml: -------------------------------------------------------------------------------- 1 | 2 | two (Big)Wig files 3 | galaxyToolRunner.sh wigmath.Divide -n $dividend -d $divisor -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | .. class:: infomark 32 | 33 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 
34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /galaxy-conf/Downsample.xml: -------------------------------------------------------------------------------- 1 | 2 | a (Big)Wig file 3 | galaxyToolRunner.sh wigmath.Downsample -i $input -m $metric -w $window -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | This tool can be used to reduce the resolution and file size of Wig files for easier upload to UCSC. Data is downsampled in non-overlapping windows starting from the beginning of each chromosome. Each window can be downsampled as the mean, minimum, maximum, total, or coverage of the original data. 43 | 44 | ----- 45 | 46 | **Downsampling Methods** 47 | 48 | - **Mean:** the arithmetic mean of the values in the original data window 49 | - **Minimum:** the least value in the original data window 50 | - **Maximum:** the greatest value in the original data window 51 | - **Coverage:** the fraction of bases with values in the original window 52 | - **Total:** the sum of all values in the original data window 53 | 54 | ----- 55 | 56 | .. class:: infomark 57 | 58 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /galaxy-conf/DynaPro.xml: -------------------------------------------------------------------------------- 1 | 2 | using DynaPro 3 | galaxyToolRunner.sh nucleosomes.DynaPro -i $input -n $N 4 | #if str( $mean ) != '' 5 | -m $mean 6 | #end if 7 | 8 | #if str( $variance ) != '' 9 | -v $variance 10 | #end if 11 | -o $output 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | .. 
class:: warningmark 25 | 26 | At present, this tool is only suitable for small genomes (yeast) since entire chromosomes must be loaded into memory. 27 | 28 | ----- 29 | 30 | Equilibrium nucleosome distribution is modeled as a one-dimensional fluid of hard rods adsorbing and moving within an external potential. This tool provides a simplified version of the DynaPro_ algorithm for a single factor interacting with hard-core repulsion. 31 | 32 | .. _DynaPro: http://nucleosome.rutgers.edu/nucleosome/ 33 | 34 | ----- 35 | 36 | **Syntax** 37 | 38 | - **Energy landscape** is the external potential function for each genomic base pair, and must be in Wig format. 39 | - **Nucleosome size** is the hard-core interaction size. 40 | 41 | ----- 42 | 43 | **Citation** 44 | 45 | Morozov AV, Fortney K, Gaykalova DA, Studitsky VM, Widom J and Siggia ED (2009) Using DNA mechanics to predict in vitro nucleosome positions and formation energies. Nucleic Acids Res 37: 4707–4722. 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /galaxy-conf/ExtractDataFromRegion.xml: -------------------------------------------------------------------------------- 1 | 2 | for a genomic interval 3 | 4 | galaxyToolRunner.sh ngs.ExtractDataFromRegion --chr $chr --start $start --stop $stop -o $output $file1 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | This tool will extract data from Wig or BigWig file(s) for a specific region of the genome. 
25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /galaxy-conf/FastqIlluminaToSanger.xml: -------------------------------------------------------------------------------- 1 | 2 | from Illumina to Sanger 3 | galaxyToolRunner.sh converters.FastqIlluminaToSanger -i $input -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | This tool will convert a FASTQ file with ASCII quality scores encoded in Illumina 1.3-1.7 format (Phred+64) to Sanger format (Phred+33) for use with Bowtie and other Galaxy tools. Illumina CASAVA >= 1.8 already produces FASTQ files in Sanger format, so this tool should not be used on new Illumina sequencing data. This tool is a simpler version of the FASTQ Groomer that does little error checking but performs much faster. If you are unsure what format your file is in, or need to do other conversions, use the FASTQ Groomer instead. 19 | 20 | For more information, read about FASTQ formats_ (right-click to open in new window). 21 | 22 | .. _formats: http://en.wikipedia.org/wiki/FASTQ_format 23 | 24 | .. class:: warningmark 25 | 26 | This tool requires fastqillumina formatted data. If you have fastq data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /galaxy-conf/FindAbsoluteMaxima.xml: -------------------------------------------------------------------------------- 1 | 2 | in intervals 3 | 4 | galaxyToolRunner.sh ngs.FindAbsoluteMaxima -l $window -o $output 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | This tool can be used to find the location of the maximum value in genomic intervals, such as finding the peak summit inside a set of peak calls. 22 | 23 | .. 
class:: infomark 24 | 25 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. Intervals must be provided in Bed, BedGraph, or GFF format. 26 | 27 | ----- 28 | 29 | **Example** 30 | 31 | 32 | if **Intervals** are genes :: 33 | 34 | chr11 5203271 5204877 NM_000518 0 - 35 | chr11 5210634 5212434 NM_000519 0 - 36 | chr11 5226077 5227663 NM_000559 0 - 37 | 38 | and **Wig files** are :: 39 | 40 | Data1.wig 41 | Data2.wig 42 | 43 | this tool will find the location of the maximum value in each interval for each of the provided Wig/BigWig files, and append them in columns in the order that they were added :: 44 | 45 | chr11 5203271 5204877 NM_000518 0 - 5203374 5204300 46 | chr11 5210634 5212434 NM_000519 0 - 5210638 5212450 47 | chr11 5226077 5227663 NM_000559 0 - 5226800 5226241 48 | 49 | where column 7 is the location of the maximum value in that interval for Data1.wig, and column 8 is the location of the maximum value in that interval for Data2.wig. 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /galaxy-conf/FindBoundaryNucleosomes.xml: -------------------------------------------------------------------------------- 1 | 2 | in windows 3 | galaxyToolRunner.sh nucleosomes.FindBoundaryNucleosomes -i $input -l $loci -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | .. class:: infomark 15 | 16 | Use the Call Nucleosomes tool to create a file of called nucleosomes, then use this tool to identify the first nucleosome's dyad position (peak maximum) from the 5' and 3' end of the gene. 17 | 18 | .. class:: infomark 19 | 20 | **TIP:** Nucleosome calls must be in tabular format of the kind produced by the Nucleosomes -> Call nucleosomes tool. Intervals must be in either Bed, BedGraph, or GFF format. 
21 | 22 | ----- 23 | 24 | **Syntax** 25 | 26 | - **Nucleosome calls** is a list of stereotypic nucleosome position calls. 27 | - **List of intervals:** The 5' and 3' boundary nucleosomes will be found for each interval in this list 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /galaxy-conf/FindNMers.xml: -------------------------------------------------------------------------------- 1 | 2 | in a DNA sequence 3 | galaxyToolRunner.sh dna.FindNMers -i 4 | #if $refGenomeSource.genomeSource == "history": 5 | $refGenomeSource.ownFile 6 | #else 7 | ${refGenomeSource.index.fields.path} 8 | #end if 9 | -m $mismatches -n $nmer $rc -o $output 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | This tool will find all matches of a given NMer in a DNA sequence. Sequences may be provided in FASTA format or selected from available reference genomes. Mismatches are allowed, but not insertions/deletions. The output is a Bed file with the locations of matches in the reference sequence. 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /galaxy-conf/FindOutlierRegions.xml: -------------------------------------------------------------------------------- 1 | 2 | such as CNVs 3 | galaxyToolRunner.sh ngs.FindOutlierRegions -i $input -w $window -t $threshold $below -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | This tool identifies regions of the genome that may be repetitive elements or CNVs by scanning for windows that have an exceptionally high mean relative to the genome-wide mean. 17 | 18 | ----- 19 | 20 | .. class:: infomark 21 | 22 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. 
Use "edit attributes" to set the correct format if it was not detected correctly. 23 | 24 | ----- 25 | 26 | **Syntax** 27 | 28 | - **Input data** is Wig or BigWig formatted data from a high-throughput sequencing experiment. 29 | - **Window size** is the size of the moving average to use. 30 | - **Threshold** is the fold times the genome-wide mean that a window's mean must be in order to be considered an outlier region. 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /galaxy-conf/GaussianSmooth.xml: -------------------------------------------------------------------------------- 1 | 2 | a (Big)Wig file 3 | galaxyToolRunner.sh wigmath.GaussianSmooth -i $input -s $S -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | This tool smooths genomic data with an area-preserving Gaussian_ filter. The Gaussian filter is computed out to +/- 3 standard deviations. 32 | 33 | .. _Gaussian: http://en.wikipedia.org/wiki/Gaussian_filter 34 | 35 | .. class:: infomark 36 | 37 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /galaxy-conf/GeneTrackToBedGraph.xml: -------------------------------------------------------------------------------- 1 | 2 | converter 3 | galaxyToolRunner.sh converters.GeneTrackToBedGraph -i $input -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | This tool will sum the counts from the forward and reverse strands in a GeneTrack_ index to create a BedGraph file. 19 | 20 | .. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html 21 | 22 | .. class:: warningmark 23 | 24 | This tool requires GeneTrack formatted data. 
If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /galaxy-conf/GeneTrackToWig.xml: -------------------------------------------------------------------------------- 1 | 2 | converter 3 | galaxyToolRunner.sh converters.GeneTrackToWig -i $input -s $shift $zero -a ${chromInfo} -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | This tool will convert GeneTrack_ format files into Wig files, optionally offsetting the + and - strand counts by a specified value before merging them. 31 | 32 | .. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html 33 | 34 | .. class:: warningmark 35 | 36 | This tool requires GeneTrack formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /galaxy-conf/InterpolateDiscontinuousData.xml: -------------------------------------------------------------------------------- 1 | 2 | missing values in a (Big)Wig file 3 | galaxyToolRunner.sh converters.InterpolateDiscontinuousData -i $input -t $type -m $max -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | This tool will attempt to interpolate missing values (NaN) in a Wig file that result when converting discontinuous microarray probe data to Wig format. Stretches of missing data that extend longer than the allowed maximum will be left as NaN. 
20 | 21 | ----- 22 | 23 | **Interpolation types** 24 | 25 | - **Nearest** uses the value of the nearest base pair that has data 26 | - **Linear** uses a linear interpolant between the values of the nearest two probes 27 | - **Cubic** uses a cubic interpolant between the values of the nearest two probes 28 | 29 | For more information, see Wikipedia_. 30 | 31 | .. _Wikipedia: http://en.wikipedia.org/wiki/Interpolation 32 | 33 | ----- 34 | 35 | .. class:: infomark 36 | 37 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use the Converters -> IntervalToWig tool to convert Bed, BedGraph, or GFF-formatted microarray data to Wig format, then use this tool to interpolate the missing values between probes. 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /galaxy-conf/IntervalAverager.xml: -------------------------------------------------------------------------------- 1 | 2 | that have been aligned 3 | 4 | galaxyToolRunner.sh visualization.IntervalAverager -l $loci -o $output $file1 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | This tool calculates the average signal for a set of aligned intervals. Intervals are lined up on their alignment point (column 5 in the Bed file), flipped if on the - strand, and averaged. The output is equivalent to aligning the data in a matrix and then taking the columnwise average of the matrix. 23 | 24 | Intervals with alignment points must be provided in the following extended Bed format :: 25 | 26 | chr low high id alignment strand 27 | 28 | .. class:: infomark 29 | 30 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 
31 | 32 | ----- 33 | 34 | **Syntax** 35 | 36 | - **Sequencing data** is the genomic data used to create the average 37 | - **List of intervals** is a list of intervals in Bed format with alignment points 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /galaxy-conf/IntervalLengthDistribution.xml: -------------------------------------------------------------------------------- 1 | 2 | of read lengths 3 | galaxyToolRunner.sh ngs.IntervalLengthDistribution -i $input $freq -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | This tool calculates the distribution of interval lengths from a list of intervals or reads in SAM, BAM, Bed, BedGraph, or GFF format. 32 | 33 | .. class:: warningmark 34 | 35 | For paired-end sequencing reads, the length is the length of the fragment (5' end of read 1 to 5' end of read 2) 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /galaxy-conf/IntervalStats.xml: -------------------------------------------------------------------------------- 1 | 2 | of data in a (Big)Wig file 3 | 4 | galaxyToolRunner.sh ngs.IntervalStats -l $windows -s $stat -o $output 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | This tool calculates the arithmetic mean, maximum, or minimum value for the Wig data in each interval. For each Wig file provided, an additional column is added to the output file in the order that they are added above. 28 | 29 | .. class:: infomark 30 | 31 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 
32 | 33 | ----- 34 | 35 | **Example** 36 | 37 | Calculate the mean change in nucleosome occupancy for each gene in the yeast genome: 38 | 39 | - 1. Create a "change in occupancy" dataset by subtracting the normalized occupancy Wig files from your two conditions using the WigMath -> Subtract tool. 40 | - 2. Upload a list of intervals corresponding to the genes in the yeast genome, or pull the data from UCSC using Get Data -> UCSC Main. 41 | - 3. Calculate the mean change in occupancy for each gene using this tool and the datasets from (1) and (2). 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /galaxy-conf/IntervalToBed.xml: -------------------------------------------------------------------------------- 1 | 2 | converter 3 | galaxyToolRunner.sh converters.IntervalToBed -i $input -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | This tool will convert any file in SAM, BAM, GFF, BedGraph, BigBed, or VCF format to Bed format. 13 | 14 | .. class:: warningmark 15 | 16 | For SAM/BAM data, paired-end reads are converted to Bed format as the entire fragment (5' end of mate 1 to the 5' end of mate 2). Single-end reads are converted to Bed format as the read itself, with strand information. If your SAM/BAM file contains both mate alignments from a paired-end sequencing run (i.e. two entries for each fragment), you should first filter out reads from either the + or - strand with the SAM Tools -> Filter SAM tool to avoid producing redundant entries in the output Bed file. 
17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /galaxy-conf/IntervalToWig.xml: -------------------------------------------------------------------------------- 1 | 2 | converter 3 | galaxyToolRunner.sh converters.IntervalToWig -i $input $zero -a ${chromInfo} -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | This tool converts data from an interval format, such as Bed, BedGraph or GFF, to Wig format. This can be used to convert data from microarrays to Wig format. The value of each interval is mapped into the Wig file. Intervals that overlap in the original file (multiple-valued base pairs) are averaged, and bases without data in the original interval file are set to NaN. 15 | 16 | .. class:: warningmark 17 | 18 | This tool requires Bed, BedGraph, or GFF formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset. 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /galaxy-conf/KMeans.xml: -------------------------------------------------------------------------------- 1 | 2 | an aligned matrix 3 | galaxyToolRunner.sh visualization.KMeans -i $input -k $K -1 $min -2 $max -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | .. class:: warningmark 19 | 20 | This tool requires tabular data in matrix2png format (with column AND row headers). For more information about the required format and usage instructions, see the matrix2png_ website. 21 | 22 | .. _matrix2png: http://bioinformatics.ubc.ca/matrix2png/dataformat.html 23 | 24 | .. class:: infomark 25 | 26 | You can use the "Align values in a matrix" tool to create a matrix, then use this tool to cluster the matrix with k-means. 27 | 28 | .. class:: infomark 29 | 30 | **TIP:** You can use the **min** and **max** columns to cluster a large matrix based on a subset of the columns. 
For example, you could cluster a 4000x4000 matrix on columns 200-300 by setting min = 200 and max = 300. This will greatly increase the efficiency of distance calculations during the k-means EM, and also allows you to cluster based on specific regions, such as promoters or coding sequences. 31 | 32 | ----- 33 | 34 | This tool will cluster the rows in an aligned matrix with KMeans_. The implementation builds upon the KMeansPlusPlusClusterer available in commons-math3_. 35 | 36 | .. _KMeans: http://en.wikipedia.org/wiki/K-means_clustering 37 | 38 | .. _commons-math3: http://commons.apache.org/math/ 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /galaxy-conf/LogTransform.xml: -------------------------------------------------------------------------------- 1 | 2 | a (Big)Wig file 3 | galaxyToolRunner.sh wigmath.LogTransform -i $input -b $base -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | .. class:: infomark 47 | 48 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /galaxy-conf/MapDyads.xml: -------------------------------------------------------------------------------- 1 | 2 | from sequencing reads 3 | 4 | galaxyToolRunner.sh nucleosomes.MapDyads -i $input -a ${chromInfo} -o $output 5 | #if $type.read == 'single' 6 | -s $type.size 7 | #end if 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | This tool produces a Wig file with the number of dyads at each base pair. 
For paired-end MNase data, dyads are approximated using the center of the fragment. For Bed/BedGraph formatted input, this means the center of the interval; for SAM/BAM formatted input, this means the middle between the 5' end of mate 1 and the 5' end of mate 2. For single-end data, the estimated mononucleosome fragment length (N) must be specified, which will be used to offset reads from the + and - strands by +/- N/2. 31 | 32 | .. class:: warningmark 33 | 34 | This tool requires sequencing reads in SAM, BAM, Bed, or BedGraph format. 35 | 36 | .. class:: warningmark 37 | 38 | Since BedGraph format does not contain strand information, all reads in BedGraph format are considered to be on the + strand. 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /galaxy-conf/Multiply.xml: -------------------------------------------------------------------------------- 1 | 2 | (Big)Wig files 3 | 4 | galaxyToolRunner.sh wigmath.Multiply -o $output 5 | #for $input in $inputs 6 | ${input.file} 7 | #end for 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | This tool multiplies Wig or BigWig files base pair by base pair. 21 | 22 | .. class:: infomark 23 | 24 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /galaxy-conf/PairOverlappingNucleosomes.xml: -------------------------------------------------------------------------------- 1 | 2 | by overlap 3 | galaxyToolRunner.sh nucleosomes.PairOverlappingNucleosomes -a $input1 -b $input2 -m $N -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | This tool will pair overlapping nucleosomes from two sets of nucleosome calls. In the event that multiple calls overlap, the one with the largest overlap is selected as a match.
16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /galaxy-conf/PercusDecomposition.xml: -------------------------------------------------------------------------------- 1 | 2 | from occupancy data 3 | galaxyToolRunner.sh nucleosomes.PercusDecomposition -d $dyads -n $N -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | This tool derives an external potential energy function from experimental nucleosome positioning data by assuming that nucleosomes interact with DNA like a fluid of hard rods. This energy function can then be used to derive sequence-specific nucleosome formation preferences, while accounting for hard-core steric restriction by adjacent nucleosomes. This tool is a reimplementation of the algorithm described in (Locke et al. 2010). 15 | 16 | ----- 17 | 18 | **Citations** 19 | 20 | Locke G, Tolkunov D, Moqtaderi Z, Struhl K and Morozov AV (2010) High-throughput sequencing reveals a simple model of nucleosome energetics. Proceedings of the National Academy of Sciences 107: 20998–21003 21 | 22 | Percus JK (1976) Equilibrium state of a classical fluid of hard rods in an external field. J Stat Phys 15: 505–511 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /galaxy-conf/Phasogram.xml: -------------------------------------------------------------------------------- 1 | 2 | of dyads 3 | galaxyToolRunner.sh nucleosomes.Phasogram -i $input -m $max -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | This tool calculates the phase distribution of sequencing data. It can be used to identify genome-wide periodicities. Phase counts are aggregated for each base pair across the genome. This is equivalent to summing the autocovariance of a sliding window across the genome. The tool is a reimplementation of the algorithm described in (Valouev et al. 2011). 15 | 16 | .. 
class:: infomark 17 | 18 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 19 | 20 | ----- 21 | 22 | **Citation** 23 | 24 | Valouev A, Johnson SM, Boyd SD, Smith CL, Fire AZ and Sidow A (2011) Determinants of nucleosome organization in primary human cells. Nature 474: 516–520 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /galaxy-conf/PowerSpectrum.xml: -------------------------------------------------------------------------------- 1 | 2 | of data in a Wiggle file 3 | galaxyToolRunner.sh ngs.PowerSpectrum -i $input -l $windows -m $max -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | This tool computes the power spectrum of intervals of sequencing data. For each interval provided, the normalized power spectrum is calculated, representing the relative power in each frequency. Power spectra are normalized to have total power 1, with the DC component (0 frequency) removed. Power spectra are computed using the FFT_ implementation in JTransforms_. 16 | 17 | .. _FFT: http://en.wikipedia.org/wiki/Fast_Fourier_transform 18 | 19 | .. _JTransforms: http://sites.google.com/site/piotrwendykier/software/jtransforms 20 | 21 | ----- 22 | 23 | **Syntax** 24 | 25 | - **Input data** is the genomic data on which to compute the power spectrum. 26 | - **List of intervals:** The power spectrum will be computed for each genomic interval specified in this list. 27 | - **Number of frequencies:** The power spectrum will be truncated at this frequency in the output 28 | 29 | ----- 30 | 31 | **Output** 32 | 33 | The output has the following format :: 34 | 35 | chr start stop id alignment strand freq1 freq2 ... 36 | 37 | up to the maximum frequency specified. Frequencies are truncated to reduce the size of the output since signals are often band-limited. 
38 | 39 | ----- 40 | 41 | .. class:: warningmark 42 | 43 | **NOTE:** Even though frequencies may be truncated in the output, all frequencies in the power spectrum are computed and used for normalization. 44 | 45 | .. class:: infomark 46 | 47 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. Intervals must be provided in Bed, BedGraph, or GFF format. 48 | 49 | ----- 50 | 51 | This tool is equivalent to the following Matlab commands, where x is a vector with the interval of sequencing data :: 52 | 53 | N = length(x); 54 | f = fft(x); 55 | p = abs(f(2:N/2)).^2; 56 | p = p / sum(p); 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /galaxy-conf/RollingReadLength.xml: -------------------------------------------------------------------------------- 1 | 2 | over each locus 3 | galaxyToolRunner.sh ngs.RollingReadLength -i $input -a ${chromInfo} -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | This tool will compute the mean length of all fragments overlapping a given locus, and can be used to identify sites with exceptionally long or short reads. 14 | 15 | .. class:: warningmark 16 | 17 | This tool requires paired-end SAM, BAM, Bed, or BedGraph formatted data. Using single-end data will result in a constant read length. 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /galaxy-conf/RomanNumeralize.xml: -------------------------------------------------------------------------------- 1 | 2 | on any file 3 | galaxyToolRunner.sh converters.RomanNumeralize -i $input -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | This tool scans any file with chromosomal coordinates of the form "chr5" and replaces them with "chrV". 
20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /galaxy-conf/Shift.xml: -------------------------------------------------------------------------------- 1 | 2 | a (Big)Wig file 3 | galaxyToolRunner.sh wigmath.Shift -i $input -m $M $chr -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | This tool will shift all values in a Wig file by a scalar so that the output has the desired mean. 15 | 16 | .. class:: infomark 17 | 18 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /galaxy-conf/StripMatrix.xml: -------------------------------------------------------------------------------- 1 | 2 | from an aligned matrix 3 | galaxyToolRunner.sh visualization.StripMatrix -i $input -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | This tool is intended to strip the column/row headers off of an aligned matrix (in matrix2png format) for easy import into Matlab or other software where only the data values are required. It removes the first row and first column from a tabular file. 20 | 21 | ----- 22 | 23 | **Example** 24 | 25 | If the following tabular matrix is used as input :: 26 | 27 | ID col1 col2 col3 28 | row1 2 4 5 29 | row2 5 1 1 30 | 31 | then the following tabular matrix will be produced as output :: 32 | 33 | 2 4 5 34 | 5 1 1 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /galaxy-conf/Subsample.xml: -------------------------------------------------------------------------------- 1 | 2 | from an interval file 3 | galaxyToolRunner.sh ngs.Subsample -i $input -n $n -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | This tool will randomly select N reads from a file. 
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /galaxy-conf/Subtract.xml: -------------------------------------------------------------------------------- 1 | 2 | two (Big)Wig files 3 | galaxyToolRunner.sh wigmath.Subtract -m $minuend -s $subtrahend -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 36 | 37 | 38 | 39 | 40 | This tool will subtract the values in one Wig file from another, base pair by base pair. 41 | 42 | .. class:: infomark 43 | 44 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /galaxy-conf/ValueDistribution.xml: -------------------------------------------------------------------------------- 1 | 2 | of a (Big)Wig file 3 | galaxyToolRunner.sh wigmath.ValueDistribution -i $input 4 | #if str( $min ) != '' 5 | --min $min 6 | #end if 7 | 8 | #if str( $max ) != '' 9 | --max $max 10 | #end if 11 | 12 | -n $bins -o $output 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | This tool computes a histogram of the values in a Wig file, as well as the moments of the distribution. 27 | 28 | ----- 29 | 30 | **Syntax** 31 | 32 | - **Input data** is the genomic data used to compute the histogram. 33 | - **Minimum bin value** is the smallest bin. If unset, it is equal to the minimum value in the input data 34 | - **Maximum bin value** is the largest bin. If unset, it is equal to the maximum value in the input data 35 | - **Number of bins** is the number of bins to use. The bin size will be equal to (max - min) / (# bins). 36 | 37 | ----- 38 | 39 | **Output** 40 | 41 | The output is in 2-column tabular format, where the first column represents the lower edge of a bin interval and the second column represents the number of values that fell in that bin.
For example if the **minimum bin value** is 0, the **maximum bin value** is 0.3, and the **number of bins** is 3, then the following output might be produced :: 42 | 43 | bin count 44 | <0 3 45 | 0 1 46 | 0.1 10 47 | 0.2 4 48 | >0.3 12 49 | 50 | where there were 3 values in (-inf, 0), 1 value in [0, 0.1), 10 values in [0.1, 0.2), 4 values in [0.2, 0.3), and 12 values in [0.3, inf). 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /galaxy-conf/WaveletTransform.xml: -------------------------------------------------------------------------------- 1 | 2 | across a genomic interval 3 | galaxyToolRunner.sh ngs.WaveletTransform -i $input -w $wavelet --chr $chr --start $start --stop $stop --min $min --max $max --step $N -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | This tool will perform a Wavelet_ scaling_ analysis on an interval of genomic data. For each base pair in the interval, the similarity (correlation) is calculated between the data and the wavelet over a range of scales. This can be used to identify high-frequency and low-frequency features in the data. The output is a matrix in matrix2png format that can be used to generate a heatmap: along the x-axis (columns) are the base pairs in the interval, along the y-axis (rows) are the correlation coefficients for each Wavelet size, with the largest Wavelet scale at the top and the smallest scale at the bottom. 21 | 22 | .. _Wavelet: http://en.wikipedia.org/wiki/Wavelet 23 | 24 | .. _scaling: http://en.wikipedia.org/wiki/Scaleogram 25 | 26 | ----- 27 | 28 | **Syntax** 29 | 30 | - **Input data** is the genomic data on which to compute the Wavelet scaling analysis. 31 | - **Wavelet** a single column of values representing a discrete Wavelet. 
32 | - **Chromosome** a locus in the genome 33 | - **Start base pair** a locus in the genome 34 | - **Stop base pair** a locus in the genome 35 | - **Minimum wavelet size** the smallest Wavelet to analyze 36 | - **Maximum wavelet size** the largest Wavelet to analyze 37 | - **Scaling step size** the step size for the range of wavelet scales. The provided wavelet will be scaled to each size in the set min:step:max by interpolating the provided wavelet. 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /galaxy-conf/ZScore.xml: -------------------------------------------------------------------------------- 1 | 2 | a (Big)Wig file 3 | galaxyToolRunner.sh wigmath.ZScore -i $input $chr -o $output 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | This tool will compute normal scores (Z-scores) for each of the values in a Wig file. For each base pair, the Z-scored value is equal to the deviation from the mean divided by the standard deviation (i.e. the number of standard deviations a value is away from the mean). The output file should have mean 0 and standard deviation 1. 41 | 42 | .. class:: infomark 43 | 44 | This tool is equivalent to using the **Mean Shift** tool to shift a Wig file to mean 0, then using the **Scale** tool to scale by 1/(standard deviation). 45 | 46 | .. class:: infomark 47 | 48 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /galaxy-conf/galaxyToolRunner.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | if [ $# -eq 0 ] 4 | then 5 | echo "USAGE: galaxyToolRunner.sh APPNAME [ARGS]"; 6 | exit; 7 | fi 8 | 9 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 10 | java -Dlog4j.configuration=log4j.properties -cp $DIR:$DIR/../build:$DIR/../dist/*:$DIR/../lib/* edu.unc.genomics."$@" 11 | -------------------------------------------------------------------------------- /galaxy-conf/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=info, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | 6 | # Pattern to output the caller's file name and line number. 7 | log4j.appender.stdout.layout.ConversionPattern=%m%n 8 | 9 | # Only output errors from the BigWig library 10 | log4j.logger.org.broad.igv.bbfile=ERROR 11 | # Only output errors from java-genomics-io 12 | log4j.logger.edu.unc.genomics.io=ERROR -------------------------------------------------------------------------------- /lib/BigWig.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/BigWig.jar -------------------------------------------------------------------------------- /lib/JLargeArrays-1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/JLargeArrays-1.2.jar -------------------------------------------------------------------------------- /lib/JTransforms-3.0.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/JTransforms-3.0.jar -------------------------------------------------------------------------------- /lib/commons-lang3-3.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/commons-lang3-3.1.jar -------------------------------------------------------------------------------- /lib/commons-math3-3.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/commons-math3-3.0.jar -------------------------------------------------------------------------------- /lib/dnaproperties-1732.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/dnaproperties-1732.jar -------------------------------------------------------------------------------- /lib/hamcrest-core-1.1.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/hamcrest-core-1.1.0.jar -------------------------------------------------------------------------------- /lib/java-genomics-io.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/java-genomics-io.jar -------------------------------------------------------------------------------- /lib/jcommander-1.27.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/jcommander-1.27.jar -------------------------------------------------------------------------------- /lib/junit.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/junit.jar -------------------------------------------------------------------------------- /lib/log4j-1.2.15.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/log4j-1.2.15.jar -------------------------------------------------------------------------------- /lib/picard-1.67.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/picard-1.67.jar -------------------------------------------------------------------------------- /lib/sam-1.67.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/sam-1.67.jar -------------------------------------------------------------------------------- /log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=debug, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | 6 | # Pattern to output the caller's file name and line number. 
7 | log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n 8 | 9 | # Only output errors from the BigWig library 10 | log4j.logger.org.broad.igv.bbfile=ERROR 11 | # Output debug messages from java-genomics-io 12 | log4j.logger.edu.unc.genomics.io.IntervalFileSniffer=DEBUG 13 | log4j.logger.edu.unc.genomics.io=DEBUG 14 | log4j.logger.edu.unc.genomics.util=INFO 15 | log4j.logger.edu.ucsc.genome=ERROR 16 | -------------------------------------------------------------------------------- /resources/assemblies/ce10.len: -------------------------------------------------------------------------------- 1 | chrV 20924149 2 | chrX 17718866 3 | chrIV 17493793 4 | chrII 15279345 5 | chrI 15072423 6 | chrIII 13783700 7 | chrM 13794 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce2.len: -------------------------------------------------------------------------------- 1 | chrI 15080483 2 | chrII 15279308 3 | chrIII 13783313 4 | chrIV 17493791 5 | chrM 13794 6 | chrV 20922231 7 | chrX 17718849 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce3.len: -------------------------------------------------------------------------------- 1 | chrI 15080552 2 | chrII 15279311 3 | chrIII 13783317 4 | chrIV 17493785 5 | chrM 13794 6 | chrV 20922231 7 | chrX 17718850 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce4.len: -------------------------------------------------------------------------------- 1 | chrI 15072419 2 | chrII 15279316 3 | chrIII 13783681 4 | chrIV 17493784 5 | chrM 13794 6 | chrV 20919398 7 | chrX 17718852 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce5.len: -------------------------------------------------------------------------------- 1 | chrV 20919568 2 | chrX 17718851 3 | chrIV 17493785 4 | chrII 15279316 5 | chrI 15072421 6 | chrIII
13783681 7 | chrM 13794 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce6.len: -------------------------------------------------------------------------------- 1 | chrV 20919568 2 | chrX 17718854 3 | chrIV 17493785 4 | chrII 15279323 5 | chrI 15072421 6 | chrIII 13783681 7 | chrM 13794 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce7.len: -------------------------------------------------------------------------------- 1 | chrV 20924143 2 | chrX 17718854 3 | chrIV 17493784 4 | chrII 15279324 5 | chrI 15072421 6 | chrIII 13783682 7 | chrM 13794 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce8.len: -------------------------------------------------------------------------------- 1 | chrV 20924143 2 | chrX 17718854 3 | chrIV 17493784 4 | chrII 15279323 5 | chrI 15072421 6 | chrIII 13783685 7 | chrM 13794 8 | -------------------------------------------------------------------------------- /resources/assemblies/ce9.len: -------------------------------------------------------------------------------- 1 | chrV 20924143 2 | chrX 17718854 3 | chrIV 17493784 4 | chrII 15279323 5 | chrI 15072421 6 | chrIII 13783685 7 | chrM 13794 8 | -------------------------------------------------------------------------------- /resources/assemblies/dm1.len: -------------------------------------------------------------------------------- 1 | chr4 1237870 2 | chrU 8248647 3 | chrX 21780003 4 | chr2L 22217931 5 | chr2R 20302755 6 | chr2h 1651714 7 | chr3L 23352213 8 | chr3R 27890790 9 | chr3h 1961095 10 | chrXh 359526 11 | chrYh 321294 12 | -------------------------------------------------------------------------------- /resources/assemblies/dm2.len: -------------------------------------------------------------------------------- 1 | chr4 1281640 2 | chrM 19517 3 | chrU 8724946 4 | chrX 22224390 5 | chr2L 
22407834 6 | chr2R 20766785 7 | chr2h 1694122 8 | chr3L 23771897 9 | chr3R 27905053 10 | chr3h 2955737 11 | chr4h 88110 12 | chrXh 359526 13 | chrYh 396896 14 | -------------------------------------------------------------------------------- /resources/assemblies/dm3.len: -------------------------------------------------------------------------------- 1 | chr2L 23011544 2 | chr2LHet 368872 3 | chr2R 21146708 4 | chr2RHet 3288761 5 | chr3L 24543557 6 | chr3LHet 2555491 7 | chr3R 27905053 8 | chr3RHet 2517507 9 | chr4 1351857 10 | chrU 10049037 11 | chrUextra 29004656 12 | chrX 22422827 13 | chrXHet 204112 14 | chrYHet 347038 15 | chrM 19517 16 | -------------------------------------------------------------------------------- /resources/assemblies/hg15.len: -------------------------------------------------------------------------------- 1 | chr1 245203898 2 | chr2 243315028 3 | chr3 199411731 4 | chr4 191610523 5 | chr5 180967295 6 | chr6 170740541 7 | chr7 158431299 8 | chr8 145908738 9 | chr9 134505819 10 | chrM 16571 11 | chrX 152634166 12 | chrY 50961097 13 | chr1_random 12562665 14 | chr2_random 1464032 15 | chr3_random 423185 16 | chr4_random 1219494 17 | chr6_random 12061844 18 | chr7_random 1057565 19 | chr8_random 427716 20 | chr9_random 2536476 21 | chrX_random 4859112 22 | chrY_random 191708 23 | chr10 135480874 24 | chr11 134978784 25 | chr12 133464434 26 | chr13 114151656 27 | chr14 105311216 28 | chr15 100114055 29 | chr16 89995999 30 | chr17 81691216 31 | chr18 77753510 32 | chr19 63790860 33 | chr20 63644868 34 | chr21 46976537 35 | chr22 49476972 36 | chr10_random 710249 37 | chr11_random 150110 38 | chr12_random 590431 39 | chr13_random 414659 40 | chr15_random 366089 41 | chr16_random 24360 42 | chr17_random 337440 43 | chr19_random 301858 44 | chrUn_random 611077 45 | -------------------------------------------------------------------------------- /resources/assemblies/hg16.len: 
-------------------------------------------------------------------------------- 1 | chr1 246127941 2 | chr2 243615958 3 | chr3 199344050 4 | chr4 191731959 5 | chr5 181034922 6 | chr6 170914576 7 | chr7 158545518 8 | chr8 146308819 9 | chr9 136372045 10 | chrM 16571 11 | chrX 153692391 12 | chrY 50286555 13 | chr1_random 6515988 14 | chr2_random 1104831 15 | chr3_random 749256 16 | chr4_random 648024 17 | chr5_random 143687 18 | chr6_random 2055751 19 | chr7_random 632637 20 | chr8_random 1499381 21 | chr9_random 2766341 22 | chrX_random 3403558 23 | chr10 135037215 24 | chr11 134482954 25 | chr12 132078379 26 | chr13 113042980 27 | chr14 105311216 28 | chr15 100256656 29 | chr16 90041932 30 | chr17 81860266 31 | chr18 76115139 32 | chr19 63811651 33 | chr20 63741868 34 | chr21 46976097 35 | chr22 49396972 36 | chr10_random 1043775 37 | chr13_random 189598 38 | chr15_random 1132826 39 | chr17_random 2549222 40 | chr18_random 4262 41 | chr19_random 92689 42 | chrUn_random 3349625 43 | -------------------------------------------------------------------------------- /resources/assemblies/hg17.len: -------------------------------------------------------------------------------- 1 | chr10 135413628 2 | chr10_random 113275 3 | chr11 134452384 4 | chr12 132449811 5 | chr12_random 466818 6 | chr13 114142980 7 | chr13_random 186858 8 | chr14 106368585 9 | chr15 100338915 10 | chr15_random 784346 11 | chr16 88827254 12 | chr16_random 105485 13 | chr17 78774742 14 | chr17_random 2618010 15 | chr18 76117153 16 | chr18_random 4262 17 | chr19 63811651 18 | chr19_random 301858 19 | chr1 245522847 20 | chr1_random 3897131 21 | chr20 62435964 22 | chr21 46944323 23 | chr22 49554710 24 | chr22_random 257318 25 | chr2 243018229 26 | chr2_random 418158 27 | chr3 199505740 28 | chr3_random 970716 29 | chr4 191411218 30 | chr4_random 1030282 31 | chr5 180857866 32 | chr5_random 143687 33 | chr6 170975699 34 | chr6_random 1875562 35 | chr6_hla_hap1 139182 36 | chr6_hla_hap2 150447 37 | 
chr7 158628139 38 | chr7_random 778964 39 | chr8 146274826 40 | chr8_random 943810 41 | chr9 138429268 42 | chr9_random 1312665 43 | chrM 16571 44 | chrX 154824264 45 | chrX_random 1719168 46 | chrY 57701691 47 | -------------------------------------------------------------------------------- /resources/assemblies/hg18.len: -------------------------------------------------------------------------------- 1 | chr1 247249719 2 | chr1_random 1663265 3 | chr10 135374737 4 | chr10_random 113275 5 | chr11 134452384 6 | chr11_random 215294 7 | chr12 132349534 8 | chr13 114142980 9 | chr13_random 186858 10 | chr14 106368585 11 | chr15 100338915 12 | chr15_random 784346 13 | chr16 88827254 14 | chr16_random 105485 15 | chr17 78774742 16 | chr17_random 2617613 17 | chr18 76117153 18 | chr18_random 4262 19 | chr19 63811651 20 | chr19_random 301858 21 | chr2 242951149 22 | chr2_random 185571 23 | chr20 62435964 24 | chr21 46944323 25 | chr21_random 1679693 26 | chr22 49691432 27 | chr22_random 257318 28 | chr22_h2_hap1 63661 29 | chr3 199501827 30 | chr3_random 749256 31 | chr4 191273063 32 | chr4_random 842648 33 | chr5 180857866 34 | chr5_random 143687 35 | chr5_h2_hap1 1794870 36 | chr6 170899992 37 | chr6_random 1875562 38 | chr6_cox_hap1 4731698 39 | chr6_qbl_hap2 4565931 40 | chr7 158821424 41 | chr7_random 549659 42 | chr8 146274826 43 | chr8_random 943810 44 | chr9 140273252 45 | chr9_random 1146434 46 | chrM 16571 47 | chrX 154913754 48 | chrX_random 1719168 49 | chrY 57772954 50 | -------------------------------------------------------------------------------- /resources/assemblies/hg19.len: -------------------------------------------------------------------------------- 1 | chr1 249250621 2 | chr2 243199373 3 | chr3 198022430 4 | chr4 191154276 5 | chr5 180915260 6 | chr6 171115067 7 | chr7 159138663 8 | chrX 155270560 9 | chr8 146364022 10 | chr9 141213431 11 | chr10 135534747 12 | chr11 135006516 13 | chr12 133851895 14 | chr13 115169878 15 | chr14 107349540 16 | 
chr15 102531392 17 | chr16 90354753 18 | chr17 81195210 19 | chr18 78077248 20 | chr20 63025520 21 | chrY 59373566 22 | chr19 59128983 23 | chr22 51304566 24 | chr21 48129895 25 | chr6_ssto_hap7 4928567 26 | chr6_mcf_hap5 4833398 27 | chr6_cox_hap2 4795371 28 | chr6_mann_hap4 4683263 29 | chr6_apd_hap1 4622290 30 | chr6_qbl_hap6 4611984 31 | chr6_dbb_hap3 4610396 32 | chr17_ctg5_hap1 1680828 33 | chr4_ctg9_hap1 590426 34 | chr1_gl000192_random 547496 35 | chrUn_gl000225 211173 36 | chr4_gl000194_random 191469 37 | chr4_gl000193_random 189789 38 | chr9_gl000200_random 187035 39 | chrUn_gl000222 186861 40 | chrUn_gl000212 186858 41 | chr7_gl000195_random 182896 42 | chrUn_gl000223 180455 43 | chrUn_gl000224 179693 44 | chrUn_gl000219 179198 45 | chr17_gl000205_random 174588 46 | chrUn_gl000215 172545 47 | chrUn_gl000216 172294 48 | chrUn_gl000217 172149 49 | chr9_gl000199_random 169874 50 | chrUn_gl000211 166566 51 | chrUn_gl000213 164239 52 | chrUn_gl000220 161802 53 | chrUn_gl000218 161147 54 | chr19_gl000209_random 159169 55 | chrUn_gl000221 155397 56 | chrUn_gl000214 137718 57 | chrUn_gl000228 129120 58 | chrUn_gl000227 128374 59 | chr1_gl000191_random 106433 60 | chr19_gl000208_random 92689 61 | chr9_gl000198_random 90085 62 | chr17_gl000204_random 81310 63 | chrUn_gl000233 45941 64 | chrUn_gl000237 45867 65 | chrUn_gl000230 43691 66 | chrUn_gl000242 43523 67 | chrUn_gl000243 43341 68 | chrUn_gl000241 42152 69 | chrUn_gl000236 41934 70 | chrUn_gl000240 41933 71 | chr17_gl000206_random 41001 72 | chrUn_gl000232 40652 73 | chrUn_gl000234 40531 74 | chr11_gl000202_random 40103 75 | chrUn_gl000238 39939 76 | chrUn_gl000244 39929 77 | chrUn_gl000248 39786 78 | chr8_gl000196_random 38914 79 | chrUn_gl000249 38502 80 | chrUn_gl000246 38154 81 | chr17_gl000203_random 37498 82 | chr8_gl000197_random 37175 83 | chrUn_gl000245 36651 84 | chrUn_gl000247 36422 85 | chr9_gl000201_random 36148 86 | chrUn_gl000235 34474 87 | chrUn_gl000239 33824 88 | chr21_gl000210_random 27682 
89 | chrUn_gl000231 27386 90 | chrUn_gl000229 19913 91 | chrM 16571 92 | chrUn_gl000226 15008 93 | chr18_gl000207_random 4262 94 | -------------------------------------------------------------------------------- /resources/assemblies/hg19Haps.len: -------------------------------------------------------------------------------- 1 | chr6_ssto_hap7 4928567 2 | chr6_mcf_hap5 4833398 3 | chr6_cox_hap2 4795371 4 | chr6_mann_hap4 4683263 5 | chr6_apd_hap1 4622290 6 | chr6_qbl_hap6 4611984 7 | chr6_dbb_hap3 4610396 8 | chr17_ctg5_hap1 1680828 9 | chr4_ctg9_hap1 590426 10 | -------------------------------------------------------------------------------- /resources/assemblies/hg19Patch2.len: -------------------------------------------------------------------------------- 1 | chr6_ssto_hap7 4928567 2 | chr6_mcf_hap5 4833398 3 | chr6_cox_hap2 4795371 4 | chr6_mann_hap4 4683263 5 | chr6_apd_hap1 4622290 6 | chr6_qbl_hap6 4611984 7 | chr6_dbb_hap3 4610396 8 | chr17_ctg5_hap1 1680828 9 | chr5_ctg1_gl339449 1620324 10 | chr4_ctg9_hap1 590426 11 | chr17_gl383560 534288 12 | chr17_gl383558 457041 13 | chr8_gl383535 429806 14 | chr17_gl383561 406963 15 | chr10_gl383543 392792 16 | chr15_ctg8_gl383555 388773 17 | chr19_ctg3_gl383573 385657 18 | chr4_ctg6_gl383528 376187 19 | chr1_ctg31_gl383520 366579 20 | chr17_gl383559 338640 21 | chr9_gl339450 330164 22 | chr10_ctg5_gl383546 309802 23 | chr15_ctg4_gl383554 296527 24 | chr18_ctg1_gl383567 289831 25 | chr17_ctg1_gl383563 270261 26 | chr17_ctg4_gl383565 223995 27 | chr8_gl383536 203777 28 | chr21_ctg1_gl383579 201198 29 | chr18_ctg2_gl383571 198278 30 | chr16_ctg3_gl383556 192462 31 | chr19_ctg3_gl383576 188024 32 | chr12_ctg5_gl383551 184319 33 | chr1_ctg31_gl383518 182439 34 | chr3_ctg2_gl383526 180671 35 | chr10_ctg2_gl383545 179254 36 | chr5_ctg5_gl383531 173459 37 | chr3_gl383523 171362 38 | chr9_ctg35_gl383541 171286 39 | chr19_ctg3_gl383575 170227 40 | chr12_ctg2_gl383550 169178 41 | chr18_ctg2_gl383569 167950 42 | 
chr12_gl383548 165247 43 | chr18_ctg1_gl383570 164789 44 | chr4_ctg12_gl383527 164536 45 | chr9_ctg1_gl383539 162988 46 | chr18_ctg2_gl383572 159547 47 | chr22_ctg1_gl383582 158507 48 | chr19_ctg3_gl383574 155864 49 | chr12_ctg2_gl383553 154881 50 | chr11_ctg1_gl383547 154407 51 | chr2_ctg1_gl383521 143390 52 | chr12_ctg2_gl383552 138655 53 | chr17_ctg4_gl383564 133151 54 | chr20_ctg1_gl383577 128385 55 | chr10_gl383544 128378 56 | chr6_ctg5_gl383533 124736 57 | chr2_ctg12_gl383522 123821 58 | chr4_ctg9_gl383529 121345 59 | chr12_ctg2_gl383549 120804 60 | chr7_ctg6_gl383534 119383 61 | chr21_ctg1_gl383581 116690 62 | chr1_ctg31_gl383519 110268 63 | chr18_ctg2_gl383568 104552 64 | chr5_ctg2_gl383530 101241 65 | chr22_ctg2_gl383583 96924 66 | chr17_ctg4_gl383566 90219 67 | chr16_ctg3_gl383557 89672 68 | chr5_ctg1_gl383532 82728 69 | chr3_gl383524 78793 70 | chr21_ctg1_gl383580 74652 71 | chr9_ctg35_gl383540 71551 72 | chr3_gl383525 65063 73 | chr21_ctg1_gl383578 63917 74 | chr9_gl383537 62435 75 | chr9_ctg35_gl383542 60032 76 | chr1_gl383517 49352 77 | chr1_gl383516 49316 78 | chr9_gl383538 49281 79 | chr17_gl383562 45551 80 | chrM_rCRS 16569 81 | -------------------------------------------------------------------------------- /resources/assemblies/klac.len: -------------------------------------------------------------------------------- 1 | 1 1062590 2 | 2 1320834 3 | 3 1753957 4 | 4 1715506 5 | 5 2234072 6 | 6 2602197 7 | -------------------------------------------------------------------------------- /resources/assemblies/sacCer1.len: -------------------------------------------------------------------------------- 1 | chr1 230208 2 | chr10 745446 3 | chr11 666445 4 | chr12 1078173 5 | chr13 924430 6 | chr14 784328 7 | chr15 1091285 8 | chr16 948060 9 | chr2 813136 10 | chr3 316613 11 | chr4 1531914 12 | chr5 576869 13 | chr6 270148 14 | chr7 1090944 15 | chr8 562639 16 | chr9 439885 17 | chrM 85779 18 | 
-------------------------------------------------------------------------------- /resources/assemblies/sacCer2.len: -------------------------------------------------------------------------------- 1 | chrIV 1531919 2 | chrXV 1091289 3 | chrVII 1090947 4 | chrXII 1078175 5 | chrXVI 948062 6 | chrXIII 924429 7 | chrII 813178 8 | chrXIV 784333 9 | chrX 745742 10 | chrXI 666454 11 | chrV 576869 12 | chrVIII 562643 13 | chrIX 439885 14 | chrIII 316617 15 | chrVI 270148 16 | chrI 230208 17 | chrM 85779 18 | 2micron 6318 19 | -------------------------------------------------------------------------------- /resources/assemblies/sacCer3.len: -------------------------------------------------------------------------------- 1 | chrI 230218 2 | chrII 813184 3 | chrIII 316620 4 | chrIV 1531933 5 | chrIX 439888 6 | chrV 576874 7 | chrVI 270161 8 | chrVII 1090940 9 | chrVIII 562643 10 | chrX 745751 11 | chrXI 666816 12 | chrXII 1078177 13 | chrXIII 924431 14 | chrXIV 784333 15 | chrXV 1091291 16 | chrXVI 948066 17 | chrM 85779 18 | -------------------------------------------------------------------------------- /sam_fa_indices.loc.sample: -------------------------------------------------------------------------------- 1 | #This is a sample file distributed with Galaxy that enables tools 2 | #to use a directory of Samtools indexed sequences data files. You will need 3 | #to create these data files and then create a sam_fa_indices.loc file 4 | #similar to this one (store it in this directory) that points to 5 | #the directories in which those files are stored. 
The sam_fa_indices.loc 6 | #file has this format (white space characters are TAB characters): 7 | # 8 | #index 9 | # 10 | #So, for example, if you had hg18 indexed stored in 11 | #/depot/data2/galaxy/sam/, 12 | #then the sam_fa_indices.loc entry would look like this: 13 | # 14 | #index hg18 /depot/data2/galaxy/sam/hg18.fa 15 | # 16 | #and your /depot/data2/galaxy/sam/ directory 17 | #would contain hg18.fa and hg18.fa.fai files: 18 | # 19 | #-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa 20 | #-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai 21 | # 22 | #Your sam_fa_indices.loc file should include an entry per line for 23 | #each index set you have stored. The file in the path does actually 24 | #exist, but it should never be directly used. Instead, the name serves 25 | #as a prefix for the index file. For example: 26 | # 27 | #index hg18 /depot/data2/galaxy/sam/hg18.fa 28 | #index hg19 /depot/data2/galaxy/sam/hg19.fa 29 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/AssemblyConverter.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Files; 5 | import java.nio.file.Path; 6 | import java.nio.file.Paths; 7 | import java.util.zip.DataFormatException; 8 | 9 | import org.apache.log4j.Logger; 10 | 11 | import com.beust.jcommander.IStringConverter; 12 | import com.beust.jcommander.ParameterException; 13 | 14 | /** 15 | * @author timpalpant 16 | * 17 | */ 18 | public class AssemblyConverter implements IStringConverter { 19 | 20 | private static final Logger log = Logger.getLogger(AssemblyConverter.class); 21 | 22 | public static final Path ASSEMBLIES_DIR = Paths.get("resources", "assemblies"); 23 | 24 | @Override 25 | public Assembly convert(String value) throws ParameterException { 26 | // Look for the assembly in the resources/assemblies directory 27 | Path p = 
ASSEMBLIES_DIR.resolve(value + ".len"); 28 | 29 | // If it does not exist in the assemblies directory, check if it is a path 30 | // to a file 31 | if (!Files.isReadable(p)) { 32 | PathConverter converter = new PathConverter(); 33 | p = converter.convert(value); 34 | // If it does not exist, then throw an exception that the assembly cannot 35 | // be found 36 | if (!Files.isReadable(p)) { 37 | throw new ParameterException("Cannot find Assembly file: " + value); 38 | } 39 | } 40 | 41 | // Attempt to load the assembly from file 42 | try { 43 | return new Assembly(p); 44 | } catch (IOException | DataFormatException e) { 45 | log.error("Error loading Assembly from file: " + p); 46 | throw new ParameterException(e); 47 | } 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/AssemblyFactory.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics; 2 | 3 | import com.beust.jcommander.IStringConverterFactory; 4 | 5 | /** 6 | * @author timpalpant 7 | * 8 | */ 9 | public class AssemblyFactory implements IStringConverterFactory { 10 | 11 | /* 12 | * (non-Javadoc) 13 | * 14 | * @see 15 | * com.beust.jcommander.IStringConverterFactory#getConverter(java.lang.Class) 16 | */ 17 | @Override 18 | public Class getConverter(Class forType) { 19 | if (forType.equals(Assembly.class)) { 20 | return AssemblyConverter.class; 21 | } else { 22 | return null; 23 | } 24 | } 25 | 26 | } 27 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/CommandLineTool.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics; 2 | 3 | import java.io.IOException; 4 | import java.util.Arrays; 5 | 6 | import org.apache.commons.lang3.StringUtils; 7 | 8 | import com.beust.jcommander.JCommander; 9 | import com.beust.jcommander.ParameterException; 10 | 11 | import 
net.sf.samtools.SAMFileReader; 12 | import net.sf.samtools.SAMFileReader.ValidationStringency; 13 | 14 | /** 15 | * A command-line script 16 | * 17 | * @author timpalpant 18 | * 19 | */ 20 | public abstract class CommandLineTool { 21 | 22 | /** 23 | * The default bite-size to use for applications that process files in chunks 24 | * TODO Read from a configuration file 25 | */ 26 | public static final int DEFAULT_CHUNK_SIZE = 10_000_000; 27 | 28 | /** 29 | * Do the main computation of this tool 30 | * 31 | * @throws IOException 32 | */ 33 | public abstract void run() throws IOException; 34 | 35 | /** 36 | * Parse command-line arguments and run the tool Exit on parameter exceptions 37 | * 38 | * @param args 39 | */ 40 | public void instanceMain(String[] args) throws CommandLineToolException { 41 | // Initialize the command-line options parser 42 | JCommander jc = new JCommander(this); 43 | 44 | // Add factories for parsing Paths, Assemblies, IntervalFiles, and WigFiles 45 | jc.addConverterFactory(new PathFactory()); 46 | jc.addConverterFactory(new AssemblyFactory()); 47 | 48 | // Set the program name to be the class name 49 | String[] nameParts = getClass().getName().split("\\."); 50 | String shortName = StringUtils.join(Arrays.copyOfRange(nameParts, nameParts.length - 2, nameParts.length), '.'); 51 | jc.setProgramName(shortName); 52 | 53 | try { 54 | jc.parse(args); 55 | } catch (ParameterException e) { 56 | System.err.println(e.getMessage()); 57 | jc.usage(); 58 | System.exit(-1); 59 | } 60 | 61 | ValidationStringency stringency = SAMFileReader.getDefaultValidationStringency(); 62 | try { 63 | SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.LENIENT); 64 | run(); 65 | } catch (IOException e) { 66 | throw new CommandLineToolException("IO error while running", e); 67 | } finally { 68 | SAMFileReader.setDefaultValidationStringency(stringency); 69 | } 70 | } 71 | } 72 | 
/**
 * Unchecked exception raised when a command-line tool cannot complete its
 * work. Every constructor forwards its arguments unchanged to the matching
 * {@link RuntimeException} constructor.
 *
 * @author timpalpant
 *
 */
public class CommandLineToolException extends RuntimeException {

  private static final long serialVersionUID = 4740440799806133636L;

  /**
   * Create an exception with neither a detail message nor a cause.
   */
  public CommandLineToolException() {
    super();
  }

  /**
   * Create an exception with a detail message.
   *
   * @param message
   *          description of the failure
   */
  public CommandLineToolException(final String message) {
    super(message);
  }

  /**
   * Create an exception that wraps another throwable.
   *
   * @param cause
   *          the underlying failure
   */
  public CommandLineToolException(final Throwable cause) {
    super(cause);
  }

  /**
   * Create an exception with a detail message and an underlying cause.
   *
   * @param message
   *          description of the failure
   * @param cause
   *          the underlying failure
   */
  public CommandLineToolException(final String message, final Throwable cause) {
    super(message, cause);
  }

  /**
   * Create an exception with full control over suppression and stack trace
   * capture.
   *
   * @param message
   *          description of the failure
   * @param cause
   *          the underlying failure
   * @param enableSuppression
   *          whether suppressed exceptions may be recorded
   * @param writableStackTrace
   *          whether the stack trace should be writable
   */
  public CommandLineToolException(final String message, final Throwable cause, final boolean enableSuppression,
      final boolean writableStackTrace) {
    super(message, cause, enableSuppression, writableStackTrace);
  }

}
edu.unc.genomics.io.TextIntervalFileReader; 11 | 12 | /** 13 | * Read nucleosome calls files 14 | * 15 | * @author timpalpant 16 | * 17 | */ 18 | public class NucleosomeCallsFileReader extends TextIntervalFileReader { 19 | 20 | public NucleosomeCallsFileReader(Path p) throws IOException { 21 | super(p, new NucleosomeCallFactory()); 22 | } 23 | 24 | public static class NucleosomeCallFactory implements IntervalFactory { 25 | 26 | public static final TabixWriter.Conf NUCLEOSOME_CALL_CONF = new TabixWriter.Conf(0, 1, 2, 3, '#', 0); 27 | 28 | @Override 29 | public NucleosomeCall parse(String line) { 30 | return NucleosomeCall.parse(line); 31 | } 32 | 33 | @Override 34 | public Conf tabixConf() { 35 | return NUCLEOSOME_CALL_CONF; 36 | } 37 | 38 | } 39 | 40 | } 41 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/PathConverter.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics; 2 | 3 | import java.nio.file.Path; 4 | import java.nio.file.Paths; 5 | 6 | import com.beust.jcommander.IStringConverter; 7 | 8 | /** 9 | * @author timpalpant 10 | * 11 | */ 12 | public class PathConverter implements IStringConverter { 13 | 14 | @Override 15 | public Path convert(String value) { 16 | return Paths.get(value); 17 | } 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/PathFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package edu.unc.genomics; 5 | 6 | import java.nio.file.Path; 7 | 8 | import com.beust.jcommander.IStringConverterFactory; 9 | 10 | /** 11 | * @author timpalpant 12 | * 13 | */ 14 | public class PathFactory implements IStringConverterFactory { 15 | /* 16 | * (non-Javadoc) 17 | * 18 | * @see 19 | * com.beust.jcommander.IStringConverterFactory#getConverter(java.lang.Class) 20 | */ 21 | @Override 22 | public Class 
getConverter(Class forType) { 23 | if (forType.equals(Path.class)) { 24 | return PathConverter.class; 25 | } else { 26 | return null; 27 | } 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/ReadablePathValidator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package edu.unc.genomics; 5 | 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | 9 | import com.beust.jcommander.IParameterValidator; 10 | import com.beust.jcommander.ParameterException; 11 | 12 | /** 13 | * @author timpalpant 14 | * 15 | */ 16 | public class ReadablePathValidator implements IParameterValidator { 17 | 18 | /* 19 | * (non-Javadoc) 20 | * 21 | * @see com.beust.jcommander.IParameterValidator#validate(java.lang.String, 22 | * java.lang.String) 23 | */ 24 | @Override 25 | public void validate(String name, String value) throws ParameterException { 26 | PathConverter converter = new PathConverter(); 27 | Path p = converter.convert(value); 28 | if (!Files.isReadable(p)) { 29 | throw new ParameterException("Parameter " + name + " should be a readable file"); 30 | } 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/converters/FastqIlluminaToSanger.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.converters; 2 | 3 | import java.io.IOException; 4 | import java.io.PrintWriter; 5 | import java.nio.charset.Charset; 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | 9 | import net.sf.picard.fastq.FastqReader; 10 | import net.sf.picard.fastq.FastqRecord; 11 | 12 | import org.apache.log4j.Logger; 13 | 14 | import com.beust.jcommander.Parameter; 15 | 16 | import edu.unc.genomics.CommandLineTool; 17 | import edu.unc.genomics.ReadablePathValidator; 18 | 19 | /** 20 | * Converts a FASTQ file with 
Illumina quality scores (Phred+64) to Sanger 21 | * quality scores (Phred+33) 22 | * 23 | * @author timpalpant 24 | * 25 | */ 26 | public class FastqIlluminaToSanger extends CommandLineTool { 27 | 28 | private static final Logger log = Logger.getLogger(FastqIlluminaToSanger.class); 29 | 30 | @Parameter(names = { "-i", "--input" }, description = "Input file (FASTQ, Illumina)", required = true, validateWith = ReadablePathValidator.class) 31 | public Path inputFile; 32 | @Parameter(names = { "-o", "--output" }, description = "Output file (FASTQ, Sanger)", required = true) 33 | public Path outputFile; 34 | 35 | @Override 36 | public void run() throws IOException { 37 | int count = 0; 38 | try (FastqReader reader = new FastqReader(inputFile.toFile()); 39 | PrintWriter writer = new PrintWriter(Files.newBufferedWriter(outputFile, Charset.defaultCharset()))) { 40 | for (FastqRecord r : reader) { 41 | writer.print("@"); 42 | writer.println(r.getReadHeader()); 43 | writer.println(r.getReadString()); 44 | writer.print("+"); 45 | writer.println(r.getBaseQualityHeader()); 46 | 47 | // Convert the quality score to Sanger format 48 | char[] qual = r.getBaseQualityString().toCharArray(); 49 | for (int i = 0; i < qual.length; i++) { 50 | qual[i] -= 31; 51 | } 52 | writer.println(qual); 53 | 54 | count++; 55 | } 56 | } 57 | 58 | log.info("Processed " + count + " reads"); 59 | } 60 | 61 | public static void main(String[] args) { 62 | new FastqIlluminaToSanger().instanceMain(args); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/converters/GeneTrackToBedGraph.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.converters; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.beust.jcommander.Parameter; 9 | 10 | import edu.unc.genomics.GeneTrackEntry; 11 | import 
edu.unc.genomics.CommandLineTool; 12 | import edu.unc.genomics.io.BedGraphFileWriter; 13 | import edu.unc.genomics.io.GeneTrackFileReader; 14 | 15 | /** 16 | * Convert a GeneTrack format file to BedGraph, adding the +/- strand values 17 | * 18 | * @author timpalpant 19 | * 20 | */ 21 | public class GeneTrackToBedGraph extends CommandLineTool { 22 | 23 | private static final Logger log = Logger.getLogger(GeneTrackToBedGraph.class); 24 | 25 | @Parameter(names = { "-i", "--input" }, description = "Input file (GeneTrack format)", required = true) 26 | public Path gtFile; 27 | @Parameter(names = { "-o", "--output" }, description = "Output file (BedGraph)", required = true) 28 | public Path outputFile; 29 | 30 | @Override 31 | public void run() throws IOException { 32 | log.debug("Initializing input/output files"); 33 | try (GeneTrackFileReader gt = new GeneTrackFileReader(gtFile); 34 | BedGraphFileWriter writer = new BedGraphFileWriter<>(outputFile)) { 35 | for (GeneTrackEntry entry : gt) { 36 | writer.write(entry); 37 | } 38 | } 39 | } 40 | 41 | public static void main(String[] args) { 42 | new GeneTrackToBedGraph().instanceMain(args); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/converters/IntervalToBed.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.converters; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.beust.jcommander.Parameter; 9 | 10 | import edu.unc.genomics.CommandLineTool; 11 | import edu.unc.genomics.Interval; 12 | import edu.unc.genomics.io.BedFileWriter; 13 | import edu.unc.genomics.io.IntervalFileReader; 14 | 15 | /** 16 | * Convert any known interval format to Bed-6 format 17 | * 18 | * @author timpalpant 19 | * 20 | */ 21 | public class IntervalToBed extends CommandLineTool { 22 | 23 | private static final Logger log = 
Logger.getLogger(IntervalToBed.class); 24 | 25 | @Parameter(names = { "-i", "--input" }, description = "Input file (Bedgraph/BigBed/GFF/SAM/BAM format)", required = true) 26 | public Path inputFile; 27 | @Parameter(names = { "-o", "--output" }, description = "Output file (Bed format)", required = true) 28 | public Path outputFile; 29 | 30 | @Override 31 | public void run() throws IOException { 32 | log.debug("Initializing input/output files"); 33 | try (IntervalFileReader reader = IntervalFileReader.autodetect(inputFile); 34 | BedFileWriter writer = new BedFileWriter<>(outputFile)) { 35 | for (Interval entry : reader) { 36 | writer.write(entry); 37 | } 38 | } 39 | } 40 | 41 | public static void main(String[] args) { 42 | new IntervalToBed().instanceMain(args); 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/converters/IntervalToWig.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.converters; 2 | 3 | import java.io.IOException; 4 | import java.util.Iterator; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.beust.jcommander.Parameter; 9 | 10 | import edu.unc.genomics.CommandLineToolException; 11 | import edu.unc.genomics.Interval; 12 | import edu.unc.genomics.ReadMapperTool; 13 | import edu.unc.genomics.ValuedInterval; 14 | import edu.unc.genomics.io.IntervalFileReader; 15 | 16 | /** 17 | * Convert interval-based data such as microarray data in Bed, BedGraph, or GFF 18 | * format to Wig format. Overlapping probes in the original interval dataset are 19 | * averaged. 
20 | * 21 | * @author timpalpant 22 | * 23 | */ 24 | public class IntervalToWig extends ReadMapperTool { 25 | 26 | private static final Logger log = Logger.getLogger(IntervalToWig.class); 27 | 28 | @Parameter(names = { "-z", "--zero" }, description = "Assume zero where there is no data (default = NaN)") 29 | public boolean defaultZero = false; 30 | 31 | @Override 32 | public float[] compute(IntervalFileReader reader, Interval chunk) throws IOException { 33 | float[] sum = new float[chunk.length()]; 34 | int[] count = new int[chunk.length()]; 35 | 36 | Iterator it = reader.query(chunk); 37 | while (it.hasNext()) { 38 | ValuedInterval entry; 39 | try { 40 | entry = (ValuedInterval) it.next(); 41 | } catch (ClassCastException e) { 42 | log.error("Input file does not appear to be a valued interval format (Bed/BedGraph/GFF/GeneTrack)!"); 43 | throw new CommandLineToolException( 44 | "Input file does not appear to be a valued interval format (Bed/BedGraph/GFF/GeneTrack)!"); 45 | } 46 | 47 | if (entry.getValue() != null) { 48 | int entryStart = Math.max(chunk.getStart(), entry.low()); 49 | int entryStop = Math.min(chunk.getStop(), entry.high()); 50 | for (int i = entryStart; i <= entryStop; i++) { 51 | sum[i - chunk.getStart()] += entry.getValue().floatValue(); 52 | count[i - chunk.getStart()]++; 53 | } 54 | } 55 | } 56 | 57 | // Calculate the average at each base pair in the chunk 58 | for (int i = 0; i < sum.length; i++) { 59 | if (count[i] != 0 || !defaultZero) { 60 | sum[i] /= count[i]; 61 | } 62 | } 63 | 64 | return sum; 65 | } 66 | 67 | public static void main(String[] args) { 68 | new IntervalToWig().instanceMain(args); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/converters/RomanNumeralize.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.converters; 2 | 3 | import java.io.BufferedReader; 4 | import 
java.io.BufferedWriter; 5 | import java.io.IOException; 6 | import java.nio.charset.Charset; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | import java.util.regex.Matcher; 10 | import java.util.regex.Pattern; 11 | 12 | import org.apache.log4j.Logger; 13 | 14 | import com.beust.jcommander.Parameter; 15 | 16 | import edu.unc.genomics.CommandLineTool; 17 | import edu.unc.genomics.ReadablePathValidator; 18 | import edu.unc.utils.RomanNumeral; 19 | 20 | /** 21 | * Convert instances of "chr12" to "chrXII" in a text file, etc. 22 | * 23 | * @author timpalpant 24 | * 25 | */ 26 | public class RomanNumeralize extends CommandLineTool { 27 | 28 | private static final Logger log = Logger.getLogger(RomanNumeralize.class); 29 | 30 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class) 31 | public Path inputFile; 32 | @Parameter(names = { "-o", "--output" }, description = "Output file", required = true) 33 | public Path outputFile; 34 | 35 | /** 36 | * Pattern for finding "chr12" tokens (will find "chr1" through "chr99") 37 | */ 38 | Pattern p = Pattern.compile("chr[\\d]{1,2}"); 39 | 40 | @Override 41 | public void run() throws IOException { 42 | try (BufferedReader reader = Files.newBufferedReader(inputFile, Charset.defaultCharset()); 43 | BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) { 44 | log.debug("Copying input to output and replacing with Roman Numerals"); 45 | String line; 46 | while ((line = reader.readLine()) != null) { 47 | Matcher m = p.matcher(line); 48 | StringBuffer converted = new StringBuffer(line.length()); 49 | while (m.find()) { 50 | String chrNum = line.substring(m.start() + 3, m.end()); 51 | int arabic = Integer.parseInt(chrNum); 52 | String roman = RomanNumeral.int2roman(arabic); 53 | m.appendReplacement(converted, "chr" + roman); 54 | } 55 | m.appendTail(converted); 56 | 57 | writer.write(converted.toString()); 58 | 
writer.newLine(); 59 | } 60 | } 61 | } 62 | 63 | public static void main(String[] args) { 64 | new RomanNumeralize().instanceMain(args); 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/ngs/BaseAlignCounts.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.ngs; 2 | 3 | import java.io.IOException; 4 | import java.util.Iterator; 5 | 6 | import com.beust.jcommander.Parameter; 7 | 8 | import edu.unc.genomics.Interval; 9 | import edu.unc.genomics.ReadMapperTool; 10 | import edu.unc.genomics.io.IntervalFileReader; 11 | 12 | /** 13 | * This tool calculates the coverage of sequencing reads (or any interval data) 14 | * and creates a new Wig file with the number of reads overlapping each base 15 | * pair. 16 | * 17 | * @author timpalpant 18 | * 19 | */ 20 | public class BaseAlignCounts extends ReadMapperTool { 21 | 22 | @Parameter(names = { "-x", "--extend" }, description = "Extend reads from 5' end (default = fragment length)") 23 | public Integer extend = -1; 24 | 25 | @Override 26 | public float[] compute(IntervalFileReader reader, Interval chunk) throws IOException { 27 | float[] count = new float[chunk.length()]; 28 | 29 | // Need to pad the query if extending reads 30 | int paddedStart = chunk.getStart(); 31 | int paddedStop = chunk.getStop(); 32 | if (extend != null && extend != -1) { 33 | paddedStart = Math.max(chunk.getStart() - extend - 1, 1); 34 | paddedStop = Math.min(chunk.getStop() + extend + 1, assembly.getChrLength(chunk.getChr())); 35 | } 36 | 37 | Iterator it = reader.query(chunk.getChr(), paddedStart, paddedStop); 38 | while (it.hasNext()) { 39 | Interval entry = it.next(); 40 | int entryStop = entry.getStop(); 41 | if (extend != null && extend != -1) { 42 | if (entry.isWatson()) { 43 | entryStop = entry.getStart() + extend - 1; 44 | } else { 45 | entryStop = entry.getStart() - extend + 1; 46 | } 47 | } 48 | 49 | 
// Clamp to the current chunk 50 | int low = Math.max(Math.min(entry.getStart(), entryStop), chunk.getStart()); 51 | int high = Math.min(Math.max(entry.getStart(), entryStop), chunk.getStop()); 52 | for (int i = low; i <= high; i++) { 53 | count[i - chunk.getStart()]++; 54 | } 55 | } 56 | 57 | return count; 58 | } 59 | 60 | public static void main(String[] args) { 61 | new BaseAlignCounts().instanceMain(args); 62 | } 63 | } -------------------------------------------------------------------------------- /src/edu/unc/genomics/ngs/IntervalLengthDistribution.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.ngs; 2 | 3 | import java.io.IOException; 4 | import java.io.PrintWriter; 5 | import java.nio.charset.Charset; 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | 9 | import org.apache.commons.math3.stat.Frequency; 10 | import org.apache.log4j.Logger; 11 | 12 | import com.beust.jcommander.Parameter; 13 | 14 | import edu.unc.genomics.CommandLineTool; 15 | import edu.unc.genomics.Interval; 16 | import edu.unc.genomics.ReadablePathValidator; 17 | import edu.unc.genomics.io.IntervalFileReader; 18 | 19 | /** 20 | * Generate a histogram of interval lengths, such as read lengths or gene 21 | * lengths 22 | * 23 | * @author timpalpant 24 | * 25 | */ 26 | public class IntervalLengthDistribution extends CommandLineTool { 27 | 28 | private static final Logger log = Logger.getLogger(IntervalLengthDistribution.class); 29 | 30 | @Parameter(names = { "-i", "--input" }, description = "Interval file", required = true, validateWith = ReadablePathValidator.class) 31 | public Path inputFile; 32 | @Parameter(names = { "-f", "--freq" }, description = "Output frequencies rather than counts") 33 | public boolean outputFreq = false; 34 | @Parameter(names = { "-o", "--output" }, description = "Output file", required = true) 35 | public Path outputFile; 36 | 37 | @Override 38 | public void run() throws 
IOException { 39 | log.debug("Generating histogram of interval lengths"); 40 | Frequency freq = new Frequency(); 41 | int min = Integer.MAX_VALUE; 42 | int max = -1; 43 | try (IntervalFileReader reader = IntervalFileReader.autodetect(inputFile)) { 44 | for (Interval i : reader) { 45 | int L = i.length(); 46 | freq.addValue(L); 47 | 48 | if (L < min) { 49 | min = L; 50 | } 51 | if (L > max) { 52 | max = L; 53 | } 54 | } 55 | } 56 | 57 | log.debug("Writing histogram output"); 58 | try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(outputFile, Charset.defaultCharset()))) { 59 | for (int i = min; i <= max; i++) { 60 | if (outputFreq) { 61 | writer.println(i + "\t" + freq.getPct(i)); 62 | } else { 63 | writer.println(i + "\t" + freq.getCount(i)); 64 | } 65 | } 66 | } 67 | } 68 | 69 | public static void main(String[] args) { 70 | new IntervalLengthDistribution().instanceMain(args); 71 | } 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/ngs/RollingReadLength.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.ngs; 2 | 3 | import java.io.IOException; 4 | import java.util.Iterator; 5 | 6 | import edu.unc.genomics.Interval; 7 | import edu.unc.genomics.ReadMapperTool; 8 | import edu.unc.genomics.io.IntervalFileReader; 9 | 10 | /** 11 | * Creates a new Wig file with the mean read length of reads covering each base 12 | * pair. 
13 | * 14 | * @author timpalpant 15 | * 16 | */ 17 | public class RollingReadLength extends ReadMapperTool { 18 | 19 | @Override 20 | public float[] compute(IntervalFileReader reader, Interval chunk) throws IOException { 21 | int[] sum = new int[chunk.length()]; 22 | int[] count = new int[chunk.length()]; 23 | 24 | Iterator it = reader.query(chunk); 25 | while (it.hasNext()) { 26 | Interval entry = it.next(); 27 | int entryStart = Math.max(entry.low(), chunk.getStart()); 28 | int entryStop = Math.min(entry.high(), chunk.getStop()); 29 | for (int i = entryStart; i <= entryStop; i++) { 30 | sum[i - chunk.getStart()] += entry.length(); 31 | count[i - chunk.getStart()]++; 32 | } 33 | } 34 | 35 | // Calculate the average at each base pair 36 | float[] avg = new float[chunk.length()]; 37 | for (int i = 0; i < avg.length; i++) { 38 | if (count[i] == 0) { 39 | avg[i] = Float.NaN; 40 | } else { 41 | avg[i] = ((float) sum[i]) / count[i]; 42 | } 43 | } 44 | 45 | return avg; 46 | } 47 | 48 | public static void main(String[] args) { 49 | new RollingReadLength().instanceMain(args); 50 | } 51 | } -------------------------------------------------------------------------------- /src/edu/unc/genomics/ngs/SplitReads.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.ngs; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.log4j.Logger; 9 | 10 | import com.beust.jcommander.Parameter; 11 | 12 | import edu.unc.genomics.CommandLineTool; 13 | import edu.unc.genomics.Interval; 14 | import edu.unc.genomics.ReadablePathValidator; 15 | import edu.unc.genomics.io.IntervalFileReader; 16 | import edu.unc.genomics.io.IntervalFileWriter; 17 | 18 | /** 19 | * This tool splits sequencing reads into bins 20 | * 21 | * @author timpalpant 22 | * 23 | */ 24 | public class SplitReads extends CommandLineTool { 25 | 26 | private static final 
Logger log = Logger.getLogger(SplitReads.class); 27 | 28 | @Parameter(names = { "-i", "--input" }, required = true, description = "Input file", validateWith = ReadablePathValidator.class) 29 | public Path input; 30 | @Parameter(names = { "-b", "--bins" }, description = "Number of bins to split reads into") 31 | public int bins = 5; 32 | @Parameter(names = { "-o", "--output" }, description = "Output file") 33 | public Path output; 34 | 35 | @Override 36 | public void run() throws IOException { 37 | // Prepare the outputs 38 | String[] splitName = output.toString().split("\\.(?=[^\\.]+$)"); 39 | String basename = splitName[0]; 40 | String ext = splitName[1]; 41 | List> writers = new ArrayList<>(); 42 | try { 43 | for (int i = 0; i < bins; i++) { 44 | Path outFile = output.resolve(basename + '.' + i + '.' + ext); 45 | IntervalFileWriter writer = new IntervalFileWriter<>(outFile); 46 | writers.add(writer); 47 | } 48 | 49 | try (IntervalFileReader reader = IntervalFileReader.autodetect(input)) { 50 | int current = 0; 51 | int count = 0; 52 | for (Interval interval : reader) { 53 | writers.get(current).write(interval); 54 | current++; 55 | current %= bins; 56 | if (++count % 1_000_000 == 0) { 57 | log.debug("Processed " + count + " reads."); 58 | } 59 | } 60 | } 61 | } finally { 62 | for (IntervalFileWriter writer : writers) { 63 | writer.close(); 64 | } 65 | } 66 | } 67 | 68 | public static void main(String[] args) { 69 | new SplitReads().instanceMain(args); 70 | } 71 | } -------------------------------------------------------------------------------- /src/edu/unc/genomics/ngs/Subsample.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.ngs; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.util.Random; 6 | 7 | import org.apache.log4j.Logger; 8 | 9 | import com.beust.jcommander.Parameter; 10 | 11 | import edu.unc.genomics.CommandLineTool; 12 | import 
edu.unc.genomics.CommandLineToolException; 13 | import edu.unc.genomics.Interval; 14 | import edu.unc.genomics.ReadablePathValidator; 15 | import edu.unc.genomics.io.IntervalFileReader; 16 | import edu.unc.genomics.io.IntervalFileWriter; 17 | 18 | /** 19 | * Randomly select N reads out of a total of M 20 | * 21 | * @author timpalpant 22 | * 23 | */ 24 | public class Subsample extends CommandLineTool { 25 | 26 | private static final Logger log = Logger.getLogger(Subsample.class); 27 | 28 | @Parameter(names = { "-i", "--input" }, required = true, description = "Input file", validateWith = ReadablePathValidator.class) 29 | public Path input; 30 | @Parameter(names = { "-n", "--select" }, required = true, description = "Number of entries to select") 31 | public int n; 32 | @Parameter(names = { "-o", "--output" }, required = true, description = "Output file") 33 | public Path output; 34 | 35 | @Override 36 | public void run() throws IOException { 37 | try (IntervalFileReader reader = IntervalFileReader.autodetect(input); 38 | IntervalFileWriter writer = new IntervalFileWriter<>(output)) { 39 | int nRemaining = reader.count(); 40 | log.info("Input file has " + nRemaining + " entries"); 41 | if (n >= reader.count()) { 42 | throw new CommandLineToolException("Cannot select " + n + " entries from a file with " + nRemaining); 43 | } 44 | 45 | // See http://eyalsch.wordpress.com/2010/04/01/random-sample/ 46 | // for a nice summary of different algorithms to randomly pick n entries 47 | log.info("Randomly selecting " + n + " entries"); 48 | Random rng = new Random(); 49 | for (Interval entry : reader) { 50 | if (n == 0) { 51 | break; 52 | } else if (rng.nextDouble() < ((double) n) / nRemaining) { 53 | writer.write(entry); 54 | n--; 55 | } 56 | 57 | nRemaining--; 58 | } 59 | } 60 | } 61 | 62 | public static void main(String[] args) { 63 | new Subsample().instanceMain(args); 64 | } 65 | } -------------------------------------------------------------------------------- 
/src/edu/unc/genomics/nucleosomes/MapDyads.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.nucleosomes; 2 | 3 | import java.io.IOException; 4 | import java.util.Iterator; 5 | 6 | import com.beust.jcommander.Parameter; 7 | 8 | import edu.unc.genomics.Interval; 9 | import edu.unc.genomics.ReadMapperTool; 10 | import edu.unc.genomics.io.IntervalFileReader; 11 | 12 | /** 13 | * Count the number of read centers overlapping each base pair in the genome 14 | * 15 | * @author timpalpant 16 | * 17 | */ 18 | public class MapDyads extends ReadMapperTool { 19 | 20 | @Parameter(names = { "-s", "--size" }, description = "Mononucleosome length (default: read length)") 21 | public Integer nucleosomeSize; 22 | 23 | @Override 24 | public float[] compute(IntervalFileReader reader, Interval chunk) throws IOException { 25 | float[] count = new float[chunk.length()]; 26 | 27 | // Need to pad the query if artificially shifting read centers 28 | int paddedStart = chunk.getStart(); 29 | int paddedStop = chunk.getStop(); 30 | if (nucleosomeSize != null && nucleosomeSize > 0) { 31 | paddedStart = Math.max(chunk.getStart() - nucleosomeSize - 1, 1); 32 | paddedStop = Math.min(chunk.getStop() + nucleosomeSize + 1, assembly.getChrLength(chunk.getChr())); 33 | } 34 | 35 | Iterator it = reader.query(chunk.getChr(), paddedStart, paddedStop); 36 | while (it.hasNext()) { 37 | Interval entry = it.next(); 38 | int center; 39 | if (nucleosomeSize == null || nucleosomeSize <= 0) { 40 | center = entry.center(); 41 | } else { 42 | if (entry.isWatson()) { 43 | center = entry.getStart() + nucleosomeSize / 2; 44 | } else { 45 | center = entry.getStart() - nucleosomeSize / 2; 46 | } 47 | } 48 | 49 | // Only map if it is in the current chunk 50 | if (chunk.getStart() <= center && center <= chunk.getStop()) { 51 | count[center - chunk.getStart()]++; 52 | } 53 | } 54 | 55 | return count; 56 | } 57 | 58 | public static void main(String[] args) { 
59 | new MapDyads().instanceMain(args); 60 | } 61 | 62 | } -------------------------------------------------------------------------------- /src/edu/unc/genomics/visualization/StripMatrix.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.visualization; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.BufferedWriter; 5 | import java.io.IOException; 6 | import java.nio.charset.Charset; 7 | import java.nio.file.Files; 8 | import java.nio.file.Path; 9 | 10 | import com.beust.jcommander.Parameter; 11 | 12 | import edu.unc.genomics.CommandLineTool; 13 | import edu.unc.genomics.ReadablePathValidator; 14 | 15 | /** 16 | * Remove the first column and row (headers) from a matrix in matrix2png format 17 | * so that the output is purely numerical for easy import into Matlab 18 | * 19 | * @author timpalpant 20 | * 21 | */ 22 | public class StripMatrix extends CommandLineTool { 23 | 24 | @Parameter(names = { "-i", "--input" }, description = "Input file (matrix2png format)", required = true, validateWith = ReadablePathValidator.class) 25 | public Path inputFile; 26 | @Parameter(names = { "-o", "--output" }, description = "Output file (tabular)", required = true) 27 | public Path outputFile; 28 | 29 | public void run() throws IOException { 30 | try (BufferedReader reader = Files.newBufferedReader(inputFile, Charset.defaultCharset()); 31 | BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) { 32 | // Skip the first (header) line 33 | String line = reader.readLine(); 34 | while ((line = reader.readLine()) != null) { 35 | String[] row = line.split("\t"); 36 | for (int i = 1; i < row.length; i++) { 37 | String cell = row[i]; 38 | if (cell.equalsIgnoreCase("-")) { 39 | writer.write("NaN"); 40 | } else { 41 | writer.write(cell); 42 | } 43 | 44 | if (i < row.length - 1) { 45 | writer.write("\t"); 46 | } 47 | } 48 | writer.newLine(); 49 | } 50 | } 51 | } 52 | 53 | public static 
void main(String[] args) { 54 | new StripMatrix().instanceMain(args); 55 | } 56 | } -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Add.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Paths; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.log4j.Logger; 9 | 10 | import com.beust.jcommander.Parameter; 11 | 12 | import edu.unc.genomics.CommandLineToolException; 13 | import edu.unc.genomics.Interval; 14 | import edu.unc.genomics.WigMathTool; 15 | import edu.unc.genomics.io.WigFileReader; 16 | import edu.unc.genomics.io.WigFileException; 17 | 18 | /** 19 | * This tool will add all values in the specified Wig files base pair by base 20 | * pair. 21 | * 22 | * @author timpalpant 23 | * 24 | */ 25 | public class Add extends WigMathTool { 26 | 27 | private static final Logger log = Logger.getLogger(Add.class); 28 | 29 | @Parameter(description = "Input files", required = true) 30 | public List inputFiles = new ArrayList(); 31 | 32 | @Override 33 | public void setup() { 34 | if (inputFiles.size() < 2) { 35 | throw new CommandLineToolException("No reason to add < 2 files."); 36 | } 37 | 38 | log.debug("Initializing input files"); 39 | for (String inputFile : inputFiles) { 40 | try { 41 | addInputFile(WigFileReader.autodetect(Paths.get(inputFile))); 42 | } catch (IOException e) { 43 | throw new CommandLineToolException(e); 44 | } 45 | } 46 | log.debug("Initialized " + inputs.size() + " input files"); 47 | } 48 | 49 | @Override 50 | public float[] compute(Interval chunk) throws IOException, WigFileException { 51 | float[] sum = new float[chunk.length()]; 52 | 53 | for (WigFileReader wig : inputs) { 54 | float[] data = wig.query(chunk).getValues(); 55 | for (int i = 0; i < data.length; i++) { 56 | if (!Float.isNaN(data[i])) { 57 | sum[i] += 
data[i]; 58 | } 59 | } 60 | } 61 | 62 | return sum; 63 | } 64 | 65 | /** 66 | * @param args 67 | * @throws WigFileException 68 | * @throws IOException 69 | */ 70 | public static void main(String[] args) throws IOException, WigFileException { 71 | new Add().instanceMain(args); 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Average.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Paths; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.log4j.Logger; 9 | 10 | import com.beust.jcommander.Parameter; 11 | 12 | import edu.unc.genomics.CommandLineToolException; 13 | import edu.unc.genomics.Interval; 14 | import edu.unc.genomics.WigMathTool; 15 | import edu.unc.genomics.io.WigFileReader; 16 | import edu.unc.genomics.io.WigFileException; 17 | 18 | /** 19 | * Average multiple Wig files base pair by base pair 20 | * 21 | * @author timpalpant 22 | * 23 | */ 24 | public class Average extends WigMathTool { 25 | 26 | private static final Logger log = Logger.getLogger(Average.class); 27 | 28 | @Parameter(description = "Input files", required = true) 29 | public List inputFiles = new ArrayList(); 30 | 31 | @Override 32 | public void setup() { 33 | if (inputFiles.size() < 2) { 34 | throw new CommandLineToolException("No reason to average < 2 files."); 35 | } 36 | 37 | log.debug("Initializing input files"); 38 | for (String inputFile : inputFiles) { 39 | try { 40 | addInputFile(WigFileReader.autodetect(Paths.get(inputFile))); 41 | } catch (IOException e) { 42 | log.error("IOError initializing input Wig file: " + inputFile); 43 | e.printStackTrace(); 44 | throw new CommandLineToolException(e.getMessage()); 45 | } 46 | } 47 | log.debug("Initialized " + inputs.size() + " input files"); 48 | } 49 | 50 | @Override 51 | public float[] 
compute(Interval chunk) throws IOException, WigFileException { 52 | float[] avg = new float[chunk.length()]; 53 | int[] count = new int[chunk.length()]; 54 | 55 | for (WigFileReader wig : inputs) { 56 | float[] data = wig.query(chunk).getValues(); 57 | for (int i = 0; i < data.length; i++) { 58 | if (!Float.isNaN(data[i])) { 59 | avg[i] += data[i]; 60 | count[i]++; 61 | } 62 | } 63 | } 64 | 65 | for (int i = 0; i < avg.length; i++) { 66 | if (count[i] > 0) { 67 | avg[i] /= count[i]; 68 | } else { 69 | avg[i] = Float.NaN; 70 | } 71 | } 72 | 73 | return avg; 74 | } 75 | 76 | /** 77 | * @param args 78 | * @throws WigFileException 79 | * @throws IOException 80 | */ 81 | public static void main(String[] args) throws IOException, WigFileException { 82 | new Average().instanceMain(args); 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Divide.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | 6 | import org.apache.log4j.Logger; 7 | 8 | import com.beust.jcommander.Parameter; 9 | 10 | import edu.unc.genomics.CommandLineToolException; 11 | import edu.unc.genomics.Interval; 12 | import edu.unc.genomics.ReadablePathValidator; 13 | import edu.unc.genomics.WigMathTool; 14 | import edu.unc.genomics.io.WigFileReader; 15 | import edu.unc.genomics.io.WigFileException; 16 | 17 | /** 18 | * Divide two (Big)Wig files base pair by base pair 19 | * 20 | * @author timpalpant 21 | * 22 | */ 23 | public class Divide extends WigMathTool { 24 | 25 | private static final Logger log = Logger.getLogger(Divide.class); 26 | 27 | @Parameter(names = { "-n", "--numerator" }, description = "Dividend / Numerator (file 1)", required = true, validateWith = ReadablePathValidator.class) 28 | public Path dividendFile; 29 | @Parameter(names = { "-d", "--denominator" }, description 
= "Divisor / Denominator (file 2)", required = true) 30 | public Path divisorFile; 31 | 32 | WigFileReader dividendReader, divisorReader; 33 | 34 | @Override 35 | public void setup() { 36 | try { 37 | dividendReader = WigFileReader.autodetect(dividendFile); 38 | divisorReader = WigFileReader.autodetect(divisorFile); 39 | } catch (IOException e) { 40 | throw new CommandLineToolException(e); 41 | } 42 | inputs.add(dividendReader); 43 | inputs.add(divisorReader); 44 | log.debug("Initialized " + inputs.size() + " input files"); 45 | } 46 | 47 | @Override 48 | public float[] compute(Interval chunk) throws IOException, WigFileException { 49 | float[] dividend = dividendReader.query(chunk).getValues(); 50 | float[] divisor = divisorReader.query(chunk).getValues(); 51 | for (int i = 0; i < dividend.length; i++) { 52 | if (divisor[i] == 0) { 53 | dividend[i] = Float.NaN; 54 | } else { 55 | dividend[i] /= divisor[i]; 56 | } 57 | } 58 | 59 | return dividend; 60 | } 61 | 62 | /** 63 | * @param args 64 | * @throws WigFileException 65 | * @throws IOException 66 | */ 67 | public static void main(String[] args) throws IOException, WigFileException { 68 | new Divide().instanceMain(args); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/LogTransform.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | 6 | import com.beust.jcommander.Parameter; 7 | 8 | import edu.unc.genomics.CommandLineToolException; 9 | import edu.unc.genomics.Interval; 10 | import edu.unc.genomics.ReadablePathValidator; 11 | import edu.unc.genomics.WigMathTool; 12 | import edu.unc.genomics.io.WigFileReader; 13 | import edu.unc.genomics.io.WigFileException; 14 | 15 | /** 16 | * Log-transform a (Big)Wig file 17 | * 18 | * @author timpalpant 19 | * 20 | */ 21 | public class LogTransform 
extends WigMathTool { 22 | 23 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class) 24 | public Path inputFile; 25 | @Parameter(names = { "-b", "--base" }, description = "Logarithm base (default = 2)") 26 | public double base = 2; 27 | 28 | WigFileReader reader; 29 | private double baseChange; 30 | 31 | @Override 32 | public void setup() { 33 | baseChange = Math.log(base); 34 | 35 | try { 36 | reader = WigFileReader.autodetect(inputFile); 37 | } catch (IOException e) { 38 | throw new CommandLineToolException(e); 39 | } 40 | inputs.add(reader); 41 | } 42 | 43 | @Override 44 | public float[] compute(Interval chunk) throws IOException, WigFileException { 45 | float[] result = reader.query(chunk).getValues(); 46 | for (int i = 0; i < result.length; i++) { 47 | result[i] = (float) (Math.log(result[i]) / baseChange); 48 | } 49 | 50 | return result; 51 | } 52 | 53 | /** 54 | * @param args 55 | * @throws WigFileException 56 | * @throws IOException 57 | */ 58 | public static void main(String[] args) throws IOException, WigFileException { 59 | new LogTransform().instanceMain(args); 60 | } 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/MovingAverageSmooth.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | 6 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics; 7 | 8 | import com.beust.jcommander.Parameter; 9 | 10 | import edu.unc.genomics.Contig; 11 | import edu.unc.genomics.CommandLineToolException; 12 | import edu.unc.genomics.Interval; 13 | import edu.unc.genomics.ReadablePathValidator; 14 | import edu.unc.genomics.WigMathTool; 15 | import edu.unc.genomics.io.WigFileReader; 16 | import edu.unc.genomics.io.WigFileException; 17 | 18 | import 
edu.unc.utils.FFTUtils; 19 | 20 | /** 21 | * Smooth a (Big)Wig file with a moving average filter 22 | * 23 | * @author timpalpant 24 | * 25 | */ 26 | public class MovingAverageSmooth extends WigMathTool { 27 | 28 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class) 29 | public Path inputFile; 30 | @Parameter(names = { "-w", "--width" }, description = "Width of kernel (bp)") 31 | public int width = 11; 32 | 33 | WigFileReader reader; 34 | 35 | @Override 36 | public void setup() { 37 | try { 38 | reader = WigFileReader.autodetect(inputFile); 39 | } catch (IOException e) { 40 | throw new CommandLineToolException(e); 41 | } 42 | inputs.add(reader); 43 | } 44 | 45 | @Override 46 | public float[] compute(Interval chunk) throws IOException, WigFileException { 47 | // Pad the query so that we can provide values for the ends 48 | int queryStart = Math.max(chunk.getStart() - width / 2, reader.getChrStart(chunk.getChr())); 49 | int queryStop = Math.min(chunk.getStop() + width / 2, reader.getChrStop(chunk.getChr())); 50 | Contig contig = reader.query(chunk.getChr(), queryStart, queryStop); 51 | int nValues = (int) Math.ceil(((float) chunk.length()) / step); 52 | float[] result = new float[nValues]; 53 | for (int i = 0; i < result.length; i++) { 54 | float x = 0; 55 | int start, stop, n; 56 | if (step < width) { 57 | n = width; 58 | start = contig.getStart() + i*step + step/2 - width/2; 59 | stop = contig.getStart() + i*step + n; 60 | } else { 61 | n = step; 62 | start = contig.getStart() + i*step; 63 | stop = contig.getStart() + n; 64 | } 65 | 66 | for (int bp = start; bp <= stop; bp++) { 67 | x += contig.get(bp); 68 | } 69 | result[i] = x / n; 70 | } 71 | return result; 72 | } 73 | 74 | public static void main(String[] args) throws IOException, WigFileException { 75 | new MovingAverageSmooth().instanceMain(args); 76 | } 77 | 78 | } 79 | 
-------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Multiply.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Paths; 5 | import java.util.ArrayList; 6 | import java.util.Arrays; 7 | import java.util.List; 8 | 9 | import org.apache.log4j.Logger; 10 | 11 | import com.beust.jcommander.Parameter; 12 | 13 | import edu.unc.genomics.CommandLineToolException; 14 | import edu.unc.genomics.Interval; 15 | import edu.unc.genomics.WigMathTool; 16 | import edu.unc.genomics.io.WigFileReader; 17 | import edu.unc.genomics.io.WigFileException; 18 | 19 | /** 20 | * Multiply (Big)Wig files base pair by base pair 21 | * 22 | * @author timpalpant 23 | * 24 | */ 25 | public class Multiply extends WigMathTool { 26 | 27 | private static final Logger log = Logger.getLogger(Multiply.class); 28 | 29 | @Parameter(description = "Input files", required = true) 30 | public List inputFiles = new ArrayList(); 31 | 32 | @Override 33 | public void setup() { 34 | log.debug("Initializing input files"); 35 | for (String inputFile : inputFiles) { 36 | try { 37 | addInputFile(WigFileReader.autodetect(Paths.get(inputFile))); 38 | } catch (IOException e) { 39 | throw new CommandLineToolException(e); 40 | } 41 | } 42 | log.debug("Initialized " + inputs.size() + " input files"); 43 | } 44 | 45 | @Override 46 | public float[] compute(Interval chunk) throws IOException, WigFileException { 47 | float[] product = new float[chunk.length()]; 48 | Arrays.fill(product, 1); 49 | 50 | for (WigFileReader wig : inputs) { 51 | float[] data = wig.query(chunk).getValues(); 52 | for (int i = 0; i < data.length; i++) { 53 | if (!Float.isNaN(data[i])) { 54 | product[i] *= data[i]; 55 | } 56 | } 57 | } 58 | 59 | return product; 60 | } 61 | 62 | /** 63 | * @param args 64 | * @throws WigFileException 65 | * @throws IOException 66 | */ 67 
| public static void main(String[] args) throws IOException, WigFileException { 68 | new Multiply().instanceMain(args); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Root.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | 6 | import com.beust.jcommander.Parameter; 7 | 8 | import edu.unc.genomics.CommandLineToolException; 9 | import edu.unc.genomics.Interval; 10 | import edu.unc.genomics.ReadablePathValidator; 11 | import edu.unc.genomics.WigMathTool; 12 | import edu.unc.genomics.io.WigFileReader; 13 | import edu.unc.genomics.io.WigFileException; 14 | 15 | /** 16 | * Take the nth-root of a (Big)Wig file 17 | * 18 | * @author timpalpant 19 | * 20 | */ 21 | public class Root extends WigMathTool { 22 | 23 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class) 24 | public Path inputFile; 25 | @Parameter(names = { "-n", "--root" }, description = "Take the nth root (default = 2)") 26 | public double root = 2; 27 | 28 | WigFileReader reader; 29 | private double baseChange; 30 | 31 | @Override 32 | public void setup() { 33 | try { 34 | reader = WigFileReader.autodetect(inputFile); 35 | } catch (IOException e) { 36 | throw new CommandLineToolException(e); 37 | } 38 | inputs.add(reader); 39 | } 40 | 41 | @Override 42 | public float[] compute(Interval chunk) throws IOException, WigFileException { 43 | float[] result = reader.query(chunk).getValues(); 44 | for (int i = 0; i < result.length; i++) { 45 | result[i] = (float) (Math.sqrt(result[i])); 46 | } 47 | 48 | return result; 49 | } 50 | 51 | /** 52 | * @param args 53 | * @throws WigFileException 54 | * @throws IOException 55 | */ 56 | public static void main(String[] args) throws IOException, WigFileException { 57 | new 
Root().instanceMain(args); 58 | } 59 | 60 | } 61 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Shift.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Path; 5 | import java.util.HashMap; 6 | import java.util.Map; 7 | 8 | import org.apache.log4j.Logger; 9 | 10 | import com.beust.jcommander.Parameter; 11 | 12 | import edu.unc.genomics.CommandLineToolException; 13 | import edu.unc.genomics.Interval; 14 | import edu.unc.genomics.ReadablePathValidator; 15 | import edu.unc.genomics.WigMathTool; 16 | import edu.unc.genomics.io.WigFileReader; 17 | import edu.unc.genomics.io.WigFileException; 18 | 19 | /** 20 | * Shift a Wig file to have a specified mean 21 | * 22 | * @author timpalpant 23 | * 24 | */ 25 | public class Shift extends WigMathTool { 26 | 27 | private static final Logger log = Logger.getLogger(Shift.class); 28 | 29 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class) 30 | public Path inputFile; 31 | @Parameter(names = { "-m", "--mean" }, description = "New mean") 32 | public float newMean = 0; 33 | @Parameter(names = { "-b", "--bychr" }, description = "Shift each chromosome individually") 34 | public boolean byChromosome = false; 35 | 36 | WigFileReader reader; 37 | Map shifts = new HashMap<>(); 38 | 39 | @Override 40 | public void setup() { 41 | try { 42 | reader = WigFileReader.autodetect(inputFile); 43 | for (String chr : reader.chromosomes()) { 44 | float shift; 45 | if (byChromosome) { 46 | float chrMean = (float) reader.queryStats(chr, reader.getChrStart(chr), reader.getChrStop(chr)).getMean(); 47 | log.debug("Mean of " + chr + " = " + chrMean); 48 | shift = newMean - chrMean; 49 | } else { 50 | shift = (float) (newMean - reader.mean()); 51 | } 52 | shifts.put(chr, shift); 53 | } 
54 | } catch (IOException | WigFileException e) { 55 | throw new CommandLineToolException(e); 56 | } 57 | inputs.add(reader); 58 | } 59 | 60 | @Override 61 | public float[] compute(Interval chunk) throws IOException, WigFileException { 62 | float[] result = reader.query(chunk).getValues(); 63 | float shift = shifts.get(chunk.getChr()); 64 | for (int i = 0; i < result.length; i++) { 65 | result[i] += shift; 66 | } 67 | 68 | return result; 69 | } 70 | 71 | /** 72 | * @param args 73 | * @throws WigFileException 74 | * @throws IOException 75 | */ 76 | public static void main(String[] args) throws IOException, WigFileException { 77 | new Shift().instanceMain(args); 78 | } 79 | 80 | } 81 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/StandardDeviation.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.IOException; 4 | import java.nio.file.Paths; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.commons.math3.stat.descriptive.SummaryStatistics; 9 | import org.apache.log4j.Logger; 10 | 11 | import com.beust.jcommander.Parameter; 12 | 13 | import edu.unc.genomics.CommandLineToolException; 14 | import edu.unc.genomics.Interval; 15 | import edu.unc.genomics.WigMathTool; 16 | import edu.unc.genomics.io.WigFileReader; 17 | import edu.unc.genomics.io.WigFileException; 18 | 19 | /** 20 | * Calculate base pair by base pair variance for a set of Wig files 21 | * 22 | * @author timpalpant 23 | * 24 | */ 25 | public class StandardDeviation extends WigMathTool { 26 | 27 | private static final Logger log = Logger.getLogger(StandardDeviation.class); 28 | 29 | @Parameter(description = "Input files", required = true) 30 | public List inputFiles = new ArrayList(); 31 | 32 | @Override 33 | public void setup() { 34 | if (inputFiles.size() < 2) { 35 | throw new CommandLineToolException("Cannot 
compute variance with < 2 files."); 36 | } 37 | 38 | log.debug("Initializing input files"); 39 | for (String inputFile : inputFiles) { 40 | try { 41 | addInputFile(WigFileReader.autodetect(Paths.get(inputFile))); 42 | } catch (IOException e) { 43 | log.error("IOError initializing input Wig file: " + inputFile); 44 | e.printStackTrace(); 45 | throw new CommandLineToolException(e.getMessage()); 46 | } 47 | } 48 | log.debug("Initialized " + inputs.size() + " input files"); 49 | } 50 | 51 | @Override 52 | public float[] compute(Interval chunk) throws IOException, WigFileException { 53 | SummaryStatistics[] stats = new SummaryStatistics[chunk.length()]; 54 | for (int i = 0; i < stats.length; i++) { 55 | stats[i] = new SummaryStatistics(); 56 | } 57 | 58 | for (WigFileReader wig : inputs) { 59 | float[] data = wig.query(chunk).getValues(); 60 | for (int i = 0; i < data.length; i++) { 61 | if (!Float.isNaN(data[i])) { 62 | stats[i].addValue(data[i]); 63 | } 64 | } 65 | } 66 | 67 | float[] result = new float[chunk.length()]; 68 | for (int i = 0; i < result.length; i++) { 69 | result[i] = (float) stats[i].getStandardDeviation(); 70 | } 71 | return result; 72 | } 73 | 74 | /** 75 | * @param args 76 | * @throws WigFileException 77 | * @throws IOException 78 | */ 79 | public static void main(String[] args) throws IOException, WigFileException { 80 | new StandardDeviation().instanceMain(args); 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/edu/unc/genomics/wigmath/Summary.java: -------------------------------------------------------------------------------- 1 | package edu.unc.genomics.wigmath; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.IOException; 5 | import java.nio.charset.Charset; 6 | import java.nio.file.Files; 7 | import java.nio.file.Path; 8 | 9 | import org.apache.log4j.Logger; 10 | 11 | import com.beust.jcommander.Parameter; 12 | 13 | import edu.unc.genomics.CommandLineTool; 14 | import 
edu.unc.genomics.ReadablePathValidator; 15 | import edu.unc.genomics.io.WigFileReader; 16 | import edu.unc.genomics.io.WigFileException; 17 | import edu.unc.genomics.ngs.Autocorrelation; 18 | 19 | /** 20 | * Output a summary of a (Big)Wig file with information about the chromosomes, 21 | * contigs, and statistics about the data. 22 | * 23 | * @author timpalpant 24 | * 25 | */ 26 | public class Summary extends CommandLineTool { 27 | 28 | private static final Logger log = Logger.getLogger(Autocorrelation.class); 29 | 30 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class) 31 | public Path inputFile; 32 | @Parameter(names = { "-o", "--output" }, description = "Output file") 33 | public Path outputFile; 34 | 35 | public void run() throws IOException { 36 | try (WigFileReader reader = WigFileReader.autodetect(inputFile)) { 37 | String summary = reader.toString(); 38 | 39 | if (outputFile != null) { 40 | log.debug("Writing to output file"); 41 | try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) { 42 | writer.write(summary); 43 | } 44 | } else { 45 | System.out.println(summary); 46 | } 47 | } 48 | } 49 | 50 | public static void main(String[] args) throws IOException, WigFileException { 51 | new Summary().instanceMain(args); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /src/edu/unc/utils/ArrayScaler.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | import org.apache.commons.math3.analysis.UnivariateFunction; 4 | import org.apache.commons.math3.analysis.interpolation.SplineInterpolator; 5 | import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator; 6 | 7 | /** 8 | * Generate scaled versions of an array with different resolution Can be used 9 | * for downsampling/upsampling a 1D array using interpolation 
Interpolation 10 | * routines are from Apache commons-math3 11 | * 12 | * @author timpalpant 13 | * 14 | */ 15 | public class ArrayScaler { 16 | 17 | private UnivariateFunction interp; 18 | 19 | /** 20 | * Create a new ArrayScaler 21 | * 22 | * @param x 23 | * the seed array to downsample/upsample 24 | */ 25 | public ArrayScaler(double[] x) { 26 | double[] indices = new double[x.length]; 27 | for (int i = 0; i < indices.length; i++) { 28 | indices[i] = ((double) i) / (x.length - 1); 29 | } 30 | 31 | UnivariateInterpolator interpolator = new SplineInterpolator(); 32 | interp = interpolator.interpolate(indices, x); 33 | } 34 | 35 | /** 36 | * Interpolate to create a new scaled vector of length l 37 | * 38 | * @param l 39 | * the desired vector length 40 | * @return a new vector of length l created by interpolating x 41 | */ 42 | public double[] getScaled(int l) { 43 | double[] stretched = new double[l]; 44 | for (int i = 0; i < l; i++) { 45 | stretched[i] = interp.value(((double) i) / l); 46 | } 47 | return stretched; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/edu/unc/utils/ArrayUtils.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | /** 4 | * Miscellaneous utility functions for working with arrays 5 | * 6 | * @author timpalpant 7 | * 8 | */ 9 | public class ArrayUtils { 10 | 11 | /** 12 | * Get the index of the maximum (largest) value in an array In the event of a 13 | * tie, the first index is returned 14 | * 15 | * @param x 16 | * a vector of values 17 | * @return the index of the largest element in x 18 | */ 19 | public static int maxIndex(float[] x) { 20 | float maxValue = -Float.MAX_VALUE; 21 | int maxIndex = -1; 22 | for (int i = 0; i < x.length; i++) { 23 | if (x[i] > maxValue) { 24 | maxValue = x[i]; 25 | maxIndex = i; 26 | } 27 | } 28 | 29 | return maxIndex; 30 | } 31 | 32 | public static float[] mapToFloat(int[] data) { 33 | 
float[] ret = new float[data.length]; 34 | for (int i = 0; i < data.length; i++) { 35 | ret[i] = data[i]; 36 | } 37 | return ret; 38 | } 39 | 40 | public static int[] mapToInt(float[] data) { 41 | int[] ret = new int[data.length]; 42 | for (int i = 0; i < data.length; i++) { 43 | ret[i] = (int) data[i]; 44 | } 45 | return ret; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/edu/unc/utils/FFTUtils.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | import org.jtransforms.fft.FloatFFT_1D; 4 | 5 | /** 6 | * Routines for working with Fourier transforms and frequency spectrum data 7 | * 8 | * @author timpalpant 9 | * 10 | */ 11 | public class FFTUtils { 12 | /** 13 | * Computes the power spectrum from FFT data taking into account even/odd 14 | * length arrays refer to JTransforms documentation for layout of the FFT data 15 | * 16 | * @param f 17 | * the DFT-transformed data from JTransforms.realForward() 18 | * @return the power spectrum of the complex frequency spectrum in f 19 | */ 20 | public static float[] abs2(float[] f) { 21 | int n = f.length; 22 | float[] ps = new float[n / 2 + 1]; 23 | // DC component 24 | ps[0] = (f[0] * f[0]) / (n * n); 25 | 26 | // Even 27 | if (n % 2 == 0) { 28 | for (int k = 1; k < n / 2; k++) { 29 | ps[k] = f[2 * k] * f[2 * k] + f[2 * k + 1] * f[2 * k + 1]; 30 | } 31 | ps[n / 2] = f[1] * f[1]; 32 | // Odd 33 | } else { 34 | for (int k = 1; k < (n - 1) / 2; k++) { 35 | ps[k] = f[2 * k] * f[2 * k] + f[2 * k + 1] * f[2 * k + 1]; 36 | } 37 | 38 | ps[(n - 1) / 2] = f[n - 1] * f[n - 1] + f[1] * f[1]; 39 | } 40 | 41 | return ps; 42 | } 43 | 44 | /** 45 | * Computes the autocovariance of the data in f 46 | * 47 | * @param x 48 | * a vector of real data 49 | * @param maxShift 50 | * the maximum phase shift to calculate 51 | * @return the autocovariance values, having length Math.min(x.length, 52 | * maxShift) 53 | */ 54 | 
public static float[] autocovariance(float[] x, int maxShift) { 55 | float total = 0; 56 | for (int i = 0; i < x.length; i++) { 57 | total += x[i]; 58 | } 59 | float mean = total / x.length; 60 | 61 | int stop = Math.min(x.length, maxShift); 62 | float[] auto = new float[stop]; 63 | for (int i = 0; i < stop; i++) { 64 | for (int j = 0; j < x.length - i; j++) { 65 | auto[i] += (x[j] - mean) * (x[j + i] - mean); 66 | } 67 | } 68 | 69 | return auto; 70 | } 71 | 72 | /** 73 | * Computes the autocovariance of the data in f for all possible shifts 74 | * 75 | * @param x 76 | * a vector of real data 77 | * @return the autocovariance values, having length equal to x.length 78 | */ 79 | public static float[] autocovariance(float[] x) { 80 | return autocovariance(x, x.length); 81 | } 82 | 83 | //public static float[] convolve(float[] x, float[] y) { 84 | // TODO 85 | //} 86 | 87 | } 88 | -------------------------------------------------------------------------------- /src/edu/unc/utils/FloatCorrelation.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | /** 4 | * Some basic routines for calculating correlation coefficients with 5 | * single-precision float[] arrays since commons-math only works with double[] 6 | * arrays 7 | * 8 | * @author timpalpant 9 | * 10 | */ 11 | public class FloatCorrelation { 12 | /** 13 | * Calculate Pearson's product-moment correlation coefficient (R) between x,y 14 | * data 15 | * 16 | * @param x 17 | * a vector of values 18 | * @param y 19 | * a vector of values 20 | * @return the Pearson correlation between the values in x and the values in y 21 | */ 22 | public static float pearson(float[] x, float[] y) { 23 | if (x.length != y.length) { 24 | throw new RuntimeException("Length of x (" + x.length + ") does not equal length of y (" + y.length + ")"); 25 | } 26 | 27 | int N = 0; 28 | double sumX = 0, sumY = 0; 29 | double sumSqX = 0, sumSqY = 0; 30 | double sumXY = 0; 31 | for 
(int i = 0; i < x.length; i++) { 32 | // Skip NaN / Infinity values in the correlation calculation 33 | if (!Float.isNaN(x[i]) && !Float.isInfinite(x[i]) && !Float.isNaN(y[i]) && !Float.isInfinite(y[i])) { 34 | N++; 35 | sumX += x[i]; 36 | sumY += y[i]; 37 | sumSqX += x[i] * x[i]; 38 | sumSqY += y[i] * y[i]; 39 | sumXY += x[i] * y[i]; 40 | } 41 | } 42 | 43 | return (float) ((N * sumXY - sumX * sumY) / Math.sqrt(N * sumSqX - sumX * sumX) / Math.sqrt(N * sumSqY - sumY 44 | * sumY)); 45 | } 46 | 47 | /** 48 | * Calculate Spearman's rank correlation coefficient between x,y data defined 49 | * to be the Pearson correlation between the ranks of the data 50 | * 51 | * @param x 52 | * a vector of values 53 | * @param y 54 | * a vector of values 55 | * @return the Spearman correlation between the values in x and the values in 56 | * y 57 | */ 58 | public static float spearman(float[] x, float[] y) { 59 | if (x.length != y.length) { 60 | throw new RuntimeException("Length of x (" + x.length + ") does not equal length of y (" + y.length + ")"); 61 | } 62 | 63 | // Compute the ranking of x and y 64 | float[] rankX = ArrayUtils.mapToFloat(SortUtils.rank(x)); 65 | float[] rankY = ArrayUtils.mapToFloat(SortUtils.rank(y)); 66 | 67 | return pearson(rankX, rankY); 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/edu/unc/utils/FloatHistogram.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | import java.util.Arrays; 4 | 5 | /** 6 | * @author timpalpant Adapted from: 7 | * http://www.particle.kth.se/~fmi/kurs/PhysicsSimulation 8 | * /Lectures/11B/Examples/Experiment/Histogram.java A simple histogram 9 | * class. The setData(float f) finds in which bin the value falls for 10 | * nBins between the given minimum and maximum values. An integer array 11 | * keeps track of the number of times the input value fell into a 12 | * particular bin. 
/**
 * A simple fixed-width histogram.
 *
 * {@link #addValue(double)} finds which of the nBins equal-width bins between
 * the given minimum (inclusive) and maximum (exclusive) a value falls into,
 * and an integer array keeps track of how many values fell into each bin.
 * Values below the minimum are counted as underflows, values at or above the
 * maximum as overflows, and NaN values are ignored.
 *
 * Adapted from:
 * http://www.particle.kth.se/~fmi/kurs/PhysicsSimulation/Lectures/11B/Examples/Experiment/Histogram.java
 *
 * @author timpalpant
 */
public class FloatHistogram {

  int[] bins = null;
  int nBins;
  double xLow, xHigh;
  double delBin;

  int overFlows = 0, underFlows = 0;

  /**
   * Create a new histogram
   *
   * @param nBins
   *          the number of bins
   * @param xLow
   *          the lower edge of the first bin (inclusive)
   * @param xHigh
   *          the upper edge of the last bin (exclusive)
   */
  public FloatHistogram(int nBins, double xLow, double xHigh) {

    this.nBins = nBins;
    this.xLow = xLow;
    this.xHigh = xHigh;

    bins = new int[nBins];
    delBin = (xHigh - xLow) / nBins;

    reset();
  }

  /**
   * Count a value in the bin it falls into
   *
   * @param data
   *          the value to add; NaN is ignored
   */
  public void addValue(double data) {
    // NaN fails both range checks below, and (int) NaN is 0, so without
    // this guard NaN would be counted in the first bin.
    if (Double.isNaN(data)) {
      return;
    }

    if (data < xLow) {
      underFlows++;
    } else if (data >= xHigh) {
      overFlows++;
    } else {
      int bin = (int) ((data - xLow) / delBin);
      // Rounding in the division can push a value just below xHigh to
      // index nBins; it belongs in the last bin rather than being dropped.
      if (bin >= nBins) {
        bin = nBins - 1;
      }
      if (bin >= 0) {
        bins[bin]++;
      }
    }
  }

  /**
   * @return the bin counts (the internal array, not a copy)
   */
  public int[] getHistogram() {
    return bins;
  }

  /**
   * @return the width of each bin
   */
  public double getBinSize() {
    return delBin;
  }

  /**
   * Clear all bin counts and the underflow/overflow counters
   */
  public void reset() {
    Arrays.fill(bins, 0);
    underFlows = 0;
    overFlows = 0;
  }

  /**
   * @return a tab-delimited table with one line for the underflows, one line
   *         per bin (lower edge and count), and one line for the overflows
   */
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("<" + xLow + "\t" + underFlows + "\n");
    for (int i = 0; i < bins.length; i++) {
      sb.append(xLow + i * delBin + "\t" + bins[i] + "\n");
    }
    sb.append(">" + xHigh + "\t" + overFlows);
    return sb.toString();
  }

}
RomanValue(10, "X"), new RomanValue(9, "IX"), 15 | new RomanValue(5, "V"), new RomanValue(4, "IV"), new RomanValue(1, "I") }; 16 | 17 | /** 18 | * Convert an int to Roman numeral 19 | * 20 | * @param n 21 | * an integer between 1-3999 22 | * @return n as a Roman numeral 23 | */ 24 | public static String int2roman(int n) { 25 | if (n >= 4000 || n < 1) { 26 | throw new NumberFormatException("Numbers must be in range 1-3999"); 27 | } 28 | 29 | // ... Start with largest value, and work toward smallest. 30 | StringBuilder result = new StringBuilder(10); 31 | for (RomanValue equiv : ROMAN_VALUE_TABLE) { 32 | // ... Remove as many of this value as possible (maybe none). 33 | while (n >= equiv.intVal) { 34 | n -= equiv.intVal; // Subtract value. 35 | result.append(equiv.romVal); // Add roman equivalent. 36 | } 37 | } 38 | 39 | return result.toString(); 40 | } 41 | 42 | private static class RomanValue { 43 | // ... No need to make this fields private because they are 44 | // used only in this private value class. 45 | int intVal; // Integer value. 46 | String romVal; // Equivalent roman numeral. 
47 | 48 | RomanValue(int dec, String rom) { 49 | this.intVal = dec; 50 | this.romVal = rom; 51 | } 52 | } 53 | } -------------------------------------------------------------------------------- /src/edu/unc/utils/Samtools.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | import java.nio.file.Path; 4 | 5 | import org.apache.log4j.Logger; 6 | 7 | import net.sf.picard.reference.IndexedFastaSequenceFile; 8 | 9 | /** 10 | * Helper methods for calling the samtools executable externally Note: If at all 11 | * possible, these should be avoided since they require the user to have 12 | * samtools installed and available in the PATH 13 | * 14 | * It is preferred to use functionality in SAM-JDK / Picard 15 | * 16 | * @author timpalpant 17 | * 18 | */ 19 | public class Samtools { 20 | 21 | private static final Logger log = Logger.getLogger(Samtools.class); 22 | 23 | /** 24 | * Index a FASTA file with 'samtools faidx' 25 | * 26 | * @param p 27 | * the FASTA file to index 28 | * @throws Exception 29 | * if the index is not created successfully 30 | */ 31 | public static void indexFasta(Path p) throws Exception { 32 | log.debug("Attempting to generate FASTA index by calling 'samtools faidx'"); 33 | 34 | try { 35 | Process proc = new ProcessBuilder("samtools", "faidx", p.toString()).start(); 36 | proc.waitFor(); 37 | } catch (Exception e) { 38 | log.error("Error attempting to call 'samtools faidx'. 
Is samtools available in the PATH?"); 39 | } finally { 40 | if (!IndexedFastaSequenceFile.canCreateIndexedFastaReader(p.toFile())) { 41 | log.error("Could not create FASTA index for file " + p); 42 | throw new Exception("Could not create FASTA index for file " + p); 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/edu/unc/utils/SequenceUtils.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | import net.sf.samtools.util.SequenceUtil; 4 | 5 | /** 6 | * Helper methods for working with sequence data 7 | * 8 | * @author timpalpant 9 | * 10 | */ 11 | public class SequenceUtils { 12 | /** 13 | * Search for the next index of a subsequence in a larger sequence, allowing 14 | * mismatches 15 | * 16 | * @param bases 17 | * the sequence to search in 18 | * @param nmer 19 | * the nmer to search for 20 | * @param allowedMismatches 21 | * the number of mismatches allowed 22 | * @param fromIndex 23 | * the index to start searching at 24 | * @return the index of the next match of nmer in bases, or -1 if no matches 25 | * are found 26 | */ 27 | public static int indexOf(byte[] bases, byte[] nmer, int allowedMismatches, int fromIndex) { 28 | for (int i = fromIndex; i < bases.length - nmer.length; i++) { 29 | int mismatches = 0; 30 | for (int j = 0; j < nmer.length; j++) { 31 | if (!SequenceUtil.basesEqual(bases[i + j], nmer[j])) { 32 | if (++mismatches > allowedMismatches) { 33 | break; 34 | } 35 | } 36 | } 37 | 38 | // If we found one at this position, return the index 39 | if (mismatches <= allowedMismatches) { 40 | return i; 41 | } 42 | } 43 | 44 | return -1; 45 | } 46 | 47 | /** 48 | * Search for the next index of a subsequence in a larger sequence, allowing 49 | * mismatches 50 | * 51 | * @param bases 52 | * the sequence to search in 53 | * @param nmer 54 | * the nmer to search for 55 | * @param allowedMismatches 56 | * the number of 
mismatches to allow 57 | * @return the index of the next match of nmer in bases, or -1 if no matches 58 | * are found 59 | */ 60 | public static int indexOf(byte[] bases, byte[] nmer, int allowedMismatches) { 61 | return indexOf(bases, nmer, allowedMismatches, 0); 62 | } 63 | 64 | /** 65 | * Search for the next index of a subsequence in a larger sequence, with no 66 | * mismatches 67 | * 68 | * @param bases 69 | * the sequence to search in 70 | * @param nmer 71 | * the nmer to search for 72 | * @return the index of the next match of nmer in bases, or -1 if no matches 73 | * are found 74 | */ 75 | public static int indexOf(byte[] bases, byte[] nmer) { 76 | return indexOf(bases, nmer, 0); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/edu/unc/utils/SortUtils.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | /** 4 | * Custom sorting utilities see: 5 | * http://stackoverflow.com/questions/951848/java-array 6 | * -sort-quick-way-to-get-a-sorted-list-of-indices-of-an-array 7 | * 8 | * @author timpalpant 9 | * 10 | */ 11 | public class SortUtils { 12 | /** 13 | * Sort an array in ascending order, but return the index of each sorted 14 | * element in the original array 15 | * 16 | * @param main 17 | * an array to sort in ascending order 18 | * @return the index of each sorted element in main 19 | */ 20 | public static int[] sortIndices(float[] main) { 21 | int[] index = new int[main.length]; 22 | for (int i = 0; i < index.length; i++) { 23 | index[i] = i; 24 | } 25 | 26 | quicksort(main, index, 0, index.length - 1); 27 | 28 | return index; 29 | } 30 | 31 | /** 32 | * Return the rank (in ascending order) of each element in an array 33 | * 34 | * @param main 35 | * an array to rank 36 | * @return the rank of each element in main 37 | */ 38 | public static int[] rank(float[] main) { 39 | int[] sortedIndices = sortIndices(main); 40 | int[] rank = 
new int[main.length]; 41 | for (int i = 0; i < rank.length; i++) { 42 | rank[sortedIndices[i]] = i + 1; 43 | } 44 | 45 | return rank; 46 | } 47 | 48 | // quicksort a[left] to a[right] 49 | private static void quicksort(float[] a, int[] index, int left, int right) { 50 | if (right <= left) { 51 | return; 52 | } 53 | 54 | int i = partition(a, index, left, right); 55 | quicksort(a, index, left, i - 1); 56 | quicksort(a, index, i + 1, right); 57 | } 58 | 59 | // partition a[left] to a[right], assumes left < right 60 | private static int partition(float[] a, int[] index, int left, int right) { 61 | int i = left - 1; 62 | int j = right; 63 | while (true) { 64 | // find item on left to swap 65 | while (a[index[++i]] < a[index[right]]) 66 | ; // a[right] acts as sentinel 67 | // find item on right to swap 68 | while (a[index[right]] < a[index[--j]]) { 69 | // don't go out-of-bounds 70 | if (j == left) { 71 | break; 72 | } 73 | } 74 | 75 | // check if pointers cross 76 | if (i >= j) { 77 | break; 78 | } 79 | 80 | swap(a, index, i, j); // swap two elements into place 81 | } 82 | 83 | swap(a, index, i, right); // swap with partition element 84 | return i; 85 | } 86 | 87 | // exchange a[i] and a[j] 88 | private static void swap(float[] a, int[] index, int i, int j) { 89 | int tmp = index[i]; 90 | index[i] = index[j]; 91 | index[j] = tmp; 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/edu/unc/utils/WigStatistic.java: -------------------------------------------------------------------------------- 1 | package edu.unc.utils; 2 | 3 | /** 4 | * An enumeration of the statistics that we know how to compute on Wig data 5 | * These correspond to the statistics that are built into the UCSC BigWig tools 6 | * 7 | * @author timpalpant 8 | * 9 | */ 10 | public enum WigStatistic { 11 | COVERAGE("coverage"), TOTAL("total"), MEAN("mean"), MIN("min"), MAX("max"); 12 | 13 | private String name; 14 | 15 | WigStatistic(final String name) { 
16 | this.name = name; 17 | } 18 | 19 | public static WigStatistic fromName(final String name) { 20 | for (WigStatistic dsm : WigStatistic.values()) { 21 | if (dsm.getName().equalsIgnoreCase(name)) { 22 | return dsm; 23 | } 24 | } 25 | 26 | return null; 27 | } 28 | 29 | /** 30 | * @return the name 31 | */ 32 | public String getName() { 33 | return name; 34 | } 35 | } -------------------------------------------------------------------------------- /src/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=debug, stdout 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | 6 | # Pattern to output the caller's file name and line number. 7 | log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n 8 | 9 | # Only output errors from the BigWig library 10 | log4j.logger.org.broad.igv.bbfile=ERROR 11 | # Only output info from java-genomics-io 12 | log4j.logger.edu.unc.genomics.io=DEBUG 13 | log4j.logger.edu.unc.genomics.util=INFO 14 | log4j.logger.edu.ucsc.genome=ERROR -------------------------------------------------------------------------------- /test-data/baseAlignCounts1.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 name='Converted test.sam' description='Converted test.sam' 2 | fixedStep chrom=2micron start=1 step=1 span=1 3 | -------------------------------------------------------------------------------- /test-data/bedGraphToWig.input.bedGraph: -------------------------------------------------------------------------------- 1 | chrI 9 14 10.0 2 | chrI 14 99 2.6 3 | chrII 19 24 6.0 4 | chrII 24 29 2.0 5 | chrIII 14 19 2.0 6 | chrIV 0 1 12.0 7 | chrIV 1 2 1.0 8 | chrIV 2 3 3.0 9 | chrIV 3 4 1.2 10 | chrIV 4 5 10.0 11 | -------------------------------------------------------------------------------- /test-data/divide1.wig: 
-------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 span=1 step=1 3 | 0.96153849 4 | 0.88235289 5 | NaN 6 | NaN 7 | NaN 8 | 2 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 0.625 14 | 0.95238101 15 | fixedStep chrom=chrI start=1 span=1 step=1 16 | 0.5 17 | 0.66666669 18 | 0.75 19 | 0.80000001 20 | 0.83333331 21 | 0.85714287 22 | 0.875 23 | 0.8888889 24 | 0.89999998 25 | 0.90909094 26 | 0.91666669 27 | 0.92307693 28 | 0.9285714 29 | 0.93333334 30 | 0.9375 31 | fixedStep chrom=chrXI start=25 span=4 step=5 32 | 1 33 | 2 34 | 9 35 | 0 36 | 1 37 | 11 38 | 0.5 39 | 1.66666663 40 | 3.5 41 | 8 42 | 4.5 43 | 0.22727273 44 | 0.66666669 45 | 1.5 46 | 0.2 47 | 0.25 48 | 0.66666669 49 | 1 50 | 1 51 | 1 52 | NaN 53 | 13 54 | 1.25 55 | 1.125 56 | 0.39285713 57 | -------------------------------------------------------------------------------- /test-data/divide2.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 span=1 step=1 3 | 5 4 | 2.60869575 5 | NaN 6 | NaN 7 | NaN 8 | 0.66666669 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | NaN 14 | NaN 15 | fixedStep chrom=chrI start=1 span=1 step=1 16 | 0.33333334 17 | 0.5 18 | 0.60000002 19 | 0.66666669 20 | 0.71428573 21 | 0.75 22 | 0.77777779 23 | 0.80000001 24 | 0.81818181 25 | 0.83333331 26 | 0.84615386 27 | 0.85714287 28 | 0.86666667 29 | 0.875 30 | 0.88235295 31 | fixedStep chrom=chrXI start=20 span=4 step=5 32 | 0 33 | 0.75 34 | 0.5714286 35 | 9 36 | 0 37 | 1.20000005 38 | 11 39 | 0.04477612 40 | 0.09259259 41 | 1.39999998 42 | 1 43 | 1 44 | 2.5 45 | 1.5 46 | 0.5 47 | 0.11111111 48 | 0.01298701 49 | 0.2857143 50 | 0.23076923 51 | 0.16 52 | 1.25 53 | 0.11111111 54 | 13 55 | 3 56 | 2.25 57 | 11 58 | -------------------------------------------------------------------------------- /test-data/divide3.wig: 
-------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 span=1 step=1 3 | 0.1923077 4 | 0.33823529 5 | NaN 6 | NaN 7 | NaN 8 | 3 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 0 14 | 0 15 | fixedStep chrom=chrI start=1 span=1 step=1 16 | 1.5 17 | 1.33333337 18 | 1.25 19 | 1.20000005 20 | 1.16666663 21 | 1.14285719 22 | 1.125 23 | 1.11111116 24 | 1.10000002 25 | 1.09090912 26 | 1.08333337 27 | 1.07692313 28 | 1.07142854 29 | 1.06666672 30 | 1.0625 31 | fixedStep chrom=chrXI start=25 span=4 step=5 32 | 1.33333337 33 | 3.5 34 | 1 35 | 0.03092784 36 | 0.83333331 37 | 1 38 | 11.16666698 39 | 18 40 | 2.5 41 | 8 42 | 4.5 43 | 0.09090909 44 | 0.44444445 45 | 3 46 | 1.79999995 47 | 19.25 48 | 2.33333325 49 | 4.33333349 50 | 6.25 51 | 0.80000001 52 | NaN 53 | 1 54 | 0.41666666 55 | 0.5 56 | 0.03571429 57 | -------------------------------------------------------------------------------- /test-data/downsample1.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 name='Downsampled wigmath1.wig' description='Downsampled wigmath1.wig' 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 5.0 4 | 6.0 5 | NaN 6 | NaN 7 | NaN 8 | 10.0 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 1.0 14 | fixedStep chrom=chrI start=1 step=1 span=1 15 | 1.0 16 | 2.0 17 | 3.0 18 | 4.0 19 | 5.0 20 | 6.0 21 | 7.0 22 | 8.0 23 | 9.0 24 | 10.0 25 | 11.0 26 | 12.0 27 | 13.0 28 | 14.0 29 | fixedStep chrom=chrXI start=20 step=1 span=1 30 | 0.0 31 | 0.0 32 | 0.0 33 | 0.0 34 | NaN 35 | 3.0 36 | 3.0 37 | 3.0 38 | 3.0 39 | NaN 40 | 4.0 41 | 4.0 42 | 4.0 43 | 4.0 44 | NaN 45 | 9.0 46 | 9.0 47 | 9.0 48 | 9.0 49 | NaN 50 | 0.0 51 | 0.0 52 | 0.0 53 | 0.0 54 | NaN 55 | 6.0 56 | 6.0 57 | 6.0 58 | 6.0 59 | NaN 60 | 44.0 61 | 44.0 62 | 44.0 63 | 44.0 64 | NaN 65 | 3.0 66 | 3.0 67 | 3.0 68 | 3.0 69 | NaN 70 | 5.0 71 | 5.0 72 | 5.0 73 | 5.0 74 | NaN 75 | 7.0 76 | 7.0 77 | 7.0 78 | 7.0 79 | 
NaN 80 | 8.0 81 | 8.0 82 | 8.0 83 | 8.0 84 | NaN 85 | 9.0 86 | 9.0 87 | 9.0 88 | 9.0 89 | NaN 90 | 5.0 91 | 5.0 92 | 5.0 93 | 5.0 94 | NaN 95 | 6.0 96 | 6.0 97 | 6.0 98 | 6.0 99 | NaN 100 | 3.0 101 | 3.0 102 | 3.0 103 | 3.0 104 | NaN 105 | 1.0 106 | 1.0 107 | 1.0 108 | 1.0 109 | NaN 110 | 1.0 111 | 1.0 112 | 1.0 113 | 1.0 114 | NaN 115 | 2.0 116 | 2.0 117 | 2.0 118 | 2.0 119 | NaN 120 | 3.0 121 | 3.0 122 | 3.0 123 | 3.0 124 | NaN 125 | 4.0 126 | 4.0 127 | 4.0 128 | 4.0 129 | NaN 130 | 5.0 131 | 5.0 132 | 5.0 133 | 5.0 134 | NaN 135 | 6.0 136 | 6.0 137 | 6.0 138 | 6.0 139 | NaN 140 | 13.0 141 | 13.0 142 | 13.0 143 | 13.0 144 | NaN 145 | 15.0 146 | 15.0 147 | 15.0 148 | 15.0 149 | NaN 150 | 18.0 151 | 18.0 152 | 18.0 153 | 18.0 154 | NaN 155 | 22.0 156 | 22.0 157 | 22.0 158 | -------------------------------------------------------------------------------- /test-data/downsample2.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 name='Downsampled wigmath2.bw' description='Downsampled wigmath2.bw' 2 | fixedStep chrom=2micron start=100 step=5 span=5 3 | 6.8 4 | 5.0 5 | 2.1 6 | fixedStep chrom=chrI start=1 step=5 span=5 7 | 6.0 8 | 11.0 9 | 16.0 10 | fixedStep chrom=chrXI start=20 step=5 span=5 11 | 0.0 12 | 3.0 13 | 2.0 14 | 1.0 15 | 97.0 16 | 6.0 17 | 4.0 18 | 6.0 19 | 3.0 20 | 2.0 21 | 1.0 22 | 2.0 23 | 22.0 24 | 9.0 25 | 2.0 26 | 5.0 27 | 4.0 28 | 3.0 29 | 3.0 30 | 4.0 31 | 5.0 32 | 0.0 33 | 1.0 34 | 12.0 35 | 16.0 36 | 56.0 37 | -------------------------------------------------------------------------------- /test-data/downsample3.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 name='Downsampled wigmath3.wig' description='Downsampled wigmath3.wig' 2 | fixedStep chrom=2micron start=100 step=100 span=100 3 | 18.3 4 | fixedStep chrom=chrI start=1 step=100 span=100 5 | 150.0 6 | fixedStep chrom=chrXI start=20 step=100 span=100 7 | 1264.0 8 | 296.0 9 | 
-------------------------------------------------------------------------------- /test-data/gaussian1.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=111 span=1 step=1 3 | NaN 4 | fixedStep chrom=chrI start=15 span=1 step=1 5 | NaN 6 | fixedStep chrom=chrXI start=148 span=1 step=1 7 | NaN 8 | -------------------------------------------------------------------------------- /test-data/gaussian2.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=111 span=1 step=1 3 | NaN 4 | fixedStep chrom=chrI start=15 span=1 step=1 5 | NaN 6 | fixedStep chrom=chrXI start=148 span=1 step=1 7 | NaN 8 | -------------------------------------------------------------------------------- /test-data/gaussian3.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=111 span=1 step=1 3 | NaN 4 | fixedStep chrom=chrI start=15 span=1 step=1 5 | NaN 6 | fixedStep chrom=chrXI start=148 span=1 step=1 7 | NaN 8 | -------------------------------------------------------------------------------- /test-data/logger1.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 2.321928 4 | 2.5849626 5 | NaN 6 | NaN 7 | NaN 8 | 3.321928 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 0.0 14 | 1.0 15 | fixedStep chrom=chrI start=1 step=1 span=1 16 | 0.0 17 | 1.0 18 | 1.5849625 19 | 2.0 20 | 2.321928 21 | 2.5849626 22 | 2.807355 23 | 3.0 24 | 3.169925 25 | 3.321928 26 | 3.4594316 27 | 3.5849626 28 | 3.7004397 29 | 3.807355 30 | 3.9068906 31 | fixedStep chrom=chrXI start=20 step=1 span=1 32 | -Infinity 33 | -Infinity 34 | -Infinity 35 | -Infinity 36 | NaN 37 | 1.5849625 38 | 1.5849625 39 | 1.5849625 40 | 1.5849625 41 | NaN 42 | 2.0 43 | 2.0 
44 | 2.0 45 | 2.0 46 | NaN 47 | 3.169925 48 | 3.169925 49 | 3.169925 50 | 3.169925 51 | NaN 52 | -Infinity 53 | -Infinity 54 | -Infinity 55 | -Infinity 56 | NaN 57 | 2.5849626 58 | 2.5849626 59 | 2.5849626 60 | 2.5849626 61 | NaN 62 | 5.4594316 63 | 5.4594316 64 | 5.4594316 65 | 5.4594316 66 | NaN 67 | 1.5849625 68 | 1.5849625 69 | 1.5849625 70 | 1.5849625 71 | NaN 72 | 2.321928 73 | 2.321928 74 | 2.321928 75 | 2.321928 76 | NaN 77 | 2.807355 78 | 2.807355 79 | 2.807355 80 | 2.807355 81 | NaN 82 | 3.0 83 | 3.0 84 | 3.0 85 | 3.0 86 | NaN 87 | 3.169925 88 | 3.169925 89 | 3.169925 90 | 3.169925 91 | NaN 92 | 2.321928 93 | 2.321928 94 | 2.321928 95 | 2.321928 96 | NaN 97 | 2.5849626 98 | 2.5849626 99 | 2.5849626 100 | 2.5849626 101 | NaN 102 | 1.5849625 103 | 1.5849625 104 | 1.5849625 105 | 1.5849625 106 | NaN 107 | 0.0 108 | 0.0 109 | 0.0 110 | 0.0 111 | NaN 112 | 0.0 113 | 0.0 114 | 0.0 115 | 0.0 116 | NaN 117 | 1.0 118 | 1.0 119 | 1.0 120 | 1.0 121 | NaN 122 | 1.5849625 123 | 1.5849625 124 | 1.5849625 125 | 1.5849625 126 | NaN 127 | 2.0 128 | 2.0 129 | 2.0 130 | 2.0 131 | NaN 132 | 2.321928 133 | 2.321928 134 | 2.321928 135 | 2.321928 136 | NaN 137 | 2.5849626 138 | 2.5849626 139 | 2.5849626 140 | 2.5849626 141 | NaN 142 | 3.7004397 143 | 3.7004397 144 | 3.7004397 145 | 3.7004397 146 | NaN 147 | 3.9068906 148 | 3.9068906 149 | 3.9068906 150 | 3.9068906 151 | NaN 152 | 4.169925 153 | 4.169925 154 | 4.169925 155 | 4.169925 156 | NaN 157 | 4.4594316 158 | 4.4594316 159 | 4.4594316 160 | 4.4594316 161 | -------------------------------------------------------------------------------- /test-data/logger2.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 1.5006738 4 | 1.7448581 5 | NaN 6 | NaN 7 | NaN 8 | 1.4649736 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 0.42781577 14 | 0.6753404 15 | fixedStep chrom=chrI start=1 step=1 span=1 16 | 0.63092977 17 | 1.0 18 | 
1.2618595 19 | 1.4649736 20 | 1.6309297 21 | 1.7712437 22 | 1.8927892 23 | 2.0 24 | 2.0959032 25 | 2.1826584 26 | 2.2618594 27 | 2.3347175 28 | 2.4021735 29 | 2.4649734 30 | 2.523719 31 | fixedStep chrom=chrXI start=20 step=1 span=1 32 | -Infinity 33 | -Infinity 34 | -Infinity 35 | -Infinity 36 | NaN 37 | 1.0 38 | 1.0 39 | 1.0 40 | 1.0 41 | NaN 42 | 0.63092977 43 | 0.63092977 44 | 0.63092977 45 | 0.63092977 46 | NaN 47 | 0.0 48 | 0.0 49 | 0.0 50 | 0.0 51 | NaN 52 | 4.1640816 53 | 4.1640816 54 | 4.1640816 55 | 4.1640816 56 | NaN 57 | 1.6309297 58 | 1.6309297 59 | 1.6309297 60 | 1.6309297 61 | NaN 62 | 1.2618595 63 | 1.2618595 64 | 1.2618595 65 | 1.2618595 66 | NaN 67 | 1.6309297 68 | 1.6309297 69 | 1.6309297 70 | 1.6309297 71 | NaN 72 | 1.0 73 | 1.0 74 | 1.0 75 | 1.0 76 | NaN 77 | 0.63092977 78 | 0.63092977 79 | 0.63092977 80 | 0.63092977 81 | NaN 82 | 0.0 83 | 0.0 84 | 0.0 85 | 0.0 86 | NaN 87 | 0.63092977 88 | 0.63092977 89 | 0.63092977 90 | 0.63092977 91 | NaN 92 | 2.8135881 93 | 2.8135881 94 | 2.8135881 95 | 2.8135881 96 | NaN 97 | 2.0 98 | 2.0 99 | 2.0 100 | 2.0 101 | NaN 102 | 0.63092977 103 | 0.63092977 104 | 0.63092977 105 | 0.63092977 106 | NaN 107 | 1.4649736 108 | 1.4649736 109 | 1.4649736 110 | 1.4649736 111 | NaN 112 | 1.2618595 113 | 1.2618595 114 | 1.2618595 115 | 1.2618595 116 | NaN 117 | 1.0 118 | 1.0 119 | 1.0 120 | 1.0 121 | NaN 122 | 1.0 123 | 1.0 124 | 1.0 125 | 1.0 126 | NaN 127 | 1.2618595 128 | 1.2618595 129 | 1.2618595 130 | 1.2618595 131 | NaN 132 | 1.4649736 133 | 1.4649736 134 | 1.4649736 135 | 1.4649736 136 | NaN 137 | -Infinity 138 | -Infinity 139 | -Infinity 140 | -Infinity 141 | NaN 142 | 0.0 143 | 0.0 144 | 0.0 145 | 0.0 146 | NaN 147 | 2.2618594 148 | 2.2618594 149 | 2.2618594 150 | 2.2618594 151 | NaN 152 | 2.523719 153 | 2.523719 154 | 2.523719 155 | 2.523719 156 | NaN 157 | 3.664033 158 | 3.664033 159 | 3.664033 160 | 3.664033 161 | -------------------------------------------------------------------------------- 
/test-data/logger3.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 0.0 4 | 0.36172783 5 | NaN 6 | NaN 7 | NaN 8 | 1.1760913 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | -Infinity 14 | -Infinity 15 | fixedStep chrom=chrI start=1 step=1 span=1 16 | 0.47712126 17 | 0.60206 18 | 0.69897 19 | 0.7781513 20 | 0.845098 21 | 0.90309 22 | 0.9542425 23 | 1.0 24 | 1.0413927 25 | 1.0791812 26 | 1.1139433 27 | 1.146128 28 | 1.1760913 29 | 1.20412 30 | 1.230449 31 | fixedStep chrom=chrXI start=20 step=1 span=1 32 | 0.7781513 33 | 0.7781513 34 | 0.7781513 35 | 0.7781513 36 | NaN 37 | 0.60206 38 | 0.60206 39 | 0.60206 40 | 0.60206 41 | NaN 42 | 0.845098 43 | 0.845098 44 | 0.845098 45 | 0.845098 46 | NaN 47 | 0.0 48 | 0.0 49 | 0.0 50 | 0.0 51 | NaN 52 | 0.47712126 53 | 0.47712126 54 | 0.47712126 55 | 0.47712126 56 | NaN 57 | 0.69897 58 | 0.69897 59 | 0.69897 60 | 0.69897 61 | NaN 62 | 0.60206 63 | 0.60206 64 | 0.60206 65 | 0.60206 66 | NaN 67 | 1.8260748 68 | 1.8260748 69 | 1.8260748 70 | 1.8260748 71 | NaN 72 | 1.7323937 73 | 1.7323937 74 | 1.7323937 75 | 1.7323937 76 | NaN 77 | 0.69897 78 | 0.69897 79 | 0.69897 80 | 0.69897 81 | NaN 82 | 0.90309 83 | 0.90309 84 | 0.90309 85 | 0.90309 86 | NaN 87 | 0.9542425 88 | 0.9542425 89 | 0.9542425 90 | 0.9542425 91 | NaN 92 | 0.30103 93 | 0.30103 94 | 0.30103 95 | 0.30103 96 | NaN 97 | 0.60206 98 | 0.60206 99 | 0.60206 100 | 0.60206 101 | NaN 102 | 0.7781513 103 | 0.7781513 104 | 0.7781513 105 | 0.7781513 106 | NaN 107 | 0.9542425 108 | 0.9542425 109 | 0.9542425 110 | 0.9542425 111 | NaN 112 | 1.8864907 113 | 1.8864907 114 | 1.8864907 115 | 1.8864907 116 | NaN 117 | 0.845098 118 | 0.845098 119 | 0.845098 120 | 0.845098 121 | NaN 122 | 1.1139433 123 | 1.1139433 124 | 1.1139433 125 | 1.1139433 126 | NaN 127 | 1.39794 128 | 1.39794 129 | 1.39794 130 | 1.39794 131 | NaN 132 | 0.60206 133 | 0.60206 134 | 0.60206 135 | 0.60206 136 | NaN 
137 | 1.7323937 138 | 1.7323937 139 | 1.7323937 140 | 1.7323937 141 | NaN 142 | 0.0 143 | 0.0 144 | 0.0 145 | 0.0 146 | NaN 147 | 0.69897 148 | 0.69897 149 | 0.69897 150 | 0.69897 151 | NaN 152 | 0.90309 153 | 0.90309 154 | 0.90309 155 | 0.90309 156 | NaN 157 | 0.30103 158 | 0.30103 159 | 0.30103 160 | 0.30103 161 | -------------------------------------------------------------------------------- /test-data/logger4.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 2.321928 4 | 2.5849626 5 | NaN 6 | NaN 7 | NaN 8 | 3.321928 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 0.0 14 | 1.0 15 | fixedStep chrom=chrI start=1 step=1 span=1 16 | 0.0 17 | 1.0 18 | 1.5849625 19 | 2.0 20 | 2.321928 21 | 2.5849626 22 | 2.807355 23 | 3.0 24 | 3.169925 25 | 3.321928 26 | 3.4594316 27 | 3.5849626 28 | 3.7004397 29 | 3.807355 30 | 3.9068906 31 | fixedStep chrom=chrXI start=20 step=1 span=1 32 | -Infinity 33 | -Infinity 34 | -Infinity 35 | -Infinity 36 | NaN 37 | 1.5849625 38 | 1.5849625 39 | 1.5849625 40 | 1.5849625 41 | NaN 42 | 2.0 43 | 2.0 44 | 2.0 45 | 2.0 46 | NaN 47 | 3.169925 48 | 3.169925 49 | 3.169925 50 | 3.169925 51 | NaN 52 | -Infinity 53 | -Infinity 54 | -Infinity 55 | -Infinity 56 | NaN 57 | 2.5849626 58 | 2.5849626 59 | 2.5849626 60 | 2.5849626 61 | NaN 62 | 5.4594316 63 | 5.4594316 64 | 5.4594316 65 | 5.4594316 66 | NaN 67 | 1.5849625 68 | 1.5849625 69 | 1.5849625 70 | 1.5849625 71 | NaN 72 | 2.321928 73 | 2.321928 74 | 2.321928 75 | 2.321928 76 | NaN 77 | 2.807355 78 | 2.807355 79 | 2.807355 80 | 2.807355 81 | NaN 82 | 3.0 83 | 3.0 84 | 3.0 85 | 3.0 86 | NaN 87 | 3.169925 88 | 3.169925 89 | 3.169925 90 | 3.169925 91 | NaN 92 | 2.321928 93 | 2.321928 94 | 2.321928 95 | 2.321928 96 | NaN 97 | 2.5849626 98 | 2.5849626 99 | 2.5849626 100 | 2.5849626 101 | NaN 102 | 1.5849625 103 | 1.5849625 104 | 1.5849625 105 | 1.5849625 106 | NaN 107 | 0.0 108 | 0.0 109 | 0.0 110 
| 0.0 111 | NaN 112 | 0.0 113 | 0.0 114 | 0.0 115 | 0.0 116 | NaN 117 | 1.0 118 | 1.0 119 | 1.0 120 | 1.0 121 | NaN 122 | 1.5849625 123 | 1.5849625 124 | 1.5849625 125 | 1.5849625 126 | NaN 127 | 2.0 128 | 2.0 129 | 2.0 130 | 2.0 131 | NaN 132 | 2.321928 133 | 2.321928 134 | 2.321928 135 | 2.321928 136 | NaN 137 | 2.5849626 138 | 2.5849626 139 | 2.5849626 140 | 2.5849626 141 | NaN 142 | 3.7004397 143 | 3.7004397 144 | 3.7004397 145 | 3.7004397 146 | NaN 147 | 3.9068906 148 | 3.9068906 149 | 3.9068906 150 | 3.9068906 151 | NaN 152 | 4.169925 153 | 4.169925 154 | 4.169925 155 | 4.169925 156 | NaN 157 | 4.4594316 158 | 4.4594316 159 | 4.4594316 160 | 4.4594316 161 | -------------------------------------------------------------------------------- /test-data/logger5.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 1.5006738 4 | 1.7448581 5 | NaN 6 | NaN 7 | NaN 8 | 1.4649736 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | 0.42781577 14 | 0.6753404 15 | fixedStep chrom=chrI start=1 step=1 span=1 16 | 0.63092977 17 | 1.0 18 | 1.2618595 19 | 1.4649736 20 | 1.6309297 21 | 1.7712437 22 | 1.8927892 23 | 2.0 24 | 2.0959032 25 | 2.1826584 26 | 2.2618594 27 | 2.3347175 28 | 2.4021735 29 | 2.4649734 30 | 2.523719 31 | fixedStep chrom=chrXI start=20 step=1 span=1 32 | -Infinity 33 | -Infinity 34 | -Infinity 35 | -Infinity 36 | NaN 37 | 1.0 38 | 1.0 39 | 1.0 40 | 1.0 41 | NaN 42 | 0.63092977 43 | 0.63092977 44 | 0.63092977 45 | 0.63092977 46 | NaN 47 | 0.0 48 | 0.0 49 | 0.0 50 | 0.0 51 | NaN 52 | 4.1640816 53 | 4.1640816 54 | 4.1640816 55 | 4.1640816 56 | NaN 57 | 1.6309297 58 | 1.6309297 59 | 1.6309297 60 | 1.6309297 61 | NaN 62 | 1.2618595 63 | 1.2618595 64 | 1.2618595 65 | 1.2618595 66 | NaN 67 | 1.6309297 68 | 1.6309297 69 | 1.6309297 70 | 1.6309297 71 | NaN 72 | 1.0 73 | 1.0 74 | 1.0 75 | 1.0 76 | NaN 77 | 0.63092977 78 | 0.63092977 79 | 0.63092977 80 | 0.63092977 81 | 
NaN 82 | 0.0 83 | 0.0 84 | 0.0 85 | 0.0 86 | NaN 87 | 0.63092977 88 | 0.63092977 89 | 0.63092977 90 | 0.63092977 91 | NaN 92 | 2.8135881 93 | 2.8135881 94 | 2.8135881 95 | 2.8135881 96 | NaN 97 | 2.0 98 | 2.0 99 | 2.0 100 | 2.0 101 | NaN 102 | 0.63092977 103 | 0.63092977 104 | 0.63092977 105 | 0.63092977 106 | NaN 107 | 1.4649736 108 | 1.4649736 109 | 1.4649736 110 | 1.4649736 111 | NaN 112 | 1.2618595 113 | 1.2618595 114 | 1.2618595 115 | 1.2618595 116 | NaN 117 | 1.0 118 | 1.0 119 | 1.0 120 | 1.0 121 | NaN 122 | 1.0 123 | 1.0 124 | 1.0 125 | 1.0 126 | NaN 127 | 1.2618595 128 | 1.2618595 129 | 1.2618595 130 | 1.2618595 131 | NaN 132 | 1.4649736 133 | 1.4649736 134 | 1.4649736 135 | 1.4649736 136 | NaN 137 | -Infinity 138 | -Infinity 139 | -Infinity 140 | -Infinity 141 | NaN 142 | 0.0 143 | 0.0 144 | 0.0 145 | 0.0 146 | NaN 147 | 2.2618594 148 | 2.2618594 149 | 2.2618594 150 | 2.2618594 151 | NaN 152 | 2.523719 153 | 2.523719 154 | 2.523719 155 | 2.523719 156 | NaN 157 | 3.664033 158 | 3.664033 159 | 3.664033 160 | 3.664033 161 | -------------------------------------------------------------------------------- /test-data/logger6.wig: -------------------------------------------------------------------------------- 1 | track type=wiggle_0 2 | fixedStep chrom=2micron start=100 step=1 span=1 3 | 0.0 4 | 0.36172783 5 | NaN 6 | NaN 7 | NaN 8 | 1.1760913 9 | NaN 10 | NaN 11 | NaN 12 | NaN 13 | -Infinity 14 | -Infinity 15 | fixedStep chrom=chrI start=1 step=1 span=1 16 | 0.47712126 17 | 0.60206 18 | 0.69897 19 | 0.7781513 20 | 0.845098 21 | 0.90309 22 | 0.9542425 23 | 1.0 24 | 1.0413927 25 | 1.0791812 26 | 1.1139433 27 | 1.146128 28 | 1.1760913 29 | 1.20412 30 | 1.230449 31 | fixedStep chrom=chrXI start=20 step=1 span=1 32 | 0.7781513 33 | 0.7781513 34 | 0.7781513 35 | 0.7781513 36 | NaN 37 | 0.60206 38 | 0.60206 39 | 0.60206 40 | 0.60206 41 | NaN 42 | 0.845098 43 | 0.845098 44 | 0.845098 45 | 0.845098 46 | NaN 47 | 0.0 48 | 0.0 49 | 0.0 50 | 0.0 51 | NaN 52 | 0.47712126 53 
| 0.47712126 54 | 0.47712126 55 | 0.47712126 56 | NaN 57 | 0.69897 58 | 0.69897 59 | 0.69897 60 | 0.69897 61 | NaN 62 | 0.60206 63 | 0.60206 64 | 0.60206 65 | 0.60206 66 | NaN 67 | 1.8260748 68 | 1.8260748 69 | 1.8260748 70 | 1.8260748 71 | NaN 72 | 1.7323937 73 | 1.7323937 74 | 1.7323937 75 | 1.7323937 76 | NaN 77 | 0.69897 78 | 0.69897 79 | 0.69897 80 | 0.69897 81 | NaN 82 | 0.90309 83 | 0.90309 84 | 0.90309 85 | 0.90309 86 | NaN 87 | 0.9542425 88 | 0.9542425 89 | 0.9542425 90 | 0.9542425 91 | NaN 92 | 0.30103 93 | 0.30103 94 | 0.30103 95 | 0.30103 96 | NaN 97 | 0.60206 98 | 0.60206 99 | 0.60206 100 | 0.60206 101 | NaN 102 | 0.7781513 103 | 0.7781513 104 | 0.7781513 105 | 0.7781513 106 | NaN 107 | 0.9542425 108 | 0.9542425 109 | 0.9542425 110 | 0.9542425 111 | NaN 112 | 1.8864907 113 | 1.8864907 114 | 1.8864907 115 | 1.8864907 116 | NaN 117 | 0.845098 118 | 0.845098 119 | 0.845098 120 | 0.845098 121 | NaN 122 | 1.1139433 123 | 1.1139433 124 | 1.1139433 125 | 1.1139433 126 | NaN 127 | 1.39794 128 | 1.39794 129 | 1.39794 130 | 1.39794 131 | NaN 132 | 0.60206 133 | 0.60206 134 | 0.60206 135 | 0.60206 136 | NaN 137 | 1.7323937 138 | 1.7323937 139 | 1.7323937 140 | 1.7323937 141 | NaN 142 | 0.0 143 | 0.0 144 | 0.0 145 | 0.0 146 | NaN 147 | 0.69897 148 | 0.69897 149 | 0.69897 150 | 0.69897 151 | NaN 152 | 0.90309 153 | 0.90309 154 | 0.90309 155 | 0.90309 156 | NaN 157 | 0.30103 158 | 0.30103 159 | 0.30103 160 | 0.30103 161 | -------------------------------------------------------------------------------- /test-data/romanNumeralize.input: -------------------------------------------------------------------------------- 1 | chr1 10 30 Spot1 10 2 | chr2 100 95 Spot2 13.2 3 | chr3 20 50 Spot3 5.0 + 4 | chr14 15 20 Spot4 2.0 - 5 | illegal entry 6 | chr4 100200 100220 Spot5 10.0 - 7 | chr5 1000000 1001000 Spot6 1 8 | chr6 0 12 Spot7 12 + 9 | chr7 1 10 Spot8 1.0 - 10 | chr8 15 20 Spot9 . 
+ 11 | chr9 25 10 Spot1 12 | illegal entry 13 | chr16 1 14 | -------------------------------------------------------------------------------- /test-data/romanNumeralize.output: -------------------------------------------------------------------------------- 1 | chrI 10 30 Spot1 10 2 | chrII 100 95 Spot2 13.2 3 | chrIII 20 50 Spot3 5.0 + 4 | chrXIV 15 20 Spot4 2.0 - 5 | illegal entry 6 | chrIV 100200 100220 Spot5 10.0 - 7 | chrV 1000000 1001000 Spot6 1 8 | chrVI 0 12 Spot7 12 + 9 | chrVII 1 10 Spot8 1.0 - 10 | chrVIII 15 20 Spot9 . + 11 | chrIX 25 10 Spot1 12 | illegal entry 13 | chrXVI 1 14 | -------------------------------------------------------------------------------- /test-data/stripMatrix.txt: -------------------------------------------------------------------------------- 1 | 0.263444282 0.378414242 0.26688602 0.835096585 0.857437429 2 | 0.841166712 0.004052166 0.663859714 0.130333189 0.307903778 3 | 0.842871693 0.676914668 0.678535517 0.15927136 0.152751526 4 | -------------------------------------------------------------------------------- /test-data/test.fasta.fai: -------------------------------------------------------------------------------- 1 | 2micron 6318 9 50 51 2 | -------------------------------------------------------------------------------- /test-data/test.fastqillumina: -------------------------------------------------------------------------------- 1 | @GAII03:1:1:7:1061#0/1 2 | TGGTAAGTTCGCATNTATTATACAGGTTTTTGGTTCAAAAGTANTGATTAG 3 | +GAII03:1:1:7:1061#0/1 4 | a]XX]\YUSPNZWMDO\^_TSZBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 5 | @GAII03:1:1:7:822#0/1 6 | ACCATATGGGTGTCTTGGATCATTGATCAATTAGACTGGCGTTNAATGGCG 7 | +GAII03:1:1:7:822#0/1 8 | a^USY_VSKDKVWXV]TSMJNWVYQWWZWBBBBBBBBBBBBBBBBBBBBBB 9 | @GAII03:1:1:7:183#0/1 10 | TCACGGTCAGCAGCATTGCTTTGTGACTTTGGCGATTGACATTNAGCAGAG 11 | +GAII03:1:1:7:183#0/1 12 | aab^\UY]a][aP_a_[__^_\_[Ta_`MRGRYT]U\U_]BBBBBBBBBBB 13 | @GAII03:1:1:7:1306#0/1 14 | AGCGGATTCCGACTTCCATGGCCACCGTCCGGCTGTCTATATTNACTAAGA 15 | 
+GAII03:1:1:7:1306#0/1 16 | aSXUTYUWZSSWS[TOP\YRNUZXBBBBBBBBBBBBBBBBBBBBBBBBBBB 17 | @GAII03:1:1:7:1942#0/1 18 | AGTCTCACCAAAAAACGATAAATCTGCAAGACAACATGCAGGTNTAGCTGG 19 | +GAII03:1:1:7:1942#0/1 20 | a`_`a^a\^`a`Xa]YW_aa^]_Y[Z^^`WY^__]_]]]\WUDDXXPP[VV 21 | @GAII03:1:1:7:1498#0/1 22 | AGGGGAACTGGCGAGTTATTTTGTTGGGATGGATGTATAGTTTNATTGCTG 23 | +GAII03:1:1:7:1498#0/1 24 | a`a_\^ZP[\QXV[GY_^`R\]`[aYaY_TOXUWFFMYYTWBBBBBBBBBB 25 | @GAII03:1:1:7:1110#0/1 26 | TCCGCCTCCGCGTCTTGTTCCTTTAAACTATTGACTTCATGTTNTACATTT 27 | +GAII03:1:1:7:1110#0/1 28 | \a`^]_YX]^_XX`_aZ[__TDP`]__U^`_\G`Z^]BBBBBBBBBBBBBB 29 | @GAII03:1:1:7:1806#0/1 30 | TGTGGTAGATTGAGATTGAGAGCGCTGCCTGTTAACTATCGGANTATTAAG 31 | +GAII03:1:1:7:1806#0/1 32 | aaYa`W`^][W[WWX\_R]\YU[\_][Y\TTXY]\Z^_XBBBBBBBBBBBB 33 | @GAII03:1:1:7:1170#0/1 34 | TACCATCTTTGGATAGAGCCTTGGAGATATCTGGCTTTAATCTNCTTTAGT 35 | +GAII03:1:1:7:1170#0/1 36 | a`Z]aa^``]JMZ`]P]V^[`_UWXN]_^FXVFNRXZ^BBBBBBBBBBBBB 37 | @GAII03:1:1:7:1652#0/1 38 | TACAAAATATTGAAAAGAAGGCACGTCAAAAAAGCGCCATCGANAAACAAA 39 | +GAII03:1:1:7:1652#0/1 40 | `baaaab`a`_Xabaa^aaU[Yb_]a`aa`aa]H`___aLOFNDX_XG[^` 41 | -------------------------------------------------------------------------------- /test-data/test.fastqsanger: -------------------------------------------------------------------------------- 1 | @GAII03:1:1:7:1061#0/1 2 | TGGTAAGTTCGCATNTATTATACAGGTTTTTGGTTCAAAAGTANTGATTAG 3 | +GAII03:1:1:7:1061#0/1 4 | B>99>=:641/;8.%0=?@54;############################# 5 | @GAII03:1:1:7:822#0/1 6 | ACCATATGGGTGTCTTGGATCATTGATCAATTAGACTGGCGTTNAATGGCG 7 | +GAII03:1:1:7:822#0/1 8 | B?64:@74,%,7897>54.+/87:288;8###################### 9 | @GAII03:1:1:7:183#0/1 10 | TCACGGTCAGCAGCATTGCTTTGTGACTTTGGCGATTGACATTNAGCAGAG 11 | +GAII03:1:1:7:183#0/1 12 | BBC?=6:>B>6=6@>########### 13 | @GAII03:1:1:7:1306#0/1 14 | AGCGGATTCCGACTTCCATGGCCACCGTCCGGCTGTCTATATTNACTAAGA 15 | +GAII03:1:1:7:1306#0/1 16 | B4965:68;4484<501=:3/6;9########################### 17 | @GAII03:1:1:7:1942#0/1 18 | 
AGTCTCACCAAAAAACGATAAATCTGCAAGACAACATGCAGGTNTAGCTGG 19 | +GAII03:1:1:7:1942#0/1 20 | BA@AB?B=?ABA9B>:8@BB?>@:<;??A8:?@@>@>>>=86%%9911<77 21 | @GAII03:1:1:7:1498#0/1 22 | AGGGGAACTGGCGAGTTATTTTGTTGGGATGGATGTATAGTTTNATTGCTG 23 | +GAII03:1:1:7:1498#0/1 24 | BAB@=?;1<=297<(:@?A3=>A@:9>?@99A@B;<@@5%1A>@@6?A@=(A;?>############## 29 | @GAII03:1:1:7:1806#0/1 30 | TGTGGTAGATTGAGATTGAGAGCGCTGCCTGTTAACTATCGGANTATTAAG 31 | +GAII03:1:1:7:1806#0/1 32 | BB:BA8A?><8<889=@3>=:6<=@><:=559:>=;?@9############ 33 | @GAII03:1:1:7:1170#0/1 34 | TACCATCTTTGGATAGAGCCTTGGAGATATCTGGCTTTAATCTNCTTTAGT 35 | +GAII03:1:1:7:1170#0/1 36 | BA;>BB?AA>+.;A>1>7?@?'97'/39;?############# 37 | @GAII03:1:1:7:1652#0/1 38 | TACAAAATATTGAAAAGAAGGCACGTCAAAAAAGCGCCATCGANAAACAAA 39 | +GAII03:1:1:7:1652#0/1 40 | ACBBBBCABA@9BCBB?BB6<:C@>BABBABB>)A@@@B-0'/%9@9(