├── .classpath
├── .gitignore
├── .project
├── META-INF
└── MANIFEST.MF
├── README.rdoc
├── build.xml
├── dist
└── java-genomics-toolkit.jar
├── galaxy-conf
├── Add.xml
├── Autocorrelation.xml
├── Average.xml
├── BaseAlignCounts.xml
├── Correlate.xml
├── DNAPropertyCalculator.xml
├── Divide.xml
├── Downsample.xml
├── DynaPro.xml
├── ExtractDataFromRegion.xml
├── FastqIlluminaToSanger.xml
├── FindAbsoluteMaxima.xml
├── FindBoundaryNucleosomes.xml
├── FindNMers.xml
├── FindOutlierRegions.xml
├── GaussianSmooth.xml
├── GeneTrackToBedGraph.xml
├── GeneTrackToWig.xml
├── GreedyCaller.xml
├── InterpolateDiscontinuousData.xml
├── IntervalAverager.xml
├── IntervalLengthDistribution.xml
├── IntervalStats.xml
├── IntervalToBed.xml
├── IntervalToWig.xml
├── KMeans.xml
├── LogTransform.xml
├── MapDyads.xml
├── MatrixAligner.xml
├── MovingAverageSmooth.xml
├── Multiply.xml
├── PairOverlappingNucleosomes.xml
├── PercusDecomposition.xml
├── Phasogram.xml
├── PowerSpectrum.xml
├── ReadLengthDistributionMatrix.xml
├── RollingReadLength.xml
├── RomanNumeralize.xml
├── Scale.xml
├── Shift.xml
├── StripMatrix.xml
├── Subsample.xml
├── Subtract.xml
├── Summary.xml
├── ValueDistribution.xml
├── WaveletTransform.xml
├── ZScore.xml
├── galaxyToolRunner.sh
├── log4j.properties
└── matrix2png.xml
├── galaxyToolConf.xml
├── lib
├── BigWig.jar
├── JLargeArrays-1.2.jar
├── JTransforms-3.0.jar
├── commons-lang3-3.1.jar
├── commons-math3-3.0.jar
├── dnaproperties-1732.jar
├── hamcrest-core-1.1.0.jar
├── java-genomics-io.jar
├── jcommander-1.27.jar
├── junit.jar
├── log4j-1.2.15.jar
├── picard-1.67.jar
└── sam-1.67.jar
├── license.txt
├── log4j.properties
├── resources
└── assemblies
│ ├── ce10.len
│ ├── ce2.len
│ ├── ce3.len
│ ├── ce4.len
│ ├── ce5.len
│ ├── ce6.len
│ ├── ce7.len
│ ├── ce8.len
│ ├── ce9.len
│ ├── dm1.len
│ ├── dm2.len
│ ├── dm3.len
│ ├── hg15.len
│ ├── hg16.len
│ ├── hg17.len
│ ├── hg18.len
│ ├── hg19.len
│ ├── hg19Haps.len
│ ├── hg19Patch2.len
│ ├── hg38.len
│ ├── klac.len
│ ├── kwal.len
│ ├── sacCer1.len
│ ├── sacCer2.len
│ └── sacCer3.len
├── sam_fa_indices.loc.sample
├── src
├── edu
│ └── unc
│ │ ├── genomics
│ │ ├── AssemblyConverter.java
│ │ ├── AssemblyFactory.java
│ │ ├── CommandLineTool.java
│ │ ├── CommandLineToolException.java
│ │ ├── KMeansRow.java
│ │ ├── NucleosomeCall.java
│ │ ├── NucleosomeCallsFileReader.java
│ │ ├── PathConverter.java
│ │ ├── PathFactory.java
│ │ ├── ReadMapperTool.java
│ │ ├── ReadablePathValidator.java
│ │ ├── WigAnalysisTool.java
│ │ ├── WigMathTool.java
│ │ ├── converters
│ │ │ ├── FastqIlluminaToSanger.java
│ │ │ ├── GeneTrackToBedGraph.java
│ │ │ ├── GeneTrackToWig.java
│ │ │ ├── InterpolateDiscontinuousData.java
│ │ │ ├── IntervalToBed.java
│ │ │ ├── IntervalToWig.java
│ │ │ └── RomanNumeralize.java
│ │ ├── dna
│ │ │ ├── DNAPropertyCalculator.java
│ │ │ └── FindNMers.java
│ │ ├── ngs
│ │ │ ├── Autocorrelation.java
│ │ │ ├── BaseAlignCounts.java
│ │ │ ├── ExtractDataFromRegion.java
│ │ │ ├── FilterRegions.java
│ │ │ ├── FindAbsoluteMaxima.java
│ │ │ ├── FindOutlierRegions.java
│ │ │ ├── FragmentLengthDistributionByGene.java
│ │ │ ├── IntervalLengthDistribution.java
│ │ │ ├── IntervalStats.java
│ │ │ ├── PowerSpectrum.java
│ │ │ ├── ReadLengthDistributionMatrix.java
│ │ │ ├── RollingReadLength.java
│ │ │ ├── SplitReads.java
│ │ │ ├── SplitWigIntervals.java
│ │ │ ├── Subsample.java
│ │ │ └── WaveletTransform.java
│ │ ├── nucleosomes
│ │ │ ├── DynaPro.java
│ │ │ ├── FindBoundaryNucleosomes.java
│ │ │ ├── GreedyCaller.java
│ │ │ ├── IntervalEntropy.java
│ │ │ ├── MapDyads.java
│ │ │ ├── PairOverlappingNucleosomes.java
│ │ │ ├── PercusDecomposition.java
│ │ │ ├── Phasogram.java
│ │ │ └── PredictDinucleosomes.java
│ │ ├── visualization
│ │ │ ├── IntervalAverager.java
│ │ │ ├── KMeans.java
│ │ │ ├── MatrixAligner.java
│ │ │ └── StripMatrix.java
│ │ └── wigmath
│ │ │ ├── Add.java
│ │ │ ├── Average.java
│ │ │ ├── Correlate.java
│ │ │ ├── Divide.java
│ │ │ ├── Downsample.java
│ │ │ ├── ExtractRegion.java
│ │ │ ├── GaussianSmooth.java
│ │ │ ├── LogTransform.java
│ │ │ ├── MovingAverageSmooth.java
│ │ │ ├── MovingEntropy.java
│ │ │ ├── Multiply.java
│ │ │ ├── Root.java
│ │ │ ├── Scale.java
│ │ │ ├── Shift.java
│ │ │ ├── StandardDeviation.java
│ │ │ ├── Subtract.java
│ │ │ ├── Summary.java
│ │ │ ├── ValueDistribution.java
│ │ │ └── ZScore.java
│ │ └── utils
│ │ ├── ArrayScaler.java
│ │ ├── ArrayUtils.java
│ │ ├── FFTUtils.java
│ │ ├── FloatCorrelation.java
│ │ ├── FloatHistogram.java
│ │ ├── RomanNumeral.java
│ │ ├── Samtools.java
│ │ ├── SequenceUtils.java
│ │ ├── SortUtils.java
│ │ └── WigStatistic.java
└── log4j.properties
├── test-data
├── baseAlignCounts1.wig
├── bedGraphToWig.input.bedGraph
├── divide1.wig
├── divide2.wig
├── divide3.wig
├── downsample1.wig
├── downsample2.wig
├── downsample3.wig
├── gaussian1.wig
├── gaussian2.wig
├── gaussian3.wig
├── geneTrackToBedGraph.bedGraph
├── geneTrackToWig1.wig
├── geneTrackToWig2.wig
├── intervallengthdistribution1.txt
├── intervallengthdistribution2.txt
├── intervallengthdistribution3.txt
├── logger1.wig
├── logger2.wig
├── logger3.wig
├── logger4.wig
├── logger5.wig
├── logger6.wig
├── romanNumeralize.input
├── romanNumeralize.output
├── stripMatrix.txt
├── test.bed
├── test.fasta
├── test.fasta.fai
├── test.fastqillumina
├── test.fastqsanger
├── test.genetrack
├── test.matrix2png.txt
├── test2.bed
├── twist.normalized.wig
├── twist.wig
├── wigmath1.bw
├── wigmath1.wig
├── wigmath2.bw
├── wigmath2.wig
├── wigmath3.bw
├── wigmath3.wig
├── wigsummary1.txt
├── wigsummary2.txt
├── wigsummary3.txt
├── zscorer1.wig
├── zscorer2.wig
├── zscorer3.wig
├── zscorer4.wig
├── zscorer5.wig
└── zscorer6.wig
├── test
└── edu
│ └── unc
│ └── utils
│ ├── ArrayScalerTest.java
│ ├── ArrayUtilsTest.java
│ ├── FloatCorrelationTest.java
│ ├── FloatHistogramTest.java
│ ├── RomanNumeralTest.java
│ └── SortUtilsTest.java
├── toolRunner.bat
├── toolRunner.sh
└── tool_data_table_conf.xml.sample
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin
2 | build
3 | reports
4 | docs
5 | *.class
6 | *.log
7 | .DS_Store
8 | *.wig.idx
9 | *.tbi
10 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | java-genomics-toolkit
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/META-INF/MANIFEST.MF:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | Main-Class: edu.unc.genomics.GenomicsToolkit
3 | Bundle-ManifestVersion: 2
4 | Bundle-Name: java-genomics-toolkit
5 | Bundle-SymbolicName: java-genomics-toolkit
6 | Bundle-Version: 1.0.0
7 | Bundle-RequiredExecutionEnvironment: JavaSE-1.7
8 | Bundle-Description: This project provides tools for common genomic data processing.
9 | Bundle-DocURL: http://github.com/timpalpant/java-genomics-toolkit
--------------------------------------------------------------------------------
/dist/java-genomics-toolkit.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/dist/java-genomics-toolkit.jar
--------------------------------------------------------------------------------
/galaxy-conf/Add.xml:
--------------------------------------------------------------------------------
1 |
2 | multiple (Big)Wig files
3 |
4 | galaxyToolRunner.sh wigmath.Add -o $output
5 | #for $input in $inputs
6 | ${input.file}
7 | #end for
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | This tool will add all values in the specified Wig files base pair by base pair.
21 |
22 | .. class:: infomark
23 |
24 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/galaxy-conf/Autocorrelation.xml:
--------------------------------------------------------------------------------
1 |
2 | of data in a Wiggle file
3 | galaxyToolRunner.sh ngs.Autocorrelation -i $input -l $windows -m $max -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | This tool computes the unnormalized autocovariance_ of intervals of data in a Wig file.
16 |
17 | .. _autocovariance: http://en.wikipedia.org/wiki/Autocorrelation
18 |
19 | -----
20 |
21 | **Syntax**
22 |
23 | - **Input data** is the genomic data on which to compute the autocorrelation.
24 | - **List of intervals:** The autocorrelation will be computed for each genomic interval specified in this list.
25 | - **Maximum shift:** In computing the autocorrelation, the data will be phase-shifted up to this limit.
26 |
27 | -----
28 |
29 | .. class:: infomark
30 |
31 | **TIP:** For more information, see Wikipedia_ (right click to open this link in another window).
32 |
33 | .. _Wikipedia: http://en.wikipedia.org/wiki/Autocorrelation
34 |
35 | .. class:: infomark
36 |
37 | **TIP:** If your input data does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format. Similarly, the intervals must be in either Bed, BedGraph, or GFF format.
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/galaxy-conf/Average.xml:
--------------------------------------------------------------------------------
1 |
2 | multiple (Big)Wig files
3 |
4 | galaxyToolRunner.sh wigmath.Average -o $output $file1 $file2
5 | #for $input in $inputs
6 | ${input.file}
7 | #end for
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | This tool will average the values of the provided Wig files, base pair by base pair.
27 |
28 | .. class:: infomark
29 |
30 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/galaxy-conf/BaseAlignCounts.xml:
--------------------------------------------------------------------------------
1 |
2 | of sequencing reads
3 | galaxyToolRunner.sh ngs.BaseAlignCounts -i $input -a ${chromInfo} -x $X -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | This tool produces a new Wig file with the number of reads/intervals overlapping each base pair. Reads can be artificially extended to match known fragment lengths. If you wish to count the number of reads starting at each base pair, set the read extension to 1. If you wish to count the number of intervals overlapping each base pair, set the extension to -1.
16 |
17 | -----
18 |
19 | .. class:: warningmark
20 |
21 | This tool requires sequencing reads in SAM, BAM, Bed, or BedGraph format. If you are artificially extending reads, ensure that the strand is set correctly in SAM, BAM, and Bed files.
22 |
23 | .. class:: warningmark
24 |
25 | Paired-end reads are considered to be the entire fragment (the distance from the 5' end of mate 1 to the 5' end of mate 2) if the extension is set to -1.
26 |
27 | .. class:: infomark
28 |
29 | If you would like to convert valued interval data (e.g. BedGraph files from microarrays) to Wig format, use the Converters -> Interval to Wig converter.
30 |
31 | .. class:: infomark
32 |
33 | **TIP:** If you are going to be using reads in SAM format for multiple analyses, it is often more efficient to first convert it into BAM format using NGS: SAM Tools -> SAM-to-BAM.
34 |
35 | -----
36 |
37 | **Syntax**
38 |
39 | - **Sequencing reads** are mapped reads from a high-throughput sequencing experiment.
40 | - **In silico extension:** Reads will be artificially extended from their 5' end to be this length.
41 |
42 |
43 |
44 |
--------------------------------------------------------------------------------
/galaxy-conf/Divide.xml:
--------------------------------------------------------------------------------
1 |
2 | two (Big)Wig files
3 | galaxyToolRunner.sh wigmath.Divide -n $dividend -d $divisor -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | .. class:: infomark
32 |
33 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/galaxy-conf/Downsample.xml:
--------------------------------------------------------------------------------
1 |
2 | a (Big)Wig file
3 | galaxyToolRunner.sh wigmath.Downsample -i $input -m $metric -w $window -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 | This tool can be used to reduce the resolution and file size of Wig files for easier upload to UCSC. Data is downsampled in non-overlapping windows starting from the beginning of each chromosome. Each window can be downsampled as the mean, minimum, maximum, total, or coverage of the original data.
43 |
44 | -----
45 |
46 | **Downsampling Methods**
47 |
48 | - **Mean:** the arithmetic mean of the values in the original data window
49 | - **Minimum:** the least value in the original data window
50 | - **Maximum:** the greatest value in the original data window
51 | - **Coverage:** the fraction of bases with values in the original window
52 | - **Total:** the sum of all values in the original data window
53 |
54 | -----
55 |
56 | .. class:: infomark
57 |
58 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
59 |
60 |
61 |
62 |
--------------------------------------------------------------------------------
/galaxy-conf/DynaPro.xml:
--------------------------------------------------------------------------------
1 |
2 | using DynaPro
3 | galaxyToolRunner.sh nucleosomes.DynaPro -i $input -n $N
4 | #if str( $mean ) != ''
5 | -m $mean
6 | #end if
7 |
8 | #if str( $variance ) != ''
9 | -v $variance
10 | #end if
11 | -o $output
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 | .. class:: warningmark
25 |
26 | At present, this tool is only suitable for small genomes (yeast) since entire chromosomes must be loaded into memory.
27 |
28 | -----
29 |
30 | Equilibrium nucleosome distribution is modeled as a one-dimensional fluid of hard rods adsorbing and moving within an external potential. This tool provides a simplified version of the DynaPro_ algorithm for a single factor interacting with hard-core repulsion.
31 |
32 | .. _DynaPro: http://nucleosome.rutgers.edu/nucleosome/
33 |
34 | -----
35 |
36 | **Syntax**
37 |
38 | - **Energy landscape** is the external potential function for each genomic base pair, and must be in Wig format.
39 | - **Nucleosome size** is the hard-core interaction size.
40 |
41 | -----
42 |
43 | **Citation**
44 |
45 | Morozov AV, Fortney K, Gaykalova DA, Studitsky VM, Widom J and Siggia ED (2009) Using DNA mechanics to predict in vitro nucleosome positions and formation energies. Nucleic Acids Res 37: 4707–4722.
46 |
47 |
48 |
49 |
--------------------------------------------------------------------------------
/galaxy-conf/ExtractDataFromRegion.xml:
--------------------------------------------------------------------------------
1 |
28 |
--------------------------------------------------------------------------------
/galaxy-conf/FastqIlluminaToSanger.xml:
--------------------------------------------------------------------------------
1 |
2 | from Illumina to Sanger
3 | galaxyToolRunner.sh converters.FastqIlluminaToSanger -i $input -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | This tool will convert a FASTQ file with ASCII quality scores encoded in Illumina 1.3-1.7 format (Phred+64) to Sanger format (Phred+33) for use with Bowtie and other Galaxy tools. Illumina CASAVA >= 1.8 already produces FASTQ files in Sanger format, so this tool should not be used on new Illumina sequencing data. This tool is a simpler alternative to the FASTQ Groomer that does little error checking but runs much faster. If you are unsure what format your file is in, or need to do other conversions, use the FASTQ Groomer instead.
19 |
20 | For more information, read about FASTQ formats_ (right-click to open in new window).
21 |
22 | .. _formats: http://en.wikipedia.org/wiki/FASTQ_format
23 |
24 | .. class:: warningmark
25 |
26 | This tool requires fastqillumina formatted data. If you have fastq data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset.
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/galaxy-conf/FindAbsoluteMaxima.xml:
--------------------------------------------------------------------------------
1 |
2 | in intervals
3 |
4 | galaxyToolRunner.sh ngs.FindAbsoluteMaxima -l $window -o $output
5 | #for $input in $inputs
6 | ${input.file}
7 | #end for
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | This tool can be used to find the location of the maximum value in genomic intervals, such as finding the peak summit inside a set of peak calls.
22 |
23 | .. class:: infomark
24 |
25 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. Intervals must be provided in Bed, BedGraph, or GFF format.
26 |
27 | -----
28 |
29 | **Example**
30 |
31 |
32 | if **Intervals** are genes ::
33 |
34 | chr11 5203271 5204877 NM_000518 0 -
35 | chr11 5210634 5212434 NM_000519 0 -
36 | chr11 5226077 5227663 NM_000559 0 -
37 |
38 | and **Wig files** are ::
39 |
40 | Data1.wig
41 | Data2.wig
42 |
43 | this tool will find the location of the maximum value in each interval for each of the provided Wig/BigWig files, and append them in columns in the order that they were added ::
44 |
45 | chr11 5203271 5204877 NM_000518 0 - 5203374 5204300
46 | chr11 5210634 5212434 NM_000519 0 - 5210638 5212450
47 | chr11 5226077 5227663 NM_000559 0 - 5226800 5226241
48 |
49 | where column 7 is the location of the maximum value in that interval for Data1.wig, and column 8 is the location of the maximum value in that interval for Data2.wig.
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/galaxy-conf/FindBoundaryNucleosomes.xml:
--------------------------------------------------------------------------------
1 |
2 | in windows
3 | galaxyToolRunner.sh nucleosomes.FindBoundaryNucleosomes -i $input -l $loci -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | .. class:: infomark
15 |
16 | Use the Call Nucleosomes tool to create a file of called nucleosomes, then use this tool to identify the first nucleosome's dyad position (peak maximum) from the 5' and 3' end of the gene.
17 |
18 | .. class:: infomark
19 |
20 | **TIP:** Nucleosome calls must be in tabular format of the kind produced by the Nucleosomes -> Call nucleosomes tool. Intervals must be in either Bed, BedGraph, or GFF format.
21 |
22 | -----
23 |
24 | **Syntax**
25 |
26 | - **Nucleosome calls** is a list of stereotypic nucleosome position calls.
27 | - **List of intervals:** The 5' and 3' boundary nucleosomes will be found for each interval in this list.
28 |
29 |
30 |
31 |
--------------------------------------------------------------------------------
/galaxy-conf/FindNMers.xml:
--------------------------------------------------------------------------------
1 |
2 | in a DNA sequence
3 | galaxyToolRunner.sh dna.FindNMers -i
4 | #if $refGenomeSource.genomeSource == "history":
5 | $refGenomeSource.ownFile
6 | #else
7 | ${refGenomeSource.index.fields.path}
8 | #end if
9 | -m $mismatches -n $nmer $rc -o $output
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 | This tool will find all matches of a given NMer in a DNA sequence. Sequences may be provided in FASTA format or selected from available reference genomes. Mismatches are allowed, but not insertions/deletions. The output is a Bed file with the locations of matches in the reference sequence.
50 |
51 |
52 |
53 |
--------------------------------------------------------------------------------
/galaxy-conf/FindOutlierRegions.xml:
--------------------------------------------------------------------------------
1 |
2 | such as CNVs
3 | galaxyToolRunner.sh ngs.FindOutlierRegions -i $input -w $window -t $threshold $below -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | This tool identifies regions of the genome that may be repetitive elements or CNVs by scanning for windows that have an exceptionally high mean relative to the genome-wide mean.
17 |
18 | -----
19 |
20 | .. class:: infomark
21 |
22 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
23 |
24 | -----
25 |
26 | **Syntax**
27 |
28 | - **Input data** is Wig or BigWig formatted data from a high-throughput sequencing experiment.
29 | - **Window size** is the size of the moving average to use.
30 | - **Threshold** is the multiple of the genome-wide mean that a window's mean must be in order to be considered an outlier region.
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/galaxy-conf/GaussianSmooth.xml:
--------------------------------------------------------------------------------
1 |
2 | a (Big)Wig file
3 | galaxyToolRunner.sh wigmath.GaussianSmooth -i $input -s $S -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | This tool smooths genomic data with an area-preserving Gaussian_ filter. The Gaussian filter is computed out to +/- 3 standard deviations.
32 |
33 | .. _Gaussian: http://en.wikipedia.org/wiki/Gaussian_filter
34 |
35 | .. class:: infomark
36 |
37 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/galaxy-conf/GeneTrackToBedGraph.xml:
--------------------------------------------------------------------------------
1 |
2 | converter
3 | galaxyToolRunner.sh converters.GeneTrackToBedGraph -i $input -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | This tool will sum the counts from the forward and reverse strands in a GeneTrack_ index to create a BedGraph file.
19 |
20 | .. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html
21 |
22 | .. class:: warningmark
23 |
24 | This tool requires GeneTrack formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset.
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/galaxy-conf/GeneTrackToWig.xml:
--------------------------------------------------------------------------------
1 |
2 | converter
3 | galaxyToolRunner.sh converters.GeneTrackToWig -i $input -s $shift $zero -a ${chromInfo} -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | This tool will convert GeneTrack_ format files into Wig files, optionally offsetting the + and - strand counts by a specified value before merging them.
31 |
32 | .. _GeneTrack: http://atlas.bx.psu.edu/genetrack/docs/genetrack.html
33 |
34 | .. class:: warningmark
35 |
36 | This tool requires GeneTrack formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset.
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/galaxy-conf/InterpolateDiscontinuousData.xml:
--------------------------------------------------------------------------------
1 |
2 | missing values in a (Big)Wig file
3 | galaxyToolRunner.sh converters.InterpolateDiscontinuousData -i $input -t $type -m $max -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | This tool will attempt to interpolate missing values (NaN) in a Wig file that result when converting discontinuous microarray probe data to Wig format. Stretches of missing data that extend longer than the allowed maximum will be left as NaN.
20 |
21 | -----
22 |
23 | **Interpolation types**
24 |
25 | - **Nearest** uses the value of the nearest base pair that has data
26 | - **Linear** uses a linear interpolant between the values of the nearest two probes
27 | - **Cubic** uses a cubic interpolant between the values of the nearest two probes
28 |
29 | For more information, see Wikipedia_.
30 |
31 | .. _Wikipedia: http://en.wikipedia.org/wiki/Interpolation
32 |
33 | -----
34 |
35 | .. class:: infomark
36 |
37 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use the Converters -> IntervalToWig tool to convert Bed, BedGraph, or GFF-formatted microarray data to Wig format, then use this tool to interpolate the missing values between probes.
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/galaxy-conf/IntervalAverager.xml:
--------------------------------------------------------------------------------
1 |
2 | that have been aligned
3 |
4 | galaxyToolRunner.sh visualization.IntervalAverager -l $loci -o $output $file1
5 | #for $input in $inputs
6 | ${input.file}
7 | #end for
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | This tool calculates the average signal for a set of aligned intervals. Intervals are lined up on their alignment point (column 5 in the Bed file), flipped if on the - strand, and averaged. The output is equivalent to aligning the data in a matrix and then taking the columnwise average of the matrix.
23 |
24 | Intervals with alignment points must be provided in the following extended Bed format ::
25 |
26 | chr low high id alignment strand
27 |
28 | .. class:: infomark
29 |
30 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
31 |
32 | -----
33 |
34 | **Syntax**
35 |
36 | - **Sequencing data** is the genomic data used to create the average
37 | - **List of intervals** is a list of intervals in Bed format with alignment points
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/galaxy-conf/IntervalLengthDistribution.xml:
--------------------------------------------------------------------------------
1 |
2 | of read lengths
3 | galaxyToolRunner.sh ngs.IntervalLengthDistribution -i $input $freq -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | This tool calculates the distribution of interval lengths from a list of intervals or reads in SAM, BAM, Bed, BedGraph, or GFF format.
32 |
33 | .. class:: warningmark
34 |
35 | For paired-end sequencing reads, the length is the length of the fragment (5' end of read 1 to 5' end of read 2)
36 |
37 |
38 |
39 |
--------------------------------------------------------------------------------
/galaxy-conf/IntervalStats.xml:
--------------------------------------------------------------------------------
1 |
2 | of data in a (Big)Wig file
3 |
4 | galaxyToolRunner.sh ngs.IntervalStats -l $windows -s $stat -o $output
5 | #for $input in $inputs
6 | ${input.file}
7 | #end for
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | This tool calculates the arithmetic mean, maximum, or minimum value for the Wig data in each interval. For each Wig file provided, an additional column is added to the output file, in the order that the files are added above.
28 |
29 | .. class:: infomark
30 |
31 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
32 |
33 | -----
34 |
35 | **Example**
36 |
37 | Calculate the mean change in nucleosome occupancy for each gene in the yeast genome:
38 |
39 | - 1. Create a "change in occupancy" dataset by subtracting the normalized occupancy Wig files from your two conditions using the WigMath -> Subtract tool.
40 | - 2. Upload a list of intervals corresponding to the genes in the yeast genome, or pull the data from UCSC using Get Data -> UCSC Main.
41 | - 3. Calculate the mean change in occupancy for each gene using this tool and the datasets from (1) and (2).
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/galaxy-conf/IntervalToBed.xml:
--------------------------------------------------------------------------------
1 |
2 | converter
3 | galaxyToolRunner.sh converters.IntervalToBed -i $input -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 | This tool will convert any file in SAM, BAM, GFF, BedGraph, BigBed, or VCF format to Bed format.
13 |
14 | .. class:: warningmark
15 |
16 | For SAM/BAM data, paired-end reads are converted to Bed format as the entire fragment (5' end of mate 1 to the 5' end of mate 2). Single-end reads are converted to Bed format as the read itself, with strand information. If your SAM/BAM file contains both mate alignments from a paired-end sequencing run (i.e. two entries for each fragment), you should first filter out reads from either the + or - strand with the SAM Tools -> Filter SAM tool to avoid producing redundant entries in the output Bed file.
17 |
18 |
19 |
20 |
--------------------------------------------------------------------------------
/galaxy-conf/IntervalToWig.xml:
--------------------------------------------------------------------------------
1 |
2 | converter
3 | galaxyToolRunner.sh converters.IntervalToWig -i $input $zero -a ${chromInfo} -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | This tool converts data from an interval format, such as Bed, BedGraph or GFF, to Wig format. This can be used to convert data from microarrays to Wig format. The value of each interval is mapped into the Wig file. Intervals that overlap in the original file (multiple-valued base pairs) are averaged, and bases without data in the original interval file are set to NaN.
15 |
16 | .. class:: warningmark
17 |
18 | This tool requires Bed, BedGraph, or GFF formatted data. If you have tabular data that was not correctly autodetected, change the metadata by clicking on the pencil icon for the dataset.
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/galaxy-conf/KMeans.xml:
--------------------------------------------------------------------------------
1 |
2 | an aligned matrix
3 | galaxyToolRunner.sh visualization.KMeans -i $input -k $K -1 $min -2 $max -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 | .. class:: warningmark
19 |
20 | This tool requires tabular data in matrix2png format (with column AND row headers). For more information about the required format and usage instructions, see the matrix2png_ website.
21 |
22 | .. _matrix2png: http://bioinformatics.ubc.ca/matrix2png/dataformat.html
23 |
24 | .. class:: infomark
25 |
26 | You can use the "Align values in a matrix" tool to create a matrix, then use this tool to cluster the matrix with k-means.
27 |
28 | .. class:: infomark
29 |
30 | **TIP:** You can use the **min** and **max** columns to cluster a large matrix based on a subset of the columns. For example, you could cluster a 4000x4000 matrix on columns 200-300 by setting min = 200 and max = 300. This will greatly increase the efficiency of distance calculations during the k-means EM, and also allows you to cluster based on specific regions, such as promoters or coding sequences.
31 |
32 | -----
33 |
34 | This tool will cluster the rows in an aligned matrix with KMeans_. The implementation builds upon the KMeansPlusPlusClusterer available in commons-math3_.
35 |
36 | .. _KMeans: http://en.wikipedia.org/wiki/K-means_clustering
37 |
38 | .. _commons-math3: http://commons.apache.org/math/
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/galaxy-conf/LogTransform.xml:
--------------------------------------------------------------------------------
1 |
2 | a (Big)Wig file
3 | galaxyToolRunner.sh wigmath.LogTransform -i $input -b $base -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 | .. class:: infomark
47 |
48 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/galaxy-conf/MapDyads.xml:
--------------------------------------------------------------------------------
1 |
2 | from sequencing reads
3 |
4 | galaxyToolRunner.sh nucleosomes.MapDyads -i $input -a ${chromInfo} -o $output
5 | #if $type.read == 'single'
6 | -s $type.size
7 | #end if
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 | This tool produces a Wig file with the number of dyads at each base pair. For paired-end MNase data, dyads are approximated using the center of the fragment. For Bed/BedGraph formatted input, this means the center of the interval; for SAM/BAM formatted input, this means the middle between the 5' end of mate 1 and the 5' end of mate 2. For single-end data, the estimated mononucleosome fragment length (N) must be specified, which will be used to offset reads from the + and - strands by +/- N/2.
31 |
32 | .. class:: warningmark
33 |
34 | This tool requires sequencing reads in SAM, BAM, Bed, or BedGraph format.
35 |
36 | .. class:: warningmark
37 |
38 | Since BedGraph format does not contain strand information, all reads in BedGraph format are considered to be on the + strand.
39 |
40 |
41 |
42 |
--------------------------------------------------------------------------------
/galaxy-conf/Multiply.xml:
--------------------------------------------------------------------------------
1 |
2 | (Big)Wig files
3 |
4 | galaxyToolRunner.sh wigmath.Multiply -o $output
5 | #for $input in $inputs
6 | ${input.file}
7 | #end for
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | This tool multiplies Wig or BigWig files base pair by base pair.
21 |
22 | .. class:: infomark
23 |
24 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/galaxy-conf/PairOverlappingNucleosomes.xml:
--------------------------------------------------------------------------------
1 |
2 | by overlap
3 | galaxyToolRunner.sh nucleosomes.PairOverlappingNucleosomes -a $input1 -b $input2 -m $N -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | This tool will pair overlapping nucleosomes from two sets of nucleosome calls. In the event that multiple calls overlap, the one with the largest overlap is selected as a match.
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/galaxy-conf/PercusDecomposition.xml:
--------------------------------------------------------------------------------
1 |
2 | from occupancy data
3 | galaxyToolRunner.sh nucleosomes.PercusDecomposition -d $dyads -n $N -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | This tool derives an external potential energy function from experimental nucleosome positioning data by assuming that nucleosomes interact with DNA like a fluid of hard rods. This energy function can then be used to derive sequence-specific nucleosome formation preferences, while accounting for hard-core steric restriction by adjacent nucleosomes. This tool is a reimplementation of the algorithm described in (Locke et al. 2010).
15 |
16 | -----
17 |
18 | **Citations**
19 |
20 | Locke G, Tolkunov D, Moqtaderi Z, Struhl K and Morozov AV (2010) High-throughput sequencing reveals a simple model of nucleosome energetics. Proceedings of the National Academy of Sciences 107: 20998–21003
21 |
22 | Percus JK (1976) Equilibrium state of a classical fluid of hard rods in an external field. J Stat Phys 15: 505–511
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/galaxy-conf/Phasogram.xml:
--------------------------------------------------------------------------------
1 |
2 | of dyads
3 | galaxyToolRunner.sh nucleosomes.Phasogram -i $input -m $max -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | This tool calculates the phase distribution of sequencing data. It can be used to identify genome-wide periodicities. Phase counts are aggregated for each base pair across the genome. This is equivalent to summing the autocovariance of a sliding window across the genome. The tool is a reimplementation of the algorithm described in (Valouev et al. 2011).
15 |
16 | .. class:: infomark
17 |
18 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
19 |
20 | -----
21 |
22 | **Citation**
23 |
24 | Valouev A, Johnson SM, Boyd SD, Smith CL, Fire AZ and Sidow A (2011) Determinants of nucleosome organization in primary human cells. Nature 474: 516–520
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/galaxy-conf/PowerSpectrum.xml:
--------------------------------------------------------------------------------
1 |
2 | of data in a Wiggle file
3 | galaxyToolRunner.sh ngs.PowerSpectrum -i $input -l $windows -m $max -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | This tool computes the power spectrum of intervals of sequencing data. For each interval provided, the normalized power spectrum is calculated, representing the relative power in each frequency. Power spectra are normalized to have total power 1, with the DC component (0 frequency) removed. Power spectra are computed using the FFT_ implementation in JTransforms_.
16 |
17 | .. _FFT: http://en.wikipedia.org/wiki/Fast_Fourier_transform
18 |
19 | .. _JTransforms: http://sites.google.com/site/piotrwendykier/software/jtransforms
20 |
21 | -----
22 |
23 | **Syntax**
24 |
25 | - **Input data** is the genomic data on which to compute the power spectrum.
26 | - **List of intervals:** The power spectrum will be computed for each genomic interval specified in this list.
27 | - **Number of frequencies:** The power spectrum will be truncated at this frequency in the output.
28 |
29 | -----
30 |
31 | **Output**
32 |
33 | The output has the following format ::
34 |
35 | chr start stop id alignment strand freq1 freq2 ...
36 |
37 | up to the maximum frequency specified. Frequencies are truncated to reduce the size of the output since signals are often band-limited.
38 |
39 | -----
40 |
41 | .. class:: warningmark
42 |
43 | **NOTE:** Even though frequencies may be truncated in the output, all frequencies in the power spectrum are computed and used for normalization.
44 |
45 | .. class:: infomark
46 |
47 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly. Intervals must be provided in Bed, BedGraph, or GFF format.
48 |
49 | -----
50 |
51 | This tool is equivalent to the following Matlab commands, where x is a vector with the interval of sequencing data ::
52 |
53 | N = length(x);
54 | f = fft(x);
55 | p = abs(f(2:N/2)).^2;
56 | p = p / sum(p);
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/galaxy-conf/RollingReadLength.xml:
--------------------------------------------------------------------------------
1 |
2 | over each locus
3 | galaxyToolRunner.sh ngs.RollingReadLength -i $input -a ${chromInfo} -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | This tool will compute the mean length of all fragments overlapping a given locus, and can be used to identify sites with exceptionally long or short reads.
14 |
15 | .. class:: warningmark
16 |
17 | This tool requires paired-end SAM, BAM, Bed, or BedGraph formatted data. Using single-end data will result in a constant read length.
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/galaxy-conf/RomanNumeralize.xml:
--------------------------------------------------------------------------------
1 |
2 | on any file
3 | galaxyToolRunner.sh converters.RomanNumeralize -i $input -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | This tool scans any file with chromosomal coordinates of the form "chr5" and replaces them with "chrV".
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/galaxy-conf/Shift.xml:
--------------------------------------------------------------------------------
1 |
2 | a (Big)Wig file
3 | galaxyToolRunner.sh wigmath.Shift -i $input -m $M $chr -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | This tool will shift all values in a Wig file by a scalar so that the output has the desired mean.
15 |
16 | .. class:: infomark
17 |
18 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/galaxy-conf/StripMatrix.xml:
--------------------------------------------------------------------------------
1 |
2 | from an aligned matrix
3 | galaxyToolRunner.sh visualization.StripMatrix -i $input -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 | This tool is intended to strip the column/row headers off of an aligned matrix (in matrix2png format) for easy import into Matlab or other software where only the data values are required. It removes the first row and first column from a tabular file.
20 |
21 | -----
22 |
23 | **Example**
24 |
25 | If the following tabular matrix is used as input ::
26 |
27 | ID col1 col2 col3
28 | row1 2 4 5
29 | row2 5 1 1
30 |
31 | then the following tabular matrix will be produced as output ::
32 |
33 | 2 4 5
34 | 5 1 1
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/galaxy-conf/Subsample.xml:
--------------------------------------------------------------------------------
1 |
2 | from an interval file
3 | galaxyToolRunner.sh ngs.Subsample -i $input -n $n -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | This tool will randomly select N reads from a file.
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/galaxy-conf/Subtract.xml:
--------------------------------------------------------------------------------
1 |
2 | two (Big)Wig files
3 | galaxyToolRunner.sh wigmath.Subtract -m $minuend -s $subtrahend -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
36 |
37 |
38 |
39 |
40 | This tool will subtract the values in one Wig file from another, base pair by base pair.
41 |
42 | .. class:: infomark
43 |
44 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/galaxy-conf/ValueDistribution.xml:
--------------------------------------------------------------------------------
1 |
2 | of a (Big)Wig file
3 | galaxyToolRunner.sh wigmath.ValueDistribution -i $input
4 | #if str( $min ) != ''
5 | --min $min
6 | #end if
7 |
8 | #if str( $max ) != ''
9 | --max $max
10 | #end if
11 |
12 | -n $bins -o $output
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 | This tool computes a histogram of the values in a Wig file, as well as the moments of the distribution.
27 |
28 | -----
29 |
30 | **Syntax**
31 |
32 | - **Input data** is the genomic data used to compute the histogram.
33 | - **Minimum bin value** is the smallest bin. If unset, it is equal to the minimum value in the input data.
34 | - **Maximum bin value** is the largest bin. If unset, it is equal to the maximum value in the input data.
35 | - **Number of bins** is the number of bins to use. The bin size will be equal to (max - min) / (# bins).
36 |
37 | -----
38 |
39 | **Output**
40 |
41 | The output is in 2-column tabular format, where the first column represents the lower edge of a bin interval and the second column represents the number of values that fell in that bin. For example, if the **minimum bin value** is 0, the **maximum bin value** is 0.3, and the **number of bins** is 3, then the following output might be produced ::
42 |
43 | bin count
44 | <0 3
45 | 0 1
46 | 0.1 10
47 | 0.2 4
48 | >0.3 12
49 |
50 | where there were 3 values in (-inf, 0), 1 value in [0, 0.1), 10 values in [0.1, 0.2), 4 values in [0.2, 0.3), and 12 values in [0.3, inf).
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/galaxy-conf/WaveletTransform.xml:
--------------------------------------------------------------------------------
1 |
2 | across a genomic interval
3 | galaxyToolRunner.sh ngs.WaveletTransform -i $input -w $wavelet --chr $chr --start $start --stop $stop --min $min --max $max --step $N -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 | This tool will perform a Wavelet_ scaling_ analysis on an interval of genomic data. For each base pair in the interval, the similarity (correlation) is calculated between the data and the wavelet over a range of scales. This can be used to identify high-frequency and low-frequency features in the data. The output is a matrix in matrix2png format that can be used to generate a heatmap: along the x-axis (columns) are the base pairs in the interval, along the y-axis (rows) are the correlation coefficients for each Wavelet size, with the largest Wavelet scale at the top and the smallest scale at the bottom.
21 |
22 | .. _Wavelet: http://en.wikipedia.org/wiki/Wavelet
23 |
24 | .. _scaling: http://en.wikipedia.org/wiki/Scaleogram
25 |
26 | -----
27 |
28 | **Syntax**
29 |
30 | - **Input data** is the genomic data on which to compute the Wavelet scaling analysis.
31 | - **Wavelet** a single column of values representing a discrete Wavelet.
32 | - **Chromosome** the chromosome containing the interval to analyze
33 | - **Start base pair** the base pair at which the interval starts
34 | - **Stop base pair** the base pair at which the interval ends
35 | - **Minimum wavelet size** the smallest Wavelet to analyze
36 | - **Maximum wavelet size** the largest Wavelet to analyze
37 | - **Scaling step size** the step size for the range of wavelet scales. The provided wavelet will be scaled to each size in the set min:step:max by interpolating the provided wavelet.
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/galaxy-conf/ZScore.xml:
--------------------------------------------------------------------------------
1 |
2 | a (Big)Wig file
3 | galaxyToolRunner.sh wigmath.ZScore -i $input $chr -o $output
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 | This tool will compute normal scores (Z-scores) for each of the values in a Wig file. For each base pair, the Z-scored value is equal to the deviation from the mean divided by the standard deviation (i.e. the number of standard deviations a value is away from the mean). The output file should have mean 0 and standard deviation 1.
41 |
42 | .. class:: infomark
43 |
44 | This tool is equivalent to using the **Mean Shift** tool to shift a Wig file to mean 0, then using the **Scale** tool to scale by 1/(standard deviation).
45 |
46 | .. class:: infomark
47 |
48 | **TIP:** If your dataset does not appear in the pulldown menu, it means that it is not in Wig or BigWig format. Use "edit attributes" to set the correct format if it was not detected correctly.
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/galaxy-conf/galaxyToolRunner.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Launches one java-genomics-toolkit application on behalf of a Galaxy tool.
4 | #
5 | # USAGE: galaxyToolRunner.sh APPNAME [ARGS]
6 | #   APPNAME  application class, relative to the edu.unc.genomics package
7 | #            (e.g. wigmath.Multiply)
8 | #   ARGS     passed through to the application unchanged
9 |
10 | # Without an application name there is nothing to run: print usage and
11 | # report failure to the caller (a bare `exit` would return echo's status, 0).
12 | if [ $# -eq 0 ]
13 | then
14 |   echo "USAGE: galaxyToolRunner.sh APPNAME [ARGS]"
15 |   exit 1
16 | fi
17 |
18 | # Absolute path of the directory holding this script, so the classpath
19 | # does not depend on the caller's working directory.
20 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
21 |
22 | # The classpath is quoted so that install paths containing spaces survive
23 | # word splitting and the `*` entries reach the JVM unexpanded (Java expands
24 | # classpath wildcards itself). "$@" glues APPNAME onto the package prefix
25 | # and forwards every remaining argument as its own word.
26 | java -Dlog4j.configuration=log4j.properties -cp "$DIR:$DIR/../build:$DIR/../dist/*:$DIR/../lib/*" edu.unc.genomics."$@"
27 |
--------------------------------------------------------------------------------
/galaxy-conf/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=info, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 |
6 | # Pattern to output only the log message, followed by a line separator.
7 | log4j.appender.stdout.layout.ConversionPattern=%m%n
8 |
9 | # Only output errors from the BigWig library
10 | log4j.logger.org.broad.igv.bbfile=ERROR
11 | # Only output errors from java-genomics-io
12 | log4j.logger.edu.unc.genomics.io=ERROR
--------------------------------------------------------------------------------
/lib/BigWig.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/BigWig.jar
--------------------------------------------------------------------------------
/lib/JLargeArrays-1.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/JLargeArrays-1.2.jar
--------------------------------------------------------------------------------
/lib/JTransforms-3.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/JTransforms-3.0.jar
--------------------------------------------------------------------------------
/lib/commons-lang3-3.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/commons-lang3-3.1.jar
--------------------------------------------------------------------------------
/lib/commons-math3-3.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/commons-math3-3.0.jar
--------------------------------------------------------------------------------
/lib/dnaproperties-1732.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/dnaproperties-1732.jar
--------------------------------------------------------------------------------
/lib/hamcrest-core-1.1.0.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/hamcrest-core-1.1.0.jar
--------------------------------------------------------------------------------
/lib/java-genomics-io.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/java-genomics-io.jar
--------------------------------------------------------------------------------
/lib/jcommander-1.27.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/jcommander-1.27.jar
--------------------------------------------------------------------------------
/lib/junit.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/junit.jar
--------------------------------------------------------------------------------
/lib/log4j-1.2.15.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/log4j-1.2.15.jar
--------------------------------------------------------------------------------
/lib/picard-1.67.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/picard-1.67.jar
--------------------------------------------------------------------------------
/lib/sam-1.67.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/timpalpant/java-genomics-toolkit/50f88f011ab450ef2b68f39f3f48ed40351c5c5d/lib/sam-1.67.jar
--------------------------------------------------------------------------------
/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=debug, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 |
6 | # Pattern to output the log level followed by the message.
7 | log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
8 |
9 | # Only output errors from the BigWig library
10 | log4j.logger.org.broad.igv.bbfile=ERROR
11 | # Verbose (debug/info) output from java-genomics-io and the toolkit utilities; errors only from edu.ucsc.genome
12 | log4j.logger.edu.unc.genomics.io.IntervalFileSniffer=DEBUG
13 | log4j.logger.edu.unc.genomics.io=DEBUG
14 | log4j.logger.edu.unc.genomics.util=INFO
15 | log4j.logger.edu.ucsc.genome=ERROR
16 |
--------------------------------------------------------------------------------
/resources/assemblies/ce10.len:
--------------------------------------------------------------------------------
1 | chrV 20924149
2 | chrX 17718866
3 | chrIV 17493793
4 | chrII 15279345
5 | chrI 15072423
6 | chrIII 13783700
7 | chrM 13794
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce2.len:
--------------------------------------------------------------------------------
1 | chrI 15080483
2 | chrII 15279308
3 | chrIII 13783313
4 | chrIV 17493791
5 | chrM 13794
6 | chrV 20922231
7 | chrX 17718849
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce3.len:
--------------------------------------------------------------------------------
1 | chrI 15080552
2 | chrII 15279311
3 | chrIII 13783317
4 | chrIV 17493785
5 | chrM 13794
6 | chrV 20922231
7 | chrX 17718850
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce4.len:
--------------------------------------------------------------------------------
1 | chrI 15072419
2 | chrII 15279316
3 | chrIII 13783681
4 | chrIV 17493784
5 | chrM 13794
6 | chrV 20919398
7 | chrX 17718852
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce5.len:
--------------------------------------------------------------------------------
1 | chrV 20919568
2 | chrX 17718851
3 | chrIV 17493785
4 | chrII 15279316
5 | chrI 15072421
6 | chrIII 13783681
7 | chrM 13794
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce6.len:
--------------------------------------------------------------------------------
1 | chrV 20919568
2 | chrX 17718854
3 | chrIV 17493785
4 | chrII 15279323
5 | chrI 15072421
6 | chrIII 13783681
7 | chrM 13794
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce7.len:
--------------------------------------------------------------------------------
1 | chrV 20924143
2 | chrX 17718854
3 | chrIV 17493784
4 | chrII 15279324
5 | chrI 15072421
6 | chrIII 13783682
7 | chrM 13794
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce8.len:
--------------------------------------------------------------------------------
1 | chrV 20924143
2 | chrX 17718854
3 | chrIV 17493784
4 | chrII 15279323
5 | chrI 15072421
6 | chrIII 13783685
7 | chrM 13794
8 |
--------------------------------------------------------------------------------
/resources/assemblies/ce9.len:
--------------------------------------------------------------------------------
1 | chrV 20924143
2 | chrX 17718854
3 | chrIV 17493784
4 | chrII 15279323
5 | chrI 15072421
6 | chrIII 13783685
7 | chrM 13794
8 |
--------------------------------------------------------------------------------
/resources/assemblies/dm1.len:
--------------------------------------------------------------------------------
1 | chr4 1237870
2 | chrU 8248647
3 | chrX 21780003
4 | chr2L 22217931
5 | chr2R 20302755
6 | chr2h 1651714
7 | chr3L 23352213
8 | chr3R 27890790
9 | chr3h 1961095
10 | chrXh 359526
11 | chrYh 321294
12 |
--------------------------------------------------------------------------------
/resources/assemblies/dm2.len:
--------------------------------------------------------------------------------
1 | chr4 1281640
2 | chrM 19517
3 | chrU 8724946
4 | chrX 22224390
5 | chr2L 22407834
6 | chr2R 20766785
7 | chr2h 1694122
8 | chr3L 23771897
9 | chr3R 27905053
10 | chr3h 2955737
11 | chr4h 88110
12 | chrXh 359526
13 | chrYh 396896
14 |
--------------------------------------------------------------------------------
/resources/assemblies/dm3.len:
--------------------------------------------------------------------------------
1 | chr2L 23011544
2 | chr2LHet 368872
3 | chr2R 21146708
4 | chr2RHet 3288761
5 | chr3L 24543557
6 | chr3LHet 2555491
7 | chr3R 27905053
8 | chr3RHet 2517507
9 | chr4 1351857
10 | chrU 10049037
11 | chrUextra 29004656
12 | chrX 22422827
13 | chrXHet 204112
14 | chrYHet 347038
15 | chrM 19517
16 |
--------------------------------------------------------------------------------
/resources/assemblies/hg15.len:
--------------------------------------------------------------------------------
1 | chr1 245203898
2 | chr2 243315028
3 | chr3 199411731
4 | chr4 191610523
5 | chr5 180967295
6 | chr6 170740541
7 | chr7 158431299
8 | chr8 145908738
9 | chr9 134505819
10 | chrM 16571
11 | chrX 152634166
12 | chrY 50961097
13 | chr1_random 12562665
14 | chr2_random 1464032
15 | chr3_random 423185
16 | chr4_random 1219494
17 | chr6_random 12061844
18 | chr7_random 1057565
19 | chr8_random 427716
20 | chr9_random 2536476
21 | chrX_random 4859112
22 | chrY_random 191708
23 | chr10 135480874
24 | chr11 134978784
25 | chr12 133464434
26 | chr13 114151656
27 | chr14 105311216
28 | chr15 100114055
29 | chr16 89995999
30 | chr17 81691216
31 | chr18 77753510
32 | chr19 63790860
33 | chr20 63644868
34 | chr21 46976537
35 | chr22 49476972
36 | chr10_random 710249
37 | chr11_random 150110
38 | chr12_random 590431
39 | chr13_random 414659
40 | chr15_random 366089
41 | chr16_random 24360
42 | chr17_random 337440
43 | chr19_random 301858
44 | chrUn_random 611077
45 |
--------------------------------------------------------------------------------
/resources/assemblies/hg16.len:
--------------------------------------------------------------------------------
1 | chr1 246127941
2 | chr2 243615958
3 | chr3 199344050
4 | chr4 191731959
5 | chr5 181034922
6 | chr6 170914576
7 | chr7 158545518
8 | chr8 146308819
9 | chr9 136372045
10 | chrM 16571
11 | chrX 153692391
12 | chrY 50286555
13 | chr1_random 6515988
14 | chr2_random 1104831
15 | chr3_random 749256
16 | chr4_random 648024
17 | chr5_random 143687
18 | chr6_random 2055751
19 | chr7_random 632637
20 | chr8_random 1499381
21 | chr9_random 2766341
22 | chrX_random 3403558
23 | chr10 135037215
24 | chr11 134482954
25 | chr12 132078379
26 | chr13 113042980
27 | chr14 105311216
28 | chr15 100256656
29 | chr16 90041932
30 | chr17 81860266
31 | chr18 76115139
32 | chr19 63811651
33 | chr20 63741868
34 | chr21 46976097
35 | chr22 49396972
36 | chr10_random 1043775
37 | chr13_random 189598
38 | chr15_random 1132826
39 | chr17_random 2549222
40 | chr18_random 4262
41 | chr19_random 92689
42 | chrUn_random 3349625
43 |
--------------------------------------------------------------------------------
/resources/assemblies/hg17.len:
--------------------------------------------------------------------------------
1 | chr10 135413628
2 | chr10_random 113275
3 | chr11 134452384
4 | chr12 132449811
5 | chr12_random 466818
6 | chr13 114142980
7 | chr13_random 186858
8 | chr14 106368585
9 | chr15 100338915
10 | chr15_random 784346
11 | chr16 88827254
12 | chr16_random 105485
13 | chr17 78774742
14 | chr17_random 2618010
15 | chr18 76117153
16 | chr18_random 4262
17 | chr19 63811651
18 | chr19_random 301858
19 | chr1 245522847
20 | chr1_random 3897131
21 | chr20 62435964
22 | chr21 46944323
23 | chr22 49554710
24 | chr22_random 257318
25 | chr2 243018229
26 | chr2_random 418158
27 | chr3 199505740
28 | chr3_random 970716
29 | chr4 191411218
30 | chr4_random 1030282
31 | chr5 180857866
32 | chr5_random 143687
33 | chr6 170975699
34 | chr6_random 1875562
35 | chr6_hla_hap1 139182
36 | chr6_hla_hap2 150447
37 | chr7 158628139
38 | chr7_random 778964
39 | chr8 146274826
40 | chr8_random 943810
41 | chr9 138429268
42 | chr9_random 1312665
43 | chrM 16571
44 | chrX 154824264
45 | chrX_random 1719168
46 | chrY 57701691
47 |
--------------------------------------------------------------------------------
/resources/assemblies/hg18.len:
--------------------------------------------------------------------------------
1 | chr1 247249719
2 | chr1_random 1663265
3 | chr10 135374737
4 | chr10_random 113275
5 | chr11 134452384
6 | chr11_random 215294
7 | chr12 132349534
8 | chr13 114142980
9 | chr13_random 186858
10 | chr14 106368585
11 | chr15 100338915
12 | chr15_random 784346
13 | chr16 88827254
14 | chr16_random 105485
15 | chr17 78774742
16 | chr17_random 2617613
17 | chr18 76117153
18 | chr18_random 4262
19 | chr19 63811651
20 | chr19_random 301858
21 | chr2 242951149
22 | chr2_random 185571
23 | chr20 62435964
24 | chr21 46944323
25 | chr21_random 1679693
26 | chr22 49691432
27 | chr22_random 257318
28 | chr22_h2_hap1 63661
29 | chr3 199501827
30 | chr3_random 749256
31 | chr4 191273063
32 | chr4_random 842648
33 | chr5 180857866
34 | chr5_random 143687
35 | chr5_h2_hap1 1794870
36 | chr6 170899992
37 | chr6_random 1875562
38 | chr6_cox_hap1 4731698
39 | chr6_qbl_hap2 4565931
40 | chr7 158821424
41 | chr7_random 549659
42 | chr8 146274826
43 | chr8_random 943810
44 | chr9 140273252
45 | chr9_random 1146434
46 | chrM 16571
47 | chrX 154913754
48 | chrX_random 1719168
49 | chrY 57772954
50 |
--------------------------------------------------------------------------------
/resources/assemblies/hg19.len:
--------------------------------------------------------------------------------
1 | chr1 249250621
2 | chr2 243199373
3 | chr3 198022430
4 | chr4 191154276
5 | chr5 180915260
6 | chr6 171115067
7 | chr7 159138663
8 | chrX 155270560
9 | chr8 146364022
10 | chr9 141213431
11 | chr10 135534747
12 | chr11 135006516
13 | chr12 133851895
14 | chr13 115169878
15 | chr14 107349540
16 | chr15 102531392
17 | chr16 90354753
18 | chr17 81195210
19 | chr18 78077248
20 | chr20 63025520
21 | chrY 59373566
22 | chr19 59128983
23 | chr22 51304566
24 | chr21 48129895
25 | chr6_ssto_hap7 4928567
26 | chr6_mcf_hap5 4833398
27 | chr6_cox_hap2 4795371
28 | chr6_mann_hap4 4683263
29 | chr6_apd_hap1 4622290
30 | chr6_qbl_hap6 4611984
31 | chr6_dbb_hap3 4610396
32 | chr17_ctg5_hap1 1680828
33 | chr4_ctg9_hap1 590426
34 | chr1_gl000192_random 547496
35 | chrUn_gl000225 211173
36 | chr4_gl000194_random 191469
37 | chr4_gl000193_random 189789
38 | chr9_gl000200_random 187035
39 | chrUn_gl000222 186861
40 | chrUn_gl000212 186858
41 | chr7_gl000195_random 182896
42 | chrUn_gl000223 180455
43 | chrUn_gl000224 179693
44 | chrUn_gl000219 179198
45 | chr17_gl000205_random 174588
46 | chrUn_gl000215 172545
47 | chrUn_gl000216 172294
48 | chrUn_gl000217 172149
49 | chr9_gl000199_random 169874
50 | chrUn_gl000211 166566
51 | chrUn_gl000213 164239
52 | chrUn_gl000220 161802
53 | chrUn_gl000218 161147
54 | chr19_gl000209_random 159169
55 | chrUn_gl000221 155397
56 | chrUn_gl000214 137718
57 | chrUn_gl000228 129120
58 | chrUn_gl000227 128374
59 | chr1_gl000191_random 106433
60 | chr19_gl000208_random 92689
61 | chr9_gl000198_random 90085
62 | chr17_gl000204_random 81310
63 | chrUn_gl000233 45941
64 | chrUn_gl000237 45867
65 | chrUn_gl000230 43691
66 | chrUn_gl000242 43523
67 | chrUn_gl000243 43341
68 | chrUn_gl000241 42152
69 | chrUn_gl000236 41934
70 | chrUn_gl000240 41933
71 | chr17_gl000206_random 41001
72 | chrUn_gl000232 40652
73 | chrUn_gl000234 40531
74 | chr11_gl000202_random 40103
75 | chrUn_gl000238 39939
76 | chrUn_gl000244 39929
77 | chrUn_gl000248 39786
78 | chr8_gl000196_random 38914
79 | chrUn_gl000249 38502
80 | chrUn_gl000246 38154
81 | chr17_gl000203_random 37498
82 | chr8_gl000197_random 37175
83 | chrUn_gl000245 36651
84 | chrUn_gl000247 36422
85 | chr9_gl000201_random 36148
86 | chrUn_gl000235 34474
87 | chrUn_gl000239 33824
88 | chr21_gl000210_random 27682
89 | chrUn_gl000231 27386
90 | chrUn_gl000229 19913
91 | chrM 16571
92 | chrUn_gl000226 15008
93 | chr18_gl000207_random 4262
94 |
--------------------------------------------------------------------------------
/resources/assemblies/hg19Haps.len:
--------------------------------------------------------------------------------
1 | chr6_ssto_hap7 4928567
2 | chr6_mcf_hap5 4833398
3 | chr6_cox_hap2 4795371
4 | chr6_mann_hap4 4683263
5 | chr6_apd_hap1 4622290
6 | chr6_qbl_hap6 4611984
7 | chr6_dbb_hap3 4610396
8 | chr17_ctg5_hap1 1680828
9 | chr4_ctg9_hap1 590426
10 |
--------------------------------------------------------------------------------
/resources/assemblies/hg19Patch2.len:
--------------------------------------------------------------------------------
1 | chr6_ssto_hap7 4928567
2 | chr6_mcf_hap5 4833398
3 | chr6_cox_hap2 4795371
4 | chr6_mann_hap4 4683263
5 | chr6_apd_hap1 4622290
6 | chr6_qbl_hap6 4611984
7 | chr6_dbb_hap3 4610396
8 | chr17_ctg5_hap1 1680828
9 | chr5_ctg1_gl339449 1620324
10 | chr4_ctg9_hap1 590426
11 | chr17_gl383560 534288
12 | chr17_gl383558 457041
13 | chr8_gl383535 429806
14 | chr17_gl383561 406963
15 | chr10_gl383543 392792
16 | chr15_ctg8_gl383555 388773
17 | chr19_ctg3_gl383573 385657
18 | chr4_ctg6_gl383528 376187
19 | chr1_ctg31_gl383520 366579
20 | chr17_gl383559 338640
21 | chr9_gl339450 330164
22 | chr10_ctg5_gl383546 309802
23 | chr15_ctg4_gl383554 296527
24 | chr18_ctg1_gl383567 289831
25 | chr17_ctg1_gl383563 270261
26 | chr17_ctg4_gl383565 223995
27 | chr8_gl383536 203777
28 | chr21_ctg1_gl383579 201198
29 | chr18_ctg2_gl383571 198278
30 | chr16_ctg3_gl383556 192462
31 | chr19_ctg3_gl383576 188024
32 | chr12_ctg5_gl383551 184319
33 | chr1_ctg31_gl383518 182439
34 | chr3_ctg2_gl383526 180671
35 | chr10_ctg2_gl383545 179254
36 | chr5_ctg5_gl383531 173459
37 | chr3_gl383523 171362
38 | chr9_ctg35_gl383541 171286
39 | chr19_ctg3_gl383575 170227
40 | chr12_ctg2_gl383550 169178
41 | chr18_ctg2_gl383569 167950
42 | chr12_gl383548 165247
43 | chr18_ctg1_gl383570 164789
44 | chr4_ctg12_gl383527 164536
45 | chr9_ctg1_gl383539 162988
46 | chr18_ctg2_gl383572 159547
47 | chr22_ctg1_gl383582 158507
48 | chr19_ctg3_gl383574 155864
49 | chr12_ctg2_gl383553 154881
50 | chr11_ctg1_gl383547 154407
51 | chr2_ctg1_gl383521 143390
52 | chr12_ctg2_gl383552 138655
53 | chr17_ctg4_gl383564 133151
54 | chr20_ctg1_gl383577 128385
55 | chr10_gl383544 128378
56 | chr6_ctg5_gl383533 124736
57 | chr2_ctg12_gl383522 123821
58 | chr4_ctg9_gl383529 121345
59 | chr12_ctg2_gl383549 120804
60 | chr7_ctg6_gl383534 119383
61 | chr21_ctg1_gl383581 116690
62 | chr1_ctg31_gl383519 110268
63 | chr18_ctg2_gl383568 104552
64 | chr5_ctg2_gl383530 101241
65 | chr22_ctg2_gl383583 96924
66 | chr17_ctg4_gl383566 90219
67 | chr16_ctg3_gl383557 89672
68 | chr5_ctg1_gl383532 82728
69 | chr3_gl383524 78793
70 | chr21_ctg1_gl383580 74652
71 | chr9_ctg35_gl383540 71551
72 | chr3_gl383525 65063
73 | chr21_ctg1_gl383578 63917
74 | chr9_gl383537 62435
75 | chr9_ctg35_gl383542 60032
76 | chr1_gl383517 49352
77 | chr1_gl383516 49316
78 | chr9_gl383538 49281
79 | chr17_gl383562 45551
80 | chrM_rCRS 16569
81 |
--------------------------------------------------------------------------------
/resources/assemblies/klac.len:
--------------------------------------------------------------------------------
1 | 1 1062590
2 | 2 1320834
3 | 3 1753957
4 | 4 1715506
5 | 5 2234072
6 | 6 2602197
7 |
--------------------------------------------------------------------------------
/resources/assemblies/sacCer1.len:
--------------------------------------------------------------------------------
1 | chr1 230208
2 | chr10 745446
3 | chr11 666445
4 | chr12 1078173
5 | chr13 924430
6 | chr14 784328
7 | chr15 1091285
8 | chr16 948060
9 | chr2 813136
10 | chr3 316613
11 | chr4 1531914
12 | chr5 576869
13 | chr6 270148
14 | chr7 1090944
15 | chr8 562639
16 | chr9 439885
17 | chrM 85779
18 |
--------------------------------------------------------------------------------
/resources/assemblies/sacCer2.len:
--------------------------------------------------------------------------------
1 | chrIV 1531919
2 | chrXV 1091289
3 | chrVII 1090947
4 | chrXII 1078175
5 | chrXVI 948062
6 | chrXIII 924429
7 | chrII 813178
8 | chrXIV 784333
9 | chrX 745742
10 | chrXI 666454
11 | chrV 576869
12 | chrVIII 562643
13 | chrIX 439885
14 | chrIII 316617
15 | chrVI 270148
16 | chrI 230208
17 | chrM 85779
18 | 2micron 6318
19 |
--------------------------------------------------------------------------------
/resources/assemblies/sacCer3.len:
--------------------------------------------------------------------------------
1 | chrI 230218
2 | chrII 813184
3 | chrIII 316620
4 | chrIV 1531933
5 | chrIX 439888
6 | chrV 576874
7 | chrVI 270161
8 | chrVII 1090940
9 | chrVIII 562643
10 | chrX 745751
11 | chrXI 666816
12 | chrXII 1078177
13 | chrXIII 924431
14 | chrXIV 784333
15 | chrXV 1091291
16 | chrXVI 948066
17 | chrM 85779
18 |
--------------------------------------------------------------------------------
/sam_fa_indices.loc.sample:
--------------------------------------------------------------------------------
1 | #This is a sample file distributed with Galaxy that enables tools
2 | #to use a directory of Samtools indexed sequences data files. You will need
3 | #to create these data files and then create a sam_fa_indices.loc file
4 | #similar to this one (store it in this directory) that points to
5 | #the directories in which those files are stored. The sam_fa_indices.loc
6 | #file has this format (white space characters are TAB characters):
7 | #
8 | #index <seq> <location>
9 | #
10 | #So, for example, if you had the hg18 index stored in
11 | #/depot/data2/galaxy/sam/,
12 | #then the sam_fa_indices.loc entry would look like this:
13 | #
14 | #index hg18 /depot/data2/galaxy/sam/hg18.fa
15 | #
16 | #and your /depot/data2/galaxy/sam/ directory
17 | #would contain hg18.fa and hg18.fa.fai files:
18 | #
19 | #-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.fa
20 | #-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.fa.fai
21 | #
22 | #Your sam_fa_indices.loc file should include an entry per line for
23 | #each index set you have stored. The file in the path does actually
24 | #exist, but it should never be directly used. Instead, the name serves
25 | #as a prefix for the index file. For example:
26 | #
27 | #index hg18 /depot/data2/galaxy/sam/hg18.fa
28 | #index hg19 /depot/data2/galaxy/sam/hg19.fa
29 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/AssemblyConverter.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Files;
5 | import java.nio.file.Path;
6 | import java.nio.file.Paths;
7 | import java.util.zip.DataFormatException;
8 |
9 | import org.apache.log4j.Logger;
10 |
11 | import com.beust.jcommander.IStringConverter;
12 | import com.beust.jcommander.ParameterException;
13 |
14 | /**
15 | * @author timpalpant
16 | *
17 | */
18 | public class AssemblyConverter implements IStringConverter {
19 |
20 | private static final Logger log = Logger.getLogger(AssemblyConverter.class);
21 |
22 | public static final Path ASSEMBLIES_DIR = Paths.get("resources", "assemblies");
23 |
24 | @Override
25 | public Assembly convert(String value) throws ParameterException {
26 | // Look for the assembly in the resources/assemblies directory
27 | Path p = ASSEMBLIES_DIR.resolve(value + ".len");
28 |
29 | // If it does not exist in the assemblies directory, check if it is a path
30 | // to a file
31 | if (!Files.isReadable(p)) {
32 | PathConverter converter = new PathConverter();
33 | p = converter.convert(value);
34 | // If it does not exist, then throw an exception that the assembly cannot
35 | // be found
36 | if (!Files.isReadable(p)) {
37 | throw new ParameterException("Cannot find Assembly file: " + value);
38 | }
39 | }
40 |
41 | // Attempt to load the assembly from file
42 | try {
43 | return new Assembly(p);
44 | } catch (IOException | DataFormatException e) {
45 | log.error("Error loading Assembly from file: " + p);
46 | throw new ParameterException(e);
47 | }
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/AssemblyFactory.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics;
2 |
3 | import com.beust.jcommander.IStringConverterFactory;
4 |
5 | /**
6 | * @author timpalpant
7 | *
8 | */
9 | public class AssemblyFactory implements IStringConverterFactory {
10 |
11 | /*
12 | * (non-Javadoc)
13 | *
14 | * @see
15 | * com.beust.jcommander.IStringConverterFactory#getConverter(java.lang.Class)
16 | */
17 | @Override
18 | public Class getConverter(Class forType) {
19 | if (forType.equals(Assembly.class)) {
20 | return AssemblyConverter.class;
21 | } else {
22 | return null;
23 | }
24 | }
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/CommandLineTool.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics;
2 |
3 | import java.io.IOException;
4 | import java.util.Arrays;
5 |
6 | import org.apache.commons.lang3.StringUtils;
7 |
8 | import com.beust.jcommander.JCommander;
9 | import com.beust.jcommander.ParameterException;
10 |
11 | import net.sf.samtools.SAMFileReader;
12 | import net.sf.samtools.SAMFileReader.ValidationStringency;
13 |
14 | /**
15 |  * Base class for command-line tools. An instance is handed to JCommander for
16 |  * argument parsing, after which {@link #run()} is invoked.
17 |  *
18 |  * @author timpalpant
19 |  *
20 |  */
21 | public abstract class CommandLineTool {
22 |
23 |   /**
24 |    * The default bite-size to use for applications that process files in chunks
25 |    * TODO Read from a configuration file
26 |    */
27 |   public static final int DEFAULT_CHUNK_SIZE = 10_000_000;
28 |
29 |   /**
30 |    * Do the main computation of this tool
31 |    *
32 |    * @throws IOException if an I/O error occurs while the tool is running
33 |    */
34 |   public abstract void run() throws IOException;
35 |
36 |   /**
37 |    * Parse command-line arguments and run the tool. If the arguments cannot be
38 |    * parsed, the error and usage are printed and the JVM exits with status -1.
39 |    *
40 |    * @param args the command-line arguments
41 |    * @throws CommandLineToolException if {@link #run()} throws an IOException
42 |    */
43 |   public void instanceMain(String[] args) throws CommandLineToolException {
44 |     // Initialize the command-line options parser
45 |     JCommander jc = new JCommander(this);
46 |
47 |     // Add factories for parsing Paths and Assemblies
48 |     jc.addConverterFactory(new PathFactory());
49 |     jc.addConverterFactory(new AssemblyFactory());
50 |
51 |     // Set the program name to the last two components of the class name
52 |     // (e.g. "converters.RomanNumeralize"). Clamp the start index so that a
53 |     // class name with fewer than two components (default package) does not
54 |     // make copyOfRange throw ArrayIndexOutOfBoundsException.
55 |     String[] nameParts = getClass().getName().split("\\.");
56 |     int firstPart = Math.max(0, nameParts.length - 2);
57 |     String shortName = StringUtils.join(Arrays.copyOfRange(nameParts, firstPart, nameParts.length), '.');
58 |     jc.setProgramName(shortName);
59 |
60 |     try {
61 |       jc.parse(args);
62 |     } catch (ParameterException e) {
63 |       System.err.println(e.getMessage());
64 |       jc.usage();
65 |       System.exit(-1);
66 |     }
67 |
68 |     // Run with lenient SAM validation, restoring the previous default afterwards
69 |     ValidationStringency stringency = SAMFileReader.getDefaultValidationStringency();
70 |     try {
71 |       SAMFileReader.setDefaultValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
72 |       run();
73 |     } catch (IOException e) {
74 |       throw new CommandLineToolException("IO error while running", e);
75 |     } finally {
76 |       SAMFileReader.setDefaultValidationStringency(stringency);
77 |     }
78 |   }
79 | }
80 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/CommandLineToolException.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics;
2 |
3 | /**
4 |  * Unchecked exception used by the command-line tools to report a failure.
5 |  *
6 |  * @author timpalpant
7 |  */
8 | public class CommandLineToolException extends RuntimeException {
9 |
10 |   private static final long serialVersionUID = 4740440799806133636L;
11 |
12 |   /** Create an exception with neither a message nor a cause. */
13 |   public CommandLineToolException() {
14 |     super();
15 |   }
16 |
17 |   /**
18 |    * @param message the detail message
19 |    */
20 |   public CommandLineToolException(String message) {
21 |     super(message);
22 |   }
23 |
24 |   /**
25 |    * @param cause the underlying cause
26 |    */
27 |   public CommandLineToolException(Throwable cause) {
28 |     super(cause);
29 |   }
30 |
31 |   /**
32 |    * @param message the detail message
33 |    * @param cause the underlying cause
34 |    */
35 |   public CommandLineToolException(String message, Throwable cause) {
36 |     super(message, cause);
37 |   }
38 |
39 |   /**
40 |    * @param message the detail message
41 |    * @param cause the underlying cause
42 |    * @param enableSuppression passed through to the RuntimeException constructor
43 |    * @param writableStackTrace passed through to the RuntimeException constructor
44 |    */
45 |   public CommandLineToolException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
46 |     super(message, cause, enableSuppression, writableStackTrace);
47 |   }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/NucleosomeCallsFileReader.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import net.sf.samtools.TabixWriter;
7 | import net.sf.samtools.TabixWriter.Conf;
8 |
9 | import edu.unc.genomics.IntervalFactory;
10 | import edu.unc.genomics.io.TextIntervalFileReader;
11 |
12 | /**
13 |  * Interval file reader for nucleosome calls files: a
14 |  * {@link TextIntervalFileReader} whose lines are parsed with
15 |  * {@link NucleosomeCall#parse(String)}.
16 |  *
17 |  * @author timpalpant
18 |  */
19 | public class NucleosomeCallsFileReader extends TextIntervalFileReader {
20 |
21 |   /**
22 |    * @param p the nucleosome calls file to read
23 |    * @throws IOException if thrown by the superclass constructor
24 |    */
25 |   public NucleosomeCallsFileReader(Path p) throws IOException {
26 |     super(p, new NucleosomeCallFactory());
27 |   }
28 |
29 |   /**
30 |    * Factory handed to the superclass; it parses lines into nucleosome calls
31 |    * and supplies the Tabix configuration.
32 |    */
33 |   public static class NucleosomeCallFactory implements IntervalFactory {
34 |
35 |     // NOTE(review): arguments are presumably (preset, sequence column, begin
36 |     // column, end column, comment character, lines to skip) -- confirm
37 |     // against TabixWriter.Conf
38 |     public static final Conf NUCLEOSOME_CALL_CONF = new Conf(0, 1, 2, 3, '#', 0);
39 |
40 |     @Override
41 |     public NucleosomeCall parse(String line) {
42 |       return NucleosomeCall.parse(line);
43 |     }
44 |
45 |     @Override
46 |     public Conf tabixConf() {
47 |       return NUCLEOSOME_CALL_CONF;
48 |     }
49 |
50 |   }
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/PathConverter.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics;
2 |
3 | import java.nio.file.Path;
4 | import java.nio.file.Paths;
5 |
6 | import com.beust.jcommander.IStringConverter;
7 |
8 | /**
9 |  * JCommander converter that turns a command-line argument into a
10 |  * {@link Path}.
11 |  *
12 |  * @author timpalpant
13 |  */
14 | public class PathConverter implements IStringConverter<Path> {
15 |
16 |   /**
17 |    * @param value the path string given on the command line
18 |    * @return the result of {@link Paths#get(String, String...)} for {@code value}
19 |    */
20 |   @Override
21 |   public Path convert(String value) {
22 |     return Paths.get(value);
23 |   }
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/PathFactory.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package edu.unc.genomics;
5 |
6 | import java.nio.file.Path;
7 |
8 | import com.beust.jcommander.IStringConverterFactory;
9 |
10 | /**
11 |  * Converter factory that maps {@link Path} parameters to
12 |  * {@link PathConverter}.
13 |  *
14 |  * @author timpalpant
15 |  */
16 | public class PathFactory implements IStringConverterFactory {
17 |
18 |   /**
19 |    * @param forType the parameter type a converter is requested for
20 |    * @return {@code PathConverter.class} when {@code forType} is
21 |    *         {@code Path.class}, otherwise {@code null}
22 |    * @see com.beust.jcommander.IStringConverterFactory#getConverter(java.lang.Class)
23 |    */
24 |   @Override
25 |   public Class getConverter(Class forType) {
26 |     if (!forType.equals(Path.class)) {
27 |       return null;
28 |     }
29 |     return PathConverter.class;
30 |   }
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/ReadablePathValidator.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package edu.unc.genomics;
5 |
6 | import java.nio.file.Files;
7 | import java.nio.file.Path;
8 |
9 | import com.beust.jcommander.IParameterValidator;
10 | import com.beust.jcommander.ParameterException;
11 |
12 | /**
13 | * @author timpalpant
14 | *
15 | */
16 | public class ReadablePathValidator implements IParameterValidator {
17 |
18 | /*
19 | * (non-Javadoc)
20 | *
21 | * @see com.beust.jcommander.IParameterValidator#validate(java.lang.String,
22 | * java.lang.String)
23 | */
24 | @Override
25 | public void validate(String name, String value) throws ParameterException {
26 | PathConverter converter = new PathConverter();
27 | Path p = converter.convert(value);
28 | if (!Files.isReadable(p)) {
29 | throw new ParameterException("Parameter " + name + " should be a readable file");
30 | }
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/converters/FastqIlluminaToSanger.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.converters;
2 |
3 | import java.io.IOException;
4 | import java.io.PrintWriter;
5 | import java.nio.charset.Charset;
6 | import java.nio.file.Files;
7 | import java.nio.file.Path;
8 |
9 | import net.sf.picard.fastq.FastqReader;
10 | import net.sf.picard.fastq.FastqRecord;
11 |
12 | import org.apache.log4j.Logger;
13 |
14 | import com.beust.jcommander.Parameter;
15 |
16 | import edu.unc.genomics.CommandLineTool;
17 | import edu.unc.genomics.ReadablePathValidator;
18 |
19 | /**
20 | * Converts a FASTQ file with Illumina quality scores (Phred+64) to Sanger
21 | * quality scores (Phred+33)
22 | *
23 | * @author timpalpant
24 | *
25 | */
26 | public class FastqIlluminaToSanger extends CommandLineTool {
27 |
28 | private static final Logger log = Logger.getLogger(FastqIlluminaToSanger.class);
29 |
30 | @Parameter(names = { "-i", "--input" }, description = "Input file (FASTQ, Illumina)", required = true, validateWith = ReadablePathValidator.class)
31 | public Path inputFile;
32 | @Parameter(names = { "-o", "--output" }, description = "Output file (FASTQ, Sanger)", required = true)
33 | public Path outputFile;
34 |
35 | @Override
36 | public void run() throws IOException {
37 | int count = 0;
38 | try (FastqReader reader = new FastqReader(inputFile.toFile());
39 | PrintWriter writer = new PrintWriter(Files.newBufferedWriter(outputFile, Charset.defaultCharset()))) {
40 | for (FastqRecord r : reader) {
41 | writer.print("@");
42 | writer.println(r.getReadHeader());
43 | writer.println(r.getReadString());
44 | writer.print("+");
45 | writer.println(r.getBaseQualityHeader());
46 |
47 | // Convert the quality score to Sanger format
48 | char[] qual = r.getBaseQualityString().toCharArray();
49 | for (int i = 0; i < qual.length; i++) {
50 | qual[i] -= 31;
51 | }
52 | writer.println(qual);
53 |
54 | count++;
55 | }
56 | }
57 |
58 | log.info("Processed " + count + " reads");
59 | }
60 |
61 | public static void main(String[] args) {
62 | new FastqIlluminaToSanger().instanceMain(args);
63 | }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/converters/GeneTrackToBedGraph.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.converters;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import org.apache.log4j.Logger;
7 |
8 | import com.beust.jcommander.Parameter;
9 |
10 | import edu.unc.genomics.GeneTrackEntry;
11 | import edu.unc.genomics.CommandLineTool;
12 | import edu.unc.genomics.io.BedGraphFileWriter;
13 | import edu.unc.genomics.io.GeneTrackFileReader;
14 |
15 | /**
16 |  * Convert a GeneTrack format file to BedGraph by passing each GeneTrack
17 |  * entry to a BedGraph writer.
18 |  *
19 |  * @author timpalpant
20 |  */
21 | public class GeneTrackToBedGraph extends CommandLineTool {
22 |
23 |   private static final Logger log = Logger.getLogger(GeneTrackToBedGraph.class);
24 |
25 |   @Parameter(names = { "-i", "--input" }, description = "Input file (GeneTrack format)", required = true)
26 |   public Path gtFile;
27 |   @Parameter(names = { "-o", "--output" }, description = "Output file (BedGraph)", required = true)
28 |   public Path outputFile;
29 |
30 |   /**
31 |    * Read every entry of the GeneTrack input and write it to the output.
32 |    *
33 |    * @throws IOException if reading the input or writing the output fails
34 |    */
35 |   @Override
36 |   public void run() throws IOException {
37 |     log.debug("Initializing input/output files");
38 |     try (GeneTrackFileReader source = new GeneTrackFileReader(gtFile);
39 |         BedGraphFileWriter sink = new BedGraphFileWriter<>(outputFile)) {
40 |       for (GeneTrackEntry record : source) {
41 |         sink.write(record);
42 |       }
43 |     }
44 |   }
45 |
46 |   public static void main(String[] args) {
47 |     GeneTrackToBedGraph tool = new GeneTrackToBedGraph();
48 |     tool.instanceMain(args);
49 |   }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/converters/IntervalToBed.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.converters;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import org.apache.log4j.Logger;
7 |
8 | import com.beust.jcommander.Parameter;
9 |
10 | import edu.unc.genomics.CommandLineTool;
11 | import edu.unc.genomics.Interval;
12 | import edu.unc.genomics.io.BedFileWriter;
13 | import edu.unc.genomics.io.IntervalFileReader;
14 |
15 | /**
16 | * Convert any known interval format to Bed-6 format
17 | *
18 | * @author timpalpant
19 | *
20 | */
21 | public class IntervalToBed extends CommandLineTool {
22 |
23 | private static final Logger log = Logger.getLogger(IntervalToBed.class);
24 |
25 | @Parameter(names = { "-i", "--input" }, description = "Input file (Bedgraph/BigBed/GFF/SAM/BAM format)", required = true)
26 | public Path inputFile;
27 | @Parameter(names = { "-o", "--output" }, description = "Output file (Bed format)", required = true)
28 | public Path outputFile;
29 |
30 | @Override
31 | public void run() throws IOException {
32 | log.debug("Initializing input/output files");
33 | try (IntervalFileReader extends Interval> reader = IntervalFileReader.autodetect(inputFile);
34 | BedFileWriter writer = new BedFileWriter<>(outputFile)) {
35 | for (Interval entry : reader) {
36 | writer.write(entry);
37 | }
38 | }
39 | }
40 |
41 | public static void main(String[] args) {
42 | new IntervalToBed().instanceMain(args);
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/converters/IntervalToWig.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.converters;
2 |
3 | import java.io.IOException;
4 | import java.util.Iterator;
5 |
6 | import org.apache.log4j.Logger;
7 |
8 | import com.beust.jcommander.Parameter;
9 |
10 | import edu.unc.genomics.CommandLineToolException;
11 | import edu.unc.genomics.Interval;
12 | import edu.unc.genomics.ReadMapperTool;
13 | import edu.unc.genomics.ValuedInterval;
14 | import edu.unc.genomics.io.IntervalFileReader;
15 |
16 | /**
17 |  * Convert interval-based data such as microarray data in Bed, BedGraph, or GFF
18 |  * format to Wig format. Overlapping probes in the original interval dataset are
19 |  * averaged.
20 |  *
21 |  * @author timpalpant
22 |  *
23 |  */
24 | public class IntervalToWig extends ReadMapperTool {
25 |
26 |   private static final Logger log = Logger.getLogger(IntervalToWig.class);
27 |
28 |   @Parameter(names = { "-z", "--zero" }, description = "Assume zero where there is no data (default = NaN)")
29 |   public boolean defaultZero = false;
30 |
31 |   /**
32 |    * Average the values of all intervals overlapping each base pair of a chunk.
33 |    *
34 |    * @param reader the interval file to query
35 |    * @param chunk the region to compute values for
36 |    * @return one value per base pair of {@code chunk}; a base pair without
37 |    *         data is NaN, or 0 when {@code defaultZero} is set
38 |    * @throws IOException if an I/O error occurs while reading intervals
39 |    * @throws CommandLineToolException if an entry is not a ValuedInterval
40 |    */
41 |   @Override
42 |   public float[] compute(IntervalFileReader<? extends Interval> reader, Interval chunk) throws IOException {
43 |     float[] sum = new float[chunk.length()];
44 |     int[] count = new int[chunk.length()];
45 |
46 |     Iterator<? extends Interval> it = reader.query(chunk);
47 |     while (it.hasNext()) {
48 |       ValuedInterval entry;
49 |       try {
50 |         entry = (ValuedInterval) it.next();
51 |       } catch (ClassCastException e) {
52 |         log.error("Input file does not appear to be a valued interval format (Bed/BedGraph/GFF/GeneTrack)!");
53 |         throw new CommandLineToolException(
54 |             "Input file does not appear to be a valued interval format (Bed/BedGraph/GFF/GeneTrack)!", e);
55 |       }
56 |
57 |       // Entries without a value contribute nothing to the average
58 |       if (entry.getValue() == null) {
59 |         continue;
60 |       }
61 |
62 |       // Clamp the entry to the chunk and accumulate its value over that span
63 |       float value = entry.getValue().floatValue();
64 |       int entryStart = Math.max(chunk.getStart(), entry.low());
65 |       int entryStop = Math.min(chunk.getStop(), entry.high());
66 |       for (int i = entryStart; i <= entryStop; i++) {
67 |         sum[i - chunk.getStart()] += value;
68 |         count[i - chunk.getStart()]++;
69 |       }
70 |     }
71 |
72 |     // Calculate the average at each base pair in the chunk. Where count is 0
73 |     // the float division 0/0 yields NaN; it is skipped only when defaultZero
74 |     // is set, which leaves the initial 0 in place.
75 |     for (int i = 0; i < sum.length; i++) {
76 |       if (count[i] != 0 || !defaultZero) {
77 |         sum[i] /= count[i];
78 |       }
79 |     }
80 |
81 |     return sum;
82 |   }
83 |
84 |   public static void main(String[] args) {
85 |     new IntervalToWig().instanceMain(args);
86 |   }
87 |
88 | }
89 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/converters/RomanNumeralize.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.converters;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.IOException;
6 | import java.nio.charset.Charset;
7 | import java.nio.file.Files;
8 | import java.nio.file.Path;
9 | import java.util.regex.Matcher;
10 | import java.util.regex.Pattern;
11 |
12 | import org.apache.log4j.Logger;
13 |
14 | import com.beust.jcommander.Parameter;
15 |
16 | import edu.unc.genomics.CommandLineTool;
17 | import edu.unc.genomics.ReadablePathValidator;
18 | import edu.unc.utils.RomanNumeral;
19 |
20 | /**
21 |  * Convert instances of "chr12" to "chrXII" in a text file, etc.
22 |  *
23 |  * @author timpalpant
24 |  *
25 |  */
26 | public class RomanNumeralize extends CommandLineTool {
27 |
28 |   private static final Logger log = Logger.getLogger(RomanNumeralize.class);
29 |
30 |   @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class)
31 |   public Path inputFile;
32 |   @Parameter(names = { "-o", "--output" }, description = "Output file", required = true)
33 |   public Path outputFile;
34 |
35 |   /**
36 |    * Pattern for finding "chr12" tokens ("chr" followed by one or two digits).
37 |    * The negative lookahead rejects longer digit runs, so "chr123" is left
38 |    * untouched instead of being rewritten as "chrXII3".
39 |    */
40 |   Pattern p = Pattern.compile("chr[\\d]{1,2}(?![\\d])");
41 |
42 |   /**
43 |    * Copy the input to the output line by line, replacing each matched token
44 |    * with "chr" followed by the Roman numeral for its number.
45 |    *
46 |    * @throws IOException if reading the input or writing the output fails
47 |    */
48 |   @Override
49 |   public void run() throws IOException {
50 |     try (BufferedReader reader = Files.newBufferedReader(inputFile, Charset.defaultCharset());
51 |         BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
52 |       log.debug("Copying input to output and replacing with Roman Numerals");
53 |       String line;
54 |       while ((line = reader.readLine()) != null) {
55 |         Matcher m = p.matcher(line);
56 |         // appendReplacement takes a StringBuffer (the StringBuilder overload needs Java 9+)
57 |         StringBuffer converted = new StringBuffer(line.length());
58 |         while (m.find()) {
59 |           // Drop the 3-character "chr" prefix to get the number
60 |           int arabic = Integer.parseInt(m.group().substring(3));
61 |           String roman = RomanNumeral.int2roman(arabic);
62 |           m.appendReplacement(converted, "chr" + roman);
63 |         }
64 |         m.appendTail(converted);
65 |
66 |         writer.write(converted.toString());
67 |         writer.newLine();
68 |       }
69 |     }
70 |   }
71 |
72 |   public static void main(String[] args) {
73 |     new RomanNumeralize().instanceMain(args);
74 |   }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/ngs/BaseAlignCounts.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.ngs;
2 |
3 | import java.io.IOException;
4 | import java.util.Iterator;
5 |
6 | import com.beust.jcommander.Parameter;
7 |
8 | import edu.unc.genomics.Interval;
9 | import edu.unc.genomics.ReadMapperTool;
10 | import edu.unc.genomics.io.IntervalFileReader;
11 |
12 | /**
13 |  * This tool calculates the coverage of sequencing reads (or any interval data)
14 |  * and creates a new Wig file with the number of reads overlapping each base
15 |  * pair.
16 |  *
17 |  * @author timpalpant
18 |  *
19 |  */
20 | public class BaseAlignCounts extends ReadMapperTool {
21 |
22 |   @Parameter(names = { "-x", "--extend" }, description = "Extend reads from 5' end (default = fragment length)")
23 |   public Integer extend = -1;
24 |
25 |   /**
26 |    * Count the entries overlapping each base pair of a chunk.
27 |    *
28 |    * @param reader the interval file to query
29 |    * @param chunk the region to compute counts for
30 |    * @return the number of entries covering each base pair of {@code chunk}
31 |    * @throws IOException if an I/O error occurs while reading intervals
32 |    */
33 |   @Override
34 |   public float[] compute(IntervalFileReader<? extends Interval> reader, Interval chunk) throws IOException {
35 |     float[] count = new float[chunk.length()];
36 |
37 |     // null or -1 (the default) means: use each entry's own stop coordinate
38 |     boolean extending = extend != null && extend != -1;
39 |
40 |     // Need to pad the query if extending reads
41 |     int paddedStart = chunk.getStart();
42 |     int paddedStop = chunk.getStop();
43 |     if (extending) {
44 |       paddedStart = Math.max(chunk.getStart() - extend - 1, 1);
45 |       paddedStop = Math.min(chunk.getStop() + extend + 1, assembly.getChrLength(chunk.getChr()));
46 |     }
47 |
48 |     Iterator<? extends Interval> it = reader.query(chunk.getChr(), paddedStart, paddedStop);
49 |     while (it.hasNext()) {
50 |       Interval entry = it.next();
51 |       int entryStop = entry.getStop();
52 |       if (extending) {
53 |         // NOTE(review): presumably getStart() is the read's 5' end on either
54 |         // strand -- confirm in Interval
55 |         if (entry.isWatson()) {
56 |           entryStop = entry.getStart() + extend - 1;
57 |         } else {
58 |           entryStop = entry.getStart() - extend + 1;
59 |         }
60 |       }
61 |
62 |       // Clamp to the current chunk
63 |       int low = Math.max(Math.min(entry.getStart(), entryStop), chunk.getStart());
64 |       int high = Math.min(Math.max(entry.getStart(), entryStop), chunk.getStop());
65 |       for (int i = low; i <= high; i++) {
66 |         count[i - chunk.getStart()]++;
67 |       }
68 |     }
69 |
70 |     return count;
71 |   }
72 |
73 |   public static void main(String[] args) {
74 |     new BaseAlignCounts().instanceMain(args);
75 |   }
76 | }
--------------------------------------------------------------------------------
/src/edu/unc/genomics/ngs/IntervalLengthDistribution.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.ngs;
2 |
3 | import java.io.IOException;
4 | import java.io.PrintWriter;
5 | import java.nio.charset.Charset;
6 | import java.nio.file.Files;
7 | import java.nio.file.Path;
8 |
9 | import org.apache.commons.math3.stat.Frequency;
10 | import org.apache.log4j.Logger;
11 |
12 | import com.beust.jcommander.Parameter;
13 |
14 | import edu.unc.genomics.CommandLineTool;
15 | import edu.unc.genomics.Interval;
16 | import edu.unc.genomics.ReadablePathValidator;
17 | import edu.unc.genomics.io.IntervalFileReader;
18 |
19 | /**
20 |  * Generate a histogram of interval lengths, such as read lengths or gene
21 |  * lengths. One tab-separated row is written for every length from the
22 |  * shortest to the longest observed, including lengths that never occur.
23 |  *
24 |  * @author timpalpant
25 |  *
26 |  */
27 | public class IntervalLengthDistribution extends CommandLineTool {
28 |
29 |   private static final Logger log = Logger.getLogger(IntervalLengthDistribution.class);
30 |
31 |   @Parameter(names = { "-i", "--input" }, description = "Interval file", required = true, validateWith = ReadablePathValidator.class)
32 |   public Path inputFile;
33 |   @Parameter(names = { "-f", "--freq" }, description = "Output frequencies rather than counts")
34 |   public boolean outputFreq = false;
35 |   @Parameter(names = { "-o", "--output" }, description = "Output file", required = true)
36 |   public Path outputFile;
37 |
38 |   /**
39 |    * Tally the length of every interval in the input, then write the histogram.
40 |    *
41 |    * @throws IOException if reading the input or writing the output fails
42 |    */
43 |   @Override
44 |   public void run() throws IOException {
45 |     log.debug("Generating histogram of interval lengths");
46 |     Frequency freq = new Frequency();
47 |     int shortest = Integer.MAX_VALUE;
48 |     int longest = -1;
49 |     try (IntervalFileReader<? extends Interval> reader = IntervalFileReader.autodetect(inputFile)) {
50 |       for (Interval interval : reader) {
51 |         int length = interval.length();
52 |         freq.addValue(length);
53 |         shortest = Math.min(shortest, length);
54 |         longest = Math.max(longest, length);
55 |       }
56 |     }
57 |
58 |     log.debug("Writing histogram output");
59 |     try (PrintWriter writer = new PrintWriter(Files.newBufferedWriter(outputFile, Charset.defaultCharset()))) {
60 |       // If no intervals were read, shortest > longest and no rows are written
61 |       for (int length = shortest; length <= longest; length++) {
62 |         // Format each branch separately so counts stay integral in the output
63 |         String value = outputFreq ? String.valueOf(freq.getPct(length)) : String.valueOf(freq.getCount(length));
64 |         writer.println(length + "\t" + value);
65 |       }
66 |     }
67 |   }
68 |
69 |   public static void main(String[] args) {
70 |     new IntervalLengthDistribution().instanceMain(args);
71 |   }
72 |
73 | }
74 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/ngs/RollingReadLength.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.ngs;
2 |
3 | import java.io.IOException;
4 | import java.util.Iterator;
5 |
6 | import edu.unc.genomics.Interval;
7 | import edu.unc.genomics.ReadMapperTool;
8 | import edu.unc.genomics.io.IntervalFileReader;
9 |
10 | /**
11 |  * Creates a new Wig file with the mean read length of reads covering each base
12 |  * pair. Base pairs covered by no reads are reported as NaN.
13 |  *
14 |  * @author timpalpant
15 |  *
16 |  */
17 | public class RollingReadLength extends ReadMapperTool {
18 |
19 |   /**
20 |    * Average the lengths of the reads overlapping each base pair of a chunk.
21 |    *
22 |    * @param reader the reads to average
23 |    * @param chunk the region being computed
24 |    * @return chunk.length() means indexed from chunk.getStart(); NaN where no read overlaps
25 |    * @throws IOException if the reader query throws it
26 |    */
27 |   @Override
28 |   public float[] compute(IntervalFileReader<? extends Interval> reader, Interval chunk) throws IOException {
29 |     int chunkStart = chunk.getStart();
30 |     int[] sum = new int[chunk.length()];
31 |     int[] count = new int[chunk.length()];
32 |
33 |     Iterator<? extends Interval> it = reader.query(chunk);
34 |     while (it.hasNext()) {
35 |       Interval entry = it.next();
36 |       int readLength = entry.length();
37 |       // Clamp the read to the current chunk
38 |       int first = Math.max(entry.low(), chunkStart);
39 |       int last = Math.min(entry.high(), chunk.getStop());
40 |       for (int bp = first; bp <= last; bp++) {
41 |         sum[bp - chunkStart] += readLength;
42 |         count[bp - chunkStart]++;
43 |       }
44 |     }
45 |
46 |     // Calculate the average at each base pair
47 |     float[] avg = new float[sum.length];
48 |     for (int i = 0; i < avg.length; i++) {
49 |       avg[i] = (count[i] == 0) ? Float.NaN : ((float) sum[i]) / count[i];
50 |     }
51 |
52 |     return avg;
53 |   }
54 |
55 |   public static void main(String[] args) {
56 |     new RollingReadLength().instanceMain(args);
57 |   }
58 | }
--------------------------------------------------------------------------------
/src/edu/unc/genomics/ngs/SplitReads.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.ngs;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.apache.log4j.Logger;
9 |
10 | import com.beust.jcommander.Parameter;
11 |
12 | import edu.unc.genomics.CommandLineTool;
13 | import edu.unc.genomics.Interval;
14 | import edu.unc.genomics.ReadablePathValidator;
15 | import edu.unc.genomics.io.IntervalFileReader;
16 | import edu.unc.genomics.io.IntervalFileWriter;
17 |
18 | /**
19 |  * This tool splits sequencing reads into bins. Reads are dealt to the bins in
20 |  * round-robin order, and bin i is written next to the requested output file
21 |  * with the bin index inserted before the extension (e.g. reads.0.bed).
22 |  *
23 |  * @author timpalpant
24 |  *
25 |  */
26 | public class SplitReads extends CommandLineTool {
27 |
28 |   private static final Logger log = Logger.getLogger(SplitReads.class);
29 |
30 |   @Parameter(names = { "-i", "--input" }, required = true, description = "Input file", validateWith = ReadablePathValidator.class)
31 |   public Path input;
32 |   @Parameter(names = { "-b", "--bins" }, description = "Number of bins to split reads into")
33 |   public int bins = 5;
34 |   // Required: run() derives every bin's file name from this path
35 |   @Parameter(names = { "-o", "--output" }, required = true, description = "Output file")
36 |   public Path output;
37 |
38 |   /**
39 |    * Open one writer per bin and distribute the input reads among them.
40 |    *
41 |    * @throws IllegalArgumentException if fewer than one bin is requested
42 |    * @throws IOException if reading the input or writing a bin fails
43 |    */
44 |   @Override
45 |   public void run() throws IOException {
46 |     if (bins < 1) {
47 |       throw new IllegalArgumentException("Number of bins must be at least 1, but was " + bins);
48 |     }
49 |
50 |     // Split only the file name (not the whole path) into basename and extension,
51 |     // so that dots in parent directories are ignored and a name without an
52 |     // extension is accepted
53 |     String fileName = output.getFileName().toString();
54 |     int dot = fileName.lastIndexOf('.');
55 |     boolean hasExt = dot >= 0 && dot < fileName.length() - 1;
56 |     String basename = hasExt ? fileName.substring(0, dot) : fileName;
57 |     String ext = hasExt ? fileName.substring(dot) : "";
58 |
59 |     List<IntervalFileWriter<Interval>> writers = new ArrayList<>();
60 |     try {
61 |       for (int i = 0; i < bins; i++) {
62 |         // resolveSibling places the bin beside the output file; resolve() would
63 |         // have treated the output file itself as a directory
64 |         Path outFile = output.resolveSibling(basename + '.' + i + ext);
65 |         writers.add(new IntervalFileWriter<Interval>(outFile));
66 |       }
67 |
68 |       try (IntervalFileReader<? extends Interval> reader = IntervalFileReader.autodetect(input)) {
69 |         int current = 0;
70 |         int count = 0;
71 |         for (Interval interval : reader) {
72 |           writers.get(current).write(interval);
73 |           current = (current + 1) % bins;
74 |           if (++count % 1_000_000 == 0) {
75 |             log.debug("Processed " + count + " reads.");
76 |           }
77 |         }
78 |       }
79 |     } finally {
80 |       // Close whatever writers were opened, even if setup or copying failed
81 |       for (IntervalFileWriter<Interval> writer : writers) {
82 |         writer.close();
83 |       }
84 |     }
85 |   }
86 |
87 |   public static void main(String[] args) {
88 |     new SplitReads().instanceMain(args);
89 |   }
90 | }
--------------------------------------------------------------------------------
/src/edu/unc/genomics/ngs/Subsample.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.ngs;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.util.Random;
6 |
7 | import org.apache.log4j.Logger;
8 |
9 | import com.beust.jcommander.Parameter;
10 |
11 | import edu.unc.genomics.CommandLineTool;
12 | import edu.unc.genomics.CommandLineToolException;
13 | import edu.unc.genomics.Interval;
14 | import edu.unc.genomics.ReadablePathValidator;
15 | import edu.unc.genomics.io.IntervalFileReader;
16 | import edu.unc.genomics.io.IntervalFileWriter;
17 |
18 | /**
19 | * Randomly select N reads out of a total of M
20 | *
21 | * @author timpalpant
22 | *
23 | */
24 | public class Subsample extends CommandLineTool {
25 |
26 | private static final Logger log = Logger.getLogger(Subsample.class);
27 |
28 | @Parameter(names = { "-i", "--input" }, required = true, description = "Input file", validateWith = ReadablePathValidator.class)
29 | public Path input;
30 | @Parameter(names = { "-n", "--select" }, required = true, description = "Number of entries to select")
31 | public int n;
32 | @Parameter(names = { "-o", "--output" }, required = true, description = "Output file")
33 | public Path output;
34 |
35 | @Override
36 | public void run() throws IOException {
37 | try (IntervalFileReader extends Interval> reader = IntervalFileReader.autodetect(input);
38 | IntervalFileWriter writer = new IntervalFileWriter<>(output)) {
39 | int nRemaining = reader.count();
40 | log.info("Input file has " + nRemaining + " entries");
41 | if (n >= reader.count()) {
42 | throw new CommandLineToolException("Cannot select " + n + " entries from a file with " + nRemaining);
43 | }
44 |
45 | // See http://eyalsch.wordpress.com/2010/04/01/random-sample/
46 | // for a nice summary of different algorithms to randomly pick n entries
47 | log.info("Randomly selecting " + n + " entries");
48 | Random rng = new Random();
49 | for (Interval entry : reader) {
50 | if (n == 0) {
51 | break;
52 | } else if (rng.nextDouble() < ((double) n) / nRemaining) {
53 | writer.write(entry);
54 | n--;
55 | }
56 |
57 | nRemaining--;
58 | }
59 | }
60 | }
61 |
62 | public static void main(String[] args) {
63 | new Subsample().instanceMain(args);
64 | }
65 | }
--------------------------------------------------------------------------------
/src/edu/unc/genomics/nucleosomes/MapDyads.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.nucleosomes;
2 |
3 | import java.io.IOException;
4 | import java.util.Iterator;
5 |
6 | import com.beust.jcommander.Parameter;
7 |
8 | import edu.unc.genomics.Interval;
9 | import edu.unc.genomics.ReadMapperTool;
10 | import edu.unc.genomics.io.IntervalFileReader;
11 |
12 | /**
13 |  * Count the number of read centers overlapping each base pair in the genome.
14 |  * With -s/--size set to a positive value, the center is instead placed half
15 |  * that size away from each read's start coordinate.
16 |  *
17 |  * @author timpalpant
18 |  *
19 |  */
20 | public class MapDyads extends ReadMapperTool {
21 |
22 |   @Parameter(names = { "-s", "--size" }, description = "Mononucleosome length (default: read length)")
23 |   public Integer nucleosomeSize;
24 |
25 |   /**
26 |    * Count the read centers falling on each base pair of a chunk.
27 |    *
28 |    * @param reader the reads to map
29 |    * @param chunk the region being computed
30 |    * @return an array of chunk.length() counts, indexed from chunk.getStart()
31 |    * @throws IOException if the reader query throws it
32 |    */
33 |   @Override
34 |   public float[] compute(IntervalFileReader<? extends Interval> reader, Interval chunk) throws IOException {
35 |     float[] count = new float[chunk.length()];
36 |     // A null or non-positive size means "use each read's own center"
37 |     boolean fixedSize = nucleosomeSize != null && nucleosomeSize > 0;
38 |
39 |     // Need to pad the query if artificially shifting read centers
40 |     int paddedStart = chunk.getStart();
41 |     int paddedStop = chunk.getStop();
42 |     if (fixedSize) {
43 |       paddedStart = Math.max(chunk.getStart() - nucleosomeSize - 1, 1);
44 |       paddedStop = Math.min(chunk.getStop() + nucleosomeSize + 1, assembly.getChrLength(chunk.getChr()));
45 |     }
46 |
47 |     Iterator<? extends Interval> it = reader.query(chunk.getChr(), paddedStart, paddedStop);
48 |     while (it.hasNext()) {
49 |       Interval entry = it.next();
50 |       int center;
51 |       if (!fixedSize) {
52 |         center = entry.center();
53 |       } else {
54 |         // Watson reads are shifted toward higher coordinates, all others toward lower
55 |         int halfSize = nucleosomeSize / 2;
56 |         center = entry.isWatson() ? entry.getStart() + halfSize : entry.getStart() - halfSize;
57 |       }
58 |
59 |       // Only map if it is in the current chunk
60 |       if (chunk.getStart() <= center && center <= chunk.getStop()) {
61 |         count[center - chunk.getStart()]++;
62 |       }
63 |     }
64 |
65 |     return count;
66 |   }
67 |
68 |   public static void main(String[] args) {
69 |     new MapDyads().instanceMain(args);
70 |   }
71 |
72 | }
--------------------------------------------------------------------------------
/src/edu/unc/genomics/visualization/StripMatrix.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.visualization;
2 |
3 | import java.io.BufferedReader;
4 | import java.io.BufferedWriter;
5 | import java.io.IOException;
6 | import java.nio.charset.Charset;
7 | import java.nio.file.Files;
8 | import java.nio.file.Path;
9 |
10 | import com.beust.jcommander.Parameter;
11 |
12 | import edu.unc.genomics.CommandLineTool;
13 | import edu.unc.genomics.ReadablePathValidator;
14 |
15 | /**
16 |  * Remove the first column and row (headers) from a matrix in matrix2png format
17 |  * so that the output is purely numerical for easy import into Matlab. Cells
18 |  * holding "-" are written as NaN.
19 |  *
20 |  * @author timpalpant
21 |  *
22 |  */
23 | public class StripMatrix extends CommandLineTool {
24 |
25 |   @Parameter(names = { "-i", "--input" }, description = "Input file (matrix2png format)", required = true, validateWith = ReadablePathValidator.class)
26 |   public Path inputFile;
27 |   @Parameter(names = { "-o", "--output" }, description = "Output file (tabular)", required = true)
28 |   public Path outputFile;
29 |
30 |   /**
31 |    * Copy the matrix to the output without its header row and first column.
32 |    *
33 |    * @throws IOException if reading the input or writing the output fails
34 |    */
35 |   public void run() throws IOException {
36 |     Charset cs = Charset.defaultCharset();
37 |     try (BufferedReader reader = Files.newBufferedReader(inputFile, cs);
38 |         BufferedWriter writer = Files.newBufferedWriter(outputFile, cs)) {
39 |       // Skip the first (header) line
40 |       reader.readLine();
41 |       for (String line = reader.readLine(); line != null; line = reader.readLine()) {
42 |         String[] row = line.split("\t");
43 |         StringBuilder stripped = new StringBuilder();
44 |         // Column 0 is the row header, so start from column 1
45 |         for (int col = 1; col < row.length; col++) {
46 |           if (col > 1) {
47 |             stripped.append("\t");
48 |           }
49 |           stripped.append("-".equalsIgnoreCase(row[col]) ? "NaN" : row[col]);
50 |         }
51 |         writer.write(stripped.toString());
52 |         writer.newLine();
53 |       }
54 |     }
55 |   }
56 |
57 |   public static void main(String[] args) {
58 |     new StripMatrix().instanceMain(args);
59 |   }
60 | }
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Add.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Paths;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.apache.log4j.Logger;
9 |
10 | import com.beust.jcommander.Parameter;
11 |
12 | import edu.unc.genomics.CommandLineToolException;
13 | import edu.unc.genomics.Interval;
14 | import edu.unc.genomics.WigMathTool;
15 | import edu.unc.genomics.io.WigFileReader;
16 | import edu.unc.genomics.io.WigFileException;
17 |
18 | /**
19 |  * This tool will add all values in the specified Wig files base pair by base
20 |  * pair. NaN values are skipped, so they contribute nothing to the sum.
21 |  *
22 |  * @author timpalpant
23 |  *
24 |  */
25 | public class Add extends WigMathTool {
26 |
27 |   private static final Logger log = Logger.getLogger(Add.class);
28 |
29 |   @Parameter(description = "Input files", required = true)
30 |   public List<String> inputFiles = new ArrayList<>();
31 |
32 |   /**
33 |    * Open every input file and register it as an input.
34 |    *
35 |    * @throws CommandLineToolException if fewer than two files are given or a file cannot be opened
36 |    */
37 |   @Override
38 |   public void setup() {
39 |     if (inputFiles.size() < 2) {
40 |       throw new CommandLineToolException("No reason to add < 2 files.");
41 |     }
42 |
43 |     log.debug("Initializing input files");
44 |     for (String inputFile : inputFiles) {
45 |       try {
46 |         addInputFile(WigFileReader.autodetect(Paths.get(inputFile)));
47 |       } catch (IOException e) {
48 |         throw new CommandLineToolException(e);
49 |       }
50 |     }
51 |     log.debug("Initialized " + inputs.size() + " input files");
52 |   }
53 |
54 |   /**
55 |    * Sum the values of all inputs over a chunk.
56 |    *
57 |    * @param chunk the region being computed
58 |    * @return an array of chunk.length() sums
59 |    */
60 |   @Override
61 |   public float[] compute(Interval chunk) throws IOException, WigFileException {
62 |     float[] sum = new float[chunk.length()];
63 |
64 |     for (WigFileReader wig : inputs) {
65 |       float[] data = wig.query(chunk).getValues();
66 |       for (int i = 0; i < data.length; i++) {
67 |         float value = data[i];
68 |         if (!Float.isNaN(value)) {
69 |           sum[i] += value;
70 |         }
71 |       }
72 |     }
73 |
74 |     return sum;
75 |   }
76 |
77 |   /**
78 |    * @param args
79 |    * @throws WigFileException
80 |    * @throws IOException
81 |    */
82 |   public static void main(String[] args) throws IOException, WigFileException {
83 |     new Add().instanceMain(args);
84 |   }
85 |
86 | }
87 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Average.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Paths;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.apache.log4j.Logger;
9 |
10 | import com.beust.jcommander.Parameter;
11 |
12 | import edu.unc.genomics.CommandLineToolException;
13 | import edu.unc.genomics.Interval;
14 | import edu.unc.genomics.WigMathTool;
15 | import edu.unc.genomics.io.WigFileReader;
16 | import edu.unc.genomics.io.WigFileException;
17 |
18 | /**
19 | * Average multiple Wig files base pair by base pair
20 | *
21 | * @author timpalpant
22 | *
23 | */
24 | public class Average extends WigMathTool {
25 |
26 | private static final Logger log = Logger.getLogger(Average.class);
27 |
28 | @Parameter(description = "Input files", required = true)
29 | public List inputFiles = new ArrayList();
30 |
31 | @Override
32 | public void setup() {
33 | if (inputFiles.size() < 2) {
34 | throw new CommandLineToolException("No reason to average < 2 files.");
35 | }
36 |
37 | log.debug("Initializing input files");
38 | for (String inputFile : inputFiles) {
39 | try {
40 | addInputFile(WigFileReader.autodetect(Paths.get(inputFile)));
41 | } catch (IOException e) {
42 | log.error("IOError initializing input Wig file: " + inputFile);
43 | e.printStackTrace();
44 | throw new CommandLineToolException(e.getMessage());
45 | }
46 | }
47 | log.debug("Initialized " + inputs.size() + " input files");
48 | }
49 |
50 | @Override
51 | public float[] compute(Interval chunk) throws IOException, WigFileException {
52 | float[] avg = new float[chunk.length()];
53 | int[] count = new int[chunk.length()];
54 |
55 | for (WigFileReader wig : inputs) {
56 | float[] data = wig.query(chunk).getValues();
57 | for (int i = 0; i < data.length; i++) {
58 | if (!Float.isNaN(data[i])) {
59 | avg[i] += data[i];
60 | count[i]++;
61 | }
62 | }
63 | }
64 |
65 | for (int i = 0; i < avg.length; i++) {
66 | if (count[i] > 0) {
67 | avg[i] /= count[i];
68 | } else {
69 | avg[i] = Float.NaN;
70 | }
71 | }
72 |
73 | return avg;
74 | }
75 |
76 | /**
77 | * @param args
78 | * @throws WigFileException
79 | * @throws IOException
80 | */
81 | public static void main(String[] args) throws IOException, WigFileException {
82 | new Average().instanceMain(args);
83 | }
84 |
85 | }
86 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Divide.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import org.apache.log4j.Logger;
7 |
8 | import com.beust.jcommander.Parameter;
9 |
10 | import edu.unc.genomics.CommandLineToolException;
11 | import edu.unc.genomics.Interval;
12 | import edu.unc.genomics.ReadablePathValidator;
13 | import edu.unc.genomics.WigMathTool;
14 | import edu.unc.genomics.io.WigFileReader;
15 | import edu.unc.genomics.io.WigFileException;
16 |
17 | /**
18 |  * Divide two (Big)Wig files base pair by base pair. Positions where the
19 |  * divisor is zero are set to NaN.
20 |  *
21 |  * @author timpalpant
22 |  *
23 |  */
24 | public class Divide extends WigMathTool {
25 |
26 |   private static final Logger log = Logger.getLogger(Divide.class);
27 |
28 |   @Parameter(names = { "-n", "--numerator" }, description = "Dividend / Numerator (file 1)", required = true, validateWith = ReadablePathValidator.class)
29 |   public Path dividendFile;
30 |   // Validated for readability like the numerator, so a bad path fails at argument parsing
31 |   @Parameter(names = { "-d", "--denominator" }, description = "Divisor / Denominator (file 2)", required = true, validateWith = ReadablePathValidator.class)
32 |   public Path divisorFile;
33 |
34 |   WigFileReader dividendReader, divisorReader;
35 |
36 |   /**
37 |    * Open both input files and register them as inputs.
38 |    *
39 |    * @throws CommandLineToolException if either file cannot be opened
40 |    */
41 |   @Override
42 |   public void setup() {
43 |     try {
44 |       dividendReader = WigFileReader.autodetect(dividendFile);
45 |       divisorReader = WigFileReader.autodetect(divisorFile);
46 |     } catch (IOException e) {
47 |       throw new CommandLineToolException(e);
48 |     }
49 |     inputs.add(dividendReader);
50 |     inputs.add(divisorReader);
51 |     log.debug("Initialized " + inputs.size() + " input files");
52 |   }
53 |
54 |   /**
55 |    * Divide the dividend by the divisor over a chunk.
56 |    *
57 |    * @param chunk the region being computed
58 |    * @return the dividend's value array, overwritten in place with the quotients
59 |    */
60 |   @Override
61 |   public float[] compute(Interval chunk) throws IOException, WigFileException {
62 |     float[] quotient = dividendReader.query(chunk).getValues();
63 |     float[] divisor = divisorReader.query(chunk).getValues();
64 |     for (int i = 0; i < quotient.length; i++) {
65 |       quotient[i] = (divisor[i] == 0) ? Float.NaN : quotient[i] / divisor[i];
66 |     }
67 |
68 |     return quotient;
69 |   }
70 |
71 |   /**
72 |    * @param args
73 |    * @throws WigFileException
74 |    * @throws IOException
75 |    */
76 |   public static void main(String[] args) throws IOException, WigFileException {
77 |     new Divide().instanceMain(args);
78 |   }
79 |
80 | }
81 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/LogTransform.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import com.beust.jcommander.Parameter;
7 |
8 | import edu.unc.genomics.CommandLineToolException;
9 | import edu.unc.genomics.Interval;
10 | import edu.unc.genomics.ReadablePathValidator;
11 | import edu.unc.genomics.WigMathTool;
12 | import edu.unc.genomics.io.WigFileReader;
13 | import edu.unc.genomics.io.WigFileException;
14 |
15 | /**
16 |  * Log-transform a (Big)Wig file
17 |  *
18 |  * @author timpalpant
19 |  *
20 |  */
21 | public class LogTransform extends WigMathTool {
22 |
23 |   @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class)
24 |   public Path inputFile;
25 |   @Parameter(names = { "-b", "--base" }, description = "Logarithm base (default = 2)")
26 |   public double base = 2;
27 |
28 |   WigFileReader reader;
29 |   // Natural log of the requested base, computed once in setup()
30 |   private double lnBase;
31 |
32 |   /**
33 |    * Precompute the change-of-base divisor, then open and register the input.
34 |    *
35 |    * @throws CommandLineToolException if the input file cannot be opened
36 |    */
37 |   @Override
38 |   public void setup() {
39 |     lnBase = Math.log(base);
40 |
41 |     try {
42 |       reader = WigFileReader.autodetect(inputFile);
43 |     } catch (IOException e) {
44 |       throw new CommandLineToolException(e);
45 |     }
46 |     inputs.add(reader);
47 |   }
48 |
49 |   /**
50 |    * Replace every value in the chunk by its logarithm in the requested base.
51 |    *
52 |    * @param chunk the region being computed
53 |    * @return the queried value array, overwritten in place
54 |    */
55 |   @Override
56 |   public float[] compute(Interval chunk) throws IOException, WigFileException {
57 |     float[] values = reader.query(chunk).getValues();
58 |     for (int i = 0; i < values.length; i++) {
59 |       // log_b(x) = ln(x) / ln(b)
60 |       double ln = Math.log(values[i]);
61 |       values[i] = (float) (ln / lnBase);
62 |     }
63 |
64 |     return values;
65 |   }
66 |
67 |   /**
68 |    * @param args
69 |    * @throws WigFileException
70 |    * @throws IOException
71 |    */
72 |   public static void main(String[] args) throws IOException, WigFileException {
73 |     new LogTransform().instanceMain(args);
74 |   }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/MovingAverageSmooth.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
7 |
8 | import com.beust.jcommander.Parameter;
9 |
10 | import edu.unc.genomics.Contig;
11 | import edu.unc.genomics.CommandLineToolException;
12 | import edu.unc.genomics.Interval;
13 | import edu.unc.genomics.ReadablePathValidator;
14 | import edu.unc.genomics.WigMathTool;
15 | import edu.unc.genomics.io.WigFileReader;
16 | import edu.unc.genomics.io.WigFileException;
17 |
18 | import edu.unc.utils.FFTUtils;
19 |
20 | /**
21 | * Smooth a (Big)Wig file with a moving average filter. See the review notes
22 | * in compute(): the window bounds there look inconsistent and need confirming.
23 | * @author timpalpant
24 | *
25 | */
26 | public class MovingAverageSmooth extends WigMathTool {
27 |
28 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class)
29 | public Path inputFile;
30 | @Parameter(names = { "-w", "--width" }, description = "Width of kernel (bp)")
31 | public int width = 11;
32 |
33 | WigFileReader reader;
34 |
35 | @Override
36 | public void setup() {
37 | try {
38 | reader = WigFileReader.autodetect(inputFile);
39 | } catch (IOException e) {
40 | throw new CommandLineToolException(e);
41 | }
42 | inputs.add(reader);
43 | }
44 |
45 | @Override
46 | public float[] compute(Interval chunk) throws IOException, WigFileException {
47 | // Pad the query by width/2 on each side (clamped to the chromosome) so that we can provide values for the ends
48 | int queryStart = Math.max(chunk.getStart() - width / 2, reader.getChrStart(chunk.getChr()));
49 | int queryStop = Math.min(chunk.getStop() + width / 2, reader.getChrStop(chunk.getChr()));
50 | Contig contig = reader.query(chunk.getChr(), queryStart, queryStop);
51 | int nValues = (int) Math.ceil(((float) chunk.length()) / step); // NOTE(review): step is not declared in this class — presumably inherited from WigMathTool; confirm
52 | float[] result = new float[nValues];
53 | for (int i = 0; i < result.length; i++) {
54 | float x = 0;
55 | int start, stop, n;
56 | if (step < width) {
57 | n = width;
58 | start = contig.getStart() + i*step + step/2 - width/2; // NOTE(review): offsets are taken from the padded contig start, not chunk.getStart(); for i = 0 this lies before contig.getStart() whenever step/2 < width/2
59 | stop = contig.getStart() + i*step + n; // NOTE(review): start..stop spans more than n positions, yet the sum below is divided by n
60 | } else {
61 | n = step;
62 | start = contig.getStart() + i*step;
63 | stop = contig.getStart() + n; // NOTE(review): unlike start, this omits the i*step offset, so stop does not advance with i — looks like a bug
64 | }
65 |
66 | for (int bp = start; bp <= stop; bp++) { // inclusive at both ends: sums stop - start + 1 values
67 | x += contig.get(bp); // NOTE(review): what contig.get returns for missing or out-of-range positions is not visible here; a NaN would propagate into x
68 | }
69 | result[i] = x / n;
70 | }
71 | return result;
72 | }
73 |
74 | public static void main(String[] args) throws IOException, WigFileException {
75 | new MovingAverageSmooth().instanceMain(args);
76 | }
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Multiply.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Paths;
5 | import java.util.ArrayList;
6 | import java.util.Arrays;
7 | import java.util.List;
8 |
9 | import org.apache.log4j.Logger;
10 |
11 | import com.beust.jcommander.Parameter;
12 |
13 | import edu.unc.genomics.CommandLineToolException;
14 | import edu.unc.genomics.Interval;
15 | import edu.unc.genomics.WigMathTool;
16 | import edu.unc.genomics.io.WigFileReader;
17 | import edu.unc.genomics.io.WigFileException;
18 |
19 | /**
20 |  * Multiply (Big)Wig files base pair by base pair. NaN values are skipped, so
21 |  * they leave the running product unchanged.
22 |  *
23 |  * @author timpalpant
24 |  *
25 |  */
26 | public class Multiply extends WigMathTool {
27 |
28 |   private static final Logger log = Logger.getLogger(Multiply.class);
29 |
30 |   @Parameter(description = "Input files", required = true)
31 |   public List<String> inputFiles = new ArrayList<>();
32 |
33 |   /**
34 |    * Open every input file and register it as an input.
35 |    *
36 |    * @throws CommandLineToolException if a file cannot be opened
37 |    */
38 |   @Override
39 |   public void setup() {
40 |     log.debug("Initializing input files");
41 |     for (String inputFile : inputFiles) {
42 |       try {
43 |         addInputFile(WigFileReader.autodetect(Paths.get(inputFile)));
44 |       } catch (IOException e) {
45 |         throw new CommandLineToolException(e);
46 |       }
47 |     }
48 |     log.debug("Initialized " + inputs.size() + " input files");
49 |   }
50 |
51 |   /**
52 |    * Multiply the values of all inputs over a chunk.
53 |    *
54 |    * @param chunk the region being computed
55 |    * @return an array of chunk.length() products, each starting from 1
56 |    */
57 |   @Override
58 |   public float[] compute(Interval chunk) throws IOException, WigFileException {
59 |     float[] product = new float[chunk.length()];
60 |     Arrays.fill(product, 1);
61 |
62 |     for (WigFileReader wig : inputs) {
63 |       float[] data = wig.query(chunk).getValues();
64 |       for (int i = 0; i < data.length; i++) {
65 |         float value = data[i];
66 |         if (!Float.isNaN(value)) {
67 |           product[i] *= value;
68 |         }
69 |       }
70 |     }
71 |
72 |     return product;
73 |   }
74 |
75 |   /**
76 |    * @param args
77 |    * @throws WigFileException
78 |    * @throws IOException
79 |    */
80 |   public static void main(String[] args) throws IOException, WigFileException {
81 |     new Multiply().instanceMain(args);
82 |   }
83 |
84 | }
85 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Root.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 |
6 | import com.beust.jcommander.Parameter;
7 |
8 | import edu.unc.genomics.CommandLineToolException;
9 | import edu.unc.genomics.Interval;
10 | import edu.unc.genomics.ReadablePathValidator;
11 | import edu.unc.genomics.WigMathTool;
12 | import edu.unc.genomics.io.WigFileReader;
13 | import edu.unc.genomics.io.WigFileException;
14 |
15 | /**
16 |  * Take the nth-root of a (Big)Wig file
17 |  *
18 |  * @author timpalpant
19 |  *
20 |  */
21 | public class Root extends WigMathTool {
22 |
23 |   @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class)
24 |   public Path inputFile;
25 |   @Parameter(names = { "-n", "--root" }, description = "Take the nth root (default = 2)")
26 |   public double root = 2;
27 |
28 |   WigFileReader reader;
29 |
30 |   /**
31 |    * Validate the requested root, then open and register the input.
32 |    *
33 |    * @throws CommandLineToolException if root is 0 or the input file cannot be opened
34 |    */
35 |   @Override
36 |   public void setup() {
37 |     if (root == 0) {
38 |       throw new CommandLineToolException("Cannot take the 0th root");
39 |     }
40 |
41 |     try {
42 |       reader = WigFileReader.autodetect(inputFile);
43 |     } catch (IOException e) {
44 |       throw new CommandLineToolException(e);
45 |     }
46 |     inputs.add(reader);
47 |   }
48 |
49 |   /**
50 |    * Replace every value in the chunk by its nth root, where n is the --root
51 |    * option.
52 |    *
53 |    * @param chunk the region being computed
54 |    * @return the queried value array, overwritten in place
55 |    */
56 |   @Override
57 |   public float[] compute(Interval chunk) throws IOException, WigFileException {
58 |     float[] result = reader.query(chunk).getValues();
59 |     // Keep Math.sqrt for the default so existing square-root output is unchanged
60 |     boolean squareRoot = (root == 2);
61 |     double exponent = 1.0 / root;
62 |     for (int i = 0; i < result.length; i++) {
63 |       double value = result[i];
64 |       result[i] = (float) (squareRoot ? Math.sqrt(value) : Math.pow(value, exponent));
65 |     }
66 |
67 |     return result;
68 |   }
69 |
70 |   /**
71 |    * @param args
72 |    * @throws WigFileException
73 |    * @throws IOException
74 |    */
75 |   public static void main(String[] args) throws IOException, WigFileException {
76 |     new Root().instanceMain(args);
77 |   }
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Shift.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Path;
5 | import java.util.HashMap;
6 | import java.util.Map;
7 |
8 | import org.apache.log4j.Logger;
9 |
10 | import com.beust.jcommander.Parameter;
11 |
12 | import edu.unc.genomics.CommandLineToolException;
13 | import edu.unc.genomics.Interval;
14 | import edu.unc.genomics.ReadablePathValidator;
15 | import edu.unc.genomics.WigMathTool;
16 | import edu.unc.genomics.io.WigFileReader;
17 | import edu.unc.genomics.io.WigFileException;
18 |
19 | /**
20 |  * Shift a Wig file to have a specified mean, either using one genome-wide
21 |  * offset or (with -b/--bychr) a separate offset per chromosome.
22 |  *
23 |  * @author timpalpant
24 |  *
25 |  */
26 | public class Shift extends WigMathTool {
27 |
28 |   private static final Logger log = Logger.getLogger(Shift.class);
29 |
30 |   @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class)
31 |   public Path inputFile;
32 |   @Parameter(names = { "-m", "--mean" }, description = "New mean")
33 |   public float newMean = 0;
34 |   @Parameter(names = { "-b", "--bychr" }, description = "Shift each chromosome individually")
35 |   public boolean byChromosome = false;
36 |
37 |   WigFileReader reader;
38 |   // Offset to add to every value, keyed by chromosome name
39 |   Map<String, Float> shifts = new HashMap<>();
40 |
41 |   /**
42 |    * Open the input and precompute the offset for every chromosome.
43 |    *
44 |    * @throws CommandLineToolException if the input cannot be opened or queried
45 |    */
46 |   @Override
47 |   public void setup() {
48 |     try {
49 |       reader = WigFileReader.autodetect(inputFile);
50 |       // The genome-wide offset is the same for every chromosome, so compute it
51 |       // once instead of calling reader.mean() on every loop iteration
52 |       float globalShift = byChromosome ? 0 : (float) (newMean - reader.mean());
53 |       for (String chr : reader.chromosomes()) {
54 |         float shift = globalShift;
55 |         if (byChromosome) {
56 |           float chrMean = (float) reader.queryStats(chr, reader.getChrStart(chr), reader.getChrStop(chr)).getMean();
57 |           log.debug("Mean of " + chr + " = " + chrMean);
58 |           shift = newMean - chrMean;
59 |         }
60 |         shifts.put(chr, shift);
61 |       }
62 |     } catch (IOException | WigFileException e) {
63 |       throw new CommandLineToolException(e);
64 |     }
65 |     inputs.add(reader);
66 |   }
67 |
68 |   /**
69 |    * Add the chunk's chromosome offset to every value in the chunk.
70 |    *
71 |    * @param chunk the region being computed
72 |    * @return the queried value array, overwritten in place
73 |    */
74 |   @Override
75 |   public float[] compute(Interval chunk) throws IOException, WigFileException {
76 |     float[] result = reader.query(chunk).getValues();
77 |     float shift = shifts.get(chunk.getChr());
78 |     for (int i = 0; i < result.length; i++) {
79 |       result[i] += shift;
80 |     }
81 |
82 |     return result;
83 |   }
84 |
85 |   /**
86 |    * @param args
87 |    * @throws WigFileException
88 |    * @throws IOException
89 |    */
90 |   public static void main(String[] args) throws IOException, WigFileException {
91 |     new Shift().instanceMain(args);
92 |   }
93 |
94 | }
95 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/StandardDeviation.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.IOException;
4 | import java.nio.file.Paths;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.apache.commons.math3.stat.descriptive.SummaryStatistics;
9 | import org.apache.log4j.Logger;
10 |
11 | import com.beust.jcommander.Parameter;
12 |
13 | import edu.unc.genomics.CommandLineToolException;
14 | import edu.unc.genomics.Interval;
15 | import edu.unc.genomics.WigMathTool;
16 | import edu.unc.genomics.io.WigFileReader;
17 | import edu.unc.genomics.io.WigFileException;
18 |
19 | /**
20 | * Calculate base pair by base pair variance for a set of Wig files
21 | *
22 | * @author timpalpant
23 | *
24 | */
25 | public class StandardDeviation extends WigMathTool {
26 |
27 | private static final Logger log = Logger.getLogger(StandardDeviation.class);
28 |
29 | @Parameter(description = "Input files", required = true)
30 | public List inputFiles = new ArrayList();
31 |
32 | @Override
33 | public void setup() {
34 | if (inputFiles.size() < 2) {
35 | throw new CommandLineToolException("Cannot compute variance with < 2 files.");
36 | }
37 |
38 | log.debug("Initializing input files");
39 | for (String inputFile : inputFiles) {
40 | try {
41 | addInputFile(WigFileReader.autodetect(Paths.get(inputFile)));
42 | } catch (IOException e) {
43 | log.error("IOError initializing input Wig file: " + inputFile);
44 | e.printStackTrace();
45 | throw new CommandLineToolException(e.getMessage());
46 | }
47 | }
48 | log.debug("Initialized " + inputs.size() + " input files");
49 | }
50 |
51 | @Override
52 | public float[] compute(Interval chunk) throws IOException, WigFileException {
53 | SummaryStatistics[] stats = new SummaryStatistics[chunk.length()];
54 | for (int i = 0; i < stats.length; i++) {
55 | stats[i] = new SummaryStatistics();
56 | }
57 |
58 | for (WigFileReader wig : inputs) {
59 | float[] data = wig.query(chunk).getValues();
60 | for (int i = 0; i < data.length; i++) {
61 | if (!Float.isNaN(data[i])) {
62 | stats[i].addValue(data[i]);
63 | }
64 | }
65 | }
66 |
67 | float[] result = new float[chunk.length()];
68 | for (int i = 0; i < result.length; i++) {
69 | result[i] = (float) stats[i].getStandardDeviation();
70 | }
71 | return result;
72 | }
73 |
74 | /**
75 | * @param args
76 | * @throws WigFileException
77 | * @throws IOException
78 | */
79 | public static void main(String[] args) throws IOException, WigFileException {
80 | new StandardDeviation().instanceMain(args);
81 | }
82 |
83 | }
84 |
--------------------------------------------------------------------------------
/src/edu/unc/genomics/wigmath/Summary.java:
--------------------------------------------------------------------------------
1 | package edu.unc.genomics.wigmath;
2 |
3 | import java.io.BufferedWriter;
4 | import java.io.IOException;
5 | import java.nio.charset.Charset;
6 | import java.nio.file.Files;
7 | import java.nio.file.Path;
8 |
9 | import org.apache.log4j.Logger;
10 |
11 | import com.beust.jcommander.Parameter;
12 |
13 | import edu.unc.genomics.CommandLineTool;
14 | import edu.unc.genomics.ReadablePathValidator;
15 | import edu.unc.genomics.io.WigFileReader;
16 | import edu.unc.genomics.io.WigFileException;
17 | import edu.unc.genomics.ngs.Autocorrelation;
18 |
19 | /**
20 | * Output a summary of a (Big)Wig file with information about the chromosomes,
21 | * contigs, and statistics about the data.
22 | *
23 | * @author timpalpant
24 | *
25 | */
26 | public class Summary extends CommandLineTool {
27 |
28 | private static final Logger log = Logger.getLogger(Autocorrelation.class);
29 |
30 | @Parameter(names = { "-i", "--input" }, description = "Input file", required = true, validateWith = ReadablePathValidator.class)
31 | public Path inputFile;
32 | @Parameter(names = { "-o", "--output" }, description = "Output file")
33 | public Path outputFile;
34 |
35 | public void run() throws IOException {
36 | try (WigFileReader reader = WigFileReader.autodetect(inputFile)) {
37 | String summary = reader.toString();
38 |
39 | if (outputFile != null) {
40 | log.debug("Writing to output file");
41 | try (BufferedWriter writer = Files.newBufferedWriter(outputFile, Charset.defaultCharset())) {
42 | writer.write(summary);
43 | }
44 | } else {
45 | System.out.println(summary);
46 | }
47 | }
48 | }
49 |
50 | public static void main(String[] args) throws IOException, WigFileException {
51 | new Summary().instanceMain(args);
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/ArrayScaler.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
3 | import org.apache.commons.math3.analysis.UnivariateFunction;
4 | import org.apache.commons.math3.analysis.interpolation.SplineInterpolator;
5 | import org.apache.commons.math3.analysis.interpolation.UnivariateInterpolator;
6 |
7 | /**
8 | * Generate scaled versions of an array with different resolution Can be used
9 | * for downsampling/upsampling a 1D array using interpolation Interpolation
10 | * routines are from Apache commons-math3
11 | *
12 | * @author timpalpant
13 | *
14 | */
15 | public class ArrayScaler {
16 |
17 | private UnivariateFunction interp;
18 |
19 | /**
20 | * Create a new ArrayScaler
21 | *
22 | * @param x
23 | * the seed array to downsample/upsample
24 | */
25 | public ArrayScaler(double[] x) {
26 | double[] indices = new double[x.length];
27 | for (int i = 0; i < indices.length; i++) {
28 | indices[i] = ((double) i) / (x.length - 1);
29 | }
30 |
31 | UnivariateInterpolator interpolator = new SplineInterpolator();
32 | interp = interpolator.interpolate(indices, x);
33 | }
34 |
35 | /**
36 | * Interpolate to create a new scaled vector of length l
37 | *
38 | * @param l
39 | * the desired vector length
40 | * @return a new vector of length l created by interpolating x
41 | */
42 | public double[] getScaled(int l) {
43 | double[] stretched = new double[l];
44 | for (int i = 0; i < l; i++) {
45 | stretched[i] = interp.value(((double) i) / l);
46 | }
47 | return stretched;
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/ArrayUtils.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
/**
 * Miscellaneous utility functions for working with arrays
 * 
 * @author timpalpant
 * 
 */
public class ArrayUtils {

  /**
   * Get the index of the maximum (largest) value in an array. In the event of a
   * tie, the first index is returned. NaN elements are never selected.
   * 
   * @param x
   *          a vector of values
   * @return the index of the largest element in x, or -1 if x is empty or
   *         contains only NaN
   */
  public static int maxIndex(float[] x) {
    int maxIndex = -1;
    for (int i = 0; i < x.length; i++) {
      if (Float.isNaN(x[i])) {
        continue;
      }

      // Seed with the first usable element instead of a sentinel value, so
      // arrays made up only of -Float.MAX_VALUE or -Infinity still yield a
      // valid index
      if (maxIndex < 0 || x[i] > x[maxIndex]) {
        maxIndex = i;
      }
    }

    return maxIndex;
  }

  /**
   * Convert an int array to a float array element by element
   * 
   * @param data
   *          the values to convert
   * @return a new float array with the same length as data
   */
  public static float[] mapToFloat(int[] data) {
    float[] ret = new float[data.length];
    for (int i = 0; i < data.length; i++) {
      ret[i] = data[i];
    }
    return ret;
  }

  /**
   * Convert a float array to an int array element by element using a narrowing
   * cast, which discards the fractional part
   * 
   * @param data
   *          the values to convert
   * @return a new int array with the same length as data
   */
  public static int[] mapToInt(float[] data) {
    int[] ret = new int[data.length];
    for (int i = 0; i < data.length; i++) {
      ret[i] = (int) data[i];
    }
    return ret;
  }
}
48 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/FFTUtils.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
3 | import org.jtransforms.fft.FloatFFT_1D;
4 |
/**
 * Routines for working with Fourier transforms and frequency spectrum data
 * 
 * @author timpalpant
 * 
 */
public class FFTUtils {
  /**
   * Computes the power spectrum from FFT data, taking into account even/odd
   * length arrays. Refer to the JTransforms documentation for the layout of the
   * FFT data. Only the DC component is divided by n^2; the other components
   * are returned as the plain squared magnitude.
   * 
   * @param f
   *          the DFT-transformed data from JTransforms.realForward()
   * @return the power spectrum of the complex frequency spectrum in f, having
   *         length f.length / 2 + 1, or an empty array if f is empty
   */
  public static float[] abs2(float[] f) {
    int n = f.length;
    if (n == 0) {
      return new float[0];
    }

    float[] ps = new float[n / 2 + 1];
    // DC component. Square n in floating point: n * n as an int overflows
    // once n exceeds 46340.
    ps[0] = (f[0] * f[0]) / ((float) n * n);
    if (n == 1) {
      // A single sample only has a DC component
      return ps;
    }

    // Even
    if (n % 2 == 0) {
      for (int k = 1; k < n / 2; k++) {
        ps[k] = f[2 * k] * f[2 * k] + f[2 * k + 1] * f[2 * k + 1];
      }
      // f[1] is read as the component at n/2
      ps[n / 2] = f[1] * f[1];
      // Odd
    } else {
      for (int k = 1; k < (n - 1) / 2; k++) {
        ps[k] = f[2 * k] * f[2 * k] + f[2 * k + 1] * f[2 * k + 1];
      }

      // The last component is assembled from f[n-1] and f[1]
      ps[(n - 1) / 2] = f[n - 1] * f[n - 1] + f[1] * f[1];
    }

    return ps;
  }

  /**
   * Computes the autocovariance of the data in x. The values are raw sums of
   * products of mean-centered samples; they are not divided by the number of
   * terms.
   * 
   * @param x
   *          a vector of real data
   * @param maxShift
   *          the maximum phase shift to calculate
   * @return the autocovariance values, having length Math.min(x.length,
   *         maxShift)
   */
  public static float[] autocovariance(float[] x, int maxShift) {
    float total = 0;
    for (int i = 0; i < x.length; i++) {
      total += x[i];
    }
    float mean = total / x.length;

    int stop = Math.min(x.length, maxShift);
    float[] auto = new float[stop];
    for (int i = 0; i < stop; i++) {
      for (int j = 0; j < x.length - i; j++) {
        auto[i] += (x[j] - mean) * (x[j + i] - mean);
      }
    }

    return auto;
  }

  /**
   * Computes the autocovariance of the data in x for all possible shifts
   * 
   * @param x
   *          a vector of real data
   * @return the autocovariance values, having length equal to x.length
   */
  public static float[] autocovariance(float[] x) {
    return autocovariance(x, x.length);
  }

  //public static float[] convolve(float[] x, float[] y) {
  // TODO
  //}

}
88 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/FloatCorrelation.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
3 | /**
4 | * Some basic routines for calculating correlation coefficients with
5 | * single-precision float[] arrays since commons-math only works with double[]
6 | * arrays
7 | *
8 | * @author timpalpant
9 | *
10 | */
11 | public class FloatCorrelation {
12 | /**
13 | * Calculate Pearson's product-moment correlation coefficient (R) between x,y
14 | * data
15 | *
16 | * @param x
17 | * a vector of values
18 | * @param y
19 | * a vector of values
20 | * @return the Pearson correlation between the values in x and the values in y
21 | */
22 | public static float pearson(float[] x, float[] y) {
23 | if (x.length != y.length) {
24 | throw new RuntimeException("Length of x (" + x.length + ") does not equal length of y (" + y.length + ")");
25 | }
26 |
27 | int N = 0;
28 | double sumX = 0, sumY = 0;
29 | double sumSqX = 0, sumSqY = 0;
30 | double sumXY = 0;
31 | for (int i = 0; i < x.length; i++) {
32 | // Skip NaN / Infinity values in the correlation calculation
33 | if (!Float.isNaN(x[i]) && !Float.isInfinite(x[i]) && !Float.isNaN(y[i]) && !Float.isInfinite(y[i])) {
34 | N++;
35 | sumX += x[i];
36 | sumY += y[i];
37 | sumSqX += x[i] * x[i];
38 | sumSqY += y[i] * y[i];
39 | sumXY += x[i] * y[i];
40 | }
41 | }
42 |
43 | return (float) ((N * sumXY - sumX * sumY) / Math.sqrt(N * sumSqX - sumX * sumX) / Math.sqrt(N * sumSqY - sumY
44 | * sumY));
45 | }
46 |
47 | /**
48 | * Calculate Spearman's rank correlation coefficient between x,y data defined
49 | * to be the Pearson correlation between the ranks of the data
50 | *
51 | * @param x
52 | * a vector of values
53 | * @param y
54 | * a vector of values
55 | * @return the Spearman correlation between the values in x and the values in
56 | * y
57 | */
58 | public static float spearman(float[] x, float[] y) {
59 | if (x.length != y.length) {
60 | throw new RuntimeException("Length of x (" + x.length + ") does not equal length of y (" + y.length + ")");
61 | }
62 |
63 | // Compute the ranking of x and y
64 | float[] rankX = ArrayUtils.mapToFloat(SortUtils.rank(x));
65 | float[] rankY = ArrayUtils.mapToFloat(SortUtils.rank(y));
66 |
67 | return pearson(rankX, rankY);
68 | }
69 |
70 | }
71 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/FloatHistogram.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
3 | import java.util.Arrays;
4 |
/**
 * A simple histogram class. addValue(double) finds in which of nBins
 * equal-width bins between the given minimum and maximum a value falls. An
 * integer array keeps track of the number of times an input value fell into a
 * particular bin; values outside the range are tallied as underflows or
 * overflows.
 * 
 * Adapted from:
 * http://www.particle.kth.se/~fmi/kurs/PhysicsSimulation/Lectures/11B/Examples/Experiment/Histogram.java
 * 
 * @author timpalpant
 */
public class FloatHistogram {

  int[] bins = null;
  int nBins;
  double xLow, xHigh;
  double delBin;

  int overFlows = 0, underFlows = 0;

  /**
   * Create a new histogram covering [xLow, xHigh)
   * 
   * @param nBins
   *          the number of equal-width bins
   * @param xLow
   *          the inclusive lower edge of the first bin
   * @param xHigh
   *          the exclusive upper edge of the last bin
   */
  public FloatHistogram(int nBins, double xLow, double xHigh) {

    this.nBins = nBins;
    this.xLow = xLow;
    this.xHigh = xHigh;

    bins = new int[nBins];
    delBin = (xHigh - xLow) / (float) nBins;

    reset();
  }

  /**
   * Count a value in the histogram. Values below xLow are counted as
   * underflows and values at or above xHigh as overflows. NaN is ignored.
   * 
   * @param data
   *          the value to add
   */
  public void addValue(double data) {
    if (Double.isNaN(data)) {
      // NaN fails both range comparisons below and (int) NaN is 0, so without
      // this guard it would be counted in the first bin
      return;
    }

    if (data < xLow) {
      underFlows++;
    } else if (data >= xHigh) {
      overFlows++;
    } else if (nBins > 0) {
      int bin = (int) ((data - xLow) / delBin);
      // Floating-point rounding can push a value just below xHigh to index
      // nBins; keep it in the last bin rather than dropping it
      if (bin >= nBins) {
        bin = nBins - 1;
      }
      bins[bin]++;
    }
  }

  /**
   * @return the bin counts (the internal array, not a copy)
   */
  public int[] getHistogram() {
    return bins;
  }

  /**
   * @return the width of each bin
   */
  public double getBinSize() {
    return delBin;
  }

  /**
   * Clear all bin counts as well as the underflow and overflow counts
   */
  public void reset() {
    Arrays.fill(bins, 0);
    underFlows = 0;
    overFlows = 0;
  }

  /**
   * @return a tab-delimited table with one line for the underflows, one line
   *         per bin (lower bin edge and count), and one line for the overflows
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("<" + xLow + "\t" + underFlows + "\n");
    for (int i = 0; i < bins.length; i++) {
      sb.append(xLow + i * delBin + "\t" + bins[i] + "\n");
    }
    sb.append(">" + xHigh + "\t" + overFlows);
    return sb.toString();
  }

}
73 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/RomanNumeral.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
/**
 * Minimal helper that converts Arabic integers into Roman numerals
 * 
 * @author timpalpant
 * @author Fred Swartz
 * 
 */
public class RomanNumeral {

  /** Value/symbol pairs ordered from largest to smallest value */
  final static RomanValue[] ROMAN_VALUE_TABLE = { new RomanValue(1000, "M"), new RomanValue(900, "CM"),
      new RomanValue(500, "D"), new RomanValue(400, "CD"), new RomanValue(100, "C"), new RomanValue(90, "XC"),
      new RomanValue(50, "L"), new RomanValue(40, "XL"), new RomanValue(10, "X"), new RomanValue(9, "IX"),
      new RomanValue(5, "V"), new RomanValue(4, "IV"), new RomanValue(1, "I") };

  /**
   * Convert an int to Roman numeral
   * 
   * @param n
   *          an integer between 1-3999
   * @return n as a Roman numeral
   * @throws NumberFormatException
   *           if n is outside the range 1-3999
   */
  public static String int2roman(int n) {
    boolean inRange = n >= 1 && n < 4000;
    if (!inRange) {
      throw new NumberFormatException("Numbers must be in range 1-3999");
    }

    StringBuilder numeral = new StringBuilder(10);
    int remaining = n;
    // Walk the table from the largest value down, emitting each symbol as
    // many times as its value fits into what is left
    for (int k = 0; k < ROMAN_VALUE_TABLE.length; k++) {
      RomanValue entry = ROMAN_VALUE_TABLE[k];
      for (int reps = remaining / entry.intVal; reps > 0; reps--) {
        numeral.append(entry.romVal);
      }
      remaining %= entry.intVal;
    }

    return numeral.toString();
  }

  /** Pairs an integer value with its Roman numeral symbol */
  private static class RomanValue {
    // Fields are left non-private: this value class is itself private and
    // only used inside RomanNumeral.
    int intVal; // Integer value.
    String romVal; // Equivalent roman numeral.

    RomanValue(int dec, String rom) {
      this.intVal = dec;
      this.romVal = rom;
    }
  }
}
--------------------------------------------------------------------------------
/src/edu/unc/utils/Samtools.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
3 | import java.nio.file.Path;
4 |
5 | import org.apache.log4j.Logger;
6 |
7 | import net.sf.picard.reference.IndexedFastaSequenceFile;
8 |
9 | /**
10 | * Helper methods for calling the samtools executable externally Note: If at all
11 | * possible, these should be avoided since they require the user to have
12 | * samtools installed and available in the PATH
13 | *
14 | * It is preferred to use functionality in SAM-JDK / Picard
15 | *
16 | * @author timpalpant
17 | *
18 | */
19 | public class Samtools {
20 |
21 | private static final Logger log = Logger.getLogger(Samtools.class);
22 |
23 | /**
24 | * Index a FASTA file with 'samtools faidx'
25 | *
26 | * @param p
27 | * the FASTA file to index
28 | * @throws Exception
29 | * if the index is not created successfully
30 | */
31 | public static void indexFasta(Path p) throws Exception {
32 | log.debug("Attempting to generate FASTA index by calling 'samtools faidx'");
33 |
34 | try {
35 | Process proc = new ProcessBuilder("samtools", "faidx", p.toString()).start();
36 | proc.waitFor();
37 | } catch (Exception e) {
38 | log.error("Error attempting to call 'samtools faidx'. Is samtools available in the PATH?");
39 | } finally {
40 | if (!IndexedFastaSequenceFile.canCreateIndexedFastaReader(p.toFile())) {
41 | log.error("Could not create FASTA index for file " + p);
42 | throw new Exception("Could not create FASTA index for file " + p);
43 | }
44 | }
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/SequenceUtils.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
3 | import net.sf.samtools.util.SequenceUtil;
4 |
5 | /**
6 | * Helper methods for working with sequence data
7 | *
8 | * @author timpalpant
9 | *
10 | */
11 | public class SequenceUtils {
12 | /**
13 | * Search for the next index of a subsequence in a larger sequence, allowing
14 | * mismatches
15 | *
16 | * @param bases
17 | * the sequence to search in
18 | * @param nmer
19 | * the nmer to search for
20 | * @param allowedMismatches
21 | * the number of mismatches allowed
22 | * @param fromIndex
23 | * the index to start searching at
24 | * @return the index of the next match of nmer in bases, or -1 if no matches
25 | * are found
26 | */
27 | public static int indexOf(byte[] bases, byte[] nmer, int allowedMismatches, int fromIndex) {
28 | for (int i = fromIndex; i < bases.length - nmer.length; i++) {
29 | int mismatches = 0;
30 | for (int j = 0; j < nmer.length; j++) {
31 | if (!SequenceUtil.basesEqual(bases[i + j], nmer[j])) {
32 | if (++mismatches > allowedMismatches) {
33 | break;
34 | }
35 | }
36 | }
37 |
38 | // If we found one at this position, return the index
39 | if (mismatches <= allowedMismatches) {
40 | return i;
41 | }
42 | }
43 |
44 | return -1;
45 | }
46 |
47 | /**
48 | * Search for the next index of a subsequence in a larger sequence, allowing
49 | * mismatches
50 | *
51 | * @param bases
52 | * the sequence to search in
53 | * @param nmer
54 | * the nmer to search for
55 | * @param allowedMismatches
56 | * the number of mismatches to allow
57 | * @return the index of the next match of nmer in bases, or -1 if no matches
58 | * are found
59 | */
60 | public static int indexOf(byte[] bases, byte[] nmer, int allowedMismatches) {
61 | return indexOf(bases, nmer, allowedMismatches, 0);
62 | }
63 |
64 | /**
65 | * Search for the next index of a subsequence in a larger sequence, with no
66 | * mismatches
67 | *
68 | * @param bases
69 | * the sequence to search in
70 | * @param nmer
71 | * the nmer to search for
72 | * @return the index of the next match of nmer in bases, or -1 if no matches
73 | * are found
74 | */
75 | public static int indexOf(byte[] bases, byte[] nmer) {
76 | return indexOf(bases, nmer, 0);
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/SortUtils.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
/**
 * Custom sorting utilities see:
 * http://stackoverflow.com/questions/951848/java-array-sort-quick-way-to-get-a-sorted-list-of-indices-of-an-array
 * 
 * @author timpalpant
 * 
 */
public class SortUtils {
  /**
   * Compute the permutation that sorts an array in ascending order. The array
   * itself is not modified; only an index array is rearranged. The sort is a
   * quicksort and is not stable, so the relative order of equal values is
   * unspecified.
   * 
   * @param main
   *          the values to order
   * @return the index in main of each element, listed in ascending order of
   *         value
   */
  public static int[] sortIndices(float[] main) {
    int[] index = new int[main.length];
    for (int i = 0; i < index.length; i++) {
      index[i] = i;
    }

    quicksort(main, index, 0, index.length - 1);

    return index;
  }

  /**
   * Return the rank (in ascending order) of each element in an array. Ranks
   * start at 1 and every element gets its own distinct rank, including tied
   * values.
   * 
   * @param main
   *          an array to rank
   * @return the rank of each element in main
   */
  public static int[] rank(float[] main) {
    int[] sortedIndices = sortIndices(main);
    int[] rank = new int[main.length];
    for (int i = 0; i < rank.length; i++) {
      rank[sortedIndices[i]] = i + 1;
    }

    return rank;
  }

  // quicksort index[left] to index[right] by the values they refer to in a.
  // Only the smaller partition is handled recursively; the larger one is
  // processed by the loop. This keeps the recursion depth logarithmic even for
  // already-sorted input, where plain recursion would go one level deep per
  // element. The partitions are disjoint, so the resulting order is the same
  // as with two recursive calls.
  private static void quicksort(float[] a, int[] index, int left, int right) {
    while (left < right) {
      int p = partition(a, index, left, right);
      if (p - left < right - p) {
        quicksort(a, index, left, p - 1);
        left = p + 1;
      } else {
        quicksort(a, index, p + 1, right);
        right = p - 1;
      }
    }
  }

  // partition index[left] to index[right], assumes left < right
  private static int partition(float[] a, int[] index, int left, int right) {
    int i = left - 1;
    int j = right;
    while (true) {
      // find item on left to swap
      while (a[index[++i]] < a[index[right]])
        ; // a[index[right]] acts as sentinel
      // find item on right to swap
      while (a[index[right]] < a[index[--j]]) {
        // don't go out-of-bounds
        if (j == left) {
          break;
        }
      }

      // check if pointers cross
      if (i >= j) {
        break;
      }

      swap(index, i, j); // swap two elements into place
    }

    swap(index, i, right); // swap with partition element
    return i;
  }

  // exchange index[i] and index[j]
  private static void swap(int[] index, int i, int j) {
    int tmp = index[i];
    index[i] = index[j];
    index[j] = tmp;
  }
}
94 |
--------------------------------------------------------------------------------
/src/edu/unc/utils/WigStatistic.java:
--------------------------------------------------------------------------------
1 | package edu.unc.utils;
2 |
/**
 * The statistics that can be computed on Wig data. They correspond to the
 * statistics that are built into the UCSC BigWig tools.
 * 
 * @author timpalpant
 * 
 */
public enum WigStatistic {
  COVERAGE("coverage"), TOTAL("total"), MEAN("mean"), MIN("min"), MAX("max");

  /** Lower-case textual name of the statistic */
  private String name;

  WigStatistic(final String name) {
    this.name = name;
  }

  /**
   * Look up a statistic by its name, ignoring case
   * 
   * @param name
   *          the name to look up
   * @return the matching statistic, or null if there is none
   */
  public static WigStatistic fromName(final String name) {
    WigStatistic[] candidates = WigStatistic.values();
    int k = 0;
    while (k < candidates.length) {
      WigStatistic candidate = candidates[k];
      if (candidate.getName().equalsIgnoreCase(name)) {
        return candidate;
      }
      k++;
    }

    return null;
  }

  /**
   * @return the name
   */
  public String getName() {
    return name;
  }
}
--------------------------------------------------------------------------------
/src/log4j.properties:
--------------------------------------------------------------------------------
1 | log4j.rootLogger=debug, stdout
2 |
3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
5 |
6 | # Pattern to output the caller's file name and line number.
7 | log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
8 |
9 | # Only output errors from the BigWig library
10 | log4j.logger.org.broad.igv.bbfile=ERROR
11 | # Output debug from java-genomics-io's io package, but only info from its util package
12 | log4j.logger.edu.unc.genomics.io=DEBUG
13 | log4j.logger.edu.unc.genomics.util=INFO
14 | log4j.logger.edu.ucsc.genome=ERROR
--------------------------------------------------------------------------------
/test-data/baseAlignCounts1.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0 name='Converted test.sam' description='Converted test.sam'
2 | fixedStep chrom=2micron start=1 step=1 span=1
3 |
--------------------------------------------------------------------------------
/test-data/bedGraphToWig.input.bedGraph:
--------------------------------------------------------------------------------
1 | chrI 9 14 10.0
2 | chrI 14 99 2.6
3 | chrII 19 24 6.0
4 | chrII 24 29 2.0
5 | chrIII 14 19 2.0
6 | chrIV 0 1 12.0
7 | chrIV 1 2 1.0
8 | chrIV 2 3 3.0
9 | chrIV 3 4 1.2
10 | chrIV 4 5 10.0
11 |
--------------------------------------------------------------------------------
/test-data/divide1.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 span=1 step=1
3 | 0.96153849
4 | 0.88235289
5 | NaN
6 | NaN
7 | NaN
8 | 2
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 0.625
14 | 0.95238101
15 | fixedStep chrom=chrI start=1 span=1 step=1
16 | 0.5
17 | 0.66666669
18 | 0.75
19 | 0.80000001
20 | 0.83333331
21 | 0.85714287
22 | 0.875
23 | 0.8888889
24 | 0.89999998
25 | 0.90909094
26 | 0.91666669
27 | 0.92307693
28 | 0.9285714
29 | 0.93333334
30 | 0.9375
31 | fixedStep chrom=chrXI start=25 span=4 step=5
32 | 1
33 | 2
34 | 9
35 | 0
36 | 1
37 | 11
38 | 0.5
39 | 1.66666663
40 | 3.5
41 | 8
42 | 4.5
43 | 0.22727273
44 | 0.66666669
45 | 1.5
46 | 0.2
47 | 0.25
48 | 0.66666669
49 | 1
50 | 1
51 | 1
52 | NaN
53 | 13
54 | 1.25
55 | 1.125
56 | 0.39285713
57 |
--------------------------------------------------------------------------------
/test-data/divide2.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 span=1 step=1
3 | 5
4 | 2.60869575
5 | NaN
6 | NaN
7 | NaN
8 | 0.66666669
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | NaN
14 | NaN
15 | fixedStep chrom=chrI start=1 span=1 step=1
16 | 0.33333334
17 | 0.5
18 | 0.60000002
19 | 0.66666669
20 | 0.71428573
21 | 0.75
22 | 0.77777779
23 | 0.80000001
24 | 0.81818181
25 | 0.83333331
26 | 0.84615386
27 | 0.85714287
28 | 0.86666667
29 | 0.875
30 | 0.88235295
31 | fixedStep chrom=chrXI start=20 span=4 step=5
32 | 0
33 | 0.75
34 | 0.5714286
35 | 9
36 | 0
37 | 1.20000005
38 | 11
39 | 0.04477612
40 | 0.09259259
41 | 1.39999998
42 | 1
43 | 1
44 | 2.5
45 | 1.5
46 | 0.5
47 | 0.11111111
48 | 0.01298701
49 | 0.2857143
50 | 0.23076923
51 | 0.16
52 | 1.25
53 | 0.11111111
54 | 13
55 | 3
56 | 2.25
57 | 11
58 |
--------------------------------------------------------------------------------
/test-data/divide3.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 span=1 step=1
3 | 0.1923077
4 | 0.33823529
5 | NaN
6 | NaN
7 | NaN
8 | 3
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 0
14 | 0
15 | fixedStep chrom=chrI start=1 span=1 step=1
16 | 1.5
17 | 1.33333337
18 | 1.25
19 | 1.20000005
20 | 1.16666663
21 | 1.14285719
22 | 1.125
23 | 1.11111116
24 | 1.10000002
25 | 1.09090912
26 | 1.08333337
27 | 1.07692313
28 | 1.07142854
29 | 1.06666672
30 | 1.0625
31 | fixedStep chrom=chrXI start=25 span=4 step=5
32 | 1.33333337
33 | 3.5
34 | 1
35 | 0.03092784
36 | 0.83333331
37 | 1
38 | 11.16666698
39 | 18
40 | 2.5
41 | 8
42 | 4.5
43 | 0.09090909
44 | 0.44444445
45 | 3
46 | 1.79999995
47 | 19.25
48 | 2.33333325
49 | 4.33333349
50 | 6.25
51 | 0.80000001
52 | NaN
53 | 1
54 | 0.41666666
55 | 0.5
56 | 0.03571429
57 |
--------------------------------------------------------------------------------
/test-data/downsample1.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0 name='Downsampled wigmath1.wig' description='Downsampled wigmath1.wig'
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 5.0
4 | 6.0
5 | NaN
6 | NaN
7 | NaN
8 | 10.0
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 1.0
14 | fixedStep chrom=chrI start=1 step=1 span=1
15 | 1.0
16 | 2.0
17 | 3.0
18 | 4.0
19 | 5.0
20 | 6.0
21 | 7.0
22 | 8.0
23 | 9.0
24 | 10.0
25 | 11.0
26 | 12.0
27 | 13.0
28 | 14.0
29 | fixedStep chrom=chrXI start=20 step=1 span=1
30 | 0.0
31 | 0.0
32 | 0.0
33 | 0.0
34 | NaN
35 | 3.0
36 | 3.0
37 | 3.0
38 | 3.0
39 | NaN
40 | 4.0
41 | 4.0
42 | 4.0
43 | 4.0
44 | NaN
45 | 9.0
46 | 9.0
47 | 9.0
48 | 9.0
49 | NaN
50 | 0.0
51 | 0.0
52 | 0.0
53 | 0.0
54 | NaN
55 | 6.0
56 | 6.0
57 | 6.0
58 | 6.0
59 | NaN
60 | 44.0
61 | 44.0
62 | 44.0
63 | 44.0
64 | NaN
65 | 3.0
66 | 3.0
67 | 3.0
68 | 3.0
69 | NaN
70 | 5.0
71 | 5.0
72 | 5.0
73 | 5.0
74 | NaN
75 | 7.0
76 | 7.0
77 | 7.0
78 | 7.0
79 | NaN
80 | 8.0
81 | 8.0
82 | 8.0
83 | 8.0
84 | NaN
85 | 9.0
86 | 9.0
87 | 9.0
88 | 9.0
89 | NaN
90 | 5.0
91 | 5.0
92 | 5.0
93 | 5.0
94 | NaN
95 | 6.0
96 | 6.0
97 | 6.0
98 | 6.0
99 | NaN
100 | 3.0
101 | 3.0
102 | 3.0
103 | 3.0
104 | NaN
105 | 1.0
106 | 1.0
107 | 1.0
108 | 1.0
109 | NaN
110 | 1.0
111 | 1.0
112 | 1.0
113 | 1.0
114 | NaN
115 | 2.0
116 | 2.0
117 | 2.0
118 | 2.0
119 | NaN
120 | 3.0
121 | 3.0
122 | 3.0
123 | 3.0
124 | NaN
125 | 4.0
126 | 4.0
127 | 4.0
128 | 4.0
129 | NaN
130 | 5.0
131 | 5.0
132 | 5.0
133 | 5.0
134 | NaN
135 | 6.0
136 | 6.0
137 | 6.0
138 | 6.0
139 | NaN
140 | 13.0
141 | 13.0
142 | 13.0
143 | 13.0
144 | NaN
145 | 15.0
146 | 15.0
147 | 15.0
148 | 15.0
149 | NaN
150 | 18.0
151 | 18.0
152 | 18.0
153 | 18.0
154 | NaN
155 | 22.0
156 | 22.0
157 | 22.0
158 |
--------------------------------------------------------------------------------
/test-data/downsample2.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0 name='Downsampled wigmath2.bw' description='Downsampled wigmath2.bw'
2 | fixedStep chrom=2micron start=100 step=5 span=5
3 | 6.8
4 | 5.0
5 | 2.1
6 | fixedStep chrom=chrI start=1 step=5 span=5
7 | 6.0
8 | 11.0
9 | 16.0
10 | fixedStep chrom=chrXI start=20 step=5 span=5
11 | 0.0
12 | 3.0
13 | 2.0
14 | 1.0
15 | 97.0
16 | 6.0
17 | 4.0
18 | 6.0
19 | 3.0
20 | 2.0
21 | 1.0
22 | 2.0
23 | 22.0
24 | 9.0
25 | 2.0
26 | 5.0
27 | 4.0
28 | 3.0
29 | 3.0
30 | 4.0
31 | 5.0
32 | 0.0
33 | 1.0
34 | 12.0
35 | 16.0
36 | 56.0
37 |
--------------------------------------------------------------------------------
/test-data/downsample3.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0 name='Downsampled wigmath3.wig' description='Downsampled wigmath3.wig'
2 | fixedStep chrom=2micron start=100 step=100 span=100
3 | 18.3
4 | fixedStep chrom=chrI start=1 step=100 span=100
5 | 150.0
6 | fixedStep chrom=chrXI start=20 step=100 span=100
7 | 1264.0
8 | 296.0
9 |
--------------------------------------------------------------------------------
/test-data/gaussian1.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=111 span=1 step=1
3 | NaN
4 | fixedStep chrom=chrI start=15 span=1 step=1
5 | NaN
6 | fixedStep chrom=chrXI start=148 span=1 step=1
7 | NaN
8 |
--------------------------------------------------------------------------------
/test-data/gaussian2.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=111 span=1 step=1
3 | NaN
4 | fixedStep chrom=chrI start=15 span=1 step=1
5 | NaN
6 | fixedStep chrom=chrXI start=148 span=1 step=1
7 | NaN
8 |
--------------------------------------------------------------------------------
/test-data/gaussian3.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=111 span=1 step=1
3 | NaN
4 | fixedStep chrom=chrI start=15 span=1 step=1
5 | NaN
6 | fixedStep chrom=chrXI start=148 span=1 step=1
7 | NaN
8 |
--------------------------------------------------------------------------------
/test-data/logger1.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 2.321928
4 | 2.5849626
5 | NaN
6 | NaN
7 | NaN
8 | 3.321928
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 0.0
14 | 1.0
15 | fixedStep chrom=chrI start=1 step=1 span=1
16 | 0.0
17 | 1.0
18 | 1.5849625
19 | 2.0
20 | 2.321928
21 | 2.5849626
22 | 2.807355
23 | 3.0
24 | 3.169925
25 | 3.321928
26 | 3.4594316
27 | 3.5849626
28 | 3.7004397
29 | 3.807355
30 | 3.9068906
31 | fixedStep chrom=chrXI start=20 step=1 span=1
32 | -Infinity
33 | -Infinity
34 | -Infinity
35 | -Infinity
36 | NaN
37 | 1.5849625
38 | 1.5849625
39 | 1.5849625
40 | 1.5849625
41 | NaN
42 | 2.0
43 | 2.0
44 | 2.0
45 | 2.0
46 | NaN
47 | 3.169925
48 | 3.169925
49 | 3.169925
50 | 3.169925
51 | NaN
52 | -Infinity
53 | -Infinity
54 | -Infinity
55 | -Infinity
56 | NaN
57 | 2.5849626
58 | 2.5849626
59 | 2.5849626
60 | 2.5849626
61 | NaN
62 | 5.4594316
63 | 5.4594316
64 | 5.4594316
65 | 5.4594316
66 | NaN
67 | 1.5849625
68 | 1.5849625
69 | 1.5849625
70 | 1.5849625
71 | NaN
72 | 2.321928
73 | 2.321928
74 | 2.321928
75 | 2.321928
76 | NaN
77 | 2.807355
78 | 2.807355
79 | 2.807355
80 | 2.807355
81 | NaN
82 | 3.0
83 | 3.0
84 | 3.0
85 | 3.0
86 | NaN
87 | 3.169925
88 | 3.169925
89 | 3.169925
90 | 3.169925
91 | NaN
92 | 2.321928
93 | 2.321928
94 | 2.321928
95 | 2.321928
96 | NaN
97 | 2.5849626
98 | 2.5849626
99 | 2.5849626
100 | 2.5849626
101 | NaN
102 | 1.5849625
103 | 1.5849625
104 | 1.5849625
105 | 1.5849625
106 | NaN
107 | 0.0
108 | 0.0
109 | 0.0
110 | 0.0
111 | NaN
112 | 0.0
113 | 0.0
114 | 0.0
115 | 0.0
116 | NaN
117 | 1.0
118 | 1.0
119 | 1.0
120 | 1.0
121 | NaN
122 | 1.5849625
123 | 1.5849625
124 | 1.5849625
125 | 1.5849625
126 | NaN
127 | 2.0
128 | 2.0
129 | 2.0
130 | 2.0
131 | NaN
132 | 2.321928
133 | 2.321928
134 | 2.321928
135 | 2.321928
136 | NaN
137 | 2.5849626
138 | 2.5849626
139 | 2.5849626
140 | 2.5849626
141 | NaN
142 | 3.7004397
143 | 3.7004397
144 | 3.7004397
145 | 3.7004397
146 | NaN
147 | 3.9068906
148 | 3.9068906
149 | 3.9068906
150 | 3.9068906
151 | NaN
152 | 4.169925
153 | 4.169925
154 | 4.169925
155 | 4.169925
156 | NaN
157 | 4.4594316
158 | 4.4594316
159 | 4.4594316
160 | 4.4594316
161 |
--------------------------------------------------------------------------------
/test-data/logger2.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 1.5006738
4 | 1.7448581
5 | NaN
6 | NaN
7 | NaN
8 | 1.4649736
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 0.42781577
14 | 0.6753404
15 | fixedStep chrom=chrI start=1 step=1 span=1
16 | 0.63092977
17 | 1.0
18 | 1.2618595
19 | 1.4649736
20 | 1.6309297
21 | 1.7712437
22 | 1.8927892
23 | 2.0
24 | 2.0959032
25 | 2.1826584
26 | 2.2618594
27 | 2.3347175
28 | 2.4021735
29 | 2.4649734
30 | 2.523719
31 | fixedStep chrom=chrXI start=20 step=1 span=1
32 | -Infinity
33 | -Infinity
34 | -Infinity
35 | -Infinity
36 | NaN
37 | 1.0
38 | 1.0
39 | 1.0
40 | 1.0
41 | NaN
42 | 0.63092977
43 | 0.63092977
44 | 0.63092977
45 | 0.63092977
46 | NaN
47 | 0.0
48 | 0.0
49 | 0.0
50 | 0.0
51 | NaN
52 | 4.1640816
53 | 4.1640816
54 | 4.1640816
55 | 4.1640816
56 | NaN
57 | 1.6309297
58 | 1.6309297
59 | 1.6309297
60 | 1.6309297
61 | NaN
62 | 1.2618595
63 | 1.2618595
64 | 1.2618595
65 | 1.2618595
66 | NaN
67 | 1.6309297
68 | 1.6309297
69 | 1.6309297
70 | 1.6309297
71 | NaN
72 | 1.0
73 | 1.0
74 | 1.0
75 | 1.0
76 | NaN
77 | 0.63092977
78 | 0.63092977
79 | 0.63092977
80 | 0.63092977
81 | NaN
82 | 0.0
83 | 0.0
84 | 0.0
85 | 0.0
86 | NaN
87 | 0.63092977
88 | 0.63092977
89 | 0.63092977
90 | 0.63092977
91 | NaN
92 | 2.8135881
93 | 2.8135881
94 | 2.8135881
95 | 2.8135881
96 | NaN
97 | 2.0
98 | 2.0
99 | 2.0
100 | 2.0
101 | NaN
102 | 0.63092977
103 | 0.63092977
104 | 0.63092977
105 | 0.63092977
106 | NaN
107 | 1.4649736
108 | 1.4649736
109 | 1.4649736
110 | 1.4649736
111 | NaN
112 | 1.2618595
113 | 1.2618595
114 | 1.2618595
115 | 1.2618595
116 | NaN
117 | 1.0
118 | 1.0
119 | 1.0
120 | 1.0
121 | NaN
122 | 1.0
123 | 1.0
124 | 1.0
125 | 1.0
126 | NaN
127 | 1.2618595
128 | 1.2618595
129 | 1.2618595
130 | 1.2618595
131 | NaN
132 | 1.4649736
133 | 1.4649736
134 | 1.4649736
135 | 1.4649736
136 | NaN
137 | -Infinity
138 | -Infinity
139 | -Infinity
140 | -Infinity
141 | NaN
142 | 0.0
143 | 0.0
144 | 0.0
145 | 0.0
146 | NaN
147 | 2.2618594
148 | 2.2618594
149 | 2.2618594
150 | 2.2618594
151 | NaN
152 | 2.523719
153 | 2.523719
154 | 2.523719
155 | 2.523719
156 | NaN
157 | 3.664033
158 | 3.664033
159 | 3.664033
160 | 3.664033
161 |
--------------------------------------------------------------------------------
/test-data/logger3.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 0.0
4 | 0.36172783
5 | NaN
6 | NaN
7 | NaN
8 | 1.1760913
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | -Infinity
14 | -Infinity
15 | fixedStep chrom=chrI start=1 step=1 span=1
16 | 0.47712126
17 | 0.60206
18 | 0.69897
19 | 0.7781513
20 | 0.845098
21 | 0.90309
22 | 0.9542425
23 | 1.0
24 | 1.0413927
25 | 1.0791812
26 | 1.1139433
27 | 1.146128
28 | 1.1760913
29 | 1.20412
30 | 1.230449
31 | fixedStep chrom=chrXI start=20 step=1 span=1
32 | 0.7781513
33 | 0.7781513
34 | 0.7781513
35 | 0.7781513
36 | NaN
37 | 0.60206
38 | 0.60206
39 | 0.60206
40 | 0.60206
41 | NaN
42 | 0.845098
43 | 0.845098
44 | 0.845098
45 | 0.845098
46 | NaN
47 | 0.0
48 | 0.0
49 | 0.0
50 | 0.0
51 | NaN
52 | 0.47712126
53 | 0.47712126
54 | 0.47712126
55 | 0.47712126
56 | NaN
57 | 0.69897
58 | 0.69897
59 | 0.69897
60 | 0.69897
61 | NaN
62 | 0.60206
63 | 0.60206
64 | 0.60206
65 | 0.60206
66 | NaN
67 | 1.8260748
68 | 1.8260748
69 | 1.8260748
70 | 1.8260748
71 | NaN
72 | 1.7323937
73 | 1.7323937
74 | 1.7323937
75 | 1.7323937
76 | NaN
77 | 0.69897
78 | 0.69897
79 | 0.69897
80 | 0.69897
81 | NaN
82 | 0.90309
83 | 0.90309
84 | 0.90309
85 | 0.90309
86 | NaN
87 | 0.9542425
88 | 0.9542425
89 | 0.9542425
90 | 0.9542425
91 | NaN
92 | 0.30103
93 | 0.30103
94 | 0.30103
95 | 0.30103
96 | NaN
97 | 0.60206
98 | 0.60206
99 | 0.60206
100 | 0.60206
101 | NaN
102 | 0.7781513
103 | 0.7781513
104 | 0.7781513
105 | 0.7781513
106 | NaN
107 | 0.9542425
108 | 0.9542425
109 | 0.9542425
110 | 0.9542425
111 | NaN
112 | 1.8864907
113 | 1.8864907
114 | 1.8864907
115 | 1.8864907
116 | NaN
117 | 0.845098
118 | 0.845098
119 | 0.845098
120 | 0.845098
121 | NaN
122 | 1.1139433
123 | 1.1139433
124 | 1.1139433
125 | 1.1139433
126 | NaN
127 | 1.39794
128 | 1.39794
129 | 1.39794
130 | 1.39794
131 | NaN
132 | 0.60206
133 | 0.60206
134 | 0.60206
135 | 0.60206
136 | NaN
137 | 1.7323937
138 | 1.7323937
139 | 1.7323937
140 | 1.7323937
141 | NaN
142 | 0.0
143 | 0.0
144 | 0.0
145 | 0.0
146 | NaN
147 | 0.69897
148 | 0.69897
149 | 0.69897
150 | 0.69897
151 | NaN
152 | 0.90309
153 | 0.90309
154 | 0.90309
155 | 0.90309
156 | NaN
157 | 0.30103
158 | 0.30103
159 | 0.30103
160 | 0.30103
161 |
--------------------------------------------------------------------------------
/test-data/logger4.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 2.321928
4 | 2.5849626
5 | NaN
6 | NaN
7 | NaN
8 | 3.321928
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 0.0
14 | 1.0
15 | fixedStep chrom=chrI start=1 step=1 span=1
16 | 0.0
17 | 1.0
18 | 1.5849625
19 | 2.0
20 | 2.321928
21 | 2.5849626
22 | 2.807355
23 | 3.0
24 | 3.169925
25 | 3.321928
26 | 3.4594316
27 | 3.5849626
28 | 3.7004397
29 | 3.807355
30 | 3.9068906
31 | fixedStep chrom=chrXI start=20 step=1 span=1
32 | -Infinity
33 | -Infinity
34 | -Infinity
35 | -Infinity
36 | NaN
37 | 1.5849625
38 | 1.5849625
39 | 1.5849625
40 | 1.5849625
41 | NaN
42 | 2.0
43 | 2.0
44 | 2.0
45 | 2.0
46 | NaN
47 | 3.169925
48 | 3.169925
49 | 3.169925
50 | 3.169925
51 | NaN
52 | -Infinity
53 | -Infinity
54 | -Infinity
55 | -Infinity
56 | NaN
57 | 2.5849626
58 | 2.5849626
59 | 2.5849626
60 | 2.5849626
61 | NaN
62 | 5.4594316
63 | 5.4594316
64 | 5.4594316
65 | 5.4594316
66 | NaN
67 | 1.5849625
68 | 1.5849625
69 | 1.5849625
70 | 1.5849625
71 | NaN
72 | 2.321928
73 | 2.321928
74 | 2.321928
75 | 2.321928
76 | NaN
77 | 2.807355
78 | 2.807355
79 | 2.807355
80 | 2.807355
81 | NaN
82 | 3.0
83 | 3.0
84 | 3.0
85 | 3.0
86 | NaN
87 | 3.169925
88 | 3.169925
89 | 3.169925
90 | 3.169925
91 | NaN
92 | 2.321928
93 | 2.321928
94 | 2.321928
95 | 2.321928
96 | NaN
97 | 2.5849626
98 | 2.5849626
99 | 2.5849626
100 | 2.5849626
101 | NaN
102 | 1.5849625
103 | 1.5849625
104 | 1.5849625
105 | 1.5849625
106 | NaN
107 | 0.0
108 | 0.0
109 | 0.0
110 | 0.0
111 | NaN
112 | 0.0
113 | 0.0
114 | 0.0
115 | 0.0
116 | NaN
117 | 1.0
118 | 1.0
119 | 1.0
120 | 1.0
121 | NaN
122 | 1.5849625
123 | 1.5849625
124 | 1.5849625
125 | 1.5849625
126 | NaN
127 | 2.0
128 | 2.0
129 | 2.0
130 | 2.0
131 | NaN
132 | 2.321928
133 | 2.321928
134 | 2.321928
135 | 2.321928
136 | NaN
137 | 2.5849626
138 | 2.5849626
139 | 2.5849626
140 | 2.5849626
141 | NaN
142 | 3.7004397
143 | 3.7004397
144 | 3.7004397
145 | 3.7004397
146 | NaN
147 | 3.9068906
148 | 3.9068906
149 | 3.9068906
150 | 3.9068906
151 | NaN
152 | 4.169925
153 | 4.169925
154 | 4.169925
155 | 4.169925
156 | NaN
157 | 4.4594316
158 | 4.4594316
159 | 4.4594316
160 | 4.4594316
161 |
--------------------------------------------------------------------------------
/test-data/logger5.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 1.5006738
4 | 1.7448581
5 | NaN
6 | NaN
7 | NaN
8 | 1.4649736
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | 0.42781577
14 | 0.6753404
15 | fixedStep chrom=chrI start=1 step=1 span=1
16 | 0.63092977
17 | 1.0
18 | 1.2618595
19 | 1.4649736
20 | 1.6309297
21 | 1.7712437
22 | 1.8927892
23 | 2.0
24 | 2.0959032
25 | 2.1826584
26 | 2.2618594
27 | 2.3347175
28 | 2.4021735
29 | 2.4649734
30 | 2.523719
31 | fixedStep chrom=chrXI start=20 step=1 span=1
32 | -Infinity
33 | -Infinity
34 | -Infinity
35 | -Infinity
36 | NaN
37 | 1.0
38 | 1.0
39 | 1.0
40 | 1.0
41 | NaN
42 | 0.63092977
43 | 0.63092977
44 | 0.63092977
45 | 0.63092977
46 | NaN
47 | 0.0
48 | 0.0
49 | 0.0
50 | 0.0
51 | NaN
52 | 4.1640816
53 | 4.1640816
54 | 4.1640816
55 | 4.1640816
56 | NaN
57 | 1.6309297
58 | 1.6309297
59 | 1.6309297
60 | 1.6309297
61 | NaN
62 | 1.2618595
63 | 1.2618595
64 | 1.2618595
65 | 1.2618595
66 | NaN
67 | 1.6309297
68 | 1.6309297
69 | 1.6309297
70 | 1.6309297
71 | NaN
72 | 1.0
73 | 1.0
74 | 1.0
75 | 1.0
76 | NaN
77 | 0.63092977
78 | 0.63092977
79 | 0.63092977
80 | 0.63092977
81 | NaN
82 | 0.0
83 | 0.0
84 | 0.0
85 | 0.0
86 | NaN
87 | 0.63092977
88 | 0.63092977
89 | 0.63092977
90 | 0.63092977
91 | NaN
92 | 2.8135881
93 | 2.8135881
94 | 2.8135881
95 | 2.8135881
96 | NaN
97 | 2.0
98 | 2.0
99 | 2.0
100 | 2.0
101 | NaN
102 | 0.63092977
103 | 0.63092977
104 | 0.63092977
105 | 0.63092977
106 | NaN
107 | 1.4649736
108 | 1.4649736
109 | 1.4649736
110 | 1.4649736
111 | NaN
112 | 1.2618595
113 | 1.2618595
114 | 1.2618595
115 | 1.2618595
116 | NaN
117 | 1.0
118 | 1.0
119 | 1.0
120 | 1.0
121 | NaN
122 | 1.0
123 | 1.0
124 | 1.0
125 | 1.0
126 | NaN
127 | 1.2618595
128 | 1.2618595
129 | 1.2618595
130 | 1.2618595
131 | NaN
132 | 1.4649736
133 | 1.4649736
134 | 1.4649736
135 | 1.4649736
136 | NaN
137 | -Infinity
138 | -Infinity
139 | -Infinity
140 | -Infinity
141 | NaN
142 | 0.0
143 | 0.0
144 | 0.0
145 | 0.0
146 | NaN
147 | 2.2618594
148 | 2.2618594
149 | 2.2618594
150 | 2.2618594
151 | NaN
152 | 2.523719
153 | 2.523719
154 | 2.523719
155 | 2.523719
156 | NaN
157 | 3.664033
158 | 3.664033
159 | 3.664033
160 | 3.664033
161 |
--------------------------------------------------------------------------------
/test-data/logger6.wig:
--------------------------------------------------------------------------------
1 | track type=wiggle_0
2 | fixedStep chrom=2micron start=100 step=1 span=1
3 | 0.0
4 | 0.36172783
5 | NaN
6 | NaN
7 | NaN
8 | 1.1760913
9 | NaN
10 | NaN
11 | NaN
12 | NaN
13 | -Infinity
14 | -Infinity
15 | fixedStep chrom=chrI start=1 step=1 span=1
16 | 0.47712126
17 | 0.60206
18 | 0.69897
19 | 0.7781513
20 | 0.845098
21 | 0.90309
22 | 0.9542425
23 | 1.0
24 | 1.0413927
25 | 1.0791812
26 | 1.1139433
27 | 1.146128
28 | 1.1760913
29 | 1.20412
30 | 1.230449
31 | fixedStep chrom=chrXI start=20 step=1 span=1
32 | 0.7781513
33 | 0.7781513
34 | 0.7781513
35 | 0.7781513
36 | NaN
37 | 0.60206
38 | 0.60206
39 | 0.60206
40 | 0.60206
41 | NaN
42 | 0.845098
43 | 0.845098
44 | 0.845098
45 | 0.845098
46 | NaN
47 | 0.0
48 | 0.0
49 | 0.0
50 | 0.0
51 | NaN
52 | 0.47712126
53 | 0.47712126
54 | 0.47712126
55 | 0.47712126
56 | NaN
57 | 0.69897
58 | 0.69897
59 | 0.69897
60 | 0.69897
61 | NaN
62 | 0.60206
63 | 0.60206
64 | 0.60206
65 | 0.60206
66 | NaN
67 | 1.8260748
68 | 1.8260748
69 | 1.8260748
70 | 1.8260748
71 | NaN
72 | 1.7323937
73 | 1.7323937
74 | 1.7323937
75 | 1.7323937
76 | NaN
77 | 0.69897
78 | 0.69897
79 | 0.69897
80 | 0.69897
81 | NaN
82 | 0.90309
83 | 0.90309
84 | 0.90309
85 | 0.90309
86 | NaN
87 | 0.9542425
88 | 0.9542425
89 | 0.9542425
90 | 0.9542425
91 | NaN
92 | 0.30103
93 | 0.30103
94 | 0.30103
95 | 0.30103
96 | NaN
97 | 0.60206
98 | 0.60206
99 | 0.60206
100 | 0.60206
101 | NaN
102 | 0.7781513
103 | 0.7781513
104 | 0.7781513
105 | 0.7781513
106 | NaN
107 | 0.9542425
108 | 0.9542425
109 | 0.9542425
110 | 0.9542425
111 | NaN
112 | 1.8864907
113 | 1.8864907
114 | 1.8864907
115 | 1.8864907
116 | NaN
117 | 0.845098
118 | 0.845098
119 | 0.845098
120 | 0.845098
121 | NaN
122 | 1.1139433
123 | 1.1139433
124 | 1.1139433
125 | 1.1139433
126 | NaN
127 | 1.39794
128 | 1.39794
129 | 1.39794
130 | 1.39794
131 | NaN
132 | 0.60206
133 | 0.60206
134 | 0.60206
135 | 0.60206
136 | NaN
137 | 1.7323937
138 | 1.7323937
139 | 1.7323937
140 | 1.7323937
141 | NaN
142 | 0.0
143 | 0.0
144 | 0.0
145 | 0.0
146 | NaN
147 | 0.69897
148 | 0.69897
149 | 0.69897
150 | 0.69897
151 | NaN
152 | 0.90309
153 | 0.90309
154 | 0.90309
155 | 0.90309
156 | NaN
157 | 0.30103
158 | 0.30103
159 | 0.30103
160 | 0.30103
161 |
--------------------------------------------------------------------------------
/test-data/romanNumeralize.input:
--------------------------------------------------------------------------------
1 | chr1 10 30 Spot1 10
2 | chr2 100 95 Spot2 13.2
3 | chr3 20 50 Spot3 5.0 +
4 | chr14 15 20 Spot4 2.0 -
5 | illegal entry
6 | chr4 100200 100220 Spot5 10.0 -
7 | chr5 1000000 1001000 Spot6 1
8 | chr6 0 12 Spot7 12 +
9 | chr7 1 10 Spot8 1.0 -
10 | chr8 15 20 Spot9 . +
11 | chr9 25 10 Spot1
12 | illegal entry
13 | chr16 1
14 |
--------------------------------------------------------------------------------
/test-data/romanNumeralize.output:
--------------------------------------------------------------------------------
1 | chrI 10 30 Spot1 10
2 | chrII 100 95 Spot2 13.2
3 | chrIII 20 50 Spot3 5.0 +
4 | chrXIV 15 20 Spot4 2.0 -
5 | illegal entry
6 | chrIV 100200 100220 Spot5 10.0 -
7 | chrV 1000000 1001000 Spot6 1
8 | chrVI 0 12 Spot7 12 +
9 | chrVII 1 10 Spot8 1.0 -
10 | chrVIII 15 20 Spot9 . +
11 | chrIX 25 10 Spot1
12 | illegal entry
13 | chrXVI 1
14 |
--------------------------------------------------------------------------------
/test-data/stripMatrix.txt:
--------------------------------------------------------------------------------
1 | 0.263444282 0.378414242 0.26688602 0.835096585 0.857437429
2 | 0.841166712 0.004052166 0.663859714 0.130333189 0.307903778
3 | 0.842871693 0.676914668 0.678535517 0.15927136 0.152751526
4 |
--------------------------------------------------------------------------------
/test-data/test.fasta.fai:
--------------------------------------------------------------------------------
1 | 2micron 6318 9 50 51
2 |
--------------------------------------------------------------------------------
/test-data/test.fastqillumina:
--------------------------------------------------------------------------------
1 | @GAII03:1:1:7:1061#0/1
2 | TGGTAAGTTCGCATNTATTATACAGGTTTTTGGTTCAAAAGTANTGATTAG
3 | +GAII03:1:1:7:1061#0/1
4 | a]XX]\YUSPNZWMDO\^_TSZBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
5 | @GAII03:1:1:7:822#0/1
6 | ACCATATGGGTGTCTTGGATCATTGATCAATTAGACTGGCGTTNAATGGCG
7 | +GAII03:1:1:7:822#0/1
8 | a^USY_VSKDKVWXV]TSMJNWVYQWWZWBBBBBBBBBBBBBBBBBBBBBB
9 | @GAII03:1:1:7:183#0/1
10 | TCACGGTCAGCAGCATTGCTTTGTGACTTTGGCGATTGACATTNAGCAGAG
11 | +GAII03:1:1:7:183#0/1
12 | aab^\UY]a][aP_a_[__^_\_[Ta_`MRGRYT]U\U_]BBBBBBBBBBB
13 | @GAII03:1:1:7:1306#0/1
14 | AGCGGATTCCGACTTCCATGGCCACCGTCCGGCTGTCTATATTNACTAAGA
15 | +GAII03:1:1:7:1306#0/1
16 | aSXUTYUWZSSWS[TOP\YRNUZXBBBBBBBBBBBBBBBBBBBBBBBBBBB
17 | @GAII03:1:1:7:1942#0/1
18 | AGTCTCACCAAAAAACGATAAATCTGCAAGACAACATGCAGGTNTAGCTGG
19 | +GAII03:1:1:7:1942#0/1
20 | a`_`a^a\^`a`Xa]YW_aa^]_Y[Z^^`WY^__]_]]]\WUDDXXPP[VV
21 | @GAII03:1:1:7:1498#0/1
22 | AGGGGAACTGGCGAGTTATTTTGTTGGGATGGATGTATAGTTTNATTGCTG
23 | +GAII03:1:1:7:1498#0/1
24 | a`a_\^ZP[\QXV[GY_^`R\]`[aYaY_TOXUWFFMYYTWBBBBBBBBBB
25 | @GAII03:1:1:7:1110#0/1
26 | TCCGCCTCCGCGTCTTGTTCCTTTAAACTATTGACTTCATGTTNTACATTT
27 | +GAII03:1:1:7:1110#0/1
28 | \a`^]_YX]^_XX`_aZ[__TDP`]__U^`_\G`Z^]BBBBBBBBBBBBBB
29 | @GAII03:1:1:7:1806#0/1
30 | TGTGGTAGATTGAGATTGAGAGCGCTGCCTGTTAACTATCGGANTATTAAG
31 | +GAII03:1:1:7:1806#0/1
32 | aaYa`W`^][W[WWX\_R]\YU[\_][Y\TTXY]\Z^_XBBBBBBBBBBBB
33 | @GAII03:1:1:7:1170#0/1
34 | TACCATCTTTGGATAGAGCCTTGGAGATATCTGGCTTTAATCTNCTTTAGT
35 | +GAII03:1:1:7:1170#0/1
36 | a`Z]aa^``]JMZ`]P]V^[`_UWXN]_^FXVFNRXZ^BBBBBBBBBBBBB
37 | @GAII03:1:1:7:1652#0/1
38 | TACAAAATATTGAAAAGAAGGCACGTCAAAAAAGCGCCATCGANAAACAAA
39 | +GAII03:1:1:7:1652#0/1
40 | `baaaab`a`_Xabaa^aaU[Yb_]a`aa`aa]H`___aLOFNDX_XG[^`
41 |
--------------------------------------------------------------------------------
/test-data/test.fastqsanger:
--------------------------------------------------------------------------------
1 | @GAII03:1:1:7:1061#0/1
2 | TGGTAAGTTCGCATNTATTATACAGGTTTTTGGTTCAAAAGTANTGATTAG
3 | +GAII03:1:1:7:1061#0/1
4 | B>99>=:641/;8.%0=?@54;#############################
5 | @GAII03:1:1:7:822#0/1
6 | ACCATATGGGTGTCTTGGATCATTGATCAATTAGACTGGCGTTNAATGGCG
7 | +GAII03:1:1:7:822#0/1
8 | B?64:@74,%,7897>54.+/87:288;8######################
9 | @GAII03:1:1:7:183#0/1
10 | TCACGGTCAGCAGCATTGCTTTGTGACTTTGGCGATTGACATTNAGCAGAG
11 | +GAII03:1:1:7:183#0/1
12 | BBC?=6:>B>6=6@>###########
13 | @GAII03:1:1:7:1306#0/1
14 | AGCGGATTCCGACTTCCATGGCCACCGTCCGGCTGTCTATATTNACTAAGA
15 | +GAII03:1:1:7:1306#0/1
16 | B4965:68;4484<501=:3/6;9###########################
17 | @GAII03:1:1:7:1942#0/1
18 | AGTCTCACCAAAAAACGATAAATCTGCAAGACAACATGCAGGTNTAGCTGG
19 | +GAII03:1:1:7:1942#0/1
20 | BA@AB?B=?ABA9B>:8@BB?>@:<;??A8:?@@>@>>>=86%%9911<77
21 | @GAII03:1:1:7:1498#0/1
22 | AGGGGAACTGGCGAGTTATTTTGTTGGGATGGATGTATAGTTTNATTGCTG
23 | +GAII03:1:1:7:1498#0/1
24 | BAB@=?;1<=297<(:@?A3=>A@:9>?@99A@B;<@@5%1A>@@6?A@=(A;?>##############
29 | @GAII03:1:1:7:1806#0/1
30 | TGTGGTAGATTGAGATTGAGAGCGCTGCCTGTTAACTATCGGANTATTAAG
31 | +GAII03:1:1:7:1806#0/1
32 | BB:BA8A?><8<889=@3>=:6<=@><:=559:>=;?@9############
33 | @GAII03:1:1:7:1170#0/1
34 | TACCATCTTTGGATAGAGCCTTGGAGATATCTGGCTTTAATCTNCTTTAGT
35 | +GAII03:1:1:7:1170#0/1
36 | BA;>BB?AA>+.;A>1>7?@?'97'/39;?#############
37 | @GAII03:1:1:7:1652#0/1
38 | TACAAAATATTGAAAAGAAGGCACGTCAAAAAAGCGCCATCGANAAACAAA
39 | +GAII03:1:1:7:1652#0/1
40 | ACBBBBCABA@9BCBB?BB6<:C@>BABBABB>)A@@@B-0'/%9@9(