├── .gitignore
├── LICENSE
├── Makefile
├── README.md
├── annotate_windows.sh
├── calc_coverage_from_bam.sh
├── calc_window_mappability.py
├── call_cnv.c
├── data
│   ├── clamms_special_regions.grch38.bed
│   ├── clamms_special_regions.hg19.bed
│   └── example_qcs.Rdata
├── fit_models.c
├── hmm.c
├── hmm.h
├── ltqnorm.c
├── normalize_coverage.c
├── plot_cnv.R
├── plot_cnv.sh
├── sam_gatk_coverage_to_bed.c
├── split_targets_into_windows.awk
├── transpose.gawk
├── utils.c
└── utils.h
/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | normalize_coverage
3 | fit_models
4 | call_cnv
5 | genotype
6 | sam_gatk_coverage_to_bed
7 |
8 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 Regeneron Genetics Center LLC
2 |
3 | License:
4 |
5 | Permission is hereby granted, free of charge, by Regeneron Genetics Center LLC (the “Licensor”) to any person obtaining a copy of this software (“you”) and any associated documentation files (the "Software"), to publish, copy and redistribute this material in any medium or format, and to adapt the Software ("Adaptations").
6 |
7 | You may distribute the Software or Adaptations but you must give appropriate credit to Licensor by citing the following publication:
8 |
9 | Packer JS, Maxwell EK, O’Dushlaine C, et al. (2015) CLAMMS: a scalable algorithm for calling common and rare copy number variants from exome sequencing data. Bioinformatics.
10 |
11 | Your citation of that publication does not in any way suggest that the Licensor endorses you or your use.
12 |
13 | This license is subject to the following conditions:
14 |
15 | (i) The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software or Adaptations;
16 | (ii) you are not permitted to sublicense, and/or sell copies of the Software or Adaptations, and are not permitted to permit persons to whom the Software or Adaptations are furnished to sublicense, and/or sell copies of the Software or Adaptations; and
17 | (iii) you may not apply legal terms or technological measures that restrict others from doing anything this license permits.
18 |
19 | Note that this license may not give you all of the permissions necessary for your intended use. For example, other rights, such as publicity, privacy or moral rights, may limit how you use the material.
20 |
21 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT REPRESENTATION OR WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT, AND LICENSOR EXPRESSLY DISCLAIMS ALL SUCH REPRESENTATION AND WARRANTIES.
22 |
23 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR ADAPTATIONS OR THE USE IN THE SOFTWARE OR ADAPTATIONS.
24 |
25 | By downloading, obtaining or otherwise using the Software or Adaptations, you thereby accept and agree to be bound by the terms of this license.
26 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Builds the CLAMMS executables. `make` (or `make all`) compiles everything;
2 | # `make clean` removes the object files and the executables.
3 |
4 | # all and clean name actions, not files, so they are declared phony.
5 | .PHONY: all clean
6 |
7 | all: normalize_coverage fit_models call_cnv sam_gatk_coverage_to_bed
8 |
9 | # Headers are listed as prerequisites so that editing one triggers a rebuild.
10 | utils.o: utils.c utils.h
11 | 	gcc -c utils.c
12 |
13 | # ltqnorm.c is a prerequisite only; it is not compiled on its own
14 | # (presumably hmm.c #includes it -- confirm against hmm.c).
15 | hmm.o: hmm.c hmm.h ltqnorm.c utils.h
16 | 	gcc -c hmm.c
17 |
18 | normalize_coverage: normalize_coverage.o utils.o
19 | 	gcc normalize_coverage.o utils.o -o normalize_coverage -lm
20 |
21 | normalize_coverage.o: normalize_coverage.c utils.h
22 | 	gcc -c normalize_coverage.c
23 |
24 | fit_models: fit_models.o utils.o
25 | 	gcc fit_models.o utils.o -o fit_models -lm
26 |
27 | fit_models.o: fit_models.c utils.h
28 | 	gcc -c fit_models.c
29 |
30 | call_cnv: call_cnv.o utils.o hmm.o
31 | 	gcc call_cnv.o utils.o hmm.o -o call_cnv -lm
32 |
33 | call_cnv.o: call_cnv.c utils.h hmm.h
34 | 	gcc -c call_cnv.c
35 |
36 | sam_gatk_coverage_to_bed: sam_gatk_coverage_to_bed.o
37 | 	gcc sam_gatk_coverage_to_bed.o -o sam_gatk_coverage_to_bed
38 |
39 | sam_gatk_coverage_to_bed.o: sam_gatk_coverage_to_bed.c
40 | 	gcc -c sam_gatk_coverage_to_bed.c
41 |
42 | clean:
43 | 	rm -rf *.o normalize_coverage fit_models call_cnv sam_gatk_coverage_to_bed
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CLAMMS: a scalable algorithm for calling common and rare copy number variants from exome sequencing data
2 |
3 | ## What CLAMMS is for
4 |
5 | As per the title, CLAMMS (Copy number estimation using Lattice-Aligned Mixture Models) is an algorithm for calling copy number variants (CNVs) from exome sequencing read depths. It has two main advantages over previous CNV callers for exome data:
6 |
7 | 1. CLAMMS is suitable for calling CNVs across the whole allele frequency spectrum, not just rare CNVs. Previous tools require that each sample be compared to a reference panel of samples that are assumed to be diploid in any given region. This assumption does not hold in copy number polymorphic regions (where non-diploid alleles are not rare), leading to improper genotypes.
8 | 1. CLAMMS can scale to datasets of tens or hundreds of thousands of samples. Apart from one short processing step (which takes ~30 seconds for 30,000 samples), each sample can be processed in parallel. Unlike previous tools, which have RAM requirements that scale linearly in the number of samples, each CLAMMS process uses a constant amount of RAM regardless of the number of samples.
9 |
10 | Please note that CLAMMS is not intended to be used with whole-genome sequencing data or data from cancer samples.
11 |
12 | The rest of this README will give instructions on how to use CLAMMS.
13 |
14 | ## Related Publications
15 |
16 | * Methods paper: Packer JS, Maxwell EK, O'Dushlaine C, et al. (2015) CLAMMS: a scalable algorithm for calling common and rare copy number variants from exome sequencing data. Bioinformatics 32 (1): 133-135. ([link](http://bioinformatics.oxfordjournals.org/content/32/1/133)) This paper describes the methods of CLAMMS, as well as the results of validation experiments we used to evaluate its performance in comparison to previous tools.
17 | * 50K DiscovEHR Study CNV analysis (pre-print): Maxwell EK, Packer JS, O'Dushlaine C, McCarthy SE, Hare-Harris A, Gonzaga-Jauregui C, et al. (2017) Profiling copy number variation and disease associations from 50,726 DiscovEHR Study exomes. bioRxiv. Survey of CLAMMS CNVs from ~50k DiscovEHR study exomes with paired EHR phenotype associations. See supplemental materials for additional details on CLAMMS validation and quality-control procedures. [http://biorxiv.org/content/early/2017/03/22/119461](http://biorxiv.org/content/early/2017/03/22/119461)
18 |
19 | ## Getting Started
20 |
21 | First, clone the CLAMMS Github repository and compile the code:
22 |
23 | git clone https://github.com/rgcgithub/clamms.git
24 | cd clamms
25 | make
26 |
27 | Set the environment variable `CLAMMS_DIR` to the appropriate path using the `export` command
28 |
29 | export CLAMMS_DIR=/path/to/clamms
30 |
31 | Now you will need to generate a file `windows.bed`. This file will list windows along the exome for which CLAMMS will estimate copy numbers, along with metadata for those windows. Most windows will simply be exons from your exome capture design, but large exons (>= 1000 bp) will be split up into equally-sized calling windows of ~500 bp.
32 |
33 | To generate `windows.bed`, you will need four input files:
34 |
35 | 1. targets.bed — a BED file listing your exome capture regions.
36 | 1. genome.fa — an indexed FASTA file for the reference genome you are using.
37 | 1. mappability.bed — a BED file listing mappability scores across the genome. More detail on this below.
38 | 1. clamms_special_regions.bed — provided in the data/ directory with the code distribution as `clamms_special_regions.hg19.bed` (hg19 coordinates) or `clamms_special_regions.grch38.bed` (GRCh38 coordinates).
39 |
40 | The chromosome names in the BED files and in the genome index should not have "chr" preceding the number/letter (i.e. "1" instead of "chr1"). The BED files must be sorted using either `bedtools sort` or `sort -k1,1 -k2,2n`.
41 |
42 | The FASTA index should be generated from the raw FASTA file using [BWA](http://bio-bwa.sourceforge.net/bwa.shtml): `bwa index genome.fa`.
43 |
44 | The mappability score for a given base is one divided by the number of locations in the genome that the k-mer starting at that base aligns to (k = the length of your reads), with up to two mismatches allowed (see [here](http://genome.ucsc.edu/cgi-bin/hgFileUi?db=hg19&g=wgEncodeMapability) for more details). You can download mappability tracks for 75-mers or 100-mers on the GRCh37 human reference genome from the link above and convert them to CLAMMS-ready BED files (requires `bigWigToWig` tool from [UCSC](http://genome.ucsc.edu/goldenpath/help/bigWig.html)):
45 |
46 | wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/encodeDCC/wgEncodeMapability/wgEncodeCrgMapabilityAlign75mer.bigWig
47 | bigWigToWig wgEncodeCrgMapabilityAlign75mer.bigWig wgEncodeCrgMapabilityAlign75mer.wig
48 | grep -v '^#' wgEncodeCrgMapabilityAlign75mer.wig | sed 's/^chr//g' > mappability.bed
49 |
50 | Once you have the input files ready, you can generate `windows.bed` with the following commands. This will take ~5 minutes. Note that the preprocessing script `annotate_windows.sh` requires [Bedtools](http://bedtools.readthedocs.org) to be installed and in your system PATH.
51 |
52 | export INSERT_SIZE=200
53 | chmod +x $CLAMMS_DIR/annotate_windows.sh
54 | $CLAMMS_DIR/annotate_windows.sh targets.bed genome.fa mappability.bed $INSERT_SIZE $CLAMMS_DIR/data/clamms_special_regions.hg19.bed >windows.bed
55 |
56 | The `INSERT_SIZE` variable should be set to a value that is a little bit larger than the average insert size for your sequencing process (so that most reads will come from inserts of size <= this value). For example, we use `INSERT_SIZE = 200` when our mean insert size is ~150 bp. If a window is smaller than `INSERT_SIZE`, it is extended to the length of `INSERT_SIZE` for purposes of calculating its GC content. This is because according to [Benjamini and Speed (2012)](http://www.ncbi.nlm.nih.gov/pubmed/22323520), GC coverage bias is best estimated based on the GC content of the insert, not the individual reads.
57 |
58 | #### Troubleshooting windows.bed file generation
59 | If you have trouble generating the windows.bed file, your input files are likely improperly formatted or have inconsistencies. A few things you should check:
60 |
61 | * All BED files are sorted properly, using `sort -k1,1 -k2,2n`. This sorts by chromosome name (string sort) then by start position (numeric sort). You should re-sort all files in the event that your system locale settings differ from those that were used to sort the externally sourced input files.
62 | * Chromosome naming consistency: Make sure that all chromosomes are named consistently (i.e. chromosome 1 is "1", not "chr1"). This must be the case in all input files, including the genome.fa input file.
63 |
64 | Here is a simple test you can run on your input files to make sure they are consistent and sorted properly:
65 |
66 | cut -f 1 targets.bed | uniq
67 | cut -f 1 mappability.bed | uniq
68 | cut -f 1 clamms_special_regions.bed | uniq
69 |
70 | All of these should return the same chromosome names and sort order:
71 |
72 | 1
73 | 10
74 | ...
75 | 19
76 | 2
77 | 20
78 | 21
79 | 22
80 | 3
81 | 4
82 | ...
83 | 9
84 | X
85 | Y
86 |
87 | Also check the chromosome names in the genome FASTA file (sort order is not important):
88 |
89 | grep '^>' -m 24 genome.fa
90 | >1
91 | >2
92 | ...
93 | >22
94 | >X
95 | >Y
96 |
97 | ## Computing depths of coverage
98 |
99 | You will need a BED file for each of your samples listing the mean depth of coverage at each of the exact windows listed in `windows.bed`. The coverage files must be named in the following format: `sample_name.coverage.bed`
100 |
101 | The sample depth-of-coverage files can be generated from BAM files using [Samtools](http://www.htslib.org/):
102 |
103 | # 30 = minimum mapping quality for a read to be counted
104 | samtools bedcov -Q 30 windows.bed sample.bam \
105 | | awk '{ printf "%s\t%d\t%d\t%.6g\n", $1, $2, $3, $NF/($3-$2); }' \
106 | >sample.coverage.bed
107 |
108 | They can also be generated from GATK DepthOfCoverage output files:
109 |
110 | $CLAMMS_DIR/sam_gatk_coverage_to_bed sample.gatk_readDepth_1x_q30.out windows.bed >sample.coverage.bed
111 |
112 | This step will almost certainly take longer than the CNV calling process itself. To speed it up, consider processing several samples in parallel using `xargs`:
113 |
114 | cat list.of.samples.txt | xargs -P $NUM_PROCESSES --max-args 1 ./compute_coverage.sh
115 |
116 | Where `list.of.samples.txt` lists each sample name (one per line) and `compute_coverage.sh` is a shell script that takes a sample name as its argument and generates its coverage file using one of the two methods shown above.
117 |
118 | ## Normalizing the coverage data
119 |
120 | The first step of CLAMMS is to normalize each individual sample's coverage data to control for GC-bias and the sample's overall average depth of coverage.
121 |
122 | ls *.coverage.bed | cut -d '.' -f 1 | while read SAMPLE
123 | do
124 | $CLAMMS_DIR/normalize_coverage $SAMPLE.coverage.bed windows.bed >$SAMPLE.norm.cov.bed
125 | done
126 |
127 | This step can be parallelized using `xargs`:
128 |
129 | echo '$CLAMMS_DIR/normalize_coverage $1.coverage.bed windows.bed >$1.norm.cov.bed' \
130 | >normalize_coverage.sh && chmod +x normalize_coverage.sh
131 | cat list.of.samples.txt | xargs -P $NUM_PROCESSES --max-args 1 ./normalize_coverage.sh
132 |
133 | ## Training the statistical models
134 |
135 | To call CNVs for a given sample, CLAMMS compares its coverage data to probability distributions that describe the expected depth of coverage, conditional on copy number state, at each calling window. These distributions are fit using coverage data from a reference panel of samples that ideally have been sequenced using the same procedures. In this section, we show how to train CLAMMS models under the unrealistic assumption that there are no "batch effects" in your data. Batch effects are systematic variations in coverage due to variability in sample preparation procedures, sequencing procedures, or even input DNA quality. At the end of the tutorial, we will show how to implement our recommended procedure for batch effect correction, which involves selecting a "custom" reference panel for each sample.
136 |
137 | A reference panel is specified using a file with two columns: 1) a path to a sample's normalized coverage file, and 2) the sample's sex (optional).
138 |
139 | ls *.norm.cov.bed | while read FILE;
140 | do
141 | echo -e -n "$FILE\t"
142 | grep "^Y" $FILE | awk '{ x += $4; n++; } END { if (x/n >= 0.1) print "M"; else print "F"; }'
143 | done >ref.panel.files.txt
144 |
145 | If you have a reference panel of more than 1000 samples, you may have to increase the limit on the maximum number of open files for your system (i.e. `ulimit -n 20000`). [This link](http://stackoverflow.com/questions/11342167/how-to-increase-ulimit-on-amazon-ec2-instance) explains how to increase max open files on an Amazon EC2 instance.
146 |
147 | The CLAMMS models are trained using the `fit_models` program.
148 |
149 | $CLAMMS_DIR/fit_models ref.panel.files.txt windows.bed >models.bed
150 |
151 | `models.bed` will have the following columns:
152 |
153 | 1. chromosome
154 | 1. window start coordinate
155 | 1. window end coordinate
156 | 1. max copy number considered (-1 if window filtered, 6 for known duplication regions, and 3 otherwise; see Supplementary Materials)
157 | 1. GC fraction of the window
158 | 1. average mappability score of bases in the window
159 | 1. homozygous deletion distribution flag (model parameter; see CLAMMS paper for details)
160 | 1. lambda (model parameter)
161 | 1. mu\_dip (model parameter)
162 | 1. sigma\_dip (model parameter)
163 | 1. estimated \# samples in the reference panel that have copy number 0 at this window
164 | 1. estimated \# samples in the reference panel that have copy number 1 at this window
165 | 1. estimated \# samples in the reference panel that have copy number 2 at this window
166 | 1. estimated \# samples in the reference panel that have copy number 3 at this window
167 | 1. estimated \# samples in the reference panel that have copy number 4 at this window
168 | 1. estimated \# samples in the reference panel that have copy number 5 at this window
169 | 1. estimated \# samples in the reference panel that have copy number 6 at this window
170 |
171 | ## Making CNV Calls
172 |
173 | Once you have a models file, you can call CNVs using the `call_cnv` program.
174 |
175 | $CLAMMS_DIR/call_cnv sample.norm.cov.bed models.bed --sex $SEX >sample.cnv.bed
176 |
177 | The `--sex` argument is optional and takes the values `M` or `F`.
178 |
179 | `sample.cnv.bed` will have the following columns.
180 |
181 | 1. chromosome
182 | 1. window start coordinate
183 | 1. window end coordinate
184 | 1. interval (chr:start-end)
185 | 1. sample name/id
186 | 1. DEL or DUP
187 | 1. most likely integer copy number
188 | 1. number of windows in the call
189 | 1. Q\_SOME: Phred-scaled quality of any CNV being in this interval.
190 | 1. Q\_EXACT: a non-Phred-scaled quality score that measures how closely the coverage profile matches the exact called CNV state and breakpoints. Will document in greater detail later. Any call with Q\_EXACT < 0 is of questionable quality.
191 | 1. Q\_LEFT\_EXTEND: Phred-scaled quality of the left breakpoint (based on the likelihood ratio of the stated breakpoint compared to extending the call by 1 window on the left)
192 | 1. LEFT\_EXTEND\_COORD: add this to the CNV start coordinate to get the start coordinate of the first window to the left of the called CNV
193 | 1. Q\_RIGHT\_EXTEND: phred-scaled quality of the right breakpoint (based on the likelihood ratio of the stated breakpoint compared to extending the call by 1 window on the right)
194 | 1. RIGHT\_EXTEND\_COORD: add this to the CNV end coordinate to get the end coordinate of the first window to the right of the called CNV
195 | 1. Q\_LEFT\_CONTRACT: phred-scaled quality of the left breakpoint (based on the likelihood ratio of the stated breakpoint compared to shrinking the call by 1 window on the left)
196 | 1. LEFT\_CONTRACT\_COORD: add this to the CNV start coordinate to get the start coordinate of the second window of the called CNV
197 | 1. Q\_RIGHT\_CONTRACT: phred-scaled quality of the right breakpoint (based on the likelihood ratio of the stated breakpoint compared to shrinking the call by 1 window on the right)
198 | 1. RIGHT\_CONTRACT\_COORD: add this to the CNV end coordinate to get the end coordinate of the second-to-last window of the called CNV
199 |
200 | ## Handling Batch Effects
201 |
202 | Calling CNVs on large sample sets is difficult because variability in DNA quality, sample preparation procedures, and sequencing procedures results in systematic biases in coverage data ("batch effects"). Some methods attempt to correct for these biases using dimensionality reduction techniques (e.g. PCA), while others select for each sample a "custom" reference panel of samples that have coverage profiles that are highly correlated to the sample in question. Both approaches become computationally limiting, as they require each sample's coverage profile to be compared to every other sample's coverage profile, resulting in O(*n*<sup>2</sup>) computational complexity.
203 |
204 | CLAMMS uses the "custom reference panel" approach to correct batch effects, but instead of examining samples' coverage profile directly, it examines a small number of sequencing quality control (QC) metrics. In practice, we find that samples with similar QC metrics have similar coverage profiles, so the QC metrics can be thought of as a pre-defined dimensionality reduction of the coverage data that to some extent reflects the underlying causes of coverage variance. Since the number of QC metrics we examine is small, we can use a *k*-d tree data structure to efficiently select a reference panel suitable for any given sample. The computational complexity of the CLAMMS CNV calling pipeline is formally O(*n* log *n*), but the reference panel selection procedure is very fast (~30 seconds for 30,000 samples) and all other steps are O(*n*), so in practice CLAMMS achieves linear scalability in the number of samples processed.
205 |
206 | Below, we will show how to generate the *k*-d tree and select a reference panel for each sample in your analysis. However, please note that if you are not building CLAMMS into an automated pipeline and are performing a one-time analysis on a dataset of tens or hundreds of samples, it may be sufficient to take a more streamlined approach where a small set of precomputed models are reused within sample batches:
207 |
208 | 1. Generate a PCA plot from the coverage data from all samples in your small dataset
209 | 1. Manually assign samples to batches based on the PCA plot
210 | 1. Train a set of CLAMMS models for each batch
211 | 1. For each sample, call CNVs using the models for the batch you assigned it to
212 |
213 | The `svd` program ([link](http://tedlab.mit.edu/~dr/SVDLIBC/)) makes it easy to compute a PCA, and supports computing only the first *n* principal components. The following code shows an example of computing the first 4 principal components from your coverage data.
214 |
215 | sudo apt-get install gawk
216 | NUM_SAMPLES=`ls *.norm.cov.bed | wc -l | awk '{print $1}'`
217 | NUM_WINDOWS=`ls *.norm.cov.bed | head -n 1 | xargs awk '$1 != "X" && $1 != "Y" && $NF == 0 {x++} END {print x}'`
218 | echo -e "$NUM_SAMPLES\t$NUM_WINDOWS" >matrix.txt
219 |
220 | ls *.norm.cov.bed | while read FILE
221 | do
222 | awk '$1 != "X" && $1 != "Y" && $NF == 0 { print $4 }' $FILE \
223 | | gawk -f $CLAMMS_DIR/transpose.gawk >>matrix.txt
224 | done
225 |
226 | svd -d 4 -o svd-output -r dt matrix.txt
227 | ls *.norm.cov.bed | cut -d '.' -f 1 >sample.names.txt
228 | tail -n +2 svd-output-Ut | tr ' ' '\t' | gawk -f $CLAMMS_DIR/transpose.gawk \
229 | | paste sample.names.txt - >pca.coordinates.txt
230 |
231 | The output file `pca.coordinates.txt` can be loaded in R:
232 |
233 | coords <- read.table("pca.coordinates.txt", col.names=c("sample", "pc1", "pc2", "pc3", "pc4"), colClasses=c("character", rep("numeric", 4)))
234 |
235 | We recommend the library `ggplot2` for plotting.
236 |
237 | library(ggplot2)
238 | ggplot(coords, aes(x = pc1, y = pc2)) + geom_point()
239 |
240 | ## Selecting reference panels using the *k*-d tree
241 |
242 | To identify a custom reference panel for every sample efficiently, CLAMMS collects seven QC metrics for each sample and performs a fast *k*-nearest neighbors search algorithm (*k=100*) implemented using a *k*-d tree data structure. This is performed with the R package `FNN` ([link](http://cran.r-project.org/web/packages/FNN/index.html)). While different QC metrics and values of *k* can be used, we found the Picard metrics GCDROPOUT, ATDROPOUT, MEANINSERTSIZE, ONBAITVSSELECTED, PCTPFUQREADS, PCTTARGETBASES10X, and PCTTARGETBASES50X to work well in practice. Also note that QC metrics should be normalized to similar scales such that distances between QC metrics are equally weighted.
243 |
244 | We have provided an example data set and steps to identify the 20-nearest neighbors in R.
245 |
246 | $CLAMMS_DIR/data/example_qcs.Rdata
247 |
248 | The following code assumes that a data frame has been constructed with sample IDs in the first column and raw QC metrics in the subsequent columns, with one sample per row. It also requires the `FNN` package described above.
249 |
250 | # This code requires the FNN (Fast Nearest Neighbors) R package (http://cran.r-project.org/package=FNN)
251 | require(FNN)
252 |
253 | # Load the example data set into data frame 'example.qcs'
254 | load("example_qcs.Rdata")
255 |
256 | # Create a scaled copy of the data frame
257 | example.qcs.scaled <- example.qcs
258 | for (i in 2:ncol(example.qcs.scaled)) {
259 | mini <- min(example.qcs.scaled[,i])
260 | maxi <- max(example.qcs.scaled[,i])
261 | example.qcs.scaled[,i] <- apply(example.qcs.scaled, 1, function(row) {
262 | row[[i]] <- (as.numeric(row[[i]]) - mini) / (maxi - mini)
263 | } )
264 | }
265 |
266 | # Get k-nearest neighbors for each sample
267 | k.param <- 20
268 | knns <- get.knn(example.qcs.scaled[,c(seq(2,ncol(example.qcs.scaled)))],k=k.param,algorithm="kd_tree")
269 |
270 | # Generate a single file for each sample listing its k-nearest neighbor sample IDs
271 | for (i in 1:nrow(example.qcs.scaled)) {
272 | fname <- paste(example.qcs.scaled$SAMPLE[i], ".", k.param, "nns.txt", sep="")
273 | nn.sampleids <- example.qcs.scaled$SAMPLE[ knns$nn.index[i,] ]
274 | write.table(nn.sampleids, fname, quote=F, row.names=F, col.names=F)
275 | }
276 |
277 | A single file will be generated for each sample with a list of its *k*-nearest neighbor sample IDs (with filenames `$SAMPLE.<k>nns.txt`, e.g. `$SAMPLE.20nns.txt` for *k* = 20). Mapping this list to a list of normalized coverage BED files will produce the input required for the fit_models command (see sections "Training the Statistical Models" and "Calling CNVs using the selected reference panels").
278 |
279 | sed 's/$/.norm.cov.bed/' $SAMPLE.20nns.txt >$SAMPLE.ref.panel.files.txt
280 |
281 | To see how well the *k*-nearest neighbors fit for each sample, we compute the distance of each sample to the mean of the cluster corresponding to its *k*-nearest neighbors, then plot the cumulative distribution of this metric over all samples. This should help to identify if there are outlier samples that do not have a good reference panel.
282 |
283 | # To check how well each sample's kNNs fit, compute the distance to its kNN cluster mean
284 | example.qcs.scaled$DistanceToClusterMean <- sapply(1:nrow(example.qcs.scaled), function(x) {
285 | this.knns <- knns$nn.index[x,];
286 | center <- colMeans(example.qcs.scaled[this.knns, 2:ncol(example.qcs.scaled)]);
287 | return(as.numeric(dist(rbind(as.numeric(example.qcs.scaled[x, 2:ncol(example.qcs.scaled)]), as.numeric(center)))))
288 | })
289 |
290 | # Plot distance distribution
291 | plot(ecdf(example.qcs.scaled$DistanceToClusterMean))
292 |
293 | ## Calling CNVs using the selected reference panels
294 |
295 | # make a master list of the name, norm.cov.bed filepath, and sex of every sample
296 |
297 | ls *.norm.cov.bed | while read FILE;
298 | do
299 | SAMPLE=`echo "$FILE" | cut -d '.' -f 1`
300 | echo -e -n "$SAMPLE\t$FILE\t"
301 | grep "^Y" $FILE | awk '{ x += $4; n++; } END { if (x/n >= 0.1) print "M"; else print "F"; }'
302 | done >sample.file.sex.txt
303 |
304 | # call CNVs (should take ~1 minute/sample)
305 | # you can run this in parallel if you put the inner part of the loop
306 | # in a shell script and run it using xargs as shown in previous examples
307 |
308 | ls *.norm.cov.bed | cut -d '.' -f 1 | while read SAMPLE;
309 | do
310 | SEX=`echo "$SAMPLE" | join - sample.file.sex.txt | tr ' ' '\t' | cut -f 3`
311 | join $SAMPLE.100nns.txt sample.file.sex.txt | tr ' ' '\t' | cut -f 2- >$SAMPLE.ref.panel.txt
312 | $CLAMMS_DIR/fit_models $SAMPLE.ref.panel.txt windows.bed >$SAMPLE.models.bed
313 | $CLAMMS_DIR/call_cnv $SAMPLE.norm.cov.bed $SAMPLE.models.bed --sex $SEX >$SAMPLE.cnv.bed
314 | done
315 |
316 | To alter the sensitivity/specificity profile of CLAMMS CNV calls, modify the `call_cnv --cnv_rate` parameter. The default (3.0e-8) is tuned for specificity, but can be increased to improve sensitivity (particularly for small CNVs) at the cost of an increased FDR.
317 |
318 | ## Visualizing CNVs for a sample
319 |
320 | First, install the R packages `dplyr` and `ggplot2`.
321 | Then, for a sample `$SAMPLE`, run the following command:
322 |
323 | $CLAMMS_DIR/plot_cnv.sh $SAMPLE.cnv.bed $SAMPLE.norm.cov.bed $SAMPLE.models.bed
324 |
325 | This script will create a directory `clamms_cnv_plots/$SAMPLE/` with PNG images visualizing each CNV called for the sample. An example output image is shown in Section 8 of the Supplementary Materials.
326 |
--------------------------------------------------------------------------------
/annotate_windows.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Writes the annotated calling windows (windows.bed) to stdout.
4 | #
5 | # Arguments: targets.bed genome.fa mappability.bed insert_size special_regions.bed
6 | # Requires the CLAMMS_DIR environment variable and bedtools on the PATH.
7 | #
8 | # Pipeline stages, in order:
9 | #   1. split_targets_into_windows.awk is run on targets.bed.
10 | #   2. Each window shorter than insert_size is widened to insert_size around
11 | #      its midpoint; column 4 is passed through unchanged.
12 | #   3. bedtools nuc annotates the (possibly widened) windows; the header line
13 | #      is dropped and columns 1-4 and 6 are kept (column 6 is presumably the
14 | #      GC fraction for a 4-column input -- confirm against bedtools nuc docs).
15 | #   4. The original coordinates are recovered by parsing column 4 as
16 | #      chr:start-end; column 5 becomes the nuc value times 200, rounded, and
17 | #      column 6 the nuc value to three decimals.
18 | #   5. calc_window_mappability.py appends a mappability column.
19 | #   6. bedtools map appends the minimum of column 4 of special_regions.bed over
20 | #      each window; windows with no overlap (".") get 3.
21 | #
22 | # Error messages go to stderr so they cannot end up inside the redirected
23 | # windows.bed. Path arguments are quoted so paths containing spaces work.
24 |
25 | if [ -z "$CLAMMS_DIR" ]; then
26 |     echo "Environment variable CLAMMS_DIR must be set." >&2
27 |     exit 1
28 | fi
29 |
30 | if [[ $# != 5 ]]; then
31 |     echo "Usage: ./annotate_windows targets.bed genome.fa mappability.bed insert_size special_regions.bed >windows.bed" >&2
32 |     exit 1
33 | fi
34 |
35 | awk -f "$CLAMMS_DIR/split_targets_into_windows.awk" "$1" \
36 | | awk -v INSERT_SIZE="$4" '{
37 |     window_len = $3 - $2;
38 |     if (INSERT_SIZE > window_len) window_len = INSERT_SIZE;
39 |     left_bases = int(window_len / 2);
40 |     right_bases = left_bases;
41 |     if (window_len % 2 == 1) right_bases++;
42 |     mid = int(($3+$2)/2);
43 |     printf "%s\t%d\t%d\t%s\n",
44 |         $1, mid - left_bases, mid + right_bases, $4 }' \
45 | | bedtools nuc -fi "$2" -bed - \
46 | | tail -n +2 | cut -f 1-4,6 \
47 | | awk '{
48 |     split($4, arr1, ":");
49 |     split(arr1[2], arr2, "-");
50 |     printf "%s\t%d\t%d\t%s\t%.0f\t%.3f\n",
51 |         arr1[1], arr2[1], arr2[2], $4, $5*200, $5 }' \
52 | | "$CLAMMS_DIR/calc_window_mappability.py" "$3" \
53 | | bedtools map -a - -b "$5" -c 4 -o min | awk '
54 |     BEGIN { OFS="\t"; } $8 == "." { $8 = 3; } { print; }
55 | '
56 |
57 |
--------------------------------------------------------------------------------
/calc_coverage_from_bam.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Prints one line per window of windows.bed: chromosome, start, end, and the
4 | # last column of `samtools bedcov` divided by the window length (end - start).
5 | #
6 | # Arguments: windows.bed sample.bam min_map_qscore (passed to bedcov as -Q).
7 | #
8 | # The usage message goes to stderr so it cannot end up inside a redirected
9 | # coverage file. Arguments are quoted so paths containing spaces work.
10 |
11 | if [[ $# != 3 ]]; then
12 |     echo "Usage: ./calc_coverage windows.bed sample.bam min_map_qscore" >&2
13 |     exit 1
14 | fi
15 |
16 | samtools bedcov -Q "$3" "$1" "$2" \
17 | | awk '{ printf "%s\t%d\t%d\t%.6g\n", $1, $2, $3, $NF/($3-$2); }'
18 |
19 |
--------------------------------------------------------------------------------
/calc_window_mappability.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import sys
4 | from signal import signal, SIGPIPE, SIG_DFL
5 | signal(SIGPIPE, SIG_DFL)
6 |
7 | if len(sys.argv) != 2:
8 | sys.stderr.write("Usage: cat windows.bed | ./calc_window_mappability.py mappability.bed\n")
9 | sys.exit(1)
10 |
11 | window_file = sys.stdin
12 | values_file = open(sys.argv[1])
13 | last_val = None
14 |
15 | def read_window():  # Read the next window line from stdin; returns None at end of input.
16 | line = window_file.readline()
17 | if len(line) == 0:  # readline() returns an empty string only at EOF
18 | return None
19 | i1 = line.find('\t')  # i1, i2, i3: positions of the first three tab characters
20 | i2 = line.find('\t', i1+1)
21 | i3 = line.find('\t', i2+1);
22 | if (i3 == -1):  # only three columns: end column 3 at the final character (assumed to be the newline)
23 | i3 = len(line) - 1;
24 | return (line[:i1], int(line[i1+1:i2]), int(line[i2+1:i3]), line[i3:-1])  # (chrom, start, end, rest of line from its leading tab, final character dropped)
25 |
26 | def read_value():  # Read the next interval from the mappability file; returns None at end of file.
27 | line = values_file.readline()
28 | if len(line) == 0:  # readline() returns an empty string only at EOF
29 | return None
30 | i1 = line.find('\t')  # i1, i2, i3: positions of the first three tab characters
31 | i2 = line.find('\t', i1+1)
32 | i3 = line.find('\t', i2+1)  # no fallback here: a fourth column is assumed to exist
33 | return (line[:i1], int(line[i1+1:i2]), int(line[i2+1:i3]), float(line[i3+1:-1]))  # (chrom, start, end, value); final character (assumed newline) dropped
34 |
# For every window read from stdin, compute the length-weighted mean of the
# mappability values overlapping it and print the window with that mean
# appended as an extra column (0.0 when nothing overlaps).
while True:
    window = read_window()
    if window is None:
        break

    tot_val = 0.0
    tot_bp = 0
    first = True

    while True:
        # The record left over from the previous window may also overlap this
        # one, so reuse it once before reading further.
        if first and last_val is not None and last_val[0] == window[0]:
            value = last_val
        else:
            value = read_value()
            last_val = value
        first = False

        # Skip records that belong to a different chromosome. The values file
        # may run out while skipping; stop cleanly instead of indexing None.
        while value is not None and value[0] != window[0]:
            value = read_value()
            last_val = value
        if value is None:
            break

        if value[2] <= window[1]:
            # Record ends before the window starts: read the next one.
            continue
        if value[1] >= window[2]:
            # Record starts after the window ends: keep it for the next window.
            break

        n_bp = min(value[2], window[2]) - max(value[1], window[1])
        tot_val += n_bp * value[3]
        tot_bp += n_bp

        if value[2] >= window[2]:
            # Record reaches past the window end, so the window is finished.
            break

    # sys.stdout.write works under both Python 2 and Python 3, unlike the
    # print statement.
    sys.stdout.write("%s\t%d\t%d%s\t%.3f\n" % (
        window[0], window[1], window[2], window[3],
        tot_val/tot_bp if tot_bp > 0 else 0.0))

window_file.close()
values_file.close()
74 |
75 |
--------------------------------------------------------------------------------
/call_cnv.c:
--------------------------------------------------------------------------------
1 | #include "stdio.h"
2 | #include "stdlib.h"
3 | #include "string.h"
4 | #include "math.h"
5 |
6 | #include "hmm.h"
7 | #include "utils.h"
8 |
// CNV calls are stored in a fixed-size array instead of a resizeable one.
// CNV_BUF_SIZE is the capacity of that array; the assumption is that no
// sample will legitimately have more than this many CNVs.
// (CLAMMS shouldn't be used with cancer data, it's not designed for that).
#define CNV_BUF_SIZE 4096
14 |
// Command-line options, filled in by parse_args().
typedef struct {
    double cnv_rate;        // --cnv_rate; default 3.0e-8, must lie in (0, 0.1)
    double mean_cnv_length; // --mean_cnv_length; default 3.5e+4, must be > 0
    double min_gc;          // --min_gc; default 0.3, must lie in [0, 1]
    double max_gc;          // --max_gc; default 0.7, must lie in [0, 1]
    char sex;               // --sex; 'M' or 'F', '\0' when not specified
} Options;
22 |
// One CNV call. The location fields are set by note_cnv(), the copy-number
// fields by estimate_het_or_hom() and the quality fields by
// calc_quality_metrics().
typedef struct {
    unsigned char chr;                // chromosome code (compared against CHR_X/CHR_Y/CHR_M when printed)
    unsigned char type;               // HMM state of the call; printed as "DEL" or "DUP"
    unsigned char ml_copy_number;     // most likely copy number chosen for the call
    unsigned char max_considered_cn;  // highest copy number considered (2, 3 or MAX_CN)
    int q_any;                        // quality score derived from the all-NORM log-probability, capped at 999
    double model_fit;                 // log fit metric divided by n_windows
    int n_windows;                    // number of windows counted in the call
    int start_window;                 // index of the first window of the call
    int end_window;                   // index of the last window of the call
    int start_coord;                  // window_start of the first window
    int end_coord;                    // window_end of the last window
    unsigned char can_extend_left;    // 1 if a previous window exists to extend into
    unsigned char can_extend_right;   // 1 if a next window exists to extend into
    unsigned char can_contract_left;  // 1 if the call can be shortened from the left
    unsigned char can_contract_right; // 1 if the call can be shortened from the right
    unsigned char q_extend_left;      // boundary quality scores, clamped to 0..99;
    unsigned char q_extend_right;     // each is only set when the matching can_* flag is 1
    unsigned char q_contract_left;
    unsigned char q_contract_right;
    int extend_left_delta;            // coordinate change of the boundary for each move;
    int extend_right_delta;           // each is only set when the matching can_* flag is 1
    int contract_left_delta;
    int contract_right_delta;
} CNVCall;
48 |
49 | Options parse_args(int argc, char *argv[], int arg_start) {
50 | Options options;
51 | options.cnv_rate = 3.0e-8;
52 | options.mean_cnv_length = 3.5e+4;
53 | options.min_gc = 0.3;
54 | options.max_gc = 0.7;
55 | options.sex = '\0';
56 |
57 | int i;
58 | for (i = arg_start; i < argc; i += 2) {
59 | if (strcmp(argv[i], "--cnv_rate") == 0) {
60 | if (i+1 >= argc) missing_value_error(argv[i]);
61 | options.cnv_rate = strtod(argv[i+1], NULL);
62 | if (options.cnv_rate <= 0.0 || options.cnv_rate >= 0.1)
63 | invalid_value_error(argv[i]);
64 | } else if (strcmp(argv[i], "--mean_cnv_length") == 0) {
65 | if (i+1 >= argc) missing_value_error(argv[i]);
66 | options.mean_cnv_length = strtod(argv[i+1], NULL);
67 | if(options.mean_cnv_length <= 0.0)
68 | invalid_value_error(argv[i]);
69 | } else if (strcmp(argv[i], "--min_gc") == 0) {
70 | if (i+1 >= argc) missing_value_error(argv[i]);
71 | options.min_gc = strtod(argv[i+1], NULL);
72 | if (options.min_gc < 0.0 || options.min_gc > 1.0)
73 | invalid_value_error(argv[i]);
74 | } else if (strcmp(argv[i], "--max_gc") == 0) {
75 | if (i+1 >= argc) missing_value_error(argv[i]);
76 | options.max_gc = strtod(argv[i+1], NULL);
77 | if (options.max_gc < 0.0 || options.max_gc > 1.0)
78 | invalid_value_error(argv[i]);
79 | } else if (strcmp(argv[i], "--sex") == 0) {
80 | if (i+1 >= argc) missing_value_error(argv[i]);
81 | sscanf(argv[i+1], "%c", &options.sex);
82 | if (!(options.sex == 'M' || options.sex == 'F'))
83 | invalid_value_error(argv[i]);
84 | } else {
85 | fprintf(stderr, "Unrecognized argument: %s\n", argv[i]);
86 | fprintf(stderr, "Try '%s --help' for more information.\n", argv[0]);
87 | exit(1);
88 | }
89 | }
90 |
91 | return options;
92 | }
93 |
94 | void note_cnv(unsigned char chr,
95 | unsigned char type,
96 | int n_windows,
97 | int start_window,
98 | int end_window,
99 | int start_coord,
100 | int end_coord,
101 | int *n_cnv,
102 | CNVCall *cnv) {
103 | if (*n_cnv >= CNV_BUF_SIZE) {
104 | fprintf(stderr, "ERROR: more than %d CNVs detected!\n", CNV_BUF_SIZE);
105 | fprintf(stderr, "The reference panel must not be a good fit for this sample.\n");
106 | fprintf(stderr, "Note: CLAMMS is not meant to be used with cancer samples.\n");
107 | exit(1);
108 | }
109 |
110 | cnv->chr = chr;
111 | cnv->type = type;
112 | cnv->n_windows = n_windows;
113 | cnv->start_window = start_window;
114 | cnv->end_window = end_window;
115 | cnv->start_coord = start_coord;
116 | cnv->end_coord = end_coord;
117 | (*n_cnv)++;
118 | }
119 |
120 | int call_cnv(int n_windows,
121 | unsigned char *window_chr,
122 | int *window_start,
123 | int *window_end,
124 | char *max_cn,
125 | unsigned char *ml_state_seq,
126 | CNVCall *cnv) {
127 | int i;
128 | int n_cnv = 0;
129 | unsigned char last_chr = 0;
130 | unsigned char last_state;
131 | int last_window;
132 | int last_end_coord;
133 | int state_start_coord;
134 | int state_start_window;
135 | int state_n_windows;
136 | for (i = 0; i < n_windows; i++) {
137 | if (max_cn[i] < 0) continue;
138 | if (window_chr[i] != last_chr) {
139 | if (last_chr != 0 && last_state != NORM) {
140 | note_cnv(last_chr, last_state,
141 | state_n_windows, state_start_window, last_window,
142 | state_start_coord, last_end_coord,
143 | &n_cnv, cnv+n_cnv); }
144 | last_chr = window_chr[i];
145 | last_state = ml_state_seq[i];
146 | state_start_coord = window_start[i];
147 | state_start_window = i;
148 | state_n_windows = 1;
149 | } else if (ml_state_seq[i] != last_state) {
150 | if (last_state != NORM) {
151 | note_cnv(last_chr, last_state,
152 | state_n_windows, state_start_window, last_window,
153 | state_start_coord, last_end_coord,
154 | &n_cnv, cnv+n_cnv); }
155 | last_state = ml_state_seq[i];
156 | state_start_coord = window_start[i];
157 | state_start_window = i;
158 | state_n_windows = 1;
159 | } else {
160 | state_n_windows++;
161 | }
162 | last_window = i;
163 | last_end_coord = window_end[i];
164 | }
165 |
166 | if (last_state != NORM) {
167 | note_cnv(last_chr, last_state,
168 | state_n_windows, state_start_window, last_window,
169 | state_start_coord, last_end_coord,
170 | &n_cnv, cnv+n_cnv);
171 | }
172 |
173 | return n_cnv;
174 | }
175 |
176 | void estimate_het_or_hom(int n_cnv,
177 | CNVCall *cnv,
178 | char sex,
179 | unsigned char *window_chr,
180 | char *max_cn,
181 | double **cn_emission_logp) {
182 | int i, j, k;
183 | for (i = 0; i < n_cnv; i++) {
184 | int norm_cn = expected_copy_number(sex, cnv[i].chr);
185 | if (norm_cn == HAPLOID) {
186 | cnv[i].max_considered_cn = 2;
187 | if (cnv[i].type == DEL)
188 | cnv[i].ml_copy_number = 0;
189 | else
190 | cnv[i].ml_copy_number = 2;
191 | } else {
192 | cnv[i].max_considered_cn = 3;
193 | for (j = cnv[i].start_window; j <= cnv[i].end_window; j++) {
194 | if (max_cn[j] > 3) { cnv[i].max_considered_cn = MAX_CN; break; }
195 | }
196 | if (cnv[i].type == DEL) {
197 | double logp_0 = 0.0;
198 | double logp_1 = 0.0;
199 | for (j = cnv[i].start_window; j <= cnv[i].end_window; j++) {
200 | if (max_cn[j] < 0) continue;
201 | logp_0 += cn_emission_logp[j][0];
202 | logp_1 += cn_emission_logp[j][1];
203 | }
204 | if (logp_0 > logp_1)
205 | cnv[i].ml_copy_number = 0;
206 | else
207 | cnv[i].ml_copy_number = 1;
208 | } else {
209 | if (cnv[i].max_considered_cn == 3) {
210 | cnv[i].ml_copy_number = 3;
211 | continue;
212 | }
213 |
214 | double cn_logp[MAX_CN+1];
215 | for (k = 3; k <= MAX_CN; k++) cn_logp[k] = 0.0;
216 | for (j = cnv[i].start_window; j <= cnv[i].end_window; j++) {
217 | if (max_cn[j] < 0) continue;
218 | for (k = 3; k <= MAX_CN; k++)
219 | cn_logp[k] += cn_emission_logp[j][k];
220 | }
221 |
222 | cnv[i].ml_copy_number = 3;
223 | double ml_cn_logp = cn_logp[3];
224 | for (k = 4; k <= MAX_CN; k++) {
225 | if (cn_logp[k] > ml_cn_logp) {
226 | cnv[i].ml_copy_number = k;
227 | ml_cn_logp = cn_logp[k];
228 | }
229 | }
230 | }
231 | }
232 | }
233 | }
234 |
/*
 * Fill in the quality fields of every call in cnv[0 .. n_cnv-1]:
 *
 *   q_any              score derived from the log-probability that the whole
 *                      called region is in the NORM state, capped at 999
 *   model_fit          per-window average of a goodness-of-fit log metric
 *   can_extend_* /     whether the call could be moved one window outwards or
 *   can_contract_*     inwards on each side
 *   q_* and *_delta    for each possible move, a score clamped to 0..99 and
 *                      the resulting change in boundary coordinate
 *
 * Windows with max_cn[j] < 0 are skipped when accumulating over a call.
 * The per-window arrays are indexed by window number; the forward and
 * backward arrays are indexed [window][state].
 */
void calc_quality_metrics(int n_cnv,
                          CNVCall *cnv,
                          int n_windows,
                          unsigned char *window_chr,
                          int *window_start,
                          int *window_end,
                          char *max_cn,
                          double *cov,
                          unsigned char *hom_del_flag,
                          double *lambda,
                          double *sigma_dip,
                          double **hmm_state_emission_logp,
                          double cnv_rate,
                          double mean_cnv_length,
                          double **forward_scaled_prob,
                          double **backward_scaled_prob) {
    // NOTE(review): k is declared but never used in this function.
    int i, j, k;
    // log-probability of staying in NORM from one window to the next
    double log_T_norm_norm = log(1.0 - 2.0*cnv_rate);
    double log_10 = log(10.0);
    double log_rad_4pi = log(sqrt(4.0 * M_PI));
    double log_rad_1_half = log(sqrt(0.5));
    double log_rad_3_halves = log(sqrt(1.5));
    double log_rad_4_halves = log(sqrt(2.0));
    double log_rad_5_halves = log(sqrt(2.5));
    double log_rad_6_halves = log(sqrt(3.0));

    for (i = 0; i < n_cnv; i++) {
        int start_window = cnv[i].start_window;
        int end_window = cnv[i].end_window;
        unsigned char type = cnv[i].type;
        unsigned char mlcn = cnv[i].ml_copy_number;

        // log-probability of the all-NORM path through the called region:
        // forward probability of NORM at the first window, then a NORM
        // emission and a NORM->NORM transition for every later usable window.
        double log_p_norm = log(forward_scaled_prob[start_window][NORM]);
        for (j = start_window+1; j <= end_window; j++) {
            if (max_cn[j] < 0) continue;
            log_p_norm += hmm_state_emission_logp[j][NORM];
            log_p_norm += log_T_norm_norm;
        }

        // problem: suppose backwards scaled probs are 1/3 for DEL, DIP, DUP.
        // this means that future sequence gives no information about present state.
        // if the distribution were not uniform, it would act as a prior distribution.
        // to integrate this prior with the probability of the CNV region being all-diploid
        // would require also computing the marginal probabilities of all other possible
        // state sequences in the CNV region, which are exponentially many.
        // No way to do that was found, so the following heuristic is used instead:
        // the backward probability is only applied when it makes NORM less
        // likely than the uniform value 1/N_STATES.
        if (backward_scaled_prob[end_window][NORM] < 1.0 / N_STATES) {
            log_p_norm += log(backward_scaled_prob[end_window][NORM] * N_STATES);
        }

        // -10 * log10(P(all NORM)), capped at 999
        cnv[i].q_any = (int) fmin(999.0, ((-10.0/log_10) * log_p_norm));

        // compute model goodness-of-fit metric
        // values < 1 fit model less well than would be expected
        // if you took random samples from the model.
        // values > 1 fit better.

        double log_fit_metric = 0.0;

        for (j = start_window; j <= end_window; j++) {
            if (max_cn[j] < 0) continue;
            if (mlcn == 0) {
                log_fit_metric += homozygous_del_log_likelihood(cov[j], hom_del_flag[j], lambda[j]);
                if (hom_del_flag[j])
                    log_fit_metric -= 3.912023; // log(50)
                else
                    log_fit_metric -= log(0.5 * lambda[j]);
            } else {
                log_fit_metric += gaussian_log_likelihood(cov[j], mlcn, sigma_dip[j]);
                log_fit_metric += log(sigma_dip[j]);
                log_fit_metric += log_rad_4pi;
            }
        }

        cnv[i].model_fit = log_fit_metric / cnv[i].n_windows;
        // NOTE(review): the adjustments below are made after model_fit has
        // already been stored, and log_fit_metric is not read again in this
        // iteration, so they currently have no effect on any output.
        // Confirm whether they were meant to be applied before the division.
        if (mlcn == 1) log_fit_metric += log_rad_1_half;
        else if (mlcn == 3) log_fit_metric += log_rad_3_halves;
        else if (mlcn == 4) log_fit_metric += log_rad_4_halves;
        else if (mlcn == 5) log_fit_metric += log_rad_5_halves;
        else if (mlcn == 6) log_fit_metric += log_rad_6_halves;

        // compute conditional call extension metrics
        // comparing L(call extended by 1 window) / L(call with specified breakpoints)

        int prev_window = get_prev_window(start_window, n_windows, window_chr, max_cn);
        if (prev_window == -1) {
            // no window to extend into
            cnv[i].can_extend_left = 0;
        } else {
            cnv[i].can_extend_left = 1;
            double logp_ratio = 0.0;
            logp_ratio += log(forward_scaled_prob[prev_window][type]);
            logp_ratio -= log(forward_scaled_prob[prev_window][NORM]);
            double attenuation = exp(-((double)(window_start[start_window]-window_start[prev_window])) / mean_cnv_length);
            logp_ratio += log(transition_prob(type, type, cnv_rate, attenuation));
            logp_ratio -= log(transition_prob(NORM, type, cnv_rate, attenuation));
            // -10 * log10(ratio), clamped to 0..99
            cnv[i].q_extend_left = (unsigned char) fmin(99.0, fmax(0.0, (-10.0/log_10) * logp_ratio));
            cnv[i].extend_left_delta = window_start[prev_window] - window_start[start_window];
        }

        int next_window = get_next_window(end_window, n_windows, window_chr, max_cn);
        if (next_window == -1) {
            // no window to extend into
            cnv[i].can_extend_right = 0;
        } else {
            cnv[i].can_extend_right = 1;
            double logp_ratio = 0.0;
            logp_ratio += log(backward_scaled_prob[next_window][type]);
            logp_ratio -= log(backward_scaled_prob[next_window][NORM]);
            logp_ratio += hmm_state_emission_logp[next_window][type];
            logp_ratio -= hmm_state_emission_logp[next_window][NORM];
            double attenuation = exp(-((double)(window_start[next_window]-window_start[end_window])) / mean_cnv_length);
            logp_ratio += log(transition_prob(type, type, cnv_rate, attenuation));
            logp_ratio -= log(transition_prob(type, NORM, cnv_rate, attenuation));
            cnv[i].q_extend_right = (unsigned char) fmin(99.0, fmax(0.0, (-10.0/log_10) * logp_ratio));
            cnv[i].extend_right_delta = window_end[next_window] - window_end[end_window];
        }

        // compute conditional call contraction metrics
        // comparing L(call contracted by 1 window) / L(call with specified breakpoints)
        // (only attempted for calls spanning more than one window)
        cnv[i].can_contract_left = 0;
        cnv[i].can_contract_right = 0;
        if (cnv[i].n_windows > 1) {
            next_window = get_next_window(start_window, n_windows, window_chr, max_cn);
            if (next_window != -1) {
                cnv[i].can_contract_left = 1;
                double logp_ratio = 0.0;
                logp_ratio += log(forward_scaled_prob[start_window][NORM]);
                logp_ratio -= log(forward_scaled_prob[start_window][type]);
                double attenuation = exp(-((double)(window_start[next_window]-window_start[start_window])) / mean_cnv_length);
                logp_ratio += log(transition_prob(NORM, type, cnv_rate, attenuation));
                logp_ratio -= log(transition_prob(type, type, cnv_rate, attenuation));
                cnv[i].q_contract_left = (unsigned char) fmin(99.0, fmax(0.0, (-10.0/log_10) * logp_ratio));
                cnv[i].contract_left_delta = window_start[next_window] - window_start[start_window];
            }

            prev_window = get_prev_window(end_window, n_windows, window_chr, max_cn);
            if (prev_window != -1) {
                cnv[i].can_contract_right = 1;
                double logp_ratio = 0.0;
                logp_ratio += log(backward_scaled_prob[end_window][NORM]);
                logp_ratio -= log(backward_scaled_prob[end_window][type]);
                logp_ratio += hmm_state_emission_logp[end_window][NORM];
                logp_ratio -= hmm_state_emission_logp[end_window][type];
                double attenuation = exp(-((double)(window_end[end_window]-window_end[prev_window])) / mean_cnv_length);
                logp_ratio += log(transition_prob(type, NORM, cnv_rate, attenuation));
                logp_ratio -= log(transition_prob(type, type, cnv_rate, attenuation));
                cnv[i].q_contract_right = (unsigned char) fmin(99.0, fmax(0.0, (-10.0/log_10) * logp_ratio));
                cnv[i].contract_right_delta = window_end[prev_window] - window_end[end_window];
            }
        }
    }
}
386 |
387 | void write_cnv(CNVCall *cnv,
388 | int n_cnv,
389 | char *sample_name) {
390 | int i;
391 | for (i = 0; i < n_cnv; i++) {
392 | if (cnv[i].chr == CHR_X) printf("X");
393 | else if (cnv[i].chr == CHR_Y) printf("Y");
394 | else if (cnv[i].chr == CHR_M) printf("MT");
395 | else printf("%hhu", cnv[i].chr);
396 |
397 | printf("\t%d\t%d\t",
398 | cnv[i].start_coord, cnv[i].end_coord);
399 |
400 | if (cnv[i].chr == CHR_X) printf("X");
401 | else if (cnv[i].chr == CHR_Y) printf("Y");
402 | else if (cnv[i].chr == CHR_M) printf("MT");
403 | else printf("%hhu", cnv[i].chr);
404 |
405 | printf(":%d-%d\t%s\t%s\t%hhu\t%d\t%d\t%.3g",
406 | cnv[i].start_coord, cnv[i].end_coord,
407 | sample_name,
408 | (cnv[i].type == DEL ? "DEL" : "DUP"),
409 | cnv[i].ml_copy_number,
410 | cnv[i].n_windows,
411 | cnv[i].q_any,
412 | cnv[i].model_fit);
413 |
414 | if (cnv[i].can_extend_left)
415 | printf("\t%hhu\t%d", cnv[i].q_extend_left, cnv[i].extend_left_delta);
416 | else
417 | printf("\tNA\tNA");
418 |
419 | if (cnv[i].can_extend_right)
420 | printf("\t%hhu\t%d", cnv[i].q_extend_right, cnv[i].extend_right_delta);
421 | else
422 | printf("\tNA\tNA");
423 |
424 | if (cnv[i].can_contract_left)
425 | printf("\t%hhu\t%d", cnv[i].q_contract_left, cnv[i].contract_left_delta);
426 | else
427 | printf("\tNA\tNA");
428 |
429 | if (cnv[i].can_contract_right)
430 | printf("\t%hhu\t%d", cnv[i].q_contract_right, cnv[i].contract_right_delta);
431 | else
432 | printf("\tNA\tNA");
433 |
434 | printf("\n");
435 | }
436 | }
437 |
438 | int main(int argc, char *argv[]) {
439 | if (argc < 3) {
440 | fprintf(stderr, "Usage: %s sample.norm.cov.bed models.out [OPTIONS] >sample.cnvs.bed\n\n", argv[0]);
441 | fputs("Calls CNVs for a sample.\n", stderr);
442 | fputs("sample.norm.cov.bed should have been generated by the CLAMMS 'normalize_coverage' program.\n", stderr);
443 | fputs("models.out should have been generated by the CLAMMS 'fit_models' program.\n\n", stderr);
444 | fputs("If you want to make calls on sex chromosomes, you must specify\n", stderr);
445 | fputs("a sex for this sample with the --sex option.\n", stderr);
446 | fputs("Additionally, when 'fit_models' was run to generate models.out,\n", stderr);
447 | fputs("you must have specified sexes for each sample.\n\n", stderr);
448 | fputs(" --cnv_rate P(DIP->DIP) == P(DIP->DUP)\n", stderr);
449 | fputs(" Default = 3.0e-8.\n", stderr);
450 | fputs(" --mean_cnv_length Mean of prior distribution for CNV lengths (in b.p.)\n", stderr);
451 | fputs(" Default = 3.5e+4.\n", stderr);
452 | fputs(" --min_gc If you used a non-default --min_gc for 'normalize_coverage' and 'fit_models'\n", stderr);
453 | fputs(" then you must use it again here.\n", stderr);
454 | fputs(" --max_gc If you used a non-default --max_gc for 'normalize_coverage' and 'fit_models'\n", stderr);
455 | fputs(" then you must use it again here.\n", stderr);
456 | fputs(" --sex 'M' or 'F'\n", stderr);
457 | fputs(" Default = unspecified (no calls on sex chr).\n\n", stderr);
458 | return 1;
459 | }
460 |
461 | FILE *coverage = open_file(argv[1]);
462 | FILE *models = open_file(argv[2]);
463 |
464 | char sample_name[1024];
465 | read_sample_name(sample_name, argv[1]);
466 |
467 | Options options = parse_args(argc, argv, 3);
468 |
469 | int i;
470 | int n_windows = count_lines_in_file(coverage);
471 |
472 | // read all the coverage values and mixture model parameters into memory
473 |
474 | unsigned char *window_chr = (unsigned char *) malloc(n_windows * sizeof(unsigned char));
475 | int *window_start = (int *) malloc(n_windows * sizeof(int));
476 | int *window_end = (int *) malloc(n_windows * sizeof(int));
477 | char *max_cn = (char *) malloc(n_windows * sizeof(char));
478 | unsigned char *hom_del_flag = (unsigned char *) malloc(n_windows * sizeof(unsigned char));
479 |
480 | double *window_gc = (double *) malloc(n_windows * sizeof(double));
481 | double *cov = (double *) malloc(n_windows * sizeof(double));
482 | double *lambda = (double *) malloc(n_windows * sizeof(double));
483 | double *mu_dip = (double *) malloc(n_windows * sizeof(double));
484 | double *sigma_dip = (double *) malloc(n_windows * sizeof(double));
485 | double *model_conf = (double *) malloc(n_windows * sizeof(double));
486 |
487 | read_model_data(models, n_windows,
488 | window_chr, window_start, window_end,
489 | max_cn, hom_del_flag, window_gc,
490 | lambda, mu_dip, sigma_dip, model_conf);
491 | read_coverage_data(coverage, n_windows,
492 | window_chr, window_start, window_end,
493 | cov, mu_dip);
494 | calc_base_model_conf(n_windows, options.min_gc, options.max_gc,
495 | window_chr, window_start, window_end,
496 | max_cn, window_gc, model_conf);
497 | calc_sample_specific_model_conf(n_windows, options.sex, window_chr,
498 | max_cn, cov, hom_del_flag,
499 | lambda, sigma_dip, model_conf);
500 |
501 | free(window_gc);
502 | free(mu_dip);
503 |
504 | // if the sex of the sample isn't specified, don't make calls for chrX/Y
505 | if (!options.sex) {
506 | for (i = 0; i < n_windows; i++) {
507 | if (window_chr[i] == CHR_X || window_chr[i] == CHR_Y)
508 | max_cn[i] = -1;
509 | }
510 | }
511 |
512 | // calculate the emission log-probabilities for each copy number state
513 | // and for each HMM state (DEL, DIP, DUP) using Bayes theorem with a uniform prior.
514 | //
515 | // likelihood uniform prior
516 | // / \
517 | // P(coverage | CN=2) * 1
518 | // example: P(DIP | coverage) = -------------------------------------------------
519 | // sum {k in 0..MAX_CN} P(coverage | CN=k) * 1
520 | // \
521 | // normalizing factor ("evidence")
522 | //
523 |
524 | double **cn_emission_logp = (double **) malloc(n_windows * sizeof(double *));
525 | double **hmm_state_emission_logp = (double **) malloc(n_windows * sizeof(double *));
526 | for (i = 0; i < n_windows; i++) {
527 | cn_emission_logp[i] = (double *) malloc((MAX_CN+1) * sizeof(double));
528 | hmm_state_emission_logp[i] = (double *) malloc( N_STATES * sizeof(double));
529 | }
530 |
531 | calc_cn_emission_logp(n_windows, options.sex,
532 | window_chr, max_cn, cov,
533 | hom_del_flag, lambda, sigma_dip,
534 | cn_emission_logp);
535 | calc_hmm_state_emission_logp(n_windows, options.sex,
536 | window_chr, max_cn,
537 | cn_emission_logp, hmm_state_emission_logp);
538 |
539 | // We run the Viterbi algorithm in both directions
540 | // and only call variants when both Viterbi runs predict non-diploid state.
541 | // This avoids the directionality bias that comes from the transition model
542 | // where it costs a lot to open an CNV but not much to extend it.
543 | // It is a bit conservative however, so we increased the default value
544 | // for p (the transition probability of NORM to DEL or DUP) to compensate.
545 | unsigned char *ml_seq = viterbi(
546 | n_windows, FORWARD,
547 | window_chr, window_start, window_end,
548 | max_cn, model_conf,
549 | hmm_state_emission_logp,
550 | options.cnv_rate, options.mean_cnv_length
551 | );
552 | unsigned char *backward_seq = viterbi(
553 | n_windows, BACKWARD,
554 | window_chr, window_start, window_end,
555 | max_cn, model_conf,
556 | hmm_state_emission_logp,
557 | options.cnv_rate, options.mean_cnv_length
558 | );
559 | mask_sequence(n_windows, max_cn, ml_seq, backward_seq);
560 |
561 | // identify CNVs in the consensus state sequence
562 | CNVCall *cnv = (CNVCall *) malloc(CNV_BUF_SIZE * sizeof(CNVCall));
563 | int n_cnv = call_cnv(n_windows,
564 | window_chr, window_start, window_end,
565 | max_cn, ml_seq, cnv);
566 | free(ml_seq);
567 | free(backward_seq);
568 |
569 | // Run the forward-backward algorithm
570 | // We will use the posterior probabilities it outputs to compute quality metrics
571 | // such as P(any CNV in called regions) for putative CNV calls
572 | double **forward_scaled_prob = (double **) malloc(n_windows * sizeof(double *));
573 | double **backward_scaled_prob = (double **) malloc(n_windows * sizeof(double *));
574 | for (i = 0; i < n_windows; i++) {
575 | forward_scaled_prob[i] = (double *) malloc(N_STATES * sizeof(double));
576 | backward_scaled_prob[i] = (double *) malloc(N_STATES * sizeof(double));
577 | }
578 |
579 | forward_backward(
580 | n_windows,
581 | window_chr, window_start, window_end,
582 | max_cn, model_conf,
583 | hmm_state_emission_logp,
584 | options.cnv_rate, options.mean_cnv_length,
585 | forward_scaled_prob, backward_scaled_prob);
586 | estimate_het_or_hom(
587 | n_cnv, cnv, options.sex,
588 | window_chr, max_cn, cn_emission_logp);
589 | calc_quality_metrics(
590 | n_cnv, cnv, n_windows,
591 | window_chr, window_start, window_end,
592 | max_cn, cov,
593 | hom_del_flag, lambda, sigma_dip,
594 | hmm_state_emission_logp,
595 | options.cnv_rate, options.mean_cnv_length,
596 | forward_scaled_prob, backward_scaled_prob);
597 |
598 | write_cnv(cnv, n_cnv, sample_name);
599 |
600 | free(window_chr);
601 | free(window_start);
602 | free(window_end);
603 | free(model_conf);
604 | free(cov);
605 | free(max_cn);
606 | free(hom_del_flag);
607 | free(lambda);
608 | free(sigma_dip);
609 |
610 | for (i = 0; i < n_windows; i++) {
611 | free(cn_emission_logp[i]);
612 | free(hmm_state_emission_logp[i]);
613 | free(forward_scaled_prob[i]);
614 | free(backward_scaled_prob[i]);
615 | }
616 |
617 | free(cn_emission_logp);
618 | free(hmm_state_emission_logp);
619 | free(forward_scaled_prob);
620 | free(backward_scaled_prob);
621 | free(cnv);
622 |
623 | fclose(coverage);
624 | fclose(models);
625 |
626 | return 0;
627 | }
628 |
629 |
--------------------------------------------------------------------------------
/data/clamms_special_regions.grch38.bed:
--------------------------------------------------------------------------------
1 | 1 829411 867135 -1
2 | 1 829411 867135 6
3 | 1 852321 914493 6
4 | 1 1052181 1056763 6
5 | 1 1222575 1226199 6
6 | 1 1632457 1647500 -1
7 | 1 1632457 1647500 6
8 | 1 1653095 1661379 6
9 | 1 1670804 1686011 6
10 | 1 1699616 1742448 6
11 | 1 1699617 1716261 -1
12 | 1 1699617 1716261 6
13 | 1 8143986 8151005 6
14 | 1 12794003 12827479 6
15 | 1 12848983 12860368 6
16 | 1 13075112 13104252 -1
17 | 1 13075112 13104252 6
18 | 1 13122522 13151676 -1
19 | 1 13122522 13151676 6
20 | 1 16021271 16040352 6
21 | 1 16042011 16055150 6
22 | 1 16702762 16725237 -1
23 | 1 16702762 16725237 6
24 | 1 16761934 16799163 -1
25 | 1 16761934 16799163 6
26 | 1 16879095 16895125 -1
27 | 1 16879095 16895125 6
28 | 1 16911131 16940506 -1
29 | 1 16911131 16940506 6
30 | 1 17672390 17676565 6
31 | 1 21967490 21993014 -1
32 | 1 21967490 21993014 6
33 | 1 21993156 22016200 -1
34 | 1 21993156 22016200 6
35 | 1 25258882 25268025 -1
36 | 1 25258882 25268025 6
37 | 1 25268024 25329028 -1
38 | 1 25268024 25329028 6
39 | 1 25329026 25338354 -1
40 | 1 25329026 25338354 6
41 | 1 25362423 25425328 -1
42 | 1 25362423 25425328 6
43 | 1 25381738 25407443 6
44 | 1 30452163 30465167 6
45 | 1 43631220 43641527 6
46 | 1 46838544 46900684 -1
47 | 1 46838544 46900684 6
48 | 1 47067240 47122930 -1
49 | 1 47067240 47122930 6
50 | 1 72984346 72993124 6
51 | 1 74184071 74197320 6
52 | 1 83132476 83181767 -1
53 | 1 83132476 83181767 6
54 | 1 83182173 83489744 -1
55 | 1 83182173 83489744 6
56 | 1 85514563 85540307 -1
57 | 1 85514563 85540307 6
58 | 1 96269153 96273008 6
59 | 1 100824870 100829158 6
60 | 1 102876466 103108496 -1
61 | 1 103560464 103601323 6
62 | 1 103610788 103621185 6
63 | 1 103619305 103667876 -1
64 | 1 103619305 103667876 6
65 | 1 103713475 103762027 -1
66 | 1 103713475 103762027 6
67 | 1 108221620 108310652 -1
68 | 1 108221620 108310652 6
69 | 1 108383727 108472664 -1
70 | 1 108383727 108472664 6
71 | 1 109673477 109691884 -1
72 | 1 109673477 109691884 6
73 | 1 109691885 109716262 -1
74 | 1 109691885 109716262 6
75 | 1 109701190 109712310 6
76 | 1 119598533 119608361 6
77 | 1 120286658 120323081 -1
78 | 1 120286658 120323081 6
79 | 1 121344829 121366714 -1
80 | 1 121344829 121366714 6
81 | 1 121344832 121391237 -1
82 | 1 121344832 121391237 6
83 | 1 143276000 143305387 -1
84 | 1 143276000 143305387 6
85 | 1 143573083 143673806 6
86 | 1 143714432 143721631 6
87 | 1 143733547 143737409 6
88 | 1 143744016 143920698 -1
89 | 1 143744016 143920698 6
90 | 1 143873991 143965075 -1
91 | 1 143873991 143965075 6
92 | 1 143873992 143965075 -1
93 | 1 143873992 143965075 6
94 | 1 143938829 143965075 -1
95 | 1 143938829 143965075 6
96 | 1 143938830 143965075 -1
97 | 1 143938830 143965075 6
98 | 1 144333723 144445560 -1
99 | 1 144333723 144445560 6
100 | 1 144435041 144506294 -1
101 | 1 144435041 144506294 6
102 | 1 144914152 144935242 -1
103 | 1 144914152 144935242 6
104 | 1 144914152 144989607 -1
105 | 1 144914152 144989607 6
106 | 1 145601946 145686998 -1
107 | 1 145601946 145686998 6
108 | 1 147922392 148009863 -1
109 | 1 147922392 148009863 6
110 | 1 148822110 148857288 -1
111 | 1 148822110 148857288 6
112 | 1 148866949 148890204 -1
113 | 1 148866949 148890204 6
114 | 1 148898450 148935344 -1
115 | 1 148898450 148935344 6
116 | 1 148945095 148970231 -1
117 | 1 148945095 148970231 6
118 | 1 149077138 149145233 -1
119 | 1 149077138 149145233 6
120 | 1 149654969 149828156 -1
121 | 1 149654969 149828156 6
122 | 1 151360120 151376761 6
123 | 1 151380822 151412880 6
124 | 1 152675640 152687307 6
125 | 1 153698246 153718210 6
126 | 1 155214276 155236328 6
127 | 1 161509807 161548852 -1
128 | 1 161509807 161548852 6
129 | 1 161519140 161550228 6
130 | 1 161548853 161595332 -1
131 | 1 161548853 161595332 6
132 | 1 161590716 161649113 6
133 | 1 161591132 161630209 -1
134 | 1 161591132 161630209 6
135 | 1 161630211 161677104 -1
136 | 1 161630211 161677104 6
137 | 1 166209973 166225419 6
138 | 1 195840472 195844554 6
139 | 1 196742574 196771224 -1
140 | 1 196742574 196771224 6
141 | 1 196786971 196827189 -1
142 | 1 196786971 196827189 6
143 | 1 196827190 196855915 -1
144 | 1 196827190 196855915 6
145 | 1 196911497 196951222 -1
146 | 1 196911497 196951222 6
147 | 1 206308878 206385433 -1
148 | 1 206308878 206385433 6
149 | 1 206363483 206409977 -1
150 | 1 206363483 206409977 6
151 | 1 207523599 207542170 6
152 | 1 207526178 207544733 6
153 | 1 207542171 207560727 6
154 | 1 207542684 207561241 6
155 | 1 207544734 207563290 6
156 | 1 207561242 207578307 6
157 | 1 234783387 234820600 -1
158 | 1 234783387 234820600 6
159 | 1 242249902 242365526 -1
160 | 1 242249902 242365526 6
161 | 1 245755044 245758825 6
162 | 1 248420938 248459788 -1
163 | 1 248420938 248459788 6
164 | 1 248457329 248472613 6
165 | 1 248459786 248519266 -1
166 | 1 248459786 248519266 6
167 | 1 248528796 248588406 -1
168 | 1 248528796 248588406 6
169 | 1 248632255 248670880 -1
170 | 1 248632255 248670880 6
171 | 1 248650046 248660946 6
172 | 10 14060 84371 -1
173 | 10 14060 84371 6
174 | 10 5048765 5107686 -1
175 | 10 11915240 11919409 6
176 | 10 18459014 18473275 6
177 | 10 19366293 19370490 6
178 | 10 23871601 23877254 6
179 | 10 26934988 26940090 6
180 | 10 31288052 31293725 6
181 | 10 38231172 38348576 -1
182 | 10 38231172 38348576 6
183 | 10 42205970 42224526 6
184 | 10 42531190 42551345 6
185 | 10 44714159 44775177 6
186 | 10 45705383 45806813 -1
187 | 10 45705383 45806813 6
188 | 10 45788341 45931516 6
189 | 10 46163278 46216218 6
190 | 10 46224482 46287743 6
191 | 10 46300266 46335141 6
192 | 10 46527903 46573333 -1
193 | 10 46527903 46573333 6
194 | 10 46602601 46840017 -1
195 | 10 46602601 46840017 6
196 | 10 47501074 47739149 -1
197 | 10 47501074 47739149 6
198 | 10 49884015 49928032 6
199 | 10 50017558 50022019 6
200 | 10 50047155 50147389 -1
201 | 10 50047155 50147389 6
202 | 10 54682795 54709523 6
203 | 10 79510822 79533104 6
204 | 10 79650173 79680196 -1
205 | 10 79650173 79680196 6
206 | 10 79694506 79876515 -1
207 | 10 79694506 79876515 6
208 | 10 79745553 79838424 6
209 | 10 79794280 79825377 -1
210 | 10 79794280 79825377 6
211 | 10 81368976 81374810 6
212 | 10 87216734 87392812 -1
213 | 10 87216734 87392812 6
214 | 10 87237437 87308004 -1
215 | 10 87237437 87308004 6
216 | 10 87429954 87500606 -1
217 | 10 87429954 87500606 6
218 | 10 103548238 103551707 6
219 | 10 122584611 122593210 6
220 | 10 123181070 123184895 6
221 | 10 131482001 131487406 6
222 | 10 133422646 133436429 -1
223 | 10 133422646 133436429 6
224 | 10 133433027 133455754 6
225 | 10 133488662 133541453 6
226 | 10 133567485 133580555 -1
227 | 10 133567485 133580555 6
228 | 10 133588382 133628867 6
229 | 11 3246879 3292666 6
230 | 11 3292669 3338483 6
231 | 11 4307619 4331947 6
232 | 11 5250120 5253942 6
233 | 11 5861257 5897136 6
234 | 11 5901343 5915568 6
235 | 11 18919901 18942445 -1
236 | 11 18919901 18942445 6
237 | 11 25057159 25060945 6
238 | 11 40747626 40751063 6
239 | 11 42945185 42954639 6
240 | 11 48879441 48888300 -1
241 | 11 48879441 48888300 6
242 | 11 48892128 48902846 -1
243 | 11 48892128 48902846 6
244 | 11 49465497 49791730 -1
245 | 11 49465497 49791730 6
246 | 11 50113256 50154983 -1
247 | 11 50113256 50154983 6
248 | 11 50324118 50365332 -1
249 | 11 50324118 50365332 6
250 | 11 59040392 59086240 6
251 | 11 61204480 61223316 -1
252 | 11 61204480 61223316 6
253 | 11 61223317 61242142 -1
254 | 11 61223317 61242142 6
255 | 11 63421874 63432852 -1
256 | 11 63421874 63432852 6
257 | 11 69677811 69682335 6
258 | 11 88842207 89159225 -1
259 | 11 88842207 89159225 6
260 | 11 89742408 89920623 -1
261 | 11 89742408 89920623 6
262 | 11 89923849 90097515 -1
263 | 11 89923849 90097515 6
264 | 11 95832879 95924207 -1
265 | 11 107367947 107376400 6
266 | 11 107783838 107806079 6
267 | 11 114620731 114641580 6
268 | 11 124212602 124224877 6
269 | 11 132053640 132060328 6
270 | 12 7850405 7874118 6
271 | 12 7881217 7934286 6
272 | 12 7956262 7964869 6
273 | 12 8209699 8236998 6
274 | 12 8386043 8398011 6
275 | 12 8804393 8813027 6
276 | 12 9391530 9402917 -1
277 | 12 9391530 9402917 6
278 | 12 9480090 9505503 -1
279 | 12 9480090 9505503 6
280 | 12 9510411 9579296 -1
281 | 12 9510411 9579296 6
282 | 12 9570401 9579390 -1
283 | 12 9570401 9579390 6
284 | 12 10416634 10432073 -1
285 | 12 10416634 10432073 6
286 | 12 10432074 10447498 -1
287 | 12 10432074 10447498 6
288 | 12 11296885 11326747 6
289 | 12 17803486 17812211 6
290 | 12 19313890 19334389 6
291 | 12 19339548 19352417 6
292 | 12 19369721 19418779 6
293 | 12 19420090 19427458 6
294 | 12 22418524 22425164 6
295 | 12 22425165 22431804 6
296 | 12 31108624 31180230 -1
297 | 12 31108624 31180230 6
298 | 12 31159426 31254878 6
299 | 12 31180853 31201044 -1
300 | 12 31180853 31201044 6
301 | 12 34230449 34236543 6
302 | 12 63529638 63557839 -1
303 | 12 63529638 63557839 6
304 | 12 63545491 63723213 6
305 | 12 63560585 63678460 -1
306 | 12 63560585 63678460 6
307 | 12 63577300 63725465 -1
308 | 12 63577300 63725465 6
309 | 12 63725469 63752467 -1
310 | 12 63725469 63752467 6
311 | 12 71104859 71118018 6
312 | 12 76100881 76106698 6
313 | 12 79759288 79768857 6
314 | 12 92949380 92954615 6
315 | 12 122693806 122706530 -1
316 | 12 122693806 122706530 6
317 | 12 122697978 122714266 6
318 | 12 122706545 122719956 -1
319 | 12 122706545 122719956 6
320 | 12 124011341 124015591 6
321 | 12 131282231 131309862 -1
322 | 12 131282231 131309862 6
323 | 12 131529746 131540626 6
324 | 12 131572916 131590714 6
325 | 12 131618247 131645030 6
326 | 12 131650304 131684649 -1
327 | 12 131650304 131684649 6
328 | 13 18593833 18701842 -1
329 | 13 18593833 18701842 6
330 | 13 18727447 18874746 -1
331 | 13 18727447 18874746 6
332 | 13 19062858 19068790 6
333 | 13 20963500 20966918 6
334 | 13 22522605 22534338 6
335 | 13 22826451 22846359 6
336 | 13 43024887 43035131 6
337 | 13 46407147 46460587 -1
338 | 13 46407147 46460587 6
339 | 13 50489953 50501411 -1
340 | 13 50489953 50501411 6
341 | 13 50957183 50966508 6
342 | 13 52308531 52325552 6
343 | 13 57234843 57248784 6
344 | 13 63716791 63727377 -1
345 | 13 63716791 63727377 6
346 | 13 63737551 63758888 -1
347 | 13 63737551 63758888 6
348 | 13 63758931 63769516 -1
349 | 13 63758931 63769516 6
350 | 13 63780493 63834250 -1
351 | 13 63780493 63834250 6
352 | 13 63820969 63844255 -1
353 | 13 63820969 63844255 6
354 | 13 68053902 68082745 6
355 | 13 75533095 75550249 6
356 | 14 18683673 18693478 6
357 | 14 19723840 19955841 -1
358 | 14 19723840 19955841 6
359 | 14 20886288 20948311 6
360 | 14 23951902 23986734 -1
361 | 14 23951902 23986734 6
362 | 14 23980866 23994695 6
363 | 14 23986738 24034303 -1
364 | 14 23986738 24034303 6
365 | 14 24009992 24014938 6
366 | 14 29246090 29253587 6
367 | 14 64548219 64553836 6
368 | 14 65267378 65277018 6
369 | 14 73530824 73540154 6
370 | 14 73533394 73553592 -1
371 | 14 73533394 73553592 6
372 | 14 73559488 73574660 6
373 | 14 73563311 73585769 -1
374 | 14 73563311 73585769 6
375 | 14 82031367 82038502 6
376 | 14 94681654 94688962 6
377 | 14 103233971 103240641 6
378 | 14 103263132 103267008 6
379 | 14 104942631 104947453 6
380 | 14 105527919 106874876 -1
381 | 14 105599748 105608344 6
382 | 14 105650950 105660136 6
383 | 14 105669198 105702758 6
384 | 14 106695872 106774525 -1
385 | 14 106695872 106774525 6
386 | 15 20187560 20231590 -1
387 | 15 20187560 20231590 6
388 | 15 20342788 20374196 -1
389 | 15 20342788 20374196 6
390 | 15 20400803 20424894 -1
391 | 15 20400803 20424894 6
392 | 15 20428924 20642147 -1
393 | 15 20428924 20642147 6
394 | 15 20641209 20683371 -1
395 | 15 20641209 20683371 6
396 | 15 20673122 20689298 -1
397 | 15 20673122 20689298 6
398 | 15 20729746 20828705 -1
399 | 15 20729746 20828705 6
400 | 15 20828116 20994234 -1
401 | 15 20828116 20994234 6
402 | 15 21242091 21258354 -1
403 | 15 21242091 21258354 6
404 | 15 21250197 21303343 -1
405 | 15 21250197 21303343 6
406 | 15 21298796 21397799 -1
407 | 15 21298796 21397799 6
408 | 15 21401683 21569953 -1
409 | 15 21401683 21569953 6
410 | 15 22007176 22049389 -1
411 | 15 22007176 22049389 6
412 | 15 22048819 22055999 6
413 | 15 22055999 22084975 -1
414 | 15 22055999 22084975 6
415 | 15 22095692 22146490 -1
416 | 15 22095692 22146490 6
417 | 15 22148072 22271830 -1
418 | 15 22148072 22271830 6
419 | 15 22273830 22301701 -1
420 | 15 22273830 22301701 6
421 | 15 22358242 22571452 -1
422 | 15 22358242 22571452 6
423 | 15 22420443 22624349 -1
424 | 15 22420443 22624349 6
425 | 15 22656943 22782169 -1
426 | 15 22656943 22782169 6
427 | 15 24291651 24344133 -1
428 | 15 24291651 24344133 6
429 | 15 24339834 24407618 6
430 | 15 24429720 24477469 6
431 | 15 24479695 24530790 6
432 | 15 24496954 24558726 -1
433 | 15 24496954 24558726 6
434 | 15 28316058 28419210 -1
435 | 15 28316058 28419210 6
436 | 15 28492438 28595553 -1
437 | 15 28492438 28595553 6
438 | 15 28515370 28730223 -1
439 | 15 28515370 28730223 6
440 | 15 28730151 28856539 -1
441 | 15 28730151 28856539 6
442 | 15 30077907 30377808 -1
443 | 15 30077907 30377808 6
444 | 15 30400207 30618102 -1
445 | 15 30400207 30618102 6
446 | 15 30602279 30642956 -1
447 | 15 30602279 30642956 6
448 | 15 30678215 30781397 -1
449 | 15 30678215 30781397 6
450 | 15 31617790 31727417 -1
451 | 15 31617790 31727417 6
452 | 15 32153206 32460660 -1
453 | 15 32153206 32460660 6
454 | 15 32211716 32230104 6
455 | 15 32389246 32607507 -1
456 | 15 32389246 32607507 6
457 | 15 32591722 32632914 -1
458 | 15 32591722 32632914 6
459 | 15 34378801 34437851 -1
460 | 15 34378801 34437851 6
461 | 15 34461088 34527235 6
462 | 15 34525014 34583697 -1
463 | 15 34525014 34583697 6
464 | 15 43558937 43658153 -1
465 | 15 43558937 43658153 6
466 | 15 43658492 43749512 -1
467 | 15 43658492 43749512 6
468 | 15 44817317 44824951 6
469 | 15 44817436 44842217 -1
470 | 15 44817436 44842217 6
471 | 15 45058581 45083192 -1
472 | 15 45058581 45083192 6
473 | 15 84399265 84403744 6
474 | 15 88920298 88927453 6
475 | 16 153062 163217 6
476 | 16 1123050 1127840 6
477 | 16 1228547 1238837 -1
478 | 16 1228547 1238837 6
479 | 16 1244331 1251050 6
480 | 16 1247504 1258598 -1
481 | 16 1247504 1258598 6
482 | 16 2534141 2584844 -1
483 | 16 2534141 2584844 6
484 | 16 2599223 2611355 -1
485 | 16 2599223 2611355 6
486 | 16 2615341 2677513 -1
487 | 16 2615341 2677513 6
488 | 16 2654921 2690715 6
489 | 16 2677677 2691053 -1
490 | 16 2677677 2691053 6
491 | 16 14687179 14706143 -1
492 | 16 14687179 14706143 6
493 | 16 14726299 14745265 -1
494 | 16 14726299 14745265 6
495 | 16 14744235 14816374 -1
496 | 16 14744235 14816374 6
497 | 16 14816372 14954790 -1
498 | 16 14816372 14954790 -1
499 | 16 14816372 14954790 6
500 | 16 14816372 14954790 6
501 | 16 14965460 15030910 -1
502 | 16 14965460 15030910 6
503 | 16 14974714 15029477 -1
504 | 16 14974714 15029477 6
505 | 16 15161832 15231509 -1
506 | 16 15161832 15231509 6
507 | 16 15231233 15379928 -1
508 | 16 15231233 15379928 -1
509 | 16 15231233 15379928 6
510 | 16 15231233 15379928 6
511 | 16 15312474 15384481 -1
512 | 16 15312474 15384481 6
513 | 16 15883866 15930951 6
514 | 16 16198409 16326468 -1
515 | 16 16198409 16326468 6
516 | 16 16215114 16353400 -1
517 | 16 16215114 16353400 6
518 | 16 16368152 16571302 -1
519 | 16 16368152 16571302 6
520 | 16 16377608 16525476 -1
521 | 16 16377608 16525476 6
522 | 16 16536360 16596251 6
523 | 16 16571299 16632191 -1
524 | 16 16571299 16632191 6
525 | 16 16637370 16762938 -1
526 | 16 16637370 16762938 6
527 | 16 18073544 18143135 -1
528 | 16 18073544 18143135 6
529 | 16 18141221 18344802 -1
530 | 16 18141221 18344802 6
531 | 16 18186698 18334299 -1
532 | 16 18186698 18334299 6
533 | 16 18355281 18579544 -1
534 | 16 18355281 18579544 6
535 | 16 18381424 18596289 -1
536 | 16 18381424 18596289 6
537 | 16 18596292 18721701 -1
538 | 16 18596292 18721701 6
539 | 16 18721534 18782479 -1
540 | 16 18721534 18782479 6
541 | 16 21504966 21568600 -1
542 | 16 21504966 21568600 6
543 | 16 21729103 21797508 -1
544 | 16 21729103 21797508 6
545 | 16 22545231 22613938 -1
546 | 16 22545231 22613938 6
547 | 16 22607657 22629968 -1
548 | 16 22607657 22629968 6
549 | 16 22640784 22698393 -1
550 | 16 22640784 22698393 6
551 | 16 24194599 24202462 6
552 | 16 25327300 25333095 6
553 | 16 28337951 28471892 -1
554 | 16 28337951 28471892 6
555 | 16 28603268 28615937 -1
556 | 16 28603268 28615937 6
557 | 16 28643181 28777130 -1
558 | 16 28643181 28777130 6
559 | 16 32479162 32641519 -1
560 | 16 32479162 32641519 6
561 | 16 32799028 33024977 -1
562 | 16 32799028 33024977 6
563 | 16 32903610 33307616 -1
564 | 16 33648796 33689209 -1
565 | 16 33648796 33689209 6
566 | 16 33748271 33770413 -1
567 | 16 33748271 33770413 6
568 | 16 33784785 33829905 -1
569 | 16 33784785 33829905 6
570 | 16 33829437 34062003 -1
571 | 16 33829437 34062003 6
572 | 16 35264424 35336056 6
573 | 16 35353075 35451231 6
574 | 16 35480977 35526018 6
575 | 16 35786227 35834882 6
576 | 16 55798546 55830689 6
577 | 16 68753643 68759727 6
578 | 16 70009899 70075615 -1
579 | 16 70009899 70075615 6
580 | 16 70075623 70107146 -1
581 | 16 70075623 70107146 6
582 | 16 70110348 70149551 -1
583 | 16 70110348 70149551 6
584 | 16 70149769 70164483 -1
585 | 16 70149769 70164483 6
586 | 16 70811383 71168670 -1
587 | 16 70811383 71168670 6
588 | 16 72052298 72079362 6
589 | 16 74331605 74372145 -1
590 | 16 74331605 74372145 6
591 | 16 74525430 74556263 -1
592 | 16 74525430 74556263 6
593 | 16 75498671 75542660 6
594 | 16 77165107 77194257 6
595 | 16 81800974 81808919 6
596 | 16 86417350 86422290 6
597 | 16 88137846 88147382 6
598 | 16 88733060 88736375 6
599 | 17 10429976 10434809 6
600 | 17 15756347 15773480 -1
601 | 17 15756347 15773480 6
602 | 17 16807503 16821281 6
603 | 17 19025184 19063230 -1
604 | 17 19025184 19063230 6
605 | 17 19111334 19149353 -1
606 | 17 19111334 19149353 6
607 | 17 20571397 20588496 -1
608 | 17 20571397 20588496 6
609 | 17 22402151 22425841 6
610 | 17 28415454 28420089 6
611 | 17 36116913 36159099 -1
612 | 17 36116913 36159099 6
613 | 17 36375392 36422656 -1
614 | 17 36375392 36422656 6
615 | 17 37890226 37937509 -1
616 | 17 37890226 37937509 6
617 | 17 41375686 41388082 6
618 | 17 42003689 42007538 6
619 | 17 43356327 43361329 6
620 | 17 45516131 45553913 -1
621 | 17 45516131 45553913 6
622 | 17 45587787 45616292 6
623 | 17 45593558 45627799 -1
624 | 17 45593558 45627799 6
625 | 17 46094454 46137068 6
626 | 17 46150600 46185755 6
627 | 17 46209259 46265130 6
628 | 17 46217040 46252364 -1
629 | 17 46217040 46252364 6
630 | 17 46269539 46290935 6
631 | 17 46292046 46489410 -1
632 | 17 46292046 46489410 6
633 | 17 46509624 46707123 -1
634 | 17 46509624 46707123 6
635 | 17 47013621 47052446 -1
636 | 17 47013621 47052446 6
637 | 17 59447720 59453777 6
638 | 17 59572880 59600102 -1
639 | 17 59572880 59600102 6
640 | 17 59975405 60002602 -1
641 | 17 59975405 60002602 6
642 | 17 77221237 77234268 6
643 | 17 78476893 78481194 6
644 | 17 79367558 79398223 6
645 | 17 80629378 80640074 6
646 | 17 82317295 82322587 6
647 | 17 82359324 82363914 6
648 | 18 14415 84190 -1
649 | 18 14415 84190 6
650 | 18 38590 64703 6
651 | 18 1200353 1209321 6
652 | 18 14185211 14353420 -1
653 | 18 14185211 14353420 6
654 | 18 14259676 14275557 6
655 | 18 14358135 14728625 -1
656 | 18 14358135 14728625 6
657 | 18 14358136 14464820 -1
658 | 18 14358136 14464820 6
659 | 18 14807068 14897138 -1
660 | 18 14807068 14897138 6
661 | 18 15026971 15101189 -1
662 | 18 15026971 15101189 6
663 | 18 65532942 65539752 6
664 | 18 79351772 79359003 6
665 | 19 4218926 4224414 6
666 | 19 6375772 6392787 6
667 | 19 8271973 8286829 6
668 | 19 8286830 8301679 6
669 | 19 8848843 8981342 -1
670 | 19 9177760 9191740 6
671 | 19 11588197 12566412 -1
672 | 19 15672340 15727763 6
673 | 19 17332349 17338971 6
674 | 19 18188935 18202040 6
675 | 19 19687617 24153707 -1
676 | 19 20896952 20922082 6
677 | 19 22233407 22265069 6
678 | 19 22266324 22273737 6
679 | 19 23191555 23196028 6
680 | 19 39877358 39893615 -1
681 | 19 39877358 39893615 6
682 | 19 40830651 40855617 -1
683 | 19 40830651 40855617 6
684 | 19 40849705 40866666 6
685 | 19 40861516 40887402 -1
686 | 19 40861516 40887402 6
687 | 19 42721641 42740516 -1
688 | 19 42747717 42792964 6
689 | 19 42752686 42765679 -1
690 | 19 42793902 42867851 6
691 | 19 42836996 42855718 -1
692 | 19 42866460 42879719 -1
693 | 19 42881773 42986946 6
694 | 19 42902081 42917924 -1
695 | 19 42924131 42937178 -1
696 | 19 42990014 43042066 6
697 | 19 43007656 43026479 -1
698 | 19 43064209 43082741 -1
699 | 19 43099345 43143540 6
700 | 19 43151063 43195157 6
701 | 19 43167742 43186536 -1
702 | 19 43192701 43205774 -1
703 | 19 43253282 43269530 -1
704 | 19 43354470 43365854 -1
705 | 19 43354470 43365854 6
706 | 19 43369082 43380285 -1
707 | 19 43369082 43380285 6
708 | 19 43855848 44505160 -1
709 | 19 51630623 51646968 6
710 | 19 52819327 52854763 6
711 | 19 53013296 53041643 6
712 | 19 53675711 53762430 -1
713 | 19 54224443 54243920 6
714 | 19 54532691 54545742 -1
715 | 19 54724478 54736536 -1
716 | 19 54738508 54753052 -1
717 | 19 54755128 54769335 6
718 | 19 54769812 54784326 -1
719 | 19 54786421 54833317 6
720 | 19 54803611 54814517 -1
721 | 19 54816437 54830778 -1
722 | 19 54832675 54848569 -1
723 | 19 54850442 54867215 -1
724 | 19 56663793 56671729 6
725 | 2 10745298 10753236 6
726 | 2 13064045 13109556 6
727 | 2 16842519 16855855 6
728 | 2 37731219 37743371 -1
729 | 2 37731219 37743371 6
730 | 2 37744644 37774904 -1
731 | 2 37744644 37774904 6
732 | 2 38728119 38745431 6
733 | 2 56719073 56726682 6
734 | 2 57167884 57175955 6
735 | 2 57178578 57219980 6
736 | 2 63271389 63275796 6
737 | 2 73640914 73692293 6
738 | 2 77674170 77689855 -1
739 | 2 77674170 77689855 6
740 | 2 77746851 77775352 6
741 | 2 79102943 79116683 6
742 | 2 86708833 86743083 -1
743 | 2 86708833 86743083 6
744 | 2 86945215 87053645 -1
745 | 2 86945215 87053645 6
746 | 2 87053644 87087879 -1
747 | 2 87053644 87087879 6
748 | 2 87094932 87111045 -1
749 | 2 87094932 87111045 6
750 | 2 87124695 87162923 -1
751 | 2 87124695 87162923 6
752 | 2 87124724 87246317 -1
753 | 2 87124724 87246317 6
754 | 2 87246315 87333375 -1
755 | 2 87246315 87333375 6
756 | 2 87429713 87441083 6
757 | 2 87431439 87697988 -1
758 | 2 87431439 87697988 6
759 | 2 87700482 87739019 -1
760 | 2 87700482 87739019 6
761 | 2 87715964 87824700 -1
762 | 2 87715964 87824700 6
763 | 2 88528493 88532100 6
764 | 2 89840889 89876621 -1
765 | 2 89840889 89876621 6
766 | 2 89954379 90009635 6
767 | 2 90173013 90209914 6
768 | 2 91633396 91677574 -1
769 | 2 91633396 91677574 6
770 | 2 96000079 96009370 6
771 | 2 97194072 97348019 -1
772 | 2 97194072 97348019 6
773 | 2 97360487 97561511 -1
774 | 2 97360487 97561511 6
775 | 2 106237747 106256547 -1
776 | 2 106237747 106256547 6
777 | 2 106238735 106255003 6
778 | 2 107905668 107924082 -1
779 | 2 107905668 107924082 6
780 | 2 109736854 110095177 -1
781 | 2 109736854 110095177 6
782 | 2 110276211 110634615 -1
783 | 2 110276211 110634615 6
784 | 2 111252926 111519068 -1
785 | 2 111252926 111519068 6
786 | 2 111518940 111525762 6
787 | 2 111525223 111615296 -1
788 | 2 111525223 111615296 6
789 | 2 111719717 111838985 -1
790 | 2 111719717 111838985 6
791 | 2 123026948 123034271 6
792 | 2 130061242 130138211 -1
793 | 2 130061242 130138211 6
794 | 2 130121627 130241112 -1
795 | 2 130121627 130241112 6
796 | 2 130245672 130286100 -1
797 | 2 130245672 130286100 6
798 | 2 130401552 130442427 -1
799 | 2 130401552 130442427 6
800 | 2 130679098 130719935 -1
801 | 2 130679098 130719935 6
802 | 2 131200604 131278121 -1
803 | 2 131200604 131278121 6
804 | 2 131366289 131401549 -1
805 | 2 131366289 131401549 6
806 | 2 131433205 131551347 -1
807 | 2 131433205 131551347 6
808 | 2 131903421 131942230 -1
809 | 2 131903421 131942230 6
810 | 2 132521058 132552107 6
811 | 2 158849092 158867541 6
812 | 2 178431413 178451231 -1
813 | 2 184889025 184910156 6
814 | 2 224230980 224236642 6
815 | 2 227325150 227357836 -1
816 | 2 227375836 227393598 6
817 | 2 234036940 234044014 6
818 | 2 240672506 240685789 -1
819 | 2 240672506 240685789 6
820 | 2 240688967 240701802 -1
821 | 2 240688967 240701802 6
822 | 2 240958807 240962510 6
823 | 2 241818926 241825711 6
824 | 20 25712993 25726887 6
825 | 20 25752456 25845152 -1
826 | 20 25752456 25845152 6
827 | 20 26010758 26103777 -1
828 | 20 26010758 26103777 6
829 | 20 30205357 30249733 -1
830 | 20 30205357 30249733 6
831 | 20 31886668 31893737 6
832 | 20 34356129 34361049 6
833 | 20 52067021 52080491 6
834 | 20 53714934 53721831 6
835 | 20 59538099 59542366 6
836 | 20 60992269 61014363 6
837 | 20 63184937 63188740 6
838 | 20 63564188 63568796 6
839 | 21 10505084 10555890 -1
840 | 21 10505084 10555890 6
841 | 21 10567196 10610487 -1
842 | 21 10567196 10610487 6
843 | 21 13221088 13255161 -1
844 | 21 13221088 13255161 6
845 | 21 13268103 13342186 -1
846 | 21 13268103 13342186 6
847 | 21 13342206 13429256 -1
848 | 21 13342206 13429256 6
849 | 21 13429267 13801729 -1
850 | 21 13429267 13801729 6
851 | 21 13655780 13828252 6
852 | 21 13871411 13880677 6
853 | 21 18684528 18708547 6
854 | 21 43401994 43414672 6
855 | 21 43549675 43554231 6
856 | 21 45881374 45885177 6
857 | 22 16481166 16522730 6
858 | 22 18136327 18142525 6
859 | 22 18518836 18622467 -1
860 | 22 18518836 18622467 6
861 | 22 18552240 18659561 -1
862 | 22 18552240 18659561 6
863 | 22 18725024 18891257 -1
864 | 22 18725024 18891257 6
865 | 22 18952166 19036164 6
866 | 22 21161158 21325236 -1
867 | 22 21161158 21325236 6
868 | 22 21335656 21443089 -1
869 | 22 21335656 21443089 6
870 | 22 21458188 21562827 -1
871 | 22 21458188 21562827 6
872 | 22 22031174 22922910 -1
873 | 22 23939932 23969548 -1
874 | 22 23939932 23969548 6
875 | 22 23971774 24001082 -1
876 | 22 23971774 24001082 6
877 | 22 25226889 25247092 -1
878 | 22 25226889 25247092 6
879 | 22 25266489 25287340 6
880 | 22 25311456 25523568 6
881 | 22 25454749 25471797 -1
882 | 22 25454749 25471797 6
883 | 22 30060528 30067667 6
884 | 22 42127542 42138433 6
885 | 22 42500173 42506131 -1
886 | 22 42500173 42506131 6
887 | 22 42509970 42551058 6
888 | 22 42553734 42559454 -1
889 | 22 42553734 42559454 6
890 | 22 42559758 42577798 6
891 | 22 50630154 50639820 6
892 | 3 6276434 6294823 6
893 | 3 8784411 8815297 6
894 | 3 47199829 47215648 6
895 | 3 80335918 80339225 6
896 | 3 94792491 94797496 6
897 | 3 99484955 99497275 6
898 | 3 100611920 100678689 6
899 | 3 125718309 125742568 6
900 | 3 128661230 128685362 6
901 | 3 128684646 128693600 6
902 | 3 155759835 155782924 6
903 | 3 166356483 166377124 6
904 | 3 173521688 173573003 6
905 | 3 195694615 195719133 -1
906 | 3 195694615 195719133 6
907 | 3 195729682 195743609 -1
908 | 3 195729682 195743609 6
909 | 3 195948324 195968302 6
910 | 4 3058311 3067587 6
911 | 4 3465604 3472330 6
912 | 4 3556190 3586664 -1
913 | 4 3556190 3586664 6
914 | 4 4013518 4055931 6
915 | 4 8970248 9087489 6
916 | 4 9102555 9124946 6
917 | 4 9123911 9157054 6
918 | 4 9369078 9444473 6
919 | 4 9484023 9510911 6
920 | 4 9587457 9624069 6
921 | 4 25554559 25578712 6
922 | 4 29289815 29295692 6
923 | 4 34681638 34686633 6
924 | 4 49487345 49510104 6
925 | 4 59106935 59122386 6
926 | 4 59455253 59464986 6
927 | 4 68808808 68848830 6
928 | 4 71805625 71809547 6
929 | 4 77356353 77367236 6
930 | 4 79967139 79976335 6
931 | 4 87613313 87618814 6
932 | 4 89179946 89186643 6
933 | 4 143797769 143830361 6
934 | 4 144021006 144091625 6
935 | 4 160939539 160957703 6
936 | 4 160957903 160963730 6
937 | 4 160965993 160977484 6
938 | 4 160978747 161012041 6
939 | 4 164940650 164962049 6
940 | 5 686007 731279 -1
941 | 5 686007 731279 6
942 | 5 697530 711011 -1
943 | 5 697530 711011 6
944 | 5 714708 733813 -1
945 | 5 714708 733813 6
946 | 5 730410 767203 -1
947 | 5 730410 767203 6
948 | 5 756637 779052 -1
949 | 5 756637 779052 6
950 | 5 778076 815599 -1
951 | 5 778076 815599 6
952 | 5 788636 797682 -1
953 | 5 788636 797682 6
954 | 5 814601 825364 6
955 | 5 821838 850886 -1
956 | 5 821838 850886 6
957 | 5 1034567 1038461 6
958 | 5 13204073 13209447 6
959 | 5 17701280 17715643 6
960 | 5 29621829 29635162 6
961 | 5 32105318 32146525 6
962 | 5 38819372 38824372 6
963 | 5 54814543 54825675 6
964 | 5 65339358 65343648 6
965 | 5 69533889 69612650 -1
966 | 5 69533889 69612650 6
967 | 5 69829048 70129737 -1
968 | 5 69829048 70129737 6
969 | 5 70704174 71005160 -1
970 | 5 70704174 71005160 6
971 | 5 70996747 71017592 6
972 | 5 71015225 71093996 -1
973 | 5 71015225 71093996 6
974 | 5 82875489 82881177 6
975 | 5 99390626 99434842 -1
976 | 5 99390626 99434842 6
977 | 5 99467941 99504570 -1
978 | 5 99467941 99504570 6
979 | 5 99513921 99535492 -1
980 | 5 99513921 99535492 6
981 | 5 100070271 100107193 -1
982 | 5 100070271 100107193 6
983 | 5 100330900 100374689 -1
984 | 5 100330900 100374689 6
985 | 5 100378519 100401226 -1
986 | 5 100378519 100401226 6
987 | 5 104287086 104291526 6
988 | 5 118536895 118547017 6
989 | 5 138394928 138409044 6
990 | 5 139273751 139331677 -1
991 | 5 139441873 139448033 6
992 | 5 140786135 141012344 -1
993 | 5 140794858 141012344 -1
994 | 5 140801197 141012344 -1
995 | 5 140807073 141012344 -1
996 | 5 140821775 141012344 -1
997 | 5 140827921 141012344 -1
998 | 5 140834383 141012344 -1
999 | 5 140841321 141012344 -1
1000 | 5 140847771 141012344 -1
1001 | 5 140855882 141012344 -1
1002 | 5 140867512 141012344 -1
1003 | 5 140875301 141012344 -1
1004 | 5 140882268 141012344 -1
1005 | 5 140926716 141012344 -1
1006 | 5 140966161 141012344 -1
1007 | 5 147940209 147958320 6
1008 | 5 176115296 176186890 6
1009 | 5 176201179 176301975 -1
1010 | 5 176201179 176301975 6
1011 | 5 177620794 177710737 -1
1012 | 5 177620794 177710737 6
1013 | 5 179369865 179374706 6
1014 | 5 180748803 180772289 6
1015 | 6 256882 296034 -1
1016 | 6 256882 296034 6
1017 | 6 312087 376690 -1
1018 | 6 312087 376690 6
1019 | 6 26100960 26321777 -1
1020 | 6 26666991 26732650 -1
1021 | 6 26666991 26732650 6
1022 | 6 26702157 26737000 -1
1023 | 6 26702157 26737000 6
1024 | 6 26737023 26821186 -1
1025 | 6 26737023 26821186 6
1026 | 6 26740120 26748485 6
1027 | 6 26788996 26898014 -1
1028 | 6 26788996 26898014 6
1029 | 6 29723339 29727296 -1
1030 | 6 29726600 29749049 -1
1031 | 6 29826978 29831122 -1
1032 | 6 29887759 29891080 -1
1033 | 6 29941429 29945768 6
1034 | 6 29942469 29945884 -1
1035 | 6 30005970 30009956 -1
1036 | 6 30177104 30181436 6
1037 | 6 30259561 30266951 -1
1038 | 6 30489405 30494205 -1
1039 | 6 31026483 31029807 6
1040 | 6 31268748 31272136 -1
1041 | 6 31353871 31357212 -1
1042 | 6 31999486 32013273 -1
1043 | 6 31999486 32013273 6
1044 | 6 32032224 32046127 -1
1045 | 6 32032224 32046127 6
1046 | 6 32439841 32445049 -1
1047 | 6 32517376 32530229 -1
1048 | 6 32552712 32560002 -1
1049 | 6 32578769 32589836 -1
1050 | 6 32637405 32643652 -1
1051 | 6 32659463 32666689 -1
1052 | 6 32741385 32746887 -1
1053 | 6 32756097 32763553 -1
1054 | 6 32812762 32817048 -1
1055 | 6 32934628 32941070 -1
1056 | 6 32948613 32953122 -1
1057 | 6 33004182 33009612 -1
1058 | 6 33064568 33080778 -1
1059 | 6 33075925 33089696 -1
1060 | 6 33112515 33129113 -1
1061 | 6 35551621 35598248 6
1062 | 6 35786858 35798675 -1
1063 | 6 35786858 35798675 6
1064 | 6 37992622 38020640 6
1065 | 6 49460716 49480713 6
1066 | 6 57332005 57365431 -1
1067 | 6 57332005 57365431 6
1068 | 6 57790492 57876878 -1
1069 | 6 57790492 57876878 6
1070 | 6 57863530 57939124 -1
1071 | 6 57863530 57939124 6
1072 | 6 57967069 58257088 -1
1073 | 6 57967069 58257088 6
1074 | 6 58290805 58341497 -1
1075 | 6 58290805 58341497 6
1076 | 6 58330705 58399928 -1
1077 | 6 58330705 58399928 6
1078 | 6 60533912 60555004 -1
1079 | 6 60533912 60555004 6
1080 | 6 60590851 60616070 -1
1081 | 6 60590851 60616070 6
1082 | 6 60718603 61011831 -1
1083 | 6 60718603 61011831 6
1084 | 6 61047420 61119912 -1
1085 | 6 61047420 61119912 6
1086 | 6 61109122 61159829 -1
1087 | 6 61109122 61159829 6
1088 | 6 68516222 68532008 6
1089 | 6 73997596 74008611 6
1090 | 6 89052303 89056482 6
1091 | 6 124112523 124149940 6
1092 | 6 131698187 131714119 -1
1093 | 6 131698187 131714119 6
1094 | 6 137060646 137097493 6
1095 | 6 148657862 148673587 6
1096 | 6 160822358 160847756 6
1097 | 6 160855211 160861427 6
1098 | 7 6860971 6883459 6
1099 | 7 29652963 29701061 -1
1100 | 7 29652963 29701061 6
1101 | 7 30100904 30107563 6
1102 | 7 32712679 32761375 -1
1103 | 7 32712679 32761375 6
1104 | 7 35099529 35241571 -1
1105 | 7 35099529 35241571 6
1106 | 7 38246489 38265321 6
1107 | 7 51367570 51409670 6
1108 | 7 51673615 51684136 6
1109 | 7 52662386 52677596 6
1110 | 7 56787891 57095079 -1
1111 | 7 56787891 57095079 6
1112 | 7 57476150 57497184 6
1113 | 7 63290206 63365583 -1
1114 | 7 63290206 63365583 6
1115 | 7 63382525 63458000 -1
1116 | 7 63382525 63458000 6
1117 | 7 63477423 63788554 -1
1118 | 7 63477423 63788554 6
1119 | 7 65123277 65219156 -1
1120 | 7 65123277 65219156 6
1121 | 7 65629889 65725022 -1
1122 | 7 65629889 65725022 6
1123 | 7 65796556 65810092 -1
1124 | 7 65796556 65810092 6
1125 | 7 66635431 66670132 6
1126 | 7 67020343 67062483 -1
1127 | 7 67020343 67062483 6
1128 | 7 67062128 67206594 -1
1129 | 7 67062128 67206594 6
1130 | 7 67208418 67273391 -1
1131 | 7 67208418 67273391 6
1132 | 7 72524833 72536871 -1
1133 | 7 72524833 72536871 6
1134 | 7 72541341 72605642 -1
1135 | 7 72541341 72605642 6
1136 | 7 72605408 72749591 -1
1137 | 7 72605408 72749591 6
1138 | 7 72764229 72805248 -1
1139 | 7 72764229 72805248 6
1140 | 7 72846678 72858747 -1
1141 | 7 72846678 72858747 6
1142 | 7 72858572 73113988 -1
1143 | 7 72858572 73113988 6
1144 | 7 72931881 72971826 -1
1145 | 7 72931881 72971826 6
1146 | 7 73286436 73330451 -1
1147 | 7 73286436 73330451 6
1148 | 7 75307566 75507275 -1
1149 | 7 75307566 75507275 6
1150 | 7 75439803 75465108 -1
1151 | 7 75439803 75465108 6
1152 | 7 75724116 75735272 6
1153 | 7 76433860 76449858 -1
1154 | 7 76433860 76449858 6
1155 | 7 76448020 76539736 -1
1156 | 7 76448020 76539736 6
1157 | 7 76548520 76608837 6
1158 | 7 76553029 76602289 -1
1159 | 7 76553029 76602289 6
1160 | 7 76602312 76618477 -1
1161 | 7 76602312 76618477 6
1162 | 7 76624373 76649712 -1
1163 | 7 76624373 76649712 6
1164 | 7 76651385 76946262 -1
1165 | 7 76651385 76946262 6
1166 | 7 76670848 76812607 6
1167 | 7 76817384 76843678 6
1168 | 7 76946260 77030376 -1
1169 | 7 76946260 77030376 6
1170 | 7 101020082 101058859 -1
1171 | 7 101027053 101034685 6
1172 | 7 103175333 103288771 -1
1173 | 7 103175333 103288771 6
1174 | 7 142060494 142065772 6
1175 | 7 143521768 143587141 -1
1176 | 7 143521768 143587141 6
1177 | 7 143587137 143650804 -1
1178 | 7 143587137 143650804 6
1179 | 7 143742323 143807700 -1
1180 | 7 143742323 143807700 6
1181 | 7 143806841 143874696 -1
1182 | 7 143806841 143874696 6
1183 | 7 144186948 144254793 -1
1184 | 7 144186948 144254793 6
1185 | 7 144256421 144272351 -1
1186 | 7 144256421 144272351 6
1187 | 7 144272351 144296188 -1
1188 | 7 144272351 144296188 6
1189 | 7 144296183 144364104 -1
1190 | 7 144296183 144364104 6
1191 | 7 144353388 144377283 -1
1192 | 7 144353388 144377283 6
1193 | 7 149855396 149901414 6
1194 | 7 149889693 149930483 -1
1195 | 7 149889693 149930483 6
1196 | 7 149911607 150090255 6
1197 | 7 149930517 150036952 -1
1198 | 7 149930517 150036952 6
1199 | 7 150038766 150142817 -1
1200 | 7 150038766 150142817 6
1201 | 7 150086628 150123309 6
1202 | 7 150142568 150263791 -1
1203 | 7 150142568 150263791 6
1204 | 7 152762381 152803227 -1
1205 | 7 152762381 152803227 6
1206 | 7 152912115 152917321 6
1207 | 7 153705618 153824661 -1
1208 | 7 153705618 153824661 6
1209 | 7 153780403 153938549 6
1210 | 7 153957893 154063328 -1
1211 | 7 153957893 154063328 6
1212 | 7 154057299 154167276 -1
1213 | 7 154057299 154167276 6
1214 | 7 156516833 156520696 6
1215 | 7 156937693 156941950 6
1216 | 8 290490 330244 -1
1217 | 8 290490 330244 6
1218 | 8 608101 618306 6
1219 | 8 1037610 1043986 6
1220 | 8 2232510 2247909 -1
1221 | 8 2232510 2247909 6
1222 | 8 2306810 2317432 -1
1223 | 8 2306810 2317432 6
1224 | 8 2383068 2486876 -1
1225 | 8 2383068 2486876 6
1226 | 8 6982438 7001548 -1
1227 | 8 6982438 7001548 6
1228 | 8 7001549 7020647 -1
1229 | 8 7001549 7020647 6
1230 | 8 7148168 7278125 6
1231 | 8 7342478 7578561 -1
1232 | 8 7342478 7578561 6
1233 | 8 7742479 7967891 -1
1234 | 8 7742479 7967891 6
1235 | 8 8009343 8142061 6
1236 | 8 8215640 8229317 6
1237 | 8 12121730 12153574 6
1238 | 8 12379784 12393130 6
1239 | 8 23156099 23159470 6
1240 | 8 25717102 25732262 6
1241 | 8 46611313 46630151 6
1242 | 8 59221272 59226035 6
1243 | 8 68270573 68281018 6
1244 | 8 121307517 121328565 6
1245 | 8 121387681 121417667 6
1246 | 8 139591613 139594916 6
1247 | 8 145012420 145028142 6
1248 | 9 2369682 2375648 6
1249 | 9 5302551 5337309 6
1250 | 9 7020506 7025374 6
1251 | 9 35170603 35174170 6
1252 | 9 37502275 37514748 6
1253 | 9 38995387 39022951 6
1254 | 9 39101568 39135717 6
1255 | 9 39826673 39840712 6
1256 | 9 40016164 40195245 -1
1257 | 9 40016164 40195245 6
1258 | 9 40929925 41103545 -1
1259 | 9 40929925 41103545 6
1260 | 9 40929926 41103613 -1
1261 | 9 40929926 41103613 6
1262 | 9 41015341 41205156 -1
1263 | 9 41015341 41205156 6
1264 | 9 41296556 41448370 -1
1265 | 9 41296556 41448370 6
1266 | 9 42913900 42974548 6
1267 | 9 42913900 42974549 6
1268 | 9 61571473 61582755 6
1269 | 9 62371698 62599106 -1
1270 | 9 62371698 62599106 6
1271 | 9 64803068 64828544 -1
1272 | 9 64803068 64828544 6
1273 | 9 66752605 66854809 6
1274 | 9 66756492 66854809 6
1275 | 9 66802171 66828474 6
1276 | 9 67032155 67217003 -1
1277 | 9 67032155 67217003 6
1278 | 9 67036223 67216749 -1
1279 | 9 67036223 67216749 6
1280 | 9 68226146 68416768 -1
1281 | 9 68226146 68416768 6
1282 | 9 69419193 69441334 6
1283 | 9 71290870 71295370 6
1284 | 9 77155278 77162540 6
1285 | 9 81903567 81924261 -1
1286 | 9 81903567 81924261 6
1287 | 9 81921504 81936576 -1
1288 | 9 81921504 81936576 6
1289 | 9 81924262 81939333 -1
1290 | 9 81924262 81939333 6
1291 | 9 81936577 81951670 -1
1292 | 9 81936577 81951670 6
1293 | 9 83894218 83903291 6
1294 | 9 113104738 113111681 -1
1295 | 9 113104738 113111681 6
1296 | 9 113112892 113119829 -1
1297 | 9 113112892 113119829 6
1298 | 9 114321464 114329477 6
1299 | 9 115817784 115828703 6
1300 | 9 116751081 116754503 6
1301 | 9 131742290 131746493 6
1302 | 9 133065675 133080813 6
1303 | 9 134305317 134309032 6
1304 | 9 138177079 138202113 -1
1305 | 9 138177079 138202113 6
1306 | X 10000 2781479 -1
1307 | X 155701382 156030895 -1
1308 | Y 10000 2781479 -1
1309 | Y 56887903 57217415 -1
1310 |
--------------------------------------------------------------------------------
/data/clamms_special_regions.hg19.bed:
--------------------------------------------------------------------------------
1 | 1 764791 802515 -1
2 | 1 764791 802515 6
3 | 1 787701 849873 6
4 | 1 987561 992143 6
5 | 1 1157955 1161579 6
6 | 1 1567836 1584471 -1
7 | 1 1567836 1584471 6
8 | 1 1583271 1592840 6
9 | 1 1602243 1617450 6
10 | 1 1631055 1673887 6
11 | 1 1631056 1647700 -1
12 | 1 1631056 1647700 6
13 | 1 8204046 8211065 6
14 | 1 12854152 12887338 6
15 | 1 12908836 12920223 6
16 | 1 13142561 13171720 -1
17 | 1 13142561 13171720 6
18 | 1 13189995 13219127 -1
19 | 1 13189995 13219127 6
20 | 1 16347766 16366847 6
21 | 1 16368506 16381645 6
22 | 1 17029257 17051732 -1
23 | 1 17029257 17051732 6
24 | 1 17088429 17125658 -1
25 | 1 17088429 17125658 6
26 | 1 17205590 17221620 -1
27 | 1 17205590 17221620 6
28 | 1 17237626 17267001 -1
29 | 1 17237626 17267001 6
30 | 1 17998885 18003060 6
31 | 1 22293983 22319507 -1
32 | 1 22293983 22319507 6
33 | 1 22319649 22342693 -1
34 | 1 22319649 22342693 6
35 | 1 25585373 25594516 -1
36 | 1 25585373 25594516 6
37 | 1 25594515 25655519 -1
38 | 1 25594515 25655519 6
39 | 1 25655517 25664845 -1
40 | 1 25655517 25664845 6
41 | 1 25688914 25751819 -1
42 | 1 25688914 25751819 6
43 | 1 25708229 25733934 6
44 | 1 30925010 30938014 6
45 | 1 44096891 44107198 6
46 | 1 47304216 47366356 -1
47 | 1 47304216 47366356 6
48 | 1 47532912 47588602 -1
49 | 1 47532912 47588602 6
50 | 1 73450029 73458807 6
51 | 1 74649755 74663004 6
52 | 1 83598159 83647450 -1
53 | 1 83598159 83647450 6
54 | 1 83647856 83955427 -1
55 | 1 83647856 83955427 6
56 | 1 85980246 86005990 -1
57 | 1 85980246 86005990 6
58 | 1 96734709 96738564 6
59 | 1 101290426 101294714 6
60 | 1 103342022 103574052 -1
61 | 1 104103086 104143945 6
62 | 1 104153410 104163807 6
63 | 1 104161927 104210498 -1
64 | 1 104161927 104210498 6
65 | 1 104256097 104304649 -1
66 | 1 104256097 104304649 6
67 | 1 108764242 108853274 -1
68 | 1 108764242 108853274 6
69 | 1 108926349 109015286 -1
70 | 1 108926349 109015286 6
71 | 1 110216099 110234506 -1
72 | 1 110216099 110234506 6
73 | 1 110234507 110258884 -1
74 | 1 110234507 110258884 6
75 | 1 110243812 110254932 6
76 | 1 120141156 120150984 6
77 | 1 120845898 120936695 -1
78 | 1 120845898 120936695 6
79 | 1 121086695 121108575 -1
80 | 1 121086695 121108575 6
81 | 1 121086698 121133098 -1
82 | 1 121086698 121133098 6
83 | 1 143880003 143906232 -1
84 | 1 143880003 143906232 6
85 | 1 144019497 144095783 -1
86 | 1 144019497 144095783 6
87 | 1 144073868 144095783 -1
88 | 1 144073868 144095783 6
89 | 1 144551743 144620163 -1
90 | 1 144551743 144620163 6
91 | 1 144673795 144710724 -1
92 | 1 144673795 144710724 6
93 | 1 144914243 144939393 -1
94 | 1 144914243 144939393 6
95 | 1 144949144 144986041 -1
96 | 1 144949144 144986041 6
97 | 1 144994280 145017532 -1
98 | 1 144994280 145017532 6
99 | 1 145027117 145062269 -1
100 | 1 145027117 145062269 6
101 | 1 145748065 145833117 -1
102 | 1 145748065 145833117 6
103 | 1 146215196 146253299 6
104 | 1 146399366 146439200 6
105 | 1 147394506 147482095 -1
106 | 1 147394506 147482095 6
107 | 1 148511359 148582607 -1
108 | 1 148511359 148582607 6
109 | 1 148572088 148684147 -1
110 | 1 148572088 148684147 6
111 | 1 148734148 148845858 -1
112 | 1 148734148 148845858 6
113 | 1 149004921 149040886 -1
114 | 1 149004921 149040886 6
115 | 1 149067745 149168446 6
116 | 1 149209089 149216287 6
117 | 1 149228195 149232059 6
118 | 1 149238667 149415268 -1
119 | 1 149238667 149415268 6
120 | 1 149368543 149459645 -1
121 | 1 149368543 149459645 6
122 | 1 149433402 149459645 -1
123 | 1 149433402 149459645 6
124 | 1 149626126 149799720 -1
125 | 1 149626126 149799720 6
126 | 1 151332596 151349237 6
127 | 1 151353298 151385356 6
128 | 1 152648116 152659783 6
129 | 1 153670722 153690686 6
130 | 1 155184058 155206119 6
131 | 1 161479597 161518642 -1
132 | 1 161479597 161518642 6
133 | 1 161488930 161520018 6
134 | 1 161518643 161565122 -1
135 | 1 161518643 161565122 6
136 | 1 161560506 161618903 6
137 | 1 161560922 161599999 -1
138 | 1 161560922 161599999 6
139 | 1 161600001 161646894 -1
140 | 1 161600001 161646894 6
141 | 1 166179210 166194656 6
142 | 1 195809602 195813684 6
143 | 1 196711704 196740354 -1
144 | 1 196711704 196740354 6
145 | 1 196756101 196796319 -1
146 | 1 196756101 196796319 6
147 | 1 196796320 196825045 -1
148 | 1 196796320 196825045 6
149 | 1 196880627 196920352 -1
150 | 1 196880627 196920352 6
151 | 1 206482222 206558788 -1
152 | 1 206482222 206558788 6
153 | 1 206536833 206583337 -1
154 | 1 206536833 206583337 6
155 | 1 207696944 207715515 6
156 | 1 207699523 207718078 6
157 | 1 207715516 207734072 6
158 | 1 207716029 207734586 6
159 | 1 207718079 207736635 6
160 | 1 207734587 207751652 6
161 | 1 234919134 234956347 -1
162 | 1 234919134 234956347 6
163 | 1 242413204 242528828 -1
164 | 1 242413204 242528828 6
165 | 1 245918346 245922127 6
166 | 1 248584239 248623089 -1
167 | 1 248584239 248623089 6
168 | 1 248620630 248635914 6
169 | 1 248623087 248682567 -1
170 | 1 248623087 248682567 6
171 | 1 248692097 248751707 -1
172 | 1 248692097 248751707 6
173 | 1 248795556 248834181 -1
174 | 1 248795556 248834181 6
175 | 1 248813347 248824247 6
176 | 10 60000 130311 -1
177 | 10 60000 130311 6
178 | 10 5090957 5149878 -1
179 | 10 11957239 11961408 6
180 | 10 18747943 18762204 6
181 | 10 19655222 19659419 6
182 | 10 24160530 24166183 6
183 | 10 27223917 27229019 6
184 | 10 31576981 31582654 6
185 | 10 38520100 38637504 -1
186 | 10 38520100 38637504 6
187 | 10 42701418 42719974 6
188 | 10 43026638 43046793 6
189 | 10 45209607 45270625 6
190 | 10 46200831 46302261 -1
191 | 10 46200831 46302261 6
192 | 10 46283789 46426964 6
193 | 10 46712811 46947015 -1
194 | 10 46712811 46947015 6
195 | 10 46976283 47021713 -1
196 | 10 46976283 47021713 6
197 | 10 47024896 47259996 -1
198 | 10 47024896 47259996 6
199 | 10 47149455 47323674 6
200 | 10 47190000 47429169 -1
201 | 10 47190000 47429169 6
202 | 10 47534514 47587454 6
203 | 10 47595718 47658979 6
204 | 10 47671502 47706372 6
205 | 10 48105708 48280373 6
206 | 10 51092061 51136078 6
207 | 10 51187411 51330432 6
208 | 10 51777318 51781779 6
209 | 10 51806915 51907149 -1
210 | 10 51806915 51907149 6
211 | 10 56442555 56469283 6
212 | 10 81270578 81292860 6
213 | 10 81409929 81439952 -1
214 | 10 81409929 81439952 6
215 | 10 81454262 81636271 -1
216 | 10 81454262 81636271 6
217 | 10 81505309 81598180 6
218 | 10 81554036 81585133 -1
219 | 10 81554036 81585133 6
220 | 10 83128732 83134566 6
221 | 10 88976491 89152569 -1
222 | 10 88976491 89152569 6
223 | 10 88997194 89067761 -1
224 | 10 88997194 89067761 6
225 | 10 89189711 89260363 -1
226 | 10 89189711 89260363 6
227 | 10 105307995 105311464 6
228 | 10 124344127 124352726 6
229 | 10 124940586 124944411 6
230 | 10 133280264 133285669 6
231 | 10 135236150 135249933 -1
232 | 10 135236150 135249933 6
233 | 10 135246531 135269258 6
234 | 10 135302166 135354957 6
235 | 10 135380989 135394059 -1
236 | 10 135380989 135394059 6
237 | 10 135401886 135442371 6
238 | 11 3268109 3313896 6
239 | 11 3313899 3359713 6
240 | 11 4328849 4353177 6
241 | 11 5271350 5275172 6
242 | 11 5882487 5918366 6
243 | 11 5922573 5936798 6
244 | 11 18941448 18963992 -1
245 | 11 18941448 18963992 6
246 | 11 25078705 25082491 6
247 | 11 40769176 40772613 6
248 | 11 42966735 42976189 6
249 | 11 48900993 48909852 -1
250 | 11 48900993 48909852 6
251 | 11 48913680 48924398 -1
252 | 11 48913680 48924398 6
253 | 11 49487049 49813282 -1
254 | 11 49487049 49813282 6
255 | 11 50072427 50114154 -1
256 | 11 50072427 50114154 6
257 | 11 50283289 50324503 -1
258 | 11 50283289 50324503 6
259 | 11 58807865 58853713 6
260 | 11 60971952 60990788 -1
261 | 11 60971952 60990788 6
262 | 11 60990789 61009614 -1
263 | 11 60990789 61009614 6
264 | 11 63189346 63200324 -1
265 | 11 63189346 63200324 6
266 | 11 69492579 69497103 6
267 | 11 88575375 88892393 -1
268 | 11 88575375 88892393 6
269 | 11 89475576 89653791 -1
270 | 11 89475576 89653791 6
271 | 11 89657017 89830683 -1
272 | 11 89657017 89830683 6
273 | 11 95566043 95657371 -1
274 | 11 107238673 107247126 6
275 | 11 107654564 107676805 6
276 | 11 114491453 114512302 6
277 | 11 124083309 124095582 6
278 | 11 131923534 131930222 6
279 | 12 8003001 8026714 6
280 | 12 8033813 8086882 6
281 | 12 8108858 8117465 6
282 | 12 8362295 8389594 6
283 | 12 8538639 8550607 6
284 | 12 8956989 8965623 6
285 | 12 9544126 9555513 -1
286 | 12 9544126 9555513 6
287 | 12 9632686 9658099 -1
288 | 12 9632686 9658099 6
289 | 12 9663007 9731892 -1
290 | 12 9663007 9731892 6
291 | 12 9722997 9731986 -1
292 | 12 9722997 9731986 6
293 | 12 10569233 10584672 -1
294 | 12 10569233 10584672 6
295 | 12 10584673 10600097 -1
296 | 12 10584673 10600097 6
297 | 12 11449819 11479681 6
298 | 12 17956420 17965145 6
299 | 12 19466824 19487323 6
300 | 12 19492482 19505351 6
301 | 12 19522655 19571713 6
302 | 12 19573024 19580392 6
303 | 12 22571458 22578098 6
304 | 12 22578099 22584738 6
305 | 12 31261558 31333164 -1
306 | 12 31261558 31333164 6
307 | 12 31312360 31407812 6
308 | 12 31333787 31353978 -1
309 | 12 31333787 31353978 6
310 | 12 34383384 34389478 6
311 | 12 63923418 63951619 -1
312 | 12 63923418 63951619 6
313 | 12 63939271 64116993 6
314 | 12 63954365 64072240 -1
315 | 12 63954365 64072240 6
316 | 12 63971080 64119245 -1
317 | 12 63971080 64119245 6
318 | 12 64119249 64146247 -1
319 | 12 64119249 64146247 6
320 | 12 71498639 71511798 6
321 | 12 76494661 76500478 6
322 | 12 80153068 80162637 6
323 | 12 93343156 93348391 6
324 | 12 123178353 123191077 -1
325 | 12 123178353 123191077 6
326 | 12 123182525 123198813 6
327 | 12 123191092 123204503 -1
328 | 12 123191092 123204503 6
329 | 12 124495888 124500138 6
330 | 12 131766776 131794407 -1
331 | 12 131766776 131794407 6
332 | 12 132014291 132025171 6
333 | 12 132057461 132075259 6
334 | 12 132102792 132129575 6
335 | 12 132134849 132169194 -1
336 | 12 132134849 132169194 6
337 | 13 19167973 19275982 -1
338 | 13 19167973 19275982 6
339 | 13 19301587 19448886 -1
340 | 13 19301587 19448886 6
341 | 13 19636998 19642930 6
342 | 13 21537639 21541057 6
343 | 13 23096744 23108477 6
344 | 13 23400590 23420498 6
345 | 13 43599023 43609267 6
346 | 13 46981282 47034722 -1
347 | 13 46981282 47034722 6
348 | 13 51064089 51075547 -1
349 | 13 51064089 51075547 6
350 | 13 51531319 51540644 6
351 | 13 52882666 52899687 6
352 | 13 57808977 57822918 6
353 | 13 64290924 64301510 -1
354 | 13 64290924 64301510 6
355 | 13 64311684 64333021 -1
356 | 13 64311684 64333021 6
357 | 13 64333064 64343649 -1
358 | 13 64333064 64343649 6
359 | 13 64354626 64408383 -1
360 | 13 64354626 64408383 6
361 | 13 64395102 64418388 -1
362 | 13 64395102 64418388 6
363 | 13 68628034 68656877 6
364 | 13 76107231 76124385 6
365 | 14 19460150 19469955 6
366 | 14 20191999 20424000 -1
367 | 14 20191999 20424000 6
368 | 14 21354447 21416470 6
369 | 14 24421111 24455943 -1
370 | 14 24421111 24455943 6
371 | 14 24450075 24463904 6
372 | 14 24455947 24503512 -1
373 | 14 24455947 24503512 6
374 | 14 24479201 24484147 6
375 | 14 29715296 29722793 6
376 | 14 65014937 65020554 6
377 | 14 65734096 65743736 6
378 | 14 73997528 74006858 6
379 | 14 74000098 74020296 -1
380 | 14 74000098 74020296 6
381 | 14 74026192 74041364 6
382 | 14 74030015 74052473 -1
383 | 14 74030015 74052473 6
384 | 14 82497711 82504846 6
385 | 14 95147991 95155299 6
386 | 14 103700308 103706978 6
387 | 14 103729469 103733345 6
388 | 14 105408968 105413790 6
389 | 14 105994256 107283085 -1
390 | 14 106066085 106074681 6
391 | 14 106117287 106126473 6
392 | 14 106135535 106169095 6
393 | 14 107151889 107182764 -1
394 | 14 107151889 107182764 6
395 | 15 20392813 20436843 -1
396 | 15 20392813 20436843 6
397 | 15 20548041 20579449 -1
398 | 15 20548041 20579449 6
399 | 15 20606056 20630147 -1
400 | 15 20606056 20630147 6
401 | 15 20634177 20847448 -1
402 | 15 20634177 20847448 6
403 | 15 20846510 20888700 -1
404 | 15 20846510 20888700 6
405 | 15 20878451 20894627 -1
406 | 15 20878451 20894627 6
407 | 15 20935075 21034034 -1
408 | 15 20935075 21034034 6
409 | 15 21033445 21199563 -1
410 | 15 21033445 21199563 6
411 | 15 21885001 21901264 -1
412 | 15 21885001 21901264 6
413 | 15 21893107 21946253 -1
414 | 15 21893107 21946253 6
415 | 15 21941706 22040711 -1
416 | 15 21941706 22040711 6
417 | 15 22044595 22210800 -1
418 | 15 22044595 22210800 6
419 | 15 22099107 22797761 -1
420 | 15 22295127 22337340 -1
421 | 15 22295127 22337340 6
422 | 15 22336770 22343950 6
423 | 15 22343950 22372926 -1
424 | 15 22343950 22372926 6
425 | 15 22383643 22434441 -1
426 | 15 22383643 22434441 6
427 | 15 22436023 22559781 -1
428 | 15 22436023 22559781 6
429 | 15 22561781 22589652 -1
430 | 15 22561781 22589652 6
431 | 15 23090898 23216152 -1
432 | 15 23090898 23216152 6
433 | 15 23248746 23452652 -1
434 | 15 23248746 23452652 6
435 | 15 23301643 23514853 -1
436 | 15 23301643 23514853 6
437 | 15 24536798 24589280 -1
438 | 15 24536798 24589280 6
439 | 15 24584981 24652765 6
440 | 15 24674867 24722616 6
441 | 15 24724842 24775937 6
442 | 15 24742101 24803873 -1
443 | 15 24742101 24803873 6
444 | 15 28561204 28664356 -1
445 | 15 28561204 28664356 6
446 | 15 28737584 28840699 -1
447 | 15 28737584 28840699 6
448 | 15 28760516 28975369 -1
449 | 15 28760516 28975369 6
450 | 15 28975297 29101685 -1
451 | 15 28975297 29101685 6
452 | 15 30370110 30670011 -1
453 | 15 30370110 30670011 6
454 | 15 30692410 30910305 -1
455 | 15 30692410 30910305 6
456 | 15 30894482 30935159 -1
457 | 15 30894482 30935159 6
458 | 15 30970418 31073600 -1
459 | 15 30970418 31073600 6
460 | 15 31909993 32019620 -1
461 | 15 31909993 32019620 6
462 | 15 32445407 32752861 -1
463 | 15 32445407 32752861 6
464 | 15 32503917 32522305 6
465 | 15 32681447 32899708 -1
466 | 15 32681447 32899708 6
467 | 15 32883923 32925115 -1
468 | 15 32883923 32925115 6
469 | 15 34671002 34730052 -1
470 | 15 34671002 34730052 6
471 | 15 34753289 34819436 6
472 | 15 34817215 34875898 -1
473 | 15 34817215 34875898 6
474 | 15 43851135 43950351 -1
475 | 15 43851135 43950351 6
476 | 15 43950690 44041710 -1
477 | 15 43950690 44041710 6
478 | 15 45109515 45117149 6
479 | 15 45109634 45134415 -1
480 | 15 45109634 45134415 6
481 | 15 45350779 45375390 -1
482 | 15 45350779 45375390 6
483 | 15 84953963 84958442 6
484 | 15 89463529 89470684 6
485 | 16 203061 213216 6
486 | 16 1173050 1177840 6
487 | 16 1278547 1288838 -1
488 | 16 1278547 1288838 6
489 | 16 1294332 1301051 6
490 | 16 1297505 1308599 -1
491 | 16 1297505 1308599 6
492 | 16 2584142 2634845 -1
493 | 16 2584142 2634845 6
494 | 16 2649224 2661356 -1
495 | 16 2649224 2661356 6
496 | 16 2665342 2727514 -1
497 | 16 2665342 2727514 6
498 | 16 2704922 2740716 6
499 | 16 2727678 2741054 -1
500 | 16 2727678 2741054 6
501 | 16 14781036 14800000 -1
502 | 16 14781036 14800000 6
503 | 16 14820156 14839122 -1
504 | 16 14820156 14839122 6
505 | 16 14838092 14910231 -1
506 | 16 14838092 14910231 6
507 | 16 14910229 15048647 -1
508 | 16 14910229 15048647 -1
509 | 16 14910229 15048647 6
510 | 16 14910229 15048647 6
511 | 16 15059317 15124767 -1
512 | 16 15059317 15124767 6
513 | 16 15068571 15123334 -1
514 | 16 15068571 15123334 6
515 | 16 15255689 15325366 -1
516 | 16 15255689 15325366 6
517 | 16 15325090 15473785 -1
518 | 16 15325090 15473785 -1
519 | 16 15325090 15473785 6
520 | 16 15325090 15473785 6
521 | 16 15406331 15478338 -1
522 | 16 15406331 15478338 6
523 | 16 15977723 16024808 6
524 | 16 16292266 16420325 -1
525 | 16 16292266 16420325 6
526 | 16 16308971 16447257 -1
527 | 16 16308971 16447257 6
528 | 16 16462009 16665159 -1
529 | 16 16462009 16665159 6
530 | 16 16471465 16619333 -1
531 | 16 16471465 16619333 6
532 | 16 16630217 16690108 6
533 | 16 16665156 16726048 -1
534 | 16 16665156 16726048 6
535 | 16 16731227 16856795 -1
536 | 16 16731227 16856795 6
537 | 16 18167401 18236992 -1
538 | 16 18167401 18236992 6
539 | 16 18235078 18438659 -1
540 | 16 18235078 18438659 6
541 | 16 18280555 18428156 -1
542 | 16 18280555 18428156 6
543 | 16 18449138 18590866 -1
544 | 16 18449138 18590866 6
545 | 16 18475281 18607611 -1
546 | 16 18475281 18607611 6
547 | 16 18607614 18733023 -1
548 | 16 18607614 18733023 6
549 | 16 18732856 18793801 -1
550 | 16 18732856 18793801 6
551 | 16 21516287 21579921 -1
552 | 16 21516287 21579921 6
553 | 16 21740424 21808829 -1
554 | 16 21740424 21808829 6
555 | 16 22556552 22625259 -1
556 | 16 22556552 22625259 6
557 | 16 22618978 22641289 -1
558 | 16 22618978 22641289 6
559 | 16 22652105 22709714 -1
560 | 16 22652105 22709714 6
561 | 16 24205920 24213783 6
562 | 16 25338621 25344416 6
563 | 16 28349272 28483213 -1
564 | 16 28349272 28483213 6
565 | 16 28614589 28627258 -1
566 | 16 28614589 28627258 6
567 | 16 28654502 28788451 -1
568 | 16 28654502 28788451 6
569 | 16 32490483 32652840 -1
570 | 16 32490483 32652840 6
571 | 16 32810349 33036298 -1
572 | 16 32810349 33036298 6
573 | 16 32914931 33208857 -1
574 | 16 33451263 33491676 -1
575 | 16 33451263 33491676 6
576 | 16 33550738 33572880 -1
577 | 16 33550738 33572880 6
578 | 16 33587252 33632372 -1
579 | 16 33587252 33632372 6
580 | 16 33631904 33864470 -1
581 | 16 33631904 33864470 6
582 | 16 34498795 34570427 6
583 | 16 34587446 34685602 6
584 | 16 34715348 34760389 6
585 | 16 35020598 35069253 6
586 | 16 55832458 55864601 6
587 | 16 68787546 68793630 6
588 | 16 70043802 70109518 -1
589 | 16 70043802 70109518 6
590 | 16 70109526 70141049 -1
591 | 16 70109526 70141049 6
592 | 16 70144251 70183454 -1
593 | 16 70144251 70183454 6
594 | 16 70183672 70198386 -1
595 | 16 70183672 70198386 6
596 | 16 70845286 71202573 -1
597 | 16 70845286 71202573 6
598 | 16 72086197 72113261 6
599 | 16 74365503 74406043 -1
600 | 16 74365503 74406043 6
601 | 16 74559328 74590161 -1
602 | 16 74559328 74590161 6
603 | 16 75532569 75576558 6
604 | 16 77199004 77228154 6
605 | 16 81834579 81842524 6
606 | 16 86450956 86455896 6
607 | 16 88171452 88180988 6
608 | 16 88799468 88802783 6
609 | 17 10333293 10338126 6
610 | 17 15659661 15676794 -1
611 | 17 15659661 15676794 6
612 | 17 16710817 16724595 6
613 | 17 18928497 18966543 -1
614 | 17 18928497 18966543 6
615 | 17 19014647 19052666 -1
616 | 17 19014647 19052666 6
617 | 17 20474710 20491809 -1
618 | 17 20474710 20491809 6
619 | 17 21904639 21925170 6
620 | 17 26742472 26747107 6
621 | 17 34444306 34486472 -1
622 | 17 34444306 34486472 6
623 | 17 34743980 34791215 -1
624 | 17 34743980 34791215 6
625 | 17 36249847 36297053 -1
626 | 17 36249847 36297053 6
627 | 17 39531938 39544334 6
628 | 17 40155707 40159556 6
629 | 17 41433695 41438697 6
630 | 17 43593497 43631279 -1
631 | 17 43593497 43631279 6
632 | 17 43665153 43693658 6
633 | 17 43670924 43705165 -1
634 | 17 43670924 43705165 6
635 | 17 44171820 44214434 6
636 | 17 44227966 44263121 6
637 | 17 44286625 44342496 6
638 | 17 44294406 44329730 -1
639 | 17 44294406 44329730 6
640 | 17 44346905 44368301 6
641 | 17 44369412 44566776 -1
642 | 17 44369412 44566776 6
643 | 17 44586990 44784489 -1
644 | 17 44586990 44784489 6
645 | 17 45090987 45129812 -1
646 | 17 45090987 45129812 6
647 | 17 57525081 57531138 6
648 | 17 57650241 57677463 -1
649 | 17 57650241 57677463 6
650 | 17 58052766 58079963 -1
651 | 17 58052766 58079963 6
652 | 17 75217319 75230350 6
653 | 17 76472975 76477276 6
654 | 17 77363640 77394305 6
655 | 17 78603178 78613874 6
656 | 17 80275171 80280463 6
657 | 17 80317200 80321790 6
658 | 18 14415 84190 -1
659 | 18 14415 84190 6
660 | 18 38590 64703 6
661 | 18 1200354 1209322 6
662 | 18 14185210 14353419 -1
663 | 18 14185210 14353419 6
664 | 18 14259675 14275556 6
665 | 18 14358134 14728624 -1
666 | 18 14358134 14728624 6
667 | 18 14358135 14464819 -1
668 | 18 14358135 14464819 6
669 | 18 14807067 14897137 -1
670 | 18 14807067 14897137 6
671 | 18 15026970 15101188 -1
672 | 18 15026970 15101188 6
673 | 18 63200178 63206988 6
674 | 18 77111772 77119003 6
675 | 19 4218923 4224411 6
676 | 19 6375783 6392798 6
677 | 19 8336857 8351713 6
678 | 19 8351714 8366563 6
679 | 19 8959519 9092018 -1
680 | 19 9288436 9302416 6
681 | 19 11699012 12677226 -1
682 | 19 15783150 15838573 6
683 | 19 17443158 17449780 6
684 | 19 18299745 18312850 6
685 | 19 19798426 24336509 -1
686 | 19 21079758 21104888 6
687 | 19 22416209 22447871 6
688 | 19 22449126 22456539 6
689 | 19 23374357 23378830 6
690 | 19 40367998 40384255 -1
691 | 19 40367998 40384255 6
692 | 19 40384256 40399943 -1
693 | 19 40384256 40399943 6
694 | 19 41336556 41361522 -1
695 | 19 41336556 41361522 6
696 | 19 41355610 41372571 6
697 | 19 41367421 41393307 -1
698 | 19 41367421 41393307 6
699 | 19 43225793 43244668 -1
700 | 19 43251869 43297116 6
701 | 19 43256838 43269831 -1
702 | 19 43298054 43372003 6
703 | 19 43341148 43359870 -1
704 | 19 43370612 43383871 -1
705 | 19 43385925 43491098 6
706 | 19 43406233 43422076 -1
707 | 19 43428283 43441330 -1
708 | 19 43494166 43546218 6
709 | 19 43511808 43530631 -1
710 | 19 43568361 43586893 -1
711 | 19 43603497 43647692 6
712 | 19 43655215 43699309 6
713 | 19 43671894 43690688 -1
714 | 19 43696853 43709926 -1
715 | 19 43757434 43773682 -1
716 | 19 43858622 43870006 -1
717 | 19 43858622 43870006 6
718 | 19 43873234 43884437 -1
719 | 19 43873234 43884437 6
720 | 19 44360000 45009212 -1
721 | 19 52133876 52150221 6
722 | 19 53322580 53358016 6
723 | 19 53516549 53544896 6
724 | 19 54178965 54265684 -1
725 | 19 54728315 54747758 6
726 | 19 55043908 55057024 -1
727 | 19 55235983 55248003 -1
728 | 19 55249973 55264504 -1
729 | 19 55266580 55280787 6
730 | 19 55281264 55295778 -1
731 | 19 55297873 55344772 6
732 | 19 55315066 55325972 -1
733 | 19 55327892 55342233 -1
734 | 19 55344130 55360024 -1
735 | 19 55361897 55378670 -1
736 | 19 57175161 57183097 6
737 | 2 10885424 10893362 6
738 | 2 13204170 13249681 6
739 | 2 17023786 17037122 6
740 | 2 37958362 37970514 -1
741 | 2 37958362 37970514 6
742 | 2 37971787 38002047 -1
743 | 2 37971787 38002047 6
744 | 2 38955261 38972573 6
745 | 2 56946208 56953817 6
746 | 2 57395019 57403090 6
747 | 2 57405713 57447115 6
748 | 2 63498524 63502931 6
749 | 2 73868041 73919420 6
750 | 2 77901296 77916981 -1
751 | 2 77901296 77916981 6
752 | 2 77973977 78002478 6
753 | 2 79330069 79343809 6
754 | 2 86935956 86970206 -1
755 | 2 86935956 86970206 6
756 | 2 87172338 87280768 -1
757 | 2 87172338 87280768 6
758 | 2 87280767 87315002 -1
759 | 2 87280767 87315002 6
760 | 2 87322055 87338168 -1
761 | 2 87322055 87338168 6
762 | 2 87351818 87390046 -1
763 | 2 87351818 87390046 6
764 | 2 87351847 87473440 -1
765 | 2 87351847 87473440 6
766 | 2 87473438 87560498 -1
767 | 2 87473438 87560498 6
768 | 2 87655922 87668206 6
769 | 2 87728376 87740602 6
770 | 2 87730958 87997507 -1
771 | 2 87730958 87997507 6
772 | 2 88000001 88038538 -1
773 | 2 88000001 88038538 6
774 | 2 88015483 88124219 -1
775 | 2 88015483 88124219 6
776 | 2 88828011 88831618 6
777 | 2 89156874 90471176 -1
778 | 2 89879699 89915431 -1
779 | 2 89879699 89915431 6
780 | 2 89993189 90048445 6
781 | 2 90211854 90248780 6
782 | 2 91821422 91865600 -1
783 | 2 91821422 91865600 6
784 | 2 96665827 96675118 6
785 | 2 97859809 98013244 -1
786 | 2 97859809 98013244 6
787 | 2 98025647 98177974 -1
788 | 2 98025647 98177974 6
789 | 2 106854203 106873003 -1
790 | 2 106854203 106873003 6
791 | 2 106855191 106871459 6
792 | 2 108522124 108540538 -1
793 | 2 108522124 108540538 6
794 | 2 110494431 110852754 -1
795 | 2 110494431 110852754 6
796 | 2 111033788 111392192 -1
797 | 2 111033788 111392192 6
798 | 2 112010503 112276645 -1
799 | 2 112010503 112276645 6
800 | 2 112276517 112283339 6
801 | 2 112282800 112372873 -1
802 | 2 112282800 112372873 6
803 | 2 112477294 112596562 -1
804 | 2 112477294 112596562 6
805 | 2 123784524 123791847 6
806 | 2 130818815 130895784 -1
807 | 2 130818815 130895784 6
808 | 2 130879200 130998685 -1
809 | 2 130879200 130998685 6
810 | 2 131003245 131043673 -1
811 | 2 131003245 131043673 6
812 | 2 131159125 131200000 -1
813 | 2 131159125 131200000 6
814 | 2 131436671 131477508 -1
815 | 2 131436671 131477508 6
816 | 2 131958177 132035694 -1
817 | 2 131958177 132035694 6
818 | 2 132123862 132159122 -1
819 | 2 132123862 132159122 6
820 | 2 132190778 132308920 -1
821 | 2 132190778 132308920 6
822 | 2 132660994 132699803 -1
823 | 2 132660994 132699803 6
824 | 2 133278631 133309680 6
825 | 2 159705604 159724053 6
826 | 2 179296140 179315958 -1
827 | 2 185753752 185774883 6
828 | 2 225095697 225101359 6
829 | 2 228189866 228222552 -1
830 | 2 228240552 228258314 6
831 | 2 234945584 234952658 6
832 | 2 241611923 241625206 -1
833 | 2 241611923 241625206 6
834 | 2 241628384 241641219 -1
835 | 2 241628384 241641219 6
836 | 2 241898224 241901927 6
837 | 2 242760254 242767889 6
838 | 20 25693629 25707523 6
839 | 20 25733092 25825788 -1
840 | 20 25733092 25825788 6
841 | 20 25991394 26084413 -1
842 | 20 25991394 26084413 6
843 | 20 29440033 29484409 -1
844 | 20 29440033 29484409 6
845 | 20 30474471 30481540 6
846 | 20 32943935 32948855 6
847 | 20 50683560 50697030 6
848 | 20 52331473 52338370 6
849 | 20 58113154 58117421 6
850 | 20 59567325 59589419 6
851 | 20 61816289 61820092 6
852 | 20 62195541 62200149 6
853 | 21 10901969 10945260 -1
854 | 21 10901969 10945260 6
855 | 21 10956566 11007372 -1
856 | 21 10956566 11007372 6
857 | 21 14593409 14627482 -1
858 | 21 14593409 14627482 6
859 | 21 14640424 14714507 -1
860 | 21 14640424 14714507 6
861 | 21 14714527 14801577 -1
862 | 21 14714527 14801577 6
863 | 21 14801588 15174050 -1
864 | 21 14801588 15174050 6
865 | 21 15028101 15200573 6
866 | 21 15243732 15252998 6
867 | 21 20056846 20080865 6
868 | 21 44821874 44834552 6
869 | 21 44969556 44974112 6
870 | 21 47301288 47305091 6
871 | 22 16961912 17003620 6
872 | 22 18619094 18625292 6
873 | 22 18712537 18878770 -1
874 | 22 18712537 18878770 6
875 | 22 18939679 19023677 6
876 | 22 20368703 20472334 -1
877 | 22 20368703 20472334 6
878 | 22 20402107 20509428 -1
879 | 22 20402107 20509428 6
880 | 22 21515447 21679525 -1
881 | 22 21515447 21679525 6
882 | 22 21689945 21797378 -1
883 | 22 21689945 21797378 6
884 | 22 21812477 21917116 -1
885 | 22 21812477 21917116 6
886 | 22 22385572 23265082 -1
887 | 22 24282119 24311737 -1
888 | 22 24282119 24311737 6
889 | 22 24313963 24343586 -1
890 | 22 24313963 24343586 6
891 | 22 24334508 24352142 -1
892 | 22 24334508 24352142 6
893 | 22 24386423 24404807 -1
894 | 22 24386423 24404807 6
895 | 22 25622856 25643059 -1
896 | 22 25622856 25643059 6
897 | 22 25662456 25683307 6
898 | 22 25707423 25919535 6
899 | 22 25850716 25867764 -1
900 | 22 25850716 25867764 6
901 | 22 30456517 30463656 6
902 | 22 42523544 42534442 6
903 | 22 42896179 42902137 -1
904 | 22 42896179 42902137 6
905 | 22 42905976 42947064 6
906 | 22 42949740 42955460 -1
907 | 22 42949740 42955460 6
908 | 22 42955764 42973804 6
909 | 22 51068582 51078248 6
910 | 3 6318121 6336510 6
911 | 3 8826097 8856983 6
912 | 3 47241319 47257138 6
913 | 3 80385068 80388375 6
914 | 3 94511335 94516340 6
915 | 3 99203799 99216119 6
916 | 3 100330764 100397533 6
917 | 3 125437152 125461411 6
918 | 3 128380073 128404205 6
919 | 3 128403489 128412443 6
920 | 3 155477624 155500713 6
921 | 3 166074271 166094912 6
922 | 3 173239478 173290793 6
923 | 3 195421486 195446004 -1
924 | 3 195421486 195446004 6
925 | 3 195456553 195470480 -1
926 | 3 195456553 195470480 6
927 | 3 195675195 195695173 6
928 | 4 3060038 3069314 6
929 | 4 3467331 3474057 6
930 | 4 3557917 3588391 -1
931 | 4 3557917 3588391 6
932 | 4 4015245 4057658 6
933 | 4 8971974 9089215 6
934 | 4 9104281 9126672 6
935 | 4 9125637 9158780 6
936 | 4 9370804 9446199 6
937 | 4 9485670 9512560 6
938 | 4 9589081 9625693 6
939 | 4 25556181 25580334 6
940 | 4 29291437 29297314 6
941 | 4 34683260 34688255 6
942 | 4 49489362 49512121 6
943 | 4 59972653 59988104 6
944 | 4 60320971 60330704 6
945 | 4 69674526 69714548 6
946 | 4 72671342 72675264 6
947 | 4 78277506 78288390 6
948 | 4 80888293 80897489 6
949 | 4 88534465 88539966 6
950 | 4 90101097 90107794 6
951 | 4 144718922 144751514 6
952 | 4 144942159 145012778 6
953 | 4 161860691 161878855 6
954 | 4 161879055 161884882 6
955 | 4 161887145 161898636 6
956 | 4 161899899 161933193 6
957 | 4 165861802 165883201 6
958 | 5 686122 731394 -1
959 | 5 686122 731394 6
960 | 5 697645 711126 -1
961 | 5 697645 711126 6
962 | 5 714823 733928 -1
963 | 5 714823 733928 6
964 | 5 730525 767318 -1
965 | 5 730525 767318 6
966 | 5 756752 779167 -1
967 | 5 756752 779167 6
968 | 5 778191 815714 -1
969 | 5 778191 815714 6
970 | 5 788751 797797 -1
971 | 5 788751 797797 6
972 | 5 814716 825479 6
973 | 5 821953 851001 -1
974 | 5 821953 851001 6
975 | 5 1034682 1038576 6
976 | 5 13204185 13209559 6
977 | 5 17701389 17715752 6
978 | 5 29621936 29635269 6
979 | 5 32105424 32146631 6
980 | 5 38819474 38824474 6
981 | 5 54110371 54121503 6
982 | 5 64635185 64639475 6
983 | 5 68829716 68908477 -1
984 | 5 68829716 68908477 6
985 | 5 69124875 69425564 -1
986 | 5 69124875 69425564 6
987 | 5 70000001 70300987 -1
988 | 5 70000001 70300987 6
989 | 5 70292574 70313419 6
990 | 5 70311052 70389823 -1
991 | 5 70311052 70389823 6
992 | 5 82171308 82176996 6
993 | 5 98726330 98770546 -1
994 | 5 98726330 98770546 6
995 | 5 98803645 98840274 -1
996 | 5 98803645 98840274 6
997 | 5 98849625 98871196 -1
998 | 5 98849625 98871196 6
999 | 5 99405975 99442897 -1
1000 | 5 99405975 99442897 6
1001 | 5 99666604 99710393 -1
1002 | 5 99666604 99710393 6
1003 | 5 99714223 99736930 -1
1004 | 5 99714223 99736930 6
1005 | 5 103622787 103627227 6
1006 | 5 117872590 117882712 6
1007 | 5 137730617 137744733 6
1008 | 5 138609440 138667366 -1
1009 | 5 138777562 138783722 6
1010 | 5 140165720 140391929 -1
1011 | 5 140174443 140391929 -1
1012 | 5 140180782 140391929 -1
1013 | 5 140186658 140391929 -1
1014 | 5 140201360 140391929 -1
1015 | 5 140207506 140391929 -1
1016 | 5 140213968 140391929 -1
1017 | 5 140220906 140391929 -1
1018 | 5 140227356 140391929 -1
1019 | 5 140235467 140391929 -1
1020 | 5 140247097 140391929 -1
1021 | 5 140254886 140391929 -1
1022 | 5 140261853 140391929 -1
1023 | 5 140306301 140391929 -1
1024 | 5 140345746 140391929 -1
1025 | 5 147319772 147337883 6
1026 | 5 175542299 175613893 6
1027 | 5 175628182 175728978 -1
1028 | 5 175628182 175728978 6
1029 | 5 177047795 177137738 -1
1030 | 5 177047795 177137738 6
1031 | 5 178796866 178801707 6
1032 | 5 180175803 180199289 6
1033 | 6 256882 296034 -1
1034 | 6 256882 296034 6
1035 | 6 312087 376690 -1
1036 | 6 312087 376690 6
1037 | 6 26101188 26322005 -1
1038 | 6 26667219 26732854 -1
1039 | 6 26667219 26732854 6
1040 | 6 26702386 26741686 -1
1041 | 6 26702386 26741686 6
1042 | 6 26753416 26761765 6
1043 | 6 26760048 26798341 -1
1044 | 6 26760048 26798341 6
1045 | 6 26789206 26865793 -1
1046 | 6 26789206 26865793 6
1047 | 6 29691116 29695073 -1
1048 | 6 29694377 29716826 -1
1049 | 6 29794755 29798899 -1
1050 | 6 29855536 29858857 -1
1051 | 6 29909206 29913545 6
1052 | 6 29910246 29913661 -1
1053 | 6 29973747 29977733 -1
1054 | 6 30144881 30149213 6
1055 | 6 30227338 30234728 -1
1056 | 6 30457182 30461982 -1
1057 | 6 30994260 30997584 6
1058 | 6 31236525 31239913 -1
1059 | 6 31321648 31324989 -1
1060 | 6 31967263 31981050 -1
1061 | 6 31967263 31981050 6
1062 | 6 32000001 32013904 -1
1063 | 6 32000001 32013904 6
1064 | 6 32407618 32412826 -1
1065 | 6 32485153 32498006 -1
1066 | 6 32520489 32527779 -1
1067 | 6 32546546 32557613 -1
1068 | 6 32605182 32611429 -1
1069 | 6 32627240 32634466 -1
1070 | 6 32709162 32714664 -1
1071 | 6 32723874 32731330 -1
1072 | 6 32780539 32784825 -1
1073 | 6 32902405 32908847 -1
1074 | 6 32916390 32920899 -1
1075 | 6 32971959 32977389 -1
1076 | 6 33032345 33048555 -1
1077 | 6 33043702 33057473 -1
1078 | 6 33080292 33096890 -1
1079 | 6 35519398 35566025 6
1080 | 6 35754635 35766452 -1
1081 | 6 35754635 35766452 6
1082 | 6 37960398 37988416 6
1083 | 6 49428429 49448426 6
1084 | 6 57196803 57230229 -1
1085 | 6 57196803 57230229 6
1086 | 6 57501659 57522751 -1
1087 | 6 57501659 57522751 6
1088 | 6 57558598 57583817 -1
1089 | 6 57558598 57583817 6
1090 | 6 57686350 57979578 -1
1091 | 6 57686350 57979578 6
1092 | 6 58015167 58087659 -1
1093 | 6 58015167 58087659 6
1094 | 6 58139548 58203156 -1
1095 | 6 58139548 58203156 6
1096 | 6 58189808 58265402 -1
1097 | 6 58189808 58265402 6
1098 | 6 58293347 58583366 -1
1099 | 6 58293347 58583366 6
1100 | 6 58617083 58667775 -1
1101 | 6 58617083 58667775 6
1102 | 6 58656983 58726206 -1
1103 | 6 58656983 58726206 6
1104 | 6 62077884 62128589 -1
1105 | 6 62077884 62128589 6
1106 | 6 69226114 69241900 6
1107 | 6 74707312 74718327 6
1108 | 6 89762022 89766201 6
1109 | 6 124433668 124471085 6
1110 | 6 132019327 132035259 -1
1111 | 6 132019327 132035259 6
1112 | 6 137381783 137418630 6
1113 | 6 148978998 148994723 6
1114 | 6 161243390 161268788 6
1115 | 6 161276243 161282459 6
1116 | 7 6900602 6923090 6
1117 | 7 29692579 29740677 -1
1118 | 7 29692579 29740677 6
1119 | 7 30140520 30147179 6
1120 | 7 32752291 32800987 -1
1121 | 7 32752291 32800987 6
1122 | 7 35139141 35281183 -1
1123 | 7 35139141 35281183 6
1124 | 7 38286090 38304922 6
1125 | 7 51435267 51477367 6
1126 | 7 51741311 51751832 6
1127 | 7 52730082 52745291 6
1128 | 7 56855584 57162786 -1
1129 | 7 56855584 57162786 6
1130 | 7 57535856 57556890 6
1131 | 7 62750584 62825961 -1
1132 | 7 62750584 62825961 6
1133 | 7 62842903 62918378 -1
1134 | 7 62842903 62918378 6
1135 | 7 62937801 63248932 -1
1136 | 7 62937801 63248932 6
1137 | 7 64583655 64679534 -1
1138 | 7 64583655 64679534 6
1139 | 7 65094802 65189995 -1
1140 | 7 65094802 65189995 6
1141 | 7 65261543 65275079 -1
1142 | 7 65261543 65275079 6
1143 | 7 66100418 66135119 6
1144 | 7 66485330 66527470 -1
1145 | 7 66485330 66527470 6
1146 | 7 66527115 66671581 -1
1147 | 7 66527115 66671581 6
1148 | 7 66673405 66738378 -1
1149 | 7 66673405 66738378 6
1150 | 7 71989818 72001856 -1
1151 | 7 71989818 72001856 6
1152 | 7 72006326 72070627 -1
1153 | 7 72006326 72070627 6
1154 | 7 72070393 72214564 -1
1155 | 7 72070393 72214564 6
1156 | 7 72229240 72271431 -1
1157 | 7 72229240 72271431 6
1158 | 7 72317259 72329307 -1
1159 | 7 72317259 72329307 6
1160 | 7 72329132 72528311 -1
1161 | 7 72329132 72528311 6
1162 | 7 72402419 72442357 -1
1163 | 7 72402419 72442357 6
1164 | 7 72700438 72744454 -1
1165 | 7 72700438 72744454 6
1166 | 7 74936724 75136597 -1
1167 | 7 74936724 75136597 6
1168 | 7 75069081 75094373 -1
1169 | 7 75069081 75094373 6
1170 | 7 75353434 75364590 6
1171 | 7 76063177 76079175 -1
1172 | 7 76063177 76079175 6
1173 | 7 76077337 76169053 -1
1174 | 7 76077337 76169053 6
1175 | 7 76177837 76238154 6
1176 | 7 76182346 76231606 -1
1177 | 7 76182346 76231606 6
1178 | 7 76231629 76247794 -1
1179 | 7 76231629 76247794 6
1180 | 7 76253690 76279029 -1
1181 | 7 76253690 76279029 6
1182 | 7 76280702 76575579 -1
1183 | 7 76280702 76575579 6
1184 | 7 76300165 76441924 6
1185 | 7 76446701 76472995 6
1186 | 7 76575577 76659693 -1
1187 | 7 76575577 76659693 6
1188 | 7 100663363 100702140 -1
1189 | 7 100670334 100677966 6
1190 | 7 102815780 102929218 -1
1191 | 7 102815780 102929218 6
1192 | 7 141760294 141765572 6
1193 | 7 143218861 143284234 -1
1194 | 7 143218861 143284234 6
1195 | 7 143284230 143347897 -1
1196 | 7 143284230 143347897 6
1197 | 7 143439416 143504793 -1
1198 | 7 143439416 143504793 6
1199 | 7 143503934 143571789 -1
1200 | 7 143503934 143571789 6
1201 | 7 143884041 143951886 -1
1202 | 7 143884041 143951886 6
1203 | 7 143953514 143969444 -1
1204 | 7 143953514 143969444 6
1205 | 7 143969444 143993281 -1
1206 | 7 143969444 143993281 6
1207 | 7 143993276 144061197 -1
1208 | 7 143993276 144061197 6
1209 | 7 144050481 144074376 -1
1210 | 7 144050481 144074376 6
1211 | 7 149552485 149598503 6
1212 | 7 149586782 149627572 -1
1213 | 7 149586782 149627572 6
1214 | 7 149608696 149787344 6
1215 | 7 149627606 149734041 -1
1216 | 7 149627606 149734041 6
1217 | 7 149735855 149839906 -1
1218 | 7 149735855 149839906 6
1219 | 7 149783717 149820398 6
1220 | 7 149839657 149960880 -1
1221 | 7 149839657 149960880 6
1222 | 7 152459466 152500312 -1
1223 | 7 152459466 152500312 6
1224 | 7 152609200 152614406 6
1225 | 7 153402703 153521746 -1
1226 | 7 153402703 153521746 6
1227 | 7 153477488 153635634 6
1228 | 7 153654978 153760413 -1
1229 | 7 153654978 153760413 6
1230 | 7 153754384 153864361 -1
1231 | 7 153754384 153864361 6
1232 | 7 156309527 156313390 6
1233 | 7 156730387 156734644 6
1234 | 8 240490 280244 -1
1235 | 8 240490 280244 6
1236 | 8 558101 568306 6
1237 | 8 987610 993986 6
1238 | 8 2180177 2195485 -1
1239 | 8 2180177 2195485 6
1240 | 8 2252865 2263484 -1
1241 | 8 2252865 2263484 6
1242 | 8 2329211 2343982 -1
1243 | 8 2329211 2343982 6
1244 | 8 6839960 6859070 -1
1245 | 8 6839960 6859070 6
1246 | 8 6859071 6878169 -1
1247 | 8 6859071 6878169 6
1248 | 8 7005690 7135647 6
1249 | 8 7200000 7436083 -1
1250 | 8 7200000 7436083 6
1251 | 8 7600001 7825413 -1
1252 | 8 7600001 7825413 6
1253 | 8 7866865 7999583 6
1254 | 8 8073162 8086839 6
1255 | 8 11979239 12011083 6
1256 | 8 12237293 12250639 6
1257 | 8 23013612 23016983 6
1258 | 8 25574618 25589778 6
1259 | 8 47522935 47541773 6
1260 | 8 60133831 60138594 6
1261 | 8 69182808 69193253 6
1262 | 8 122319757 122340805 6
1263 | 8 122399921 122429907 6
1264 | 8 140603856 140607159 6
1265 | 8 146237806 146253528 6
1266 | 9 2369682 2375648 6
1267 | 9 5302551 5337309 6
1268 | 9 7020506 7025374 6
1269 | 9 35170600 35174167 6
1270 | 9 37502272 37514745 6
1271 | 9 38995384 39022948 6
1272 | 9 39101565 39135714 6
1273 | 9 39280786 39663686 6
1274 | 9 40475834 40660685 -1
1275 | 9 40475834 40660685 6
1276 | 9 40838031 40940341 6
1277 | 9 41426027 41808718 6
1278 | 9 41971691 41985730 6
1279 | 9 42161182 42340263 -1
1280 | 9 42161182 42340263 6
1281 | 9 43494883 43522513 6
1282 | 9 43642700 43675607 6
1283 | 9 43657265 43843957 -1
1284 | 9 43657265 43843957 6
1285 | 9 43996569 44057218 6
1286 | 9 44176332 44406880 -1
1287 | 9 44176332 44406880 6
1288 | 9 44779311 44790593 6
1289 | 9 46682999 46910407 -1
1290 | 9 46682999 46910407 6
1291 | 9 66090011 66192215 6
1292 | 9 66344016 66404656 6
1293 | 9 68664181 68838946 -1
1294 | 9 68664181 68838946 6
1295 | 9 69003155 69176842 -1
1296 | 9 69003155 69176842 6
1297 | 9 69088570 69278385 -1
1298 | 9 69088570 69278385 6
1299 | 9 69815486 69840962 -1
1300 | 9 69815486 69840962 6
1301 | 9 70556536 70735468 -1
1302 | 9 70556536 70735468 6
1303 | 9 70841062 71031684 -1
1304 | 9 70841062 71031684 6
1305 | 9 72034109 72056250 6
1306 | 9 73905786 73910286 6
1307 | 9 79770194 79777456 6
1308 | 9 84518482 84539176 -1
1309 | 9 84518482 84539176 6
1310 | 9 84536419 84551491 -1
1311 | 9 84536419 84551491 6
1312 | 9 84539177 84554248 -1
1313 | 9 84539177 84554248 6
1314 | 9 84551492 84566585 -1
1315 | 9 84551492 84566585 6
1316 | 9 86509133 86518206 6
1317 | 9 115867018 115873961 -1
1318 | 9 115867018 115873961 6
1319 | 9 115875172 115882109 -1
1320 | 9 115875172 115882109 6
1321 | 9 117083744 117091757 6
1322 | 9 118580063 118590982 6
1323 | 9 119513360 119516782 6
1324 | 9 134617677 134621880 6
1325 | 9 135941062 135956200 6
1326 | 9 137197163 137200878 6
1327 | 9 141071531 141092563 -1
1328 | 9 141071531 141092563 6
1329 | GL000192.1 48724 413559 -1
1330 | GL000192.1 48724 413559 6
1331 | GL000209.1 40020 87282 6
1332 | GL000215.1 1 172544 6
1333 | GL000236.1 1 41933 6
1334 |
--------------------------------------------------------------------------------
/data/example_qcs.Rdata:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rgcgithub/clamms/f486d6b2dcb54c2f579592f62c9ed6642434cf70/data/example_qcs.Rdata
--------------------------------------------------------------------------------
/hmm.c:
--------------------------------------------------------------------------------
1 | #include "math.h"
2 | #include "stdio.h"
3 | #include "stdlib.h"
4 | #include "string.h"
5 |
6 | #include "hmm.h"
7 | #include "utils.h"
8 | #include "ltqnorm.c"
9 |
// Find the closest window after `window` that is usable (max_cn >= 0) and
// lies on the same chromosome.
//
//   window      index of the current window
//   n_windows   number of entries in window_chr / max_cn
//   window_chr  chromosome code of every window
//   max_cn      maximum copy number of every window; negative = filtered out
//
// Returns the index of that window, or -1 when there is none: the end of the
// arrays was reached, only filtered windows remain, or the next usable window
// belongs to a different chromosome.
int get_next_window(int window, int n_windows,
                    unsigned char *window_chr,
                    char *max_cn) {
    int next = window + 1;

    // Test the bound before dereferencing so that asking for the successor
    // of the last window never reads past the end of the arrays.
    while (next < n_windows && max_cn[next] < 0)
        next++;

    if (next >= n_windows || window_chr[next] != window_chr[window])
        return -1;
    return next;
}
21 |
// Find the closest window before `window` that is usable (max_cn >= 0) and
// lies on the same chromosome.
//
//   window      index of the current window
//   n_windows   number of entries in the arrays (not needed for the backward
//               scan; kept for symmetry with get_next_window)
//   window_chr  chromosome code of every window
//   max_cn      maximum copy number of every window; negative = filtered out
//
// Returns the index of that window, or -1 when there is none: the start of
// the arrays was reached, only filtered windows precede it, or the previous
// usable window belongs to a different chromosome.
int get_prev_window(int window, int n_windows,
                    unsigned char *window_chr,
                    char *max_cn) {
    int prev = window - 1;

    (void) n_windows;

    // Test the bound before dereferencing so that asking for the predecessor
    // of window 0 never reads index -1.
    while (prev >= 0 && max_cn[prev] < 0)
        prev--;

    if (prev < 0 || window_chr[prev] != window_chr[window])
        return -1;
    return prev;
}
33 |
34 | double transition_prob(int from, int to, double p, double f) {
35 | if (from == NORM) {
36 | if (to == NORM) return 1.0 - 2.0*p;
37 | else return p;
38 | } else if (from == DEL) {
39 | if (to == DEL) return f + (1.0 - f) * p;
40 | else if (to == NORM) return (1.0 - f) * (1.0 - 2.0*p);
41 | else return (1.0 - f) * p;
42 | } else if (from == DUP) {
43 | if (to == DUP) return f + (1.0 - f) * p;
44 | else if (to == NORM) return (1.0 - f) * (1.0 - 2.0*p);
45 | else return (1.0 - f) * p;
46 | } else {
47 | fprintf(stderr, "Invalid call to transition_prob: from %d to %d\n", from, to);
48 | exit(1);
49 | }
50 | }
51 |
52 | // cov should be already centered+scaled
53 | double homozygous_del_log_likelihood(double cov, int dist_is_point, double lambda) {
54 | if (dist_is_point) {
55 | if (cov < -0.98) return 3.912023; // log(50)
56 | else if (cov < 0.0) return -13.81551; // log(1e-6)
57 | else return -100.0;
58 | } else {
59 | double log_lambda = log(lambda);
60 | return log_lambda - lambda * (cov + 1.0);
61 | }
62 | }
63 |
64 | // cov should be already centered+scaled
65 | double gaussian_log_likelihood(double cov, int cn, double sigma_dip) {
66 | double var = sigma_dip * sigma_dip;
67 | double const_term = -0.5 * log(M_2_PI * var);
68 | double scale_term = 1.0 / (2.0 * var);
69 | double dev = cov + 1.0 - 0.5 * cn;
70 | return const_term - scale_term * dev * dev;
71 | }
72 |
73 | int expected_copy_number(char sex, unsigned char chr) {
74 | if (sex == 'F' && chr == CHR_Y)
75 | return NOT_PRESENT;
76 | else if ((sex == 'M' && (chr == CHR_X || chr == CHR_Y)) || chr == CHR_M)
77 | return HAPLOID;
78 | else
79 | return DIPLOID;
80 | }
81 |
82 | double gc_confidence_term(double gc, double gc_min, double gc_max) {
83 | if (gc < gc_min || gc > gc_max) return 0.0;
84 | double constant;
85 | if (gc <= 0.5) {
86 | constant = 1.0/( 0.5 - gc_min + GC_BUFFER );
87 | }
88 | else if (gc > 0.5) {
89 | constant = 1.0/( gc_max - 0.5 + GC_BUFFER );
90 | }
91 | double x = constant * fabs(gc - 0.5);
92 | double x_2 = x * x;
93 | double x_4 = x_2 * x_2;
94 | double x_8 = x_4 * x_4;
95 | double x_16 = x_8 * x_8;
96 | double x_18 = x_16 * x_2;
97 | double y = 1.0 - x_18;
98 | double y_2 = y * y;
99 | double y_4 = y_2 * y_2;
100 | double y_8 = y_4 * y_4;
101 | double y_16 = y_8 * y_8;
102 | double y_18 = y_16 * y_2;
103 | if (y_18 < 0.0) return 0.0;
104 | else return y_18;
105 | }
106 |
107 | double cov_confidence_term(int expected_cn,
108 | char max_cn,
109 | double coverage,
110 | unsigned char hom_del_flag,
111 | double lambda,
112 | double sigma_dip) {
113 | if (hom_del_flag && coverage < HOM_DEL_THRESHOLD)
114 | return 1.0;
115 |
116 | int i;
117 | int ml_nonzero_cn = 1;
118 | double ml_cov_sigma = fabs(coverage + 0.5) / (sigma_dip * SIGMA_RATIO_CN1);
119 | for (i = 2; i <= max_cn; i++) {
120 | double model_sigma = sigma_dip;
121 | if (i == 3) model_sigma *= SIGMA_RATIO_CN3;
122 | else if (i == 4) model_sigma *= SIGMA_RATIO_CN4;
123 | else if (i == 5) model_sigma *= SIGMA_RATIO_CN5;
124 | else if (i == 6) model_sigma *= SIGMA_RATIO_CN6;
125 | double cov_sigma = fabs(coverage - (-1.0 + 0.5*i)) / model_sigma;
126 | if (cov_sigma < ml_cov_sigma) {
127 | ml_cov_sigma = cov_sigma;
128 | ml_nonzero_cn = i;
129 | }
130 | }
131 |
132 | if (ml_nonzero_cn == 1 && !hom_del_flag) {
133 | double expo_tail_prob = exp(-lambda * (coverage + 1.0));
134 | double gaus_tail_prob = 0.5 * erfc(ml_cov_sigma / sqrt(2.0));
135 | if (expo_tail_prob > gaus_tail_prob) {
136 | if (expo_tail_prob > 0.5)
137 | return 1.0;
138 | else
139 | ml_cov_sigma = -ltqnorm(expo_tail_prob); // gaussian inverse cdf
140 | }
141 | }
142 |
143 | double max_sigma = 3.598858; // -> y_18 @ 3-sigma = 0.5
144 | if (ml_nonzero_cn > expected_cn)
145 | max_sigma = 2.9990483; // -> y_18 @ 2.5-sigma = 0.5
146 |
147 | if (ml_cov_sigma < 0.0)
148 | return 1.0; // shouldn't happen, but checking just in case there's floating point error or something
149 | else if (ml_cov_sigma > max_sigma)
150 | return 0.0;
151 |
152 | double x = ml_cov_sigma / max_sigma;
153 | double x_2 = x * x;
154 | double x_4 = x_2 * x_2;
155 | double x_8 = x_4 * x_4;
156 | double x_16 = x_8 * x_8;
157 | double x_18 = x_16 * x_2;
158 | double y = 1.0 - x_18;
159 | double y_2 = y * y;
160 | double y_4 = y_2 * y_2;
161 | double y_8 = y_4 * y_4;
162 | double y_16 = y_8 * y_8;
163 | double y_18 = y_16 * y_2;
164 | if (y_18 < 0.0) return 0.0;
165 | else return y_18;
166 | }
167 |
168 | void read_model_data(FILE *input,
169 | int n_windows,
170 | unsigned char *window_chr,
171 | int *window_start,
172 | int *window_end,
173 | char *max_cn,
174 | unsigned char *hom_del_flag,
175 | double *window_gc,
176 | double *lambda,
177 | double *mu_dip,
178 | double *sigma_dip,
179 | double *model_conf) {
180 | int i = 0;
181 | char *line = NULL;
182 | char *pos;
183 | size_t line_len;
184 | ssize_t bytes_read;
185 |
186 | while ((bytes_read = getline(&line, &line_len, input)) != -1) {
187 | if (*line == 'X') window_chr[i] = CHR_X;
188 | else if (*line == 'Y') window_chr[i] = CHR_Y;
189 | else if (*line == 'M') window_chr[i] = CHR_M;
190 | else sscanf(line, "%hhu", window_chr + i);
191 |
192 | pos = line;
193 | pos = strchr(pos+1, '\t');
194 | sscanf(pos+1, "%u", window_start + i);
195 | pos = strchr(pos+1, '\t');
196 | sscanf(pos+1, "%u", window_end + i);
197 | pos = strchr(pos+1, '\t');
198 | pos++;
199 | sscanf(pos, "%hhd", max_cn + i);
200 | //if (max_cn[i] == 2) max_cn[i] == 3; // we limit common del regions to CN 2
201 | // for purposes of fitting model parameters
202 | // but we allow the possibility of a rare CN 3
203 | // when making the final CNV calls
204 | window_gc[i] = strtod(pos+2, &pos);
205 | pos = strchr(pos+1, '\t');
206 | pos++;
207 | sscanf(pos, "%hhu", hom_del_flag + i);
208 | lambda[i] = strtod(pos+1, &pos);
209 | mu_dip[i] = strtod(pos+1, &pos);
210 | sigma_dip[i] = strtod(pos+1, &pos);
211 |
212 | i++;
213 | }
214 | }
215 |
216 | void read_coverage_data(FILE *input,
217 | int n_windows,
218 | unsigned char *window_chr,
219 | int *window_start,
220 | int *window_end,
221 | double *cov,
222 | double *mu_dip) {
223 | int i;
224 | char *line = NULL;
225 | char *pos;
226 | size_t line_len;
227 | ssize_t bytes_read;
228 |
229 | for (i = 0; i < n_windows; i++) {
230 | bytes_read = getline(&line, &line_len, input);
231 | if (bytes_read == -1) {
232 | fputs("ERROR: coverage file and models file must be for exactly the same set of genomic windows\n.", stderr);
233 | exit(1);
234 | }
235 |
236 | unsigned char chr;
237 | int start, end;
238 |
239 | if (*line == 'X') chr = CHR_X;
240 | else if (*line == 'Y') chr = CHR_Y;
241 | else if (*line == 'M') chr = CHR_M;
242 | else sscanf(line, "%hhu", &chr);
243 |
244 | pos = line;
245 | pos = strchr(pos+1, '\t');
246 | sscanf(pos+1, "%u", &start);
247 | pos = strchr(pos+1, '\t');
248 | sscanf(pos+1, "%u", &end);
249 |
250 | if (chr != window_chr[i] || start != window_start[i] || end != window_end[i]) {
251 | fputs("ERROR: coverage file and models file must be for exactly the same set of genomic windows\n.", stderr);
252 | exit(1);
253 | }
254 |
255 | pos = strchr(pos+1, '\t');
256 | cov[i] = (strtod(pos+1, NULL) - mu_dip[i]) / mu_dip[i];
257 | }
258 | }
259 |
// Compute the sample-independent part of each window's model confidence.
//
// For every usable window (max_cn >= 0) the confidence starts as the GC term
// from gc_confidence_term(). If the window directly abuts its previous or
// next usable window on the same chromosome (its start equals that window's
// end, or its end equals that window's start), the confidence is capped at
// 2/3. Filtered windows are left untouched.
//
//   model_conf  output array, one entry per window
void calc_base_model_conf(int n_windows,
                          double gc_min,
                          double gc_max,
                          unsigned char *window_chr,
                          int *window_start,
                          int *window_end,
                          char *max_cn,
                          double *window_gc,
                          double *model_conf) {
    int w;
    for (w = 0; w < n_windows; w++) {
        if (max_cn[w] < 0) continue;

        int before = get_prev_window(w, n_windows, window_chr, max_cn);
        int after = get_next_window(w, n_windows, window_chr, max_cn);

        // -1 can never equal a real coordinate, so it means "no neighbour"
        int before_end = (before != -1) ? window_end[before] : -1;
        int after_start = (after != -1) ? window_start[after] : -1;

        double conf = gc_confidence_term(window_gc[w], gc_min, gc_max);
        int abuts_neighbour = (window_start[w] == before_end) ||
                              (window_end[w] == after_start);
        if (abuts_neighbour)
            conf = fmin(conf, 2.0/3.0);

        model_conf[w] = conf;
    }
}
284 |
// Fold the sample-specific coverage confidence into model_conf.
//
// For every usable window (max_cn >= 0), model_conf[w] becomes the smaller of
// its current value and cov_confidence_term() evaluated for this sample.
// Filtered windows are left untouched.
//
// cov must already be centered and scaled around mu_dip.
void calc_sample_specific_model_conf(int n_windows,
                                     char sex,
                                     unsigned char *window_chr,
                                     char *max_cn,
                                     double *cov,
                                     unsigned char *hom_del_flag,
                                     double *lambda,
                                     double *sigma_dip,
                                     double *model_conf) {
    int w;
    for (w = 0; w < n_windows; w++) {
        if (max_cn[w] >= 0) {
            int expected = expected_copy_number(sex, window_chr[w]);
            double cov_conf = cov_confidence_term(expected, max_cn[w], cov[w],
                                                  hom_del_flag[w], lambda[w],
                                                  sigma_dip[w]);
            model_conf[w] = fmin(model_conf[w], cov_conf);
        }
    }
}
304 |
305 | void calc_cn_emission_logp(int n_windows,
306 | char sex,
307 | unsigned char *window_chr,
308 | char *max_cn,
309 | double *cov,
310 | unsigned char *hom_del_flag,
311 | double *lambda,
312 | double *sigma_dip,
313 | double **cn_emission_logp) {
314 | int i, j;
315 | for (i = 0; i < n_windows; i++) {
316 | if (max_cn[i] < 0) continue;
317 | int norm_cn = expected_copy_number(sex, window_chr[i]);
318 | if (norm_cn == HAPLOID) max_cn[i] = 2;
319 |
320 | // first just compute the likelihoods
321 |
322 | cn_emission_logp[i][0] = homozygous_del_log_likelihood(cov[i], hom_del_flag[i], lambda[i]);
323 | for (j = 1; j <= MAX_CN; j++)
324 | cn_emission_logp[i][j] = gaussian_log_likelihood(cov[i], j, sigma_dip[i]);
325 |
326 | // now use bayes theorem to go from likelihoods to actual probabilities
327 | // we use a uniform prior, since our prior beliefs about CNV probabilities
328 | // are already encoded in the transition matrix of the HMM
329 | double evidence = 0.0;
330 | for (j = 0; j <= max_cn[i]; j++)
331 | evidence += exp(cn_emission_logp[i][j]);
332 | evidence = log(evidence);
333 | for (j = 0; j <= max_cn[i]; j++)
334 | cn_emission_logp[i][j] -= evidence;
335 | }
336 | }
337 |
338 | void calc_hmm_state_emission_logp(int n_windows,
339 | char sex,
340 | unsigned char *window_chr,
341 | char *max_cn,
342 | double **cn_emission_logp,
343 | double **hmm_state_emission_logp) {
344 | int i, j;
345 | for (i = 0; i < n_windows; i++) {
346 | int norm_cn = expected_copy_number(sex, window_chr[i]);
347 | if (norm_cn == NOT_PRESENT) {
348 | hmm_state_emission_logp[i][DEL] = -100.0;
349 | hmm_state_emission_logp[i][NORM] = 0.0;
350 | hmm_state_emission_logp[i][DUP] = -100.0;
351 | } else if (norm_cn == HAPLOID) {
352 | hmm_state_emission_logp[i][DEL] = cn_emission_logp[i][0];
353 | hmm_state_emission_logp[i][NORM] = cn_emission_logp[i][1];
354 | hmm_state_emission_logp[i][DUP] = cn_emission_logp[i][2];
355 | } else {
356 | hmm_state_emission_logp[i][DEL] = log(exp(cn_emission_logp[i][0]) +
357 | exp(cn_emission_logp[i][1]));
358 | hmm_state_emission_logp[i][NORM] = cn_emission_logp[i][2];
359 | double dup_tmp = 0.0;
360 | for (j = 3; j <= max_cn[i]; j++)
361 | dup_tmp += exp(cn_emission_logp[i][j]);
362 | hmm_state_emission_logp[i][DUP] = log(dup_tmp);
363 | }
364 | }
365 | }
366 |
367 | unsigned char *viterbi(int n_windows,
368 | int direction,
369 | unsigned char *window_chr,
370 | int *window_start,
371 | int *window_end,
372 | char *max_cn,
373 | double *model_conf,
374 | double **hmm_state_emission_logp,
375 | double cnv_rate,
376 | double mean_cnv_length) {
377 | int i, j;
378 | unsigned char **ml_prev_state = (unsigned char **) malloc(n_windows * sizeof(unsigned char *));
379 | unsigned char *ml_final_state = (unsigned char *) malloc(N_CHROM * sizeof(unsigned char));
380 | for (i = 0; i < n_windows; i++)
381 | ml_prev_state[i] = (unsigned char *) malloc(N_STATES * sizeof(unsigned char));
382 | unsigned char *ml_state_seq = (unsigned char *) malloc(n_windows * sizeof(unsigned char *));
383 |
384 | int start_window, end_window, delta;
385 | if (direction == FORWARD) {
386 | start_window = 0;
387 | end_window = n_windows;
388 | delta = 1;
389 | } else {
390 | start_window = n_windows-1;
391 | end_window = -1;
392 | delta = -1;
393 | }
394 |
395 | unsigned char last_chr = 0;
396 | int last_coord;
397 |
398 | double v_prev[N_STATES]; // viterbi state scores for previous window
399 | double v_cur[N_STATES]; // viterbi state scores for current window
400 |
401 | unsigned char tmp_state;
402 | double tmp_logp;
403 | for (i = start_window; i != end_window; i += delta) {
404 | if (max_cn[i] < 0) continue;
405 |
406 | // if at a new chromosome, restart the algorithm
407 | if (window_chr[i] != last_chr) {
408 | if (last_chr != 0) {
409 | tmp_state = DEL; tmp_logp = v_prev[DEL];
410 | if (v_prev[NORM] > tmp_logp) { tmp_state = NORM; tmp_logp = v_prev[NORM]; }
411 | if (v_prev[DUP] > tmp_logp) { tmp_state = DUP; tmp_logp = v_prev[DUP]; }
412 | ml_final_state[last_chr-1] = tmp_state;
413 | }
414 | last_chr = window_chr[i];
415 | if (direction == FORWARD)
416 | last_coord = window_start[i];
417 | else
418 | last_coord = window_end[i];
419 | v_prev[DEL] = log(cnv_rate);
420 | v_prev[NORM] = log(1.0 - 2.0*cnv_rate);
421 | v_prev[DUP] = log(cnv_rate);
422 | }
423 |
424 | v_cur[DEL] = hmm_state_emission_logp[i][DEL];
425 | v_cur[NORM] = hmm_state_emission_logp[i][NORM];
426 | v_cur[DUP] = hmm_state_emission_logp[i][DUP];
427 |
428 | // if we're not confident in the validity of the model at this window
429 | // don't give as much weight to its emission likelihoods
430 | // note that this doesn't bias in favor of the DIP state:
431 | // the effect of the window's predictions on all states is discounted
432 | v_cur[DEL] *= model_conf[i];
433 | v_cur[NORM] *= model_conf[i];
434 | v_cur[DUP] *= model_conf[i];
435 |
436 | // attenuation factor for state transitions
437 | // the probability of a CNV in the last window being extended to this one
438 | // is proportional to this factor
439 | double attenuation;
440 | if (direction == FORWARD)
441 | attenuation = exp(-((double)(window_start[i]-last_coord)) / mean_cnv_length);
442 | else
443 | attenuation = exp(-((double)(last_coord-window_end[i])) / mean_cnv_length);
444 |
445 | // find most likely previous state for DEL
446 | double del_del = v_prev[DEL] + log(transition_prob(DEL, DEL, cnv_rate, attenuation));
447 | double norm_del = v_prev[NORM] + log(transition_prob(NORM, DEL, cnv_rate, attenuation));
448 | double dup_del = v_prev[DUP] + log(transition_prob(DUP, DEL, cnv_rate, attenuation));
449 | tmp_state = DEL; tmp_logp = del_del;
450 | if (norm_del > tmp_logp) { tmp_state = NORM; tmp_logp = norm_del; }
451 | if (dup_del > tmp_logp) { tmp_state = DUP; tmp_logp = dup_del; }
452 | ml_prev_state[i][DEL] = tmp_state;
453 | v_cur[DEL] += tmp_logp;
454 |
455 | // find most likely previous state for NORM
456 | double del_norm = v_prev[DEL] + log(transition_prob(DEL, NORM, cnv_rate, attenuation));
457 | double norm_norm = v_prev[NORM] + log(transition_prob(NORM, NORM, cnv_rate, attenuation));
458 | double dup_norm = v_prev[DUP] + log(transition_prob(DUP, NORM, cnv_rate, attenuation));
459 | tmp_state = DEL; tmp_logp = del_norm;
460 | if (norm_norm > tmp_logp) { tmp_state = NORM; tmp_logp = norm_norm; }
461 | if (dup_norm > tmp_logp) { tmp_state = DUP; tmp_logp = dup_norm; }
462 | ml_prev_state[i][NORM] = tmp_state;
463 | v_cur[NORM] += tmp_logp;
464 |
465 | // find most likely previous state for DUP
466 | double del_dup = v_prev[DEL] + log(transition_prob(DEL, DUP, cnv_rate, attenuation));
467 | double norm_dup = v_prev[NORM] + log(transition_prob(NORM, DUP, cnv_rate, attenuation));
468 | double dup_dup = v_prev[DUP] + log(transition_prob(DUP, DUP, cnv_rate, attenuation));
469 | tmp_state = DEL; tmp_logp = del_dup;
470 | if (norm_dup > tmp_logp) { tmp_state = NORM; tmp_logp = norm_dup; }
471 | if (dup_dup > tmp_logp) { tmp_state = DUP; tmp_logp = dup_dup; }
472 | ml_prev_state[i][DUP] = tmp_state;
473 | v_cur[DUP] += tmp_logp;
474 |
475 | // update lagging statistics
476 | if (direction == FORWARD)
477 | last_coord = window_start[i];
478 | else
479 | last_coord = window_end[i];
480 | v_prev[DEL] = v_cur[DEL];
481 | v_prev[NORM] = v_cur[NORM];
482 | v_prev[DUP] = v_cur[DUP];
483 | }
484 |
485 | tmp_state = DEL; tmp_logp = v_prev[DEL];
486 | if (v_prev[NORM] > tmp_logp) { tmp_state = NORM; tmp_logp = v_prev[NORM]; }
487 | if (v_prev[DUP] > tmp_logp) { tmp_state = DUP; tmp_logp = v_prev[DUP]; }
488 | ml_final_state[last_chr-1] = tmp_state;
489 |
490 | // backtrack through the DAG to find the maximum likelihood state sequence
491 | if (direction == FORWARD) {
492 | start_window = n_windows-1;
493 | end_window = -1;
494 | delta = -1;
495 | } else {
496 | start_window = 0;
497 | end_window = n_windows;
498 | delta = 1;
499 | }
500 |
501 | last_chr = 0;
502 | unsigned char last_state;
503 | int lookbehind = -delta;
504 | for (i = start_window; i != end_window; i += delta) {
505 | if (max_cn[i] < 0) { lookbehind -= delta; continue; };
506 | if (window_chr[i] != last_chr) {
507 | last_chr = window_chr[i];
508 | ml_state_seq[i] = ml_final_state[window_chr[i]-1];
509 | } else {
510 | ml_state_seq[i] = ml_prev_state[i+lookbehind][last_state];
511 | }
512 |
513 | last_state = ml_state_seq[i];
514 | lookbehind = -delta;
515 | }
516 |
517 | for (i = 0; i < n_windows; i++)
518 | free(ml_prev_state[i]);
519 | free(ml_prev_state);
520 | free(ml_final_state);
521 | return ml_state_seq;
522 | }
523 |
524 | void mask_sequence(int n_windows, char *max_cn,
525 | unsigned char *seq1, unsigned char *seq2) {
526 | int i;
527 | for (i = 0; i < n_windows; i++) {
528 | if (max_cn[i] < 0) continue;
529 | if (seq1[i] != seq2[i]) seq1[i] = NORM;
530 | }
531 | }
532 |
533 | void forward_backward(int n_windows,
534 | unsigned char *window_chr,
535 | int *window_start,
536 | int *window_end,
537 | char *max_cn,
538 | double *model_conf,
539 | double **hmm_state_emission_logp,
540 | double cnv_rate,
541 | double mean_cnv_length,
542 | double **forward_scaled_prob,
543 | double **backward_scaled_prob) {
544 | int i, j;
545 | double attenuation;
546 | // emission likelihoods for the window (raw, not log)
547 | double E_del, E_norm, E_dup;
548 | // the forward/backward scaled prob for the last non-filtered window
549 | double prev_del, prev_norm, prev_dup;
550 | double scale_factor;
551 |
552 | unsigned char last_chr;
553 | int last_window;
554 | int last_coord;
555 |
556 | // compute forward posteriors
557 | last_chr = 0;
558 | for (i = 0; i < n_windows; i++) {
559 | if (max_cn[i] < 0) continue;
560 |
561 | E_del = exp(hmm_state_emission_logp[i][DEL] * model_conf[i]);
562 | E_norm = exp(hmm_state_emission_logp[i][NORM] * model_conf[i]);
563 | E_dup = exp(hmm_state_emission_logp[i][DUP] * model_conf[i]);
564 |
565 | if (window_chr[i] != last_chr) {
566 | last_chr = window_chr[i];
567 | forward_scaled_prob[i][DEL] = E_del * cnv_rate;
568 | forward_scaled_prob[i][NORM] = E_norm * (1.0 - 2.0*cnv_rate);
569 | forward_scaled_prob[i][DUP] = E_dup * cnv_rate;
570 | } else {
571 | attenuation = exp(-((double)(window_start[i]-last_coord)) / mean_cnv_length);
572 |
573 | forward_scaled_prob[i][DEL] =
574 | transition_prob(DEL, DEL, cnv_rate, attenuation) * prev_del;
575 | forward_scaled_prob[i][DEL] +=
576 | transition_prob(NORM, DEL, cnv_rate, attenuation) * prev_norm;
577 | forward_scaled_prob[i][DEL] +=
578 | transition_prob(DUP, DEL, cnv_rate, attenuation) * prev_dup;
579 | forward_scaled_prob[i][DEL] *= E_del;
580 |
581 | forward_scaled_prob[i][NORM] =
582 | transition_prob(DEL, NORM, cnv_rate, attenuation) * prev_del;
583 | forward_scaled_prob[i][NORM] +=
584 | transition_prob(NORM, NORM, cnv_rate, attenuation) * prev_norm;
585 | forward_scaled_prob[i][NORM] +=
586 | transition_prob(DUP, NORM, cnv_rate, attenuation) * prev_dup;
587 | forward_scaled_prob[i][NORM] *= E_norm;
588 |
589 | forward_scaled_prob[i][DUP] =
590 | transition_prob(DEL, DUP, cnv_rate, attenuation) * prev_del;
591 | forward_scaled_prob[i][DUP] +=
592 | transition_prob(NORM, DUP, cnv_rate, attenuation) * prev_norm;
593 | forward_scaled_prob[i][DUP] +=
594 | transition_prob(DUP, DUP, cnv_rate, attenuation) * prev_dup;
595 | forward_scaled_prob[i][DUP] *= E_dup;
596 |
597 | }
598 |
599 | scale_factor = forward_scaled_prob[i][DEL] +
600 | forward_scaled_prob[i][NORM] +
601 | forward_scaled_prob[i][DUP];
602 | forward_scaled_prob[i][DEL] /= scale_factor;
603 | forward_scaled_prob[i][NORM] /= scale_factor;
604 | forward_scaled_prob[i][DUP] /= scale_factor;
605 |
606 | last_coord = window_start[i];
607 | prev_del = forward_scaled_prob[i][DEL];
608 | prev_norm = forward_scaled_prob[i][NORM];
609 | prev_dup = forward_scaled_prob[i][DUP];
610 | }
611 |
612 | // compute backward posteriors
613 | last_chr = 0;
614 | last_window = n_windows;
615 | for (i = n_windows-1; i >= 0; i--) {
616 | if (max_cn[i] < 0) continue;
617 |
618 | if (window_chr[i] != last_chr) {
619 | last_chr = window_chr[i];
620 | backward_scaled_prob[i][DEL] = 1.0;
621 | backward_scaled_prob[i][NORM] = 1.0;
622 | backward_scaled_prob[i][DUP] = 1.0;
623 | } else {
624 | E_del = exp(hmm_state_emission_logp[last_window][DEL] * model_conf[last_window]);
625 | E_norm = exp(hmm_state_emission_logp[last_window][NORM] * model_conf[last_window]);
626 | E_dup = exp(hmm_state_emission_logp[last_window][DUP] * model_conf[last_window]);
627 | attenuation = exp(-((double)(last_coord-window_end[i])) / mean_cnv_length);
628 |
629 | backward_scaled_prob[i][DEL] =
630 | transition_prob(DEL, DEL, cnv_rate, attenuation) * E_del * prev_del;
631 | backward_scaled_prob[i][DEL] +=
632 | transition_prob(DEL, NORM, cnv_rate, attenuation) * E_norm * prev_norm;
633 | backward_scaled_prob[i][DEL] +=
634 | transition_prob(DEL, DUP, cnv_rate, attenuation) * E_dup * prev_dup;
635 |
636 | backward_scaled_prob[i][NORM] =
637 | transition_prob(NORM, DEL, cnv_rate, attenuation) * E_del * prev_del;
638 | backward_scaled_prob[i][NORM] +=
639 | transition_prob(NORM, NORM, cnv_rate, attenuation) * E_norm * prev_norm;
640 | backward_scaled_prob[i][NORM] +=
641 | transition_prob(NORM, DUP, cnv_rate, attenuation) * E_dup * prev_dup;
642 |
643 | backward_scaled_prob[i][DUP] =
644 | transition_prob(DUP, DEL, cnv_rate, attenuation) * E_del * prev_del;
645 | backward_scaled_prob[i][DUP] +=
646 | transition_prob(DUP, NORM, cnv_rate, attenuation) * E_norm * prev_norm;
647 | backward_scaled_prob[i][DUP] +=
648 | transition_prob(DUP, DUP, cnv_rate, attenuation) * E_dup * prev_dup;
649 | }
650 |
651 | scale_factor = backward_scaled_prob[i][DEL] +
652 | backward_scaled_prob[i][NORM] +
653 | backward_scaled_prob[i][DUP];
654 | backward_scaled_prob[i][DEL] /= scale_factor;
655 | backward_scaled_prob[i][NORM] /= scale_factor;
656 | backward_scaled_prob[i][DUP] /= scale_factor;
657 |
658 | last_coord = window_end[i];
659 | last_window = i;
660 | prev_del = backward_scaled_prob[i][DEL];
661 | prev_norm = backward_scaled_prob[i][NORM];
662 | prev_dup = backward_scaled_prob[i][DUP];
663 | }
664 | }
665 |
--------------------------------------------------------------------------------
/hmm.h:
--------------------------------------------------------------------------------
// Declarations for the three-state (DEL / NORM / DUP) hidden Markov model
// used to call copy number variants from per-window coverage.
#ifndef HMM_H
#define HMM_H

#include <stdio.h> // FILE, used in the reader prototypes below

#define N_STATES 3 // DEL, NORM, DUP
// hom/het distinctions for DEL and DUP are not part of the HMM
// but are rather estimated separately for the CNV calls that the HMM makes
#define N_CHROM 25 // 1-22, X, Y, M
#define CHR_X 23
#define CHR_Y 24
#define CHR_M 25

// array indexes for HMM states
#define DEL 0
#define NORM 1
#define DUP 2

// possible values for NORM state
// NORM = diploid for autosome and female chrX
// NORM = haploid for male chrX/Y and both sexes chrM
// NORM = not present for female chrY
#define NOT_PRESENT 0
#define HAPLOID 1
#define DIPLOID 2

// directions to run Viterbi algorithm
#define FORWARD 1
#define BACKWARD (-1)

// variables for defining GC-content weights
#define GC_BUFFER 0.01

// Index of the next / previous window that is not filtered (max_cn >= 0) and
// is on the same chromosome as `window`, or -1 if there is none.
int get_next_window(int window, int n_windows,
                    unsigned char *window_chr, char *max_cn);
int get_prev_window(int window, int n_windows,
                    unsigned char *window_chr, char *max_cn);

// Probability of moving between two HMM states; p is the CNV rate and f the
// weight given to staying in the current CNV state.
double transition_prob(int from, int to, double p, double f);

// Log-likelihoods of a centered+scaled coverage value under copy number 0
// and under copy number `cn`.
double homozygous_del_log_likelihood(double cov, int dist_is_point, double lambda);
double gaussian_log_likelihood(double cov, int cn, double sigma_dip);

// NOT_PRESENT, HAPLOID or DIPLOID for the given sex ('F' / 'M') and chromosome.
int expected_copy_number(char sex, unsigned char chr);

// Read per-window model parameters; line k fills element k of each array.
void read_model_data(FILE *input,
                     int n_windows,
                     unsigned char *window_chr,
                     int *window_start,
                     int *window_end,
                     char *max_cn,
                     unsigned char *hom_del_flag,
                     double *window_gc,
                     double *lambda,
                     double *mu_dip,
                     double *sigma_dip,
                     double *model_conf);

// Read one sample's coverage for the same windows and store it in `cov`
// as (coverage - mu_dip) / mu_dip.
void read_coverage_data(FILE *input,
                        int n_windows,
                        unsigned char *window_chr,
                        int *window_start,
                        int *window_end,
                        double *cov,
                        double *mu_dip);

// Sample-independent model confidence (GC content, abutting windows).
void calc_base_model_conf(int n_windows,
                          double gc_min,
                          double gc_max,
                          unsigned char *window_chr,
                          int *window_start,
                          int *window_end,
                          char *max_cn,
                          double *window_gc,
                          double *model_conf);

// Lower model_conf where the sample's coverage fits no copy-number model well.
void calc_sample_specific_model_conf(int n_windows,
                                     char sex,
                                     unsigned char *window_chr,
                                     char *max_cn,
                                     double *cov,
                                     unsigned char *hom_del_flag,
                                     double *lambda,
                                     double *sigma_dip,
                                     double *model_conf);

// Per-window log probabilities of each copy number, indexed [window][cn].
void calc_cn_emission_logp(int n_windows,
                           char sex,
                           unsigned char *window_chr,
                           char *max_cn,
                           double *cov,
                           unsigned char *hom_del_flag,
                           double *lambda,
                           double *sigma_dip,
                           double **cn_emission_logp);

// Per-window log probabilities of each HMM state, indexed [window][state].
void calc_hmm_state_emission_logp(int n_windows,
                                  char sex,
                                  unsigned char *window_chr,
                                  char *max_cn,
                                  double **cn_emission_logp,
                                  double **hmm_state_emission_logp);

// Most likely state sequence; the returned array is allocated by the
// function and is not freed by it.
unsigned char *viterbi(int n_windows,
                       int direction,
                       unsigned char *window_chr,
                       int *window_start,
                       int *window_end,
                       char *max_cn,
                       double *model_conf,
                       double **hmm_state_emission_logp,
                       double cnv_rate,
                       double mean_cnv_length);

// Reset seq1 to NORM wherever seq1 and seq2 disagree on a usable window.
void mask_sequence(int n_windows, char *max_cn,
                   unsigned char *seq1, unsigned char *seq2);

// Scaled forward and backward probabilities, indexed [window][state].
void forward_backward(int n_windows,
                      unsigned char *window_chr,
                      int *window_start,
                      int *window_end,
                      char *max_cn,
                      double *model_conf,
                      double **hmm_state_emission_logp,
                      double cnv_rate,
                      double mean_cnv_length,
                      double **forward_scaled_prob,
                      double **backward_scaled_prob);

#endif
117 |
--------------------------------------------------------------------------------
/ltqnorm.c:
--------------------------------------------------------------------------------
1 | /*
2 | * http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c
3 | * Implementation by Chad Sprouse
4 | * Original comments below
5 | *
6 | *-------------------------------------------------------------------
7 | *
8 | * Lower tail quantile for standard normal distribution function.
9 | *
10 | * This function returns an approximation of the inverse cumulative
11 | * standard normal distribution function. I.e., given P, it returns
12 | * an approximation to the X satisfying P = Pr{Z <= X} where Z is a
13 | * random variable from the standard normal distribution.
14 | *
15 | * The algorithm uses a minimax approximation by rational functions
16 | * and the result has a relative error whose absolute value is less
17 | * than 1.15e-9.
18 | *
19 | * Author: Peter John Acklam
20 | * Time-stamp: 2002-06-09 18:45:44 +0200
21 | * E-mail: jacklam@math.uio.no
22 | * WWW URL: http://www.math.uio.no/~jacklam
23 | *
24 | * C implementation adapted from Peter's Perl version
25 | */
26 |
27 | #include
28 | #include
29 |
30 | /* Coefficients in rational approximations. */
31 | static const double a[] =
32 | {
33 | -3.969683028665376e+01,
34 | 2.209460984245205e+02,
35 | -2.759285104469687e+02,
36 | 1.383577518672690e+02,
37 | -3.066479806614716e+01,
38 | 2.506628277459239e+00
39 | };
40 |
41 | static const double b[] =
42 | {
43 | -5.447609879822406e+01,
44 | 1.615858368580409e+02,
45 | -1.556989798598866e+02,
46 | 6.680131188771972e+01,
47 | -1.328068155288572e+01
48 | };
49 |
50 | static const double c[] =
51 | {
52 | -7.784894002430293e-03,
53 | -3.223964580411365e-01,
54 | -2.400758277161838e+00,
55 | -2.549732539343734e+00,
56 | 4.374664141464968e+00,
57 | 2.938163982698783e+00
58 | };
59 |
60 | static const double d[] =
61 | {
62 | 7.784695709041462e-03,
63 | 3.224671290700398e-01,
64 | 2.445134137142996e+00,
65 | 3.754408661907416e+00
66 | };
67 |
68 | #define LOW 0.02425
69 | #define HIGH 0.97575
70 |
71 | double
72 | ltqnorm(double p)
73 | {
74 | double q, r;
75 |
76 | errno = 0;
77 |
78 | if (p < 0 || p > 1)
79 | {
80 | errno = EDOM;
81 | return 0.0;
82 | }
83 | else if (p == 0)
84 | {
85 | errno = ERANGE;
86 | return -HUGE_VAL /* minus "infinity" */;
87 | }
88 | else if (p == 1)
89 | {
90 | errno = ERANGE;
91 | return HUGE_VAL /* "infinity" */;
92 | }
93 | else if (p < LOW)
94 | {
95 | /* Rational approximation for lower region */
96 | q = sqrt(-2*log(p));
97 | return (((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
98 | ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
99 | }
100 | else if (p > HIGH)
101 | {
102 | /* Rational approximation for upper region */
103 | q = sqrt(-2*log(1-p));
104 | return -(((((c[0]*q+c[1])*q+c[2])*q+c[3])*q+c[4])*q+c[5]) /
105 | ((((d[0]*q+d[1])*q+d[2])*q+d[3])*q+1);
106 | }
107 | else
108 | {
109 | /* Rational approximation for central region */
110 | q = p - 0.5;
111 | r = q*q;
112 | return (((((a[0]*r+a[1])*r+a[2])*r+a[3])*r+a[4])*r+a[5])*q /
113 | (((((b[0]*r+b[1])*r+b[2])*r+b[3])*r+b[4])*r+1);
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/normalize_coverage.c:
--------------------------------------------------------------------------------
1 | #include "math.h"
2 | #include "stdio.h"
3 | #include "stdlib.h"
4 | #include "string.h"
5 | #include "utils.h"
6 |
// Command-line options of normalize_coverage, as filled in by parse_args().
typedef struct {
    double min_gc; // lower GC bound, given as a fraction in (0, 1]; parse_args default 0.3
    double max_gc; // upper GC bound, given as a fraction in (0, 1]; parse_args default 0.7
    int gc_window_size; // # bases in the window used to calculate GC %
    int n_gc_bins; // number of GC bins; parse_args derives it from the GC range unless --n_gc_bins is given
    double min_mappability; // minimum mappability score; parse_args default 0.75
} Options;
14 |
// Layout of the GC bins, derived from Options by calc_gc_bin_params().
// GC values here are base counts within the GC window, not fractions.
typedef struct {
    int n_bins; // number of bins (copied from Options.n_gc_bins)
    int min_gc; // # bases
    int max_gc; // # bases
    int gc_range; // max_gc - min_gc
    int bin_size; // # distinct values per bin
    int bin_size_odd; // boolean
    int left_edge_bin_size; // bins at the left and right edges
    int right_edge_bin_size; // may be larger than other bins
                             // if (gc_range+1) % n_bins != 0
    double left_edge_center; // center of the left edge bin, as an offset from min_gc
    double right_edge_center; // center of the right edge bin, as an offset from min_gc
} GCBinParams;
28 |
29 | Options parse_args(int argc, char *argv[], int arg_start) {
30 | Options options;
31 | options.min_gc = 0.3;
32 | options.max_gc = 0.7;
33 | options.gc_window_size = 200;
34 | options.min_mappability = 0.75;
35 |
36 | int i;
37 | int n_gc_bins_set = 0;
38 | for (i = arg_start; i < argc; i += 2) {
39 | if (strcmp(argv[i], "--min_gc") == 0) {
40 | if (i+1 >= argc) missing_value_error(argv[i]);
41 | options.min_gc = strtod(argv[i+1], NULL);
42 | if (options.min_gc <= 0.0 || options.min_gc > 1.0)
43 | invalid_value_error(argv[i]);
44 | } else if (strcmp(argv[i], "--max_gc") == 0) {
45 | if (i+1 >= argc) missing_value_error(argv[i]);
46 | options.max_gc = strtod(argv[i+1], NULL);
47 | if (options.max_gc <= 0.0 || options.max_gc > 1.0)
48 | invalid_value_error(argv[i]);
49 | } else if (strcmp(argv[i], "--n_gc_bins") == 0) {
50 | if (i+1 >= argc) missing_value_error(argv[i]);
51 | options.n_gc_bins = (int) strtol(argv[i+1], NULL, 10);
52 | if (options.n_gc_bins < 10) invalid_value_error(argv[i]);
53 | n_gc_bins_set = 1;
54 | } else if (strcmp(argv[i], "--min_mappability") == 0) {
55 | if (i+1 >= argc) missing_value_error(argv[i]);
56 | options.min_mappability = strtod(argv[i+1], NULL);
57 | if (options.min_mappability <= 0.0 || options.min_mappability > 1.0)
58 | invalid_value_error(argv[i]);
59 | } else {
60 | fprintf(stderr, "Unrecognized argument: %s\n", argv[i]);
61 | fprintf(stderr, "Try '%s --help' for more information.\n", argv[0]);
62 | exit(1);
63 | }
64 | }
65 |
66 | if (!n_gc_bins_set)
67 | options.n_gc_bins = (int) floor((1.0 + options.gc_window_size *
68 | (options.max_gc - options.min_gc)) / 3.0);
69 |
70 | return options;
71 | }
72 |
73 | GCBinParams calc_gc_bin_params(Options *options) {
74 | GCBinParams gc_bin_params;
75 | gc_bin_params.n_bins = options->n_gc_bins;
76 | gc_bin_params.min_gc = (int) ceil(options->min_gc * options->gc_window_size);
77 | gc_bin_params.max_gc = (int) floor(options->max_gc * options->gc_window_size);
78 | gc_bin_params.gc_range = gc_bin_params.max_gc - gc_bin_params.min_gc;
79 | gc_bin_params.bin_size = (int) floor((gc_bin_params.gc_range + 1) / gc_bin_params.n_bins);
80 | gc_bin_params.bin_size_odd = (gc_bin_params.bin_size % 2 == 1 ? 1 : 0);
81 | gc_bin_params.left_edge_bin_size = gc_bin_params.bin_size;
82 | gc_bin_params.right_edge_bin_size = gc_bin_params.bin_size;
83 | int remainder = (gc_bin_params.gc_range + 1) % options->n_gc_bins;
84 | if (remainder % 2 == 0) {
85 | gc_bin_params.left_edge_bin_size += remainder / 2;
86 | gc_bin_params.right_edge_bin_size += remainder / 2;
87 | } else {
88 | gc_bin_params.left_edge_bin_size += (remainder + 1) / 2;
89 | gc_bin_params.right_edge_bin_size += (remainder - 1) / 2;
90 | }
91 |
92 | gc_bin_params.left_edge_center = (double) (gc_bin_params.left_edge_bin_size - 1) / 2.0;
93 | gc_bin_params.right_edge_center = (double) gc_bin_params.gc_range
94 | - ((double) gc_bin_params.right_edge_bin_size - 1) / 2.0;
95 |
96 | return gc_bin_params;
97 | }
98 |
99 | int get_gc_bin(int gc, GCBinParams *params) {
100 | int x = gc - params->min_gc;
101 | if (x < params->left_edge_bin_size) {
102 | return 0;
103 | } else if (x > params->gc_range - params->right_edge_bin_size) {
104 | return params->n_bins - 1;
105 | } else {
106 | x -= params->left_edge_bin_size;
107 | return 1 + x / params->bin_size;
108 | };
109 | }
110 |
111 | // estimate median(coverage | gc)
112 | // from median(coverage | gc bin)
113 | double get_normalizing_factor(int gc, double *gc_meds, GCBinParams *params) {
114 | int x = gc - params->min_gc;
115 | if (x < params->left_edge_bin_size + params->bin_size / 2) {
116 | double rise = gc_meds[1] - gc_meds[0];
117 | double run = (double) params->left_edge_bin_size / 2.0
118 | + (double) params->bin_size / 2.0;
119 | double slope = rise / run;
120 | return gc_meds[0] + slope * ((double) x - params->left_edge_center);
121 | } else if (x > params->gc_range - (params->right_edge_bin_size + params->bin_size / 2)) {
122 | double rise = gc_meds[params->n_bins-2] - gc_meds[params->n_bins-1];
123 | double run = (double) params->right_edge_bin_size / 2.0
124 | + (double) params->bin_size / 2.0;
125 | double slope = rise / run;
126 | return gc_meds[params->n_bins-1] + slope * (params->right_edge_center - (double) x);
127 | } else {
128 | x -= params->left_edge_bin_size;
129 | int bin = 1 + x / params->bin_size;
130 | int remainder = x % params->bin_size;
131 | if (params->bin_size_odd && remainder == params->bin_size / 2) {
132 | return gc_meds[bin];
133 | } else {
134 | double t = ((double) remainder + (double) (remainder+1)) / (2.0 * params->bin_size);
135 | if (t < 0.5) {
136 | bin--;
137 | t += 0.5;
138 | } else {
139 | t -= 0.5;
140 | }
141 | return (1.0 - t) * gc_meds[bin] + t * gc_meds[bin+1];
142 | }
143 | }
144 | }
145 |
146 | int main(int argc, char *argv[]) {
147 | if (argc < 3) {
148 | fprintf(stderr, "Usage: %s coverage.bed windows.bed [OPTIONS] >normalized.coverage.bed\n\n", argv[0]);
149 | fputs("Normalizes a sample's coverage track relative to it's overall median depth\n", stderr);
150 | fputs("and also corrects coverage biases due to sequence GC content.\n\n", stderr);
151 | fputs(" --min_gc Windows with GC fraction less than this are filtered.\n", stderr);
152 | fputs(" Default = 0.3\n", stderr);
153 | fputs(" --max_gc Windows with GC fraction greater than this are filtered.\n", stderr);
154 | fputs(" Default = 0.7\n", stderr);
155 | fputs(" --n_gc_bins # of bins to use when estimating the gc bias curve.\n", stderr);
156 | fputs(" Default = floor((1+200*(max_gc-min_gc))/3).\n", stderr);
157 | fputs(" --min_mappability Windows with mean mappability less than this are filtered.\n", stderr);
158 | fputs(" Default = 0.75\n", stderr);
159 | fputs(" Mappability scores are taken from windows.bed (column 8).\n\n", stderr);
160 | return 1;
161 | }
162 |
163 | FILE *coverage = open_file(argv[1]);
164 | FILE *windows = open_file(argv[2]);
165 |
166 | Options options = parse_args(argc, argv, 3);
167 |
168 | char *line = NULL;
169 | char *pos, *last_pos;
170 | size_t line_len;
171 | ssize_t bytes_read;
172 |
173 | int i, j;
174 | int max_mm_cn;
175 | int n_windows = count_lines_in_file(windows);
176 |
177 | // read window info
178 | char **window_coords = (char **) malloc(n_windows * sizeof(char *));
179 | int *window_gc = (int *) malloc(n_windows * sizeof(int));
180 | double *window_mappability = (double *) malloc(n_windows * sizeof(double));
181 | int *window_blacklisted = (int *) malloc(n_windows * sizeof(int));
182 | for (i = 0; i < n_windows; i++)
183 | window_blacklisted[i] = 0;
184 |
185 | i = 0;
186 | while ((bytes_read = getline(&line, &line_len, windows)) != -1) {
187 | pos = strchr(line, '\t');
188 | pos = strchr(pos+1, '\t');
189 | pos = strchr(pos+1, '\t');
190 | int coord_str_len = (int) (pos - line);
191 | window_coords[i] = (char *) malloc((coord_str_len+1) * sizeof(char));
192 | strncpy(window_coords[i], line, coord_str_len);
193 | window_coords[i][coord_str_len] = '\0';
194 |
195 | pos = strchr(pos+1, '\t');
196 | window_gc[i] = (int) strtol(pos, &pos, 10);
197 | pos = strchr(pos+1, '\t');
198 | window_mappability[i] = strtod(pos, &pos);
199 | if (*pos == '\t') {
200 | sscanf(++pos, "%d", &max_mm_cn);
201 | if (max_mm_cn < 0)
202 | window_blacklisted[i] = 1;
203 | }
204 | i++;
205 | }
206 |
207 | // initialize gc bins
208 | GCBinParams gc_bin_params = calc_gc_bin_params(&options);
209 | int max_bin_size = n_windows / 5; // don't want to implement resizeable array (or C++)
210 | // so I just allocate a big chunk of memory and assume it'll be enough
211 | // it's only 1 sample being processed, so memory usage shouldn't be an issue
212 | double **gc_bins = (double **) malloc(gc_bin_params.n_bins * sizeof(double *));
213 | int *gc_bin_n_elems = (int *) malloc(gc_bin_params.n_bins * sizeof(int));
214 | for (i = 0; i < gc_bin_params.n_bins; i++) {
215 | gc_bins[i] = (double *) malloc(max_bin_size * sizeof(double));
216 | gc_bin_n_elems[i] = 0;
217 | }
218 |
219 | // read coverage values and put them into gc bins
220 | i = 0; // window # (autosome only)
221 | j = 0; // line #
222 | double *cov = (double *) malloc(n_windows * sizeof(double));
223 | while ((bytes_read = getline(&line, &line_len, coverage)) != -1) {
224 | j++;
225 |
226 | char chr = *line;
227 | pos = strchr(line, '\t');
228 | pos = strchr(pos+1, '\t');
229 | pos = strchr(pos+1, '\t');
230 | int coord_str_len = (int) (pos - line);
231 | if (strncmp(window_coords[i], line, coord_str_len) != 0) {
232 | fprintf(stderr, "Coordinates [ %*.*s ] at line %d of coverage file\n"
233 | "do not match coordinates [ %s ] at line %d of windows file\n",
234 | coord_str_len, coord_str_len, line, j, window_coords[i], j);
235 | break;
236 | }
237 |
238 | cov[i] = strtod(pos, &pos);
239 | if (window_gc[i] < gc_bin_params.min_gc ||
240 | window_gc[i] > gc_bin_params.max_gc ||
241 | window_mappability[i] < options.min_mappability) {
242 | window_blacklisted[i] = 1;
243 | } else if (!(window_blacklisted[i] || chr == 'X' || chr == 'Y')) {
244 | int bin = get_gc_bin(window_gc[i], &gc_bin_params);
245 | gc_bins[bin][gc_bin_n_elems[bin]++] = cov[i];
246 | }
247 |
248 | i++;
249 | }
250 |
251 | // compute median(coverage | sample, gc_bin)
252 | double *gc_meds = (double *) malloc(gc_bin_params.n_bins * sizeof(double));
253 | for (i = 0; i < options.n_gc_bins; i++) {
254 | qsort(gc_bins[i], gc_bin_n_elems[i], sizeof(double), double_comp);
255 | gc_meds[i] = median(gc_bins[i], gc_bin_n_elems[i]);
256 | }
257 |
258 | // for debugging the gc-bias curve estimation
259 | /*
260 | for (i = 61; i <= 141; i++) {
261 | fprintf(stderr, "%d\t%.2f\t%.2f\n", i, gc_meds[get_gc_bin(i, &gc_bin_params)], get_normalizing_factor(i, gc_meds, &gc_bin_params));
262 | }
263 | */
264 | // output normalized coverage values
265 | for (i = 0; i < n_windows; i++) {
266 | printf("%s\t%.6g\n",
267 | window_coords[i],
268 | cov[i] / get_normalizing_factor(window_gc[i], gc_meds, &gc_bin_params));
269 | }
270 |
271 | // cleanup
272 |
273 | if (line) free (line);
274 | for (i = 0; i < n_windows; i++)
275 | free(window_coords[i]);
276 | for (i = 0; i < options.n_gc_bins; i++)
277 | free(gc_bins[i]);
278 | free(window_coords);
279 | free(window_gc);
280 | free(window_mappability);
281 | free(gc_bins);
282 | free(gc_bin_n_elems);
283 | free(gc_meds);
284 | free(cov);
285 |
286 | fclose(coverage);
287 | fclose(windows);
288 | }
289 |
290 |
--------------------------------------------------------------------------------
/plot_cnv.R:
--------------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(scales)
})

# Input: path (first command-line argument) of a headerless table with one row
# per (CNV, exon) pair and the eleven columns named below.
# Output: one PNG per CNV under clamms_cnv_plots/<sample>/ (the directory must
# already exist).
df <- read.table(
  commandArgs(trailingOnly=TRUE)[1],
  col.names=c("sample", "cnv", "cnv.type", "copy.number",
              "exon", "in.cnv.1", "in.cnv.2", "cov",
              "max.cn.considered", "mu.dip", "sigma.dip"),
  colClasses=c("factor", "factor", "factor", "integer",
               "factor", "factor", "factor", "numeric",
               "integer", "numeric", "numeric")
)

df$max.cn.considered <- ifelse(df$max.cn.considered == 0, 3, -1)

sample <- levels(df$sample)[1]

# Output file for a CNV: ':' and '-' in the region name become '_'.
cnv.plot.path <- function(sample, cnv) {
  paste("clamms_cnv_plots/", sample, "/",
        gsub(":", "_", gsub("-", "_", cnv)),
        ".png", sep="")
}

for (this.cnv in levels(df$cnv)) {
  cat("Plotting ", this.cnv, "\n", sep="")
  this.cnv.df <- filter(df, cnv == this.cnv, max.cn.considered > 0)

  # Nothing left to draw for this CNV; without this guard the code below
  # fails on the empty data frame and aborts all remaining plots.
  if (nrow(this.cnv.df) == 0) {
    message("No plottable windows for ", this.cnv, "; skipping")
    next
  }

  this.copy.number <- this.cnv.df$copy.number[1]
  plot.max.rel.cov <- max(0.65, max((this.cnv.df$cov - this.cnv.df$mu.dip)/this.cnv.df$mu.dip), 0.5 + (this.copy.number-2)/2)

  if (nrow(this.cnv.df) > 1) {
    # Multi-exon CNV: relative coverage per exon with +/- 1 and 2 sigma ribbons.
    plot <- ggplot(this.cnv.df, aes(x = exon, y = (cov - mu.dip) / mu.dip, group=1))
    plot <- plot + scale_y_continuous(limits=c(-0.89, plot.max.rel.cov), labels=percent)
    plot <- plot + geom_ribbon(aes(ymin = -2*sigma.dip, ymax=2*sigma.dip),
                               fill="lightgrey")
    plot <- plot + geom_ribbon(aes(ymin = -sigma.dip, ymax=sigma.dip),
                               fill="grey")

    plot <- plot + geom_hline(yintercept=-0.5, size=1.5, colour="grey40")
    plot <- plot + geom_hline(yintercept=0.5, size=1.5, colour="grey40")
    plot <- plot + geom_line(aes(colour = in.cnv.1), size=2)
    plot <- plot + geom_point(aes(colour = in.cnv.2), size=2)
    plot <- plot + scale_colour_manual(values=c("black", "firebrick"), guide="none")
    plot <- plot + xlab("Each tick is an exon. Grey ribbons show +/- 2\u03C3 for the diploid coverage distribution at each exon.")
    plot <- plot + ylab("Coverage relative to diploid mean")
    plot <- plot + ggtitle(paste("Sample", sample, "predicted\nto have copy number", this.copy.number, "at region", this.cnv))
    plot <- plot + theme_bw() + theme(
      plot.title=element_text(size=8),
      axis.title.x=element_text(size=8),
      axis.title.y=element_text(size=8),
      axis.text.x=element_blank(),
      axis.text.y=element_text(size=8*5/6))

    ggsave(cnv.plot.path(sample, this.cnv),
           plot=plot, units="in", width=4*1.618, height=4)
  } else {
    # Single-exon CNV: density curve of each copy-number component, with the
    # observed coverage marked on the curve of the predicted copy number.
    this.exon <- this.cnv.df$exon[1]
    cov <- this.cnv.df$cov[1]
    mu.dip <- this.cnv.df$mu.dip[1]
    sigma.dip <- this.cnv.df$sigma.dip[1]
    cov <- (cov - mu.dip) / mu.dip

    plot <- ggplot(data.frame(x=seq(-1, plot.max.rel.cov, 0.01)), aes(x=x))
    plot <- plot + scale_x_continuous(
      limits=c(-1, plot.max.rel.cov),
      breaks=seq(-1, plot.max.rel.cov, 0.5))
    for (k in seq(1, max(3, this.copy.number))) {
      # The parameter is `args`; a misspelled `arg` is not matched by current
      # ggplot2 and the curve would be drawn with dnorm's default mean and sd.
      plot <- plot + stat_function(
        fun=dnorm,
        args=list(mean=0.5*(k-2), sd=sigma.dip*sqrt(k/2)))
    }
    plot <- plot + geom_point(x=cov, y=dnorm(cov, mean=0.5*(this.copy.number-2), sd=sigma.dip*sqrt(this.copy.number/2)), size=2, colour="red")
    plot <- plot + xlab("Coverage relative to diploid mean")
    plot <- plot + ylab("Probability density for mixture component")
    plot <- plot + ggtitle(paste("Sample", sample, "predicted\nto have copy number", this.copy.number, "at region", this.cnv))
    plot <- plot + theme_bw() + theme(
      plot.title=element_text(size=6),
      axis.title.x=element_text(size=6),
      axis.title.y=element_text(size=6),
      axis.text.x=element_text(size=5),
      axis.text.y=element_text(size=5))

    ggsave(cnv.plot.path(sample, this.cnv),
           plot=plot, units="in", width=2*1.618, height=2)
  }
}
87 |
88 |
--------------------------------------------------------------------------------
/plot_cnv.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Builds the per-exon plotting table for one sample and renders it with
# plot_cnv.R. Plots are written to clamms_cnv_plots/<sample>/ in the current
# directory; intermediate tmp.* files are created there too and removed on exit.

if [ $# -ne 3 ]; then
    echo "Usage: ./plot_cnv.sh sample.cnv.txt sample.normalized.coverage.bed sample.models.bed"
    exit 1
fi

cnv=$1
cov=$2
models=$3

# sample name = file name of the CNV file up to its first '.'
sample=$(basename -- "$cnv" | cut -d '.' -f 1)

SCRIPT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )

cov_tmp="tmp.$sample.cov.txt"
windows_tmp="tmp.$sample.prev-and-next.windows.bed"
plot_tmp="tmp.plot_cnv.$sample.txt"

# remove the intermediates even when one of the steps below fails
trap 'rm -f "$cov_tmp" "$windows_tmp" "$plot_tmp"' EXIT

cut -f -4 "$cov" >"$cov_tmp"

# for every window with a non-negative 4th column, record the start of the
# window three positions before it and the end of the window three after it
awk '{
    if ($4 >= 0) {
        n++;
        window[n] = $1 "\t" $2 "\t" $3;
        start[n] = $2;
        end[n] = $3;
    }
} END {
    for (i = 4; i <= n-3; i++) {
        print window[i] "\t" start[i-3] "\t" end[i+3]
    }
}' "$models" \
| sort -k1,1 -k2,2n \
>"$windows_tmp"

cut -f -7 "$cnv" \
| bedtools map -a - -b "$windows_tmp" -c 4,5 -o first,last \
| awk 'BEGIN { OFS="\t"; } {
    if ($8 != "." && $9 != ".") { $2 = $8; $3 = $9 }
    $1 = $1;
    printf "%s", $1;
    for (i = 2; i <= 7; i++) printf "\t%s", $i;
    printf "\n";
}' \
| bedtools intersect -a - -b "$cov_tmp" -wb \
| sort -k1,1 -k2,2n \
| bedtools map -a - -b "$models" -c 4,9,10 -o first,first,first \
| awk '{
    split($4, arr1, ":"); split(arr1[2], arr2, "-");
    x1 = 0; if ($2 >= arr2[1]-1 && $3 < arr2[2]) x1 = 1;
    x2 = 0; if ($2 >= arr2[1]-1 && $3 <= arr2[2]) x2 = 1;
    print $5 "\t" $4 "\t" $6 "\t" $7 "\t" $1 ":" $2 "-" $3 "\t" x1 "\t" x2 "\t" $11 "\t" $12 "\t" $13 "\t" $14 }' >"$plot_tmp"

mkdir -p "clamms_cnv_plots/$sample"
Rscript "$SCRIPT_DIR/plot_cnv.R" "$plot_tmp"
56 |
57 |
--------------------------------------------------------------------------------
/sam_gatk_coverage_to_bed.c:
--------------------------------------------------------------------------------
1 | #include "stdio.h"
2 | #include "stdlib.h"
3 | #include "string.h"
4 |
/* numeric ids used for the non-numeric chromosomes */
#define CHR_X 23
#define CHR_Y 24
#define CHR_M 25

/* character codes tested by read_chr() */
#define X_ASCII 'X'
#define Y_ASCII 'Y'
#define M_ASCII 'M'
#define T_ASCII 'T'
#define DIGIT_ASCII_OFFSET '0'

// read chromosome id into unsigned char
// and skip the next character after
// (':' in GATK coverage input, '\t' in samtools depth and windows input)
//
// Returns 0 at end of input, 1 when an id was read, and 2 when the first
// character cannot start an id (likely a header line); in the last case
// *chr is set to 0 and one further character is consumed.
int read_chr(FILE *input, unsigned char *chr) {
    const int first = getc(input);
    if (first == EOF) return 0;

    switch (first) {
    case X_ASCII:
        *chr = CHR_X;
        getc(input);              // separator
        return 1;
    case Y_ASCII:
        *chr = CHR_Y;
        getc(input);              // separator
        return 1;
    case M_ASCII:
        *chr = CHR_M;
        // accept both "M" and "MT": after "M" the next character is either
        // the separator itself or a 'T' that is followed by the separator
        if (getc(input) == T_ASCII) getc(input);
        return 1;
    default:
        break;
    }

    if (first < DIGIT_ASCII_OFFSET || first > (DIGIT_ASCII_OFFSET + 9)) {
        *chr = 0;
        getc(input);
        return 2; // Illegal chromosome character. Likely the header.
    }

    const int leading = first - DIGIT_ASCII_OFFSET;
    if (leading >= 3) {
        // a leading digit of 3-9 is treated as a complete one-digit id
        *chr = leading;
        getc(input);              // separator
        return 1;
    }

    // leading digit 0-2: a second digit may follow
    const int second = getc(input) - DIGIT_ASCII_OFFSET;
    if (second >= 0 && second <= 9) {
        *chr = 10 * leading + second;
        getc(input);              // separator
    } else {
        // the character just read was the separator
        *chr = leading;
    }
    return 1;
}
52 |
// Consumes characters from `input` up to and including the next newline.
// Reaching end of input before a newline is treated as malformed input:
// an error is printed to stderr and the program exits with status 1.
void skip_to_end_of_line(FILE *input) {
    for (;;) {
        const int ch = getc(input);
        if (ch == '\n')
            return;
        if (ch == EOF) {
            fputs("ERROR: malformed input\n", stderr);
            exit(1);
        }
    }
}
62 |
63 | void print_chr(unsigned char chr,
64 | unsigned char *window_chr,
65 | int *window_start,
66 | int *window_end,
67 | double *window_cov,
68 | int *chr_start_idx) {
69 | int i;
70 | char chr_str[64];
71 | if (chr == CHR_X) { chr_str[0] = 'X'; chr_str[1] = '\0'; }
72 | else if (chr == CHR_Y) { chr_str[0] = 'Y'; chr_str[1] = '\0'; }
73 | else if (chr == CHR_M) {
74 | chr_str[0] = 'M'; chr_str[1] = 'T'; chr_str[2] = '\0';
75 | } else sprintf(chr_str, "%hhu", chr);
76 |
77 | i = chr_start_idx[chr];
78 | while (window_chr[i] == chr) {
79 | printf("%s\t%d\t%d\t%.6g\n",
80 | chr_str, window_start[i], window_end[i], window_cov[i]);
81 | i++;
82 | }
83 | }
84 |
85 | int main(int argc, char *argv[]) {
86 | if (argc < 3) {
87 | fprintf(stderr, "Usage: %s sample.gatk_readDepth_1x_q30.out windows.bed\n\n", argv[0]);
88 | fputs("Computes depth of coverage for intervals specified in windows.bed.\n", stderr);
89 | fputs("Also changes chr sort order from GATK's 1,2,3.. to BED's 1,10,11,..\n\n", stderr);
90 | return 1;
91 | }
92 |
93 | FILE *base_cov = fopen(argv[1], "r");
94 | if (base_cov == NULL) {
95 | fprintf(stderr, "Cannot read coverage file: %s\n", argv[1]);
96 | return 1;
97 | }
98 |
99 |
100 | FILE *windows = fopen(argv[2], "r");
101 | if (windows == NULL) {
102 | fprintf(stderr, "Cannot read windows file: %s\n", argv[2]);
103 | return 1;
104 | }
105 |
106 | char *line = NULL;
107 | char *pos;
108 | size_t line_len;
109 | ssize_t bytes_read;
110 |
111 | int i, j;
112 | int tmp_char;
113 | int n_windows = 0;
114 |
115 | while ((tmp_char = getc(windows)) != EOF) {
116 | if (tmp_char == '\n') n_windows++;
117 | } rewind(windows);
118 |
119 | int chr_start_idx[26];
120 | for (i = 0; i < 26; i++)
121 | chr_start_idx[i] = -1;
122 |
123 | unsigned char last_chr = 0;
124 | unsigned char *window_chr = (unsigned char *) malloc(n_windows * sizeof(unsigned char));
125 | int *window_start = (int *) malloc(n_windows * sizeof(int));
126 | int *window_end = (int *) malloc(n_windows * sizeof(int));
127 | double *window_cov = (double *) malloc(n_windows * sizeof(double));
128 | memset(window_cov, 0.0, n_windows*sizeof(double));
129 |
130 | i = 0;
131 | while (read_chr(windows, window_chr+i)) {
132 | fscanf(windows, "%d\t%d", window_start+i, window_end+i);
133 | skip_to_end_of_line(windows);
134 | if (window_chr[i] != last_chr) {
135 | last_chr = window_chr[i];
136 | chr_start_idx[window_chr[i]] = i;
137 | }
138 | i++;
139 | }
140 |
141 | unsigned char chr;
142 | last_chr = 0;
143 | int locus, read_depth;
144 | int cur_window, tot_cov, n_bases;
145 | n_bases = 0;
146 | // Check for header, advance line if it exists
147 | if (read_chr(base_cov, &chr) < 2)
148 | rewind(base_cov);
149 | else
150 | skip_to_end_of_line(base_cov);
151 | while (read_chr(base_cov, &chr)) {
152 | fscanf(base_cov, "%d\t%d", &locus, &read_depth);
153 | skip_to_end_of_line(base_cov);
154 |
155 | if (chr != last_chr) {
156 | if (n_bases > 0 && window_cov[cur_window] == 0.0)
157 | window_cov[cur_window] = (double) tot_cov / (double) n_bases;
158 | last_chr = chr;
159 | cur_window = chr_start_idx[chr];
160 | tot_cov = 0; n_bases = 0;
161 | } else if (chr != window_chr[cur_window]) {
162 | continue;
163 | }
164 |
165 | if (locus > window_end[cur_window]) {
166 | if (n_bases == 0)
167 | window_cov[cur_window] = 0.;
168 | else
169 | window_cov[cur_window] = (double) tot_cov / (double) n_bases;
170 | cur_window++;
171 | while (locus > window_end[cur_window] && chr == window_chr[cur_window])
172 | cur_window++;
173 | tot_cov = 0; n_bases = 0;
174 | }
175 |
176 | if (locus < window_start[cur_window]+1) continue;
177 |
178 | tot_cov += read_depth;
179 | n_bases++;
180 | }
181 | if (n_bases > 0 && window_cov[cur_window] == 0.0)
182 | window_cov[cur_window] = (double) tot_cov / (double) n_bases;
183 |
184 | print_chr(1, window_chr, window_start, window_end, window_cov, chr_start_idx);
185 | for (i = 10; i <= 19; i++)
186 | print_chr(i, window_chr, window_start, window_end, window_cov, chr_start_idx);
187 | print_chr(2, window_chr, window_start, window_end, window_cov, chr_start_idx);
188 | for (i = 20; i <= 22; i++)
189 | print_chr(i, window_chr, window_start, window_end, window_cov, chr_start_idx);
190 | for (i = 3; i <= 9; i++)
191 | print_chr(i, window_chr, window_start, window_end, window_cov, chr_start_idx);
192 | print_chr(CHR_M, window_chr, window_start, window_end, window_cov, chr_start_idx);
193 | print_chr(CHR_X, window_chr, window_start, window_end, window_cov, chr_start_idx);
194 | print_chr(CHR_Y, window_chr, window_start, window_end, window_cov, chr_start_idx);
195 |
196 | free(window_chr);
197 | free(window_start);
198 | free(window_end);
199 | free(window_cov);
200 |
201 | return 0;
202 | }
203 |
--------------------------------------------------------------------------------
/split_targets_into_windows.awk:
--------------------------------------------------------------------------------
# splits targets >= 1000 bp long
# into equally sized windows
# of a size within [500, 1000) bp
#
# input columns:  chr, start, end
# output columns: chr, window start, window end, window id, target id
# targets shorter than 1000 bp are emitted unchanged as a single window

{
    target_start = $2 + 0;
    len = $3 - $2;
    if (len < 1000) {
        printf "%s\t%d\t%d\t%s:%d-%d\t%s:%d-%d\n",
               $1, $2, $3, $1, $2, $3, $1, $2, $3;
        next;
    }

    n = int(len / 500);

    # Boundaries are computed as floor(len * i / n) on whole numbers rather
    # than by multiplying a rounded len / n, so the last window always ends
    # exactly at the target end and no boundary is truncated one base short.
    for (i = 0; i < n; i++) {
        start = target_start + int(len * i / n);
        end = target_start + int(len * (i + 1) / n);
        printf "%s\t%d\t%d\t%s:%d-%d\t%s:%d-%d\n",
               $1, start, end, $1, start, end, $1, $2, $3;
    }
}
23 |
24 |
--------------------------------------------------------------------------------
/transpose.gawk:
--------------------------------------------------------------------------------
# taken from http://stackoverflow.com/questions/1729824/transpose-a-file-in-bash
#
# Transposes a whitespace-delimited table: input column j becomes output
# row j, with cells joined by tabs. The whole input is held in memory.
{
    # remember every cell, keyed by (input row, input column)
    for (col = 1; col <= NF; col++)
        cell[NR, col] = $col
    # widest row seen so far
    if (NF > max_nf)
        max_nf = NF
}
END {
    for (col = 1; col <= max_nf; col++) {
        out = cell[1, col]
        for (row = 2; row <= NR; row++)
            out = out "\t" cell[row, col]
        print out
    }
}
17 |
--------------------------------------------------------------------------------
/utils.c:
--------------------------------------------------------------------------------
#include "math.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
4 |
// Prints "<problem> value for argument: <arg>" to stderr and exits with status 1.
static void argument_value_error(const char *problem, char *arg) {
    fprintf(stderr, "%s value for argument: %s\n", problem, arg);
    exit(1);
}

// Reports that command-line flag `arg` was given without a value; does not return.
void missing_value_error(char *arg) {
    argument_value_error("Missing", arg);
}

// Reports that the value given for command-line flag `arg` is not acceptable; does not return.
void invalid_value_error(char *arg) {
    argument_value_error("Invalid", arg);
}
14 |
// used by qsort
// Three-way comparison of the doubles that a and b point to:
// negative when *a < *b, positive when *a > *b, zero otherwise.
int double_comp(const void *a, const void *b) {
    const double lhs = *(const double *) a;
    const double rhs = *(const double *) b;
    return (lhs > rhs) - (lhs < rhs);
}
21 |
// input must be sorted
// Returns the middle element for an odd length and the mean of the two
// middle elements for an even length.
// An empty array (len <= 0) has no median: NAN is returned instead of
// reading arr[-1].
double median(double *arr, int len) {
    if (len <= 0)
        return NAN;
    if (len % 2 == 1)
        return arr[(len-1)/2];
    else
        return (arr[len/2] + arr[len/2 - 1]) / 2.0;
}
29 |
// Opens `path` for reading and returns the stream.
// When the file cannot be opened, prints an error to stderr and exits with
// status 1, so callers never receive NULL.
FILE *open_file(char *path) {
    FILE *stream = fopen(path, "r");
    if (stream != NULL)
        return stream;

    fprintf(stderr, "Cannot read file: %s\n", path);
    exit(1);
}
38 |
// Counts the lines in `file`, reading from the current position to the end,
// then rewinds the stream to its beginning.
// A final line that is not terminated by '\n' is counted too, so the result
// matches the number of lines a line-by-line reader will return.
int count_lines_in_file(FILE *file) {
    int n_lines = 0;
    int prev_char = '\n'; // an empty file therefore has zero lines
    int tmp_char;
    while ((tmp_char = getc(file)) != EOF) {
        if (tmp_char == '\n') n_lines++;
        prev_char = tmp_char;
    }
    if (prev_char != '\n') n_lines++;
    rewind(file);
    return n_lines;
}
47 |
// Writes the sample name derived from path `src` into `dest`: the part of
// the file name (everything after the last '/') that precedes its first '.',
// or the whole file name when it contains no '.'.
// `dest` must be large enough for the result plus the terminating NUL.
void read_sample_name(char *dest, char *src) {
    char *last_slash = strrchr(src, '/');
    char *file_name = (last_slash != NULL) ? last_slash + 1 : src;

    // length of the file name up to, but not including, its first '.'
    size_t stem_len = strcspn(file_name, ".");

    memcpy(dest, file_name, stem_len);
    dest[stem_len] = '\0';
}
61 |
--------------------------------------------------------------------------------
/utils.h:
--------------------------------------------------------------------------------
#ifndef UTILS_H
#define UTILS_H

// FILE is used by the open_file / count_lines_in_file prototypes below,
// so the header includes <stdio.h> itself instead of relying on include order.
#include <stdio.h>

#define MAX_CN 6

#define SIGMA_RATIO_CN1 0.707107 // sigma_haploid = this * sigma_diploid
                                 // poisson distribution variance = mean
                                 // so var(hap) = 1/2 var(dip)
#define SIGMA_RATIO_CN3 1.224745
#define SIGMA_RATIO_CN4 1.414214
#define SIGMA_RATIO_CN5 1.581139
#define SIGMA_RATIO_CN6 1.732051

// parenthesized so the negative constant expands safely inside expressions
#define HOM_DEL_THRESHOLD (-0.98) // if the exponential distribution model for hom del coverage
                                  // is fit by the EM algorithm to have a very small mean,
                                  // it gets replaced by a uniform distribution from -1 to this

void missing_value_error(char *arg);
void invalid_value_error(char *arg);
int double_comp(const void *a, const void *b);
double median(double *arr, int len);
FILE* open_file(char *path);
int count_lines_in_file(FILE *file);
void read_sample_name(char *dest, char *src);

#endif
27 |
--------------------------------------------------------------------------------