├── DESCRIPTION
├── MD5
├── NAMESPACE
├── NEWS
├── R
    ├── bayes.R
    ├── extract.R
    ├── fit.R
    ├── gcstats.R
    ├── graphics.R
    ├── misc.R
    ├── model.R
    ├── mutations.R
    ├── read.R
    ├── results.R
    ├── segments.R
    ├── solutions.R
    └── windows.R
├── build
    └── vignette.rds
├── data
    ├── CP.example.RData
    └── example.seqz.rda
├── inst
    ├── CITATION
    ├── doc
    │   ├── sequenza.R
    │   ├── sequenza.Rmd
    │   └── sequenza.html
    └── extdata
    │   ├── example.seqz.txt.gz
    │   └── example.seqz.txt.gz.tbi
├── man
    ├── CP.data.Rd
    ├── baf.model.fit.Rd
    ├── bayes.Rd
    ├── breaks.Rd
    ├── chromosome.view.Rd
    ├── cp.plot.Rd
    ├── example.seqz.Rd
    ├── gc.Rd
    ├── model_points.Rd
    ├── mutations.Rd
    ├── plotWindows.Rd
    ├── read_seqz.Rd
    ├── theoretical.Rd
    ├── type_matrix.Rd
    ├── windowValues.Rd
    └── workflow.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test.model.R
    │   └── test.read.R
└── vignettes
    └── sequenza.Rmd


/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: sequenza
 2 | Title: Copy Number Estimation from Tumor Genome Sequencing Data
 3 | Description: Tools to analyze genomic sequencing data from
 4 |  paired normal-tumor samples, including cellularity and ploidy estimation; mutation
 5 |  and copy number (allele-specific and total copy number) detection, quantification
 6 |  and visualization.
 7 | Version: 3.0.0
 8 | Date: 2019-05-09
 9 | Authors@R: c(
10 |    person(given = "Francesco", family = "Favero",
11 |           email = "favero.francesco@gmail.com",
12 |           role = c("aut", "cre"),
13 |           comment = c(ORCID = "0000-0003-3684-2659")),
14 |    person(given = c("Andrea", "Marion"), family = "Marquard",
15 |           role = c("rev"),
16 |           comment = c(ORCID = "0000-0003-2928-6017")),
17 |    person(given = "Tejal", family = "Joshi",
18 |           role = c("rev"),
19 |           comment = c(ORCID = "0000-0002-0939-2982")),
20 |    person(given = c("Aron", "Charles"), family = "Eklund",
21 |           role = c("aut", "ths"),
22 |           comment = c(ORCID = "0000-0003-0861-1001"))
23 |   )
24 | Depends: R (>= 3.2.0)
25 | Imports: pbapply, squash, iotools, readr, seqminer, copynumber
26 | Suggests: testthat, knitr, rmarkdown, rmdformats
27 | License: GPL-3
28 | URL: https://sequenzatools.bitbucket.io, Mailing list:
29 |         https://groups.google.com/forum/#!forum/sequenza-user-group
30 | BugReports: https://bitbucket.org/sequenzatools/sequenza/issues
31 | SystemRequirements: pandoc (>= 1.12.3)
32 | VignetteBuilder: knitr, rmarkdown
33 | Encoding: UTF-8
34 | NeedsCompilation: no
35 | Packaged: 2019-05-09 12:58:45 UTC; lgq442
36 | Author: Francesco Favero [aut, cre] (<https://orcid.org/0000-0003-3684-2659>),
37 |   Andrea Marion Marquard [rev] (<https://orcid.org/0000-0003-2928-6017>),
38 |   Tejal Joshi [rev] (<https://orcid.org/0000-0002-0939-2982>),
39 |   Aron Charles Eklund [aut, ths]
40 |     (<https://orcid.org/0000-0003-0861-1001>)
41 | Maintainer: Francesco Favero <favero.francesco@gmail.com>
42 | Repository: CRAN
43 | Date/Publication: 2019-05-09 13:50:04 UTC
44 | 


--------------------------------------------------------------------------------
/MD5:
--------------------------------------------------------------------------------
 1 | 60b9b3990ed91c0ce59226344dca5979 *DESCRIPTION
 2 | 837dc998a0bfc9cb81b08d5583f4d980 *NAMESPACE
 3 | dc74f3941cd7eb5d5d2820a78e855291 *NEWS
 4 | 0090c3e305006e38f8ec8cef5616a3e2 *R/bayes.R
 5 | 1c95cfc1a6597466e42c386b0c6a6196 *R/extract.R
 6 | c37085ee8b851dfd42eea66341904e82 *R/fit.R
 7 | 06b55a573555d6e0315478fb47fd48c0 *R/gcstats.R
 8 | 3ad97d0891d98e23cd5be6b4417067c5 *R/graphics.R
 9 | 79624c9cf668bfb2b05e8a339e98f393 *R/misc.R
10 | 4b7f3320bad5b252cc0727ba9e019ab8 *R/model.R
11 | 8ab48098af7628c6d2f155b8f9f24b74 *R/mutations.R
12 | 1415cb2b3e892624fb7ea4ff76a2c46f *R/read.R
13 | bfe91a98d0c94dab8ae8bcb554eb5acd *R/results.R
14 | f763a946f45e700642201c486263b969 *R/segments.R
15 | 0359f75dbcc67975a0a8654cc04e32a3 *R/solutions.R
16 | 379d0ddc4bffc4fcd9449f18c0a2654a *R/windows.R
17 | 60d166bb186a38ec469d4e76452c2052 *build/vignette.rds
18 | 33cb86ef72ebdd33f91839241f0bd890 *data/CP.example.RData
19 | 47e0915035bb00de6af2b672114b34e0 *data/example.seqz.rda
20 | 49a67dbefc97a501bc47492ca1108e0a *inst/CITATION
21 | a93878af46779d2634e55b13814cb9e2 *inst/doc/sequenza.R
22 | 25228690501dd6c00834e253f2002e07 *inst/doc/sequenza.Rmd
23 | fd4b930147525ec496116c2c84ca6b31 *inst/doc/sequenza.html
24 | 6cecdcda8da1c0a1c7971d6f3923bb20 *inst/extdata/example.seqz.txt.gz
25 | 70764a3c09eea6ae46eaa489498da14f *inst/extdata/example.seqz.txt.gz.tbi
26 | 5fcc49fe68e8207193911291677fdeb5 *man/CP.data.Rd
27 | 6fdf4884a01b8a3586464842f004c469 *man/baf.model.fit.Rd
28 | e503cc7708c4f3a8327a55827398b240 *man/bayes.Rd
29 | 10f9ad6809df484eb98f4de2bde191ec *man/breaks.Rd
30 | 31fc1f8ee27e8a7ca30dbdee27bddbae *man/chromosome.view.Rd
31 | 9c03a6ae6635e05ab4b74eafc17f1eb1 *man/cp.plot.Rd
32 | d436a0c2af880fa09e74fb4e483bb281 *man/example.seqz.Rd
33 | 1289aacdf8835553e2029932b5064948 *man/gc.Rd
34 | 7abd12a6f38af576424d55417ec9bf0d *man/model_points.Rd
35 | f14dff388f23e5bff09e926505787867 *man/mutations.Rd
36 | 3c90a936eb5f68993b38381955a71d48 *man/plotWindows.Rd
37 | c35f64702b6f29bb8a5922366b5b4e25 *man/read_seqz.Rd
38 | 73f2cc0d3bf0c141919c9d2cea3aad44 *man/theoretical.Rd
39 | a50ca8ef2629fb91193b2a0660337f43 *man/type_matrix.Rd
40 | 565da5a0ae9ca06b579cf9ce4d0c8f18 *man/windowValues.Rd
41 | ebf3602ece02d98d51766ef8d2baac78 *man/workflow.Rd
42 | ddd5ddf951a4a538c3d4cb03c3673c7a *tests/testthat.R
43 | 5c373f393040d14844556c63d0ee140d *tests/testthat/test.model.R
44 | 92691ae6d8ce244b626ab7ee777071b5 *tests/testthat/test.read.R
45 | 25228690501dd6c00834e253f2002e07 *vignettes/sequenza.Rmd
46 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | export(
  2 |     baf.bayes,
  3 |     baf.model.fit,
  4 |     baf.model.points,
  5 |     baf.types.matrix,
  6 |     chromosome.view,
  7 |     cp.plot,
  8 |     cp.plot.contours,
  9 |     find.breaks,
 10 |     gc.sample.stats,
 11 |     gc.summary.plot,
 12 |     get.ci,
 13 |     mean_gc,
 14 |     median_gc,
 15 |     mufreq.bayes,
 16 |     mufreq.model.fit,
 17 |     mufreq.model.points,
 18 |     mufreq.types.matrix,
 19 |     plotWindows,
 20 |     read.seqz,
 21 |     segment.breaks,
 22 |     sequenza.extract,
 23 |     sequenza.fit,
 24 |     sequenza.results,
 25 |     theoretical.baf,
 26 |     theoretical.depth.ratio,
 27 |     theoretical.mufreq,
 28 |     windowBf,
 29 |     windowValues
 30 | )
 31 | importFrom("squash",
 32 |     "colorgram",
 33 |     "makecmap"
 34 | )
 35 | 
 36 | importFrom("pbapply",
 37 |     "pblapply"
 38 | )
 39 | 
 40 | importFrom("squash",
 41 |     "colorgram",
 42 |     "makecmap"
 43 | )
 44 | importFrom("iotools",
 45 |     "mstrsplit",
 46 |     "chunk.apply"
 47 | )
 48 | 
 49 | importFrom("readr",
 50 |     "read_tsv"
 51 | )
 52 | 
 53 | importFrom("seqminer",
 54 |     "tabix.read"
 55 | )
 56 | 
 57 | importFrom("copynumber",
 58 |     "winsorize",
 59 |     "pcf",
 60 |     "aspcf"
 61 | )
 62 | 
 63 | importFrom("grDevices",
 64 |     "colorRampPalette",
 65 |     "dev.off",
 66 |     "gray.colors",
 67 |     "heat.colors",
 68 |     "palette",
 69 |     "pdf",
 70 |     "rgb"
 71 | )
 72 | importFrom("graphics",
 73 |     "abline",
 74 |     "axis",
 75 |     "barplot",
 76 |     "contour",
 77 |     "legend",
 78 |     "lines",
 79 |     "mtext",
 80 |     "par",
 81 |     "plot",
 82 |     "points",
 83 |     "polygon",
 84 |     "rect",
 85 |     "segments",
 86 |     "strwidth",
 87 |     "text"
 88 | )
 89 | importFrom("stats",
 90 |     "dbinom",
 91 |     "density",
 92 |     "dpois",
 93 |     "dt",
 94 |     "median",
 95 |     "na.exclude",
 96 |     "quantile",
 97 |     "sd",
 98 |     "setNames",
 99 |     "weighted.mean"
100 | )
101 | importFrom("utils",
102 |     "packageVersion",
103 |     "read.delim",
104 |     "read.table",
105 |     "setTxtProgressBar",
106 |     "tail",
107 |     "txtProgressBar",
108 |     "write.table"
109 | )
110 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | Changes in version 3.0.0 (2019-05-09)
 2 |  - Code cleanup and refactoring
 3 |  - Bug fixes
 4 |  - Use readr and stop relying on system commands (grep, sed, gunzip) to read seqz files
 5 |  - Use the package pbapply for progress bar
 6 |  - Normalize each sample separately
 7 |  - Provide coverage profile of the 2 samples separately, before and after normalization
 8 |  - Provide CG-content vs depth profile for each samples, before and after normalization
 9 |  - Draw standard "mirrored" BAF (from 0 to 1) in the raw genome view
10 | 
11 | 
12 | Changes in version 2.1.2 (2015-08-18)
13 |  - Keep the order of the bases corresponding to the major and minor alleles as in the normal sample. (allows mocking haplotype).
14 |  - Fix check for NOTES in newer R versions
15 | 
16 | Changes in version 2.1.1 (2015-01-20)
17 |  - Fix heterozygous detection when importing VarScan2 data of very high coverage seq.
18 |  - Cleanup sequenza-utils.py
19 |  - Update citation info to published manuscript
20 | 
21 | Changes in version 2.1.0 (2014-10-08)
22 |  - Add sequenza-utils.py function bam2seqz
23 |  - Add raw data depth-ratio/Bf in the genome view plots
24 |  - Results model fitting plot using the B-allele/depth-ratio plot.
25 |  - Present alternative solutions using local maxima of the CP plot.
26 |  - Model the expected B-allele frequencies with a t-distribution using the observed sd, rather then taking the 95% of the observed Bf.
27 |  - Use dt instead of dbinom for the BAF and depth ratio model.
28 |  - Fix documentation discrepancies on the sequenza-utils.py execution.
29 |  - Add breaks as optional argument - enables to input custom segmentation -
30 |  - Add different segmentation options, fast, het and full, corresponding to different resolutions.
31 |  - Minor fixes.
32 | 
33 | Changes in version 2.0.0 (2014-04-08)
34 |  - Change seqz names header and some function arguments/formats to improve usability
35 |  - Add to sequenza.fit a method argument to calculate cellularity and ploidy also from mutations
36 |  - Change the recommended file extension from ".abfreq" to ".seqz" for clarity.
37 |  - The "seqz" file now contains a column with strand orientation information.
38 |  - "cp.plot" now plots the scaled log-likelihood.
39 |  - "theoretical.depth.ratio" now implements the correct formula.
40 |  - The "theoretical.*" functions have their arguments rearranged, and defaults changed, for consistency.
41 |  - "sequenza.extract" now has additional filtering arguments.
42 |  - Miscellaneous cleanup/simplification/optimization.
43 | 
44 | Changes in version 1.0.5 (2014-02-04)
45 |  - Add a python utility for binning the data to a desired window size (reducing vastly memory footprint in the analysis)
46 |  - Fix default workflows parameter to detect CN up to 20.
47 | 
48 | Changes in version 1.0.4 (2014-01-16)
49 |  - Add function to import VarScan2 output
50 |  - Add results function, to save results and standard plots.
51 |  - Fix error on loading non-zipped files in the workflow.
52 |  - Ignore error and finish the process if one of the chromosome fails in the workflows.
53 | 
54 | Changes in version 1.0.3 (2013-12-12)
55 |  - Fix grep instructions that broke single chromosome loading
56 | 


--------------------------------------------------------------------------------
/R/bayes.R:
--------------------------------------------------------------------------------
  1 | mufreq.dbinom <- function(mufreq, mufreq.model, depth.t,
  2 |     seq.errors = 0.01, ...) {
  3 |     mufreq.model[mufreq.model == 0] <- seq.errors
  4 |     n.success <- round(mufreq * depth.t, 0)
  5 |     dbinom(x = n.success, size = depth.t, prob = mufreq.model, ...)
  6 | }
  7 | 
  8 | mufreq.dpois <- function(mufreq, mufreq.model, depth.t,
  9 |     seq.errors = 0.01, ...) {
 10 |     mufreq.model[mufreq.model == 0] <- seq.errors
 11 |     n.success <- round(mufreq * depth.t, 0)
 12 |     dpois( x = n.success, lambda = mufreq.model * depth.t, ...)
 13 | }
 14 | 
 15 | baf.dbinom <- function(baf, baf.model, depth.t, ...) {
 16 |     n.success <- round(baf * depth.t, 0)
 17 |     dbinom( x = n.success, size = depth.t, prob = baf.model, ...)
 18 | }
 19 | 
 20 | baf.dpois <- function(baf, baf.model, depth.t, ...) {
 21 |     n.success <- round(baf * depth.t, 0)
 22 |     dpois( x = n.success, lambda = baf.model * depth.t, ...)
 23 | }
 24 | 
 25 | depth.ratio.dbinom <- function(size, depth.ratio, depth.ratio.model, ...) {
 26 |     n.success <- round(size * (depth.ratio / (1 + depth.ratio)), 0)
 27 |     prob <- depth.ratio.model / (1 + depth.ratio.model)
 28 |     dbinom( x = n.success, size = size, prob = prob, ...)
 29 | }
 30 | 
 31 | depth.ratio.dpois <- function(size, depth.ratio, depth.ratio.model, ...) {
 32 |     x <- round(size * depth.ratio, 0)
 33 |     dpois( x = x, lambda = depth.ratio.model * size, ...)
 34 | }
 35 | 
 36 | 
 37 | 
 38 | baf.bayes <- function(Bf, depth.ratio, cellularity, ploidy, avg.depth.ratio,
 39 |     sd.Bf = 0.1, sd.ratio = 0.5, weight.Bf = 1, weight.ratio = 1, CNt.min = 0,
 40 |     CNt.max = 7, CNn = 2, priors.table = data.frame(CN = CNt.min:CNt.max,
 41 |     value = 1), ratio.priority = FALSE) {
 42 | 
 43 |     baf.tab <- data.frame(Bf = Bf, ratio = log(depth.ratio),
 44 |         sd.Bf = sd.Bf, sd.ratio = sd.ratio, weight.Bf = weight.Bf,
 45 |         weight.ratio = weight.ratio)
 46 |     baf_types <- baf.types.matrix(CNt.min = CNt.min,
 47 |         CNt.max = CNt.max, CNn = CNn)
 48 |     model_baf <- baf.model.points(cellularity = cellularity,
 49 |         ploidy = ploidy, baf_types = baf_types,
 50 |         avg.depth.ratio = avg.depth.ratio)
 51 |     model.pts <- cbind(CNt =  baf_types$CNt, A = baf_types$CNt - baf_types$B,
 52 |         B = baf_types$B, model_baf)
 53 | 
 54 |     rows.x <- 1:nrow(baf.tab)
 55 |     priors <- rep(1, nrow(model.pts))
 56 |     for (i in 1:nrow(priors.table)) {
 57 |         priors[model.pts$CNt == priors.table$CN[i]] <- priors.table$value[i]
 58 |     }
 59 |     priors <- priors / sum(priors)
 60 | 
 61 |     bayes.fit <- function (x, mat, model.pts, priors, ratio.priority) {
 62 |         test.ratio <- log(model.pts$depth.ratio)
 63 |         test.baf <- model.pts$BAF
 64 |         min.offset <- 1e-323
 65 |         score.r <- dt2(sd = mat[x, ]$sd.ratio / sqrt(mat[x, ]$weight.ratio),
 66 |             mean = mat[x, ]$ratio, x = test.ratio, df = 5, log = TRUE)
 67 |         score.r <- score.r + log(priors)
 68 |         if (!is.na(mat[x, ]$Bf) & !is.na(mat[x, ]$sd.Bf /
 69 |             sqrt(mat[x, ]$weight.Bf))) {
 70 |             score.b <- dt2(mean = mat[x, ]$Bf, sd = mat[x, ]$sd.Bf /
 71 |                 sqrt(mat[x, ]$weight.Bf), x = test.baf, df = 5, log = TRUE)
 72 |             post.model <- score.r + score.b
 73 |         } else {
 74 |             post.model <- score.r
 75 |         }
 76 |         if (ratio.priority == FALSE) {
 77 |             max.lik <-  which.max(post.model)
 78 |             max.post <- c(as.numeric(model.pts[max.lik, 1:3]),
 79 |                 post.model[max.lik])
 80 |         } else {
 81 |             res.cn <- model.pts$CNt[which.max(score.r)]
 82 |             idx.pts <- model.pts$CNt == res.cn
 83 |             model.lik <- cbind(model.pts[idx.pts, 1:3], post.model[idx.pts])
 84 |             if (is.null(dim(model.lik))) {
 85 |                 max.post <- model.lik
 86 |             } else {
 87 |                 max.post   <- model.lik[which.max(model.lik[, 4]), ]
 88 |             }
 89 |         }
 90 |         if (is.na(mat[x, ]$Bf)) {
 91 |              max.post[2:3] <- NA
 92 |         }
 93 |         max.post
 94 |     }
 95 |     bafs.L <- mapply(FUN = bayes.fit, rows.x, MoreArgs = list(mat = baf.tab,
 96 |         model.pts = model.pts, priors = priors,
 97 |         ratio.priority = ratio.priority), SIMPLIFY = FALSE)
 98 |     bafs.L <- do.call(rbind, bafs.L)
 99 |     colnames(bafs.L) <- c("CNt", "A", "B", "LPP")
100 |     bafs.L
101 | }
102 | 
103 | baf.model.fit <- function(cellularity = seq(0.3, 1, by = 0.01),
104 |     ploidy = seq(1, 7, by = 0.1), mc.cores = getOption("mc.cores", 2L), ...) {
105 | 
106 |     result <- expand.grid(ploidy = ploidy, cellularity = cellularity,
107 |         KEEP.OUT.ATTRS = FALSE)
108 | 
109 |     fit.cp <- function(ii) {
110 |         L.model <- baf.bayes(cellularity = result$cellularity[ii],
111 |             ploidy = result$ploidy[ii], ...)
112 |         sum(L.model[,4])
113 |     }
114 |     bayes.res <- pblapply(X = 1:nrow(result), FUN = fit.cp,
115 |         cl = mc.cores)
116 |     result$LPP <- unlist(bayes.res)
117 |     z <- tapply(result$LPP, list(result$ploidy, result$cellularity), mean)
118 |     x <- as.numeric(rownames(z))
119 |     y <- as.numeric(colnames(z))
120 |     max.lik <- max(result$LPP, na.rm = TRUE)
121 |     LogSumLik <- log(sum(exp(result$LPP - max.lik))) + max.lik
122 |     znorm <- exp(z - LogSumLik)
123 |     list(ploidy = x, cellularity = y, lpp = znorm)
124 | }
125 | 
126 | mufreq.bayes <- function(mufreq, depth.ratio, cellularity,
127 |     ploidy, avg.depth.ratio, weight.mufreq = 100, weight.ratio = 100,
128 |     CNt.min = 1, CNt.max = 7, CNn = 2,
129 |     priors.table = data.frame(CN = CNt.min:CNt.max, value = 1)) {
130 |     mufreq.tab <- data.frame(F = mufreq, ratio = depth.ratio,
131 |         weight.mufreq = weight.mufreq, weight.ratio = weight.ratio)
132 |     mufreq_types <- mufreq.types.matrix(CNt.min = CNt.min,
133 |         CNt.max = CNt.max, CNn = CNn)
134 |     model.pts <- mufreq.model.points(cellularity = cellularity,
135 |         ploidy = ploidy, mufreq_types = mufreq_types,
136 |         avg.depth.ratio = avg.depth.ratio)
137 |     model.pts <- cbind(mufreq_types, model.pts)
138 |     rows.x <- 1:nrow(mufreq.tab)
139 |     priors <- rep(1, nrow(model.pts))
140 |     for (i in 1:nrow(priors.table)) {
141 |         priors[model.pts$CNt ==
142 |             priors.table$CN[i]] <- priors.table$value[i]
143 |     }
144 |     priors <- priors / sum(priors)
145 |     bayes.fit <- function (x, mat, model.pts, priors) {
146 |         test.ratio <- model.pts$depth.ratio
147 |         test.mufrq <- model.pts$mufreqs
148 |         min.offset <- 1e-323
149 |         score.r <- depth.ratio.dbinom(size = mat[x, ]$weight.ratio,
150 |             depth.ratio = mat[x, ]$ratio, test.ratio)
151 |         score.m <- mufreq.dbinom(mufreq = mat[x, ]$F,
152 |             depth.t = mat[x, ]$weight.mufreq, test.mufrq)
153 |         score.r <- score.r * priors
154 |         score.m <- score.m
155 |         post.model <- score.r * score.m
156 |         post.model[post.model == 0] <- min.offset
157 |         res.cn <- model.pts$CNt[which.max(score.r)]
158 |         idx.pts <- model.pts$CNt == res.cn
159 |         model.lik <- cbind(model.pts[idx.pts, 1:3], log(post.model[idx.pts]))
160 |         if (is.null(dim(model.lik))) {
161 |             max.post <- model.lik
162 |         } else {
163 |             max.post <- model.lik[which.max(model.lik[,4]),]
164 |         }
165 |         max.post
166 |     }
167 |     types.L <- mapply(FUN = bayes.fit, rows.x, MoreArgs = list(
168 |         mat = mufreq.tab, model.pts = model.pts,
169 |         priors = priors), SIMPLIFY = FALSE)
170 |    types.L <- do.call(rbind, types.L)
171 |    colnames(types.L) <- c("CNn", "CNt", "Mt", "LPP")
172 |    types.L
173 | }
174 | 
175 | mufreq.model.fit <- function(cellularity = seq(0.3, 1, by = 0.01),
176 |     ploidy = seq(1, 7, by = 0.1),  mc.cores = getOption("mc.cores", 2L), ...) {
177 |    result <- expand.grid(ploidy = ploidy, cellularity = cellularity,
178 |         KEEP.OUT.ATTRS = FALSE)
179 |     fit.cp <- function(ii) {
180 |         L.model <- mufreq.bayes(cellularity = result$cellularity[ii],
181 |             ploidy = result$ploidy[ii], ...)
182 |         sum(L.model[, 4])
183 |     }
184 |     bayes.res <- pblapply(X = 1:nrow(result), FUN = fit.cp,
185 |         cl = mc.cores)
186 |     result$LPP <- unlist(bayes.res)
187 |     z <- tapply(result$LPP, list(result$ploidy, result$cellularity), mean)
188 |     x <- as.numeric(rownames(z))
189 |     y <- as.numeric(colnames(z))
190 |     max.lik <- max(result$LPP, na.rm = TRUE)
191 |     LogSumLik <- log(sum(exp(result$LPP - max.lik))) + max.lik
192 |     znorm <- exp(z - LogSumLik)
193 |     list(ploidy = x, cellularity = y, lpp = znorm)
194 | }
195 | 


--------------------------------------------------------------------------------
/R/extract.R:
--------------------------------------------------------------------------------
  1 | sequenza.extract <- function(file, window = 1e6, overlap = 1,
  2 |     gamma = 80, kmin = 10, gamma.pcf = 140, kmin.pcf = 40,
  3 |     mufreq.treshold = 0.10, min.reads = 40, min.reads.normal = 10,
  4 |     min.reads.baf = 1, max.mut.types = 1, min.type.freq = 0.9,
  5 |     min.fw.freq = 0, verbose = TRUE, chromosome.list = NULL,
  6 |     breaks = NULL, breaks.method = "het", assembly = "hg19",
  7 |     weighted.mean = TRUE, normalization.method = "mean",
  8 |     ignore.normal = FALSE, parallel = 1, gc.stats = NULL,
  9 |     segments.samples = FALSE){
 10 | 
 11 |     if (is.null(gc.stats)) {
 12 |         gc.stats <- gc.sample.stats(file, verbose = verbose,
 13 |             parallel = parallel)
 14 |     }
 15 |     if (normalization.method == "mean") {
 16 |         gc.normal.vect <- mean_gc(gc.stats$normal)
 17 |         gc.tumor.vect  <- mean_gc(gc.stats$tumor)
 18 |         avg_tum_depth <- weighted.mean(x = gc.stats$tumor$depth,
 19 |             w = colSums(gc.stats$tumor$n))
 20 |         avg_nor_depth <- weighted.mean(x = gc.stats$normal$depth,
 21 |             w = colSums(gc.stats$normal$n))
 22 |     } else {
 23 |         gc.normal.vect <- median_gc(gc.stats$normal)
 24 |         gc.tumor.vect  <- median_gc(gc.stats$tumor)
 25 |         avg_tum_depth <- weighted.median(x = gc.stats$tumor$depth,
 26 |             w = colSums(gc.stats$tumor$n))
 27 |         avg_nor_depth <- weighted.median(x = gc.stats$normal$depth,
 28 |             w = colSums(gc.stats$normal$n))
 29 |     }
 30 |     windows.baf   <- list()
 31 |     windows.ratio <- list()
 32 |     windows.raw_ratio <- list()
 33 |     windows.normal <- list()
 34 |     windows.tumor <- list()
 35 |     windows.n_normal <- list()
 36 |     windows.n_tumor <- list()
 37 |     mutation.list <- list()
 38 |     segments.list <- list()
 39 |     segments_samples.list <- list()
 40 |     norm.gc.list <- list()
 41 |     if (is.null(dim(breaks))) {
 42 |         breaks <- NULL
 43 |     }
 44 |     chr.vect <- as.character(gc.stats$file.metrics$chr)
 45 |     if (is.null(chromosome.list)) {
 46 |         chromosome.list <- chr.vect
 47 |     } else {
 48 |         chromosome.list <- chromosome.list[chromosome.list %in% chr.vect]
 49 |     }
 50 |     for (chr in chromosome.list) {
 51 |         if (verbose) {
 52 |             message("Processing ", chr, ":", appendLF = TRUE)
 53 |         }
 54 |         tbi <- file.exists(paste0(file, ".tbi"))
 55 |         if (tbi) {
 56 |             seqz.data   <- read.seqz(file, chr_name = chr)
 57 |         } else {
 58 |             file.lines <- gc.stats$file.metrics[which(chr.vect == chr), ]
 59 |             seqz.data   <- read.seqz(file, n_lines = c(file.lines$start,
 60 |                 file.lines$end))
 61 |         }
 62 | 
 63 |         norm_tumor_depth <- seqz.data$depth.tumor /
 64 |             gc.tumor.vect[as.character(seqz.data$GC.percent)]
 65 |         norm_normal_depth <- seqz.data$depth.normal /
 66 |             gc.normal.vect[as.character(seqz.data$GC.percent)]
 67 |         norm.gc.stats <- depths_gc(
 68 |             depth_n = round(norm_normal_depth * avg_nor_depth, 0),
 69 |             depth_t = round(norm_tumor_depth * avg_tum_depth, 0),
 70 |             gc = seqz.data$GC.percent)
 71 |         if (ignore.normal) {
 72 |             seqz.data$adjusted.ratio <- round(norm_tumor_depth, 3)
 73 |         } else {
 74 |             seqz.data$adjusted.ratio <- round(
 75 |                 norm_tumor_depth / norm_normal_depth, 3)
 76 |         }
 77 |         if (segments.samples == TRUE) {
 78 |             breaks_normal_chr <- breaks_full(
 79 |                 data = data.frame(chromosome = seqz.data$chromosome,
 80 |                                   position = seqz.data$position,
 81 |                                   adjusted.ratio = norm_normal_depth,
 82 |                                   singsAsFactors = FALSE),
 83 |                 gamma = gamma.pcf, kmin = kmin.pcf, assembly = assembly,
 84 |                 breaks.het = NULL)
 85 |             breaks_tumor_chr <- breaks_full(
 86 |                data = data.frame(chromosome = seqz.data$chromosome,
 87 |                                  position = seqz.data$position,
 88 |                                  adjusted.ratio = norm_tumor_depth,
 89 |                                  singsAsFactors = FALSE),
 90 |                gamma = gamma.pcf, kmin = kmin.pcf, assembly = assembly,
 91 |                breaks.het = NULL)
 92 |         } else {
 93 |            breaks_normal_chr <- NULL
 94 |            breaks_tumor_chr <- NULL
 95 |         }
 96 |         seqz.r.win <- windowValues(x = seqz.data$adjusted.ratio,
 97 |             positions = seqz.data$position,
 98 |             chromosomes = seqz.data$chromosome,
 99 |             window = window, overlap = overlap,
100 |             weight = seqz.data$depth.normal)
101 |         seqz.n.win <- windowValues(x = seqz.data$depth.normal / avg_nor_depth,
102 |             positions = seqz.data$position,
103 |             chromosomes = seqz.data$chromosome,
104 |             window = window, overlap = overlap)
105 |         seqz.t.win <- windowValues(x = seqz.data$depth.tumor / avg_tum_depth,
106 |             positions = seqz.data$position,
107 |             chromosomes = seqz.data$chromosome,
108 |             window = window, overlap = overlap)
109 |         seqz.r_r.win <- windowValues(x = seqz.data$depth.ratio / (
110 |                 avg_tum_depth / avg_nor_depth),
111 |             positions = seqz.data$position,
112 |             chromosomes = seqz.data$chromosome,
113 |             window = window, overlap = overlap,
114 |             weight = seqz.data$depth.normal)
115 |         seqz.n_n.win <- windowValues(x = norm_normal_depth,
116 |             positions = seqz.data$position,
117 |             chromosomes = seqz.data$chromosome,
118 |             window = window, overlap = overlap)
119 |         seqz.n_t.win <- windowValues(x = norm_tumor_depth,
120 |             positions = seqz.data$position,
121 |             chromosomes = seqz.data$chromosome,
122 |             window = window, overlap = overlap)
123 | 
124 | 
125 |         seqz.hom <- seqz.data$zygosity.normal == "hom"
126 |         seqz.het <- seqz.data[!seqz.hom, ]
127 |         het.filt <- seqz.het$good.reads >= min.reads.baf
128 |         seqz.het <- seqz.het[het.filt, ]
129 |         het_ok <- nrow(seqz.het) > 0
130 |         if (is.null(breaks)) {
131 |             breaks_chr <- NULL
132 |         } else {
133 |             breaks_chr <- breaks[breaks$chrom == chr, ]
134 |         }
135 |         if (het_ok) {
136 |             seqz.b.win <- windowBf(Af = seqz.het$Af, Bf = seqz.het$Bf,
137 |                 good.reads = seqz.het$good.reads,
138 |                 chromosomes = seqz.het$chromosome,
139 |                 positions = seqz.het$position, conf = 0.95,
140 |                 window = window, overlap = overlap)
141 |         } else {
142 |             seqz.b.win <- list()
143 |             seqz.b.win[[1]] <- data.frame(start = min(seqz.data$position,
144 |                 na.rm = TRUE), end = max(seqz.data$position, na.rm = TRUE),
145 |                 mean = 0, q0 = 0,  q1 = 0, N = 1)
146 |         }
147 |         if (het_ok) {
148 |             breaks_chr <- extract_breaks(data = seqz.data, data_het = seqz.het,
149 |                 ratio = seqz.r.win, baf = seqz.b.win,
150 |                 gamma = gamma, kmin = kmin, breaks = breaks_chr,
151 |                 gamma.pcf = gamma.pcf, kmin.pcf = kmin.pcf,
152 |                 assembly = assembly, chromosome = chr,
153 |                 method = breaks.method)
154 |         } else {
155 |             if (breaks.method == "full") {
156 |                 breaks_chr <- extract_breaks(data = seqz.data,
157 |                     data_het = seqz.het, ratio = seqz.r.win, baf = seqz.b.win,
158 |                     gamma = gamma, kmin = kmin,
159 |                     gamma.pcf = gamma.pcf, kmin.pcf = kmin.pcf,
160 |                     assembly = assembly, chromosome = chr,
161 |                     method = breaks.method)
162 |             }
163 |         }
164 |         if (class(breaks_chr) == "try-error") {
165 |            breaks_chr <- NULL
166 |         }
167 |         if (is.null(breaks_chr) || nrow(breaks_chr) == 0 ||
168 |             length(breaks_chr) == 0) {
169 |             breaks_chr <- data.frame(chrom = chr,
170 |                 start.pos = min(seqz.data$position, na.rm = TRUE),
171 |                 end.pos = max(seqz.data$position, na.rm = TRUE))
172 |         }
173 |         seg.s1 <- segment.breaks(seqz.tab = seqz.data, breaks = breaks_chr,
174 |             min.reads.baf = min.reads.baf, weighted.mean = weighted.mean)
175 | 
176 |         mut.tab   <- mutation.table(seqz.data,
177 |             mufreq.treshold = mufreq.treshold,
178 |             min.reads = min.reads, min.reads.normal = min.reads.normal,
179 |             max.mut.types = max.mut.types, min.type.freq = min.type.freq,
180 |             min.fw.freq = min.fw.freq, segments = seg.s1)
181 | 
182 |         windows.ratio[[which(chromosome.list == chr)]] <- seqz.r.win[[1]]
183 |         windows.raw_ratio[[which(chromosome.list == chr)]] <- seqz.r_r.win[[1]]
184 |         windows.normal[[which(chromosome.list == chr)]] <- seqz.n.win[[1]]
185 |         windows.tumor[[which(chromosome.list == chr)]] <- seqz.t.win[[1]]
186 |         windows.n_normal[[which(chromosome.list == chr)]] <- seqz.n_n.win[[1]]
187 |         windows.n_tumor[[which(chromosome.list == chr)]] <- seqz.n_t.win[[1]]
188 |         windows.baf[[which(chromosome.list == chr)]]   <- seqz.b.win[[1]]
189 |         segments.list[[which(chromosome.list == chr)]] <- seg.s1
190 |         mutation.list[[which(chromosome.list == chr)]] <- mut.tab
191 |         norm.gc.list[[which(chromosome.list == chr)]] <- norm.gc.stats
192 |         segments_samples.list[[which(chromosome.list == chr)]] <- list(
193 |             normal = breaks_normal_chr, tumor = breaks_tumor_chr)
194 | 
195 |         if (verbose) {
196 |             message('   ', nrow(mut.tab), ' variant calls.', appendLF = TRUE)
197 |             message('   ', nrow(seg.s1), ' copy-number segments.', appendLF = TRUE)
198 |             message('   ', nrow(seqz.het), ' heterozygous positions.', appendLF = TRUE)
199 |             message('   ', sum(seqz.hom), ' homozygous positions.', appendLF = TRUE)
200 |         }
201 |     }
202 |     names(windows.baf)   <- chromosome.list
203 |     names(windows.ratio) <- chromosome.list
204 |     names(windows.raw_ratio) <- chromosome.list
205 |     names(windows.normal) <- chromosome.list
206 |     names(windows.tumor) <- chromosome.list
207 |     names(windows.n_normal) <- chromosome.list
208 |     names(windows.n_tumor) <- chromosome.list
209 |     names(mutation.list) <- chromosome.list
210 |     names(segments.list) <- chromosome.list
211 |     names(segments_samples.list) <- chromosome.list
212 | 
213 | 
214 |     gc_norm <- unfold_gc(do.call(rbind, norm.gc.list), stats = FALSE)
215 | 
216 |     if (normalization.method == "mean") {
217 |         avg_tum_ndepth <- weighted.mean(x = gc_norm$tumor$depth,
218 |             w = colSums(gc_norm$tumor$n))
219 |         avg_nor_ndepth <- weighted.mean(x = gc_norm$normal$depth,
220 |             w = colSums(gc_norm$normal$n))
221 |     } else {
222 |         avg_tum_ndepth <- weighted.median(x = gc_norm$tumor$depth,
223 |             w = colSums(gc_norm$tumor$n))
224 |         avg_nor_ndepth <- weighted.median(x = gc_norm$normal$depth,
225 |             w = colSums(gc_norm$normal$n))
226 |     }
227 |     if (ignore.normal) {
228 |         avg_depth_ratio <- avg_tum_ndepth / avg_tum_depth
229 |     } else {
230 |         avg_depth_ratio <- (avg_tum_ndepth / avg_tum_depth) /
231 |             (avg_nor_ndepth / avg_nor_depth)
232 |     }
233 | 
234 |     list(BAF = windows.baf, ratio = windows.ratio,
235 |         raw_ratio = windows.raw_ratio,
236 |         depths =  list(
237 |             raw = list(normal = windows.normal, tumor = windows.tumor),
238 |             norm = list(normal = windows.n_normal, tumor = windows.n_tumor)),
239 |         mutations = mutation.list, segments = segments.list,
240 |         chromosomes = chromosome.list, gc = gc.stats,
241 |         gc_norm = gc_norm, avg.depth.ratio = avg_depth_ratio,
242 |         avg.depth.tumor = avg_tum_depth, avg.depth.normal = avg_nor_depth,
243 |         segments_samples = segments_samples.list)
244 | }
245 | 


--------------------------------------------------------------------------------
/R/fit.R:
--------------------------------------------------------------------------------
 1 | sequenza.fit <- function(sequenza.extract, female = TRUE,
 2 |     N.ratio.filter = 10, N.BAF.filter = 1, segment.filter = 3e6,
 3 |     mufreq.treshold = 0.10, XY = c(X = "X", Y = "Y"),
 4 |     cellularity = seq(0.1, 1, 0.01), ploidy = seq(1, 7, 0.1),
 5 |     ratio.priority = FALSE, method = "baf",
 6 |     priors.table = data.frame(CN = 2, value = 2), chromosome.list = 1:24,
 7 |     mc.cores = getOption("mc.cores", 2L)){
 8 | 
 9 |     if (is.null(chromosome.list)) {
10 |         mut.all <- do.call(rbind, sequenza.extract$mutations)
11 |         mut.all <- na.exclude(mut.all)
12 |         segs.all <- do.call(rbind, sequenza.extract$segments)
13 |     } else {
14 |         mut.all <- do.call(rbind,
15 |             sequenza.extract$mutations[chromosome.list])
16 |         mut.all <- na.exclude(mut.all)
17 |         segs.all <- do.call(rbind,
18 |             sequenza.extract$segments[chromosome.list])
19 |     }
20 |     segs.len <- segs.all$end.pos - segs.all$start.pos
21 |     avg.depth.ratio <- sequenza.extract$avg.depth.ratio
22 | 
23 |     if (method == "baf") {
24 | 
25 |         avg.sd.ratio <- sum(segs.all$sd.ratio * segs.all$N.ratio,
26 |             na.rm = TRUE) / sum(segs.all$N.ratio, na.rm = TRUE)
27 |         avg.sd.Bf <- sum(segs.all$sd.BAF * segs.all$N.BAF, na.rm = TRUE) /
28 |             sum(segs.all$N.BAF, na.rm = TRUE)
29 |         segs.all$sd.BAF[segs.all$sd.BAF == 0] <- max(
30 |             segs.all$sd.BAF, na.rm = TRUE)
31 |         segs.all$sd.ratio[segs.all$sd.ratio == 0] <- max(
32 |             segs.all$sd.ratio, na.rm = TRUE)
33 | 
34 |         segs.filt <- segs.all$N.ratio > N.ratio.filter &
35 |             segs.all$N.BAF > N.BAF.filter
36 |         segs.filt <- segs.len >= segment.filter & segs.filt
37 |         if (female){
38 |             segs.is.xy <- segs.all$chromosome == XY["Y"]
39 |         } else{
40 |             segs.is.xy <- segs.all$chromosome %in% XY
41 |         }
42 |         filt.test  <- segs.filt & !segs.is.xy
43 |         seg.test   <- segs.all[filt.test, ]
44 |         seg.len.mb <- segs.len[filt.test] / 1e6
45 |         baf.model.fit(Bf = seg.test$Bf, depth.ratio = seg.test$depth.ratio,
46 |             sd.ratio = seg.test$sd.ratio, weight.ratio = seg.len.mb,
47 |             sd.Bf = seg.test$sd.BAF, weight.Bf = seg.len.mb,
48 |             avg.depth.ratio = avg.depth.ratio, cellularity = cellularity,
49 |             ploidy = ploidy, priors.table = priors.table,
50 |             mc.cores = mc.cores, ratio.priority = ratio.priority)
51 |     } else if (method == "mufreq") {
52 |         mut.filt <- mut.all$F >= mufreq.treshold
53 |         if (female){
54 |             mut.is.xy  <- mut.all$chromosome == XY["Y"]
55 |         } else{
56 |             mut.is.xy  <- mut.all$chromosome %in% XY
57 |         }
58 |         filt.test  <- mut.filt & !mut.is.xy
59 |         mut.test   <- mut.all[filt.test, ]
60 |         w.mufreq   <- round(mut.test$good.reads, 0)
61 |         mufreq.model.fit(mufreq = mut.test$F,
62 |             depth.ratio = mut.test$adjusted.ratio,
63 |             weight.ratio = 2 * w.mufreq, weight.mufreq = w.mufreq,
64 |             avg.depth.ratio = avg.depth.ratio, cellularity = cellularity,
65 |             ploidy = ploidy, priors.table = priors.table, mc.cores = mc.cores)
66 |     } else {
67 |       stop("The only available methods are \"baf\" and \"mufreq\"")
68 |    }
69 | }
70 | 


--------------------------------------------------------------------------------
/R/gcstats.R:
--------------------------------------------------------------------------------
 1 | gc.sample.stats <- function(file, col_types = "c--dd----d----",
 2 |     buffer = 33554432, parallel = 2L, verbose = TRUE) {
 3 | 
 4 |     con <- gzfile(file, "rb")
 5 | 
 6 |     suppressWarnings(skip_line <- readLines(con, n = 1))
 7 |     remove(skip_line)
 8 |     parse_chunck <- function(x, col_types) {
 9 |         x <- read_tsv(file = paste(mstrsplit(x), collapse = "\n"),
10 |             col_types = col_types, col_names = FALSE,
11 |             skip = 0, n_max = Inf, progress = FALSE)
12 |         u_chr <- unique(x[, 1])
13 |         n_chr <- table(x[, 1])
14 |         gc1 <- lapply(split(x[, 2], x[, 4]), table)
15 |         gc2 <- lapply(split(x[, 3], x[, 4]), table)
16 |         if (verbose){
17 |             message(".", appendLF = FALSE)
18 |         }
19 |         list(unique = u_chr, lines = n_chr, gc_nor = gc1, gc_tum = gc2)
20 |     }
21 |     if (verbose){
22 |         message("Collecting GC information ", appendLF = FALSE)
23 |     }
24 |     res <- chunk.apply(input = con, FUN = parse_chunck, col_types = col_types,
25 |         CH.MAX.SIZE = buffer, parallel = parallel)
26 |     close(con)
27 |     if (verbose){
28 |         message(" done\n")
29 |     }
30 |     unfold_gc(res, stats = TRUE)
31 | }
32 | 
33 | unfold_gc <- function(x, stats = TRUE) {
34 |     gc_norm <- get_gc(x[, "gc_nor"])
35 |     gc_tum <- get_gc(x[, "gc_tum"])
36 |     if (stats) {
37 |         ord_chrom <- unique(Reduce("c", Reduce("c", x[, "unique"])))
38 |         stats_chrom <- Reduce("c", x[, "lines"])
39 |         stats_chrom <- sapply(splash_table(x[, "lines"]), sum)
40 |         stats_chrom <- stats_chrom[ord_chrom]
41 |         stats_start <- cumsum(c(1, stats_chrom[-length(stats_chrom)]))
42 |         stats_end   <- stats_start + stats_chrom - 1
43 |         stats_chrom <- data.frame(chr = ord_chrom, n_lines = stats_chrom,
44 |             start = stats_start, end = stats_end)
45 | 
46 |         list(file.metrics = stats_chrom, normal = gc_norm, tumor = gc_tum)
47 |     } else {
48 |         list(normal = gc_norm, tumor = gc_tum)
49 |     }
50 | }
51 | 
52 | splash_table <- function(lis_obj){
53 |     lis_obj <- Reduce("c", lis_obj)
54 |     split(lis_obj, names(lis_obj))
55 | }
56 | 
57 | get_gc <- function(gc_col) {
58 |     sort_char <- function(x) {
59 |         as.character(sort(as.numeric(x)))
60 |     }
61 |     all_depths <- splash_table(gc_col)
62 |     all_depths <- lapply(all_depths, FUN = function(x) {
63 |         sapply(splash_table(x), sum)
64 |     })
65 |     names_gc <- sort_char(names(all_depths))
66 |     all_depths <- all_depths[names_gc]
67 |     names_depths <- sort_char(unique(Reduce("c", lapply(all_depths, names))))
68 |     n <- do.call(rbind, lapply(all_depths, FUN = function(x, names_depths) {
69 |             res <- x[names_depths]
70 |             names(res) <- names_depths
71 |             res
72 |         },
73 |         names_depths = names_depths))
74 |     n[is.na(n)] <- 0
75 |     list(gc = as.numeric(names_gc), depth = as.numeric(names_depths), n = n)
76 | }
77 | 
78 | median_gc <- function(gc_list) {
79 |     apply(gc_list$n, 1, FUN = function(x, w) {
80 |             weighted.median(x = w, w = x, na.rm = TRUE)
81 |         },
82 |         w = gc_list$depth)
83 | }
84 | 
85 | mean_gc <- function(gc_list) {
86 |     apply(gc_list$n, 1, FUN = function(x, w) {
87 |             weighted.mean(x = w, w = x, na.rm = TRUE)
88 |         },
89 |         w = gc_list$depth)
90 | }
91 | 
92 | depths_gc <- function(depth_n, depth_t, gc) {
93 |     gc_nor <- lapply(split(depth_n, gc), table)
94 |     gc_tum <- lapply(split(depth_t, gc), table)
95 |     list(gc_nor = gc_nor, gc_tum = gc_tum)
96 | }
97 | 


--------------------------------------------------------------------------------
/R/graphics.R:
--------------------------------------------------------------------------------
  1 | plotWindows <- function(seqz.window, m.lty = 1, m.lwd = 3,
  2 |                          m.col = "black", q.bg = "lightblue",
  3 |                          log2.plot = FALSE, n.min = 1, xlim, ylim,
  4 |                          add = FALSE, ...) {
  5 |     if (log2.plot) {
  6 |         seqz.window[, c(3, 4, 5)] <- log2(seqz.window[, c(3, 4, 5)])
  7 |     }
  8 |     if (!add) {
  9 |         if (missing(xlim))
 10 |             xlim <- c(seqz.window$start[1], seqz.window$end[nrow(seqz.window)])
 11 |         if (missing(ylim))
 12 |            ylim <- c(min(seqz.window$q0, na.rm = TRUE),
 13 |                max(seqz.window$q1, na.rm = TRUE))
 14 |         plot(xlim, ylim, type = "n", ...)
 15 |     }
 16 |     seqz.window <- seqz.window[seqz.window$N >= n.min, ]
 17 |     rect(xleft = seqz.window$start, ybottom = seqz.window$q0,
 18 |         xright = seqz.window$end, ytop = seqz.window$q1,
 19 |         col = q.bg, border = NA)
 20 |     segments(y0 = seqz.window$mean, x0 = seqz.window$start,
 21 |         x1 = seqz.window$end, lty = m.lty, lwd = m.lwd, col = m.col)
 22 | 
 23 | }
 24 | 
 25 | gc.plot <- function(gc_list, range.gc = NULL, range.depth = NULL, ...) {
 26 |     n <- gc_list$n
 27 |     n[n == 0] <- NA
 28 |     gc <- gc_list$gc
 29 |     depth <- gc_list$depth
 30 |     if (length(range.gc) == 2) {
 31 |         range.gc <- sort(range.gc)
 32 |         gc.select <- gc <= range.gc[2] & gc >= range.gc[1]
 33 |         gc <- gc[gc.select]
 34 |         n <- n[gc.select, ]
 35 |     }
 36 |     if (length(range.depth) == 2) {
 37 |         range.depth <- sort(range.depth)
 38 |         depth.select <- depth <= range.depth[2] & depth >= range.depth[1]
 39 |         depth <- depth[depth.select]
 40 |         n <- n[, depth.select]
 41 |     }
 42 |     colorgram(x = gc, y = depth, z = n,  ...)
 43 | }
 44 | 
 45 | gc.summary.plot <- function(gc_list, mean.col = 1, median.col = 2,
 46 |     scale.subset = 1.5, ...){
 47 |     mengc <- mean_gc(gc_list)
 48 |     medgc <- median_gc(gc_list)
 49 |     max_depth <- round(max(c(mengc, medgc)) * scale.subset, 0)
 50 |     gc.plot(gc_list, range.depth = c(0, max_depth), ...)
 51 |     lines(x = gc_list$gc, y = mengc, lwd = 3, col = mean.col)
 52 |     lines(x = gc_list$gc, y = medgc, lwd = 3, col = median.col)
 53 |     legend("topright", c("Mean depth", "Median depth"),
 54 |         col = c(mean.col, median.col), bg = "white", lty = 1, lwd = 3)
 55 | }
 56 | 
 57 | cp.plot <- function (cp.table, xlab = "Ploidy", ylab = "Cellularity",
 58 |     zlab = "Scaled rank LPP",
 59 |     colFn = colorRampPalette(c("white", "lightblue")), ...) {
 60 |     z <- matrix(rank(cp.table$lpp), nrow = nrow(cp.table$lpp)) /
 61 |         length(cp.table$lpp)
 62 |     map <- makecmap(c(0, 1), colFn = colFn, include.lowest = TRUE)
 63 |     colorgram(x = cp.table$ploidy, y = cp.table$cellularity, z = z,
 64 |         map = map, las = 1, xlab = xlab, ylab = ylab, zlab = zlab, ...)
 65 | }
 66 | 
 67 | cp.plot.contours <- function(cp.table, likThresh = c(0.95),
 68 |     alternative = TRUE, col = palette(), legend.pos = "bottomright",
 69 |     pch = 18, alt.pch = 3, ...) {
 70 |     znormsort <- sort(cp.table$lpp, decreasing = TRUE)
 71 |     znormcumLik <- cumsum(znormsort)
 72 |     n <- sapply(likThresh, function(x) sum(znormcumLik < x) + 1)
 73 |     LikThresh <- znormsort[n]
 74 |     names(LikThresh) <- paste0(likThresh * 100, "%")
 75 |     contour(x = cp.table$ploidy, y = cp.table$cellularity, z = cp.table$lpp,
 76 |         levels = znormsort[n], col = col, drawlabels = FALSE,
 77 |         xlab = "Ploidy", ylab = "Cellularity", ...)
 78 |     max.xy <- which(cp.table$lpp == max(cp.table$lpp), arr.ind = TRUE)
 79 |     points(x = cp.table$ploidy[max.xy[, "row"]],
 80 |         y = cp.table$cellularity[max.xy[, "col"]], pch = pch)
 81 |     if (alternative == TRUE){
 82 |         alt.sol <- alternative.cp.solutions(cp.table)
 83 |         alt.sol <- alt.sol[-1, ]
 84 |         points(x = alt.sol$ploidy, y = alt.sol$cellularity, pch = alt.pch)
 85 |     }
 86 |     if (!is.na(legend.pos)) {
 87 |         if (alternative == FALSE) {
 88 |             legend(legend.pos, legend = c(paste("C.R.", names(LikThresh),
 89 |                 sep = " "), "Point estimate"),
 90 |                 col = c(col[1:length(LikThresh)], "black"),
 91 |                 lty = c(rep(1, length(LikThresh)), NA),
 92 |                 pch = c(rep(NA, length(LikThresh)), pch),
 93 |                 border = NA, bty = "n")
 94 |         } else {
 95 |             legend(legend.pos, legend = c(paste("C.R.",
 96 |                     names(LikThresh), sep = " "),
 97 |                     "Point estimate", "Alternative solutions"),
 98 |                 col = c(col[1:length(LikThresh)], "black", "black"),
 99 |                 lty = c(rep(1, length(LikThresh)), NA, NA),
100 |                 pch = c(rep(NA, length(LikThresh)), pch, alt.pch),
101 |                 border = NA, bty = "n")
102 |         }
103 |     }
104 |    invisible(LikThresh)
105 | }
106 | 
107 | chromosome.view <- function(baf.windows, ratio.windows, mut.tab = NULL,
108 |     segments = NULL,  min.N.baf = 1, min.N.ratio = 1e4, main = "",
109 |     vlines = FALSE, legend.inset = c(-20 * strwidth("a", units = "figure"), 0),
110 |     CNn = 2, cellularity = NULL, ploidy = NULL,
111 |     avg.depth.ratio = NULL, model.lwd = 1, model.lty = "24", model.col = 1,
112 |     x.chr.space = 10) {
113 |     if (is.null(segments)) {
114 |       data.model <- NULL
115 |     } else {
116 |         if ("CNt" %in% colnames(segments)) {
117 |             if (length(c(cellularity, ploidy, avg.depth.ratio)) != 3) {
118 |                 data.model <- NULL
119 |             } else {
120 |                 data.model <- list()
121 |                 CNt.max <- max(segments$CNt, na.rm = TRUE) + 1
122 |                 CNt.min <- 0
123 |                 baf_types <- baf.types.matrix(CNt.min = CNt.min,
124 |                     CNt.max = CNt.max, CNn = 2)
125 |                 data.model$baf <- baf.model.points(cellularity = cellularity,
126 |                     ploidy = ploidy, baf_types = baf_types,
127 |                     avg.depth.ratio = avg.depth.ratio)
128 |                 data.model$baf <- data.frame(CNt =  baf_types$CNt,
129 |                     A = baf_types$CNt - baf_types$B, B = baf_types$B,
130 |                     data.model$baf)
131 |                 mufreq_types <- mufreq.types.matrix(CNt.min = CNt.min,
132 |                     CNt.max = CNt.max, CNn = CNn)
133 |                 data.model$muf <- cbind(mufreq_types,
134 |                     mufreq.model.points(cellularity = cellularity,
135 |                         ploidy = ploidy, mufreq_types = mufreq_types,
136 |                         avg.depth.ratio = avg.depth.ratio))
137 |             }
138 |         } else {
139 |             data.model <- NULL
140 |         }
141 |     }
142 |     if (is.null(mut.tab)) {
143 |         par(mar = c(0, 4, 0, 3), oma = c(5, 0, 4, 0),
144 |             mfcol = c(2, 1), xaxt = "n")
145 |         min.x <- min(c(min(baf.windows$start), min(ratio.windows$start)))
146 |         max.x <- max(c(max(baf.windows$end), max(ratio.windows$end)))
147 |         xlim <- c(min.x, max.x)
148 |     } else {
149 |         min.x <- min(c(min(baf.windows$start), min(ratio.windows$start),
150 |             min(mut.tab$position)))
151 |         max.x <- max(c(max(baf.windows$end), max(ratio.windows$end),
152 |             max(mut.tab$position)))
153 |         xlim <- c(min.x, max.x)
154 |         par(mar = c(0, 4, 0, 10), oma = c(5, 0, 4, 0),
155 |             mfcol = c(3, 1), xaxt = "n", xpd = TRUE)
156 |         mutation.colors <- c(
157 |             "A>C" = rgb(red = 0, green = 178, blue = 238,
158 |                 alpha = 120, maxColorValue = 255),
159 |             "T>G" = rgb(red =   0, green = 178, blue = 238,
160 |                 alpha = 120, maxColorValue = 255),
161 |             "A>G" = rgb(red = 255, green = 64, blue = 64,
162 |                 alpha = 120, maxColorValue = 255),
163 |             "T>C" = rgb(red = 255, green = 64, blue = 64,
164 |                 alpha = 120, maxColorValue = 255),
165 |             "A>T" = rgb(red =  34, green = 139, blue = 34,
166 |                 alpha = 120, maxColorValue = 255),
167 |             "T>A" = rgb(red =  34, green = 139, blue = 34,
168 |                 alpha = 120, maxColorValue = 255),
169 |             "C>A" = rgb(red = 139, green = 90, blue = 0,
170 |                 alpha = 120, maxColorValue = 255),
171 |             "G>T" = rgb(red = 139, green = 90, blue = 0,
172 |                 alpha = 120, maxColorValue = 255),
173 |             "C>G" = rgb(red = 127, green =   0, blue = 255,
174 |                 alpha = 120, maxColorValue = 255),
175 |             "G>C" = rgb(red = 127, green =   0, blue = 255,
176 |                 alpha = 120, maxColorValue = 255),
177 |             "C>T" = rgb(red = 255, green = 215, blue = 0,
178 |                 alpha = 120, maxColorValue = 255),
179 |             "G>A" = rgb(red = 255, green = 215, blue = 0,
180 |                 alpha = 120, maxColorValue = 255))
181 |         plot(x = mut.tab$position, y = mut.tab$F,
182 |             ylab = "Mutant allele frequency", las = 1, pch = 19,
183 |             col = c(mutation.colors, "NA" = NA)[as.character(mut.tab$mutation)],
184 |             ylim = c(min(mut.tab$F, na.rm = TRUE), 1), xlim = xlim)
185 |         unique.colors <- unique(mutation.colors)
186 |         labels <- sapply(unique.colors, function(a) {
187 |             paste(names(mutation.colors)[mutation.colors == a],
188 |                 collapse = ", ")
189 |             })
190 |         legend(y = "center", x  = "right", legend = labels,
191 |             inset = legend.inset, pch = 19, col = unique.colors,
192 |             pt.bg = unique.colors, border = NA, bty = "n")
193 |         if (!is.null(segments)){
194 |             if (vlines) {
195 |                 abline(v = segments$end.pos, lwd = 1, lty = 2)
196 |             }
197 |             if (!is.null(data.model)) {
198 |                 for (i in 1:nrow(segments)) {
199 |                 segments(x0 = segments$start.pos[i],
200 |                     x1 = segments$end.pos[i],
201 |                     y0 = unique(data.model$muf$mufreqs[
202 |                         data.model$muf$CNt == segments$CNt[i]]),
203 |                     lwd = model.lwd, lty = model.lty, col = model.col)
204 |                 }
205 |             }
206 |         }
207 |     }
208 |     if (!is.null(segments)){
209 |         plot(ylab = "B allele frequency", type = "n",
210 |             x = xlim, y = c(0, 0.5), las = 1)
211 |         plotWindows(baf.windows, ylab = "B allele frequency",
212 |                   xlim = xlim, ylim = c(0, 0.5), las = 1,
213 |                   n.min = min.N.baf, add = TRUE)
214 |         if (vlines) {
215 |             abline(v = segments$end.pos, lwd = 1, lty = 2)
216 |         }
217 |         segments(x0 = segments$start.pos, y0 = segments$Bf,
218 |             x1 = segments$end.pos, y1 = segments$Bf, col = "red", lwd = 3)
219 |         if (!is.null(data.model)) {
220 |             for (i in 1:nrow(segments)) {
221 |                segments(x0 = segments$start.pos[i], x1 = segments$end.pos[i],
222 |                         y0 = unique(data.model$baf$BAF[
223 |                             data.model$baf$CNt == segments$CNt[i]]),
224 |                         lwd = model.lwd, lty = model.lty, col = model.col)
225 |             }
226 |         }
227 |     } else {
228 |         plotWindows(baf.windows, ylab = "B allele frequency",
229 |             xlim = xlim, ylim = c(0, 0.5), las = 1, n.min = min.N.baf)
230 |     }
231 |     plotWindows(ratio.windows, ylab = "Depth ratio",
232 |         las = 1, n.min = min.N.ratio, ylim = c(0, 2.5))
233 |     if (!is.null(segments)){
234 |         if (vlines) {
235 |             abline(v = segments$end.pos, lwd = 1, lty = 2)
236 |         }
237 |         segments(x0 = segments$start.pos, y0 = segments$depth.ratio,
238 |             x1 = segments$end.pos, y1 = segments$depth.ratio,
239 |             col = "red", lwd = 3)
240 |         if (!is.null(data.model)) {
241 |             ratios.theoric <- unique(data.model$muf[, c("CNt", "depth.ratio")])
242 |             segments(x0 = rep(min(segments$start.pos, na.rm = TRUE),
243 |                     times = nrow(ratios.theoric)),
244 |                 x1 = rep(max(segments$end.pos, na.rm = TRUE),
245 |                     times = nrow(ratios.theoric)),
246 |                 y0 = ratios.theoric$depth.ratio, lwd = model.lwd,
247 |                 lty = model.lty, col = model.col)
248 | 
249 |             axis(labels = as.character(ratios.theoric$CNt), side = 4,
250 |                 line = 0, las = 1, at = ratios.theoric$depth.ratio)
251 |             mtext(text = "Copy number", side = 4, line = 2,
252 |                 cex = par("cex.lab") * par("cex"))
253 |         }
254 |     }
255 |     par(xaxt = "s")
256 |     axis(labels = as.character(round(seq(xlim[1] / 1e6, xlim[2] / 1e6,
257 |             by = x.chr.space), 0)),
258 |         side = 1, line = 0, at = seq(xlim[1], xlim[2], by = 1e6 * x.chr.space),
259 |         outer = FALSE, cex = par("cex.axis") * par("cex"))
260 |     mtext("Position (Mb)", side = 1, line = 3, outer = FALSE,
261 |         cex = par("cex.lab") * par("cex"))
262 |     mtext(main, 3, outer = TRUE, cex = par("cex.main") * par("cex"), line = 2)
263 | }
264 | 
265 | genome.view <- function(seg.cn, info.type = "AB", ...) {
266 |     chr.order <- unique(seg.cn$chromosome)
267 |     seg.list <- split(x = seg.cn[,
268 |         c("chromosome", "start.pos", "end.pos", "A", "B", "CNt")],
269 |         f = seg.cn$chromosome)
270 | 
271 |     seg.list <- seg.list[order(order(chr.order))]
272 |     seg.max <- lapply(X = seg.list, FUN = function(x) x[nrow(x), "end.pos" ])
273 |     seg.pos <- lapply(seg.list, "[", TRUE, c("start.pos", "end.pos"))
274 |     seg.max <- cumsum(as.numeric(do.call(rbind, seg.max)))
275 |     chr.offset <- 0
276 |     for (i in 1:length(seg.pos)){
277 |         seg.pos[[i]] <- seg.pos[[i]] + chr.offset
278 |         colnames(seg.pos[[i]]) <- c("abs.start", "abs.end")
279 |         chr.offset <- seg.max[i]
280 |     }
281 |     seg.max <- sapply(X = seg.pos, FUN = function(x) x[nrow(x), "abs.end" ])
282 |     abs.list <- mapply(cbind, seg.list, seg.pos, SIMPLIFY = FALSE)
283 |     abs.segments <- do.call(rbind, abs.list)
284 |     if (info.type == "AB") {
285 |         na_As <- is.na(abs.segments$A)
286 |         max_A <- max(abs.segments$A, na.rm = TRUE)
287 |         abs.segments$A[na_As] <- abs.segments$CNt[na_As]
288 |         plot(x = c(min(abs.segments$abs.start), max(abs.segments$abs.end)),
289 |             y = c(-0.1, (max_A + 0.1)), type = "n",
290 |             ylab = "Copy number", xlab = "Position (Mb)",
291 |             xaxt = "n",  yaxt = "n", xaxs = "i", ...)
292 |         axis(labels = 0:max_A, at = 0:max_A, side = 2, line = 0, las = 1)
293 |         segments(x0 = abs.segments$abs.start, x1 = abs.segments$abs.end,
294 |             y0 = (abs.segments$B - 0.1), y1 = (abs.segments$B - 0.1),
295 |             col = "blue", lwd = 5, lend = 1)
296 |         segments(x0 = abs.segments$abs.start, x1 = abs.segments$abs.end,
297 |             y0 = (abs.segments$A + 0.1), y1 = (abs.segments$A + 0.1),
298 |             col = "red", lwd = 5, lend = 1)
299 |     } else {
300 |         min_CNt <- min(abs.segments$CNt, na.rm = TRUE)
301 |         max_CNt <- max(abs.segments$CNt, na.rm = TRUE)
302 |         plot(x = c(min(abs.segments$abs.start), max(abs.segments$abs.end)),
303 |             y = c(min_CNt, max_CNt), type = "n",
304 |             ylab = "Copy number", xlab = "Position (Mb)",
305 |             xaxt = "n", yaxt = "n", xaxs = "i", ...)
306 |         axis(labels = min_CNt:max_CNt,
307 |             at = min_CNt:max_CNt,
308 |             side = 2, line = 0, las = 1)
309 |         segments(x0 = abs.segments$abs.start, x1 = abs.segments$abs.end,
310 |             y0 = abs.segments$CNt, y1 = abs.segments$CNt, col = "red",
311 |             lwd = 5, lend = 1)
312 |     }
313 |     abline(v = c(0, seg.max), lty = 3)
314 |     for (i in 1:length(abs.list)){
315 |         max.pos <- nrow(abs.list[[i]])
316 |         mtext(chr.order[i], side = 3, line = 0,
317 |             at = sum(abs.list[[i]]$abs.start[1],
318 |                 abs.list[[i]]$abs.end[max.pos]) / 2)
319 |     }
320 |     axis(labels = as.character(round(seq(abs.list[[1]]$start.pos[1] / 1e6,
321 |         abs.list[[1]]$end.pos[nrow(abs.list[[1]])] / 1e6, by = 50), 0)),
322 |         at = seq(abs.list[[1]]$abs.start[1],
323 |             abs.list[[1]]$abs.end[nrow(abs.list[[1]])], by = 5e7),
324 |         outer = FALSE, cex = par("cex.axis") * par("cex"), side = 1,
325 |         line = 1)
326 | }
327 | 
328 | plotRawGenome <- function(sequenza.extract, cellularity,
329 |     ploidy, CNt.max = 7, main = "", mirror.BAF = TRUE, ...){
330 |     max.end <- sapply(sequenza.extract$ratio, FUN = function(x) {
331 |         max(x$end, na.rm = TRUE)
332 |     })
333 |     max.end <- c(0, cumsum(as.numeric(max.end)))
334 |     chrs <- names(sequenza.extract$ratio)
335 |     coords.names <- (max.end + c(diff(max.end) / 2, 0))[1:length(chrs)]
336 |     new.coords <- function(win.list, max.end){
337 |         lapply(1:length(win.list), FUN = function(x) {
338 |             y <- win.list[[x]]
339 |             y$start <- y$start + max.end[x]
340 |             y$end <- y$end + max.end[x]
341 |             y
342 |         }
343 |     )}
344 |     new.coords.segs <- function(segs, max.end){
345 |         lapply(1:length(segs), FUN = function(x) {
346 |             y <- segs[[x]]
347 |             y$start.pos <- y$start.pos + max.end[x]
348 |             y$end.pos <- y$end.pos + max.end[x]
349 |             y
350 |         }
351 |     )}
352 | 
353 |     ratio.new <- new.coords(sequenza.extract$ratio, max.end)
354 |     BAF.new <- new.coords(sequenza.extract$BAF, max.end)
355 | 
356 |     segs.new <- do.call(rbind,
357 |         new.coords.segs(sequenza.extract$segments, max.end))
358 |     avg.depth.ratio <- 1
359 | 
360 |     par(mar = c(1, 4, 0, 3), oma = c(5, 0, 4, 0), mfcol = c(2, 1), ...)
361 | 
362 |     if (mirror.BAF) {
363 |         AAF.new <- lapply(BAF.new, function(x) {
364 |             x[, 3:5] <- 1 - x[, 3:5]
365 |             x
366 |         })
367 |         plot(x = c(min(max.end), max(max.end)), y = c(0, 1), main = main,
368 |             xlab = NA, ylab = "Allele frequency", type = "n", las = 1,
369 |             xaxs = "i", yaxs = "i", xaxt = "n" )
370 |             plotWindows(seqz.window = do.call(rbind, AAF.new),
371 |                 q.bg = "lightblue", m.col = "black", add = T)
372 |             segments(x0 = segs.new$start.pos, x1 = segs.new$end.pos,
373 |                 y0 = 1 - (segs.new$Bf), y1 = 1 - (segs.new$Bf),
374 |                 col = "red", lwd = 2, lend = 1)
375 |     } else {
376 |         plot(x = c(min(max.end), max(max.end)), y = c(0, 0.5), main = main,
377 |             xlab = NA, ylab = "B allele frequency", type = "n", las = 1,
378 |             xaxs = "i", yaxs = "i", xaxt = "n" )
379 |     }
380 |     plotWindows(seqz.window = do.call(rbind, BAF.new), q.bg = "lightblue",
381 |         m.col = "black", add = T)
382 |     segments(x0 = segs.new$start.pos, x1 = segs.new$end.pos,
383 |         y0 = segs.new$Bf, y1 = segs.new$Bf, col = "red", lwd = 2, lend = 1)
384 |     abline(v = max.end, lty = 1)
385 |     plot(x = c(min(max.end), max(max.end)), y = c(0, 2.5), main = "",
386 |         xlab = NA, ylab = "Depth ratio", type = "n", las = 1, xaxs = "i",
387 |         yaxs = "i", xaxt = "n")
388 |     plotWindows(seqz.window = do.call(rbind, ratio.new), q.bg = "lightblue",
389 |         m.col = "black", add = T)
390 |     segments(x0 = segs.new$start.pos, x1 = segs.new$end.pos,
391 |         y0 = (segs.new$depth.ratio), y1 = (segs.new$depth.ratio),
392 |         col = "red", lwd = 2, lend = 1)
393 |     if (!missing(ploidy) & !missing(cellularity)){
394 |         types <- baf.types.matrix(CNt.min = 0, CNt.max = CNt.max, CNn = 2)
395 |         depth.ratios <- baf.model.points(cellularity = cellularity,
396 |             ploidy = ploidy, avg.depth.ratio = avg.depth.ratio,
397 |             baf_types = types)[, "depth.ratio"]
398 |         depth.ratios <- unique(data.frame(CNt = types$CNt,
399 |             ratio = depth.ratios))
400 |         abline(h = depth.ratios$ratio, lty = 2)
401 |         axis(labels = as.character(depth.ratios$CNt), side = 4,
402 |             line = 0, las = 1, at = depth.ratios$ratio)
403 |         mtext(text = "Copy number", side = 4, line = 2,
404 |             cex = par("cex.lab") * par("cex"))
405 |     }
406 |     abline(v = max.end, lty = 1)
407 |     axis(labels = chrs, at = coords.names, side = 1, cex.axis = 1)
408 | }
409 | 
410 | baf.model.view <- function(cellularity, ploidy, segs,
411 |     BAF.space = seq(0.001, 0.5, 0.005), ratio.space = seq(0.01, 2.5, 0.05),
412 |     avg.depth.ratio = 1, CNt.max = 7, segment.filter = 3e6, col = "black") {
413 |     s.b   <- mean(segs$sd.BAF, na.rm = TRUE)
414 |     s.r   <- mean(segs$sd.ratio, na.rm = TRUE)
415 |     l.s   <- segs$end.pos - segs$start.pos
416 |     s.big <- l.s >= segment.filter
417 |     test.values <- expand.grid(Bf = BAF.space, ratio = ratio.space,
418 |         KEEP.OUT.ATTRS = FALSE)
419 |     both.space  <- baf.bayes(Bf = test.values$Bf, CNt.max = CNt.max,
420 |         CNt.min = 0, depth.ratio = test.values$ratio,
421 |         cellularity = cellularity, ploidy = ploidy,
422 |         avg.depth.ratio = avg.depth.ratio, sd.Bf = s.b, weight.Bf = 10,
423 |         sd.ratio = s.r, weight.ratio = 10, ratio.priority = F, CNn = 2)
424 |     both.space <- as.data.frame(both.space)
425 |     z <- tapply(both.space$LPP, list(test.values$Bf, test.values$ratio), mean)
426 |     x <- as.numeric(rownames(z))
427 |     y <- as.numeric(colnames(z))
428 |     t <- baf.types.matrix(CNt.min = 0, CNt.max = CNt.max, CNn = 2)
429 |     mpts <- cbind(t, baf.model.points(cellularity = cellularity,
430 |         ploidy = ploidy, baf_types = t, avg.depth.ratio = avg.depth.ratio)
431 |     )
432 |     mpts <- unique(mpts[, c("CNt", "depth.ratio")])
433 |     par(mar = c(5.1, 4.1, 4.1, 4.1))
434 |     rev.heat <- function(...){rev(heat.colors(...))}
435 |     suppressWarnings(colorgram(x, y, z, key = NA, nz = 1000,
436 |         xlab = "B allele frequency", ylab = "Depth ratio",
437 |         main = paste("cellularity:", cellularity, "ploidy:", ploidy,
438 |         "sd.BAF:", round(s.b, 2), sep = " "),
439 |         map = makecmap(z, breaks = unique(quantile(z, seq(0.25, 1, 0.0001))),
440 |             right = TRUE, n = 1000, colFn = rev.heat), outlier = "white",
441 |         las = 1, xlim = c(0, 0.5)))
442 |     axis(side = 4, at = mpts$depth.ratio, labels = mpts$CNt, las = 1)
443 |     mtext(text = "Copy number", side = 4, line = 2)
444 |     segs$col <- col
445 |     points(x = segs$Bf[s.big], y = segs$depth.ratio[s.big],
446 |         pch = 1, cex = 1, col = segs$col[s.big])
447 |     points(x = segs$Bf[!s.big], y = segs$depth.ratio[!s.big], pch = ".",
448 |         cex = 1, col = segs$col[!s.big])
449 | }
450 | 


--------------------------------------------------------------------------------
/R/misc.R:
--------------------------------------------------------------------------------
 1 | dt2 <- function(x, df, ncp, log = FALSE, mean, sd) {
 2 |     x2 <- (x - mean) / sd
 3 |     dt(x2, df = df, ncp = ncp, log = log)
 4 | }
 5 | 
 6 | split_chr_coord <- function (x) {
 7 |     # Ensure that there is a start and a end coordinate
 8 |     split_chr <- strsplit(x, split = ":")[[1]]
 9 |     chromosome <- split_chr[1]
10 |     start_end <- split_chr[2]
11 |     split_coors <- strsplit(start_end, split = "-")[[1]]
12 |     start <- split_coors[1]
13 |     end <- split_coors[2]
14 |     if (is.na(start)) {
15 |         start <- 1
16 |     }
17 |     if (is.na(end)){
18 |         end <- 2147483647
19 |     }
20 |     paste0(chromosome, ":", start, "-", end)
21 | }
22 | 
23 | weighted.median <- function(x, w, na.rm=TRUE, ties=NULL) {
24 |     if (missing(w)) {
25 |         w <- rep(1, length(x))
26 |     }
27 |     if (na.rm == TRUE) {
28 |         keep <- !(is.na(x) | is.na(w));
29 |         x <- x[keep]
30 |         w <- w[keep]
31 |     } else if (any(is.na(x))) {
32 |         return(NA)
33 |     }
34 | 
35 |     if (any(w < 0)) {
36 |         stop("The weight vactor can only contains positive numbers")
37 |     }
38 |     n <- length(w)
39 |     keep <- (w > 0)
40 |     nkeep <- sum(keep)
41 |     if (nkeep < n) {
42 |         x <- x[keep]
43 |         w <- w[keep]
44 |         n <- nkeep
45 |     }
46 |     wInfs <- is.infinite(w)
47 |     if (any(wInfs)) {
48 |         x <- x[wInfs]
49 |         n <- length(x)
50 |         w <- rep(1, n)
51 |     }
52 | 
53 |     if (n == 0) {
54 |         return(NA)
55 |     }
56 |     ord <- order(x)
57 |     x <- x[ord]
58 |     w <- w[ord]
59 |     wcum <- cumsum(w)
60 |     wsum <- wcum[n]
61 |     wmid <- wsum / 2
62 |     lows <- (wcum <= wmid)
63 |     k  <- sum(lows)
64 | 
65 |     if (k == 0) {
66 |         return(x[1])
67 |     }
68 |     if (k == n) {
69 |         return(x[n])
70 |     }
71 | 
72 |     wlow  <- wcum[k]
73 |     whigh <- wsum - wlow
74 |     if (whigh > wmid) {
75 |         return(x[k + 1])
76 |     }
77 |     (wlow * x[k] + whigh * x[k + 1]) / wsum
78 | }
79 | 


--------------------------------------------------------------------------------
/R/model.R:
--------------------------------------------------------------------------------
 1 | theoretical.depth.ratio <- function(CNt, cellularity, ploidy, CNn = 2,
 2 |     normal.ploidy = 2, avg.depth.ratio = 1) {
 3 |     cellu_copy_term <- (1 - cellularity) + (CNt / CNn * cellularity)
 4 |     ploidy_cellu_term <- (ploidy / normal.ploidy * cellularity) +
 5 |         1 - cellularity
 6 |     avg.depth.ratio * cellu_copy_term / ploidy_cellu_term
 7 | }
 8 | 
 9 | theoretical.baf <- function(CNt, B, cellularity, CNn = 2) {
10 |     baf <- ( (B * cellularity) + ( 1 - cellularity) ) /
11 |         ( (CNt * cellularity) + CNn * ( 1 - cellularity) )
12 |     baf[CNn <= 1] <- NA
13 |     baf
14 | }
15 | 
16 | theoretical.mufreq <- function(CNt, Mt, cellularity, CNn = 2) {
17 |     normal_alleles <- (CNt - Mt) * cellularity + CNn * (1 - cellularity)
18 |     all_alleles <- (CNt * cellularity) + CNn * (1 - cellularity)
19 |     1 - (normal_alleles / all_alleles)
20 | }
21 | 
22 | baf.types.matrix <- function(CNt.min, CNt.max, CNn = 2) {
23 |     cn_ratio_vect <- seq(from = CNt.min / CNn, to = CNt.max / CNn,
24 |         by = 1 / CNn)
25 |     CNt <- cn_ratio_vect * CNn
26 |     if (CNn < 2) {
27 |         b_comb <- lapply(CNt, FUN = function(x) 0)
28 |     } else {
29 |         b_comb <- lapply(CNt, FUN = function(x) {
30 |             seq(from = 0, to = trunc(x / 2))
31 |         })
32 |     }
33 |     times_b <- sapply(b_comb, length)
34 |     CNt <- rep(CNt, times = times_b)
35 |     B <- unlist(b_comb)
36 |     data.frame(CNn = CNn, CNt = CNt, B = B)
37 | }
38 | 
39 | mufreq.types.matrix <- function(CNt.min, CNt.max, CNn = 2) {
40 |     cn_ratio_vect <- seq(from = CNt.min / CNn,
41 |         to = CNt.max / CNn, by = 1 / CNn)
42 |     CNt <- cn_ratio_vect * CNn
43 |     mut_comb <- lapply(CNt, FUN = function(x) seq(from = 0, to = x))
44 |     times_muts <- sapply(mut_comb, length)
45 |     data.frame(CNn = CNn, CNt = rep(CNt, times = times_muts),
46 |         Mt = unlist(mut_comb))
47 | }
48 | 
49 | baf.model.points <- function(cellularity, ploidy, baf_types, avg.depth.ratio) {
50 |     depth_ratio <- theoretical.depth.ratio(cellularity = cellularity,
51 |         ploidy = ploidy, CNn = baf_types[, "CNn"], CNt = baf_types[, "CNt"],
52 |         avg.depth.ratio = avg.depth.ratio)
53 |     baf <- theoretical.baf(cellularity = cellularity, CNn = baf_types[, "CNn"],
54 |         CNt = baf_types[, "CNt"], B = baf_types[, "B"])
55 |     data.frame(BAF = baf, depth.ratio = depth_ratio)
56 | }
57 | 
58 | mufreq.model.points <- function(cellularity, ploidy, mufreq_types,
59 |     avg.depth.ratio) {
60 |     mufreqs <- theoretical.mufreq(cellularity = cellularity,
61 |         CNn = mufreq_types[, "CNn"], CNt = mufreq_types[, "CNt"],
62 |         Mt = mufreq_types[, "Mt"])
63 |     depth_ratio <- theoretical.depth.ratio(cellularity = cellularity,
64 |         ploidy = ploidy, CNn = mufreq_types[, "CNn"],
65 |         CNt = mufreq_types[, "CNt"], avg.depth.ratio = avg.depth.ratio)
66 |     data.frame(mufreqs = mufreqs, depth.ratio = depth_ratio)
67 | }
68 | 
69 | b_allele_freq <- function(Af, Bf, good.reads, conf = 0.95) {
70 |     if (length(Bf) > 1) {
71 |         dd <- density(c(Bf, Af), weight = c(good.reads, good.reads) /
72 |             (2 * sum(good.reads)))
73 |         points.max <- which(diff(sign(diff(dd$y))) == -2) + 1
74 |         if (length(points.max) < 1) {
75 |             points.max <- which(diff(sign(diff(dd$y))) == -1) + 1
76 |         }
77 |         l.max <- dd$x[points.max]
78 |         d.max <- dd$y[points.max]
79 |         b.val <- l.max[which.max(dd$y[dd$x %in% l.max])]
80 |         if (length(b.val) < 1) {
81 |             message('WARNING', l.max, d.max, b.val)
82 |             b.val <- min(l.max)
83 |         }
84 |         d.val <- d.max[which(l.max == b.val)]
85 |         b.range <- range(dd$x[dd$y >=  d.val - (d.val * (1 - conf))])
86 |         if (b.val > 0.5) {
87 |             b.val <- 1 - b.val
88 |         }
89 |         max_diff <- max(b.range) - b.val
90 |         min_diff <- b.val - min(b.range)
91 |         min_diff <- min(c(max_diff, min_diff))
92 |         c(b.val - min_diff, b.val, b.val + min_diff)
93 |     } else if (length(Bf) == 1) {
94 |         c(Bf, Bf, Bf)
95 |     } else {
96 |         c(NA, NA, NA)
97 |     }
98 | }
99 | 


--------------------------------------------------------------------------------
/R/mutations.R:
--------------------------------------------------------------------------------
 1 | mut.fractions <- function(AB.tumor, Af, tumor.strand) {
 2 |     F = 1 - Af
 3 |     base.mut <- lapply(X = AB.tumor, FUN = function(x) {
 4 |         unlist(strsplit(as.character(x), split = "[:]"))
 5 |     })
 6 |     base.fw  <- lapply(X = tumor.strand, FUN = function(x) {
 7 |         unlist(strsplit(as.character(x), split = "[:]"))
 8 |     })
 9 |     frequencify <- function (x) {
10 |         base.name <- substr(unlist(x), 1, 1)
11 |         base.val  <- as.numeric(substr(unlist(x), 2, nchar(x)))
12 |         setNames(base.val, base.name)
13 |     }
14 |     base.freqs <- lapply(X = base.mut, FUN = frequencify)
15 |     fw.freqs   <- lapply(X = base.fw,  FUN = frequencify)
16 |     n.base.mut <- do.call(c, lapply(X = base.mut, FUN = length))
17 |     max.fq <- function (x) {
18 |         freq.rel <- base.freqs[[x]] / F[x]
19 |         f.max    <- which.max(freq.rel)
20 |         c(freq.rel[f.max], names(base.freqs[[x]])[f.max],
21 |             base.freqs[[x]][f.max], fw.freqs[[x]][f.max])
22 |     }
23 |     max.freqs  <- do.call(rbind, lapply(1:length(F), max.fq))
24 |     data.frame(base.count = as.integer(n.base.mut),
25 |         maj.base.freq = as.numeric(max.freqs[, 1]),
26 |         base = as.character(max.freqs[, 2]),
27 |         freq = as.numeric(max.freqs[, 3]),
28 |         fw.freq = as.numeric(max.freqs[, 4]))
29 | }
30 | 
31 | mutation.table <- function(seqz.tab, mufreq.treshold = 0.15,
32 |     min.reads = 40, min.reads.normal = 10, max.mut.types = 3,
33 |     min.type.freq = 0.9, min.fw.freq = 0, segments = NULL) {
34 |     chroms <- unique(seqz.tab$chromosome)
35 |     hom.filt <- seqz.tab$zygosity.normal == "hom" &
36 |        seqz.tab$AB.tumor != "."
37 |     seqz.tab <- seqz.tab[hom.filt, ]
38 |     reads.filt <- seqz.tab$good.reads >= min.reads &
39 |         seqz.tab$depth.normal >= min.reads.normal
40 |     seqz.tab <- seqz.tab[reads.filt, ]
41 |     mufreq.filt <- seqz.tab$Af <= (1 - mufreq.treshold)
42 |     seqz.tab <- seqz.tab[mufreq.filt, ]
43 |     if (!is.null(segments)) {
44 |         for (i in 1:nrow(segments)) {
45 |             pos.filt <- seqz.tab$chromosome == segments$chromosome[i] &
46 |                 seqz.tab$position >= segments$start.pos[i] &
47 |                 seqz.tab$position <= segments$end.pos[i]
48 |             seqz.tab$adjusted.ratio[pos.filt] <- segments$depth.ratio[i]
49 |         }
50 |     }
51 |     seqz.dummy   <- data.frame(chromosome = chroms, position = 1,
52 |         GC.percent = NA, good.reads = NA, adjusted.ratio = NA,
53 |         F = 0, mutation = "NA", stringsAsFactors = FALSE)
54 |     if (nrow(seqz.tab) >= 1) {
55 |         mu.fracts   <- mut.fractions(AB.tumor = seqz.tab$AB.tumor,
56 |             Af = seqz.tab$Af, tumor.strand = seqz.tab$tumor.strand)
57 |         mufreq.filt <- mu.fracts$freq >= mufreq.treshold
58 |         type.filt   <- mu.fracts$base.count <= max.mut.types
59 |         prop.filt   <- mu.fracts$maj.base.freq >= min.type.freq
60 |         if (!is.na(min.fw.freq)) {
61 |             fw.2 <- 1 - min.fw.freq
62 |             fw.2 <- sort(c(fw.2, min.fw.freq))
63 |             fw.filt     <- mu.fracts$fw.freq > fw.2[1] &
64 |                 mu.fracts$fw.freq < fw.2[2]
65 |             mufreq.filt <- mufreq.filt & type.filt  & prop.filt & fw.filt
66 |         } else {
67 |             mufreq.filt <- mufreq.filt & type.filt  & prop.filt
68 |         }
69 |         mut.type    <- paste(seqz.tab$AB.normal, mu.fracts$base, sep = ">")
70 |         seqz.tab     <- seqz.tab[, c("chromosome", "position", "GC.percent",
71 |             "good.reads", "adjusted.ratio")]
72 |         seqz.tab     <- cbind(seqz.tab, F = mu.fracts$freq,
73 |             mutation = mut.type)
74 |         rbind(seqz.tab[mufreq.filt, ], seqz.dummy)
75 |     } else {
76 |         seqz.dummy
77 |    }
78 | }
79 | 


--------------------------------------------------------------------------------
/R/read.R:
--------------------------------------------------------------------------------
 1 | read.seqz <- function(file, n_lines = NULL, col_types = "ciciidddcddccc",
 2 |     chr_name = NULL, buffer = 33554432, parallel = 1,
 3 |     col_names = c("chromosome", "position", "base.ref", "depth.normal",
 4 |                   "depth.tumor", "depth.ratio", "Af", "Bf", "zygosity.normal",
 5 |                   "GC.percent", "good.reads", "AB.normal", "AB.tumor",
 6 |                   "tumor.strand"), ...) {
 7 | 
 8 |     if (is.null(n_lines)) {
 9 |         skip <- 1
10 |         n_max <- Inf
11 |     } else {
12 |         n_lines <- round(sort(n_lines), 0)
13 |         skip <- n_lines[1]
14 | 
15 |         n_max <- n_lines[2] - skip + 1
16 |     }
17 |     if (!is.null(chr_name)) {
18 |         chr_name <- as.character(chr_name)
19 |         tbi <- file.exists(paste(file, "tbi", sep = "."))
20 |         if (tbi) {
21 |             read.seqz.tbi(file, split_chr_coord(chr_name),
22 |                 col_names, col_types)
23 |         } else {
24 |             read.seqz.chr(file, chr_name = chr_name, col_types = col_types,
25 |                 col_names = col_names, buffer = buffer, parallel = parallel)
26 |         }
27 |     } else {
28 |         read_tsv(file = file, col_types = col_types, skip = skip,
29 |             n_max = n_max, col_names = col_names, progress = FALSE, ...)
30 |     }
31 | }
32 | 
33 | read.seqz.chr <- function(file, chr_name, col_names,
34 |     col_types, buffer, parallel) {
35 |     con <- gzfile(file, "rb")
36 |     suppressWarnings(skip_line <- readLines(con, n = 1))
37 |     remove(skip_line)
38 |     parse_chunck <- function(x, chr_name, col_names, col_types) {
39 |         x <- read_tsv(file = paste(mstrsplit(x), collapse = "\n"),
40 |             col_types = col_types, skip = 0, n_max = Inf,
41 |             col_names = col_names, progress = FALSE)
42 |         x[x$chromosome == chr_name, ]
43 |     }
44 |     res <- chunk.apply(input = con, FUN = parse_chunck, chr_name = chr_name,
45 |         col_names = col_names, col_types = col_types, CH.MAX.SIZE = buffer,
46 |         parallel = parallel)
47 |     close(con)
48 |     res
49 | }
50 | 
51 | read.seqz.tbi <- function(file, chr_name, col_names, col_types) {
52 |     res <- tabix.read(file, chr_name)
53 |     res <- read_tsv(file = paste(mstrsplit(res), collapse = "\n"),
54 |         col_types = col_types, skip = 0, n_max = Inf,
55 |         col_names = col_names, progress = FALSE)
56 | }
57 | 


--------------------------------------------------------------------------------
/R/results.R:
--------------------------------------------------------------------------------
  1 | sequenza.results <- function(sequenza.extract, cp.table = NULL,
  2 |     sample.id, out.dir = getwd(), cellularity = NULL, ploidy = NULL,
  3 |     female = TRUE, CNt.max = 20, ratio.priority = FALSE,
  4 |     XY = c(X = "X", Y = "Y"), chromosome.list = 1:24){
  5 |     if(!file.exists(out.dir)) {
  6 |         dir.ok <- dir.create(path = out.dir, recursive = TRUE)
  7 |         if(!dir.ok) {
  8 |             stop("Directory does not exist and cannot be created: ", out.dir)
  9 |         }
 10 |     }
 11 |     make_filename <- function(x){
 12 |         file.path(out.dir, paste(sample.id, x, sep = "_"))
 13 |     }
 14 |     cp.file   <- make_filename("CP_contours.pdf")
 15 |     cint.file <- make_filename("confints_CP.txt")
 16 |     chrw.file <- make_filename("chromosome_view.pdf")
 17 |     depths.file <- make_filename("chromosome_depths.pdf")
 18 |     gc.file <- make_filename("gc_plots.pdf")
 19 |     geno.file <- make_filename("genome_view.pdf")
 20 |     cn.file <- make_filename("CN_bars.pdf")
 21 |     fit.file <- make_filename("model_fit.pdf")
 22 |     alt.file <- make_filename("alternative_solutions.txt")
 23 |     afit.file <- make_filename("alternative_fit.pdf")
 24 |     muts.file <- make_filename("mutations.txt")
 25 |     segs.file <- make_filename("segments.txt")
 26 |     robj.extr <- make_filename("sequenza_extract.RData")
 27 |     robj.fit <- make_filename("sequenza_cp_table.RData")
 28 |     log.file <- make_filename("sequenza_log.txt")
 29 |     seg.tab <- do.call(rbind, sequenza.extract$segments[chromosome.list])
 30 |     seg.len <- (seg.tab$end.pos - seg.tab$start.pos) / 1e6
 31 | 
 32 |     avg.depth.ratio <- sequenza.extract$avg.depth.ratio
 33 |     assign(x = paste0(sample.id, "_sequenza_extract"),
 34 |         value = sequenza.extract)
 35 |     save(list = paste0(sample.id, "_sequenza_extract"), file = robj.extr)
 36 |     if (is.null(cp.table) && (is.null(cellularity) || is.null(ploidy))){
 37 |         stop("cp.table and/or cellularity and ploidy argument are required.")
 38 |     }
 39 | 
 40 |     pdf(gc.file, width = 10, height = 5)
 41 |     par(mfrow=c(1, 2))
 42 |     gc.summary.plot(sequenza.extract$gc$normal, mean.col = "lightsalmon",
 43 |         median.col = "lightgreen", las = 1, xlab = "GC %", ylab = "Depth",
 44 |         zlab = "N", main = "GC vs raw depth in the normal sample")
 45 |     gc.summary.plot(sequenza.extract$gc_norm$normal, mean.col = "lightsalmon",
 46 |         median.col = "lightgreen", las = 1, xlab = "GC %", ylab = "Depth",
 47 |         zlab = "N", main = "GC vs normalized depth in the normal sample")
 48 |     gc.summary.plot(sequenza.extract$gc$tumor, mean.col = "lightsalmon",
 49 |         median.col = "lightgreen", las = 1, xlab = "GC %", ylab = "Depth",
 50 |         zlab = "N", main = "GC vs raw depth in the tumor sample")
 51 |     gc.summary.plot(sequenza.extract$gc_norm$tumor, mean.col = "lightsalmon",
 52 |         median.col = "lightgreen", las = 1, xlab = "GC %", ylab = "Depth",
 53 |         zlab = "N", main = "GC vs normalized depth in the tumor sample")
 54 |     dev.off()
 55 |     pdf(depths.file, height = 10, width = 15)
 56 |     for (i in unique(seg.tab$chromosome)) {
 57 |         max_coord_chr_i <- max(sequenza.extract$ratio[[i]]$end)
 58 |         par(mfcol = c(3, 2), xaxt = "n", mar = c(0, 4, 3, 0), oma = c(5, 0, 4, 0))
 59 |         plotWindows(sequenza.extract$depths$raw$normal[[i]],
 60 |             ylab = "normal depth", ylim = c(0, 2.5),
 61 |             main = paste("raw", i, sep = " "))
 62 |         plotWindows(sequenza.extract$depths$raw$tumor[[i]],
 63 |             ylab = "tumor depth", ylim = c(0, 2.5))
 64 |         plotWindows(sequenza.extract$raw_ratio[[i]],
 65 |             ylab = "depth ratio", ylim = c(0, 2.5))
 66 |         par(xaxt = "s")
 67 |         axis(labels = as.character(round(seq(0, max_coord_chr_i / 1e6,
 68 |                 by = 10), 0)),
 69 |             side = 1, line = 0, at = seq(0, max_coord_chr_i, by = 1e7),
 70 |             outer = FALSE, cex = par("cex.axis") * par("cex"))
 71 |         mtext("Position (Mb)", side = 1, line = 3, outer = FALSE,
 72 |             cex = par("cex.lab") * par("cex"))
 73 |         par(xaxt = "n")
 74 |         plotWindows(sequenza.extract$depths$norm$normal[[i]],
 75 |             ylab = "normal depth", ylim = c(0, 2.5),
 76 |             main = paste("normalized", i, sep = " "))
 77 |         plotWindows(sequenza.extract$depths$norm$tumor[[i]],
 78 |             ylab = "tumor depth", ylim = c(0, 2.5))
 79 |         plotWindows(sequenza.extract$ratio[[i]],
 80 |             ylab = "depth ratio", ylim = c(0, 2.5))
 81 |         par(xaxt = "s")
 82 |         axis(labels = as.character(round(seq(0, max_coord_chr_i / 1e6,
 83 |                 by = 10), 0)),
 84 |             side = 1, line = 0, at = seq(0, max_coord_chr_i, by = 1e7),
 85 |             outer = FALSE, cex = par("cex.axis") * par("cex"))
 86 |         mtext("Position (Mb)", side = 1, line = 3, outer = FALSE,
 87 |             cex = par("cex.lab") * par("cex"))
 88 |     }
 89 |     dev.off()
 90 | 
 91 |     if (!is.null(cp.table)){
 92 |         assign(x = paste0(sample.id, "_sequenza_cp_table"), value = cp.table)
 93 |         save(list = paste0(sample.id, "_sequenza_cp_table"), file = robj.fit)
 94 |         cint <- get.ci(cp.table)
 95 |         pdf(cp.file)
 96 |             cp.plot(cp.table)
 97 |             cp.plot.contours(cp.table, add = TRUE,
 98 |                 likThresh = c(0.95), col = "red", pch = 20)
 99 |             if (!is.null(cellularity) || !is.null(ploidy)) {
100 |                 if (is.null(cellularity)) {
101 |                     cellularity <- cint$max.cellularity
102 |                 }
103 |                 if (is.null(ploidy)) {
104 |                     ploidy <- cint$max.ploidy
105 |                 }
106 |                 points(x = ploidy, y = cellularity, pch = 5)
107 |                 text(x = ploidy, y = cellularity, labels = "User selection",
108 |                     pos = 3, offset = 0.5)
109 |             } else {
110 |                 cellularity <- cint$max.cellularity
111 |                 ploidy <- cint$max.ploidy
112 |             }
113 |         dev.off()
114 |     }
115 |     mut.tab <- na.exclude(do.call(rbind,
116 |         sequenza.extract$mutations[chromosome.list]))
117 |     if (female){
118 |         segs.is.xy <- seg.tab$chromosome == XY["Y"]
119 |         mut.is.xy <- mut.tab$chromosome == XY["Y"]
120 |     } else{
121 |         segs.is.xy <- seg.tab$chromosome %in% XY
122 |         mut.is.xy <- mut.tab$chromosome %in% XY
123 |     }
124 |     avg.sd.ratio <- sum(seg.tab$sd.ratio * seg.tab$N.ratio, na.rm = TRUE) /
125 |         sum(seg.tab$N.ratio, na.rm = TRUE)
126 |     avg.sd.Bf <- sum(seg.tab$sd.BAF * seg.tab$N.BAF, na.rm = TRUE) /
127 |         sum(seg.tab$N.BAF, na.rm = TRUE)
128 |     cn.alleles  <- baf.bayes(Bf = seg.tab$Bf[!segs.is.xy], CNt.max = CNt.max,
129 |         depth.ratio = seg.tab$depth.ratio[!segs.is.xy],
130 |         cellularity = cellularity, ploidy = ploidy,
131 |         avg.depth.ratio = avg.depth.ratio,
132 |         sd.ratio = seg.tab$sd.ratio[!segs.is.xy],
133 |         weight.ratio = seg.len[!segs.is.xy],
134 |         sd.Bf = seg.tab$sd.BAF[!segs.is.xy],
135 |         weight.Bf = 1, ratio.priority = ratio.priority, CNn = 2)
136 |     seg.res <- cbind(seg.tab[!segs.is.xy, ], cn.alleles)
137 |     if (!female){
138 |         if (sum(segs.is.xy) >= 1) {
139 |             cn.alleles  <- baf.bayes(Bf = NA, CNt.max = CNt.max,
140 |                 depth.ratio = seg.tab$depth.ratio[segs.is.xy],
141 |                 cellularity = cellularity, ploidy = ploidy,
142 |                 avg.depth.ratio = avg.depth.ratio,
143 |                 sd.ratio = seg.tab$sd.ratio[segs.is.xy],
144 |                 weight.ratio = seg.len[segs.is.xy], sd.Bf = NA,
145 |                 weight.Bf = NA, ratio.priority = ratio.priority, CNn = 1)
146 |             seg.xy <- cbind(seg.tab[segs.is.xy, ], cn.alleles)
147 |             seg.res <- rbind(seg.res, seg.xy)
148 |         }
149 |     }
150 |     write.table(seg.res, file = segs.file, col.names = TRUE,
151 |         row.names = FALSE, sep = "\t", quote = FALSE)
152 |     if (nrow(mut.tab) > 0) {
153 |         mut.alleles  <- mufreq.bayes(mufreq = mut.tab$F[!mut.is.xy],
154 |             CNt.max = CNt.max,
155 |             depth.ratio = mut.tab$adjusted.ratio[!mut.is.xy],
156 |             cellularity = cellularity, ploidy = ploidy,
157 |             avg.depth.ratio = avg.depth.ratio, CNn = 2)
158 |         mut.res <- cbind(mut.tab[!mut.is.xy, ], mut.alleles)
159 |         if (!female){
160 |             if (sum(mut.is.xy) >= 1) {
161 |                 mut.alleles <- mufreq.bayes(mufreq = mut.tab$F[mut.is.xy],
162 |                     CNt.max = CNt.max,
163 |                     depth.ratio = mut.tab$adjusted.ratio[mut.is.xy],
164 |                     cellularity = cellularity, ploidy = ploidy,
165 |                     avg.depth.ratio = avg.depth.ratio, CNn = 1)
166 |                 mut.xy <- cbind(mut.tab[mut.is.xy, ], mut.alleles)
167 |                 mut.res <- rbind(mut.res, mut.xy)
168 |             }
169 |         }
170 |         write.table(mut.res, file = muts.file, col.names = TRUE,
171 |             row.names = FALSE, sep = "\t", quote = FALSE)
172 |     }
173 |     pdf(chrw.file)
174 |     for (i in unique(seg.res$chromosome)) {
175 |         if (!female && i %in% XY){
176 |             CNn <- 1
177 |         } else {
178 |             CNn <- 2
179 |         }
180 |         chromosome.view(mut.tab = sequenza.extract$mutations[[i]],
181 |             baf.windows = sequenza.extract$BAF[[i]],
182 |             ratio.windows = sequenza.extract$ratio[[i]],
183 |             cellularity = cellularity, ploidy = ploidy, main = i,
184 |             segments = seg.res[seg.res$chromosome == i, ],
185 |             avg.depth.ratio = avg.depth.ratio, CNn = CNn, min.N.ratio = 1)
186 |     }
187 |     dev.off()
188 |     pdf(geno.file, height = 5, width = 15)
189 |     if (sum(!is.na(seg.res$A)) > 0) {
190 |         genome.view(seg.res)
191 |     }
192 |     genome.view(seg.res, "CN")
193 |     plotRawGenome(sequenza.extract, cellularity = cellularity, ploidy = ploidy,
194 |         mirror.BAF = TRUE)
195 |     dev.off()
196 |     barscn <- data.frame(size = seg.res$end.pos - seg.res$start.pos,
197 |         CNt = seg.res$CNt)
198 |     cn.sizes <- split(barscn$size, barscn$CNt)
199 |     cn.sizes <- sapply(cn.sizes, "sum")
200 |     pdf(cn.file)
201 |     barplot(round(cn.sizes / sum(cn.sizes) * 100), names = names(cn.sizes),
202 |         las = 1, ylab = "Percentage (%)", xlab = "Copy number")
203 |     dev.off()
204 | 
205 |     ## Write down the results.... ploidy etc...
206 |     if (!is.null(cp.table)){
207 |         res.tab <- data.frame(cellularity = c(cint$confint.cellularity[1],
208 |             cint$max.cellularity[1], cint$confint.cellularity[2]),
209 |             ploidy.estimate = c(cint$confint.ploidy[1], cint$max.ploidy[1],
210 |                 cint$confint.ploidy[2]),
211 |             ploidy.mean.cn = weighted.mean(x = as.integer(names(cn.sizes)),
212 |                 w = cn.sizes))
213 |         write.table(res.tab, cint.file, col.names = TRUE,
214 |             row.names = FALSE, sep = "\t", quote = FALSE)
215 |     }
216 |     pdf(fit.file, width = 6, height = 6)
217 |         baf.model.view(cellularity = cellularity, ploidy = ploidy,
218 |             segs = seg.res[!segs.is.xy, ])
219 |     dev.off()
220 |     if (!is.null(cp.table)){
221 |         alt.sol <- alternative.cp.solutions(cp.table)
222 |         write.table(alt.sol, file = alt.file, col.names = TRUE,
223 |             row.names = FALSE, sep = "\t", quote = FALSE)
224 |         pdf(afit.file)
225 |         for (sol in 1:nrow(alt.sol)){
226 |             baf.model.view(cellularity = alt.sol$cellularity[sol],
227 |                 ploidy = alt.sol$ploidy[sol], segs = seg.res[!segs.is.xy, ])
228 |         }
229 |         dev.off()
230 |     }
231 |     file_conn <- file(log.file)
232 |     writeLines(c(date(), paste("Sequenza version:",
233 |         packageVersion("sequenza"), sep = " ")), file_conn)
234 |     close(file_conn)
235 | }
236 | 


--------------------------------------------------------------------------------
/R/segments.R:
--------------------------------------------------------------------------------
  1 | extract_breaks <- function(data, data_het, ratio, baf, breaks, gamma, kmin,
  2 |     gamma.pcf, kmin.pcf, assembly, chromosome,
  3 |     method = c("het", "full", "fast")) {
  4 |     method_list <- c("het", "full", "fast")
  5 |     if (is.null(breaks)) {
  6 |         if (method %in% method_list) {
  7 |             if (method == "fast"){
  8 |                 breaks <- breaks_fast(ratio_win = ratio, baf_win = baf,
  9 |                     gamma = gamma, kmin = kmin, chr = chromosome)
 10 |             } else {
 11 |                 breaks <- breaks_het(data = data_het, gamma = gamma,
 12 |                     kmin = kmin, assembly = assembly)
 13 |             }
 14 |             if (method == "full") {
 15 |                 breaks <- breaks_full(data = data, gamma = gamma.pcf,
 16 |                     kmin = kmin.pcf, assembly, breaks.het = breaks)
 17 |             }
 18 |         } else {
 19 |             stop("Available methods are \'full\', \'het\' and \'fast\'.")
 20 |         }
 21 |     }
 22 |     breaks
 23 | }
 24 | 
 25 | 
 26 | breaks_het <- function(data, gamma, kmin, assembly){
 27 |     try(
 28 |         find.breaks(data, gamma = gamma, assembly = assembly,
 29 |             kmin = kmin, baf.thres = c(0, 0.5)),
 30 |     silent = FALSE)
 31 | }
 32 | 
 33 | breaks_full <- function(data, gamma, kmin,
 34 |     assembly, breaks.het = NULL) {
 35 |     merge.breaks <- function (breaks, breaks.het) {
 36 |         merged.breaks <- unique(sort(c(breaks$start.pos,
 37 |             breaks$end.pos, breaks.het$start.pos, breaks.het$end.pos)))
 38 |         merged.breaks <- merged.breaks[diff(merged.breaks) > 1]
 39 |         merged.start <- merged.breaks
 40 |         merged.start[-1] <- merged.start[-1] + 1
 41 |         breaks <- data.frame(chrom = unique(breaks$chrom),
 42 |             start.pos = merged.start[- (length(merged.start))],
 43 |             end.pos = merged.breaks[-1])
 44 |     }
 45 |     breaks <- find.breaks(data, gamma = gamma, kmin = kmin,
 46 |         assembly = assembly, seg.algo = "pcf")
 47 |     if (!is.null(breaks.het)) {
 48 |         chr.p <- merge.breaks(breaks[breaks$arm == "p", ],
 49 |             breaks.het[breaks.het$arm == "p", ])
 50 |         chr.q <- merge.breaks(breaks[breaks$arm == "q", ],
 51 |             breaks.het[breaks.het$arm == "q", ])
 52 |         breaks <- rbind(chr.p, chr.q)
 53 |     }
 54 |     breaks
 55 | }
 56 | 
 57 | breaks_fast <- function(ratio_win, baf_win, chr, gamma, kmin) {
 58 |     BAF <- data.frame(chrom = chr,
 59 |         pos = c(baf_win[[1]]$start, tail(baf_win[[1]]$end, n = 1)),
 60 |         s1 = c(baf_win[[1]]$mean, tail(baf_win[[1]]$mean, n = 1)))
 61 |     logR <- data.frame(chrom = chr,
 62 |         pos = c(ratio_win[[1]]$start, tail(ratio_win[[1]]$end, n = 1)),
 63 |         s1 = c(log2(ratio_win[[1]]$mean),
 64 |             log2(tail(ratio_win[[1]]$mean, n = 1))))
 65 |     cat(nrow(BAF), nrow(logR), "\n")
 66 |     not.cover <- is.na(logR$s1) & is.na(BAF$s1)
 67 |     BAF  <- BAF[!not.cover, ]
 68 |     logR <- logR[!not.cover, ]
 69 |     logR.wins <- copynumber::winsorize(logR, verbose = FALSE)
 70 |     allele.seg <- copynumber::aspcf(logR = logR.wins, BAF = BAF,
 71 |         baf.thres = c(0, 0.5), verbose = FALSE,
 72 |         gamma = gamma, kmin = kmin)
 73 |     if (length(grep("^chr", chr)) > 0) {
 74 |         allele.seg$chrom <- paste0("chr", allele.seg$chrom)
 75 |     }
 76 |     breaks   <- allele.seg[, c("chrom", "start.pos", "end.pos")]
 77 |     not.uniq <- which(breaks$end.pos == c(breaks$start.pos[-1], 0))
 78 |     breaks$end.pos[not.uniq] <- breaks$end.pos[not.uniq] - 1
 79 |     breaks
 80 | }
 81 | 
 82 | 
 83 | find.breaks <- function(seqz.baf, gamma = 80, kmin = 10,
 84 |     baf.thres = c(0, 0.5), verbose = FALSE, seg.algo = "aspcf", ...) {
 85 |     chromosome <- gsub(x = seqz.baf$chromosome,
 86 |         pattern = "chr", replacement = "")
 87 |     logR = data.frame(chrom = chromosome,
 88 |         pos = seqz.baf$position,
 89 |         s1 = log2(seqz.baf$adjusted.ratio))
 90 |     logR.wins <- copynumber::winsorize(logR, verbose = verbose)
 91 |     if (seg.algo == "aspcf"){
 92 |         BAF = data.frame(chrom = chromosome,
 93 |             pos = seqz.baf$position,
 94 |             s1 = seqz.baf$Bf)
 95 |         allele.seg <- copynumber::aspcf(logR = logR.wins,
 96 |             BAF = BAF, baf.thres = baf.thres, verbose = verbose,
 97 |             gamma = gamma, kmin = kmin, ...)
 98 |     } else if (seg.algo == "pcf") {
 99 |         allele.seg <- copynumber::pcf(data = logR.wins, verbose = verbose,
100 |             gamma = gamma, kmin = kmin, ...)
101 |     } else {
102 |       stop("Segmentation algorithm must be either \'aspcf\' or \'pcf\'.")
103 |     }
104 |     if (length(grep("chr", seqz.baf$chromosome)) > 0) {
105 |         allele.seg$chrom <- paste("chr", allele.seg$chrom, sep = "")
106 |     }
107 |     breaks   <- allele.seg[, c("chrom", "start.pos", "end.pos", "arm")]
108 |     not.uniq <- which(breaks$end.pos == c(breaks$start.pos[-1],0))
109 |     breaks$end.pos[not.uniq] <- breaks$end.pos[not.uniq] - 1
110 |     breaks
111 | }
112 | 
113 | segment.breaks <- function(seqz.tab, breaks, min.reads.baf = 1,
114 |     weighted.mean = TRUE) {
115 |     if (weighted.mean){
116 |         w.r <- sqrt(seqz.tab$depth.normal)
117 |         rw <- seqz.tab$adjusted.ratio * w.r
118 |         w.b <- sqrt(seqz.tab$good.reads)
119 |         bw <- seqz.tab$Bf * w.b
120 |         seqz.tab <- cbind(seqz.tab[, c("chromosome", "position",
121 |             "zygosity.normal", "good.reads", "Af", "Bf")],
122 |             rw = rw, w.r = w.r, bw = bw, w.b = w.b)
123 |     }
124 |     chr.order <- unique(seqz.tab$chromosome)
125 |     seqz.tab <- split(seqz.tab, f = seqz.tab$chromosome)
126 |     segments <- list()
127 |     for (i in 1:length(seqz.tab)) {
128 |         seqz.b.i <- seqz.tab[[i]][seqz.tab[[i]]$zygosity.normal == "het", ]
129 |         seqz.b.i <- seqz.b.i[seqz.b.i$good.reads >= min.reads.baf, ]
130 |         breaks.i <- breaks[breaks$chrom == names(seqz.tab)[i], ]
131 |         nb <- nrow(breaks.i)
132 |         breaks.vect <- do.call(cbind, split.data.frame(breaks.i[,
133 |             c("start.pos", "end.pos")], f = 1:nb))
134 |         unique.breaks <- function(b, offset = 1) {
135 |             while(any(diff(b) == 0)) {
136 |                 b[which(diff(b) == 0) + 1] <- b[diff(b) == 0] + offset
137 |             }
138 |             b
139 |         }
140 |         breaks.vect <- unique.breaks(b = as.numeric(breaks.vect), offset = 1)
141 |         fact.r.i <- cut(seqz.tab[[i]]$position, breaks.vect)
142 |         fact.b.i <- cut(seqz.b.i$position, breaks.vect)
143 |         seg.i.s.r <- sapply(X = split(seqz.tab[[i]]$chromosome,
144 |             f = fact.r.i), FUN = length)
145 |         seg.i.s.b <- sapply(X = split(seqz.b.i$chromosome,
146 |             f = fact.b.i), FUN = length)
147 | 
148 |         if (weighted.mean) {
149 |             seg.i.rw    <- sapply(X = split(seqz.tab[[i]]$rw, f = fact.r.i),
150 |                 FUN = function(a) sum(a, na.rm = TRUE))
151 |             seg.i.w.r   <- sapply(X = split(seqz.tab[[i]]$w.r, f = fact.r.i),
152 |                 FUN = function(a) sum(a, na.rm = TRUE))
153 |             seg.i.r.sd  <- sapply(X = split(seqz.tab[[i]]$rw /
154 |                 seqz.tab[[i]]$w.r, f = fact.r.i),
155 |                 FUN = function(a) sd(a, na.rm = TRUE))
156 |             seg.i.b.sd  <- sapply(X = split(seqz.b.i$bw /
157 |                 seqz.b.i$w.b, f = fact.b.i),
158 |                 FUN = function(a) sd(a, na.rm = TRUE))
159 |             A.split <- split(seqz.b.i$Af, f = fact.b.i)
160 |             B.split <- split(seqz.b.i$Bf, f = fact.b.i)
161 |             d.split <- split(seqz.b.i$good.reads, f = fact.b.i)
162 |             window.quantiles <- mapply(b_allele_freq, Af = A.split,
163 |                 Bf = B.split, good.reads = d.split, conf = 0.95)
164 |             segments.i <- data.frame(chromosome  = names(seqz.tab)[i],
165 |                 start.pos = as.numeric(breaks.vect[-length(breaks.vect)]),
166 |                 end.pos = as.numeric(breaks.vect[-1]),
167 |                 Bf = window.quantiles[2, ], N.BAF = seg.i.s.b,
168 |                 sd.BAF = seg.i.b.sd, depth.ratio = seg.i.rw / seg.i.w.r,
169 |                 N.ratio = seg.i.s.r, sd.ratio = seg.i.r.sd,
170 |                 stringsAsFactors = FALSE)
171 |         } else {
172 |             seg.i.r    <- sapply(X = split(seqz.tab[[i]]$adjusted.ratio,
173 |                 f = fact.r.i), FUN = function(a) mean(a, na.rm = TRUE))
174 |             A.split <- split(seqz.b.i$Af, f = fact.b.i)
175 |             B.split <- split(seqz.b.i$Bf, f = fact.b.i)
176 |             d.split <- split(seqz.b.i$good.reads, f = fact.b.i)
177 |             window.quantiles <- mapply(b_allele_freq, Af = A.split,
178 |                 Bf = B.split, good.reads = d.split, conf = 0.95)
179 |             seg.i.r.sd <- sapply(X = split(seqz.tab[[i]]$adjusted.ratio,
180 |                 f = fact.r.i), FUN = function(a) sd(a, na.rm = TRUE))
181 |             seg.i.b.sd <- sapply(X = split(seqz.b.i$Bf, f = fact.b.i),
182 |                 FUN = function(a) sd(a, na.rm = TRUE))
183 |             segments.i <- data.frame(chromosome  = names(seqz.tab)[i],
184 |                 start.pos = as.numeric(breaks.vect[-length(breaks.vect)]),
185 |                 end.pos = as.numeric(breaks.vect[-1]),
186 |                 Bf = window.quantiles[2, ], N.BAF = seg.i.s.b,
187 |                 sd.BAF = seg.i.b.sd, depth.ratio = seg.i.r,
188 |                 N.ratio = seg.i.s.r, sd.ratio = seg.i.r.sd,
189 |                 stringsAsFactors = FALSE)
190 |         }
191 |         segments[[i]] <- segments.i[seq(from = 1,
192 |             to = nrow(segments.i), by = 2),]
193 |     }
194 |     segments <- do.call(rbind, segments[as.factor(chr.order)])
195 |     row.names(segments) <- 1:nrow(segments)
196 |     len.seg <- (segments$end.pos - segments$start.pos) / 1e6
197 |     segments[(segments$N.ratio / len.seg) >= 2, ]
198 | }
199 | 


--------------------------------------------------------------------------------
/R/solutions.R:
--------------------------------------------------------------------------------
 1 | get.ci <- function(cp.table, level = 0.95) {
 2 |     znormsort <- sort(cp.table$lpp, decreasing = TRUE)
 3 |     znormcumLik <- cumsum(znormsort)
 4 |     n <- sapply(level, function(x) sum(znormcumLik < x) + 1)
 5 |     LikThresh <- znormsort[n]
 6 |     values.x <- data.frame(x = cp.table$ploidy,
 7 |         y = apply(cp.table$lpp, 1, max))
 8 |     values.y <- data.frame(x = apply(cp.table$lpp, 2, max),
 9 |     y = cp.table$cellularity)
10 |     up.x  <- max(values.x$x[values.x$y >= LikThresh])
11 |     low.x <- min(values.x$x[values.x$y >= LikThresh])
12 |     max.x <- values.x$x[which.max(values.x$y)]
13 |     up.y  <- max(values.y$y[values.y$x >= LikThresh])
14 |     low.y <- min(values.y$y[values.y$x >= LikThresh])
15 |     max.y <- values.y$y[which.max(values.y$x)]
16 |     values.x$y <- values.x$y / sum(values.x$y)
17 |     values.y$x <- values.y$x / sum(values.y$x)
18 |     results <- list()
19 |     results$values.ploidy <- values.x
20 |     results$confint.ploidy <- c(low.x, up.x)
21 |     results$max.ploidy <- max.x
22 |     results$values.cellularity <- values.y
23 |     results$confint.cellularity <- c(low.y, up.y)
24 |     results$max.cellularity <- max.y
25 |     results
26 | }
27 | 
28 | alternative.cp.solutions <- function(cp.table) {
29 |     ci <- get.ci(cp.table)
30 |     p.alt <- which(diff(sign(diff(ci$values.ploidy$y))) == -2) + 1
31 |     get.alt <- function(idx.p, cp.table) {
32 |         idx.c <- which.max(cp.table$lpp[idx.p, ])
33 |         c(cellularity = cp.table$cellularity[idx.c],
34 |             ploidy = cp.table$ploidy[idx.p],
35 |             SLPP = cp.table$lpp[idx.p, idx.c])
36 |         }
37 |     res <- lapply(p.alt, FUN = function (x) get.alt(x, cp.table))
38 |     res <- as.data.frame(do.call(rbind, res))
39 |     if (nrow(res) > 0 ){
40 |         res[order(res$SLPP, decreasing = TRUE), ]
41 |     } else {
42 |         data.frame(cellularity = ci$max.cellularity,
43 |             ploidy = ci$max.ploidy,
44 |             SLPP =  cp.table$lpp[which(cp.table$ploidy == ci$max.ploidy),
45 |                 which(cp.table$cellularity == ci$max.cellularity)])
46 |    }
47 | }
48 | 


--------------------------------------------------------------------------------
/R/windows.R:
--------------------------------------------------------------------------------
 1 | windowValues <- function(x, positions, chromosomes, window = 1e6,
 2 |                          overlap = 0, weight = rep.int(x = 1,
 3 |                          times = length(x)), start.coord = 1) {
 4 |     weight   <- sqrt(weight)
 5 |     overlap  <- as.integer(overlap)
 6 |     window.offset <- as.integer(window -
 7 |         round(window * (overlap / (overlap + 1))))
 8 |     chr.ordered <- unique(chromosomes)
 9 |     data.splitByChr    <- split(data.frame(pos = positions, x = x,
10 |                                            weight = weight),
11 |                                 f = factor(chromosomes, levels = chr.ordered))
12 |     lapply(data.splitByChr, function(x) {
13 |         range.pos <- range(x$pos, na.rm = TRUE)
14 |         if (!is.null(start.coord)) {
15 |             range.pos[1] <- as.integer(start.coord)
16 |         }
17 |         beam.coords <- seq.int(range.pos[1], range.pos[2], by = window.offset)
18 |         if (max(beam.coords) != range.pos[2]) {
19 |             beam.coords <- c(beam.coords, range.pos[2])
20 |         }
21 |         nWindows <- length(beam.coords) - overlap - 1
22 |         pos.cut <- cut(x$pos, breaks = beam.coords)
23 |         x.split <- split(x$x, f = pos.cut)
24 |         weight.split <- split(x$weight, f = pos.cut)
25 |         window.starts <- beam.coords[1:nWindows]
26 |         window.ends <- beam.coords[(1:nWindows) + 1 + overlap]
27 |         idx.list <- lapply(1:nWindows, function(ii) ii + (0:overlap))
28 |         x.window <- lapply(idx.list, function(idx) {
29 |             unlist(x.split[idx], use.names = FALSE)
30 |         })
31 |         weight.window <- lapply(idx.list, function(idx) {
32 |             unlist(weight.split[idx], use.names = FALSE)
33 |         })
34 |         window.means <- mapply(weighted.mean, x = x.window, w = weight.window)
35 |         window.quantiles <- sapply(x.window, quantile,
36 |             probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
37 |         window.counts <- sapply(x.window, length)
38 |         data.frame(start = window.starts, end = window.ends,
39 |             mean = window.means, q0 = window.quantiles[1, ],
40 |             q1 = window.quantiles[2, ], N = window.counts)
41 |     })
42 | }
43 | 
44 | windowBf <- function(Af, Bf, good.reads, positions, chromosomes,
45 |     window = 1e6, overlap = 0, start.coord = 1, conf = 0.95) {
46 |     overlap  <- as.integer(overlap)
47 |     window.offset <- as.integer(window - round(window * (overlap /
48 |         (overlap + 1))))
49 |     chr.ordered <- unique(chromosomes)
50 |     data.splitByChr <- split(data.frame(pos = positions, Bf, Af, good.reads),
51 |         f = factor(chromosomes, levels = chr.ordered))
52 |     lapply(data.splitByChr, function(data.oneChr) {
53 |         range.pos <- range(data.oneChr$pos, na.rm = TRUE)
54 |         if (!is.null(start.coord)) {
55 |             range.pos[1] <- as.integer(start.coord)
56 |         }
57 |         beam.coords <- seq.int(range.pos[1], range.pos[2], by = window.offset)
58 |         if (max(beam.coords) != range.pos[2] ) {
59 |             beam.coords <- c(beam.coords, range.pos[2])
60 |         }
61 |         nWindows <- length(beam.coords) - overlap - 1
62 |         pos.cut <- cut(data.oneChr$pos, breaks = beam.coords)
63 |         A.split <- split(data.oneChr$Af, f = pos.cut)
64 |         B.split <- split(data.oneChr$Bf, f = pos.cut)
65 |         d.split <- split(data.oneChr$good.reads, f = pos.cut)
66 |         window.starts <- beam.coords[1:nWindows]
67 |         window.ends <- beam.coords[(1:nWindows) + 1 + overlap]
68 |         idx.list <- lapply(1:nWindows, function(ii) ii + (0:overlap))
69 |         A.window <- lapply(idx.list, function(idx) {
70 |             unlist(A.split[idx], use.names = FALSE)
71 |         })
72 |         B.window <- lapply(idx.list, function(idx) {
73 |             unlist(B.split[idx], use.names = FALSE)
74 |         })
75 |         d.window <- lapply(idx.list, function(idx) {
76 |             unlist(d.split[idx], use.names = FALSE)
77 |         })
78 |         window.quantiles <- mapply(b_allele_freq, Af = A.window, Bf = B.window,
79 |             good.reads = d.window, conf = conf)
80 |         window.counts <- sapply(B.window, length)
81 |         data.frame(start = window.starts, end = window.ends,
82 |             mean = window.quantiles[2, ], q0 = window.quantiles[1, ],
83 |             q1 = window.quantiles[3, ], N = window.counts)
84 |     })
85 | }
86 | 


--------------------------------------------------------------------------------
/build/vignette.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/sequenza/6b83b882f114d4927311388d16513e0d472d4077/build/vignette.rds


--------------------------------------------------------------------------------
/data/CP.example.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/sequenza/6b83b882f114d4927311388d16513e0d472d4077/data/CP.example.RData


--------------------------------------------------------------------------------
/data/example.seqz.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/sequenza/6b83b882f114d4927311388d16513e0d472d4077/data/example.seqz.rda


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("To cite package 'sequenza' in publications use:")
 2 | 
 3 | bibentry(bibtype = 'Article',
 4 |         title = "Sequenza: allele-specific copy number and mutation profiles from tumor sequencing data",
 5 |         author = personList(as.person("Francesco Favero"),
 6 |                             as.person("Tejal Joshi"),
 7 |                             as.person("Andrea M Marquard"),
 8 |                             as.person("Nicolai J Birkbak"),
 9 |                             as.person("Marcin Krzystanek"),
10 |                             as.person("Qiyuan Li"),
11 |                             as.person("Zoltan Szallasi"),
12 |                             as.person("Aron C Eklund")),
13 |         journal = "Annals of Oncology",
14 |         year = 2015,
15 |         volume = 26,
16 |         issue = 1,
17 |         pages = "64-70",
18 |         doi = "10.1093/annonc/mdu479",
19 |         url = "http://annonc.oxfordjournals.org/content/26/1/64"
20 | )
21 | 


--------------------------------------------------------------------------------
/inst/doc/sequenza.R:
--------------------------------------------------------------------------------
 1 | ## ----load----------------------------------------------------------------
 2 | library(sequenza)
 3 | 
 4 | ## ----data----------------------------------------------------------------
 5 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
 6 | 
 7 | ## ----cp_data, echo=FALSE, message=FALSE----------------------------------
 8 | library(sequenza)
 9 | data(CP.example)
10 | CP <- CP.example
11 | 
12 | ## ----extract, message=FALSE, warning=FALSE, results="hide"---------------
13 | test <- sequenza.extract(data.file, verbose = FALSE)
14 | 
15 | ## ----fit, eval=FALSE-----------------------------------------------------
16 | #  CP <- sequenza.fit(test)
17 | 
18 | ## ----results-------------------------------------------------------------
19 | sequenza.results(sequenza.extract = test,
20 |     cp.table = CP, sample.id = "Test",
21 |     out.dir="TEST")
22 | 
23 | ## ----res_dir, echo=FALSE-------------------------------------------------
24 | 
25 | # create results list 
26 | # 
27 | res_list <- c("alternative_fit.pdf", "alternative_solutions.txt",
28 |     "chromosome_depths.pdf", "chromosome_view.pdf",
29 |     "CN_bars.pdf", "confints_CP.txt",
30 |     "CP_contours.pdf", "gc_plots.pdf",
31 |     "genome_view.pdf", "model_fit.pdf",
32 |     "mutations.txt", "segments.txt",
33 |     "sequenza_cp_table.RData", "sequenza_extract.RData",
34 |     "sequenza_log.txt")
35 | res_list <- paste("Test", res_list, sep = "_")
36 | 
37 | description_list <- c(
38 |     "Alternative solution fir to the segments. One solution per slide",
39 |     "List of all ploidy/cellularity alternative solution",
40 |     "Visualization of sequencing coverage in the normal and in the tumor samples, before and after normalization",
41 |     "Visualization per chromosome of depth.ratio, B-allele frequency and mutations, using the selected or estimated solution. One chromosome per slide",
42 |     "Bar plot representing the percentage of genome in the detected copy number states",
43 |     "Table of the confidence inerval of the best solution from the model",
44 |     "Visualization of the likelihood density for each pair of cellularity/ploidy solution. The local maximum-likelihood points and confidence interval of the best estimate are also visualized",
45 |     "Visualization of the GC correction in the normal and in the tumor sample",
46 |     "Genome-whide visualization of the allele-specific and absolute copy number results, and raw profile of the depth ratio and allele frequency",
47 |     "model_fit.pdf",
48 |     "Table with mutation and estimated number of mutated alleles (Mt)",
49 |     "Table listing the detected segments, with estimated copy number state at each sement",
50 |     "RData object dump of the maxima a posteriori computation",
51 |     "RData object dump of all the sample information",
52 |     "Log with version and time information")
53 | knitr::kable(data.frame(Files = res_list, Description = description_list))
54 | #dir("TEST", pattern = "Test")
55 | 
56 | 
57 | ## ----read_segs, echo=FALSE-----------------------------------------------
58 | seg.tab <- read.table("TEST/Test_segments.txt",
59 |                       header = TRUE, sep ="\t")
60 | 
61 | alt_res <- read.table("TEST/Test_alternative_solutions.txt",
62 |                       header = TRUE, sep ="\t")
63 | seg.tab <- seg.tab[seg.tab$CNt <= 4, ]
64 | is.num <- sapply(seg.tab, is.numeric)
65 | seg.tab[is.num] <- lapply(seg.tab[is.num], round, 3)
66 | 
67 | ## ----head_segs, echo=FALSE-----------------------------------------------
68 | knitr::kable(head(seg.tab))
69 | 
70 | 
71 | ## ----g_view, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'----
72 | sequenza:::genome.view(seg.tab)
73 | 
74 | ## ----g_view_tot, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'----
75 | sequenza:::genome.view(seg.tab, info.type = "CNt")
76 | 
77 | ## ----g_view_raw, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'----
78 | sequenza:::plotRawGenome(test, cellularity = alt_res$cellularity[1],
79 |     ploidy = alt_res$ploidy[1])
80 | 
81 | ## ----CPplot, echo=TRUE, fig.height=5, fig.width=5, fig.align='center'----
82 | cp.plot(CP)
83 | cp.plot.contours(CP, add = TRUE,
84 |    likThresh = c(0.999, 0.95),
85 |    col = c("lightsalmon", "red"), pch = 20)
86 | 
87 | ## ----c_view, echo=TRUE, fig.height=6, fig.width=8, fig.align='center'----
88 | chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]],
89 |                 ratio.windows = test$ratio[[1]],  min.N.ratio = 1,
90 |                 segments = test$segments[[1]],
91 |                 main = test$chromosomes[1],
92 |                 cellularity = 0.89, ploidy = 1.9,
93 |                 avg.depth.ratio = 1)
94 | 
95 | 


--------------------------------------------------------------------------------
/inst/doc/sequenza.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Sequenza User Guide"
  3 | date: "`r Sys.Date()`"
  4 | author: "[Francesco Favero](mailto:favero.francesco@gmail.com)"
  5 | output:
  6 |   rmdformats::readthedown:
  7 |     self_contained: true
  8 | vignette: >
  9 |   %\VignetteIndexEntry{Sequenza User Guide}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | # About
 15 | 
 16 | > Sequenza: Copy Number Estimation from Tumor Genome Sequencing Data
 17 | 
 18 | 
 19 | ![](https://bytebucket.org/sequenzatools/icons/raw/324bd43ac4d10546b64b04c38d8c513e8420346d/svg/sequenza_tools/sequenzaalpha_150.svg)
 20 | 
 21 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/sequenza)](https://cran.r-project.org/package=sequenza)
 22 | [![CRAN_Downloads_Badge](http://cranlogs.r-pkg.org/badges/sequenza)](https://cran.r-project.org/package=sequenza)
 23 | [![CRAN_licence](https://img.shields.io/cran/l/sequenza.svg)](https://www.gnu.org/licenses/gpl-3.0.txt)
 24 | 
 25 | 
 26 | 
 27 | Sequenza is a tool to analyze genomic sequencing data from paired normal-tumor samples, including cellularity and ploidy estimation; mutation and copy number (allele-specific and total copy number) detection, quantification and visualization.
 28 | 
 29 | # Introduction
 30 | 
 31 | Deep sequence of tumor DNA along with corresponding normal DNA can provide a
 32 | valuable perspective on the mutations and aberrations that characterize the
 33 | tumor. However, analysis of this data can be impeded by tumor cellularity and
 34 | heterogeneity and by unwieldy data. Here we describe *Sequenza*, an R package
 35 | that enables the efficient estimation of tumor cellularity and ploidy, and
 36 | generation of copy number, loss-of-heterozygosity, and mutation frequency
 37 | profiles.
 38 | 
 39 | This document details a typical analysis of matched tumor-normal exome sequence
 40 | data using *sequenza*.
 41 | 
 42 | # Getting started
 43 | 
 44 | ## Minimum requirements
 45 |  - Software: R, Python, SAMtools, tabix
 46 |  - Operating system: Linux, OS X, Windows
 47 |  - Memory: Minimum 4 GB of RAM. Recommended >8 GB.
 48 |  - Disk space: 1.5 GB for sample (depending on sequencing depth)
 49 |  - R version: 3.2.0
 50 |  - Python version: 2.7, 3.4, 3.5, 3.6 (or PyPy)
 51 | 
 52 | ## Installation
 53 | 
 54 | The R package can be installed by:
 55 | 
 56 | ```r
 57 | setRepositories(graphics = FALSE, ind = 1:6)
 58 | install.packages("sequenza")
 59 | ```
 60 | 
 61 | To install the Python companion package *sequenza-utils* to preprocess BAM
 62 | files, refer to the [*sequenza-utils*](https://pypi.org/project/sequenza-utils)
 63 | project page, or simply use the python package manager from the command prompt:
 64 | 
 65 | ```bash
 66 | pip install sequenza-utils
 67 | ```
 68 | 
 69 | # Running sequenza
 70 | 
 71 | ## Preprocessing of input files
 72 | 
 73 | In order to obtain precise mutational and aberration patterns in a tumor sample,
 74 | Sequenza requires a matched normal sample from the same patient. Typically, the
 75 | following files are needed to get started with Sequenza:
 76 | 
 77 |  - A BAM file (or a derived pileup file) from the tumor specimen.
 78 |  - A BAM file (or a derived pileup file) from the normal specimen.
 79 |  - A FASTA reference genomic sequence file
 80 | 
 81 | 
 82 | The normal and tumor BAM files are processed together to generate a *seqz* file, which
 83 | is the required input for the analysis.
 84 | It is possible to generate a *seqz* starting from other processed data, such as
 85 | pileup, or VCF files. The available options are described in the
 86 | [*sequenza-utils*](http://sequenza-utils.readthedocs.io/) manual pages.
 87 | 
 88 | The *sequenza-utils* command provides various tools; here we highlight only the
 89 | basic usage:
 90 | 
 91 |  - Process a FASTA file to produce a GC [Wiggle](https://genome.ucsc.edu/goldenpath/help/wiggle.html)
 92 |  track file:
 93 |  
 94 | ```bash
 95 | sequenza−utils gc_wiggle −w 50 --fasta hg19.fa -o hg19.gc50Base.wig.gz
 96 | ```
 97 | 
 98 |  - Process BAM and Wiggle files to produce a *seqz* file:
 99 |  
100 | ```bash
101 | sequenza−utils bam2seqz -n normal.bam -t tumor.bam --fasta hg19.fa \
102 |     -gc hg19.gc50Base.wig.gz -o out.seqz.gz
103 | ```
104 | 
105 |  - Post-process by binning the original *seqz* file:
106 |  
107 | ```bash
108 | sequenza−utils seqz_binning --seqz out.seqz.gz -w 50 -o out small.seqz.gz
109 | ```
110 |  
111 | ## Sequenza analysis (in R)
112 | 
113 | ```{r load}
114 | library(sequenza)
115 | ```
116 | 
117 | In the package is provided a small *seqz* file
118 | 
119 | ```{r data}
120 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
121 | ```
122 | 
123 | ```{r cp_data, echo=FALSE, message=FALSE}
124 | library(sequenza)
125 | data(CP.example)
126 | CP <- CP.example
127 | ```
128 | 
129 | 
130 | 
131 | The main interface consists of 3 functions:
132 | 
133 |  - sequenza.extract: process seqz data, normalization and segmentation
134 | ```{r extract, message=FALSE, warning=FALSE, results="hide"}
135 | test <- sequenza.extract(data.file, verbose = FALSE)
136 | ```
137 | 
138 |  - sequenza.fit: run grid-search approach to estimate cellularity and ploidy
139 | ```{r fit, eval=FALSE}
140 | CP <- sequenza.fit(test)
141 | ```
142 | 
143 |  - sequenza.results: write files and plots using suggested or selected solution
144 | ```{r results}
145 | sequenza.results(sequenza.extract = test,
146 |     cp.table = CP, sample.id = "Test",
147 |     out.dir="TEST")
148 | ```
149 | 
150 | # Plots and Results
151 | 
152 | The function _sequenza.results_ outputs various files in the specified path.
153 | The resulting files are either output in pdf of in plain text. The files include
154 | quality control assessments (eg evaluate GC-correction), visualization of the
155 | data and files such as segmentation with copy number calling and mutation lists.
156 | 
157 | ## Result files
158 | 
159 | Each generated file is briefly explained in the following table
160 | 
161 | ```{r res_dir, echo=FALSE}
162 | 
163 | # create results list 
164 | # 
165 | res_list <- c("alternative_fit.pdf", "alternative_solutions.txt",
166 |     "chromosome_depths.pdf", "chromosome_view.pdf",
167 |     "CN_bars.pdf", "confints_CP.txt",
168 |     "CP_contours.pdf", "gc_plots.pdf",
169 |     "genome_view.pdf", "model_fit.pdf",
170 |     "mutations.txt", "segments.txt",
171 |     "sequenza_cp_table.RData", "sequenza_extract.RData",
172 |     "sequenza_log.txt")
173 | res_list <- paste("Test", res_list, sep = "_")
174 | 
175 | description_list <- c(
176 |     "Alternative solution fir to the segments. One solution per slide",
177 |     "List of all ploidy/cellularity alternative solution",
178 |     "Visualization of sequencing coverage in the normal and in the tumor samples, before and after normalization",
179 |     "Visualization per chromosome of depth.ratio, B-allele frequency and mutations, using the selected or estimated solution. One chromosome per slide",
180 |     "Bar plot representing the percentage of genome in the detected copy number states",
181 |     "Table of the confidence inerval of the best solution from the model",
182 |     "Visualization of the likelihood density for each pair of cellularity/ploidy solution. The local maximum-likelihood points and confidence interval of the best estimate are also visualized",
183 |     "Visualization of the GC correction in the normal and in the tumor sample",
184 |     "Genome-whide visualization of the allele-specific and absolute copy number results, and raw profile of the depth ratio and allele frequency",
185 |     "model_fit.pdf",
186 |     "Table with mutation and estimated number of mutated alleles (Mt)",
187 |     "Table listing the detected segments, with estimated copy number state at each sement",
188 |     "RData object dump of the maxima a posteriori computation",
189 |     "RData object dump of all the sample information",
190 |     "Log with version and time information")
191 | knitr::kable(data.frame(Files = res_list, Description = description_list))
192 | #dir("TEST", pattern = "Test")
193 | 
194 | ```
195 | 
196 | ```{r read_segs, echo=FALSE}
197 | seg.tab <- read.table("TEST/Test_segments.txt",
198 |                       header = TRUE, sep ="\t")
199 | 
200 | alt_res <- read.table("TEST/Test_alternative_solutions.txt",
201 |                       header = TRUE, sep ="\t")
202 | seg.tab <- seg.tab[seg.tab$CNt <= 4, ]
203 | is.num <- sapply(seg.tab, is.numeric)
204 | seg.tab[is.num] <- lapply(seg.tab[is.num], round, 3)
205 | ```
206 | 
207 | ## Segments results
208 | 
209 | The segmentation file with the allele-specific copy number calling is one of
210 | the main result of the analysis. A sample of the file is shown in the table below:
211 | 
212 | ```{r head_segs, echo=FALSE}
213 | knitr::kable(head(seg.tab))
214 | 
215 | ```
216 | 
217 | 
218 | The columns represents:
219 | 
220 |   1. **chromosome**: Chromosome
221 |   2. **start.pos**: Start position of the segment
222 |   3. **end.pos**: End position of the segment
223 |   4. **Bf**: B-allele frequency value
224 |   5. **N.BAF**: Number of observation to compute _Bf_ in the segment
225 |   6. **sd.BAF**: Standard deviation of _Bf_
226 |   7. **depth.ratio**: Adjusted and normalized depth ratio tumor / normal
227 |   8. **N.ratio**: Number of observation to compute _depth.ratio_ in the segment
228 |   9. **sd.ratio**: Standard deviation of _depth.rati_
229 |   10. **CNt**: Estimated total copy number value
230 |   11. **A**: Estimated number of A-alleles
231 |   12. **B**: Estimated number of B-alleles (minor allele)
232 |   13. **LPP**: Log-posterior probability of the segment
233 | 
234 | ## Gene wide overview
235 | 
236 | ### Allele-specific copy number
237 | 
238 | ```{r g_view, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'}
239 | sequenza:::genome.view(seg.tab)
240 | ```
241 | 
242 | ### Total copy number
243 | 
244 | ```{r g_view_tot, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'}
245 | sequenza:::genome.view(seg.tab, info.type = "CNt")
246 | ```
247 | 
248 | ### Raw profile
249 | 
250 | ```{r g_view_raw, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'}
251 | sequenza:::plotRawGenome(test, cellularity = alt_res$cellularity[1],
252 |     ploidy = alt_res$ploidy[1])
253 | ```
254 | 
255 | 
256 | ## Grid search maximum likelihood
257 | 
258 | 
259 | ```{r CPplot, echo=TRUE, fig.height=5, fig.width=5, fig.align='center'}
260 | cp.plot(CP)
261 | cp.plot.contours(CP, add = TRUE,
262 |    likThresh = c(0.999, 0.95),
263 |    col = c("lightsalmon", "red"), pch = 20)
264 | ```
265 | 
266 | ## Chromosome view
267 | 
268 | _Chromosome view_ is the visualization that displays chromosome by crhosome, nutations,
269 | B-allele frequency and depth-ratio.
270 | The visualization makes it easier to ispect the segmentation results, comparing to
271 | a binned profile of the raw data.
272 | It also visualize the copy number calling using the _cellularity_ and _ploidy_ solution,
273 | making useful to asses if the copy number calling is acurate.
274 | In addition it provides a visualization of the mutation frequency that can also help to
275 | corroborate the solution.
276 | 
277 | ```{r c_view, echo=TRUE, fig.height=6, fig.width=8, fig.align='center'}
278 | chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]],
279 |                 ratio.windows = test$ratio[[1]],  min.N.ratio = 1,
280 |                 segments = test$segments[[1]],
281 |                 main = test$chromosomes[1],
282 |                 cellularity = 0.89, ploidy = 1.9,
283 |                 avg.depth.ratio = 1)
284 | ```
285 | 


--------------------------------------------------------------------------------
/inst/extdata/example.seqz.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/sequenza/6b83b882f114d4927311388d16513e0d472d4077/inst/extdata/example.seqz.txt.gz


--------------------------------------------------------------------------------
/inst/extdata/example.seqz.txt.gz.tbi:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/sequenza/6b83b882f114d4927311388d16513e0d472d4077/inst/extdata/example.seqz.txt.gz.tbi


--------------------------------------------------------------------------------
/man/CP.data.Rd:
--------------------------------------------------------------------------------
 1 | \name{CP.example}
 2 | \docType{data}
 3 | \alias{CP.example}
 4 | 
 5 | \title{Example of cellularity and ploidy results}
 6 | 
 7 | \description{
 8 |     Examples of results from the maximum a posteriori estimation from a set
 9 |     of cellularity and ploidy values, as returned by the functions
10 |     \code{\link{baf.model.fit}} and \code{\link{mufreq.model.fit}}.
11 | }
12 | 
13 | \usage{
14 |     data(CP.example)
15 | }
16 | 
17 | \format{
18 |     A list containing three items:
19 |         \describe{
20 |         \item{ploidy}{numeric vector of tested ploidy values.}
21 |         \item{cellularity}{numeric vector of tested cellularity values.}
22 |         \item{lpp}{numeric matrix of log-posterior probability for each
23 |         \var{(ploidy, cellularity)} pair.}
24 |     }
25 | }
26 | 
27 | \examples{
28 |     data(CP.example)
29 |     str(CP.example)
30 | 
31 |     ## Visualization of the object
32 |     image(x = CP.example$ploidy,
33 |         y = CP.example$cellularity,
34 |         z = CP.example$lpp)
35 | 
36 |     ## A better plot
37 |     cp.plot(CP.example)
38 |     cp.plot.contours(CP.example, add = TRUE)
39 | }
40 | 
41 | \keyword{datasets}
42 | 


--------------------------------------------------------------------------------
/man/baf.model.fit.Rd:
--------------------------------------------------------------------------------
  1 | \name{baf.model.fit}
  2 | \alias{baf.model.fit}
  3 | \alias{mufreq.model.fit}
  4 | \title{Model fitting using maximum a posteriori inference}
  5 | 
  6 | \description{
  7 |   Computes the log-posterior probability distribution for the specified range
  8 |   of cellularity and ploidy parameters
  9 | }
 10 | 
 11 | \usage{
 12 |     mufreq.model.fit(cellularity = seq(0.3, 1, by = 0.01),
 13 |         ploidy = seq(1, 7, by = 0.1), mc.cores = getOption("mc.cores", 2L),
 14 |         ...)
 15 |     baf.model.fit(cellularity = seq(0.3, 1, by = 0.01),
 16 |         ploidy = seq(1, 7, by = 0.1), mc.cores = getOption("mc.cores", 2L),
 17 |         ...)
 18 | }
 19 | 
 20 | \arguments{
 21 |     \item{cellularity}{vector of cellularity values to be tested.}
 22 |     \item{ploidy}{vector of ploidy values to be tested.}
 23 |     \item{mc.cores}{number of cores to use, defined as in
 24 |         \code{\link{pblapply}}.}
 25 |     \item{...}{any argument accepted by \code{\link{mufreq.bayes}} or
 26 |         \code{\link{baf.bayes}}.}
 27 | }
 28 | 
 29 | \value{
 30 |     A list of three items:
 31 |     \item{ploidy}{tested values of the ploidy parameter}
 32 |     \item{cellularity}{tested values of the cellularity parameter}
 33 |     \item{lpp}{log-posterior probability of each pair of
 34 |         cellularity/ploidy parameters.}
 35 | }
 36 | 
 37 | \details{
 38 |     \code{baf.model.fit} uses the function \code{\link{baf.bayes}} to infer
 39 |     the log-posterior probability of the model fit using the possible
 40 |     combinations of cellularity and ploidy values provided in the arguments.
 41 |     Similarly \code{mufreq.model.fit} fits the mutation/depth ratio model using
 42 |     the function \code{\link{mufreq.bayes}}.
 43 |     \code{baf.model.fit} is the defalt method used to infer cellularity and
 44 |     ploidy on segmented chromosomes. The \code{mufreq.model.fit} function
 45 |     estimates cellularity and ploidy using mutation frequency and depth ratio,
 46 |     however, the mutation data is more affected to background noise
 47 |     compared to the segmented B-allele frequency, hence it may give less
 48 |     accurate results.
 49 | }
 50 | 
 51 | \seealso{
 52 |   \code{\link{cp.plot}} for visualization of the resulting object,
 53 |   and \code{\link{get.ci}} to extract confidence intervals.
 54 | }
 55 | 
 56 | \examples{
 57 |     \dontrun{
 58 | 
 59 |     data.file <-  system.file("extdata", "example.seqz.txt.gz",
 60 |         package = "sequenza")
 61 |     # read all the chromosomes:
 62 |     seqz.data <- read.seqz(data.file)
 63 |     # Gather genome wide GC-stats from raw file:
 64 |     gc.stats <- gc.sample.stats(data.file)
 65 |     gc.normal.vect <- mean_gc(gc.stats$normal)
 66 |     gc.tumor.vect <- mean_gc(gc.stats$tumor)
 67 |     # Read only one chromosome:
 68 |     seqz.data <- read.seqz(data.file, chr_name = "1")
 69 | 
 70 |     # Correct the coverage of the loaded chromosome:
 71 |     seqz.data$adjusted.ratio <- round((seqz.data$depth.tumor /
 72 |         gc.tumor.vect[as.character(seqz.data$GC.percent)]) /
 73 |         (seqz.data$depth.normal /
 74 |         gc.normal.vect[as.character(seqz.data$GC.percent)]), 3)
 75 |     # Select the heterozygous positions
 76 |     seqz.hom <- seqz.data$zygosity.normal == 'hom'
 77 |     seqz.het <- seqz.data[!seqz.hom, ]
 78 |     # Detect breakpoints
 79 |     breaks <- find.breaks(seqz.het, gamma = 80, kmin = 10,
 80 |         baf.thres = c(0, 0.5))
 81 |     # use heterozygous and homozygous position to measure segment values
 82 |     seg.s1 <- segment.breaks(seqz.data, breaks = breaks)
 83 | 
 84 |     # filter out small ambiguous segments, and conveniently weight
 85 |     # the segments by size:
 86 |     seg.filtered <- seg.s1[(seg.s1$end.pos - seg.s1$start.pos) > 3e6, ]
 87 |     weights.seg  <- (seg.filtered$end.pos - seg.filtered$start.pos) / 1e6
 88 |     # Set the average depth ratio to 1:
 89 |     avg.depth.ratio <- 1
 90 |     # run the BAF model fit
 91 |     CP <- baf.model.fit(Bf = seg.filtered$Bf,
 92 |         depth.ratio = seg.filtered$depth.ratio, weight.ratio = weights.seg,
 93 |         weight.Bf = weights.seg, sd.ratio = seg.filtered$sd.ratio,
 94 |         sd.Bf = seg.filtered$sd.BAF, avg.depth.ratio = avg.depth.ratio,
 95 |         cellularity = seq(0.1, 1, 0.01), ploidy = seq(0.5, 3, 0.05))
 96 | 
 97 |     confint <- get.ci(CP)
 98 |     ploidy <- confint$max.ploidy
 99 |     cellularity <- confint$max.cellularity
100 | 
101 |     }
102 | }
103 | 


--------------------------------------------------------------------------------
/man/bayes.Rd:
--------------------------------------------------------------------------------
  1 | \name{baf.bayes}
  2 | \alias{baf.bayes}
  3 | \alias{mufreq.bayes}
  4 | \title{Model allele-specific copy numbers with specified cellularity and ploidy parameters}
  5 | 
  6 | \description{
  7 |   Given a pair of cellularity and ploidy parameters, the function returns the most likely allele-specific copy numbers with the corresponding log-posterior probability of the fit, for given values of B-allele frequency and depth ratio.
  8 | }
  9 | 
 10 | \usage{
 11 |   baf.bayes(Bf, depth.ratio, cellularity, ploidy, avg.depth.ratio,
 12 |             sd.Bf = 0.1, sd.ratio = 0.5, weight.Bf = 1, weight.ratio = 1,
 13 |             CNt.min = 0, CNt.max = 7, CNn = 2,
 14 |             priors.table = data.frame(CN = CNt.min:CNt.max, value = 1),
 15 |             ratio.priority = FALSE)
 16 |   mufreq.bayes(mufreq, depth.ratio, cellularity, ploidy, avg.depth.ratio,
 17 |             weight.mufreq = 100, weight.ratio = 100, CNt.min = 1, CNt.max = 7, CNn = 2,
 18 |             priors.table = data.frame(CN = CNt.min:CNt.max, value = 1))
 19 | }
 20 | 
 21 | \arguments{
 22 |   \item{Bf}{vector of B-allele frequencies (values can range from 0 to 0.5).}
 23 |   \item{mufreq}{vector of mutation frequencies (values can range from 0 to 1).}
 24 |   \item{depth.ratio}{vector of depth ratios.}
 25 |   \item{sd.ratio}{standard deviation observed in the depth ratio measures in a segment}
 26 |   \item{sd.Bf}{standard deviation observed in the B-allele frequency measures in a segment}
 27 |   \item{weight.Bf}{vector of weights for B-allele frequency values.}
 28 |   \item{weight.mufreq}{vector of weights for the mutation frequency values.}
 29 |   \item{weight.ratio}{vector of weights for the depth ratio values.}
 30 |   \item{cellularity}{fraction of tumor cells in the sample.}
 31 |   \item{ploidy}{2 * ratio between total DNA content in a tumor cell and a normal cell.}
 32 |   \item{avg.depth.ratio}{average normalized depth ratio.}
 33 |   \item{CNt.min}{minimum copy number to consider in the model.}
 34 |   \item{CNt.max}{maximum copy number to consider in the model.}
 35 |   \item{CNn}{copy number of the normal genome.}
 36 |   \item{priors.table}{data frame with columns \code{CN} and \code{value}, containing the copy numbers and the corresponding weights. To every copy number is assigned the value 1 as default, so any values different from 1 will change the corresponding weight.}
 37 |   \item{ratio.priority}{logical, if TRUE only the depth ratio will be used to determine the copy number state, while the Bf value will be used to determine the number of B-alleles.}
 38 | }
 39 | 
 40 | \value{
 41 |   \item{CNt}{copy number of the tumor cell at the tested point.}
 42 |   \item{A}{number of A-alleles at the tested point.}
 43 |   \item{B}{number of B-alleles at the tested point.}
 44 |   \item{CNn}{copy number of the normal cell at the tested point (equal to CNn given as argument).}
 45 |   \item{Mt}{number of mutated alleles at the tested point.}
 46 |   \item{LPP}{log-posterior probability of model fitting at the given point/segment.}
 47 | }
 48 | \details{
 49 |   \code{baf.bayes} and \code{mufreq.bayes} use a naive Bayesian approach to calculate the posterior probability of fitness of the data point with the model point resulting from the given values of cellularity and DNA-content.
 50 | }
 51 | 
 52 | \seealso{
 53 |    \code{baf.model.fit}, \code{mufreq.model.fit}.
 54 | }
 55 | 
 56 | \examples{
 57 |   \dontrun{
 58 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
 59 | # read all the chromosomes:
 60 | seqz.data  <- read.seqz(data.file)
 61 | # Gather genome wide GC-stats from raw file:
 62 | gc.stats <- gc.sample.stats(data.file)
 63 | gc.vect  <- setNames(gc.stats$raw.mean, gc.stats$gc.values)
 64 | # Read only one chromosome:
 65 | seqz.data  <- read.seqz(data.file, chr.name = 1)
 66 | 
 67 | # Correct the coverage of the loaded chromosome:
 68 | seqz.data$adjusted.ratio <- seqz.data$depth.ratio /
 69 |                            gc.vect[as.character(seqz.data$GC.percent)]
 70 | # Select the heterozygous positions
 71 | seqz.hom  <- seqz.data$zygosity.normal == 'hom'
 72 | seqz.het  <- seqz.data[!seqz.hom, ]
 73 | # Detect breakpoints
 74 | breaks <- find.breaks(seqz.het, gamma = 80, kmin = 10, baf.thres = c(0, 0.5))
 75 | # use heterozygous and homozygous position to measure segment values
 76 | seg.s1 <- segment.breaks(seqz.data, breaks = breaks)
 77 | 
 78 | # filter out small ambiguous segments, and conveniently weight the segments by size:
 79 | seg.filtered <- seg.s1[(seg.s1$end.pos - seg.s1$start.pos) > 10e6, ]
 80 | weights.seg  <- 150 + round((seg.filtered$end.pos -
 81 |                              seg.filtered$start.pos) / 1e6, 0)
 82 | # get the genome wide mean of the normalized depth ratio:
 83 | avg.depth.ratio <- mean(gc.stats$adj[,2])
 84 | # run the BAF model fit
 85 | 
 86 | CP <- baf.model.fit(Bf = seg.filtered$Bf, depth.ratio = seg.filtered$depth.ratio,
 87 |                     weight.ratio = weights.seg,
 88 |                     weight.Bf = weights.seg,
 89 |                     avg.depth.ratio = avg.depth.ratio,
 90 |                     cellularity = seq(0.1,1,0.01),
 91 |                     ploidy = seq(0.5,3,0.05))
 92 | 
 93 | confint <- get.ci(CP)
 94 | ploidy   <- confint$max.ploidy
 95 | cellularity <- confint$max.cellularity
 96 | 
 97 | #detect copy number alteration on the segments:
 98 | 
 99 | cn.alleles <- baf.bayes(Bf = seg.s1$Bf, depth.ratio = seg.s1$depth.ratio,
100 |                         cellularity = cellularity, ploidy = ploidy,
101 |                         avg.depth.ratio = 1)
102 | 
103 | head(cbind(seg.s1, cn.alleles))
104 | 
105 | # create mutation table:
106 | mut.tab   <- mutation.table(seqz.data, mufreq.treshold = 0.15,
107 |                             min.reads = 40, max.mut.types = 1,
108 |                             min.type.freq = 0.9, segments = seg.s1)
109 | 
110 | mut.tab.clean <- na.exclude(mut.tab)
111 | 
112 | # Detect mutated alleles:
113 | mut.alleles <- mufreq.bayes(mufreq = mut.tab.clean$F,
114 |                             depth.ratio = mut.tab.clean$adjusted.ratio,
115 |                             cellularity = cellularity, ploidy = ploidy,
116 |                             avg.depth.ratio = avg.depth.ratio)
117 | head(cbind(mut.tab.clean[,c("chromosome","position","F",
118 |                             "adjusted.ratio", "mutation")],
119 |            mut.alleles))
120 | 
121 | 
122 |    }
123 | }


--------------------------------------------------------------------------------
/man/breaks.Rd:
--------------------------------------------------------------------------------
 1 | \name{find.breaks}
 2 | \alias{find.breaks}
 3 | \alias{segment.breaks}
 4 | \title{Segmentation of sequencing data using an allele-specific copy number algorithm}
 5 | 
 6 | \description{
 7 |   This function uses \code{\link{aspcf}} or \code{\link{pcf}} from the package \pkg{copynumber} to segment depth ratio and B-allele frequency obtained from sequencing data.
 8 | }
 9 | 
10 | \usage{
11 | find.breaks(seqz.baf, gamma = 80, kmin = 10, baf.thres = c(0, 0.5),
12 |             verbose = FALSE, seg.algo = "aspcf", ...)
13 | segment.breaks(seqz.tab, breaks, min.reads.baf = 1, weighted.mean = TRUE)
14 | }
15 | 
16 | \arguments{
17 |   \item{seqz.baf}{an seqz file containing only the heterozygous positions.}
18 |   \item{seqz.tab}{a complete seqz file.}
19 |   \item{gamma, kmin, baf.thres, verbose}{arguments passed to the segmentation algorithm.}
20 |   \item{breaks}{breaks as output by \code{find.breaks}.}
21 |   \item{min.reads.baf}{threshold on the depth of the positions included to calculate the average BAF for segment.}
22 |   \item{weighted.mean}{boolean to select if the segments have to calculated using the read depth as a weights to calculate depth ratio and B-allele frequency means.}
23 |   \item{seg.algo}{Selects the algorithm used for the segmentation. Available options are \code{\link{aspcf}} of \code{\link{pcf}}.}
24 |   \item{...}{additional arguments passed to \code{\link{aspcf}}.}
25 | }
26 | 
27 | \details{
28 |   \pkg{copynumber} is a package to perform efficient segmentation of SNP-array data. The function \code{find.breaks} uses the algorithms from the \pkg{copynumber} package to find break points, where the default parameters have been optimized for sequencing data, but a careful choice of an optimal \code{gamma} value is advised.
29 | }
30 | 
31 | \examples{
32 | 
33 |   \dontrun{
34 | 
35 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
36 | # read all the chromosomes:
37 | seqz.data  <- read.seqz(data.file)
38 | # Gather genome wide GC-stats from raw file:
39 | gc.stats <- gc.sample.stats(data.file)
40 | gc.vect  <- setNames(gc.stats$raw.mean, gc.stats$gc.values)
41 | # Read only one chromosome:
42 | seqz.data  <- read.seqz(data.file, chr.name = 12)
43 | 
44 | # Correct the coverage of the loaded chromosome:
45 | seqz.data$adjusted.ratio <- seqz.data$depth.ratio /
46 |                            gc.vect[as.character(seqz.data$GC.percent)]
47 | # Select the heterozygous positions
48 | seqz.hom  <- seqz.data$zygosity.normal == 'hom'
49 | seqz.het  <- seqz.data[!seqz.hom, ]
50 | # Detect breakpoints
51 | breaks <- find.breaks(seqz.het, gamma = 80, kmin = 10, baf.thres = c(0, 0.5))
52 | # use heterozygous and homozygous position to measure segment values
53 | segment.breaks(seqz.data, breaks = breaks)
54 |    }
55 | }


--------------------------------------------------------------------------------
/man/chromosome.view.Rd:
--------------------------------------------------------------------------------
  1 | \name{chromosome.view}
  2 | \alias{chromosome.view}
  3 | \alias{genome.view}
  4 | 
  5 | \title{A graphical representation of multiple chromosomal
  6 |     features}
  7 | 
  8 | \description{
  9 |     A graphical representation of depth ratio, allele frequency and mutation
 10 |     frequency in multiple panels allineated by the coordinate of
 11 |     the same chromosome.
 12 | }
 13 | 
 14 | \usage{
 15 |     chromosome.view(baf.windows, ratio.windows, mut.tab = NULL,
 16 |         segments = NULL, min.N.baf = 1, min.N.ratio = 10000, main = "",
 17 |         vlines = FALSE, legend.inset = c(-20 * strwidth("a", units = "figure"),
 18 |         0), CNn = 2, cellularity = NULL, ploidy = NULL, avg.depth.ratio = NULL,
 19 |         model.lwd = 1, model.lty = "24", model.col = 1, x.chr.space = 10)
 20 |     genome.view(seg.cn, info.type = "AB", ...)
 21 | }
 22 | 
 23 | \arguments{
 24 |     \item{baf.windows}{matrix containing the windowed B-allele frequency values
 25 |         for one chromosome.}
 26 |     \item{ratio.windows}{matrix containing the windowed depth ratio values for
 27 |         one chromosome.}
 28 |     \item{mut.tab}{mutation table of one chromosome. If specified, the
 29 |         mutations will be drawn in a top panel. \code{mut.tab} must be output
 30 |         from the \code{\link{mutation.table}} function.}
 31 |     \item{segments}{segmentation for one chromosome. If specified, the
 32 |         segmented B-allele frequency and depth ratio values will be shown
 33 |         as red lines.}
 34 |     \item{min.N.baf}{minimum number of observations required in a BAF window
 35 |         for plotting.}
 36 |     \item{min.N.ratio}{minimum number of observations required in a depth ratio
 37 |         window for plotting.}
 38 |     \item{CNn}{copy number of the germline genome. }
 39 |     \item{vlines}{logical, if TRUE the plot will include dotted vertical lines
 40 |         corresponding to segment breaks.}
 41 |     \item{cellularity}{fraction of tumor cells in the sample.}
 42 |     \item{ploidy}{value of the estimated \code{ploidy} parameter.}
 43 |     \item{avg.depth.ratio}{the average value of the normalized depth ratio.}
 44 |     \item{main}{main title of the plot.}
 45 |     \item{legend.inset}{the inset argument to pass to the \code{\link{legend}}
 46 |         function. Defines the distance between the mutation legend and the
 47 |         plot border.}
 48 |     \item{model.lwd}{width of the theoretical lines, if the segments matrix
 49 |         contains the columns A, B and CNt.}
 50 |     \item{model.lty}{line type of the theoretical lines, if the segments matrix
 51 |         contains the columns A, B and CNt.}
 52 |     \item{model.col}{color of the theoretical lines, if the segments matrix
 53 |         contains the columns A, B and CNt.}
 54 |     \item{x.chr.space}{step in megabase on the positions to visualize on
 55 |         the x-axis.}
 56 |     \item{seg.cn}{genome wide segments, with the columns A, B and CNt.}
 57 |     \item{info.type}{information to plot in \code{genome.view}. Available
 58 |         options are "CNt" for total copy numbers and "AB" (default) for
 59 |         the alleles specific copy number.}
 60 |     \item{...}{optional arguments passed to \code{\link{plot}}.}
 61 | }
 62 | 
 63 | \details{
 64 |     \code{chromosome.view} is a plotting function based on the default
 65 |     \code{\link{plot}} function and \code{\link{par}} to display multiple
 66 |     panels. The plotting function \code{\link{plotWindows}} is used to plot
 67 |     the binned data of \code{depth-ratio} and \code{b-allele frequency}.
 68 |     The function displays the observations reulting from the sequencing
 69 |     post-procssing as well the results of the model.
 70 | }
 71 | 
 72 | \seealso{
 73 |     \code{\link{windowValues}}, \code{\link{find.breaks}}.
 74 | }
 75 | 
 76 | \examples{
 77 |     \dontrun{
 78 | 
 79 |     data.file <-  system.file("extdata", "example.seqz.txt.gz",
 80 |         package = "sequenza")
 81 |     # read all the chromosomes:
 82 |     seqz.data  <- read.seqz(data.file)
 83 |     # Gather genome wide GC-stats from raw file:
 84 |     gc.stats <- gc.sample.stats(data.file)
 85 |     gc.vect  <- setNames(gc.stats$raw.mean, gc.stats$gc.values)
 86 |     # Read only one chromosome:
 87 |     seqz.data  <- read.seqz(data.file, chr.name = 1)
 88 | 
 89 |     # Correct the coverage of the loaded chromosome:
 90 |     seqz.data$adjusted.ratio <- seqz.data$depth.ratio /
 91 |         gc.vect[as.character(seqz.data$GC.percent)]
 92 |     # Select the heterozygous positions
 93 |     seqz.hom  <- seqz.data$zygosity.normal == 'hom'
 94 |     seqz.het  <- seqz.data[!seqz.hom, ]
 95 |     # Detect breakpoints
 96 |     breaks <- find.breaks(seqz.het, gamma = 80, kmin = 10, baf.thres = c(0, 0.5))
 97 |     # use heterozygous and homozygous position to measure segment values
 98 |     seg.s1 <- segment.breaks(seqz.data, breaks = breaks)
 99 | 
100 |     # Binning the values of depth ratio and B allele frequency
101 |     seqz.r.win <- windowValues(x = seqz.data$adjusted.ratio,
102 |         positions = seqz.data$position, chromosomes = seqz.data$chromosome,
103 |         window = 1e6, overlap = 1, weight = seqz.data$depth.normal)
104 | 
105 |     seqz.b.win <- windowValues(x = seqz.het$Bf,
106 |         positions = seqz.het$position, chromosomes = seqz.het$chromosome,
107 |         window = 1e6, overlap = 1, weight = round(x = seqz.het$good.reads,
108 |             digits = 0))
109 |     # create mutation table:
110 |     mut.tab   <- mutation.table(seqz.data, mufreq.treshold = 0.15,
111 |         min.reads = 40, max.mut.types = 1, min.type.freq = 0.9,
112 |         segments = seg.s1)
113 |     # chromosome view without parametes:
114 |     chromosome.view(mut.tab = mut.tab[mut.tab$chromosome == "1",],
115 |         baf.windows = seqz.b.win[[1]], ratio.windows = seqz.r.win[[1]],
116 |         min.N.ratio = 1, segments = seg.s1[seg.s1$chromosome == "1",],
117 |         main = "Chromosome 1")
118 | 
119 |     # filter out small ambiguous segments, and weight the segments by size:
120 |     seg.filtered <- seg.s1[(seg.s1$end.pos - seg.s1$start.pos) > 10e6, ]
121 |     weights.seg  <- 150 + round((seg.filtered$end.pos -
122 |                              seg.filtered$start.pos) / 1e6, 0)
123 |     # get the genome wide mean of the normalized depth ratio:
124 |     avg.depth.ratio <- mean(gc.stats$adj[,2])
125 |     # run the BAF model fit
126 | 
127 |     CP <- baf.model.fit(Bf = seg.filtered$Bf, depth.ratio = seg.filtered$depth.ratio,
128 |         weight.ratio = weights.seg, weight.Bf = weights.seg,
129 |         avg.depth.ratio = avg.depth.ratio, cellularity = seq(0.1,1,0.01),
130 |         ploidy = seq(0.5,3,0.05))
131 | 
132 |     confint <- get.ci(CP)
133 |     ploidy   <- confint$max.ploidy
134 |     cellularity <- confint$max.cellularity
135 |     #detect copy number alteration on the segments:
136 |     cn.alleles <- baf.bayes(Bf = seg.s1$Bf, depth.ratio = seg.s1$depth.ratio,
137 |         cellularity = cellularity, ploidy = ploidy, avg.depth.ratio = 1)
138 | 
139 |     seg.s1 <- cbind(seg.s1, cn.alleles)
140 | 
141 |     # Chromosome view with estimated paramenters:
142 |     chromosome.view(mut.tab = mut.tab[mut.tab$chromosome == "1",],
143 |         baf.windows = seqz.b.win[[1]], ratio.windows = seqz.r.win[[1]],
144 |         min.N.ratio = 1, segments = seg.s1[seg.s1$chromosome == "1",],
145 |         main = "Chromosome 1", cellularity = cellularity, ploidy = ploidy,
146 |         avg.depth.ratio = 1, BAF.style = "lines")
147 |     }
148 | }
149 | 


--------------------------------------------------------------------------------
/man/cp.plot.Rd:
--------------------------------------------------------------------------------
 1 | \name{cp.plot}
 2 | \alias{cp.plot}
 3 | \alias{cp.plot.contours}
 4 | \alias{get.ci}
 5 | 
 6 | \title{Plot log-posterior probability for the output of the
 7 |  \code{\link{sequenza.fit}} function}
 8 | 
 9 | \description{
10 |   This function uses the \code{\link{colorgram}} function from the package
11 |   \pkg{squash} to plot log-posterior probability for the tested combinations of
12 |   cellularity and ploidy
13 | }
14 | 
15 | \usage{
16 |    cp.plot(cp.table, xlab = "Ploidy", ylab = "Cellularity",
17 |            zlab = "Scaled rank LPP",
18 |            colFn = colorRampPalette(c('white', 'lightblue')), ...)
19 |    cp.plot.contours(cp.table, likThresh = c(0.95), alternative = TRUE,
20 |                     col = palette(), legend.pos = "bottomright", pch = 18,
21 |                     alt.pch = 3, ...)
22 |    get.ci(cp.table, level = 0.95)
23 | }
24 | 
25 | \arguments{
26 |   \item{cp.table}{list, as output from \code{\link{baf.model.fit}} or \code{\link{mufreq.model.fit}}.}
27 |   \item{xlab}{xlab parameter as in the function \code{\link{colorgram}}.}
28 |   \item{ylab}{ylab parameter as in the function \code{\link{colorgram}}.}
29 |   \item{zlab}{zlab parameter as in the function \code{\link{colorgram}}.}
30 |   \item{colFn}{colFn parameter as in the function \code{\link{colorgram}}.}
31 |   \item{likThresh}{vector of quantiles to define tresholds for the confindent regions.}
32 |   \item{alternative}{boolean parameter, if \code{TRUE} the alternative solutions are computed and plotted.}
33 |   \item{col}{vector of colors.}
34 |   \item{legend.pos}{position for placing the legend.}
35 |   \item{pch}{character used to indicate the point estimate.}
36 |   \item{alt.pch}{if \code{alternative} is set to \code{TRUE} defines the character to indicate alternative solutions.}
37 |   \item{...}{additional arguments accepted by the function \code{\link{colorgram}} for \code{cp.plot}, or \code{\link{contour}} for \code{cp.plot.contours}.}
38 |   \item{level}{decimal value of the confidence interval}
39 | }
40 | 
41 | \value{
42 |   The \code{get.ci} function returns a list with 6 items:
43 |   \item{values.ploidy}{matrix of ploidy values with respective posterior probability.}
44 |   \item{confint.ploidy}{boundaries of the confidence interval of the estimated ploidy.}
45 |   \item{max.ploidy}{point estimate of the ploidy value that has the maximum posterior probability.}
46 |   \item{values.cellularity}{matrix of cellularity values with respective posterior probability.}
47 |   \item{confint.cellularity}{boundaries of the confidence interval of the estimated cellularity.}
48 |   \item{max.cellularity}{point estimate of the cellularity value that has the maximum posterior probability.}
49 | }
50 | 
51 | \examples{
52 | 
53 | data(CP.example)
54 | cp.plot(CP.example)
55 | cp.plot.contours(CP.example, add = TRUE)
56 | 
57 | # Plot more contours
58 | cp.plot(CP.example)
59 | cp.plot.contours(CP.example, likThresh = c(0.95, 0.9999), add = TRUE)
60 | 
61 | # Return the 95% confidence interval
62 | CP.example.ci <- get.ci(CP.example)
63 | str(CP.example.ci)
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/man/example.seqz.Rd:
--------------------------------------------------------------------------------
 1 | \name{example.seqz}
 2 | \docType{data}
 3 | \alias{example.seqz}
 4 | 
 5 | \title{Example \dQuote{seqz} data}
 6 | 
 7 | \description{
 8 |     The \dQuote{seqz} file is produced by \command{sequenza-utils} and
 9 |     typically has the file extension \file{.seqz}. The data here is
10 |     representative of a seqz file derived from an exome-sequenced tumor sample,
11 |     such as could be obtained from TCGA.
12 | }
13 | 
14 | \usage{
15 |     data(example.seqz)
16 | }
17 | 
18 | \format{
19 |     A data frame with 53937 rows and 14 columns:
20 |     \tabular{rll}{
21 |         [,1] \tab chromosome    \tab Chromosome name \cr
22 |         [,2] \tab position      \tab Base position \cr
23 |         [,3] \tab base.ref      \tab Base in the reference genome\cr
24 |         [,4] \tab depth.normal  \tab Read depth in the normal sample\cr
25 |         [,5] \tab depth.tumor   \tab Read depth in the tumor sample\cr
26 |         [,6] \tab depth.ratio   \tab Ratio of \code{depth.tumor} and \code{depth.normal}\cr
27 |         [,7] \tab Af            \tab A-allele frequency in the tumor sample\cr
28 |         [,8] \tab Bf            \tab B-allele frequency in the tumor sample,
29 |                                      in heterozygous positions only\cr
30 |         [,9] \tab zygosity.normal\tab Zygosity of the normal sample:
31 |                                      "hom" for homozygous or "het" for heterozygous\cr
32 |         [,10] \tab GC.percent   \tab \% GC content\cr
33 |         [,11] \tab good.reads   \tab Number of reads from the tumor sample
34 |                                      which pass the quality threshold \cr
35 |         [,12] \tab AB.normal    \tab Base(s) found in the normal sample,
36 |                                      sorted by allele frequency if more than one\cr
37 |         [,13] \tab AB.tumor     \tab Base(s) found in the tumor sample \emph{but not} in
38 |                                      the normal specimen, with their observed
39 |                                      frequencies, separated by colons \cr
40 |         [,14] \tab tumor.strand \tab Identical to \code{AB.tumor} but indicating, for
41 |                                      each variant base, the fraction of reads
42 |                                      oriented in the forward direction \cr
43 |     }
44 | }
45 | 
46 | \source{
47 | This is derived from a TCGA specimen, but has been scrambled to anonymize the source.
48 | The reference genome is hg19.  The GC content was calculated in 50-base windows.
49 | }
50 | 
51 | \details{
52 |     \code{example.seqz} can be loaded in the standard R way via
53 |     \code{data(example.seqz)}, or it can be read from a text file using
54 |     \code{\link{read.seqz}}. The former is useful for examples and testing,
55 |     whereas the latter is representative of the standard workflow.
56 | }
57 | 
58 | \keyword{datasets}
59 | 


--------------------------------------------------------------------------------
/man/gc.Rd:
--------------------------------------------------------------------------------
 1 | \name{gc.sample.stats}
 2 | \alias{gc.sample.stats}
 3 | \alias{gc.summary.plot}
 4 | \alias{mean_gc}
 5 | \alias{median_gc}
 6 | 
 7 | 
 8 | \title{Collect display and correct GC-content related coverage bias}
 9 | 
10 | \description{
11 |   Collect information and perform statistics of depth of coverage in
12 |   relation with GC-content.
13 | }
14 | 
15 | \usage{
16 |   gc.sample.stats(file, col_types = "c--dd----d----", buffer = 33554432,
17 |                  parallel = 2L, verbose = TRUE)
18 |   gc.summary.plot(gc_list, mean.col = 1, median.col = 2,
19 |      scale.subset = 1.5, ...)
20 |   mean_gc(gc_list)
21 |   median_gc(gc_list)
22 | }
23 | 
24 | \arguments{
25 |   \item{file}{name of a file in the seqz format.}
26 |   \item{col_types}{a string describing the classes of each columns of the
27 |         input file (see \code{\link{read_tsv}}). The default value corresponds
28 |         to the columns of a seqz file used for carculating GC statistics.}
29 |   \item{buffer}{maximal size of each chunk in bytes(see
30 |         \code{\link{chunk.apply}}).}
31 |   \item{parallel}{integer, number of threads used to process a seqz file
32 |         (see \code{\link{chunk.apply}}).}
33 |   \item{verbose}{logical. If TRUE (the default) the function retuns information
34 |         in the console.}
35 |   \item{gc_list}{a normal or tumor list resulting from the
36 |      \code{\link{gc.sample.stats}} function.}
37 |   \item{mean.col}{color for the mean in the summary plot.}
38 |   \item{median.col}{color for the median in the summary plot.}
39 |   \item{scale.subset}{scale the depth values to sho in the plot. A value of 1
40 |     will show the average depth at the center of the plot.}
41 |   \item{...}{additional parametrers from \code{\link{colorgram}}.}
42 | }
43 | 
44 | \details{
45 |   \code{gc.sample.stats} extracts depths and GC-content inforation for the
46 |   tumor and the control samples from an seqz file
47 |   it returns a list with 3 elements: \code{file.metrics}, \code{normal} and
48 |   \code{tumor}.
49 | 
50 |   \code{file.metrics} is a \code{data.frame} serving as index of the seqz
51 |   file; the \code{normal} and \code{tumor} objects contains each 3 ojects:
52 |   \code{gc}, \code{depth} and \code{n}.
53 | 
54 |   \code{gc} and \code{depth} are vectors containing the recorded values of,
55 |   respectively, GC and coverage depth. the \code{n} object is a matrix
56 |   \code{gc}x\code{depth}, recording the number of time a certain
57 |   \code{gc}/\code{depth} pairs is observed in the data.
58 | 
59 | }
60 | 
61 | \value{
62 | A list with the following elements:
63 |   \item{file.metrics}{index of the seqz file.}
64 |   \item{tumor}{GC and coverage depth observations in the tumor sample.}
65 |   \item{normal}{GC and coverage depth observations in the control sample.}
66 | }
67 | 
68 | 
69 | \examples{
70 | 
71 | \dontrun{
72 | 
73 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
74 | # read all the chromosomes:
75 | gc_info <- gc.sample.stats(data.file)
76 | 
77 | # mean values of depth coverage vs GC content
78 | 
79 | mean_gc(gc_info$normal)
80 | 
81 | # plot the information for the tumor and normal samples
82 | par(mfrow=c(1, 2))
83 | gc.summary.plot(gc_info$normal, main = "Normal GC stats")
84 | gc.summary.plot(gc_info$tumor, main = "Tumor GC stats")
85 | }
86 | }


--------------------------------------------------------------------------------
/man/model_points.Rd:
--------------------------------------------------------------------------------
 1 | \name{model.points}
 2 | 
 3 | \alias{model.points}
 4 | \alias{baf.model.points}
 5 | \alias{mufreq.model.points}
 6 | 
 7 | \title{Generate B-allele frequency, mutation frequency and depth ratios
 8 |     at given model points, cellularity and ploidy values}
 9 | 
10 | \description{
11 |     The \code{baf.model.points} and \code{mufreq.model.points} functions
12 |     combine \code{theoretical_baf}, \code{theoretical_mufreq} and
13 |     \code{theoretical_depth_ratio} to model the theoretical respective values
14 |         at known values of cellularity and ploidy.
15 | }
16 | 
17 | \usage{
18 |     baf.model.points(cellularity, ploidy, baf_types, avg.depth.ratio)
19 |     mufreq.model.points(cellularity, ploidy, mufreq_types, avg.depth.ratio)
20 | }
21 | 
22 | \arguments{
23 |     \item{cellularity}{fraction of tumor cells in the sample.}
24 |     \item{ploidy}{2 * ratio between total DNA content in a tumor cell
25 |         and a normal cell.}
26 |     \item{baf_types}{matrix with the sets of copy numbers and number of
27 |         mutated alleles over which to model mutation frequency and depth ratio.
28 |         The matrix can be generated with \code{\link{baf.types.matrix}}.}
29 |     \item{mufreq_types}{matrix with the sets of copy numbers and number of
30 |         mutated alleles over which to model mutation frequency and depth ratio.
31 |         The matrix can be generated with \code{\link{mufreq.types.matrix}}.}
32 |     \item{avg.depth.ratio}{average normalized depth ratio.}
33 | }
34 | \value{
35 |     For \code{baf.model.points} a data.frame with two columns:
36 |         \item{BAF}{modelled values of B-allele frequency.}
37 |         \item{depth_ratio}{modelled values of depth ratio.}
38 |     For \code{mufreq.model.points} a data.frame with two columns:
39 |         \item{mufreqs}{modelled values of mutation frequency.}
40 |         \item{depth_ratio}{modelled values of depth ratio.}
41 | }
42 | 
43 | \details{
44 |     The \code{baf.model.points} and \code{mufreq.model.points} functions
45 |     generate the theoretical values of B-allele frequency, mutation frequency
46 |     and depth ratio for the given type tags. To learn more about type tags
47 |     see \code{\link{types.matrix}}.
48 | }
49 | 
50 | \seealso{
51 |     \code{\link{types.matrix}}, \code{\link{theoretical.depth.ratio}},
52 |     \code{\link{theoretical.baf}} \code{\link{theoretical.mufreq}}.
53 | }
54 | 
55 | \examples{
56 |     # Simulate a cellularity of 0.5, ploidy of 2 and types from min CNt 0
57 |     # and max = 4 on an originally diploid genome:
58 |     types <- baf.types.matrix(CNt.min = 0, CNt.max = 4, CNn = 2)
59 |     cbind(types, baf.model.points(cellularity = 0.5, ploidy = 2,
60 |         baf_types = types, avg.depth.ratio = 1))
61 |     # Simulate a cellularity of 0.5, ploidy of 2 and types from min CNt 0
62 |     # and max = 4 on an originally monoallelic genome:
63 |     types <- mufreq.types.matrix(CNt.min = 0, CNt.max = 4, CNn = 1)
64 |     cbind(types, mufreq.model.points(cellularity = 0.5, ploidy = 2,
65 |         mufreq_types = types, avg.depth.ratio = 1))
66 | }
67 | 


--------------------------------------------------------------------------------
/man/mutations.Rd:
--------------------------------------------------------------------------------
 1 | \name{mutation.table}
 2 | \alias{mutation.table}
 3 | \title{Identify mutations}
 4 | 
 5 | \description{
 6 |  This function extracts positions from an seqz file that differ from the normal genome, applying various filters.
 7 | }
 8 | 
 9 | \usage{
10 | mutation.table(seqz.tab, mufreq.treshold = 0.15, min.reads = 40, min.reads.normal = 10,
11 |                max.mut.types = 3, min.type.freq = 0.9, min.fw.freq = 0, segments = NULL)
12 | }
13 | 
14 | \arguments{
15 |   \item{seqz.tab}{an seqz table, as output from \code{\link{read.seqz}}.}
16 |   \item{mufreq.treshold}{mutation frequency threshold.}
17 |   \item{min.reads}{minimum number of reads above the quality threshold to accept the mutation call.}
18 |   \item{min.reads.normal}{minimum number of reads used to determine the genotype in the normal sample.}
19 |   \item{max.mut.types}{maximum number of different base substitutions per position. Integer from 1 to 3 (since there are only 4 different bases). Default is 3, to accept \dQuote{noisy} mutation calls.}
20 |   \item{min.type.freq}{minimum frequency of aberrant types.}
21 |   \item{min.fw.freq}{minimum frequency of variant reads detected in the forward strand. Setting it to 0, all the variant calls with strand frequency in the interval outside 0 and 1, margin not comprised, would be discarded.}
22 |   \item{segments}{if specified, the values of depth ratio would be taken from the segments rather than from the raw data.}
23 | }
24 | 
25 | \details{
26 |   Calling mutations in impure tumor samples is a difficult task, because the degree of contamination by normal cells affects the measured mutation frequency. In highly impure samples, where the normal cells comprise the major component of the sample, mutations can be so diluted that it can be difficult to distinguish them from sequencing errors.
27 | 
28 |   The function \code{mutation.table} tries to separate true mutations from sequencing errors, based on the given threshold. In samples with low contamination, it should even be possible to catch sub-clonal mutations using this function.
29 | 
30 |   This function identifes only those mutations occuring in positions that are homozygous in the normal genome.
31 | }
32 | 
33 | \value{
34 | A data frame, which in addition to some of the columns of the seqz table, contains the following two columns:
35 | \item{F}{the mutation frequency}
36 | \item{mutation}{a character representation of the mutation. For example, a mutation from \samp{A} in the normal to \samp{G} in the tumor is annotated as \samp{A>G}.}
37 | 
38 | }
39 | \examples{
40 | 
41 |    \dontrun{
42 | 
43 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
44 | seqz.data  <- read.seqz(data.file)
45 | 
46 | ## Normalize coverage by GC-content
47 | gc.stats <- gc.norm(x = seqz.data$depth.ratio,
48 |                     gc = seqz.data$GC.percent)
49 | gc.vect  <- setNames(gc.stats$raw.mean, gc.stats$gc.values)
50 | seqz.data$adjusted.ratio <- seqz.data$depth.ratio /
51 |                            gc.vect[as.character(seqz.data$GC.percent)]
52 | 
53 | ## Extract mutations
54 | mut.tab   <- mutation.table(seqz.data, mufreq.treshold = 0.15,
55 |                             min.reads = 40, max.mut.types = 1,
56 |                             min.type.freq = 0.9)
57 | mut.tab <- na.exclude(mut.tab)
58 |    }
59 | }
60 | 


--------------------------------------------------------------------------------
/man/plotWindows.Rd:
--------------------------------------------------------------------------------
 1 | \name{plotWindows}
 2 | \alias{plotWindows}
 3 | \title{Plot a binned values of a chromosome}
 4 | 
 5 | \description{
 6 |     The \code{plotWindows} function visualizes a \code{data.frame} produced by
 7 |     the \code{windowValues} or \code{windowBf} functions.
 8 | }
 9 | 
10 | \usage{
11 | plotWindows(seqz.window, m.lty = 1, m.lwd = 3, m.col = "black",
12 |     q.bg = "lightblue", log2.plot = FALSE, n.min = 1, xlim, ylim,
13 |     add = FALSE, ...)
14 | }
15 | 
16 | \arguments{
17 |     \item{seqz.window}{data frame of base-pair windows and corresponding
18 |         quartiles to be plotted. A list of such data frames can be output
19 |         from \code{\link{windowValues}} or \code{\link{windowBf}}.}
20 |     \item{m.lty}{line type used for plotting mean values.}
21 |     \item{m.lwd}{line width used for plotting mean values.}
22 |     \item{m.col}{line color used for plotting mean values.}
23 |     \item{q.bg}{background color for the area between the 0.25 and 0.75
24 |         quartiles.}
25 |     \item{log2.plot}{logical, if TRUE values are log2 scaled.}
26 |     \item{n.min}{minimum number of data points required for a binned window
27 |         to be plotted.}
28 |     \item{xlim}{limits of the x axis.}
29 |     \item{ylim}{limits of the y axis.}
30 |     \item{add}{logical, if TRUE the plot will be added to an existing opened
31 |         device.}
32 |     \item{...}{any other arguments accepted by \code{\link{plot}}.}
33 | }
34 | 
35 | \seealso{
36 |     \code{\link{chromosome.view}},
37 | }
38 | 
39 | \examples{
40 |     data.file <-  system.file("extdata", "example.seqz.txt.gz",
41 |         package = "sequenza")
42 |     seqz.data <- read.seqz(data.file)
43 |     # 1Mb windows, each window is overlapping with 1 other adjacent
44 |     # window: depth ratio
45 |     seqz.ratio <- windowValues(x = seqz.data$depth.ratio,
46 |         positions = seqz.data$position, chromosomes = seqz.data$chromosome,
47 |         window = 1e6, weight = seqz.data$depth.normal, start.coord = 1,
48 |         overlap = 1)
49 | 
50 |     plotWindows(seqz.ratio[[1]], log2.plot = FALSE, ylab = "Depth ratio",
51 |         xlab = "Position (bases)", main = names(seqz.ratio)[1], las = 1,
52 |         n.min = 1, ylim = c(0, 2.5))
53 | 
54 |     plotWindows(seqz.ratio[[17]], log2.plot = FALSE, ylab = "Depth ratio",
55 |         xlab = "Position (bases)", main = names(seqz.ratio)[1], las = 1,
56 |         n.min = 1, ylim = c(0, 2.5))
57 | }
58 | 


--------------------------------------------------------------------------------
/man/read_seqz.Rd:
--------------------------------------------------------------------------------
  1 | \name{read.seqz}
  2 | \alias{read.seqz}
  3 | \alias{read.acgt}
  4 | \title{Read a seqz or acgt format file}
  5 | 
  6 | \description{
  7 |     Efficiently reads a seqz file into R.
  8 | }
  9 | 
 10 | \usage{
 11 |     read.seqz(file, n_lines = NULL, col_types = "ciciidddcddccc", chr_name = NULL,
 12 |         buffer = 33554432, parallel = 1,
 13 |         col_names = c("chromosome", "position", "base.ref", "depth.normal",
 14 |             "depth.tumor", "depth.ratio", "Af", "Bf", "zygosity.normal",
 15 |             "GC.percent", "good.reads", "AB.normal", "AB.tumor",
 16 |             "tumor.strand"),...)
 17 | }
 18 | 
 19 | \arguments{
 20 |     \item{file}{file name}
 21 |     \item{col_types}{a string describing the classes of each columns of the
 22 |         input file (see \code{\link{read_tsv}}). The default value corresponds
 23 |         to the columns of a seqz file.}
 24 |     \item{chr_name}{if specified, only the selected chromosome will be
 25 |         extracted instead of the entire file. For \code{tabix}-indexed files
 26 |         this argument can also be used to extract coordinated-selected genomic
 27 |         regions. E.g. \code{chr_name="5:1-1000000"} will select the first
 28 |         megabase of chromosome 5.}
 29 |     \item{n_lines}{vector of length 2 specifying the first and last line to
 30 |         read from the file. If specified, only the selected portion of the
 31 |         file will be used.}
 32 |     \item{buffer}{maximal size of each chunk in bytes(see
 33 |         \code{\link{chunk.apply}}).}
 34 |     \item{parallel}{integer, number of threads used to process a seqz file
 35 |         (see \code{\link{chunk.apply}}).}
 36 |     \item{col_names}{names of the columns of the seqz file. The default
 37 |         corresponds to the column names of a seqz file.}
 38 |     \item{...}{any arguments accepted by \code{read_tsv}.}
 39 | }
 40 | 
 41 | \details{
 42 |     \code{read.seqz} is a function that allows to efficiently access a
 43 |     \code{seqz} file by chromosome or by line numbers. The function can also
 44 |     access coordinate specific regions with \code{tabix}-indexed \code{seqz}
 45 |     files.
 46 |     The specific content of a \code{seqz} file is explained in the \code{value}
 47 |     section.
 48 | }
 49 | 
 50 | \format{
 51 |     A seqz file is a tab-separated text file with 14 columns and a header row.
 52 |     The first 3 columns are derived from the original \code{pileup}
 53 |     file and contain:
 54 |     \describe{
 55 |         \item{chromosome}{the chromosome name}
 56 |         \item{position}{the base position}
 57 |         \item{base.ref}{the base in the reference genome.
 58 |             Note that this is NOT necessarily the same base as in
 59 |             the normal specimen.}
 60 |     The remaining 10 columns contain the following information:
 61 |         \item{depth.normal}{read depth observed in the normal sample}
 62 |         \item{depth.tumor}{read depth observed in the tumor sample}
 63 |         \item{depth.ratio}{ratio of \code{depth.tumor} and \code{depth.normal}}
 64 |         \item{Af}{A-allele frequency observed in the tumor sample}
 65 |         \item{Bf}{B-allele frequency observed in the tumor sample in
 66 |             heterozygous positions}
 67 |         \item{zygosity.normal}{zygosity of the reference sample. "hom"
 68 |             corresponds to AA or BB, whereas "het" corresponds to AB or BA}
 69 |         \item{GC.percent}{GC-content (percent), calculated from the reference
 70 |             genome in fixed nucleotide windows }
 71 |         \item{good.reads}{number of reads that passed the quality threshold
 72 |             (threshold specified in the pre-processing software), in the
 73 |             tumor specimen}
 74 |         \item{AB.normal}{base(s) found in the germline sample; for heterozygous
 75 |             positions AB are sorted using the values of Af and Bf respectively}
 76 |         \item{AB.tumor}{base(s) found in the tumor sample not present in the
 77 |             normal specimen. The field include all the variants found in the
 78 |             tumor alignment, separated by a colon. Each variant contains the
 79 |             base and the observed frequency}
 80 |         \item{tumor.strand}{frequency of the variant nucleotides detected on
 81 |             the forward orientation. The field have a consistent structure with
 82 |             \code{AB.tumor}, indicating the fraction, relative to the total
 83 |             number of reads presenting the specific variant, orientated
 84 |             in the forward direction}
 85 |     }
 86 | }
 87 | 
 88 | \seealso{
 89 |   \code{read_delim}.
 90 | }
 91 | 
 92 | \examples{
 93 |    \dontrun{
 94 | 
 95 |     data_file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
 96 | 
 97 |     ## read chromosome 1 from an seqz file.
 98 |     seqz_data <- read.seqz(data_file, chr_name = 1)
 99 | 
100 |     ## Fast access to chromosome X using the file metrics
101 |     gc.stats <- gc.sample.stats(data_file)
102 |     chrX <- gc.stats$file.metrics[gc.stats$file.metrics$chr == "X", ]
103 |     seqz.data <- read.seqz(data_file, n_lines = c(chrX$start, chrX$end))
104 | 
105 |     ## Compare the running time of the two different methods.
106 |     system.time(seqz.data <- read.seqz(data_file, n_lines = c(chrX$start, chrX$end)))
107 |     system.time(seqz.data <- read.seqz(data_file, chr_name = "X"))
108 | 
109 |    }
110 | }
111 | 


--------------------------------------------------------------------------------
/man/theoretical.Rd:
--------------------------------------------------------------------------------
 1 | \name{theoretical.baf}
 2 | \alias{theoretical.baf}
 3 | \alias{theoretical.mufreq}
 4 | \alias{theoretical.depth.ratio}
 5 | 
 6 | \title{Calculates cellularity and ploidy dependent model points}
 7 | 
 8 | \description{
 9 |     Calculates the theoretically expected values of BAF, mutation frequency
10 |     or depth ratio for given values of cellularity, ploidy and copy number.
11 | }
12 | 
13 | \usage{
14 |     theoretical.depth.ratio(CNt, cellularity, ploidy, CNn = 2,
15 |         normal.ploidy = 2, avg.depth.ratio = 1)
16 |     theoretical.baf(CNt, B, cellularity, CNn = 2)
17 |     theoretical.mufreq(CNt, Mt, cellularity, CNn = 2)
18 | 
19 | }
20 | 
21 | \arguments{
22 |   \item{CNn}{copy number in the normal sample.}
23 |   \item{CNt}{copy number in the tumor sample.}
24 |   \item{B}{number of B-alleles in the tumor sample.}
25 |   \item{Mt}{number of alleles carrying a mutation in the tumor sample.}
26 |   \item{cellularity}{fraction of tumor cells in the sample.}
27 |   \item{ploidy}{2 * ratio between total DNA content in a tumor cell
28 |       and a normal cell.}
29 |   \item{normal.ploidy}{ploidy value in the normal sample. Default
30 |       is 2 for a diploid cell.}
31 |   \item{avg.depth.ratio}{average normalized depth ratio.}
32 | }
33 | 
34 | \details{
35 |     The observed B-allele frequency, depth ratio and mutation frequency are
36 |     affected by the cellularity of the tumor sample, which is the inverse of
37 |     the degree of contamination by normal cells.
38 |     Three functions are included, which for know values of cellularity and
39 |     ploidy they produce the expected values of B-allele frequency, mutation
40 |     frequency or depth ratio.
41 | 
42 |     \code{theoretical.baf} returns a dataframe with the possible copy numbers
43 |     of A and B alleles, along with their corresponding B-allele frequency and
44 |     the total copy number state (always the sum of A+B).
45 | 
46 |     \code{theoretical.depth.ratio} returns the theoretical depth ratio at a
47 |     single specific position, given values of cellularity, ploidy, the ratio
48 |     between the tumor copy number and the normal copy number at that position,
49 |     and the average depth ratio of the sample.
50 | 
51 |     \code{theoretical.mufreq} returns the theoretical mutation frequency at a
52 |     single specific position, given values of cellularity, copy number in the
53 |     normal and tumor samples at that position, and the number of mutated
54 |     alleles.
55 | }
56 | 
57 | \seealso{
58 |     \code{\link{model.points}}
59 | }
60 | 


--------------------------------------------------------------------------------
/man/type_matrix.Rd:
--------------------------------------------------------------------------------
 1 | \name{types.matrix}
 2 | \alias{types.matrix}
 3 | \alias{baf.types.matrix}
 4 | \alias{mufreq.types.matrix}
 5 | \title{Creates a matrix of type tags}
 6 | 
 7 | \description{
 8 |     Type tags are a utensil to distinguish genomic positions by their copy
 9 |     number state, number A and B alleles and the number of mutated alleles.
10 |     This function creates a matrix of all possible type tags, given the copy
11 |     number of the normal sample and the range of possible copy numbers in
12 |     the tumor sample.
13 | }
14 | 
15 | \usage{
16 |     baf.types.matrix(CNt.min, CNt.max, CNn = 2)
17 |     mufreq.types.matrix(CNt.min, CNt.max, CNn = 2)
18 | }
19 | 
20 | \arguments{
21 |     \item{CNt.min}{minimum copy number in the tumor.}
22 |     \item{CNt.max}{maximum copy number in the tumor.}
23 |     \item{CNn}{copy number of the normal sample.}
24 | }
25 | 
26 | \value{
27 |     \code{baf.types.matrix} returns a data.frame with the 3 columns:
28 |         \item{CNn}{number of alleles in the normal sample.}
29 |         \item{CNt}{numbers of alleles in the tumor sample.}
30 |         \item{B}{number of B alleles in the tumor sample.}
31 | 
32 |     \code{mufreq.types.matrix} returns a data.frame with the 3 columns:
33 |         \item{CNn}{number of alleles in the normal sample.}
34 |         \item{CNt}{numbers of alleles in the tumor sample.}
35 |         \item{Mt}{number of mutated alleles in the tumor sample.}
36 | }
37 | 
38 | \details{
39 |     A type consists of 3 integers signifying the copy number in the normal
40 |     and tumor samples and the number of B alleles (\code{baf.types.matrix}) or
41 |     mutated alleles (\code{mufreq.types.matrix}).
42 |     The two functions return all the possible types combination within the
43 |     range of tumor copy numbers in the arguments (\code{CNt.min:CNt.max}).
44 | }
45 | 
46 | \seealso{
47 |     \code{theoretical_mufreq}, \code{theoretical_depth_ratio},
48 |     \code{theoretical_baf}, \code{model_points}.
49 | }
50 | 
51 | \examples{
52 |     ## Generate matrix types from 0 to 4 copy number, being the
53 |     ## non-tumor chromosome diploid.
54 |     baf.types.matrix(CNt.min = 0, CNt.max = 4, CNn = 2 )
55 | 
56 |     ## Generate matrix types from 0 to 4 copy number, being the
57 |     ## non-tumor chromosome monoploid.
58 |     mufreq.types.matrix(CNt.min = 0, CNt.max = 4, CNn = 1 )
59 | }
60 | 


--------------------------------------------------------------------------------
/man/windowValues.Rd:
--------------------------------------------------------------------------------
 1 | \name{windowValues}
 2 | \alias{windowValues}
 3 | \alias{windowBf}
 4 | \title{Bins sequencing data for plotting}
 5 | 
 6 | \description{
 7 |   Given a variable with corresponding genomic positions, the function bins the
 8 |   values in windows of a specified size and calculates weighted mean and 25th
 9 |   and 75th percentile for each window. The resulting object are visualized by
10 |   the function \code{plotWindows}.
11 | }
12 | 
13 | \usage{
14 |     windowValues(x, positions, chromosomes, window = 1e6, overlap = 0,
15 |         weight = rep.int( x = 1, times = length(x)), start.coord = 1)
16 |     windowBf(Af, Bf, good.reads, positions, chromosomes, window = 1e6,
17 |         overlap = 0, start.coord = 1, conf = 0.95)
18 | }
19 | \arguments{
20 |     \item{x}{variable to be windowed.}
21 |     \item{positions}{base-pair positions.}
22 |     \item{chromosomes}{names or numbers of the chromosomes.}
23 |     \item{window}{size of windows used for binning data. Smaller windows will
24 |         take more time to compute.}
25 |     \item{overlap}{integer defining the number of overlapping windows. Default
26 |         is 0, no overlap.}
27 |     \item{weight}{weights to be assigned to each value of \code{x}, usually
28 |         related to the read depth.}
29 |     \item{start.coord}{coordinate at which to start computing the windows. If
30 |         NULL, will start at the first position available.}
31 |     \item{Af}{A-allele frequency for the \code{Bf} calculation.}
32 |     \item{Bf}{B-allele frequency for the \code{Bf} calculation.}
33 |     \item{good.reads}{number of reads passing filter for the \code{Bf}
34 |         calculation.}
35 |     \item{conf}{confidence intervals of the binned \code{Bf} value.}
36 | }
37 | 
38 | \details{
39 |     DNA sequencing produces an amount of data too large to be handled by
40 |     standard graphical devices. In addition, for samples analyzed with older
41 |     machines and with low or middle coverage (20x to 50x), measures such as
42 |     read depth are subject to big variations due to technical noise.
43 |     Using \code{windowValues} prior to plotting reduces the noise and the
44 |     amount of data to be plotted.
45 | 
46 |     The binning of the B-allele frequency requires a separate function,
47 |     \code{windowBf}, as the B-allele frequency calculation uses multiple
48 |     values: \code{Af}, \code{Bf} and \code{good.reads}.
49 | 
50 |     The output of \code{windowValues} and \code{windowBf} can be used as input
51 |     for \code{\link{plotWindows}}.
52 | }
53 | 
54 | \value{
55 |     a list of data.frame, one per chromosome. Each data.frame contains
56 |     base-pair windows covering the chromosome. Each row of the data.frame
57 |     correspond to a window and its weighted mean, 25th and 75th percentiles of
58 |     the input values, and the number of data points within each window.
59 | }
60 | 
61 | \seealso{
62 |     \code{plotWindows}
63 | }
64 | 
65 | \examples{
66 | 
67 | \dontrun{
68 |     data.file <-  system.file("extdata", "example.seqz.txt.gz",
69 |         package = "sequenza")
70 |     seqz.data <- read.seqz(data.file)
71 |     # 1Mb windows, each window is overlapping with 1 other
72 |     # adjacent window: depth ratio
73 |     seqz.ratio <- windowValues(x = seqz.data$depth.ratio,
74 |         positions = seqz.data$position, chromosomes = seqz.data$chromosome,
75 |         window = 1e6, weight = seqz.data$depth.normal, start.coord = 1,
76 |         overlap = 1)
77 | 
78 |     seqz.hom  <- seqz.data$zygosity.normal == 'hom'
79 |     seqz.het  <- seqz.data[!seqz.hom, ]
80 |     # 1Mb windows, each window is overlapping with 1 other adjacent window:
81 |     # B-allele frequency
82 |     seqz.bafs  <- windowValues(x = seqz.het$Bf, positions = seqz.het$position,
83 |         chromosomes = seqz.het$chromosome, window = 1e6,
84 |         weight = seqz.het$depth.tumor, start.coord = 1, overlap = 1)
85 |     # Repeat the same operation using windowBf
86 |     seqz.bafs  <- windowBf(Af = seqz.het$Bf, Bf = seqz.het$Bf,
87 |         good.reads = seqz.het$good.reads, positions = seqz.het$position,
88 |         chromosomes = seqz.het$chromosome, window = 1e6,
89 |         start.coord = 1, overlap = 1, conf = 0.95)
90 | }
91 | }
92 | 


--------------------------------------------------------------------------------
/man/workflow.Rd:
--------------------------------------------------------------------------------
  1 | \name{sequenza}
  2 | \alias{sequenza.extract}
  3 | \alias{sequenza.fit}
  4 | \alias{sequenza.results}
  5 | \title{Sequenza convenience functions for standard analysis}
  6 | 
  7 | \description{
  8 |   These three functions are intended to be the main user interface of the package, to run several of the functions of \code{sequenza} in a standardized pipeline.
  9 | }
 10 | 
 11 | \usage{
 12 |   sequenza.extract(file, window = 1e6, overlap = 1,
 13 |     gamma = 80, kmin = 10, gamma.pcf = 140, kmin.pcf = 40,
 14 |     mufreq.treshold = 0.10, min.reads = 40, min.reads.normal = 10,
 15 |     min.reads.baf = 1, max.mut.types = 1, min.type.freq = 0.9,
 16 |     min.fw.freq = 0, verbose = TRUE, chromosome.list = NULL,
 17 |     breaks = NULL, breaks.method = "het", assembly = "hg19",
 18 |     weighted.mean = TRUE, normalization.method = "mean",
 19 |     ignore.normal = FALSE, parallel = 1, gc.stats = NULL,
 20 |     segments.samples = FALSE)
 21 | 
 22 |   sequenza.fit(sequenza.extract, female = TRUE, N.ratio.filter = 10,
 23 |                N.BAF.filter = 1, segment.filter = 3e6,
 24 |                mufreq.treshold = 0.10, XY = c(X = "X", Y = "Y"),
 25 |                cellularity = seq(0.1,1,0.01), ploidy = seq(1, 7, 0.1),
 26 |                ratio.priority = FALSE, method = "baf",
 27 |                priors.table = data.frame(CN = 2, value = 2),
 28 |                chromosome.list = 1:24, mc.cores = getOption("mc.cores", 2L))
 29 | 
 30 |   sequenza.results(sequenza.extract, cp.table = NULL, sample.id, out.dir = getwd(),
 31 |                    cellularity = NULL, ploidy = NULL, female = TRUE, CNt.max = 20,
 32 |                    ratio.priority = FALSE, XY = c(X = "X", Y = "Y"),
 33 |                    chromosome.list = 1:24)
 34 | }
 35 | 
 36 | \arguments{
 37 |   \item{file}{the name of the seqz file to read.}
 38 |   \item{window}{size of windows used when plotting mean and quartile ranges of depth ratios and B-allele frequencies. Smaller windows will take more time to compute.}
 39 |   \item{overlap}{integer specifying the number of overlapping windows.}
 40 |   \item{gamma, kmin}{arguments passed to \code{\link{aspcf}} from the \pkg{copynumber} package.}
 41 |   \item{gamma.pcf, kmin.pcf}{arguments passed to \code{\link{pcf}} from the \pkg{copynumber} package. The arguments are effective only when \code{breaks.method} is set to "full".}
 42 |   \item{mufreq.treshold}{mutation frequency threshold.}
 43 |   \item{min.reads}{minimum number of reads above the quality threshold to accept the mutation call.}
 44 |   \item{min.reads.normal}{minimum number of reads used to determine the genotype in the normal sample.}
 45 |   \item{min.reads.baf}{threshold on the depth of the positions included to calculate the average BAF for segment.}
 46 |   \item{max.mut.types}{maximum number of different base substitutions per position. Integer from 1 to 3 (since there are only 4 bases). Default is 3, to accept "noisy" mutation calls.}
 47 |   \item{min.type.freq}{minimum frequency of aberrant types.}
 48 |     \item{min.fw.freq}{minimum frequency of variant reads detected in the forward strand. Setting it to 0, all the variant calls with strand frequency in the interval outside 0 and 1, margin not comprised, would be discarded.}
 49 |   \item{verbose}{logical, indicating whether to print information about the chromosome being processed.}
 50 |   \item{chromosome.list}{vector containing the index or the names of the chromosome to include in the model fitting.}
 51 |   \item{breaks}{Optional data.frame in the format chrom, start.pos, end.pos, defining a pre-existing segmentation. When the argument is set the built-in segmentation will be skipped in favor of the suggested breaks.}
 52 |   \item{breaks.method}{Argument indicating the resolution of the segmentation. Possible values are \code{fast}, \code{het} and \code{full}, where \code{fast} allows the lower resolution and \code{full} the higher. Custom values of \code{gamma} and \code{kmin} need to be adjusted to have optimal results.}
 53 |   \item{assembly}{assembly version of the genome, see \code{\link{aspcf}} or \code{\link{pcf}}.}
 54 |   \item{weighted.mean}{boolean to select if the segments should be calculated using the read depth as weights to calculate depth ratio and B-allele frequency means.}
 55 |   \item{normalization.method}{string defining the operation to perform during the GC-normalization process. Possible values are \code{mean} (default) and \code{median}. A \code{median} normalization is preferable with noisy data.}
 56 |   \item{ignore.normal}{boolean, when set to TRUE the process will ignore the normal coverage and perform the analysis by using the normalized tumor coverage.}
 57 |   \item{parallel}{integer, number of threads used to process a seqz file
 58 |         (see \code{\link{chunk.apply}}).}
 59 |   \item{gc.stats}{object returned from the function \code{\link{gc.sample.stats}}. If \code{NULL} the object will be computed from the input file.}
 60 |   \item{segments.samples}{EXPERIMENTAL. Segment both tumor and normal samples separately, and add it to the QC plots.}
 61 |   \item{sequenza.extract}{a list of objects as output from the \code{sequenza.extract} function.}
 62 |   \item{method}{method to use to fit the data; possible values are \code{baf} to use \code{\link{baf.model.fit}} or \code{mufreq} to use the \code{\link{mufreq.model.fit}} function to fit the data.}
 63 |   \item{cp.table}{a list of objects as output from the \code{sequenza.fit} function.}
 64 |   \item{female}{logical, indicating whether the sample is male or female, to properly handle the X and Y chromosomes. Implementation only works for the human normal karyotype.}
 65 |   \item{CNt.max}{maximum copy number to consider in the model.}
 66 |   \item{N.ratio.filter}{threshold of minimum number of observation of depth ratio in a segment.}
 67 |   \item{N.BAF.filter}{threshold of minimum number of observation of B-allele frequency in a segment.}
 68 |   \item{segment.filter}{threshold segment length (in base pairs) to filter out short segments, that can cause noise when fitting the cellularity and ploidy parameters. The threshold will not affect the allele-specific segmentation.}
 69 |   \item{XY}{character vector of length 2 specifying the labels used for the X and Y chromosomes.}
 70 |   \item{cellularity}{vector of candidate cellularity parameters.}
 71 |   \item{ploidy}{vector candidate ploidy parameters.}
 72 |   \item{priors.table}{data frame with the columns \code{CN} and \code{value}, containing the copy numbers and the corresponding weights. To every copy number is assigned the value 1 as default, so every values different then 1 will change the corresponding weight.}
 73 |   \item{ratio.priority}{logical, if TRUE only the depth ratio will be used to determine the copy number state, while the Bf value will be used to determine the number of B-alleles.}
 74 |   \item{sample.id}{identifier of the sample, to be used as a prefix for saved objects.}
 75 |   \item{out.dir}{output directory where the files and objects will be saved.}
 76 |   \item{mc.cores}{legacy argument to set the number of cores, but it refers to the \code{cl} of \code{\link{pblapply}}. It uses \code{\link{mclapply}} when set to an integer.}
 77 | 
 78 | }
 79 | 
 80 | \details{
 81 |    The first function, \code{sequenza.extract}, utilizes a range of functions from the sequenza package to read the raw data, normalize the depth.ratio for GC-content bias, perform allele-specific segmentation, filter for noisy mutations and bin the raw data for plotting. The computed objects are returned as a single list object.
 82 | 
 83 |    The segmentation by default is performed using only the heterozygous position and the \code{\link{aspcf}} function from \pkg{copynumber} package. The \code{full} option in the \code{breaks.method} argument allow to combine results of the segmentation of all the data available, using the \code{\link{pcf}} function, and the default \code{\link{aspcf}} using only the heterozygous positions.
 84 | 
 85 |    The second function, \code{sequenza.fit}, accepts the output from \code{sequenza.extract} and calls \code{\link{baf.model.fit}} to calculate the log-posterior probability for all pairs of the candidate ploidy and cellularity parameters.
 86 | 
 87 |    The third function, \code{sequenza.results}, saves a number of objects in a specified directory (default is the working directory). The objects are:
 88 | \itemize{
 89 |  \item The list of segments with resulting copy numbers and major and minor alleles.
 90 |  \item The candidate mutation list with variant allele frequency, and copy number and number of mutated allele, in relation of the clonal population (for sub-clonal population it needs to be processed with further methods).
 91 |  \item A plot of all the chromosomes in one image, representing the major and minor alleles and the absolute copy number changes (genome_view).
 92 |  \item Multiple plots with one chromosome per image, representing copy-number, B-allele frequency and mutation in parallel (chromosome_view).
 93 |  \item Results of the model fitting (CP_contours and confints_CP)
 94 |  \item A summary of the copy number state of the sample (CN_bars).
 95 | }
 96 | }
 97 | 
 98 | \seealso{
 99 |   \code{\link{genome.view}}, \code{\link{baf.bayes}}, \code{\link{cp.plot}}, \code{\link{get.ci}}.
100 | }
101 | 
102 | \examples{
103 |   \dontrun{
104 | 
105 | data.file <-  system.file("extdata", "example.seqz.txt.gz",
106 |               package = "sequenza")
107 | test <- sequenza.extract(data.file)
108 | test.CP   <- sequenza.fit(test)
109 | sequenza.results(test, test.CP, out.dir = "example",
110 |                  sample.id = "example")
111 | 
112 |    }
113 | }


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(sequenza)
3 | 
4 | test_check("sequenza")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test.model.R:
--------------------------------------------------------------------------------
 1 | context("Test sequenza models")
 2 | 
 3 | 
 4 | test_that("Testing theoretical depth ratio", {
 5 |     r1 <- theoretical.depth.ratio(CNt = 3, cellularity = 0.5, ploidy = 2)
 6 |     r2 <- theoretical.depth.ratio(CNt = 3, cellularity = 0.8, ploidy = 2)
 7 |     r3 <- theoretical.depth.ratio(CNt = 4, cellularity = 0.8, ploidy = 4)
 8 |     expect_equal(r1, 1.25)
 9 |     expect_equal(r2, 1.4)
10 |     expect_equal(r3, 1)
11 | })
12 | 
13 | test_that("Testing theoretical BAF", {
14 |     r1 <- theoretical.baf(CNt = 4, B = 2, cellularity = 0.5)
15 |     r2 <- theoretical.baf(CNt = 3, B = 1, cellularity = 0.8)
16 |     r3 <- theoretical.baf(CNt = 4, B = 0, cellularity = 0.8)
17 |     expect_equal(r1, 0.5)
18 |     expect_equal(round(r2, 3), 0.357)
19 |     expect_equal(round(r3, 3), 0.056)
20 | })
21 | 
22 | test_that("Testing theoretical mufreq", {
23 |     r1 <- theoretical.mufreq(CNt = 4, Mt = 2, cellularity = 0.5)
24 |     r2 <- theoretical.mufreq(CNt = 3, Mt = 1, cellularity = 0.8)
25 |     r3 <- theoretical.mufreq(CNt = 4, Mt = 0, cellularity = 0.8)
26 |     expect_equal(round(r1, 3), 0.333)
27 |     expect_equal(round(r2, 3), 0.286)
28 |     expect_equal(r3, 0)
29 | })
30 | 
31 | test_that("Testing baf types matrix", {
32 |     x <- 3
33 |     CNts <- do.call(c, sapply(1:x, FUN = function(x) rep(x, x)))
34 |     Bs <- do.call(c, sapply(1:x, FUN = function(x) {
35 |             y <- x:1
36 |             z <-  rep(x, x)
37 |             z - y
38 |         }
39 |     ))
40 |     mat_b <- data.frame(CNn = rep(2, length(Bs) + 1),
41 |                    CNt = c(0, CNts), B = c(0, Bs))
42 |     mat_b <- mat_b[mat_b[, "B"] <= mat_b[, "CNt"] / 2, ]
43 |     r1 <- baf.types.matrix(CNt.min = 0, CNt.max = 3, CNn = 2)
44 |     expect_equal(r1, mat_b)
45 | })
46 | 
47 | test_that("Testing mufreq types matrix", {
48 |     x <- 3
49 |     CNts <- do.call(c, sapply(1:x, FUN = function(x) rep(x, x + 1)))
50 |     Mts <- do.call(c, sapply(1:x, FUN = function(x) 0:x))
51 |     mat_m <- data.frame(CNn = rep(2, length(Mts) + 1),
52 |                    CNt = c(0, CNts), Mt = c(0, Mts))
53 |     r1 <- mufreq.types.matrix(CNt.min = 0, CNt.max = 3, CNn = 2)
54 |     expect_equal(r1, mat_m)
55 | })
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test.read.R:
--------------------------------------------------------------------------------
 1 | context("Test read seqz data")
 2 | seqz_file <- system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
 3 | col_names <- c("chromosome", "position", "base.ref", "depth.normal",
 4 |               "depth.tumor", "depth.ratio", "Af", "Bf", "zygosity.normal",
 5 |               "GC.percent", "good.reads", "AB.normal", "AB.tumor",
 6 |               "tumor.strand")
 7 | 
 8 | test_that("Testing read seqz data", {
 9 |     full_file <- read.seqz(seqz_file)
10 |     expect_equal(colnames(full_file), col_names)
11 | })
12 | 
13 | test_that("Testing tbi presence", {
14 |     tbi <- file.exists(paste(seqz_file, "tbi", sep = "."))
15 |     expect_equal(tbi, TRUE)
16 | })
17 | 
18 | test_that("Reading single chromosome", {
19 |     chr_17 <- read.seqz(seqz_file, chr_name = "17")
20 |     expect_equal(dim(chr_17), c(2988, 14))
21 | })
22 | 
23 | test_that("Reading selected region", {
24 |     chr_17_p <- read.seqz(seqz_file, chr_name = "17:7500000-7600000")
25 |     expect_equal(dim(chr_17_p), c(13, 14))
26 | })
27 | 


--------------------------------------------------------------------------------
/vignettes/sequenza.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Sequenza User Guide"
  3 | date: "`r Sys.Date()`"
  4 | author: "[Francesco Favero](mailto:favero.francesco@gmail.com)"
  5 | output:
  6 |   rmdformats::readthedown:
  7 |     self_contained: true
  8 | vignette: >
  9 |   %\VignetteIndexEntry{Sequenza User Guide}
 10 |   %\VignetteEngine{knitr::rmarkdown}
 11 |   %\VignetteEncoding{UTF-8}
 12 | ---
 13 | 
 14 | # About
 15 | 
 16 | > Sequenza: Copy Number Estimation from Tumor Genome Sequencing Data
 17 | 
 18 | 
 19 | ![](https://bytebucket.org/sequenzatools/icons/raw/324bd43ac4d10546b64b04c38d8c513e8420346d/svg/sequenza_tools/sequenzaalpha_150.svg)
 20 | 
 21 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/sequenza)](https://cran.r-project.org/package=sequenza)
 22 | [![CRAN_Downloads_Badge](http://cranlogs.r-pkg.org/badges/sequenza)](https://cran.r-project.org/package=sequenza)
 23 | [![CRAN_licence](https://img.shields.io/cran/l/sequenza.svg)](https://www.gnu.org/licenses/gpl-3.0.txt)
 24 | 
 25 | 
 26 | 
 27 | Sequenza is a tool to analyze genomic sequencing data from paired normal-tumor samples, including cellularity and ploidy estimation; mutation and copy number (allele-specific and total copy number) detection, quantification and visualization.
 28 | 
 29 | # Introduction
 30 | 
 31 | Deep sequence of tumor DNA along with corresponding normal DNA can provide a
 32 | valuable perspective on the mutations and aberrations that characterize the
 33 | tumor. However, analysis of this data can be impeded by tumor cellularity and
 34 | heterogeneity and by unwieldy data. Here we describe *Sequenza*, an R package
 35 | that enables the efficient estimation of tumor cellularity and ploidy, and
 36 | generation of copy number, loss-of-heterozygosity, and mutation frequency
 37 | profiles.
 38 | 
 39 | This document details a typical analysis of matched tumor-normal exome sequence
 40 | data using *sequenza*.
 41 | 
 42 | # Getting started
 43 | 
 44 | ## Minimum requirements
 45 |  - Software: R, Python, SAMtools, tabix
 46 |  - Operating system: Linux, OS X, Windows
 47 |  - Memory: Minimum 4 GB of RAM. Recommended >8 GB.
 48 |  - Disk space: 1.5 GB for sample (depending on sequencing depth)
 49 |  - R version: 3.2.0
 50 |  - Python version: 2.7, 3.4, 3.5, 3.6 (or PyPy)
 51 | 
 52 | ## Installation
 53 | 
 54 | The R package can be installed by:
 55 | 
 56 | ```r
 57 | setRepositories(graphics = FALSE, ind = 1:6)
 58 | install.packages("sequenza")
 59 | ```
 60 | 
 61 | To install the Python companion package *sequenza-utils* to preprocess BAM
 62 | files, refer to the [*sequenza-utils*](https://pypi.org/project/sequenza-utils)
 63 | project page, or simply use the python package manager from the command prompt:
 64 | 
 65 | ```bash
 66 | pip install sequenza-utils
 67 | ```
 68 | 
 69 | # Running sequenza
 70 | 
 71 | ## Preprocessing of input files
 72 | 
 73 | In order to obtain precise mutational and aberration patterns in a tumor sample,
 74 | Sequenza requires a matched normal sample from the same patient. Typically, the
 75 | following files are needed to get started with Sequenza:
 76 | 
 77 |  - A BAM file (or a derived pileup file) from the tumor specimen.
 78 |  - A BAM file (or a derived pileup file) from the normal specimen.
 79 |  - A FASTA reference genomic sequence file
 80 | 
 81 | 
 82 | The normal and tumor BAM files are processed together to generate a *seqz* file, which
 83 | is the required input for the analysis.
 84 | It is possible to generate a *seqz* starting from other processed data, such as
 85 | pileup, or VCF files. The available options are described in the
 86 | [*sequenza-utils*](http://sequenza-utils.readthedocs.io/) manual pages.
 87 | 
 88 | The *sequenza-utils* command provides various tools; here we highlight only the
 89 | basic usage:
 90 | 
 91 |  - Process a FASTA file to produce a GC [Wiggle](https://genome.ucsc.edu/goldenpath/help/wiggle.html)
 92 |  track file:
 93 |  
 94 | ```bash
 95 | sequenza−utils gc_wiggle −w 50 --fasta hg19.fa -o hg19.gc50Base.wig.gz
 96 | ```
 97 | 
 98 |  - Process BAM and Wiggle files to produce a *seqz* file:
 99 |  
100 | ```bash
101 | sequenza−utils bam2seqz -n normal.bam -t tumor.bam --fasta hg19.fa \
102 |     -gc hg19.gc50Base.wig.gz -o out.seqz.gz
103 | ```
104 | 
105 |  - Post-process by binning the original *seqz* file:
106 |  
107 | ```bash
108 | sequenza−utils seqz_binning --seqz out.seqz.gz -w 50 -o out small.seqz.gz
109 | ```
110 |  
111 | ## Sequenza analysis (in R)
112 | 
113 | ```{r load}
114 | library(sequenza)
115 | ```
116 | 
117 | In the package is provided a small *seqz* file
118 | 
119 | ```{r data}
120 | data.file <-  system.file("extdata", "example.seqz.txt.gz", package = "sequenza")
121 | ```
122 | 
123 | ```{r cp_data, echo=FALSE, message=FALSE}
124 | library(sequenza)
125 | data(CP.example)
126 | CP <- CP.example
127 | ```
128 | 
129 | 
130 | 
131 | The main interface consists of 3 functions:
132 | 
133 |  - sequenza.extract: process seqz data, normalization and segmentation
134 | ```{r extract, message=FALSE, warning=FALSE, results="hide"}
135 | test <- sequenza.extract(data.file, verbose = FALSE)
136 | ```
137 | 
138 |  - sequenza.fit: run grid-search approach to estimate cellularity and ploidy
139 | ```{r fit, eval=FALSE}
140 | CP <- sequenza.fit(test)
141 | ```
142 | 
143 |  - sequenza.results: write files and plots using suggested or selected solution
144 | ```{r results}
145 | sequenza.results(sequenza.extract = test,
146 |     cp.table = CP, sample.id = "Test",
147 |     out.dir="TEST")
148 | ```
149 | 
150 | # Plots and Results
151 | 
152 | The function _sequenza.results_ outputs various files in the specified path.
153 | The resulting files are either output in pdf of in plain text. The files include
154 | quality control assessments (eg evaluate GC-correction), visualization of the
155 | data and files such as segmentation with copy number calling and mutation lists.
156 | 
157 | ## Result files
158 | 
159 | Each generated file is briefly explained in the following table
160 | 
161 | ```{r res_dir, echo=FALSE}
162 | 
163 | # create results list 
164 | # 
165 | res_list <- c("alternative_fit.pdf", "alternative_solutions.txt",
166 |     "chromosome_depths.pdf", "chromosome_view.pdf",
167 |     "CN_bars.pdf", "confints_CP.txt",
168 |     "CP_contours.pdf", "gc_plots.pdf",
169 |     "genome_view.pdf", "model_fit.pdf",
170 |     "mutations.txt", "segments.txt",
171 |     "sequenza_cp_table.RData", "sequenza_extract.RData",
172 |     "sequenza_log.txt")
173 | res_list <- paste("Test", res_list, sep = "_")
174 | 
175 | description_list <- c(
176 |     "Alternative solution fir to the segments. One solution per slide",
177 |     "List of all ploidy/cellularity alternative solution",
178 |     "Visualization of sequencing coverage in the normal and in the tumor samples, before and after normalization",
179 |     "Visualization per chromosome of depth.ratio, B-allele frequency and mutations, using the selected or estimated solution. One chromosome per slide",
180 |     "Bar plot representing the percentage of genome in the detected copy number states",
181 |     "Table of the confidence inerval of the best solution from the model",
182 |     "Visualization of the likelihood density for each pair of cellularity/ploidy solution. The local maximum-likelihood points and confidence interval of the best estimate are also visualized",
183 |     "Visualization of the GC correction in the normal and in the tumor sample",
184 |     "Genome-whide visualization of the allele-specific and absolute copy number results, and raw profile of the depth ratio and allele frequency",
185 |     "model_fit.pdf",
186 |     "Table with mutation and estimated number of mutated alleles (Mt)",
187 |     "Table listing the detected segments, with estimated copy number state at each sement",
188 |     "RData object dump of the maxima a posteriori computation",
189 |     "RData object dump of all the sample information",
190 |     "Log with version and time information")
191 | knitr::kable(data.frame(Files = res_list, Description = description_list))
192 | #dir("TEST", pattern = "Test")
193 | 
194 | ```
195 | 
196 | ```{r read_segs, echo=FALSE}
197 | seg.tab <- read.table("TEST/Test_segments.txt",
198 |                       header = TRUE, sep ="\t")
199 | 
200 | alt_res <- read.table("TEST/Test_alternative_solutions.txt",
201 |                       header = TRUE, sep ="\t")
202 | seg.tab <- seg.tab[seg.tab$CNt <= 4, ]
203 | is.num <- sapply(seg.tab, is.numeric)
204 | seg.tab[is.num] <- lapply(seg.tab[is.num], round, 3)
205 | ```
206 | 
207 | ## Segments results
208 | 
209 | The segmentation file with the allele-specific copy number calling is one of
210 | the main result of the analysis. A sample of the file is shown in the table below:
211 | 
212 | ```{r head_segs, echo=FALSE}
213 | knitr::kable(head(seg.tab))
214 | 
215 | ```
216 | 
217 | 
218 | The columns represents:
219 | 
220 |   1. **chromosome**: Chromosome
221 |   2. **start.pos**: Start position of the segment
222 |   3. **end.pos**: End position of the segment
223 |   4. **Bf**: B-allele frequency value
224 |   5. **N.BAF**: Number of observation to compute _Bf_ in the segment
225 |   6. **sd.BAF**: Standard deviation of _Bf_
226 |   7. **depth.ratio**: Adjusted and normalized depth ratio tumor / normal
227 |   8. **N.ratio**: Number of observation to compute _depth.ratio_ in the segment
228 |   9. **sd.ratio**: Standard deviation of _depth.rati_
229 |   10. **CNt**: Estimated total copy number value
230 |   11. **A**: Estimated number of A-alleles
231 |   12. **B**: Estimated number of B-alleles (minor allele)
232 |   13. **LPP**: Log-posterior probability of the segment
233 | 
234 | ## Gene wide overview
235 | 
236 | ### Allele-specific copy number
237 | 
238 | ```{r g_view, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'}
239 | sequenza:::genome.view(seg.tab)
240 | ```
241 | 
242 | ### Total copy number
243 | 
244 | ```{r g_view_tot, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'}
245 | sequenza:::genome.view(seg.tab, info.type = "CNt")
246 | ```
247 | 
248 | ### Raw profile
249 | 
250 | ```{r g_view_raw, echo=FALSE, fig.height=5, fig.width=10, fig.align='center'}
251 | sequenza:::plotRawGenome(test, cellularity = alt_res$cellularity[1],
252 |     ploidy = alt_res$ploidy[1])
253 | ```
254 | 
255 | 
256 | ## Grid search maximum likelihood
257 | 
258 | 
259 | ```{r CPplot, echo=TRUE, fig.height=5, fig.width=5, fig.align='center'}
260 | cp.plot(CP)
261 | cp.plot.contours(CP, add = TRUE,
262 |    likThresh = c(0.999, 0.95),
263 |    col = c("lightsalmon", "red"), pch = 20)
264 | ```
265 | 
266 | ## Chromosome view
267 | 
268 | _Chromosome view_ is the visualization that displays chromosome by crhosome, nutations,
269 | B-allele frequency and depth-ratio.
270 | The visualization makes it easier to ispect the segmentation results, comparing to
271 | a binned profile of the raw data.
272 | It also visualize the copy number calling using the _cellularity_ and _ploidy_ solution,
273 | making useful to asses if the copy number calling is acurate.
274 | In addition it provides a visualization of the mutation frequency that can also help to
275 | corroborate the solution.
276 | 
277 | ```{r c_view, echo=TRUE, fig.height=6, fig.width=8, fig.align='center'}
278 | chromosome.view(mut.tab = test$mutations[[1]], baf.windows = test$BAF[[1]],
279 |                 ratio.windows = test$ratio[[1]],  min.N.ratio = 1,
280 |                 segments = test$segments[[1]],
281 |                 main = test$chromosomes[1],
282 |                 cellularity = 0.89, ploidy = 1.9,
283 |                 avg.depth.ratio = 1)
284 | ```
285 | 


--------------------------------------------------------------------------------