├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── AllClasses.R ├── AllGenerics.R ├── calcConsensus.R ├── calcInteger.R ├── calcRatios.R ├── countBreakpoints.R ├── data.R ├── findAneuploidCells.R ├── findClusters.R ├── findOutliers.R ├── findSuggestedK.R ├── findVariableGenes.R ├── inferMrca.R ├── internals.R ├── knnSmooth.R ├── logNorm.R ├── multipcf.R ├── plotAlluvial.R ├── plotConsensusLine.R ├── plotFreq.R ├── plotGeneCopy.R ├── plotHeatmap.R ├── plotMetrics.R ├── plotPca.R ├── plotPhylo.R ├── plotRatio.R ├── plotScree.R ├── plotSuggestedK.R ├── plotUmap.R ├── plotVariableGenes.R ├── runConsensusPhylo.R ├── runCountReads.R ├── runDistMat.R ├── runMetrics.R ├── runPca.R ├── runPhylo.R ├── runSegmentation.R ├── runUmap.R ├── runVarbin.R ├── runVst.R └── utils-pipe.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── data-raw ├── genes_granges.R ├── varbin_hg19_chrarm.R └── varbin_hg19_grangeslist.R ├── data ├── copykit_obj_filt_rle.rda ├── copykit_obj_filt_umap.rda ├── copykit_obj_rle.rda ├── hg19_genes.rda ├── hg19_rg.rda ├── hg38_genes.rda └── hg38_grangeslist.rda ├── docs ├── ._plotRatioPlotgif.gif ├── .nojekyll ├── 404.html ├── LICENSE-text.html ├── LICENSE.html ├── authors.html ├── copykit_workflow.html ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── plotRatioPlotgif.gif └── reference │ ├── filterCells.html │ ├── findClusters.html │ ├── geneCopyPlot.html │ ├── index.html │ ├── pipe.html │ ├── plotHeatmap.html │ ├── plotUmap.html │ ├── readVarbinCNA.html │ ├── runDistMat.html │ ├── runPhylo.html │ ├── runUmap.html │ ├── scCNA-class.html │ └── segment_ratios-scCNA-method.html ├── images ├── copykit_cheat_sheet_DCM1.png └── copykit_hexsticker_v2.png ├── inst └── WORDLIST ├── man ├── CopyKit-class.Rd ├── calcConsensus.Rd ├── calcInteger.Rd ├── calcRatios.Rd ├── data.Rd ├── dot-countBreakpoints.Rd ├── findAneuploidCells.Rd ├── 
findClusters.Rd ├── findOutliers.Rd ├── findSuggestedK.Rd ├── findVariableGenes.Rd ├── inferMrca.Rd ├── internals.Rd ├── knnSmooth.Rd ├── logNorm.Rd ├── pipe.Rd ├── plotAlluvial.Rd ├── plotConsensusLine.Rd ├── plotFreq.Rd ├── plotGeneCopy.Rd ├── plotHeatmap.Rd ├── plotMetrics.Rd ├── plotPca.Rd ├── plotPhylo.Rd ├── plotRatio.Rd ├── plotScree.Rd ├── plotSuggestedK.Rd ├── plotUmap.Rd ├── plotVariableGenes.Rd ├── runConsensusPhylo.Rd ├── runCountReads.Rd ├── runDistMat.Rd ├── runMetrics.Rd ├── runPca.Rd ├── runPhylo.Rd ├── runSegmentation.Rd ├── runUmap.Rd ├── runVarbin.Rd └── runVst.Rd ├── snakemake_pipelines ├── bowtie2_markdup │ ├── README.md │ ├── bowtie2_markdup.smk │ └── bowtie2_markdup_pairend.smk └── markdup │ └── markdup.smk ├── tests ├── spelling.R ├── testthat.R └── testthat │ ├── setup.R │ ├── test-dimred-plots.R │ ├── test-plotHeatmap.R │ └── test-runSegmentation.R └── vignettes ├── .gitignore └── CopyKit_UserGuide.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^copykit\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^_pkgdown\.yml$ 6 | ^docs$ 7 | ^images$ 8 | ^snakemake_pipelines$ 9 | ^pkgdown$ 10 | ^data-raw$ 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | CopyKit.Rproj 3 | .Rhistory 4 | .RData 5 | inst/doc 6 | data-raw 7 | test_delete_folder 8 | .DS_Store 9 | snakemake_pipelines 10 | images 11 | ._.DS_Store 12 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: copykit 2 | Title: CopyKit 3 | URL: https://github.com/navinlabcode/copykit 4 | BugReports: https://github.com/navinlabcode/copykit/issues 5 | Version: 0.1.3 6 | Authors@R: c( 7 | person(given = "Darlan", 8 | family = "Conterno 
Minussi", 9 | role = c("aut", "cre"), 10 | email = "dcminussi@mdanderson.org", 11 | comment = c(ORCID = "0000-0001-8856-4625")), 12 | person(given = "Junke", 13 | family = "Wang", 14 | role = c("ctb"), 15 | email = "JWang48@mdanderson.org", 16 | comment = c(ORCID = "0000-0002-3733-5192")), 17 | person(given = "Yun", 18 | family = "Yan", 19 | role = c("ctb"), 20 | email = "yun.yan@uth.tmc.edu", 21 | comment = c(ORCID = "0000-0002-3701-9608")) ) 22 | Description: A suite of tools for single cell copy number analysis 23 | focusing on reads binning, segmentation, normalization, 24 | filtering, analysis, and visualization. CopyKit extends Bioconductor's 25 | SingleCellExperiment class for the analysis of copy number datasets. 26 | We divide CopyKit functions into four modules: 1) Pre-processing, 27 | 2) Quality Control, 3) Data Analysis, and 4) Visualization. CopyKit 28 | can be applied to a broad range of applications for studying diverse areas of 29 | cancer biology, including intratumor heterogeneity, premalignant 30 | progression, and metastasis.
31 | License: AGPL (>= 3) + file LICENSE 32 | Encoding: UTF-8 33 | LazyData: true 34 | Suggests: 35 | testthat, 36 | spelling, 37 | knitr, 38 | rmarkdown, 39 | stringr 40 | Language: en-US 41 | biocViews: Software, SingleCell, DNASeq, Sequencing, 42 | Visualization, Normalization, Clustering, 43 | ImmunoOncology, DataImport, CellBiology, QualityControl 44 | Imports: 45 | dplyr, 46 | uwot, 47 | ggplot2, 48 | grDevices, 49 | magrittr, 50 | SummarizedExperiment, 51 | ComplexHeatmap, 52 | fastcluster, 53 | amap, 54 | ape, 55 | S4Vectors, 56 | scales, 57 | shiny, 58 | miniUI, 59 | tidyr, 60 | igraph (>= 1.2.8), 61 | BiocGenerics, 62 | grid, 63 | ggbeeswarm, 64 | dbscan, 65 | mixtools, 66 | stats, 67 | gtools, 68 | forcats, 69 | circlize, 70 | BiocParallel, 71 | fpc, 72 | ggnewscale, 73 | ggalluvial, 74 | viridis, 75 | ggtree, 76 | methods, 77 | withr, 78 | bluster, 79 | scquantum 80 | RoxygenNote: 7.3.2 81 | VignetteBuilder: knitr 82 | Depends: 83 | SingleCellExperiment, 84 | R (>= 4.0.0), 85 | Rsubread, 86 | GenomicRanges, 87 | DNAcopy 88 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: CopyKit 3 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # copykit devel 2 | 3 | # copykit 0.1.3 4 | 5 | 6 | **Bug Fixes** 7 | Fixed gamma segmentation argument not being used when applying multipcf segmentation [#94](https://github.com/navinlabcode/copykit/issues/94) 8 | Fixed 'smoothed_bincounts' check [#97](https://github.com/navinlabcode/copykit/pull/98). Thanks @Puriney 9 | 10 | Fixed installation of multipcf after Bioconductor {{copynumber}} package deprecation. 11 | 12 | # copykit 0.1.2 13 | 14 | **New features** 15 | * New cell smoothing method with the function `knnSmooth()`. 
Uses k-nearest neighbors to smooth cells profiles and re-segments the datasets obtaining cleaner copy number profiles, with reduced overdispersion and improving downstream analysis. (Thanks to [Runmin Wei](https://github.com/WandeRum) for the helpful discussion.) 16 | * scquantum method is available for `calcInteger()` and is now a CopyKit import ([scquantum](https://github.com/navinlabcode/scquantum) is a single cell ploidy estimation tool developed by [Alexander Davis](https://github.com/alex-l-m)) 17 | * `calcInteger()` now accepts option method = 'metadata'. To use this option the user can add custom values of ploidy to every cell in the colData column 'ploidy' and run `calcInteger(ck, method = 'metadata')` to obtain the integer matrix on the CopyKit object. 18 | * runVst allows selection of the assay for the transformation 19 | 20 | **Changes** 21 | * `plotHeatmap()` order_cells argument now defaults to NULL. NULL option respects the order of the CopyKit object. order_cells argument can be set to 'consensus_tree' and 'hclust'. 22 | 23 | * Method 'scquantum' from `calcInteger()` adds 3 elements to the colData. 24 | 1. ploidy: containing the inferred ploidy call for each cell 25 | 2. confidence_ratio: ratio from scquantum inferred ploidy to scquantum theoretical ploidy 26 | 3. ploidy_score: Score derived from the confidence ratio. Values closer to 0 indicate a better fit of the ploidy call 27 | 28 | * Significance thresholds for CBS alpha segmentation and Merge levels were reduced to increase sensitivity to focal amplifications. 29 | 30 | **Removed** 31 | * option 'phylogeny' from function argument `plotHeatmap()` 'order_cells' has been removed. 32 | 33 | **Bug Fixes** 34 | * Fixed error in plotGeneCopy not returning plots with geom violin and barplot. (Thanks to @Romeo1-1) 35 | * Fixed error in plotGeneCopy with duplicated sample names on a merged object. Now it warns the user of merged sample names.
(Thanks to @Romeo1-1) 36 | * Allowing control of parameter merge_levels_alpha on `runVarbin()` and `runSegmentation()` to control the significance level of merge levels when merging not significant segments. 37 | 38 | 39 | # copykit 0.1.1 40 | 41 | * Reduced quality of heatmap raster that could quickly use all magick cache 42 | * Fixed hg38 scaffold issue for lower resolutions 500kb, 1Mb and 2.8Mb in which the quality control of low quality bins was too strict and causing problems especially on chromosome X. (Thanks to @Romeo1-1) 43 | 44 | # copykit 0.1.0 45 | 46 | * CopyKit goes 0.1.0. 47 | 48 | # copykit 0.0.0.9036 49 | 50 | * Added a `NEWS.md` file to track changes to the package. 51 | 52 | # copykit 0.0.0.9037 53 | 54 | * Changed values of resolution argument in `runVarbin()` to more accurately 55 | reflect the variable genomic scaffolds. 56 | Resolutions are: '55kb', '110kb', '195kb', '220kb', '280kb', '500kb', '1Mb', '2.8Mb' 57 | 58 | # copykit 0.0.0.9038 59 | 60 | * Adding PCA and PCA related functions 61 | - `runPca()` 62 | - `plotPca()` 63 | - `plotScree()` 64 | 65 | * Clustering functions can use n dimensions with either UMAP or PCA 66 | - added argument `ncomponents` to `findSuggestedK()` and `findClusters` 67 | 68 | # copykit 0.0.0.9039 69 | 70 | * Fixed a critical bug in runSegmentation to set the correct log base call 71 | during merge levels. 72 | 73 | # copykit 0.0.0.9040 74 | * Adding raster arguments to plotHeatmap().
75 | -------------------------------------------------------------------------------- /R/AllClasses.R: -------------------------------------------------------------------------------- 1 | ### all clases for the copykit package 2 | 3 | ################################################################### 4 | # ape has no formal definition of the phylo-class 5 | # that can be used for @importClassesFrom 6 | # the solution adopted here is the same as the one adopted phyloseq package 7 | # which creates S3 and S4 placeholders for the phylo class 8 | 9 | #' @keywords internal 10 | phylo <- structure(list(), class = "phylo") 11 | 12 | #' Placeholder for the igraph class 13 | #' @exportClass igraph 14 | #' @name igraph-class 15 | #' @rdname CopyKit-class 16 | #' @keywords internal 17 | setOldClass("igraph") 18 | 19 | ################################################################### 20 | #' The CopyKit class 21 | #' 22 | #' S4 Class that extends the Bioconductor SingleCellExperiment class to hold 23 | #' single cell copy number datasets. 24 | #' 25 | #' @slot phylo Stores the single cell phylogenetic information with ape class 26 | #' phylo. 27 | #' @slot consensusPhylo Stores the consensus phylogenetic information with 28 | #' ape class phylo. 29 | #' @slot distMat Stores a distance matrix object used for graphs and heatmaps. 30 | #' @slot graph Stores an igraph object for network based clustering. 31 | #' @slot consensus stores a consensus data frame from 32 | #' \code{\link{calcConsensus}.} 33 | #' @return A CopyKit class object. 
34 | #' @references The Bioconductor SingleCellExperiment Class 35 | #' DOI: 10.18129/B9.bioc.SingleCellExperiment 36 | #' @import methods 37 | #' @name CopyKit-class 38 | #' @rdname CopyKit-class 39 | #' @importMethodsFrom SummarizedExperiment colData 40 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment 41 | #' @importClassesFrom SummarizedExperiment RangedSummarizedExperiment 42 | #' @importClassesFrom S4Vectors DataFrame SimpleList 43 | #' @exportClass CopyKit 44 | .CopyKit <- setClass( 45 | "CopyKit", 46 | slots = representation( 47 | phylo = "phylo", 48 | consensusPhylo = "phylo", 49 | distMat = "dist", 50 | graph = "igraph", 51 | consensus = "data.frame" 52 | ), 53 | contains = "SingleCellExperiment" 54 | ) 55 | 56 | #' @export 57 | #' @rdname CopyKit-class 58 | #' @param consensus A data frame with the consensus information. 59 | #' @param phylo A phylo object with a phylogenetic tree. 60 | #' @param consensusPhylo A phylo object with a phylogenetic consensus tree. 61 | #' @param graph A graph object with a graph made from the umap data. 62 | #' @importClassesFrom SingleCellExperiment SingleCellExperiment 63 | CopyKit <- function(consensus = data.frame(), 64 | phylo = structure(list(), class = "phylo"), 65 | consensusPhylo = structure(list(), class = "phylo"), 66 | distMat = dist(matrix(0, 0, 0)), 67 | graph = igraph::graph.empty(), 68 | ...) { 69 | cna <- 70 | SingleCellExperiment::SingleCellExperiment(...) 71 | .CopyKit( 72 | cna, 73 | phylo = phylo, 74 | consensusPhylo = consensusPhylo, 75 | distMat = distMat, 76 | graph = graph, 77 | consensus = consensus 78 | ) 79 | } 80 | -------------------------------------------------------------------------------- /R/AllGenerics.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | setGeneric("segment_ratios", function(x, ...) { 3 | standardGeneric("segment_ratios") 4 | }) 5 | 6 | #' @export 7 | setGeneric("ratios", function(x, ...) 
{ 8 | standardGeneric("ratios") 9 | }) 10 | 11 | #' @export 12 | setGeneric("bincounts", function(x, ...) { 13 | standardGeneric("bincounts") 14 | }) 15 | 16 | #' @export 17 | setGeneric("consensus", function(x, ...) { 18 | standardGeneric("consensus") 19 | }) 20 | 21 | #' @export 22 | setGeneric("consensus<-", function(x, ..., value) { 23 | standardGeneric("consensus<-") 24 | }) 25 | 26 | #' @export 27 | setGeneric("phylo", function(x, ...) { 28 | standardGeneric("phylo") 29 | }) 30 | 31 | #' @export 32 | setGeneric("phylo<-", function(x, ..., value) { 33 | standardGeneric("phylo<-") 34 | }) 35 | 36 | #' @export 37 | setGeneric("consensusPhylo", function(x, ...) { 38 | standardGeneric("consensusPhylo") 39 | }) 40 | 41 | ##' @export 42 | setGeneric("consensusPhylo<-", function(x, ..., value) { 43 | standardGeneric("consensusPhylo<-") 44 | }) 45 | 46 | #' @export 47 | setGeneric("distMat", function(x, ...) { 48 | standardGeneric("distMat") 49 | }) 50 | 51 | #' @export 52 | setGeneric("distMat<-", function(x, ..., value) { 53 | standardGeneric("distMat<-") 54 | }) 55 | 56 | #' @export 57 | setGeneric("graph", function(x, ...) { 58 | standardGeneric("graph") 59 | }) 60 | 61 | #' @export 62 | setGeneric("graph<-", function(x, ..., value) { 63 | standardGeneric("graph<-") 64 | }) 65 | -------------------------------------------------------------------------------- /R/calcConsensus.R: -------------------------------------------------------------------------------- 1 | #' Calculate a consensus matrix of segment means based on \code{colData} 2 | #' 3 | #' @param scCNA The CopyKit object. 4 | #' @param assay String with the name of the assay to pull data from to calculate 5 | #' the consensus matrix. 6 | #' @param consensus_by A string with the column from colData that will be used 7 | #' to isolate the cells by factor and calculate the consensus. 8 | #' @param fun A string indicating the summarizing function to be used. 
9 | #' @param BPPARAM A \linkS4class{BiocParallelParam} specifying how the function 10 | #' should be parallelized. 11 | #' 12 | #' @details Consensus profiles are calculated by averaging or taking the median 13 | #' of the ith segment mean of all single cells assigned to the same element of 14 | #' \link{colData}. 15 | #' 16 | #' @return A consensus matrix stored in the consensus slot of the CopyKit object 17 | #' @export 18 | #' 19 | #' @examples 20 | #' copykit_obj <- copykit_example_filtered() 21 | #' copykit_obj <- findClusters(copykit_obj) 22 | #' copykit_obj <- calcConsensus(copykit_obj) 23 | calcConsensus <- function(scCNA, 24 | assay = "segment_ratios", 25 | consensus_by = "subclones", 26 | fun = c("median", "mean"), 27 | BPPARAM = BiocParallel::bpparam()) { 28 | fun <- match.arg(fun) 29 | # validate consensus_by itself (NULL, length) before it is used in any comparison 30 | if (is.null(consensus_by)) { 31 | stop("Please provide information to consensus_by argument.") 32 | } 33 | 34 | if (length(consensus_by) != 1) { 35 | stop("consensus_by argument must have length == 1") 36 | } 37 | 38 | if (consensus_by == "subclones" && 39 | is.null(SummarizedExperiment::colData(scCNA)$subclones)) { 40 | stop("Calculating consensus requires clusters. use findClusters(scCNA)") 41 | } 42 | 43 | if (consensus_by %!in% names(SummarizedExperiment::colData(scCNA))) { 44 | stop("consensus_by must be an element of colData(scCNA)") 45 | } 46 | 47 | consensus_info <- 48 | as.data.frame(SummarizedExperiment::colData(scCNA)) %>% 49 | dplyr::select(!!consensus_by) %>% 50 | droplevels() 51 | 52 | seg_data <- as.data.frame(t(SummarizedExperiment::assay(scCNA, assay))) 53 | 54 | # sanity check 55 | if (!identical(rownames(consensus_info), rownames(seg_data))) { 56 | stop("Order of elements in colData and segment_ratios must be identical.") 57 | } 58 | 59 | ## reading list with clusters 60 | long_list <- split(seg_data, consensus_info) 61 | 62 | consensus_list <- 63 | BiocParallel::bplapply(long_list, function(x) { 64 | apply(x, 2, fun) 65 | }, BPPARAM = BPPARAM) 66 | 67 | cs_df
<- as.data.frame(t(do.call(rbind, consensus_list))) 68 | 69 | if (assay == "integer") { 70 | cs_df <- round(cs_df) 71 | } 72 | 73 | names(cs_df) <- names(consensus_list) 74 | 75 | # This hidden attribute will allow plotHeatmap to figure it out which 76 | # argument was used in 'consensus_by' 77 | attr(cs_df, "consensus_by") <- consensus_by 78 | 79 | # This hidden attribute will allow plotHeatmap to figure it out which 80 | # argument was used in 'assay' 81 | attr(cs_df, "consensus_assay") <- assay 82 | 83 | consensus(scCNA) <- cs_df 84 | 85 | return(scCNA) 86 | } 87 | -------------------------------------------------------------------------------- /R/calcRatios.R: -------------------------------------------------------------------------------- 1 | #' Calculates the ratios from a matrix of counts 2 | #' 3 | #' @param scCNA The scCNA object 4 | #' @param assay String with the name of the assay to pull data from to calculate 5 | #' the ratios. 6 | #' @param fun A string indicating the summarizing function to be used. 7 | #' 8 | #' @details Calculates a sample-wise normalization of the selected assay by the 9 | #' mean bin counts returns ratios where a value of 1 corresponds to the neutral 10 | #' copy number state of the sample 11 | #' 12 | #' @return A ratio matrix within the slot assay(scCNA, 'ratios') 13 | #' can be accessed with \code{ratios}. 
14 | #' @export 15 | #' 16 | #' @importFrom SummarizedExperiment assay 17 | #' 18 | #' @examples 19 | #' copykit_obj <- mock_bincounts() 20 | #' copykit_obj <- calcRatios(copykit_obj) 21 | calcRatios <- function(scCNA, 22 | assay = c("ft", "bincounts", "smoothed_bincounts"), 23 | fun = c("mean", "median")) { 24 | assay <- match.arg(assay) 25 | fun <- match.arg(fun) 26 | 27 | counts <- SummarizedExperiment::assay(scCNA, assay) 28 | 29 | ratios_df <- sweep(counts, 2, apply(counts, 2, fun), "/") 30 | 31 | SummarizedExperiment::assay(scCNA, "ratios") <- round(ratios_df, 2) 32 | 33 | return(scCNA) 34 | } 35 | -------------------------------------------------------------------------------- /R/countBreakpoints.R: -------------------------------------------------------------------------------- 1 | #' countBreakpoints 2 | #' 3 | #' Considers changes in the segment ratios as breakpoints. 4 | #' Counts the breakpoints for each chromosome arm separately. 5 | #' 6 | #' @param scCNA 7 | #' 8 | #' @return The scCNA object with a column of breakpoint counts added to colData. 
9 | #' @export 10 | #' 11 | #' @keywords internal 12 | #' 13 | #' @importFrom dplyr pull bind_rows mutate select 14 | #' @importFrom SummarizedExperiment rowRanges seqnames 15 | #' 16 | #' @examples 17 | #' copykit_obj <- copykit_example_filtered() 18 | #' copykit_obj <- .countBreakpoints(copykit_obj) 19 | .countBreakpoints <- function(scCNA) { 20 | 21 | # bindings for NSE 22 | arm <- chrarm <- NULL 23 | 24 | rg_chr <- SummarizedExperiment::rowRanges(scCNA) %>% 25 | as.data.frame() %>% 26 | dplyr::mutate(chrarm = paste0(seqnames, arm)) %>% 27 | dplyr::select(chrarm) 28 | 29 | dat_seg_cp <- segment_ratios(scCNA) 30 | 31 | # split by chrom 32 | message("Counting breakpoints.") 33 | dat_seg_split <- split(dat_seg_cp, dplyr::pull(rg_chr, chrarm)) 34 | 35 | brkpt_by_chrom <- 36 | lapply(dat_seg_split, function(x) { 37 | apply(x, 2, function(i) { 38 | length(rle(i)$values) - 1 39 | }) %>% 40 | unlist() 41 | }) 42 | 43 | brkpt_by_chrom_df <- dplyr::bind_rows(brkpt_by_chrom) %>% 44 | t() %>% 45 | as.data.frame() 46 | 47 | brkpt_count <- rowSums(brkpt_by_chrom_df) 48 | 49 | # making sure order is identical 50 | brkpt_count <- brkpt_count[SummarizedExperiment::colData(scCNA)$sample] 51 | 52 | SummarizedExperiment::colData(scCNA)$breakpoint_count <- 53 | brkpt_count 54 | 55 | return(scCNA) 56 | } 57 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2 | # Documentation for internal data objects 3 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 4 | 5 | #' hg38_grangeslist 6 | #' 7 | #' @name hg38_grangeslist 8 | #' @aliases hg38_grangeslist 9 | #' @docType data 10 | #' @return Contains a GrangesList object with the scaffolds for each of the 11 | #' resolutions used by runVarbin, runCountReads and runSegmentation on the 12 | #' hg38 genome assembly. 
13 | #' @rdname data 14 | #' @keywords internal 15 | "hg38_grangeslist" 16 | 17 | #' hg19_rg 18 | #' 19 | #' @name hg19_rg 20 | #' @aliases hg19_rg 21 | #' @docType data 22 | #' @return Contains a GrangesList object with the scaffolds for each of the 23 | #' resolutions used by runVarbin, runCountReads and runSegmentation on the hg19 24 | #' genome assembly. 25 | #' @rdname data 26 | #' @keywords internal 27 | "hg19_rg" 28 | 29 | 30 | #' copykit_obj_rle 31 | #' 32 | #' @name copykit_obj_rle 33 | #' @aliases copykit_obj_rle 34 | #' @docType data 35 | #' @details Contains a Rle object with the segment ratios values of the copykit 36 | #' BL1 example dataset. It is used by the functions copykit_example 37 | #' @rdname data 38 | #' @keywords internal 39 | "copykit_obj_rle" 40 | 41 | #' copykit_obj_filt_rle 42 | #' 43 | #' @name copykit_obj_filt_rle 44 | #' @aliases copykit_obj_filt_rle 45 | #' @docType data 46 | #' @details Contains a Rle object with the segment ratios values of the copykit 47 | #' BL1 example filtered dataset.
It is used by the functions 48 | #' copykit_example_filtered() 49 | #' @rdname data 50 | #' @keywords internal 51 | "copykit_obj_filt_rle" 52 | 53 | #' copykit_obj_filt_umap 54 | #' 55 | #' @name copykit_obj_filt_umap 56 | #' @aliases copykit_obj_filt_umap 57 | #' @docType data 58 | #' @details Contains the umap reduced dimension for the BL1 dataset as generated 59 | #' for the CopyKit manuscript 60 | #' @rdname data 61 | #' @keywords internal 62 | "copykit_obj_filt_umap" 63 | 64 | #' hg19_genes 65 | #' 66 | #' @name hg19_genes 67 | #' @aliases hg19_genes 68 | #' @docType data 69 | #' @details Contains the GrangesObject for the genomic positions of genes in the 70 | #' hg19 genome assembly 71 | #' @source library(TxDb.Hsapiens.UCSC.hg19.knownGene) 72 | #' @rdname data 73 | #' @keywords internal 74 | "hg19_genes" 75 | 76 | #' hg38_genes 77 | #' 78 | #' @name hg38_genes 79 | #' @aliases hg38_genes 80 | #' @docType data 81 | #' @details Contains the GrangesObject for the genomic positions of genes in the 82 | #' hg38 genome assembly 83 | #' @source library(TxDb.Hsapiens.UCSC.hg38.knownGene) 84 | #' @rdname data 85 | #' @keywords internal 86 | "hg38_genes" 87 | -------------------------------------------------------------------------------- /R/findAneuploidCells.R: -------------------------------------------------------------------------------- 1 | #' findAneuploidCells 2 | #' 3 | #' Find cells that are not aneuploid in the dataset. 4 | #' 5 | #' @param scCNA The CopyKit object 6 | #' @param assay String with the name of the assay to pull data from to find 7 | #' normal cells. 8 | #' @param resolution A numeric scalar used as threshold to detect normal cells. 9 | #' @param remove_XY A boolean that removes chrX and chrY from the analysis. 10 | #' @param simul A boolean that if TRUE adds a simulated normal dataset to boost 11 | #' identifying normal cells in datasets with small proportions of normal cells.
12 | #' @param seed Seed passed on to reproduce simulated CV of normal cells. 13 | #' 14 | #' @details performs a sample-wise calculation of the segment means coefficient 15 | #' of variation and fits a Gaussian mixture model to the observed distribution 16 | #' from all cells. To increase the sensitivity of the model, the expected 17 | #' distribution of the coefficient of variation for diploid cells is simulated 18 | #' for as many cells as the dataset holds (mean = 0, sd = 0.01). This way, CopyKit can adequately 19 | #' detect diploid cells even in datasets with limited amounts of diploid cells 20 | #' and guarantees that no aneuploid cell will be removed from datasets without 21 | #' any diploid cells. The distribution with the smallest CV 22 | #' is assumed to originate from normal cells. Cells are classified as diploid 23 | #' if they have a coefficient of variation smaller than the mean plus five times 24 | #' the standard deviation of the normal cell distribution. 25 | #' 26 | #' @return information is added to \code{\link[SummarizedExperiment]{colData}} 27 | #' in a column named 'is_aneuploid' being TRUE if a cell is detected as 28 | #' aneuploid and FALSE if the cell is detected as euploid.
29 | #' 30 | #' @export 31 | #' 32 | #' @importFrom SummarizedExperiment colData 33 | #' @importFrom dplyr filter bind_rows case_when 34 | #' @importFrom mixtools normalmixEM 35 | #' @importFrom stats rnorm sd 36 | #' 37 | #' @examples 38 | #' set.seed(1000) 39 | #' copykit_obj <- copykit_example()[,sample(500)] 40 | #' copykit_obj <- findAneuploidCells(copykit_obj) 41 | findAneuploidCells <- function(scCNA, 42 | assay = "segment_ratios", 43 | resolution = "auto", 44 | remove_XY = TRUE, 45 | simul = TRUE, 46 | seed = 17) { 47 | 48 | # bindings for NSE (non-standard evaluation) 49 | is_aneuploid <- NULL 50 | 51 | if (remove_XY == FALSE & simul == TRUE) { 52 | stop("Argument simul can't be used if remove_XY == FALSE.") 53 | } 54 | 55 | if (resolution != "auto" & !is.numeric(resolution)) { 56 | stop("Resolution must be of class numeric") 57 | } 58 | 59 | # retrieving data 60 | rg <- as.data.frame(SummarizedExperiment::rowRanges(scCNA)) 61 | seg <- SummarizedExperiment::assay(scCNA, assay) 62 | ncells <- ncol(scCNA) 63 | 64 | if (remove_XY == TRUE) { 65 | rg <- rg %>% 66 | dplyr::filter( 67 | !grepl("X", seqnames), 68 | !grepl("Y", seqnames) 69 | ) 70 | 71 | seg <- seg[1:nrow(rg), ] 72 | } 73 | 74 | # calculating the coefficient of variation 75 | cv <- vapply( 76 | seg, function(z) { 77 | sd(z) / mean(z) 78 | }, 79 | numeric(1) 80 | ) 81 | 82 | if (simul == TRUE) { 83 | withr::with_seed(seed, 84 | cv_simul <- rnorm(ncells, 85 | mean = 0, 86 | sd = 0.01 87 | ) 88 | ) 89 | 90 | names(cv_simul) <- paste0("simul", 1:length(cv_simul)) 91 | 92 | cv <- c(cv_simul, cv) 93 | } 94 | 95 | if (resolution == "auto") { 96 | fit <- tryCatch( 97 | withr::with_seed(seed, mixtools::normalmixEM(cv)), 98 | error = function(e) { 99 | message("Could not identify aneuploid cells in the dataset.") 100 | message("Marking all cells as diploid.") 101 | message("Check colData(scCNA)$find_normal_cv.") 102 | return("error") 103 | } 104 | ) 105 | 106 | # determining resolution 107 | if (length(fit) 
> 1) { 108 | resolution <- fit$mu[1] + 5 * fit$sigma[1] 109 | } else { 110 | resolution <- 1 111 | } 112 | } 113 | 114 | if (simul == TRUE) { 115 | cv <- cv[!grepl("simul", names(cv))] 116 | } 117 | 118 | cv_df <- data.frame(sample = names(cv), 119 | CV = cv) 120 | 121 | cv_df_low_cv <- cv_df %>% 122 | dplyr::mutate(is_aneuploid = case_when( 123 | CV > resolution ~ TRUE, 124 | TRUE ~ FALSE 125 | )) 126 | 127 | message( 128 | "Copykit detected ", 129 | nrow(cv_df_low_cv %>% 130 | dplyr::filter(is_aneuploid == FALSE)), 131 | " that are possibly diploid cells using a resolution of: ", 132 | round(resolution, 3) 133 | ) 134 | 135 | # reordering info to add to metadata 136 | info <- 137 | cv_df_low_cv[match( 138 | SummarizedExperiment::colData(scCNA)$sample, 139 | cv_df_low_cv$sample 140 | ), ] 141 | 142 | SummarizedExperiment::colData(scCNA)$is_aneuploid <- info$is_aneuploid 143 | SummarizedExperiment::colData(scCNA)$find_normal_cv <- 144 | round(info$CV, 2) 145 | 146 | message("Added information to colData(CopyKit).") 147 | 148 | return(scCNA) 149 | } 150 | -------------------------------------------------------------------------------- /R/findOutliers.R: -------------------------------------------------------------------------------- 1 | #' findOutliers() 2 | #' 3 | #' Uses a nearest neighbor approach to find noise copy number profiles within 4 | #' the segment means. 5 | #' 6 | #' @author Hua-Jun Wu 7 | #' @author Darlan Conterno Minussi 8 | #' @author Junke Wang 9 | #' 10 | #' @param scCNA CopyKit object. 11 | #' @param assay String with the name of the assay to pull data. 12 | #' @param k A numeric scalar with the number k-nearest-neighbor cells to 13 | #' calculate the mean correlation 14 | #' @param resolution A numeric scalar that set's how strict the 15 | #' correlation cut off will be. 16 | #' @param BPPARAM A \linkS4class{BiocParallelParam} specifying how the function 17 | #' should be parallelized. 
18 | #' 19 | #' @details \code{findOutliers} To detect low-quality cells, CopyKit calculates 20 | #' the Pearson correlation matrix of all samples from the segment ratio means. 21 | #' Next, we calculate a sample-wise mean of the correlation between a sample 22 | #' and its k-nearest-neighbors. Samples in which the correlation value is lower 23 | #' than the defined threshold are classified as low-quality cells. 24 | #' 25 | #' @return Adds a column 'outlier' to 26 | #' \code{\link[SummarizedExperiment]{colData}}. Cells that pass the filtering 27 | #' criteria receive the label "FALSE", whereas cells that do not pass the 28 | #' filtering criteria receive the label "TRUE". 29 | #' 30 | #' @importFrom stats cor sd 31 | #' 32 | #' @export 33 | #' 34 | #' @examples 35 | #' set.seed(1000) 36 | #' copykit_obj <- copykit_example()[,sample(500)] 37 | #' copykit_obj <- findAneuploidCells(copykit_obj) 38 | #' copykit_obj <- copykit_obj[, colData(copykit_obj)$is_aneuploid == TRUE] 39 | #' copykit_obj <- findOutliers(copykit_obj) 40 | findOutliers <- function(scCNA, 41 | assay = "segment_ratios", 42 | k = 5, 43 | resolution = 0.9, 44 | BPPARAM = BiocParallel::bpparam()) { 45 | if (!is.numeric(resolution)) { 46 | stop("Resolution needs to be a number between 0 and 1") 47 | } 48 | 49 | if (resolution < 0 || resolution > 1) { 50 | stop("Resolution needs to be a number between 0 and 1") 51 | } 52 | 53 | seg <- SummarizedExperiment::assay(scCNA, assay) 54 | 55 | message("Calculating correlation matrix.") 56 | 57 | # correction to avoid correlations calculations with standard deviation zero 58 | zero_sd_idx <- which(apply(seg, 2, sd) == 0) 59 | 60 | if (length(zero_sd_idx) >= 1) { 61 | seg[1, zero_sd_idx] <- seg[1, zero_sd_idx] + 1e-3 62 | } 63 | 64 | # calculating correlations 65 | 66 | dst <- parCor(seg, BPPARAM = BPPARAM) 67 | 68 | dst_knn <- apply(as.matrix(dst), 1, function(x) { 69 | mean(sort(x, decreasing = TRUE)[2:(k + 1)]) 70 | }) 71 | 72 | dst_knn_df <-
data.frame(sample = names(dst_knn), 73 | cor = dst_knn) 74 | 75 | dst_knn_df <- dst_knn_df %>% 76 | dplyr::mutate(outlier = dplyr::case_when( 77 | cor >= resolution ~ "FALSE", 78 | cor < resolution ~ "TRUE" 79 | )) 80 | # count explicitly: indexing table() by "TRUE" gives NA when no cell is an outlier 81 | n_filtered <- sum(dst_knn_df$outlier == "TRUE", na.rm = TRUE) 82 | message("Marked ", n_filtered, " cells as outliers.") 83 | 84 | message( 85 | "Adding information to metadata. Access with colData(scCNA)." 86 | ) 87 | if (identical( 88 | SummarizedExperiment::colData(scCNA)$sample, 89 | dst_knn_df$sample 90 | )) { 91 | SummarizedExperiment::colData(scCNA)$cell_corr_value <- 92 | round(dst_knn_df$cor, 3) 93 | SummarizedExperiment::colData(scCNA)$outlier <- 94 | dst_knn_df$outlier 95 | } else { 96 | stop("Sample names do not match colData info. Check colData(scCNA).") 97 | } 98 | 99 | message("Done.") 100 | return(scCNA) 101 | } 102 | -------------------------------------------------------------------------------- /R/findVariableGenes.R: -------------------------------------------------------------------------------- 1 | #' findVariableGenes 2 | #' 3 | #' Find the most variable genes in the dataset. 4 | #' 5 | #' @param scCNA scCNA object. 6 | #' @param genes A vector of strings containing the HUGO Symbol for the gene 7 | #' of interest. 8 | #' @param assay String with the name of the assay to pull data with the copy 9 | #' number states for each gene. 10 | #' @param top_n A numeric defining how many variable genes will be returned. 11 | #' 12 | #' @return A string vector with the HUGO genes in decreasing order of importance 13 | #' stored to the \code{\link[S4Vectors]{metadata}}. 14 | #' 15 | #' @details \code{findVariableGenes} Runs \code{\link[stats]{prcomp}} to the 16 | #' copy number states of the genes from the provided gene list and returns 17 | #' the ones that have the largest absolute variance as assessed by the 18 | #' loadings of the first principal component.
#'
#' The resulting list of genes is stored within the metadata of the scCNA
#' object and can be accessed with \code{\link[S4Vectors]{metadata}}.
#'
#'
#' @importFrom SummarizedExperiment rowRanges
#' @importFrom BiocGenerics subset
#' @importFrom S4Vectors metadata subjectHits queryHits
#' @importFrom stats prcomp
#' @importFrom GenomicRanges findOverlaps
#'
#' @export
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()
#' copykit_obj <- findVariableGenes(copykit_obj,
#'     genes = c("FHIT", "PTEN", "FOXO1", "BRCA1")
#' )
findVariableGenes <- function(scCNA,
                              genes,
                              assay = "logr",
                              top_n = 50) {
    # obtaining df with genes positions
    # find_scaffold_genes in internals.R
    df <- find_scaffold_genes(scCNA,
        genes = genes
    )

    if (length(df$gene) == 0) {
        stop("None of the provided genes could be located in the scCNA object.")
    }

    # obtaining data and subsetting; drop = FALSE keeps the two-dimensional
    # shape when a single gene is selected
    seg_data <- SummarizedExperiment::assay(scCNA, assay)
    seg_data_genes <- seg_data[df$pos, , drop = FALSE]

    rownames(seg_data_genes) <- df$gene

    # running principal component analysis with cells as observations
    pca_obj <- prcomp(t(seg_data_genes))

    # loadings of the first principal component, one value per gene
    pc1_loadings <- pca_obj$rotation[, 1]

    pca_df <- data.frame(
        gene = names(pc1_loadings),
        p1 = pc1_loadings
    )

    # genes ranked by the absolute value of their PC1 loading
    pca_df <- pca_df[order(abs(pca_df$p1), decreasing = TRUE), ]

    # never request more genes than were supplied or than are available in
    # the ranking, otherwise the result would be padded with NA
    top_n <- min(top_n, length(genes), nrow(pca_df))

    hvg <- as.character(pca_df$gene[seq_len(top_n)])

    # the loadings and the ranked table travel with the gene vector as
    # attributes
    attr(hvg, "pca_pc1_loading") <- pc1_loadings
    attr(hvg, "pca_df") <- pca_df

    S4Vectors::metadata(scCNA)$hvg <- hvg

    return(scCNA)
}

--------------------------------------------------------------------------------
/R/inferMrca.R:
--------------------------------------------------------------------------------
#' inferMrca
#'
#' From the consensus matrix it infers a Most Recent Common Ancestral (MRCA)
#' across all groups.
#'
#' @param scCNA the scCNA object
#' @param value A numeric value used to compare the profiles to infer the mrca,
#' usually equal to the mean segment ratio of cells (value = 1) or the average
#' copy number of the cells
#'
#' @details Calculates the MRCA by inferring, for every bin, the value across
#' all groups that is closest to the number supplied in the argument value.
#'
#' @return Returns a numeric vector added to the \code{\link[S4Vectors]{metadata}}
#' of the scCNA object named `inferred_mrca`
#' @export
#'
#' @importFrom SummarizedExperiment seqnames
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()[,1:300]
#' copykit_obj <- findClusters(copykit_obj)
#' copykit_obj <- calcConsensus(copykit_obj)
#' copykit_obj <- inferMrca(copykit_obj)
inferMrca <- function(scCNA,
                      value = 1) {
    # the consensus matrix must have been computed beforehand
    if (nrow(consensus(scCNA)) == 0) {
        stop("Consensus slot is empty. run calcConsensus().")
    }

    # transposed consensus: the columns are the units iterated over below
    transposed_consensus <- as.data.frame(t(consensus(scCNA)))

    # picks, among the values of one column, the one nearest to `value`
    nearest_to_value <- function(column_values) {
        distance_to_value <- abs(column_values - value)
        column_values[which.min(distance_to_value)]
    }

    inferred_profile <- apply(transposed_consensus, 2, nearest_to_value)

    metadata(scCNA)$inferred_mrca <- inferred_profile

    return(scCNA)
}

--------------------------------------------------------------------------------
/R/knnSmooth.R:
--------------------------------------------------------------------------------
#' knnSmooth
#'
#' Smooth bincounts based on k nearest neighbors.
#'
#' @author Darlan Conterno Minussi
#' @author Runmin Wei
#'
#' @param scCNA The CopyKit object.
#' @param k A numeric with the k nearest neighbor value for smoothing
#' @param BPPARAM A \linkS4class{BiocParallelParam} specifying how the function
#' should be parallelized.
#'
#' @return The CopyKit object with an assay smoothed_bincounts
#'
#' @details This function uses a k-nearest neighbors approach to smooth cells
#' raw bincounts. To do so, the k-nearest neighbors are calculated with
#' \code{\link[BiocNeighbors]{findKNN}}. The bincounts of the k-nearest neighbors
#' for each cell are tallied and an assay called smoothed_bincounts is added to
#' \code{\link{assay}}. Afterwards, \code{\link{runVst}},
#' \code{\link{runSegmentation}} and \code{\link{logNorm}} are re-run by
#' \code{knnSmooth} on the smoothed data.
#'
#' This function results in a trade-off for the elimination of noise at the cost
#' of risk of loss of subclonal structure. To minimize the risk of subclonal
#' structure loss we recommend using very small values of k.
#'
#' Because \code{\link{runVst}} and \code{\link{runSegmentation}} are already
#' applied to the CopyKit object by this function, the segment_ratios and logr
#' assays of the returned object reflect the smoothed bincounts.
#'
#' @importFrom BiocNeighbors findKNN
#'
#' @export
#'
#' @examples
#' copykit_obj <- mock_bincounts(ncells = 10)
#' copykit_obj <- runSegmentation(copykit_obj)
#' copykit_obj <- knnSmooth(copykit_obj)
#'
#'
knnSmooth <- function(scCNA,
                      k = 4,
                      BPPARAM = bpparam()) {
    # setup data
    bin <- bincounts(scCNA)
    seg <- segment_ratios(scCNA)

    # finding neighbors from the segment ratios, with cells as rows
    message("Finding neighbors.")
    neighbors <- BiocNeighbors::findKNN(t(seg), k = k)

    message(paste("Smoothing cells using k =", k))
    # collect neighbors and sum counts; BPPARAM is forwarded so that the
    # parallel backend requested by the caller is the one actually used
    smoothed_bins_list <- BiocParallel::bplapply(
        seq_along(bin),
        function(i) {
            # the cell itself plus its k nearest neighbors
            cells_neighbors_df <- bin[c(i, neighbors$index[i, ])]
            rowSums(cells_neighbors_df)
        },
        BPPARAM = BPPARAM
    )

    # re-adding names
    names(smoothed_bins_list) <- colnames(scCNA)

    smoothed_bins_df <- as.data.frame(do.call(
        cbind,
        smoothed_bins_list
    ))

    # adding knn smoothed bins to assay and re-running Vst
    assay(scCNA, "smoothed_bincounts") <- smoothed_bins_df

    # re-running vst and segmentation
    scCNA <- runVst(scCNA, assay = "smoothed_bincounts")
    scCNA <- runSegmentation(scCNA)

    message("Replacing segment_ratios assay.")
    message("Replacing logr assay.")

    # re-normalizing
    scCNA <- logNorm(scCNA)

    message("Done.")

    return(scCNA)
}

--------------------------------------------------------------------------------
/R/logNorm.R:
--------------------------------------------------------------------------------
#' logNorm()
#'
#' Computes a log transformation of the selected assay
#'
#' @param scCNA scCNA object.
#' @param transform String specifying the transformation to apply to the selected
#' assay.
#' @param assay String with the name of the assay to pull data from to run the
#' transformation.
#' @param name String with the name for the target slot for the resulting
#' transformed counts.
#'
#' @return A data frame with log transformed counts inside the
#' \code{\link[SummarizedExperiment]{assay}} slot.
#'
#' @importFrom SummarizedExperiment assay
#'
#' @export
#'
#' @examples
#' copykit_obj <- copykit_example()
#' copykit_obj <- logNorm(copykit_obj)
logNorm <- function(scCNA,
                    transform = c("log", "log2", "log10", "log1p"),
                    assay = "segment_ratios",
                    name = "logr") {
    transform <- match.arg(transform)

    # values of the assay that will be transformed
    input_values <- SummarizedExperiment::assay(scCNA, assay)

    # zeros are replaced by a small positive number before the transformation
    input_values[input_values == 0] <- 1e-3

    # transformation function matching the selected option
    transform_fun <- switch(transform,
        log = log,
        log2 = log2,
        log1p = log1p,
        log10 = log10
    )

    transformed_values <- transform_fun(input_values)

    # the result is stored rounded to two decimal places
    SummarizedExperiment::assay(scCNA, name) <- round(transformed_values, 2)

    return(scCNA)
}

--------------------------------------------------------------------------------
/R/plotAlluvial.R:
--------------------------------------------------------------------------------
#' plotAlluvial()
#'
#' Produces an alluvial plot from character elements of the metadata
#'
#' @param scCNA The CopyKit object.
#' @param label A string with two or more elements from \code{\link[SummarizedExperiment]{colData}}.
#' @param label_colors An optional named vector with the colors of each element
#' from label.
#' @param min_cells An optional numeric to filter stratum that do not reach
#' the minimum amount of cells.
#'
#' @return A ggplot object containing an alluvial plot from ggalluvial
#'
#' @import ggalluvial
#' @import ggplot2
#' @importFrom ggalluvial stat_stratum geom_stratum
#' @importFrom SummarizedExperiment colData
#' @importFrom dplyr all_of across group_by count filter
#' @importFrom scales hue_pal
#'
#' @export
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()
#' copykit_obj <- findClusters(copykit_obj)
#' colData(copykit_obj)$section <- stringr::str_extract(
#'     colData(copykit_obj)$sample,
#'     "(L[0-9]+L[0-9]+|L[0-9]+)"
#' )
#' plotAlluvial(copykit_obj, label = c("subclones", "section"))
plotAlluvial <- function(scCNA,
                         label,
                         label_colors = NULL,
                         min_cells = NULL) {

    # bindings for NSE
    group <- cohort <- NULL

    # thanks for error solving from SO user twedl:
    # https://stackoverflow.com/a/53798038
    StatStratum <- ggalluvial::StatStratum

    meta <- as.data.frame(colData(scCNA))

    # checks
    missing_labels <- label[!(label %in% colnames(meta))]
    if (length(missing_labels) > 0) {
        stop(
            "label element(s) not found in colData: ",
            paste(missing_labels, collapse = ", ")
        )
    }

    # is.numeric() on a data frame is always FALSE, so every requested column
    # has to be tested individually
    if (any(vapply(meta[label], is.numeric, logical(1)))) {
        stop("label argument must not contain numeric columns.")
    }

    # calculating frequencies across labels
    alluvial_dat <- meta %>%
        dplyr::group_by(dplyr::across(dplyr::all_of(label))) %>%
        dplyr::count() %>%
        ggalluvial::to_lodes_form(
            key = "class",
            value = "group",
            id = "cohort",
            axes = seq_along(label)
        )

    if (!is.null(min_cells)) {
        alluvial_dat <- alluvial_dat %>%
            dplyr::filter(n > min_cells)
    }

    # managing colors
    if (is.null(label_colors)) {
        # defaults
        label_colors <- c(
            superclones_pal(),
            subclones_pal(),
            c(
                "removed" = "#DA614D",
                "kept" = "#5F917A"
            ),
            c(
                "TRUE" = "#396DB3",
                "FALSE" = "#11181D"
            )
        )

        # non defaults
        non_default <- label[!(label %in% c(
            "superclones",
            "subclones",
            "is_aneuploid",
            "outlier"
        ))]

        non_default_colors <- vector(mode = "list")

        # luminance and hue offset of the palette; initialised once, outside
        # the loop, so that the per-label shift at the end of each iteration
        # gives every additional label a distinct palette
        l <- 65
        h <- 15

        for (i in seq_along(non_default)) {
            groups_label <- unique(meta[[non_default[i]]])

            non_default_colors[[i]] <-
                structure(scales::hue_pal(
                    h = c(0, 360) + h,
                    l = l
                )(length(groups_label)),
                names = groups_label
                )

            # shifting the palette for the next label, keeping the luminance
            # within a visible range
            l <- max(l - 10, 20)
            h <- h + 15
        }

        label_colors <-
            c(label_colors, unlist(non_default_colors))
    }

    # plot
    p <- ggplot(
        data = alluvial_dat,
        aes(
            x = class,
            stratum = group,
            alluvium = cohort,
            y = n
        )
    ) +
        ggalluvial::geom_flow(
            aes(fill = group),
            stat = "alluvium",
            color = "black",
            alpha = .7,
            width = 1 / 8
        ) +
        ggalluvial::geom_stratum(aes(fill = group), color = "black", width = 1 / 8) +
        geom_text(stat = StatStratum, aes(label = group)) +
        theme_void() +
        theme(
            legend.position = "none",
            axis.text.x = element_text(color = "black")
        ) +
        scale_fill_manual(
            values = label_colors,
            limits = force
        )

    # return plot
    p
}

--------------------------------------------------------------------------------
/R/plotPca.R:
--------------------------------------------------------------------------------
#' plotPca
#'
#' Plots PCA embedding stored in \code{\link[SingleCellExperiment]{reducedDim}}
#' slot.
#'
#' @author Darlan Conterno Minussi
#'
#' @param scCNA The CopyKit object.
#' @param embedding String with the name of the reducedDim to pull data from.
#' @param label A string with the elements from
#' \code{\link[SummarizedExperiment]{colData}} to color the PCA points.
#'
#' @details Plots the reduced dimension representation stored under the name
#' given by \code{embedding} in the slot
#' \code{\link[SingleCellExperiment]{reducedDim}} from the scCNA object.
#'
#' Columns from \code{\link[SummarizedExperiment]{colData}} can
#' be used as an argument for 'label' to color the points on the plot. The
#' only combination of two labels that is accepted is
#' \code{c("subclones", "superclones")}.
#'
#' @return A ggplot object containing the reduced dimensions PCA plot.
#'
#' @export
#'
#' @importFrom ggnewscale new_scale_color
#' @import ggplot2
#' @examples
#' set.seed(1000)
#' copykit_obj <- copykit_example_filtered()[,sample(300)]
#' copykit_obj <- runPca(copykit_obj)
#'
#' plotPca(copykit_obj)
#'
#' copykit_obj <- findClusters(copykit_obj)
#'
#' plotPca(copykit_obj, label = "subclones")
#'

plotPca <- function(scCNA,
                    embedding = "PCA",
                    label = NULL) {

    # bindings for NSE objects
    PC1 <- PC2 <- NULL

    message("Plotting PCA.")

    # retrieving data
    df <- as.data.frame(SummarizedExperiment::colData(scCNA))
    pca_df <- as.data.frame(SingleCellExperiment::reducedDim(scCNA, embedding))

    if (!is.null(label)) {
        # label may hold two elements (subclones and superclones), so the
        # check has to be vectorised instead of relying on a scalar `&&`
        missing_labels <- label[!(label %in% colnames(df))]

        if (length(missing_labels) > 0) {
            stop(
                "Label ", paste(missing_labels, collapse = ", "),
                " is not a column of the scCNA object."
            )
        }

        message("Coloring by: ", paste(label, collapse = ", "), ". ")
    }

    # theme setup
    my_theme <- list(
        ggplot2::theme(
            axis.title.x = element_text(size = 14),
            axis.text.x = element_text(size = 12),
            axis.title.y = element_text(size = 14),
            axis.text.y = element_text(size = 12),
            axis.line = element_blank(),
            panel.border = element_rect(color = "black", fill = NA),
            legend.position = "right",
            legend.text = element_text(size = 14)
        ),
        xlab("PC1"),
        ylab("PC2")
    )

    # Base plot
    p <- ggplot(pca_df, aes(PC1, PC2)) +
        theme_classic() +
        my_theme

    # no label: plain points
    if (is.null(label)) {
        p <- p +
            geom_point()

        return(p)
    }

    # subclones only
    if (all(label == "subclones")) {
        p <- p +
            geom_point(aes(fill = as.factor(
                SummarizedExperiment::colData(scCNA)$subclones
            )),
            size = 2.5,
            shape = 21,
            stroke = 0.1
            ) +
            scale_fill_manual(
                values = subclones_pal(),
                name = "subclones",
                limits = force
            )

        return(p)
    }

    # superclones only
    if (all(label == "superclones")) {
        p <- p +
            geom_point(aes(fill = as.factor(
                SummarizedExperiment::colData(scCNA)$superclones
            )),
            size = 2.5,
            shape = 21
            ) +
            scale_fill_manual(
                values = superclones_pal(),
                name = "superclones",
                limits = force
            )

        return(p)
    }

    # subclones and superclones together: larger points colored by superclone
    # with smaller points filled by subclone drawn on top
    if ("subclones" %in% label && "superclones" %in% label) {
        p <- p +
            geom_point(
                aes(
                    x = PC1,
                    y = PC2,
                    color = SummarizedExperiment::colData(scCNA)$superclones
                ),
                alpha = 1,
                size = 5
            ) +
            scale_color_manual(
                values = superclones_pal(),
                name = "superclones",
                limits = force
            ) +
            ggnewscale::new_scale_color() +
            geom_point(aes(
                x = PC1,
                y = PC2,
                fill = as.factor(SummarizedExperiment::colData(scCNA)$subclones)
            ),
            size = 2.5,
            shape = 21,
            stroke = 0.1
            ) +
            scale_fill_manual(
                values = subclones_pal(),
                name = "subclones",
                limits = force
            )

        return(p)
    }

    # any other column of colData
    if (length(label) > 1) {
        stop("Label must be of length 1.")
    }

    # extracting by name avoids picking up a colData column that happens to be
    # called 'label'
    lab <- df[[label]]

    p <- p +
        geom_point(aes(fill = lab),
            size = 2.5,
            shape = 21,
            stroke = 0.1
        ) +
        labs(fill = label)

    # coloring by continuous variable
    if (is.numeric(lab)) {
        p <- p +
            ggplot2::scale_fill_viridis_c()
    }

    # return plot
    return(p)
}

--------------------------------------------------------------------------------
/R/plotScree.R:
--------------------------------------------------------------------------------
#' plotScree
#'
#' Plots the variance explained by the different principal components
#'
#' @param scCNA The CopyKit object
#' @param ncomponents Number of principal components to plot.
#'
#' @return A ggplot object with The variance explained per principal component.
#' @export
#'
#' @importFrom scales percent_format
#'
#' @examples
#' set.seed(1000)
#' copykit_obj <- copykit_example_filtered()[,sample(400)]
#' copykit_obj <- runPca(copykit_obj)
#' plotScree(copykit_obj)
#'
plotScree <- function(scCNA,
                      ncomponents = 20) {

    # Bindings for NSE
    pcacomponents <- varexp <- NULL

    # the variance of each component is saved as an attribute of the PCA
    # reducedDim
    var_explained <- attr(reducedDim(scCNA, "PCA"), "var_explained")

    if (is.null(var_explained)) {
        stop("No 'var_explained' attribute found in the 'PCA' reducedDim. Run runPca() first.")
    }

    # Calculating the proportion of variance explained
    ve <- var_explained / sum(var_explained)

    # never ask for more components than are available, which would add rows
    # of missing values to the plot data
    ncomponents <- min(ncomponents, length(ve))
    shown <- seq_len(ncomponents)

    # Data frame for plotting
    df <- data.frame(
        pcacomponents = shown,
        varexp = ve[shown]
    )

    p <- ggplot(df, aes(
        x = pcacomponents,
        y = varexp
    )) +
        geom_point() +
        theme_classic() +
        theme(
            axis.text.x = element_text(size = 14),
            axis.text.y = element_text(size = 14),
            axis.title = element_text(size = 16)
        ) +
        labs(
            x = "PCA components",
            y = "variance explained"
        ) +
        scale_y_continuous(labels = scales::percent_format())

    p
}

--------------------------------------------------------------------------------
/R/plotSuggestedK.R:
--------------------------------------------------------------------------------
#' plotSuggestedK
#'
#' Uses the information from \code{\link{findSuggestedK}} to plot the values
#' of jaccard similarity from the tested k range on \code{\link{findSuggestedK}}.
#'
#' @param scCNA The scCNA object.
#' @param geom A character with the geom to be used for plotting.
#'
#' @details \code{\link{plotSuggestedK}} access the \code{\link[S4Vectors]{metadata}}
#' element suggestedK_df that is saved to the scDNA object after running
#' \code{\link{findSuggestedK}}. The dataframe is used for plotting either a
#' heatmap, when the argument geom = 'tile', or a dotplot when argument geom =
#' 'dotplot' or a boxplot when geom = 'boxplot'.
#'
#' \itemize{
#' \item{geom = 'boxplot':} Plots a boxplot of the jaccard similarities across
#' all clusters detected in the grid search. The large red points represent
#' the mean jaccard similarity.
#'
#' \item{geom = 'tile':} Plots a heatmap of the jaccard similarities across
#' all clusters detected in the grid search. The filling colors represent the
#' jaccard similarity value. Rows represent clusters and columns the k value.
#'
#' \item{geom = 'dotplot':} Plots a dotplot of the jaccard similarities across
#' all clusters detected in the grid search. Where the size of the dots represent
#' the jaccard similarity for each assessed k value.
#'
#' \item{geom = 'scatterplot':} Plots a scatterplot of the jaccard similarity
#' explained by the number of cells. Points are colored by subclone and lines
#' represent a linear regression across the points.
#'
#' }
#'
#' @return A ggplot2 object with the plot of different tested k values and their
#' jaccard similarity for each subclone
#'
#' @export
#'
#' @import ggplot2
#' @importFrom dplyr mutate
#' @importFrom tidyr complete
#' @importFrom S4Vectors metadata
#' @importFrom gtools mixedsort
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()
#' copykit_obj <- findSuggestedK(copykit_obj)
#' plotSuggestedK(copykit_obj)
plotSuggestedK <- function(scCNA,
                           geom = c("boxplot", "tile", "dotplot", "scatterplot")) {
    geom <- match.arg(geom)

    # bindings for NSE objects
    k <- subclones <- bootmean <- chosen <- mean_jac <- n_cells <- NULL

    df <- S4Vectors::metadata(scCNA)$suggestedK_df
    sug_k <- S4Vectors::metadata(scCNA)$suggestedK

    # the plotted table only exists after the grid search was performed
    if (is.null(df)) {
        stop("Run findSuggestedK() first.")
    }

    # k is handled as a discrete value on the plots
    df <- dplyr::mutate(df, k = as.character(k))

    # layers shared by the dotplot and the tile plot
    common_layers <- list(
        scale_y_discrete(limits = gtools::mixedsort(unique(df$subclones))),
        scale_x_discrete(limits = gtools::mixedsort(unique(df$k))),
        theme_classic(), labs(fill = "jaccard\nsimilarity")
    )

    if (geom == "dotplot") {
        p <- ggplot(df, aes(k, subclones)) +
            geom_point(aes(
                size = bootmean,
                fill = bootmean
            ),
            shape = 21
            ) +
            scale_fill_viridis_c(option = 2) +
            common_layers
    } else if (geom == "tile") {
        # df expanded so that every combination of k and subclone has a tile;
        # combinations that were not observed are drawn in grey
        df_exp <- tidyr::complete(
            df,
            k,
            subclones
        )

        p <- ggplot(df_exp, aes(k, subclones)) +
            geom_tile(aes(fill = bootmean), color = "black") +
            scale_fill_viridis_c(na.value = "grey", option = 2) +
            common_layers +
            # NOTE(review): recent ggplot2 releases prefer `linewidth` over
            # `size` here; confirm the minimum supported version before
            # switching
            theme(panel.border = element_rect(fill = NA, size = 3))
    } else if (geom == "boxplot") {
        mean_per_k <- df %>%
            dplyr::group_by(k) %>%
            dplyr::summarise(mean_jac = mean(bootmean))

        # adding color for chosen k
        df <- dplyr::mutate(df, chosen = df$k == as.character(sug_k))

        p <- ggplot() +
            geom_boxplot(
                data = df, aes(k, bootmean, fill = chosen),
                alpha = 1
            ) +
            geom_point(
                data = mean_per_k, aes(k, mean_jac),
                fill = "red",
                shape = 21,
                size = 3
            ) +
            scale_fill_manual(values = c("TRUE" = "khaki", "FALSE" = "grey90")) +
            scale_x_discrete(limits = gtools::mixedsort(unique(df$k))) +
            theme_classic() +
            theme(
                legend.position = "none",
                axis.title.x = element_text(size = 14),
                axis.text.x = element_text(size = 12),
                axis.title.y = element_text(size = 14),
                axis.text.y = element_text(size = 12)
            ) +
            labs(y = "jaccard similarity")
    } else {
        # geom == "scatterplot", one panel per tested k
        p <- ggplot(df, aes(x = n_cells, y = bootmean)) +
            geom_point(aes(fill = subclones), shape = 21) +
            stat_smooth(method = "lm", se = FALSE) +
            facet_wrap(vars(as.numeric(k)), scales = "free_x") +
            scale_fill_manual(
                values = subclones_pal(),
                limits = gtools::mixedsort(unique(df$subclones))
            ) +
            theme_classic() +
            labs(
                x = "number of cells",
                y = "jaccard similarity"
            )
    }

    # return plot
    p
}

--------------------------------------------------------------------------------
/R/plotVariableGenes.R:
--------------------------------------------------------------------------------
#' plotVariableGenes
#'
#' Visualization for the most variable genes found with \code{findVariableGenes}.
#'
#' @param scCNA scCNA object.
#' @param n A numeric defining how many variable genes will be plotted.
#'
#' @details \code{plotVariableGenes} plots the list of genes that was found
#' using \code{findVariableGenes}.
#'
#' @seealso \code{\link{findVariableGenes}}
#'
#' @importFrom BiocGenerics subset
#' @importFrom S4Vectors metadata subjectHits queryHits
#' @import ggplot2
#'
#' @return A ggplot object with a plot of the variable genes detected.
#' @export
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()
#' copykit_obj <- findVariableGenes(copykit_obj,
#'     genes = c("FHIT", "PTEN", "FOXO1", "BRCA1")
#' )
#' plotVariableGenes(copykit_obj)
plotVariableGenes <- function(scCNA,
                              n = 30) {

    # bindings for NSE
    gene <- p1 <- NULL

    # the gene vector is written to the metadata by findVariableGenes()
    stored_hvg <- S4Vectors::metadata(scCNA)$hvg

    if (is.null(stored_hvg)) {
        stop("Run findVariableGenes() first.")
    }

    # at most as many genes as were stored
    if (n > length(stored_hvg)) {
        n <- length(stored_hvg)
    }

    # table attached to the gene vector, restricted to the genes to be shown
    # (rows are selected by gene name)
    genes_to_plot <- stored_hvg[1:n]
    loadings_df <- attr(stored_hvg, "pca_df")[genes_to_plot, ]

    # genes become a factor ordered by the absolute value of p1
    loadings_df$gene <- forcats::fct_reorder(
        as.factor(loadings_df$gene),
        abs(loadings_df$p1)
    )

    # largest absolute value, used to set the limits and breaks of the axis
    largest_abs_p1 <- max(abs(loadings_df$p1))

    # theme setup
    plot_theme <- ggplot2::theme(
        axis.title.x = element_text(colour = "gray28", size = 20),
        axis.text.x = element_text(size = 10),
        axis.ticks.x = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.y = element_text(colour = "gray28", size = 20),
        axis.text.y = element_text(size = 10),
        legend.position = "right",
        legend.title = element_blank(),
        legend.text = element_text(size = 16)
    )

    # one segment and one point per gene, drawn horizontally
    variable_genes_plot <- ggplot(loadings_df, aes(x = gene, y = abs(p1))) +
        geom_segment(aes(
            x = gene,
            xend = gene,
            y = 0,
            yend = abs(p1)
        )) +
        geom_point(
            size = 4,
            fill = "#21908C",
            shape = 21
        ) +
        theme_classic() +
        coord_flip() +
        scale_y_continuous(
            expand = c(0, 0),
            limits = c(0, largest_abs_p1 + 0.02),
            breaks = c(0, largest_abs_p1 - 0.05),
            labels = c("Less variable", "More variable")
        ) +
        labs(
            x = "",
            y = ""
        ) +
        plot_theme

    # return plot
    variable_genes_plot
}

--------------------------------------------------------------------------------
/R/runConsensusPhylo.R:
--------------------------------------------------------------------------------
#' runConsensusPhylo
#'
#' Runs a minimal evolution tree algorithm for the consensus data frame
#'
#' @param scCNA The scCNA object.
#' @param root A string indicating how to root the consensus tree: 'mrca'
#' uses, for every bin, the consensus value closest to 1; 'neutral' uses a
#' profile with the value 1 in every bin; 'user' uses \code{root_user}.
#' @param root_user A numeric with the vector to be used as root of the tree if
#' \code{root} is set to 'user'. Must have the same length as the number of bins
#' of the genome scaffold.
#'
#' @importFrom ape fastme.bal Ntip root.phylo drop.tip ladderize
#' @importFrom stats dist
#'
#' @return A phylo object with a consensus tree stored in the consensusPhylo slot
#' of the CopyKit object.
#' @export
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()
#' copykit_obj <- findClusters(copykit_obj)
#' copykit_obj <- calcConsensus(copykit_obj)
#' copykit_obj <- runConsensusPhylo(copykit_obj)
#' plotPhylo(copykit_obj, consensus = TRUE, label = "subclones")
runConsensusPhylo <- function(scCNA,
                              root = c("mrca", "neutral", "user"),
                              root_user = NULL) {
    root <- match.arg(root)

    if (nrow(consensus(scCNA)) == 0) {
        stop("Consensus slot is empty. run calcConsensus().")
    }

    consensus_df <- as.data.frame(t(consensus(scCNA)))

    if (root == "user") {
        if (length(root_user) != ncol(consensus_df)) {
            stop("Length of root_user argument must be the same as nrow(scCNA).")
        }

        if (!is.numeric(root_user)) {
            stop("root_user argument must be numeric.")
        }
    }

    # profile used as root of the tree
    anc_profile <- switch(root,
        # neutral state in every bin
        neutral = 1,
        # number closest to the ground state for each bin
        mrca = apply(
            consensus_df,
            2,
            function(x) x[which.min(abs(x - 1))]
        ),
        user = root_user
    )

    # the root profile is appended as the last two rows; one of them is used
    # as outgroup and both are dropped from the tree after rooting
    consensus_df[nrow(consensus_df) + 1, ] <- anc_profile
    consensus_df[nrow(consensus_df) + 1, ] <- anc_profile

    # balanced minimum evolution tree from manhattan distances
    tree <- ape::fastme.bal(stats::dist(consensus_df, method = "manhattan"))

    # the last appended row is labelled with its row number, which equals the
    # number of tips
    tree <-
        ape::root.phylo(tree,
            outgroup = which(tree$tip.label == ape::Ntip(tree)),
            resolve.root = TRUE
        )

    tree <-
        ape::drop.tip(tree, tip = as.character(c(
            nrow(consensus_df), nrow(consensus_df) - 1
        )))

    tree <- ape::ladderize(tree)

    consensusPhylo(scCNA) <- tree

    return(scCNA)
}

--------------------------------------------------------------------------------
/R/runDistMat.R:
--------------------------------------------------------------------------------
#' Run distance matrix calculations
#'
#' Performs distance matrix calculations that can be downstream used for
#' hierarchical clustering or phylogenetic analysis. Uses \code{amap::Dist()}
#' in order to parallelize distance calculations.
#'
#' @author Darlan Conterno Minussi
#'
#' @param scCNA scCNA object.
#' @param metric distance metric passed to calculate the distance matrix.
#' @param n_threads Number of threads used to calculate the distance matrix.
#' Passed to `amap::Dist`.
#'
#' @return A distance matrix in the slot \code{distMat} from scCNA object.
#' Access the distance matrix with: \code{distMat(scCNA, withDimnames = TRUE)}
#' @export
#'
#' @examples
#' copykit_obj <- copykit_example_filtered()[,1:10]
#' copykit_obj <- runDistMat(copykit_obj)
runDistMat <- function(scCNA,
                       metric = "euclidean",
                       n_threads = 1) {
    # values below one are replaced by a single thread
    if (n_threads < 1) n_threads <- 1

    message("Calculating distance matrix with metric: ", metric)
    message("Using ", n_threads, " cores.")

    # transposed segment ratios as a data frame
    transposed_ratios <- as.data.frame(t(segment_ratios(scCNA)))

    # distances are computed by amap::Dist and stored in the distMat slot
    distMat(scCNA) <- amap::Dist(
        transposed_ratios,
        method = metric,
        nbproc = n_threads
    )

    message("Access distance matrix with copykit::distMat()")
    message("Done.")

    return(scCNA)
}

--------------------------------------------------------------------------------
/R/runMetrics.R:
--------------------------------------------------------------------------------
#' Run metrics
#'
#' Calculates the overdispersion and the breakpoint counts for each cell.
#'
#' @author Darlan Conterno Minussi
#'
#' @param scCNA scCNA object.
#' @param BPPARAM A \linkS4class{BiocParallelParam} specifying how the function
#' should be parallelized.
#'
#' @details Adds the metrics to the scCNA \code{\link[SummarizedExperiment]{colData}}.
#' Those metrics can be used for subsetting the data if desired.
#' results can be visualized with \code{\link{plotMetrics}}.
14 | #' 15 | #' @return Adds columns 'overdispersion' and 'breakpoint_count' to 16 | #' \code{\link[SummarizedExperiment]{colData}}. 17 | #' 18 | #' @export 19 | #' @import ggplot2 20 | #' 21 | #' @examples 22 | #' copykit_obj <- mock_bincounts() 23 | #' copykit_obj <- runMetrics(copykit_obj) 24 | runMetrics <- function(scCNA, 25 | BPPARAM = bpparam()) { 26 | 27 | ################### 28 | # Retrieving data 29 | 30 | dat_seg <- segment_ratios(scCNA) 31 | dat_rat <- ratios(scCNA) 32 | dat_bin <- bincounts(scCNA) 33 | rg <- SummarizedExperiment::rowRanges(scCNA) 34 | 35 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fri Jun 25 13:22:01 2021 36 | # overdispersion ---- 37 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fri Jun 25 13:22:10 2021 38 | 39 | message("Calculating overdispersion.") 40 | 41 | overdisp <- BiocParallel::bplapply(dat_bin, 42 | overdispersion, 43 | BPPARAM = BPPARAM 44 | ) 45 | 46 | overdisp <- unlist(overdisp) 47 | 48 | SummarizedExperiment::colData(scCNA)$overdispersion <- overdisp 49 | 50 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fri Jun 25 13:22:25 2021 51 | # Breakpoint count 52 | # Performed for every chromosome 53 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Fri Jun 25 13:22:34 2021 54 | 55 | scCNA <- .countBreakpoints(scCNA) 56 | 57 | message("Done.") 58 | 59 | return(scCNA) 60 | } 61 | -------------------------------------------------------------------------------- /R/runPca.R: -------------------------------------------------------------------------------- 1 | #' runPca() 2 | #' 3 | #' Creates a PCA embedding using \code{stats::prcomp} from the values of the selected assay 4 | #' 5 | #' @author Darlan Conterno Minussi 6 | #' 7 | #' @param scCNA The CopyKit object. 8 | #' @param assay String with the name of the assay to pull data from to make the 9 | #' embedding. 10 | #' @param name String specifying the name to be used to store the result in the 11 | #' reducedDims of the output.
12 | #' @param scale A logical value indicating whether the variables should be 13 | #' scaled to have unit variance before the analysis takes place. 14 | #' @param ... Additional parameters passed to \code{\link[stats]{prcomp}}. 15 | #' 16 | #' @importFrom stats prcomp 17 | #' @importFrom SummarizedExperiment assay 18 | #' 19 | #' @return A reduced dimension representation with pca in the slot 20 | #' \code{reducedDim} from scCNA object. Access reduced dimensions slot with: 21 | #' \code{reducedDim(scCNA, 'PCA', withDimnames = FALSE)} 22 | #' @export 23 | #' 24 | #' @examples 25 | #' set.seed(1000) 26 | #' copykit_obj <- copykit_example_filtered()[,sample(100)] 27 | #' copykit_obj <- runPca(copykit_obj) 28 | 29 | runPca <- function(scCNA, 30 | assay = "logr", 31 | name = "PCA", 32 | scale = FALSE, 33 | ...) { 34 | 35 | 36 | seg_data <- t(SummarizedExperiment::assay(scCNA, assay)) %>% 37 | as.data.frame() 38 | 39 | message(sprintf("Using assay: %s", assay)) 40 | message(sprintf("Embedding data with PCA.")) 41 | 42 | pca <- prcomp(seg_data, 43 | scale. = scale, 44 | ...) 45 | 46 | # Saving results 47 | dat_pca <- pca$x 48 | rownames(dat_pca) <- rownames(seg_data) 49 | var_explained <- pca$sdev^2 50 | attr(dat_pca, "var_explained") <- var_explained 51 | rownames(pca$rotation) <- colnames(seg_data) 52 | attr(dat_pca, "rotation") <- pca$rotation 53 | 54 | SingleCellExperiment::reducedDim(scCNA, type = name) <- dat_pca 55 | 56 | 57 | 58 | 59 | 60 | message( 61 | "Access reduced dimensions slot with: SingleCellExperiment::reducedDim(scCNA, 'pca')." 
62 | ) 63 | message("Done.") 64 | 65 | return(scCNA) 66 | 67 | } 68 | -------------------------------------------------------------------------------- /R/runPhylo.R: -------------------------------------------------------------------------------- 1 | #' Run phylogenetic analysis 2 | #' 3 | #' Performs phylogenetic analysis 4 | #' 5 | #' @author Darlan Conterno Minussi 6 | #' @author Junke Wang 7 | #' 8 | #' @param scCNA scCNA object. 9 | #' @param method Phylogenetic method to be run, currently accepts "nj" (neighbor-joining) and "me" (minimum evolution). Defaults to "nj". 10 | #' @param metric distance metric passed to construct the phylogeny (Defaults to "euclidean"). 11 | #' @param assay String with the name of the assay to pull data from to run phylogenetic analysis. Note that only assay named "integer" will be treated as integer. 12 | #' @param n_threads Number of threads used to calculate the distance matrix. Passed to `amap::Dist` 13 | #' 14 | #' @return A rooted phylogenetic tree object in the slot \code{phylo} from scCNA object. Access phylo slot with: \code{copykit::phylo(scCNA)} 15 | #' @export 16 | #' 17 | #' @importFrom ape nj fastme.bal ladderize 18 | #' @examples 19 | #' set.seed(1000) 20 | #' copykit_obj <- copykit_example_filtered()[,sample(50)] 21 | #' copykit_obj <- runPhylo(copykit_obj) 22 | runPhylo <- function(scCNA, 23 | method = "nj", 24 | metric = "euclidean", 25 | assay = "segment_ratios", 26 | n_threads = parallel::detectCores() / 4) { 27 | # cores check 28 | if (n_threads < 1) { 29 | n_threads <- 1 30 | } 31 | 32 | 33 | # getting data 34 | if (!assay %in% names(SummarizedExperiment::assays(scCNA))) { 35 | stop("No data found in the assay! 
Please check the assay name.") 36 | } 37 | 38 | seg_data <- SummarizedExperiment::assay(scCNA, assay) 39 | 40 | 41 | if (assay == "integer") { 42 | ## with integers 43 | message("Using integer data...") 44 | seg_data[, ncol(seg_data) + 1] <- 2 45 | seg_data[, ncol(seg_data) + 1] <- 2 46 | seg_data <- t(seg_data) %>% as.data.frame() 47 | } else { 48 | # with ratios 49 | message("Using ratio data...") 50 | seg_data[, ncol(seg_data) + 1] <- 1 51 | seg_data[, ncol(seg_data) + 1] <- 1 52 | seg_data <- t(seg_data) %>% as.data.frame() 53 | } 54 | 55 | 56 | # calculating distance matrix 57 | message("Calculating distance matrix") 58 | distMat <- amap::Dist(seg_data, 59 | method = metric, 60 | nbproc = n_threads 61 | ) 62 | 63 | # ordering cells 64 | if (method %in% c("nj", "me")) { 65 | if (method == "nj") { 66 | message("Creating neighbor-joining tree.") 67 | tree <- ape::nj(distMat) 68 | } 69 | 70 | if (method == "me") { 71 | message("Creating minimum evolution tree.") 72 | tree <- ape::fastme.bal(distMat) 73 | } 74 | } else { 75 | stop("Currently only nj and me trees are supported.") 76 | } 77 | 78 | 79 | # root the tree 80 | tree <- ape::root.phylo(tree, 81 | outgroup = which(tree$tip.label == paste0("V", Ntip(tree))), 82 | resolve.root = TRUE 83 | ) 84 | tree <- ape::drop.tip(tree, tip = as.character(c( 85 | paste0("V", nrow(seg_data)), paste0("V", nrow(seg_data) - 1) 86 | ))) 87 | 88 | tree <- ape::ladderize(tree) 89 | 90 | phylo(scCNA) <- tree 91 | 92 | message("Access slot with copykit::phylo(scCNA).") 93 | message("Done.") 94 | return(scCNA) 95 | } 96 | -------------------------------------------------------------------------------- /R/runUmap.R: -------------------------------------------------------------------------------- 1 | #' Creates UMAP embedding 2 | #' 3 | #' Creates a umap embedding using the package uwot from the segment ratios 4 | #' values 5 | #' 6 | #' @author Darlan Conterno Minussi 7 | #' 8 | #' @param scCNA scCNA object. 
9 | #' @param assay String with the name of the assay to pull data from to make the 10 | #' embedding. 11 | #' @param seed Sets a seed for the pseudorandom number generator. 12 | #' @param name String specifying the name to be used to store the result in the 13 | #' reducedDims of the output. 14 | #' @param min_dist The effective minimum distance between embedded points. 15 | #' Smaller values will result in a more clustered/clumped embedding where nearby 16 | #' points on the manifold are drawn closer together, while larger values will 17 | #' result on a more even dispersal of points. The value should be set relative 18 | #' to the spread value, which determines the scale at which embedded points 19 | #' will be spread out. See \code{\link[uwot]{umap}}. 20 | #' @param n_neighbors The size of local neighborhood (in terms of number of 21 | #' neighboring sample points) used for manifold approximation. 22 | #' Larger values result in more global views of the manifold, 23 | #' while smaller values result in more local data being preserved. 24 | #' In general values should be in the range 2 to 100. 25 | #' See \code{\link[uwot]{umap}}. 26 | #' @param ncomponents The dimension of the space to embed into. See 27 | #' \code{\link[uwot]{umap}}. 28 | #' @param ... Additional parameters passed to \code{\link[uwot]{umap}}. 29 | #' 30 | #' @importFrom uwot umap 31 | #' @importFrom SummarizedExperiment assay 32 | #' @importFrom withr with_seed 33 | #' 34 | #' @return A reduced dimension representation with UMAP in the slot 35 | #' \code{reducedDim} from scCNA object. Access reduced dimensions slot with: 36 | #' \code{reducedDim(scCNA, 'umap', withDimnames = FALSE)} 37 | #' @export 38 | #' 39 | #' @examples 40 | #' copykit_obj <- copykit_example_filtered() 41 | #' copykit_obj <- runUmap(copykit_obj) 42 | runUmap <- function(scCNA, 43 | assay = "logr", 44 | seed = 17, 45 | min_dist = 0, 46 | n_neighbors = 50, 47 | name = "umap", 48 | ncomponents = 2, 49 | ...) 
{ 50 | seg_data <- t(SummarizedExperiment::assay(scCNA, assay)) %>% 51 | as.data.frame() 52 | 53 | message("Using assay: ", assay) 54 | message("Embedding data with UMAP. Using seed ", seed) 55 | withr::with_seed( 56 | seed, 57 | dat_umap <- uwot::umap(seg_data, 58 | min_dist = min_dist, 59 | n_neighbors = n_neighbors, 60 | n_components = ncomponents, 61 | ...) 62 | ) 63 | 64 | SingleCellExperiment::reducedDim(scCNA, type = name) <- dat_umap 65 | 66 | message( 67 | "Access reduced dimensions slot with: reducedDim(scCNA, 'umap')." 68 | ) 69 | message("Done.") 70 | 71 | return(scCNA) 72 | } 73 | -------------------------------------------------------------------------------- /R/runVst.R: -------------------------------------------------------------------------------- 1 | #' Variance Stabilizing Transformation 2 | #' 3 | #' Performs variance stabilization transformation of the bin counts 4 | #' 5 | #' @param scCNA The scCNA object 6 | #' @param transformation A character indicating the variance stabilization 7 | #' transformation to be performed. See \link{runVst} details. 8 | #' @param assay A character indicating the assay slot to extract the bincounts 9 | #' for variance stabilization 10 | #' 11 | #' @details \code{runVst} performs variance stabilization to reduce the overdispersion 12 | #' from the negative binomial distribution nature of the bin counts and reduce 13 | #' technical bias. The argument \code{transformation} controls the choice of the transformation 14 | #' allowing either the Freeman-Tukey transformation by using the option 'ft' (recommended) 15 | #' or a logarithmic transformation with the option 'log'. Using a 'log' transformation 16 | #' may result in long segmentation times for a few cells with large breakpoint counts. 17 | #' 18 | #' @references 19 | #' Freeman, M. F.; Tukey, J. W. (1950), "Transformations related to the angular 20 | #' and the square root", The Annals of Mathematical Statistics, 21 | #' 21 (4), pp.
607–611, doi:10.1214/aoms/1177729756, JSTOR 2236611 22 | #' 23 | #' @return A slot into the scCNA object containing the variance stabilized matrix. 24 | #' @importFrom SummarizedExperiment assay 25 | #' @importFrom S4Vectors metadata 26 | #' @export 27 | #' 28 | #' @examples 29 | #' copykit_obj <- mock_bincounts(ncells = 10) 30 | #' copykit_obj <- runVst(copykit_obj) 31 | runVst <- function(scCNA, 32 | transformation = c("ft", "log"), 33 | assay = 'bincounts') { 34 | transformation <- match.arg(transformation) 35 | 36 | message(paste("Running variance stabilization transformation:", 37 | transformation)) 38 | 39 | # recovering assay 40 | varbin_counts_df <- assay(scCNA, assay) 41 | 42 | if (transformation == "ft") { 43 | counts_df_ft <- as.data.frame(apply(varbin_counts_df, 44 | 2, 45 | function(x) sqrt(x) + sqrt(x + 1))) 46 | } 47 | 48 | if (transformation == "log") { 49 | varbin_counts_df[varbin_counts_df == 0] <- 1e-4 50 | counts_df_ft <- as.data.frame(apply(varbin_counts_df, 51 | 2, 52 | function(x) log(x))) 53 | } 54 | 55 | counts_df_ft <- as.data.frame(counts_df_ft) 56 | 57 | S4Vectors::metadata(scCNA)$vst <- transformation 58 | SummarizedExperiment::assay(scCNA, transformation) <- counts_df_ft 59 | 60 | return(scCNA) 61 | } 62 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @return pipe 9 | #' @export 10 | #' @importFrom magrittr %>% 11 | #' @usage lhs \%>\% rhs 12 | #' @examples iris %>% head() 13 | NULL 14 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # CopyKit 17 | 18 | 19 | 20 | 21 | ## Overview 22 | 23 | The goal of **CopyKit** is to help you analyze single cell DNA sequencing datasets for copy number. 24 | For that, CopyKit presents four modules: 25 | 26 | 1) Pre-processing 27 | 2) Quality Control 28 | 3) Data Analysis 29 | 4) Visualization 30 | 31 | ## Installation 32 | 33 | You can install the development version of CopyKit from github with: 34 | 35 | ``` r 36 | devtools::install_github("navinlabcode/copykit") 37 | ``` 38 | 39 | CopyKit requires R version 4.0 or later. 40 | 41 | ## Documentation 42 | 43 | The complete documentation can be found at: 44 | [https://navinlabcode.github.io/CopyKit-UserGuide/](https://navinlabcode.github.io/CopyKit-UserGuide/) 45 | 46 | ## Issues 47 | 48 | CopyKit has not yet been peer-reviewed so please use it with caution. We do our best to provide a bug free software, however CopyKit is in its infancy and some growing pains are expected. Please let us know of any issues and we will be happy to take a look at it. We greatly appreciate suggestions. Please make sure to, if possible, include a reproducible example and the output of your sessionInfo() when opening an issue. 49 | 50 | ## Logo 51 | 52 | Thanks to [Aislyn Schalck](https://github.com/aislyn) for the awesome CopyKit logo design. 
53 | 54 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # CopyKit 5 | 6 | 7 | 8 | 9 | ## Overview 10 | 11 | The goal of **CopyKit** is to help you analyze single cell DNA 12 | sequencing datasets for copy number. For that, CopyKit presents four 13 | modules: 14 | 15 | 1) Pre-processing 16 | 2) Quality Control 17 | 3) Data Analysis 18 | 4) Visualization 19 | 20 | ## Installation 21 | 22 | You can install the development version of CopyKit from github with: 23 | 24 | ``` r 25 | devtools::install_github("navinlabcode/copykit") 26 | ``` 27 | 28 | CopyKit requires R version 4.0 or later. 29 | 30 | ## Documentation 31 | 32 | The complete documentation can be found at: 33 | 34 | 35 | ## Issues 36 | 37 | CopyKit has not yet been peer-reviewed so please use it with caution. We 38 | do our best to provide a bug free software, however CopyKit is in its 39 | infancy and some growing pains are expected. Please let us know of any 40 | issues and we will be happy to take a look at it. We greatly appreciate 41 | suggestions. Please make sure to, if possible, include a reproducible 42 | example and the output of your sessionInfo() when opening an issue. 43 | 44 | ## Logo 45 | 46 | Thanks to [Aislyn Schalck](https://github.com/aislyn) for the awesome 47 | CopyKit logo design. 
48 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | author: 2 | Darlan Conterno Minussi 3 | title: CopyKit 4 | destination: docs 5 | templates: 6 | params: 7 | bootswatch: simplex 8 | 9 | navbar: 10 | type: invert 11 | structure: 12 | left: 13 | - home 14 | - reference 15 | - tutorial 16 | - news 17 | right: github 18 | components: 19 | home: 20 | icon: fas fa-home fa-lg 21 | href: index.html 22 | reference: 23 | text: Reference 24 | href: reference/index.html 25 | 26 | 27 | -------------------------------------------------------------------------------- /data-raw/genes_granges.R: -------------------------------------------------------------------------------- 1 | ########################################## 2 | # Goal: Generate the GRanges object for the hg19 varbins 3 | # containing gene symbols 4 | # 5 | # # Output: hg19_genes object used in sysdata.rda 6 | # 7 | ########################################## 8 | # Author: Darlan Conterno Minussi 9 | ########################################## 10 | 11 | library(AnnotationHub) 12 | library(TxDb.Hsapiens.UCSC.hg19.knownGene) 13 | library(Organism.dplyr) 14 | 15 | src <- src_organism("TxDb.Hsapiens.UCSC.hg19.knownGene") 16 | hg19_genes <- genes(src, columns = "symbol") 17 | 18 | usethis::use_data(hg19_genes) 19 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tue Feb 2 10:04:12 2021 20 | # hg38 genes 21 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tue Feb 2 10:04:21 2021 22 | 23 | library(AnnotationHub) 24 | library(TxDb.Hsapiens.UCSC.hg38.knownGene) 25 | library(Organism.dplyr) 26 | 27 | src <- src_organism("TxDb.Hsapiens.UCSC.hg38.knownGene") 28 | hg38_genes <- genes(src, columns = "symbol") 29 | 30 | usethis::use_data(hg38_genes) 31 | -------------------------------------------------------------------------------- /data-raw/varbin_hg19_chrarm.R: 
-------------------------------------------------------------------------------- 1 | ########################################## 2 | # Goal: Add chr arm information to hg19 scaffold. 3 | # 4 | # Inputs: 5 | # hg19_rg.rda 6 | # 7 | # Output: use_data('hg19_rg.rda') 8 | # 9 | 10 | library(AnnotationHub) 11 | library(GenomicRanges) 12 | library(dplyr) 13 | library(stringr) 14 | library(devtools) 15 | 16 | load_all() 17 | 18 | hub <- AnnotationHub() 19 | 20 | hub_hg19 <- subset( 21 | hub, 22 | (hub$species == "Homo sapiens") & (hub$genome == "hg19") 23 | ) 24 | 25 | hub_df <- data.frame( 26 | ahid = hub_hg19$ah_id, 27 | title = hub_hg19$title 28 | ) 29 | 30 | g_cytoband <- hub_hg19[["AH5012"]] 31 | 32 | g_hg19 <- makeGRangesFromDataFrame(hg19_rg, keep.extra.columns = T) 33 | 34 | olaps <- findOverlaps(g_hg19, g_cytoband) 35 | 36 | df_olaps <- as.data.frame(olaps) 37 | 38 | df_olaps_dist <- df_olaps %>% distinct(queryHits, .keep_all = T) 39 | 40 | g_cyto_df <- as.data.frame(g_cytoband[df_olaps_dist$subjectHits]) 41 | 42 | g_cyto_df <- g_cyto_df %>% 43 | dplyr::mutate(arm = stringr::str_extract(name, "[pq]")) 44 | 45 | g_hg19$arm <- g_cyto_df$arm 46 | 47 | hg19_rg <- g_hg19 48 | use_data(hg19_rg, overwrite = TRUE) 49 | -------------------------------------------------------------------------------- /data-raw/varbin_hg19_grangeslist.R: -------------------------------------------------------------------------------- 1 | ########################################## 2 | # Goal: Generate the GRangesList object for the hg19 varbins at 100k and 3 | # 200k resolution. 4 | # 5 | # Inputs: 6 | # https://github.com/navinlabcode/CNV_pipeline/tree/master/lib/ 7 | # - varbin.gc.content.100k.bowtie.k50.hg19.bin_not_removed.txt 8 | # - varbin.gc.content.200k.bowtie.k50.hg19.bin_not_removed.txt 9 | # 10 | # Output: 'sysdata.rda' which additionally contains a GRangesList object 11 | # named 'varbin_hg19_grangeslist'. 12 | # 13 | # 1. 
Convert the raw coordinate from a mixture of 0-based and 1-based to 1-based. 14 | # For example, start=0, end=977835, bin_length=977836. 15 | # Equivalently, pure 0-based: [0, 977836); pure 1-based: [1, 977836] 16 | # 2. Provide the correct genomic locations and attach meta information (e.g., 17 | # GC content) for all varbins. 18 | # 3. Chr-style is chr1, chr2, ..., chrX, chrY. (No mitochondrion chromosome) 19 | # 20 | ########################################## 21 | # Author: Yun Yan (yun.yan@uth.tmc.edu) 22 | ########################################## 23 | library(devtools) 24 | library(usethis) 25 | library(GenomicRanges) 26 | library(GenomeInfoDb) 27 | 28 | # VarBins annotations are from the official pipeline's lib: UCSC style (chr1, chrX, chrY) 29 | fpath_varbin <- list( 30 | `res_100k` = "/volumes/seq/code/PIPELINES/CNA_pipeline_v1.4/lib/varbin.gc.content.100k.bowtie.k50.hg19.bin_not_removed.txt", 31 | `res_200k` = "/volumes/seq/code/PIPELINES/CNA_pipeline_v1.4/lib/varbin.gc.content.200k.bowtie.k50.hg19.bin_not_removed.txt" 32 | ) 33 | 34 | make_granges_from_varbin_file <- function(x) { 35 | df <- read.delim(x, header = T, stringsAsFactors = F) 36 | colnames(df) <- gsub(pattern = "\\.", replacement = "_", x = colnames(df)) 37 | # Input: 38 | # bin.chrom bin.start bin.end bin.length 39 | # chr1 0 977835 977836 40 | # chr1 977836 1200862 223027 41 | # chr1 1200863 1455237 254375 42 | # chr1 1455238 1758056 302819 43 | # ...
44 | # chr2 0 237130 237131 45 | # chr2 237131 454244 217114 46 | # chr2 454245 660285 206041 47 | 48 | ## Convert to pure 0-based coordination 49 | df$bin_end <- df$bin_end + 1 50 | 51 | ## Convert to a legal GRange object (automatically switched to 1-based) 52 | gr <- makeGRangesFromDataFrame( 53 | df = df, 54 | keep.extra.columns = T, 55 | seqnames.field = "bin_chrom", 56 | start.field = "bin_start", 57 | end.field = "bin_end", 58 | starts.in.df.are.0based = TRUE 59 | ) 60 | return(gr) 61 | 62 | # Output: 63 | # seqnames ranges strand | bin_length 64 | # [1] chr1 1-977836 * | 977836 65 | # [2] chr1 977837-1200863 * | 223027 66 | # [3] chr1 1200864-1455238 * | 254375 67 | # ... 68 | # [1] chr2 1-237131 * | 237131 69 | # [2] chr2 237132-454245 * | 217114 70 | # [3] chr2 454246-660286 * | 206041 71 | } 72 | 73 | list_gr <- lapply(fpath_varbin, make_granges_from_varbin_file) 74 | varbin_hg19_grangeslist <- GRangesList(unlist(list_gr)) 75 | cat(length(varbin_hg19_grangeslist), "resolutions are available:\n") 76 | cat(names(varbin_hg19_grangeslist), "\n") 77 | cat("How many varbins are available per resolution?:\n") 78 | print(sapply(varbin_hg19_grangeslist, length)) 79 | 80 | hg19_seqinfo <- GenomeInfoDb::Seqinfo(genome = "hg19") 81 | GenomeInfoDb::seqinfo(varbin_hg19_grangeslist) <- hg19_seqinfo 82 | 83 | # Export to package internal data 84 | # devtools::load_all('./') 85 | # usethis::use_data( 86 | # major_palette, minor_palette, hg19_genes, ## previously available 87 | # varbin_hg19_grangeslist, ## Added in this script 88 | # internal = TRUE, overwrite = TRUE) 89 | -------------------------------------------------------------------------------- /data/copykit_obj_filt_rle.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/copykit_obj_filt_rle.rda -------------------------------------------------------------------------------- 
/data/copykit_obj_filt_umap.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/copykit_obj_filt_umap.rda -------------------------------------------------------------------------------- /data/copykit_obj_rle.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/copykit_obj_rle.rda -------------------------------------------------------------------------------- /data/hg19_genes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/hg19_genes.rda -------------------------------------------------------------------------------- /data/hg19_rg.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/hg19_rg.rda -------------------------------------------------------------------------------- /data/hg38_genes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/hg38_genes.rda -------------------------------------------------------------------------------- /data/hg38_grangeslist.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/data/hg38_grangeslist.rda -------------------------------------------------------------------------------- /docs/._plotRatioPlotgif.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/docs/._plotRatioPlotgif.gif -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/docs/.nojekyll -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Page not found (404) • CopyKit 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
58 |
59 | 93 | 94 | 95 | 96 |
97 | 98 |
99 |
100 | 103 | 104 | Content not found. Please use links in the navbar. 105 | 106 |
107 | 108 |
109 | 110 | 111 | 112 |
113 | 116 | 117 |
118 |

Site built with pkgdown 1.4.1.

119 |
120 | 121 |
122 |
123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | License • CopyKit 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
58 |
59 | 93 | 94 | 95 | 96 |
97 | 98 |
99 |
100 | 103 | 104 |
YEAR: 2019
105 | COPYRIGHT HOLDER: CopyKit
106 | 
107 | 108 |
109 | 110 |
111 | 112 | 113 | 114 |
115 | 118 | 119 |
120 |

Site built with pkgdown 1.4.1.

121 |
122 | 123 |
124 |
125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | -------------------------------------------------------------------------------- /docs/LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | MIT License • CopyKit 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
58 |
59 | 93 | 94 | 95 | 96 |
97 | 98 |
99 |
100 | 103 | 104 |
105 | 106 |

Copyright (c) 2019 CopyKit

107 |

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

108 |

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

109 |

THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

110 |
111 | 112 |
113 | 114 |
115 | 116 | 117 | 118 |
119 | 122 | 123 |
124 |

Site built with pkgdown 1.4.1.

125 |
126 | 127 |
128 |
129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • CopyKit 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
58 |
59 | 93 | 94 | 95 | 96 |
97 | 98 |
99 |
100 | 103 | 104 |
    105 |
  • 106 |

    Darlan Conterno Minussi. Author, maintainer. ORCID 107 |

    108 |
  • 109 |
  • 110 |

    Yun Yan. Contributor. ORCID 111 |

    112 |
  • 113 |
114 | 115 |
116 | 117 |
118 | 119 | 120 | 121 |
122 | 125 | 126 |
127 |

Site built with pkgdown 1.4.1.

128 |
129 | 130 |
131 |
132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | } 25 | 26 | body > .container .row { 27 | flex: 1 0 auto; 28 | } 29 | 30 | footer { 31 | margin-top: 45px; 32 | padding: 35px 0 36px; 33 | border-top: 1px solid #e5e5e5; 34 | color: #666; 35 | display: flex; 36 | flex-shrink: 0; 37 | } 38 | footer p { 39 | margin-bottom: 0; 40 | } 41 | footer div { 42 | flex: 1; 43 | } 44 | 
footer .pkgdown { 45 | text-align: right; 46 | } 47 | footer p { 48 | margin-bottom: 0; 49 | } 50 | 51 | img.icon { 52 | float: right; 53 | } 54 | 55 | img { 56 | max-width: 100%; 57 | } 58 | 59 | /* Fix bug in bootstrap (only seen in firefox) */ 60 | summary { 61 | display: list-item; 62 | } 63 | 64 | /* Typographic tweaking ---------------------------------*/ 65 | 66 | .contents .page-header { 67 | margin-top: calc(-60px + 1em); 68 | } 69 | 70 | /* Section anchors ---------------------------------*/ 71 | 72 | a.anchor { 73 | margin-left: -30px; 74 | display:inline-block; 75 | width: 30px; 76 | height: 30px; 77 | visibility: hidden; 78 | 79 | background-image: url(./link.svg); 80 | background-repeat: no-repeat; 81 | background-size: 20px 20px; 82 | background-position: center center; 83 | } 84 | 85 | .hasAnchor:hover a.anchor { 86 | visibility: visible; 87 | } 88 | 89 | @media (max-width: 767px) { 90 | .hasAnchor:hover a.anchor { 91 | visibility: hidden; 92 | } 93 | } 94 | 95 | 96 | /* Fixes for fixed navbar --------------------------*/ 97 | 98 | .contents h1, .contents h2, .contents h3, .contents h4 { 99 | padding-top: 60px; 100 | margin-top: -40px; 101 | } 102 | 103 | /* Sidebar --------------------------*/ 104 | 105 | #sidebar { 106 | margin-top: 30px; 107 | position: -webkit-sticky; 108 | position: sticky; 109 | top: 70px; 110 | } 111 | #sidebar h2 { 112 | font-size: 1.5em; 113 | margin-top: 1em; 114 | } 115 | 116 | #sidebar h2:first-child { 117 | margin-top: 0; 118 | } 119 | 120 | #sidebar .list-unstyled li { 121 | margin-bottom: 0.5em; 122 | } 123 | 124 | .orcid { 125 | height: 16px; 126 | /* margins are required by official ORCID trademark and display guidelines */ 127 | margin-left:4px; 128 | margin-right:4px; 129 | vertical-align: middle; 130 | } 131 | 132 | /* Reference index & topics ----------------------------------------------- */ 133 | 134 | .ref-index th {font-weight: normal;} 135 | 136 | .ref-index td {vertical-align: top;} 137 | .ref-index .icon 
{width: 40px;} 138 | .ref-index .alias {width: 40%;} 139 | .ref-index-icons .alias {width: calc(40% - 40px);} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-arguments th {text-align: right; padding-right: 10px;} 143 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 144 | .ref-arguments .name {width: 20%;} 145 | .ref-arguments .desc {width: 80%;} 146 | 147 | /* Nice scrolling for wide elements --------------------------------------- */ 148 | 149 | table { 150 | display: block; 151 | overflow: auto; 152 | } 153 | 154 | /* Syntax highlighting ---------------------------------------------------- */ 155 | 156 | pre { 157 | word-wrap: normal; 158 | word-break: normal; 159 | border: 1px solid #eee; 160 | } 161 | 162 | pre, code { 163 | background-color: #f8f8f8; 164 | color: #333; 165 | } 166 | 167 | pre code { 168 | overflow: auto; 169 | word-wrap: normal; 170 | white-space: pre; 171 | } 172 | 173 | pre .img { 174 | margin: 5px 0; 175 | } 176 | 177 | pre .img img { 178 | background-color: #fff; 179 | display: block; 180 | height: auto; 181 | } 182 | 183 | code a, pre a { 184 | color: #375f84; 185 | } 186 | 187 | a.sourceLine:hover { 188 | text-decoration: none; 189 | } 190 | 191 | .fl {color: #1514b5;} 192 | .fu {color: #000000;} /* function */ 193 | .ch,.st {color: #036a07;} /* string */ 194 | .kw {color: #264D66;} /* keyword */ 195 | .co {color: #888888;} /* comment */ 196 | 197 | .message { color: black; font-weight: bolder;} 198 | .error { color: orange; font-weight: bolder;} 199 | .warning { color: #6A0366; font-weight: bolder;} 200 | 201 | /* Clipboard --------------------------*/ 202 | 203 | .hasCopyButton { 204 | position: relative; 205 | } 206 | 207 | .btn-copy-ex { 208 | position: absolute; 209 | right: 0; 210 | top: 0; 211 | visibility: hidden; 212 | } 213 | 214 | .hasCopyButton:hover button.btn-copy-ex { 215 | visibility: visible; 216 | } 217 | 218 | /* headroom.js ------------------------ */ 219 | 220 | .headroom { 221 | will-change: 
transform; 222 | transition: transform 200ms linear; 223 | } 224 | .headroom--pinned { 225 | transform: translateY(0%); 226 | } 227 | .headroom--unpinned { 228 | transform: translateY(-100%); 229 | } 230 | 231 | /* mark.js ----------------------------*/ 232 | 233 | mark { 234 | background-color: rgba(255, 255, 51, 0.5); 235 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 236 | padding: 1px; 237 | } 238 | 239 | /* vertical spacing after htmlwidgets */ 240 | .html-widget { 241 | margin-bottom: 10px; 242 | } 243 | 244 | /* fontawesome ------------------------ */ 245 | 246 | .fab { 247 | font-family: "Font Awesome 5 Brands" !important; 248 | } 249 | 250 | /* don't display links in code chunks when printing */ 251 | /* source: https://stackoverflow.com/a/10781533 */ 252 | @media print { 253 | code a:link:after, code a:visited:after { 254 | content: ""; 255 | } 256 | } 257 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('body').scrollspy({ 13 | target: '#sidebar', 14 | offset: 60 15 | }); 16 | 17 | $('[data-toggle="tooltip"]').tooltip(); 18 | 19 | var cur_path = paths(location.pathname); 20 | var links = $("#navbar ul li a"); 21 | var max_length = -1; 22 | var pos = -1; 23 | for (var i = 0; i < links.length; i++) { 24 | if (links[i].getAttribute("href") === "#") 25 | continue; 26 | // Ignore external links 27 | if (links[i].host !== location.host) 28 | continue; 29 | 30 | var nav_path = paths(links[i].pathname); 31 | 32 | var length = prefix_length(nav_path, cur_path); 33 | if (length > max_length) { 34 | max_length = 
length; 35 | pos = i; 36 | } 37 | } 38 | 39 | // Add class to parent
  • , and enclosing
  • if in dropdown 40 | if (pos >= 0) { 41 | var menu_anchor = $(links[pos]); 42 | menu_anchor.parent().addClass("active"); 43 | menu_anchor.closest("li.dropdown").addClass("active"); 44 | } 45 | }); 46 | 47 | function paths(pathname) { 48 | var pieces = pathname.split("/"); 49 | pieces.shift(); // always starts with / 50 | 51 | var end = pieces[pieces.length - 1]; 52 | if (end === "index.html" || end === "") 53 | pieces.pop(); 54 | return(pieces); 55 | } 56 | 57 | // Returns -1 if not found 58 | function prefix_length(needle, haystack) { 59 | if (needle.length > haystack.length) 60 | return(-1); 61 | 62 | // Special case for length-0 haystack, since for loop won't run 63 | if (haystack.length === 0) { 64 | return(needle.length === 0 ? 0 : -1); 65 | } 66 | 67 | for (var i = 0; i < haystack.length; i++) { 68 | if (needle[i] != haystack[i]) 69 | return(i); 70 | } 71 | 72 | return(haystack.length); 73 | } 74 | 75 | /* Clipboard --------------------------*/ 76 | 77 | function changeTooltipMessage(element, msg) { 78 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 79 | element.setAttribute('data-original-title', msg); 80 | $(element).tooltip('show'); 81 | element.setAttribute('data-original-title', tooltipOriginalTitle); 82 | } 83 | 84 | if(ClipboardJS.isSupported()) { 85 | $(document).ready(function() { 86 | var copyButton = ""; 87 | 88 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 89 | 90 | // Insert copy buttons: 91 | $(copyButton).prependTo(".hasCopyButton"); 92 | 93 | // Initialize tooltips: 94 | $('.btn-copy-ex').tooltip({container: 'body'}); 95 | 96 | // Initialize clipboard: 97 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 98 | text: function(trigger) { 99 | return trigger.parentNode.textContent; 100 | } 101 | }); 102 | 103 | clipboardBtnCopies.on('success', function(e) { 104 | changeTooltipMessage(e.trigger, 'Copied!'); 105 | e.clearSelection(); 106 | }); 107 | 108 | clipboardBtnCopies.on('error', 
function() { 109 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 110 | }); 111 | }); 112 | } 113 | })(window.jQuery || window.$) 114 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.3.1 2 | pkgdown: 1.4.1 3 | pkgdown_sha: ~ 4 | articles: {} 5 | 6 | -------------------------------------------------------------------------------- /docs/plotRatioPlotgif.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/docs/plotRatioPlotgif.gif -------------------------------------------------------------------------------- /docs/reference/pipe.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Pipe operator — %>% • CopyKit 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 95 | 96 | 97 | 98 |
    99 | 100 |
    101 |
    102 | 107 | 108 |
    109 |

    See magrittr::%>% for details.

    110 |
    111 | 112 |
    lhs %>% rhs
    113 | 114 | 115 | 116 |
    117 | 123 |
    124 | 125 | 126 |
    127 | 130 | 131 |
    132 |

    Site built with pkgdown 1.4.1.

    133 |
    134 | 135 |
    136 |
    137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /docs/reference/segment_ratios-scCNA-method.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Internal CopyKit functions — segment_ratios,scCNA-method • CopyKit 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 | 59 |
    60 |
    61 | 95 | 96 | 97 | 98 |
    99 | 100 |
    101 |
    102 | 107 | 108 |
    109 |

    Methods to get or set internal fields from the scCNA class

    110 |
    111 | 112 |
    # S4 method for scCNA
    113 | segment_ratios(x, withDimnames = TRUE)
    114 | 115 | 116 | 117 |
    118 | 124 |
    125 | 126 | 127 | 137 |
    138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /images/copykit_cheat_sheet_DCM1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/images/copykit_cheat_sheet_DCM1.png -------------------------------------------------------------------------------- /images/copykit_hexsticker_v2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/navinlabcode/copykit/4c01249bb04cf2b481bdb3ae734a4ee9dd04a6b5/images/copykit_hexsticker_v2.png -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | CopyKit 2 | github 3 | -------------------------------------------------------------------------------- /man/CopyKit-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AllClasses.R 3 | \docType{class} 4 | \name{igraph-class} 5 | \alias{igraph-class} 6 | \alias{CopyKit-class} 7 | \alias{.CopyKit} 8 | \alias{CopyKit} 9 | \title{Placeholder for the igraph class} 10 | \usage{ 11 | CopyKit( 12 | consensus = data.frame(), 13 | phylo = structure(list(), class = "phylo"), 14 | consensusPhylo = structure(list(), class = "phylo"), 15 | distMat = dist(matrix(0, 0, 0)), 16 | graph = igraph::graph.empty(), 17 | ... 
18 | ) 19 | } 20 | \arguments{ 21 | \item{consensus}{A data frame with the consensus information.} 22 | 23 | \item{phylo}{A phylo object with a phylogenetic tree.} 24 | 25 | \item{consensusPhylo}{A phylo object with a phylogenetic consensus tree.} 26 | 27 | \item{graph}{A graph object with a graph made from the umap data.} 28 | } 29 | \value{ 30 | A CopyKit class object. 31 | } 32 | \description{ 33 | S4 Class that extends the Bioconductor SingleCellExperiment class to hold 34 | single cell copy number datasets. 35 | } 36 | \section{Slots}{ 37 | 38 | \describe{ 39 | \item{\code{phylo}}{Stores the single cell phylogenetic information with ape class 40 | phylo.} 41 | 42 | \item{\code{consensusPhylo}}{Stores the consensus phylogenetic information with 43 | ape class phylo.} 44 | 45 | \item{\code{distMat}}{Stores a distance matrix object used for graphs and heatmaps.} 46 | 47 | \item{\code{graph}}{Stores an igraph object for network based clustering.} 48 | 49 | \item{\code{consensus}}{stores a consensus data frame from 50 | \code{\link{calcConsensus}.}} 51 | }} 52 | 53 | \references{ 54 | The Bioconductor SingleCellExperiment Class 55 | DOI: 10.18129/B9.bioc.SingleCellExperiment 56 | } 57 | \keyword{internal} 58 | -------------------------------------------------------------------------------- /man/calcConsensus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calcConsensus.R 3 | \name{calcConsensus} 4 | \alias{calcConsensus} 5 | \title{Calculate a consensus matrix of segment means based on \code{colData}} 6 | \usage{ 7 | calcConsensus( 8 | scCNA, 9 | assay = "segment_ratios", 10 | consensus_by = "subclones", 11 | fun = c("median", "mean"), 12 | BPPARAM = bpparam() 13 | ) 14 | } 15 | \arguments{ 16 | \item{scCNA}{The CopyKit object.} 17 | 18 | \item{assay}{String with the name of the assay to pull data from to calculate 19 | the consensus matrix.} 
20 | 21 | \item{consensus_by}{A string with the column from colData that will be used 22 | to isolate the cells by factor and calculate the consensus.} 23 | 24 | \item{fun}{A string indicating the summarizing function to be used.} 25 | 26 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 27 | should be parallelized.} 28 | } 29 | \value{ 30 | A consensus matrix stored in the consensus slot of the CopyKit object 31 | } 32 | \description{ 33 | Calculate a consensus matrix of segment means based on \code{colData} 34 | } 35 | \details{ 36 | Consensus profiles are calculated by averaging or taking the median 37 | of the ith segment mean of all single cells assigned to the same element of 38 | \link{colData}, 39 | } 40 | \examples{ 41 | copykit_obj <- copykit_example_filtered() 42 | copykit_obj <- findClusters(copykit_obj) 43 | copykit_obj <- calcConsensus(copykit_obj) 44 | } 45 | -------------------------------------------------------------------------------- /man/calcInteger.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calcInteger.R 3 | \name{calcInteger} 4 | \alias{calcInteger} 5 | \title{calcInteger()} 6 | \usage{ 7 | calcInteger( 8 | scCNA, 9 | assay = c("bincounts", "smoothed_bincounts", "segment_ratios"), 10 | method = "fixed", 11 | ploidy_value = NULL, 12 | name = "integer", 13 | penalty = 25, 14 | BPPARAM = bpparam() 15 | ) 16 | } 17 | \arguments{ 18 | \item{scCNA}{The CopyKit object.} 19 | 20 | \item{assay}{String with the name of the assay to pull data from to calculate 21 | integers.} 22 | 23 | \item{method}{Method used to scale the ratio values to integer.} 24 | 25 | \item{ploidy_value}{If method of choice is 'fixed' a ploidy value should be 26 | provided.} 27 | 28 | \item{name}{String specifying the name to be used to store the result in the 29 | reducedDims of the output.} 30 | 31 | \item{penalty}{An 
integer passed on to scquantum::ploidy.inference() 32 | penalty argument} 33 | 34 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 35 | should be parallelized.} 36 | } 37 | \value{ 38 | The CopyKit object with an assay slot named 'integer' that contains 39 | a data frame with cells as columns and integerized segments as rows. And, in 40 | case of method = 'scquantum' CopyKit adds three new elements to \code{colData} 41 | named 'ploidy' and 'ploidy_score' and the 'confidence ratio' obtained from 42 | scquantum for each cell. 43 | } 44 | \description{ 45 | Calculates the integer copy number profile for each single cell 46 | } 47 | \details{ 48 | CopyKit supports the following methods for calculating integer copy number 49 | matrices: 50 | \itemize{ 51 | \item{fixed:} When method argument is set to 'fixed' copykit extracts the 52 | segment means from the scCNA object and multiplies those means by the value 53 | provided in the argument ploidy_value. 54 | 55 | 56 | \item{scquantum:} When the method argument is set to 'scquantum', CopyKit 57 | applies the \code{\link[scquantum]{ploidy.inference}} function to perform a 58 | sample-wise calculation returning the estimated computational ploidy for 59 | every single cell. 60 | } 61 | } 62 | \examples{ 63 | copykit_obj <- mock_bincounts(ncells_diploid = 0, ncells = 10) 64 | copykit_obj <- calcInteger(copykit_obj, method = "scquantum") 65 | } 66 | -------------------------------------------------------------------------------- /man/calcRatios.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calcRatios.R 3 | \name{calcRatios} 4 | \alias{calcRatios} 5 | \title{Calculates the ratios from a matrix of counts} 6 | \usage{ 7 | calcRatios( 8 | scCNA, 9 | assay = c("ft", "bincounts", "smoothed_bincounts"), 10 | fun = c("mean", "median") 11 | ) 12 | } 13 | \arguments{ 14 | \item{scCNA}{The 
scCNA object} 15 | 16 | \item{assay}{String with the name of the assay to pull data from to calculate 17 | the ratios.} 18 | 19 | \item{fun}{A string indicating the summarizing function to be used.} 20 | } 21 | \value{ 22 | A ratio matrix within the slot assay(scCNA, 'ratios') 23 | can be accessed with \code{ratios}. 24 | } 25 | \description{ 26 | Calculates the ratios from a matrix of counts 27 | } 28 | \details{ 29 | Calculates a sample-wise normalization of the selected assay by the 30 | mean bin counts returns ratios where a value of 1 corresponds to the neutral 31 | copy number state of the sample 32 | } 33 | \examples{ 34 | copykit_obj <- mock_bincounts() 35 | copykit_obj <- calcRatios(copykit_obj) 36 | } 37 | -------------------------------------------------------------------------------- /man/data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R, R/internals.R 3 | \docType{data} 4 | \name{hg38_grangeslist} 5 | \alias{hg38_grangeslist} 6 | \alias{hg19_rg} 7 | \alias{copykit_obj_rle} 8 | \alias{copykit_obj_filt_rle} 9 | \alias{copykit_obj_filt_umap} 10 | \alias{hg19_genes} 11 | \alias{hg38_genes} 12 | \alias{copykit_example} 13 | \alias{copykit_example_filtered} 14 | \alias{mock_bincounts} 15 | \title{hg38_grangeslist} 16 | \format{ 17 | An object of class \code{CompressedGRangesList} of length 8. 18 | 19 | An object of class \code{CompressedGRangesList} of length 8. 20 | 21 | An object of class \code{list} of length 790. 22 | 23 | An object of class \code{list} of length 1502. 24 | 25 | An object of class \code{matrix} (inherits from \code{array}) with 790 rows and 2 columns. 26 | 27 | An object of class \code{GRanges} of length 23056. 28 | 29 | An object of class \code{GRanges} of length 30909. 
30 | } 31 | \source{ 32 | library(TxDb.Hsapiens.UCSC.hg19.knownGene) 33 | 34 | library(TxDb.Hsapiens.UCSC.hg38.knownGene) 35 | } 36 | \usage{ 37 | hg38_grangeslist 38 | 39 | hg38_grangeslist 40 | 41 | copykit_obj_filt_rle 42 | 43 | copykit_obj_rle 44 | 45 | copykit_obj_filt_umap 46 | 47 | hg19_genes 48 | 49 | hg38_genes 50 | 51 | copykit_example() 52 | 53 | copykit_example_filtered() 54 | 55 | mock_bincounts( 56 | ncells = 30, 57 | ncells_diploid = 5, 58 | position_gain = 4900:5493, 59 | position_del = 6523:7056, 60 | genome = "hg38", 61 | resolution = "220kb", 62 | run_vst = TRUE, 63 | run_segmentation = TRUE, 64 | run_lognorm = TRUE 65 | ) 66 | } 67 | \arguments{ 68 | \item{ncells}{A numeric with the total number of cells to simulate.} 69 | 70 | \item{ncells_diploid}{A numeric with the number of diploid cells to simulate} 71 | 72 | \item{position_gain}{A vector with the index of the bin counts in which 73 | chromosomal gains will be added.} 74 | 75 | \item{position_del}{A vector with the index of the bin counts in which 76 | chromosomal deletions will be placed.} 77 | 78 | \item{genome}{The assembly genome information to add to the metadata.} 79 | 80 | \item{resolution}{The resolution of the scaffold to add to the object 81 | metadata} 82 | } 83 | \value{ 84 | Contains a GrangesList object with the scaffolds for each of the 85 | resolutions used by runVarbin, runCountReads and runSegmentation on the 86 | hg38 genome assembly. 87 | 88 | Contains a GrangesList object with the scaffolds for each of the 89 | resolutions used by runVarbin, runCountReads and runSegmentation on the hg19 90 | genome assembly. 91 | 92 | A CopyKit object with data from BL1 sample from the CopyKit 93 | manuscript. 94 | 95 | A CopyKit object with data from BL1 sample from the CopyKit 96 | manuscript with diploid and noise cells removed. 
97 | 98 | A CopyKit object with simulated bincounts 99 | } 100 | \description{ 101 | hg38_grangeslist 102 | 103 | hg19_rg 104 | 105 | copykit_obj_rle 106 | 107 | copykit_obj_filt_rle 108 | 109 | copykit_obj_filt_umap 110 | 111 | hg19_genes 112 | 113 | hg38_genes 114 | 115 | CopyKit Example 116 | 117 | CopyKit Example 118 | 119 | mock_bincounts 120 | } 121 | \details{ 122 | Contains a Rle object with the segment ratios values of the copykit 123 | BL1 example dataset. It is used by the functions copykit_example 124 | 125 | Contains a Rle object with the segment ratios values of the copykit 126 | BL1 example filtered dataset. It is used by the functions 127 | copykit_example_filtered() 128 | 129 | Contains the umap reduced dimension for the BL1 dataset as generated 130 | for the CopyKit manuscript 131 | 132 | Contains the GrangesObject for the genomic positions of genes in the 133 | hg19 genome assembly 134 | 135 | Contains the GrangesObject for the genomic positions of genes in the 136 | hg38 genome assembly 137 | 138 | This function is largely used for examples of CopyKit functions. 139 | 140 | This function returns a CopyKit object with the unfiltered dataset 141 | of the sample BL1. This sample was processed with ACT and is a liver 142 | metastasis from a primary breast cancer patient. 143 | 144 | The CopyKit object contain only the segment ratio mean of each single cell 145 | and, therefore, is not compatible with the functions from the runVarbin 146 | module. 147 | 148 | This function is largely used for examples of CopyKit functions. 149 | 150 | This function returns a CopyKit object with the unfiltered dataset 151 | of the sample BL1. This sample was processed with ACT and is a liver 152 | metastasis from a primary breast cancer patient. 153 | 154 | The CopyKit object contain only the segment ratio mean of each single cell 155 | and, therefore, is not compatible with the functions from the runVarbin 156 | module. 
157 | } 158 | \keyword{internal} 159 | -------------------------------------------------------------------------------- /man/dot-countBreakpoints.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/countBreakpoints.R 3 | \name{.countBreakpoints} 4 | \alias{.countBreakpoints} 5 | \title{countBreakpoints} 6 | \usage{ 7 | .countBreakpoints(scCNA) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | } 12 | \value{ 13 | The scCNA object with a column of breakpoint counts added to colData. 14 | } 15 | \description{ 16 | Considers changes in the segment ratios as breakpoints. 17 | Counts the breakpoints for each chromosome arm separately. 18 | } 19 | \examples{ 20 | copykit_obj <- copykit_example_filtered() 21 | copykit_obj <- .countBreakpoints(copykit_obj) 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/findAneuploidCells.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findAneuploidCells.R 3 | \name{findAneuploidCells} 4 | \alias{findAneuploidCells} 5 | \title{findAneuploidCells} 6 | \usage{ 7 | findAneuploidCells( 8 | scCNA, 9 | assay = "segment_ratios", 10 | resolution = "auto", 11 | remove_XY = TRUE, 12 | simul = TRUE, 13 | seed = 17 14 | ) 15 | } 16 | \arguments{ 17 | \item{scCNA}{The CopyKit object} 18 | 19 | \item{assay}{String with the name of the assay to pull data from to find 20 | normal cells.} 21 | 22 | \item{resolution}{A numeric scalar used as threshold to detect normal cells.} 23 | 24 | \item{remove_XY}{A boolean that removes chrX and chrY from the analysis.} 25 | 26 | \item{simul}{A boolean that if TRUE adds a simulated normal dataset to boost 27 | identifying normal cells in datasets with small proportions of normal cells.} 28 | 29 | \item{seed}{Seed 
passed on to reproduce simulated CV of normal cells.} 30 | } 31 | \value{ 32 | Information is added to \code{\link[SummarizedExperiment]{colData}} 33 | in a column named 'is_aneuploid', being TRUE if a cell is detected as 34 | aneuploid and FALSE if the cell is detected as euploid. 35 | } 36 | \description{ 37 | Find cells that are not aneuploid in the dataset. 38 | } 39 | \details{ 40 | Performs a sample-wise calculation of the segment means coefficient 41 | of variation and fits a Gaussian mixture model to the observed distribution 42 | from all cells. To increase the sensitivity of the model, the expected 43 | distribution of the coefficient of variation for diploid cells is simulated 44 | for a thousand cells (mean = 0, sd = 0.01). This way, CopyKit can adequately 45 | detect diploid cells even in datasets with limited amounts of diploid cells 46 | and guarantees that no aneuploid cell will be removed from datasets without 47 | any diploid cells. The distribution with the smallest CV 48 | is assumed to originate from normal cells. Cells are classified as diploid 49 | if they have a coefficient of variation smaller than the mean plus five times 50 | the standard deviation of the normal cell distribution. 
51 | } 52 | \examples{ 53 | set.seed(1000) 54 | copykit_obj <- copykit_example()[,sample(500)] 55 | copykit_obj <- findAneuploidCells(copykit_obj) 56 | } 57 | -------------------------------------------------------------------------------- /man/findClusters.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findClusters.R 3 | \name{findClusters} 4 | \alias{findClusters} 5 | \title{Find Clusters} 6 | \usage{ 7 | findClusters( 8 | scCNA, 9 | embedding = "umap", 10 | ncomponents = 2, 11 | method = c("hdbscan", "leiden", "louvain"), 12 | k_superclones = NULL, 13 | k_subclones = NULL, 14 | seed = 17 15 | ) 16 | } 17 | \arguments{ 18 | \item{scCNA}{scCNA object.} 19 | 20 | \item{embedding}{String with the name of the reducedDim to pull data from.} 21 | 22 | \item{ncomponents}{An integer with the number of components dimensions to 23 | use from the embedding.} 24 | 25 | \item{method}{A string with method used for clustering.} 26 | 27 | \item{k_superclones}{A numeric k-nearest-neighbor value. 28 | Used to find the superclones.} 29 | 30 | \item{k_subclones}{A numeric k-nearest-neighbor value. 31 | Used to find the subclones} 32 | 33 | \item{seed}{A numeric passed on to pseudo-random dependent functions.} 34 | } 35 | \value{ 36 | Cluster information is added to 37 | \code{\link[SummarizedExperiment]{colData}} in columns superclones or 38 | subclones. Superclones are prefixed by 's' whereas subclones are prefixed 39 | by 'c'. 40 | } 41 | \description{ 42 | Search for clusters in the scCNA data. 43 | } 44 | \details{ 45 | \code{findClusters} uses the reduced dimensional embedding resulting 46 | from \code{\link{runUmap}} to perform clustering at two levels, hereby 47 | referred to as superclones, and subclones. 
When clustering for superclones 48 | findClusters creates a graph representation of the dataset reduced 49 | dimension embedding using a shared nearest neighbor algorithm 50 | (SNN) \code{\link[bluster]{makeSNNGraph}}. From this graph the connected 51 | components are extracted and generally represent high-level structures 52 | that share large, lineage-defining copy number events. At a more 53 | fine-grained resolution, CopyKit can also be used to detect subclones, 54 | i.e., groups of cells containing a unique copy number event per cluster. 55 | To do so, the umap embedding is again used as the pre-processing step, 56 | this time to perform a density-based clustering with hdbscan 57 | \code{\link[dbscan]{hdbscan}}. Network clustering 58 | algorithms on top of the SNN graph, such as the leiden algorithm 59 | \code{\link[leidenbase]{leiden_find_partition}}, can also be used. 60 | 61 | \itemize{ 62 | \item{hdbscan}: hdbscan is an outlier-aware clustering algorithm. Since 63 | extensive filtering of the dataset can be applied before clustering with 64 | \code{\link{findOutliers}}, any cell classified as an outlier is inferred 65 | to the same cluster group as its closest, non-outlier, nearest-neighbor 66 | according to Euclidean distance. 67 | } 68 | } 69 | \examples{ 70 | copykit_obj <- copykit_example_filtered() 71 | copykit_obj <- findClusters(copykit_obj) 72 | } 73 | \references{ 74 | Laks, E., McPherson, A., Zahn, H., et al. (2019). Clonal 75 | Decomposition and DNA Replication States Defined by Scaled Single-Cell 76 | Genome Sequencing. Cell, 179(5), 1207–1221.e22. 77 | https://doi.org/10.1016/j.cell.2019.10.026 78 | 79 | Leland McInnes and John Healy and James Melville. UMAP: Uniform Manifold 80 | Approximation and Projection for Dimension Reduction. arXiv:1802.03426 81 | 82 | Lun ATL, McCarthy DJ, Marioni JC (2016). “A step-by-step workflow for 83 | low-level analysis of single-cell RNA-seq data with Bioconductor.” 84 | F1000Res., 5, 2122. doi: 10.12688/f1000research.9501.2. 
85 | } 86 | \seealso{ 87 | \code{\link{findSuggestedK}}. 88 | 89 | \code{\link[dbscan]{hdbscan}} For hdbscan clustering. 90 | } 91 | \author{ 92 | Darlan Conterno Minussi 93 | } 94 | -------------------------------------------------------------------------------- /man/findOutliers.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findOutliers.R 3 | \name{findOutliers} 4 | \alias{findOutliers} 5 | \title{findOutliers()} 6 | \usage{ 7 | findOutliers( 8 | scCNA, 9 | assay = "segment_ratios", 10 | k = 5, 11 | resolution = 0.9, 12 | BPPARAM = BiocParallel::bpparam() 13 | ) 14 | } 15 | \arguments{ 16 | \item{scCNA}{CopyKit object.} 17 | 18 | \item{assay}{String with the name of the assay to pull data.} 19 | 20 | \item{k}{A numeric scalar with the number of k-nearest-neighbor cells used to 21 | calculate the mean correlation.} 22 | 23 | \item{resolution}{A numeric scalar that sets how strict the 24 | correlation cutoff will be.} 25 | 26 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 27 | should be parallelized.} 28 | } 29 | \value{ 30 | Adds a column 'outlier' to 31 | \code{\link[SummarizedExperiment]{colData}}. Cells that pass the filtering 32 | criteria receive the label "kept", whereas cells that do not pass the 33 | filtering criteria receive the label "removed". 34 | } 35 | \description{ 36 | Uses a nearest neighbor approach to find noisy copy number profiles within 37 | the segment means. 38 | } 39 | \details{ 40 | To detect low-quality cells, \code{findOutliers} calculates 41 | the Pearson correlation matrix of all samples from the segment ratio means. 42 | Next, it calculates a sample-wise mean of the correlation between a sample 43 | and its k-nearest-neighbors. Samples in which the correlation value is lower 44 | than the defined threshold are classified as low-quality cells. 
45 | } 46 | \examples{ 47 | set.seed(1000) 48 | copykit_obj <- copykit_example()[,sample(500)] 49 | copykit_obj <- findAneuploidCells(copykit_obj) 50 | copykit_obj <- copykit_obj[, colData(copykit_obj)$is_aneuploid == TRUE] 51 | copykit_obj <- findOutliers(copykit_obj) 52 | } 53 | \author{ 54 | Hua-Jun Wu 55 | 56 | Darlan Conterno Minussi 57 | 58 | Junke Wang 59 | } 60 | -------------------------------------------------------------------------------- /man/findSuggestedK.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findSuggestedK.R 3 | \name{findSuggestedK} 4 | \alias{findSuggestedK} 5 | \alias{hdbscanCBI} 6 | \alias{leidenCBI} 7 | \alias{louvainCBI} 8 | \title{findSuggestedK} 9 | \usage{ 10 | findSuggestedK( 11 | scCNA, 12 | embedding = "umap", 13 | ncomponents = 2, 14 | k_range = NULL, 15 | method = c("hdbscan", "leiden", "louvain"), 16 | metric = c("median", "mean"), 17 | seed = 17, 18 | B = 200, 19 | BPPARAM = bpparam() 20 | ) 21 | 22 | hdbscanCBI(data, minPts, diss = inherits(data, "dist"), ...) 23 | 24 | leidenCBI(data, k, seed_leid, diss = inherits(data, "dist"), ...) 25 | 26 | louvainCBI(data, k, seed_leid, diss = inherits(data, "dist"), ...) 
27 | } 28 | \arguments{ 29 | \item{scCNA}{The CopyKit object.} 30 | 31 | \item{embedding}{String with the name of the reducedDim embedding.} 32 | 33 | \item{ncomponents}{An integer with the number of components dimensions to 34 | use from the embedding.} 35 | 36 | \item{k_range}{A numeric range of values to be tested.} 37 | 38 | \item{method}{A string with the method of clustering to be tested.} 39 | 40 | \item{metric}{A string with the function to summarize the jaccard similarity 41 | value from all clusters.} 42 | 43 | \item{seed}{A numerical scalar with a seed value to be passed on to 44 | \code{\link[uwot]{umap}}.} 45 | 46 | \item{B}{A numeric with the number of bootstrapping iterations passed on to 47 | \code{\link[fpc]{clusterboot}}. Higher values yield better results at a cost 48 | of performance} 49 | 50 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 51 | should be parallelized.} 52 | } 53 | \value{ 54 | Adds a table with the mean jaccard coefficient of clusters for each 55 | tested k and the suggested k value to be used for clustering to 56 | \code{\link[S4Vectors]{metadata}} 57 | } 58 | \description{ 59 | Performs a grid search over a range of k values to assess cluster stability. 60 | } 61 | \details{ 62 | Performs a grid-search over a range of k values and returns the value 63 | that maximizes the jaccard similarity. Importantly, while this approach does 64 | not guarantee optimal clustering, it provides a guide that maximizes cluster 65 | stability. 66 | 67 | The default tested range is from 7 to the square root of the number of cells 68 | in the scCNA object. If sqrt(n_cells) is smaller than 7 a range of 5 to 15 69 | is tested. 70 | } 71 | \examples{ 72 | set.seed(1000) 73 | copykit_obj <- copykit_example_filtered()[,sample(300)] 74 | copykit_obj <- findSuggestedK(copykit_obj) 75 | } 76 | \references{ 77 | Hennig, C. (2007) Cluster-wise assessment of cluster stability. 
78 | Computational Statistics and Data Analysis, 52, 258-271. 79 | 80 | Hennig, C. (2008) Dissolution point and isolation robustness: robustness 81 | criteria for general cluster analysis methods. 82 | Journal of Multivariate Analysis 99, 1154-1176. 83 | } 84 | \seealso{ 85 | \code{\link[fpc]{clusterboot}} 86 | 87 | \code{\link{plotSuggestedK}} 88 | } 89 | \keyword{internal} 90 | -------------------------------------------------------------------------------- /man/findVariableGenes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/findVariableGenes.R 3 | \name{findVariableGenes} 4 | \alias{findVariableGenes} 5 | \title{findVariableGenes} 6 | \usage{ 7 | findVariableGenes(scCNA, genes, assay = "logr", top_n = 50) 8 | } 9 | \arguments{ 10 | \item{scCNA}{scCNA object.} 11 | 12 | \item{genes}{A vector of strings containing the HUGO Symbol for the gene 13 | of interest.} 14 | 15 | \item{assay}{String with the name of the assay to pull data with the copy 16 | number states for each gene.} 17 | 18 | \item{top_n}{A numeric defining how many variable genes will be returned.} 19 | } 20 | \value{ 21 | A string vector with the HUGO genes in decreasing order of importance 22 | stored to the \code{\link[S4Vectors]{metadata}}. 23 | } 24 | \description{ 25 | Find the most variable genes in the dataset. 26 | } 27 | \details{ 28 | \code{findVariableGenes} runs \code{\link[stats]{prcomp}} on the 29 | copy number states of the genes from the provided gene list and returns 30 | the ones that have the largest absolute variance as assessed by the 31 | loadings of the first principal component. 32 | 33 | The resulting list of genes is stored within the metadata of the scCNA 34 | object and can be accessed with \code{\link[S4Vectors]{metadata}}.
35 | } 36 | \examples{ 37 | copykit_obj <- copykit_example_filtered() 38 | copykit_obj <- findVariableGenes(copykit_obj, 39 | genes = c("FHIT", "PTEN", "FOXO1", "BRCA1") 40 | ) 41 | } 42 | -------------------------------------------------------------------------------- /man/inferMrca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/inferMrca.R 3 | \name{inferMrca} 4 | \alias{inferMrca} 5 | \title{inferMrca} 6 | \usage{ 7 | inferMrca(scCNA, value = 1) 8 | } 9 | \arguments{ 10 | \item{scCNA}{the scCNA object} 11 | 12 | \item{value}{A numeric value used to compare the profiles to infer the mrca, 13 | usually equal to the mean segment ratio of cells (value = 1) or the average 14 | copy number of the cells} 15 | } 16 | \value{ 17 | Returns a numeric vector added to the \code{\link[S4Vectors]{metadata}} 18 | of the scCNA object named `inferred_mrca` 19 | } 20 | \description{ 21 | From the consensus matrix it infers a Most Recent Common Ancestral (MRCA) 22 | across all groups. 23 | } 24 | \details{ 25 | Calculates the MRCA by inferring, for every bin, the value across 26 | all groups that is closest to the number supplied in the argument value. 
27 | } 28 | \examples{ 29 | copykit_obj <- copykit_example_filtered()[,1:300] 30 | copykit_obj <- findClusters(copykit_obj) 31 | copykit_obj <- calcConsensus(copykit_obj) 32 | copykit_obj <- inferMrca(copykit_obj) 33 | } 34 | -------------------------------------------------------------------------------- /man/internals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internals.R 3 | \docType{methods} 4 | \name{CopyKit-internals} 5 | \alias{CopyKit-internals} 6 | \alias{segment_ratios} 7 | \alias{segment_ratios,CopyKit-method} 8 | \alias{ratios} 9 | \alias{ratios,CopyKit-method} 10 | \alias{bincounts} 11 | \alias{bincounts,CopyKit-method} 12 | \alias{consensus} 13 | \alias{consensus,CopyKit-method} 14 | \alias{consensus<-} 15 | \alias{consensus<-,CopyKit-method} 16 | \alias{phylo} 17 | \alias{phylo,CopyKit-method} 18 | \alias{phylo<-} 19 | \alias{phylo<-,CopyKit-method} 20 | \alias{consensusPhylo} 21 | \alias{consensusPhylo,CopyKit-method} 22 | \alias{consensusPhylo<-} 23 | \alias{consensusPhylo<-,CopyKit-method} 24 | \alias{distMat} 25 | \alias{distMat,CopyKit-method} 26 | \alias{distMat<-} 27 | \alias{distMat<-,CopyKit-method} 28 | \alias{graph} 29 | \alias{graph,CopyKit-method} 30 | \alias{graph<-} 31 | \alias{graph<-,CopyKit-method} 32 | \alias{show} 33 | \alias{show,CopyKit-method} 34 | \alias{\%!in\%} 35 | \alias{ocean_balance_hex} 36 | \alias{ocean.balance} 37 | \alias{superclones_pal} 38 | \alias{subclones_pal} 39 | \alias{find_scaffold_genes} 40 | \alias{l2e.normal.sd} 41 | \alias{overdispersion} 42 | \alias{parCor} 43 | \alias{mergeLevels} 44 | \alias{combine.func} 45 | \title{CopyKit internal functions.} 46 | \format{ 47 | An object of class \code{character} of length 29. 
48 | } 49 | \usage{ 50 | \S4method{segment_ratios}{CopyKit}(x, withDimnames = TRUE) 51 | 52 | \S4method{ratios}{CopyKit}(x, withDimnames = TRUE) 53 | 54 | \S4method{bincounts}{CopyKit}(x, withDimnames = TRUE) 55 | 56 | \S4method{consensus}{CopyKit}(x, withDimnames = TRUE) 57 | 58 | \S4method{consensus}{CopyKit}(x) <- value 59 | 60 | \S4method{phylo}{CopyKit}(x) 61 | 62 | \S4method{phylo}{CopyKit}(x) <- value 63 | 64 | \S4method{consensusPhylo}{CopyKit}(x) 65 | 66 | \S4method{consensusPhylo}{CopyKit}(x) <- value 67 | 68 | \S4method{distMat}{CopyKit}(x) 69 | 70 | \S4method{distMat}{CopyKit}(x) <- value 71 | 72 | \S4method{graph}{CopyKit}(x) 73 | 74 | \S4method{graph}{CopyKit}(x) <- value 75 | 76 | \S4method{show}{CopyKit}(object) 77 | 78 | x \%!in\% table 79 | 80 | ocean_balance_hex 81 | 82 | ocean.balance(n) 83 | 84 | superclones_pal() 85 | 86 | subclones_pal() 87 | 88 | find_scaffold_genes(scCNA, genes) 89 | 90 | l2e.normal.sd(xs) 91 | 92 | overdispersion(v) 93 | 94 | parCor(x, BPPARAM = BiocParallel::bpparam()) 95 | 96 | mergeLevels( 97 | vecObs, 98 | vecPred, 99 | pv.thres = 1e-04, 100 | ansari.sign = 0.05, 101 | thresMin = 0.05, 102 | thresMax = 0.5, 103 | verbose = 1, 104 | scale = TRUE 105 | ) 106 | 107 | combine.func( 108 | diff, 109 | vecObs, 110 | vecPredNow, 111 | mnNow, 112 | mn1, 113 | mn2, 114 | pv.thres = 1e-04, 115 | thresAbs = 0 116 | ) 117 | } 118 | \arguments{ 119 | \item{x}{CopyKit object.} 120 | } 121 | \value{ 122 | Hexadecimal values for ocean.balance function 123 | 124 | The default colors for heatmaps 125 | 126 | a named vector of default colors for CopyKit superclones. 127 | 128 | a named vector of default colors for CopyKit subclones. 129 | 130 | A data frame with the gene HUGO gene symbol and the position on the 131 | relevant scaffold from the varbin pipeline. 132 | 133 | A numeric vector with least squares sd estimation. 
134 | 135 | A numeric vector with the estimation of the index of dispersion, 136 | which is used when estimating standard errors for each segment mean 137 | 138 | A matrix with the pairwise correlation from the segment ratio means. 139 | } 140 | \description{ 141 | This document establishes setters and getters to facilitate access 142 | to fields for the CopyKit class object. The functions provided here are 143 | in addition to setters and getters available from the SingleCellExperiment 144 | class. 145 | } 146 | \section{Getters}{ 147 | 148 | \describe{ 149 | \item{\code{segment_ratios}:}{Returns a data frame of normalized segment 150 | ratio means.} 151 | \item{\code{ratios}:}{Returns a data frame of normalized ratio means.} 152 | \item{\code{bincounts}:}{Returns a data frame of binned bincounts.} 153 | \item{\code{consensus}:}{Returns a data frame of normalized segment 154 | ratio means for the consensus matrix.} 155 | \item{\code{phylo}:}{Returns a phylo class object with a phylogenetic tree.} 156 | \item{\code{consensusPhylo}:}{Returns a phylo class object with a 157 | phylogenetic tree from the consensus matrix.} 158 | } 159 | } 160 | 161 | \author{ 162 | Alexander Davis 163 | 164 | Junke Wang 165 | } 166 | \keyword{datasets} 167 | \keyword{internal} 168 | -------------------------------------------------------------------------------- /man/knnSmooth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/knnSmooth.R 3 | \name{knnSmooth} 4 | \alias{knnSmooth} 5 | \title{knnSmooth} 6 | \usage{ 7 | knnSmooth(scCNA, k = 4, BPPARAM = bpparam()) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | 12 | \item{k}{A numeric with the k nearest neighbor value for smoothing} 13 | 14 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 15 | should be parallelized.} 16 | } 17 | \value{ 18 | The CopyKit object with an assay
smoothed_bincounts 19 | } 20 | \description{ 21 | Smooth bincounts based on k nearest neighbors. 22 | } 23 | \details{ 24 | This function uses a k-nearest neighbors approach to smooth cells' 25 | raw bincounts. To do so, the k-nearest neighbors are calculated with 26 | \code{\link[BiocNeighbors]{findKNN}}. The bincounts of the k-nearest neighbors 27 | for each cell are tallied and an assay called smoothed_bincounts is added to 28 | \code{\link{assay}}. Afterwards, \code{\link{runVst}} and 29 | \code{\link{runSegmentation}} are re-run by \code{knnSmooth}. 30 | 31 | This function results in a trade-off for the elimination of noise at the cost 32 | of risk of loss of subclonal structure. To minimize the risk of subclonal 33 | structure loss we recommend using very small values of k. 34 | 35 | This function should be followed by applying \code{\link{runVst}} and 36 | \code{\link{runSegmentation}} to the CopyKit object. 37 | } 38 | \examples{ 39 | copykit_obj <- mock_bincounts(ncells = 10) 40 | copykit_obj <- runSegmentation(copykit_obj) 41 | copykit_obj <- knnSmooth(copykit_obj) 42 | 43 | 44 | } 45 | \author{ 46 | Darlan Conterno Minussi 47 | 48 | Runmin Wei 49 | } 50 | -------------------------------------------------------------------------------- /man/logNorm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/logNorm.R 3 | \name{logNorm} 4 | \alias{logNorm} 5 | \title{logNorm()} 6 | \usage{ 7 | logNorm( 8 | scCNA, 9 | transform = c("log", "log2", "log10", "log1p"), 10 | assay = "segment_ratios", 11 | name = "logr" 12 | ) 13 | } 14 | \arguments{ 15 | \item{scCNA}{scCNA object.} 16 | 17 | \item{transform}{String specifying the transformation to apply to the selected 18 | assay.} 19 | 20 | \item{assay}{String with the name of the assay to pull data from to run the 21 | segmentation.} 22 | 23 | \item{name}{String with the name for the target slot for
the resulting 24 | transformed counts.} 25 | } 26 | \value{ 27 | A data frame with log transformed counts inside the 28 | \code{\link[SummarizedExperiment]{assay}} slot. 29 | } 30 | \description{ 31 | Computes a log transformation of the selected assay 32 | } 33 | \examples{ 34 | copykit_obj <- copykit_example() 35 | copykit_obj <- logNorm(copykit_obj) 36 | } 37 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \value{ 10 | pipe 11 | } 12 | \description{ 13 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 14 | } 15 | \examples{ 16 | iris \%>\% head() 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/plotAlluvial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotAlluvial.R 3 | \name{plotAlluvial} 4 | \alias{plotAlluvial} 5 | \title{plotAlluvial()} 6 | \usage{ 7 | plotAlluvial(scCNA, label, label_colors = NULL, min_cells = NULL) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | 12 | \item{label}{A string with two or more elements from \code{\link[SummarizedExperiment]{colData}}.} 13 | 14 | \item{label_colors}{An optional named vector with the colors of each element 15 | from label.} 16 | 17 | \item{min_cells}{An optional numeric to filter stratum that do not reach 18 | the minimum amount of cells.} 19 | } 20 | \value{ 21 | A ggplot object containing an alluvial plot from ggalluvial 22 | } 23 | \description{ 24 | Produces an alluvial plot from character elements of the metadata 25 | } 26 | \examples{ 27 | copykit_obj <- 
copykit_example_filtered() 28 | copykit_obj <- findClusters(copykit_obj) 29 | colData(copykit_obj)$section <- stringr::str_extract( 30 | colData(copykit_obj)$sample, 31 | "(L[0-9]+L[0-9]+|L[0-9]+)" 32 | ) 33 | plotAlluvial(copykit_obj, label = c("subclones", "section")) 34 | } 35 | -------------------------------------------------------------------------------- /man/plotConsensusLine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotConsensusLine.R 3 | \name{plotConsensusLine} 4 | \alias{plotConsensusLine} 5 | \title{plotConsensusLine} 6 | \usage{ 7 | plotConsensusLine(scCNA) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The scCNA object} 11 | } 12 | \value{ 13 | An interactive plot where different groups 14 | can be selected for easier visualization 15 | } 16 | \description{ 17 | Opens an interactive app showing ratio line plots of the different consensus 18 | groups 19 | } 20 | \examples{ 21 | \dontrun{ 22 | copykit_obj <- copykit_example_filtered() 23 | copykit_obj <- findClusters(copykit_obj) 24 | copykit_obj <- calcConsensus(copykit_obj) 25 | plotConsensusLine(copykit_obj) 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/plotFreq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotFreq.R 3 | \name{plotFreq} 4 | \alias{plotFreq} 5 | \title{plotFreq} 6 | \usage{ 7 | plotFreq( 8 | scCNA, 9 | high_threshold = 1.1, 10 | low_threshold = 0.9, 11 | assay = "segment_ratios", 12 | group = NULL, 13 | geom = c("area", "line"), 14 | BPPARAM = bpparam() 15 | ) 16 | } 17 | \arguments{ 18 | \item{scCNA}{scCNA object.} 19 | 20 | \item{high_threshold}{A numeric with the threshold above which events are 21 | considered amplifications.} 22 | 23 | \item{low_threshold}{A numeric with 
the threshold below which events are 24 | considered deletions.} 25 | 26 | \item{assay}{String with the name of the assay to pull data from to plot 27 | the frequency plot.} 28 | 29 | \item{group}{A string with the name of the columns from 30 | \code{\link[SummarizedExperiment]{colData}} to separate each frequency plot.} 31 | 32 | \item{geom}{A character with the desired geom} 33 | 34 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 35 | should be parallelized.} 36 | } 37 | \value{ 38 | A ggplot object with a frequency plot 39 | } 40 | \description{ 41 | plotFreq 42 | } 43 | \details{ 44 | \code{plotFreq} retrieves the data from the desired assay and creates 45 | an event matrix based on the high and low thresholds arguments. Values above 46 | the high threshold will be classified as gains whereas values below the low threshold are 47 | classified as deletions. The resulting plot is a frequency plot where values 48 | above 0 represent the frequency of gains and values below 0 represent the 49 | frequency of deletions. 50 | 51 | If the argument 'group' is provided the frequency plot will be calculated 52 | separately for each group. Group can be any string column from 53 | \code{\link[SummarizedExperiment]{colData}} 54 | 55 | The following geoms are available: 56 | 57 | \itemize{ 58 | \item{area}: If geom = 'area' an area plot with the frequency is plotted. 59 | If the group argument is provided a different facet will be plotted for each 60 | group 61 | 62 | \item{line}: If geom = 'line' a line plot with the frequency is plotted. 63 | If the group argument is provided, lines are overlapped with different colors.
64 | 65 | } 66 | } 67 | \examples{ 68 | set.seed(1000) 69 | copykit_obj <- copykit_example_filtered()[, sample(40)] 70 | plotFreq(copykit_obj) 71 | } 72 | -------------------------------------------------------------------------------- /man/plotGeneCopy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotGeneCopy.R 3 | \name{plotGeneCopy} 4 | \alias{plotGeneCopy} 5 | \title{plotGeneCopy} 6 | \usage{ 7 | plotGeneCopy( 8 | scCNA, 9 | genes, 10 | geom = c("swarm", "barplot", "violin"), 11 | label = NULL, 12 | facet = NULL, 13 | dodge.width = 0, 14 | assay = "segment_ratios" 15 | ) 16 | } 17 | \arguments{ 18 | \item{scCNA}{scCNA object.} 19 | 20 | \item{genes}{A vector of strings containing the HUGO Symbol for the gene 21 | of interest.} 22 | 23 | \item{geom}{A string with the geom for plotting.} 24 | 25 | \item{label}{A string with the name of the column from 26 | \code{\link[SummarizedExperiment]{colData}} to color the points} 27 | 28 | \item{facet}{A string with the name of the column from 29 | \code{\link[SummarizedExperiment]{colData}} to separate the plot into facets.} 30 | 31 | \item{dodge.width}{A numeric that adds dodge between the label categories.} 32 | 33 | \item{assay}{String with the name of the assay for plotting.} 34 | } 35 | \value{ 36 | A ggplot object with a plot of the gene-wise copy number states. 37 | } 38 | \description{ 39 | Visualization for gene-wise copy number states 40 | } 41 | \details{ 42 | plotGeneCopy finds overlaps of the varbin scaffolds genomic ranges 43 | which can be accessed with \code{\link[SummarizedExperiment]{rowRanges}} 44 | with the genes from the assemblies of either hg19 or hg38. The genomic ranges 45 | from hg19 comes from package \code{TxDb.Hsapiens.UCSC.hg19.knownGene} whereas 46 | for hg38 package \code{TxDb.Hsapiens.UCSC.hg38.knownGene}. 
47 | 48 | If the argument geom is set to 'barplot' plotGeneCopy calculates gene-wise 49 | frequencies of each copy number state for the selected genes across all of 50 | the cells. Geom 'barplot' can only be used with the argument 51 | assay set to 'integer'. 52 | } 53 | \examples{ 54 | copykit_obj <- copykit_example_filtered() 55 | copykit_obj <- findClusters(copykit_obj) 56 | plotGeneCopy(copykit_obj, genes = c("FHIT", "PTEN", "FOXO1", "BRCA1")) 57 | plotGeneCopy(copykit_obj, 58 | genes = c("FHIT", "PTEN", "FOXO1", "BRCA1"), 59 | label = "subclones" 60 | ) 61 | plotGeneCopy(copykit_obj, 62 | genes = c("FHIT", "PTEN", "FOXO1", "BRCA1"), 63 | label = "subclones", dodge.width = 0.8 64 | ) 65 | } 66 | \author{ 67 | Darlan Conterno Minussi 68 | } 69 | -------------------------------------------------------------------------------- /man/plotHeatmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotHeatmap.R 3 | \name{plotHeatmap} 4 | \alias{plotHeatmap} 5 | \title{plotHeatmap} 6 | \usage{ 7 | plotHeatmap( 8 | scCNA, 9 | assay = "segment_ratios", 10 | order_cells = NULL, 11 | label = NULL, 12 | label_colors = NULL, 13 | group = NULL, 14 | consensus = FALSE, 15 | rounding_error = FALSE, 16 | genes = NULL, 17 | col = NULL, 18 | row_split = NULL, 19 | use_raster = TRUE, 20 | raster_quality = 2, 21 | n_threads = 1 22 | ) 23 | } 24 | \arguments{ 25 | \item{scCNA}{The CopyKit object.} 26 | 27 | \item{assay}{String with the assay to pull data from to plot heatmap.} 28 | 29 | \item{order_cells}{A string with the desired method to order the cells within} 30 | 31 | \item{label}{A vector with the string names of the columns from 32 | \code{\link[SummarizedExperiment]{colData}} for heatmap annotation.} 33 | 34 | \item{label_colors}{A named list with colors for the label annotation. 
35 | Must match label length and have the same names as label} 36 | 37 | \item{group}{A string with the names of the columns from 38 | \code{\link[SummarizedExperiment]{colData}} to add a barplot with frequency 39 | of the groups to a consensus heatmap.} 40 | 41 | \item{consensus}{A boolean indicating if the consensus heatmap should be 42 | plotted.} 43 | 44 | \item{rounding_error}{A boolean indicating if the rounding error matrix 45 | should be plotted.} 46 | 47 | \item{genes}{A character vector with the HUGO symbol for genes to annotate 48 | on the heatmap.} 49 | 50 | \item{col}{A colorRamp2 vector that controls the color scale of the heatmap. 51 | See \code{\link[ComplexHeatmap]{Heatmap}} or ComplexHeatmap online docs for 52 | help.} 53 | 54 | \item{row_split}{A string with the names of the columns from 55 | \code{\link[SummarizedExperiment]{colData}} to split the heatmap.} 56 | 57 | \item{use_raster}{Whether to render the heatmap body as a raster image. 58 | It helps to reduce file size when the matrix is huge. If number of rows or 59 | columns is more than 2000, it is by default turned on. Note if cell_fun is 60 | set, use_raster is enforced to be FALSE. 61 | see \code{\link[ComplexHeatmap]{Heatmap}}.} 62 | 63 | \item{raster_quality}{A value larger than 1. Larger values increase the 64 | file size.} 65 | 66 | \item{n_threads}{Number of threads passed on to \code{runDistMat}.} 67 | } 68 | \value{ 69 | A \code{ComplexHeatmap} object with a heatmap of copy number data 70 | where the columns are the genomic positions and each row is a cell. 71 | } 72 | \description{ 73 | Plots a heatmap of the copy number data. 74 | Each row is a cell and columns represent genomic positions. 75 | } 76 | \details{ 77 | \itemize{ 78 | \item{order_cells}: If order_cells argument is set to 'consensus_tree' 79 | \code{\link{plotHeatmap}} checks for the existence of a consensus matrix.
80 | From the consensus matrix, a minimum evolution tree is built and cells are 81 | ordered following the order of their respective groups from the tree. 82 | If order_cells is 'hclust' cells are ordered according to hierarchical 83 | clustering. 'hclust' calculation can be sped up by changing the parameter 84 | 'n_threads' if you have more threads available to use. If order_cells 85 | is NULL the order of cells will be the same as the current order inside 86 | the CopyKit object (colnames(CopyKit)). 87 | 88 | \item{label}: A vector with the string names of the columns from 89 | \code{\link[SummarizedExperiment]{colData}} for heatmap annotation. The 90 | 'label' argument can take as many columns as desired as argument as long 91 | as they are elements from \code{\link[SummarizedExperiment]{colData}}. 92 | 93 | \item{label_colors}: A named list, list element names must match column 94 | names for \code{\link[SummarizedExperiment]{colData}} and list elements 95 | must match the number of items present in the columns provided in argument 96 | 'label'. For example: to set colors for column 'outlier' containing 97 | elements 'TRUE' or 'FALSE' a valid input would be: 98 | 'list(outlier = c('FALSE' = 'green', 'TRUE' = 'red'))'. 99 | Default colors are provided for 'superclones', 'subclones', 100 | 'is_aneuploid', and 'outlier' that can be overridden with 'label_colors'. 101 | 102 | \item{rounding_error}: Must be used with assay = 'integer'. 103 | \code{plotHeatmap} will access the ploidies in colData(scCNA)$ploidy 104 | that are generated from \code{\link{calcInteger}} and scale rounded integer 105 | values to the segment means. Later this scaled matrix will be subtracted 106 | from the 'integer' assay from \code{\link{calcInteger}} and the resulting 107 | matrix from this subtraction will be plotted. Useful to visualize regions 108 | of high rounding error. Such regions can indicate issues with the ploidy 109 | scaling in use.
110 | 111 | \item{consensus}: If set to TRUE, \code{\link{plotHeatmap}} will search for 112 | the consensus matrix in the slot \code{\link{consensus}} and plot the 113 | resulting matrix. Labels annotations can be added with the argument 'label'. 114 | 115 | } 116 | } 117 | \examples{ 118 | copykit_obj <- copykit_example_filtered() 119 | set.seed(1000) 120 | copykit_obj <- copykit_obj[, sample(200)] 121 | copykit_obj <- findClusters(copykit_obj) 122 | copykit_obj <- calcConsensus(copykit_obj) 123 | copykit_obj <- runConsensusPhylo(copykit_obj) 124 | colData(copykit_obj)$section <- stringr::str_extract( 125 | colData(copykit_obj)$sample, 126 | "(L[0-9]+L[0-9]+|L[0-9]+)" 127 | ) 128 | plotHeatmap(copykit_obj, label = c("section", "subclones")) 129 | } 130 | \references{ 131 | Zuguang Gu, Roland Eils, Matthias Schlesner, Complex heatmaps 132 | reveal patterns and correlations in multidimensional genomic data, 133 | Bioinformatics, Volume 32, Issue 18, 15 September 2016, Pages 2847–2849, 134 | https://doi.org/10.1093/bioinformatics/btw313 135 | } 136 | \seealso{ 137 | \code{\link{calcInteger}}. 
138 | } 139 | \author{ 140 | Darlan Conterno Minussi 141 | } 142 | -------------------------------------------------------------------------------- /man/plotMetrics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotMetrics.R 3 | \name{plotMetrics} 4 | \alias{plotMetrics} 5 | \title{plotMetrics} 6 | \usage{ 7 | plotMetrics(scCNA, metric, label = NULL, dodge.width = 0, ncol = 2) 8 | } 9 | \arguments{ 10 | \item{scCNA}{scCNA object.} 11 | 12 | \item{metric}{A character indicating which elements of \code{colData()} 13 | should be plotted.} 14 | 15 | \item{label}{A character indicating which element of the \code{colData()} to 16 | color the plots.} 17 | 18 | \item{dodge.width}{A numeric that adds dodge between the label categories.} 19 | 20 | \item{ncol}{A Integer specifying the number of columns to be used for the 21 | panels of a multi-facet plot.} 22 | } 23 | \value{ 24 | A ggplot object with swarm plots of the selected metrics. 25 | } 26 | \description{ 27 | Plots swarm plots from selected elements of \code{\link[SummarizedExperiment]{colData}}. 
28 | } 29 | \examples{ 30 | copykit_obj <- mock_bincounts() 31 | copykit_obj <- runMetrics(copykit_obj) 32 | } 33 | \author{ 34 | Darlan Conterno Minussi 35 | } 36 | -------------------------------------------------------------------------------- /man/plotPca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPca.R 3 | \name{plotPca} 4 | \alias{plotPca} 5 | \title{plotPca} 6 | \usage{ 7 | plotPca(scCNA, embedding = "PCA", label = NULL) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | 12 | \item{embedding}{String with the name of the reducedDim to pull data from.} 13 | 14 | \item{label}{A string with the elements from 15 | \code{\link[SummarizedExperiment]{colData}} to color the PCA points.} 16 | } 17 | \value{ 18 | A ggplot object containing the reduced dimensions PCA plot. 19 | } 20 | \description{ 21 | Plots PCA embedding stored in \code{\link[SingleCellExperiment]{reducedDim}} 22 | slot. 23 | } 24 | \details{ 25 | A reduced dimension representation with PCA in the slot 26 | \code{\link[SingleCellExperiment]{reducedDim}} from the scCNA object. 27 | 28 | Columns from \code{\link[SummarizedExperiment]{colData}} can 29 | be used as an argument for 'label' to color the points on the plot.
30 | } 31 | \examples{ 32 | set.seed(1000) 33 | copykit_obj <- copykit_example_filtered()[,sample(300)] 34 | copykit_obj <- runPca(copykit_obj) 35 | 36 | plotPca(copykit_obj) 37 | 38 | copykit_obj <- findClusters(copykit_obj) 39 | 40 | plotPca(copykit_obj, label = "subclones") 41 | 42 | } 43 | \author{ 44 | Darlan Conterno Minussi 45 | } 46 | -------------------------------------------------------------------------------- /man/plotPhylo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotPhylo.R 3 | \name{plotPhylo} 4 | \alias{plotPhylo} 5 | \title{plotPhylo()} 6 | \usage{ 7 | plotPhylo( 8 | scCNA, 9 | label = NULL, 10 | label_colors = NULL, 11 | consensus = FALSE, 12 | group = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{scCNA}{scCNA object.} 17 | 18 | \item{label}{A string with the element of 19 | \code{\link[SummarizedExperiment]{colData}}. to annotate the tips of the 20 | tree.} 21 | 22 | \item{label_colors}{A named list with colors for the label annotation. 23 | Must match label length} 24 | 25 | \item{consensus}{A boolean indicating if the consensus phylogenetic tree 26 | should be plotted.} 27 | 28 | \item{group}{A string that if provided will plot the tip labels as pie charts 29 | with the proportions from the provided element from 30 | \code{\link[SummarizedExperiment]{colData}}} 31 | } 32 | \value{ 33 | A ggplot object with a phylogenetic tree visualization. 34 | } 35 | \description{ 36 | Plots a phylogenetic tree. 
37 | } 38 | \examples{ 39 | set.seed(1000) 40 | copykit_obj <- copykit_example_filtered()[,sample(100)] 41 | copykit_obj <- findClusters(copykit_obj) 42 | copykit_obj <- runPhylo(copykit_obj) 43 | plotPhylo(copykit_obj, label = "subclones") 44 | } 45 | \author{ 46 | Junke Wang 47 | } 48 | -------------------------------------------------------------------------------- /man/plotRatio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotRatio.R 3 | \name{plotRatio} 4 | \alias{plotRatio} 5 | \title{Plot ratio} 6 | \usage{ 7 | plotRatio(scCNA, sample_name = NULL) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | 12 | \item{sample_name}{Optional character vector with the name of the sample to 13 | be visualized} 14 | } 15 | \value{ 16 | Opens an app for interactive visualization of the ratio plots where 17 | the desired cell can be selected. If a 'sample_name' is provided, 18 | returns a ggplot object with the ratio plot for the selected cell. 19 | } 20 | \description{ 21 | plotRatio allows for a visualization of the segment ratios together with the 22 | ratios. 23 | } 24 | \details{ 25 | plotRatio will return a ratio plot for the selected cell. If 26 | \code{\link{calcInteger}} was run and an 'integer' assay exists, plotRatio 27 | will color the ratio dots based on the copy number integer state of the 28 | segment automatically. 
29 | } 30 | \examples{ 31 | \dontrun{ 32 | copykit_obj <- copykit_example_filtered() 33 | plotRatio(copykit_obj) 34 | } 35 | 36 | } 37 | \author{ 38 | Darlan Conterno Minussi 39 | } 40 | -------------------------------------------------------------------------------- /man/plotScree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotScree.R 3 | \name{plotScree} 4 | \alias{plotScree} 5 | \title{plotScree} 6 | \usage{ 7 | plotScree(scCNA, ncomponents = 20) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object} 11 | 12 | \item{ncomponents}{Number of principal components to plot.} 13 | } 14 | \value{ 15 | A ggplot object with The variance explained per principal component. 16 | } 17 | \description{ 18 | Plots the variance explained by the different principal components 19 | } 20 | \examples{ 21 | set.seed(1000) 22 | copykit_obj <- copykit_example_filtered()[,sample(400)] 23 | copykit_obj <- runPca(copykit_obj) 24 | plotScree(copykit_obj) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/plotSuggestedK.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotSuggestedK.R 3 | \name{plotSuggestedK} 4 | \alias{plotSuggestedK} 5 | \title{plotSuggestedK} 6 | \usage{ 7 | plotSuggestedK(scCNA, geom = c("boxplot", "tile", "dotplot", "scatterplot")) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The scCNA object.} 11 | 12 | \item{geom}{A character with the geom to be used for plotting.} 13 | } 14 | \value{ 15 | A ggplot2 object with the plot of different tested k values and their 16 | jaccard similarity for each subclone 17 | } 18 | \description{ 19 | Uses the information from \code{\link{findSuggestedK}} to plot the values 20 | of jaccard similarity from the tested k range on 
\code{\link{findSuggestedK}}. 21 | } 22 | \details{ 23 | \code{\link{plotSuggestedK}} accesses the \code{\link[S4Vectors]{metadata}} 24 | element suggestedK_df that is saved to the scCNA object after running 25 | \code{\link{findSuggestedK}}. The dataframe is used for plotting either a 26 | heatmap, when the argument geom = 'tile', or a dotplot when argument geom = 27 | 'dotplot' or a boxplot when geom = 'boxplot'. 28 | 29 | \itemize{ 30 | \item{geom = 'boxplot':} Plots a boxplot of the jaccard similarities across 31 | all clusters detected in the grid search. The large red points represent 32 | the mean jaccard similarity. 33 | 34 | \item{geom = 'tile':} Plots a heatmap of the jaccard similarities across 35 | all clusters detected in the grid search. The fill colors represent the 36 | jaccard similarity value. Rows represent clusters and columns the k value. 37 | 38 | \item{geom = 'dotplot':} Plots a dotplot of the jaccard similarities across 39 | all clusters detected in the grid search. The size of the dots represents 40 | the jaccard similarity for each assessed k value. 41 | 42 | \item{geom = 'scatterplot':} Plots a scatterplot of the jaccard similarity 43 | explained by the number of cells. Points are colored by subclone and lines 44 | represent a linear regression across the points.
45 | 46 | } 47 | } 48 | \examples{ 49 | copykit_obj <- copykit_example_filtered() 50 | copykit_obj <- findSuggestedK(copykit_obj) 51 | plotSuggestedK(copykit_obj) 52 | } 53 | -------------------------------------------------------------------------------- /man/plotUmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotUmap.R 3 | \name{plotUmap} 4 | \alias{plotUmap} 5 | \title{plotUmap} 6 | \usage{ 7 | plotUmap(scCNA, embedding = "umap", label = NULL) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | 12 | \item{embedding}{String with the name of the reducedDim to pull data from.} 13 | 14 | \item{label}{A string with the elements from 15 | \code{\link[SummarizedExperiment]{colData}} to color the umap points.} 16 | } 17 | \value{ 18 | A ggplot object containing the reduced dimensions UMAP plot. 19 | } 20 | \description{ 21 | Plots UMAP embedding stored in \code{\link[SingleCellExperiment]{reducedDim}} 22 | slot. 23 | } 24 | \details{ 25 | A reduced dimension representation with UMAP in the slot 26 | \code{\link[SingleCellExperiment]{reducedDim}} from the scCNA object. 27 | 28 | Columns from \code{\link[SummarizedExperiment]{colData}} can 29 | be used as an argument for 'label' to color the points on the plot. 
30 | } 31 | \examples{ 32 | set.seed(1000) 33 | copykit_obj <- copykit_example_filtered()[,sample(300)] 34 | copykit_obj <- runUmap(copykit_obj) 35 | 36 | plotUmap(copykit_obj) 37 | 38 | copykit_obj <- findClusters(copykit_obj) 39 | 40 | plotUmap(copykit_obj, label = "subclones") 41 | 42 | colData(copykit_obj)$section <- stringr::str_extract( 43 | colData(copykit_obj)$sample, 44 | "(L[0-9]+L[0-9]+|L[0-9]+)" 45 | ) 46 | 47 | plotUmap(copykit_obj, label = c("section")) 48 | } 49 | \author{ 50 | Darlan Conterno Minussi 51 | } 52 | -------------------------------------------------------------------------------- /man/plotVariableGenes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotVariableGenes.R 3 | \name{plotVariableGenes} 4 | \alias{plotVariableGenes} 5 | \title{plotVariableGenes} 6 | \usage{ 7 | plotVariableGenes(scCNA, n = 30) 8 | } 9 | \arguments{ 10 | \item{scCNA}{scCNA object.} 11 | 12 | \item{n}{A numeric defining how many variable genes will be plotted.} 13 | } 14 | \value{ 15 | A ggplot object with a plot of the variable genes detected. 16 | } 17 | \description{ 18 | Visualization for the most variable genes found with \code{findVariableGenes}. 19 | } 20 | \details{ 21 | \code{plotVariableGenes} plots the list of genes that was found 22 | using \code{findVariableGenes}. 
23 | } 24 | \examples{ 25 | copykit_obj <- copykit_example_filtered() 26 | copykit_obj <- findVariableGenes(copykit_obj, 27 | genes = c("FHIT", "PTEN", "FOXO1", "BRCA1") 28 | ) 29 | plotVariableGenes(copykit_obj) 30 | } 31 | \seealso{ 32 | \code{\link{findVariableGenes}} 33 | } 34 | -------------------------------------------------------------------------------- /man/runConsensusPhylo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runConsensusPhylo.R 3 | \name{runConsensusPhylo} 4 | \alias{runConsensusPhylo} 5 | \title{runConsensusPhylo} 6 | \usage{ 7 | runConsensusPhylo(scCNA, root = c("mrca", "neutral", "user"), root_user = NULL) 8 | } 9 | \arguments{ 10 | \item{scCNA}{The scCNA object.} 11 | 12 | \item{root}{A string indicating how to root the consensus tree.} 13 | 14 | \item{root_user}{A numeric with the vector to be used as root of the tree if 15 | \code{root} is set to 'user'. Must have the same length as the number of bins 16 | of the genome scaffold.} 17 | } 18 | \value{ 19 | A phylo object with a consensus tree stored in the consensusPhylo slot 20 | of the CopyKit object. 
21 | } 22 | \description{ 23 | Runs a minimal evolution tree algorithm for the consensus data frame 24 | } 25 | \examples{ 26 | copykit_obj <- copykit_example_filtered() 27 | copykit_obj <- findClusters(copykit_obj) 28 | copykit_obj <- calcConsensus(copykit_obj) 29 | copykit_obj <- runConsensusPhylo(copykit_obj) 30 | plotPhylo(copykit_obj, consensus = TRUE, label = "subclones") 31 | } 32 | -------------------------------------------------------------------------------- /man/runCountReads.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runCountReads.R 3 | \name{runCountReads} 4 | \alias{runCountReads} 5 | \title{Aligns the reads from the BAM file to the variable binning pipeline.} 6 | \usage{ 7 | runCountReads( 8 | dir, 9 | genome = c("hg38", "hg19"), 10 | resolution = c("220kb", "55kb", "110kb", "195kb", "280kb", "500kb", "1Mb", "2.8Mb"), 11 | remove_Y = FALSE, 12 | min_bincount = 10, 13 | is_paired_end = FALSE, 14 | BPPARAM = bpparam() 15 | ) 16 | } 17 | \arguments{ 18 | \item{dir}{A path for the directory containing BAM files from short-read 19 | sequencing.} 20 | 21 | \item{genome}{Name of the genome assembly. Default: 'hg38'.} 22 | 23 | \item{resolution}{The resolution of the VarBin method. 
Default: '220kb'.} 24 | 25 | \item{remove_Y}{(default == FALSE) If set to TRUE, removes information from 26 | the chrY from the dataset.} 27 | 28 | \item{min_bincount}{A numerical indicating the minimum mean bin counts a 29 | cell should have to remain in the dataset.} 30 | 31 | \item{is_paired_end}{A boolean indicating if bam files are from single-read 32 | or paired-end sequencing.} 33 | 34 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 35 | should be parallelized.} 36 | } 37 | \value{ 38 | A matrix of bin counts within the scCNA object that can be accessed 39 | with \code{bincounts}. 40 | } 41 | \references{ 42 | Navin, N., Kendall, J., Troge, J. et al. Tumour evolution inferred by 43 | single-cell sequencing. Nature 472, 90–94 (2011). 44 | https://doi.org/10.1038/nature09807 45 | 46 | Baslan, T., Kendall, J., Ward, B., et al (2015). Optimizing sparse sequencing 47 | of single cells for highly multiplex copy number profiling. 48 | Genome research, 25(5), 714–724. https://doi.org/10.1101/gr.188060.114 49 | } 50 | \description{ 51 | runCountReads performs the variable binning (VarBin) algorithm to a series of 52 | BAM files resulting from short-read sequencing. 53 | } 54 | \details{ 55 | \code{runCountReads} takes as input duplicate marked BAM files from 56 | whole genome sequencing and runs the variable binning pipeline algorithm. 57 | It is important that BAM files are duplicate marked. Briefly, the genome is 58 | split into pre-determined bins. The bin size is controlled by the argument 59 | \code{resolution}. By using VarBin, for a diploid cell, each bin will 60 | receive equal amount of reads, controlling for mappability. 61 | A lowess function is applied to perform GC correction across the bins. 62 | The argument \code{genome} can be set to 'hg38' or 'hg19' to select the 63 | scaffolds genome assembly.
The scaffolds are GenomicRanges objects. 64 | Information regarding the alignment of the reads to the bins and from the bam 65 | files is stored in the \code{\link[SummarizedExperiment]{colData}}. 66 | \code{min_bincount} indicates the minimum mean bincount a cell must present 67 | to be kept in the dataset. Cells with low bincounts generally present bin 68 | dropouts due to low read count that will be poorly segmented. 69 | } 70 | \examples{ 71 | \dontrun{ 72 | copykit_obj <- runCountReads("/PATH/TO/BAM/FILES") 73 | } 74 | 75 | } 76 | \author{ 77 | Darlan Conterno Minussi 78 | } 79 | -------------------------------------------------------------------------------- /man/runDistMat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runDistMat.R 3 | \name{runDistMat} 4 | \alias{runDistMat} 5 | \title{Run distance matrix calculations} 6 | \usage{ 7 | runDistMat(scCNA, metric = "euclidean", n_threads = 1) 8 | } 9 | \arguments{ 10 | \item{scCNA}{scCNA object.} 11 | 12 | \item{metric}{distance metric passed to calculate the distance matrix.} 13 | 14 | \item{n_threads}{Number of threads used to calculate the distance matrix. 15 | Passed to \code{amap::Dist}.} 16 | } 17 | \value{ 18 | A distance matrix in the slot \code{distMat} from scCNA object. 19 | Access the distance matrix with: \code{distMat(scCNA, withDimnames = TRUE)} 20 | } 21 | \description{ 22 | Performs distance matrix calculations that can be downstream used for 23 | hierarchical clustering or phylogenetic analysis. Uses \code{amap::Dist()} 24 | in order to parallelize distance calculations.
25 | } 26 | \examples{ 27 | copykit_obj <- copykit_example_filtered()[,1:10] 28 | copykit_obj <- runDistMat(copykit_obj) 29 | } 30 | \author{ 31 | Darlan Conterno Minussi 32 | } 33 | -------------------------------------------------------------------------------- /man/runMetrics.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runMetrics.R 3 | \name{runMetrics} 4 | \alias{runMetrics} 5 | \title{Run metrics} 6 | \usage{ 7 | runMetrics(scCNA, BPPARAM = bpparam()) 8 | } 9 | \arguments{ 10 | \item{scCNA}{scCNA object.} 11 | 12 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 13 | should be parallelized.} 14 | } 15 | \value{ 16 | Adds columns 'overdispersion' and 'breakpoint_count' to 17 | \code{\link[SummarizedExperiment]{colData}}. 18 | } 19 | \description{ 20 | Calculates the overdispersion and the breakpoint counts for each cell. 21 | } 22 | \details{ 23 | Adds the metrics to the scCNA \code{\link[SummarizedExperiment]{colData}}. 24 | Those metrics can be used for subsetting the data if desired. 25 | results can be visualized with \code{\link{plotMetrics}}. 26 | } 27 | \examples{ 28 | copykit_obj <- mock_bincounts() 29 | copykit_obj <- runMetrics(copykit_obj) 30 | } 31 | \author{ 32 | Darlan Conterno Minussi 33 | } 34 | -------------------------------------------------------------------------------- /man/runPca.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runPca.R 3 | \name{runPca} 4 | \alias{runPca} 5 | \title{runPca()} 6 | \usage{ 7 | runPca(scCNA, assay = "logr", name = "PCA", scale = FALSE, ...) 
8 | } 9 | \arguments{ 10 | \item{scCNA}{The CopyKit object.} 11 | 12 | \item{assay}{String with the name of the assay to pull data from to make the 13 | embedding.} 14 | 15 | \item{name}{String specifying the name to be used to store the result in the 16 | reducedDims of the output.} 17 | 18 | \item{scale}{A logical value indicating whether the variables should be 19 | scaled to have unit variance before the analysis takes place.} 20 | 21 | \item{...}{Additional parameters passed to \code{\link[stats]{prcomp}}.} 22 | } 23 | \value{ 24 | A reduced dimension representation with PCA in the slot 25 | \code{reducedDim} from scCNA object. Access reduced dimensions slot with: 26 | \code{reducedDim(scCNA, 'PCA', withDimnames = FALSE)} 27 | } 28 | \description{ 29 | Creates a PCA embedding with \code{\link[stats]{prcomp}} from the values of the selected assay 30 | } 31 | \examples{ 32 | set.seed(1000) 33 | copykit_obj <- copykit_example_filtered()[,sample(100)] 34 | copykit_obj <- runPca(copykit_obj) 35 | } 36 | \author{ 37 | Darlan Conterno Minussi 38 | } 39 | -------------------------------------------------------------------------------- /man/runPhylo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runPhylo.R 3 | \name{runPhylo} 4 | \alias{runPhylo} 5 | \title{Run phylogenetic analysis} 6 | \usage{ 7 | runPhylo( 8 | scCNA, 9 | method = "nj", 10 | metric = "euclidean", 11 | assay = "segment_ratios", 12 | n_threads = parallel::detectCores()/4 13 | ) 14 | } 15 | \arguments{ 16 | \item{scCNA}{scCNA object.} 17 | 18 | \item{method}{Phylogenetic method to be run, currently accepts "nj" (neighbor-joining) and "me" (minimum evolution). Defaults to "nj".} 19 | 20 | \item{metric}{distance metric passed to construct the phylogeny (Defaults to "euclidean").} 21 | 22 | \item{assay}{String with the name of the assay to pull data from to run phylogenetic analysis.
Note that only assay named "integer" will be treated as integer.} 23 | 24 | \item{n_threads}{Number of threads used to calculate the distance matrix. Passed to `amap::Dist`} 25 | } 26 | \value{ 27 | A rooted phylogenetic tree object in the slot \code{phylo} from scCNA object. Access phylo slot with: \code{copykit::phylo(scCNA)} 28 | } 29 | \description{ 30 | Performs phylogenetic analysis 31 | } 32 | \examples{ 33 | set.seed(1000) 34 | copykit_obj <- copykit_example_filtered()[,sample(50)] 35 | copykit_obj <- runPhylo(copykit_obj) 36 | } 37 | \author{ 38 | Darlan Conterno Minussi 39 | 40 | Junke Wang 41 | } 42 | -------------------------------------------------------------------------------- /man/runSegmentation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runSegmentation.R 3 | \name{runSegmentation} 4 | \alias{runSegmentation} 5 | \title{Run Segmentation} 6 | \usage{ 7 | runSegmentation( 8 | scCNA, 9 | method = c("CBS", "multipcf"), 10 | seed = 17, 11 | alpha = 1e-05, 12 | merge_levels_alpha = 1e-05, 13 | gamma = 40, 14 | undo.splits = "prune", 15 | name = "segment_ratios", 16 | BPPARAM = bpparam() 17 | ) 18 | } 19 | \arguments{ 20 | \item{scCNA}{The scCNA object} 21 | 22 | \item{method}{A character with the segmentation method of choice.} 23 | 24 | \item{seed}{Numeric. Set seed for CBS permutation reproducibility} 25 | 26 | \item{alpha}{A numeric with the significance levels for the test to accept 27 | change-points for CBS segmentation. See \code{\link[DNAcopy]{segment}}.} 28 | 29 | \item{merge_levels_alpha}{A numeric with the significance levels for the 30 | merge levels test to accept two different segments.} 31 | 32 | \item{gamma}{A numeric passed on to 'multipcf' segmentation. Penalty for each 33 | discontinuity in the curve. 
See \code{\link[copynumber]{multipcf}}.} 34 | 35 | \item{undo.splits}{A character string specifying how change-points are to be 36 | undone, if at all. Default is "prune", which uses a sum of squares 37 | criterion. Other choices are "none" and "sdundo", which undoes splits that 38 | are not at least this many SDs apart. See \code{\link[DNAcopy]{segment}}.} 39 | 40 | \item{name}{Character. Target slot for the resulting segment ratios.} 41 | 42 | \item{BPPARAM}{A \linkS4class{BiocParallelParam} specifying how the function 43 | should be parallelized.} 44 | } 45 | \value{ 46 | The segment profile for all cells inside the scCNA object. 47 | } 48 | \description{ 49 | Runs a segmentation algorithm using the ratio data. 50 | } 51 | \details{ 52 | \itemize{ 53 | 54 | \item{CBS:} \code{runSegmentation} fits a piece-wise constant function 55 | to the transformed and smoothed bin counts. Bin counts are smoothed with 56 | \code{\link[DNAcopy]{smooth.CNA}} and segmented with the Circular Binary Segmentation 57 | (CBS) algorithm from \code{\link[DNAcopy]{segment}}. By default it 58 | applies undo.prune with value of 0.05. 59 | 60 | \item{multipcf:} Performs the joint segmentation from the 61 | \code{copynumber} package using the \code{\link[copynumber]{multipcf}} 62 | function, fitting piecewise constant curves with common breakpoints 63 | for all samples. 64 | 65 | } 66 | 67 | 68 | The resulting segment means are further refined with MergeLevels to join 69 | adjacent segments with non-significant differences in segmented means.
70 | } 71 | \examples{ 72 | copykit_obj <- mock_bincounts(ncells = 10) 73 | copykit_obj <- runSegmentation(copykit_obj) 74 | } 75 | -------------------------------------------------------------------------------- /man/runUmap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runUmap.R 3 | \name{runUmap} 4 | \alias{runUmap} 5 | \title{Creates UMAP embedding} 6 | \usage{ 7 | runUmap( 8 | scCNA, 9 | assay = "logr", 10 | seed = 17, 11 | min_dist = 0, 12 | n_neighbors = 50, 13 | name = "umap", 14 | ncomponents = 2, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{scCNA}{scCNA object.} 20 | 21 | \item{assay}{String with the name of the assay to pull data from to make the 22 | embedding.} 23 | 24 | \item{seed}{Sets a seed for the pseudorandom number generator.} 25 | 26 | \item{min_dist}{The effective minimum distance between embedded points. 27 | Smaller values will result in a more clustered/clumped embedding where nearby 28 | points on the manifold are drawn closer together, while larger values will 29 | result on a more even dispersal of points. The value should be set relative 30 | to the spread value, which determines the scale at which embedded points 31 | will be spread out. See \code{\link[uwot]{umap}}.} 32 | 33 | \item{n_neighbors}{The size of local neighborhood (in terms of number of 34 | neighboring sample points) used for manifold approximation. 35 | Larger values result in more global views of the manifold, 36 | while smaller values result in more local data being preserved. 37 | In general values should be in the range 2 to 100. 38 | See \code{\link[uwot]{umap}}.} 39 | 40 | \item{name}{String specifying the name to be used to store the result in the 41 | reducedDims of the output.} 42 | 43 | \item{ncomponents}{The dimension of the space to embed into. 
See 44 | \code{\link[uwot]{umap}}.} 45 | 46 | \item{...}{Additional parameters passed to \code{\link[uwot]{umap}}.} 47 | } 48 | \value{ 49 | A reduced dimension representation with UMAP in the slot 50 | \code{reducedDim} from scCNA object. Access reduced dimensions slot with: 51 | \code{reducedDim(scCNA, 'umap', withDimnames = FALSE)} 52 | } 53 | \description{ 54 | Creates a umap embedding using the package uwot from the segment ratios 55 | values 56 | } 57 | \examples{ 58 | copykit_obj <- copykit_example_filtered() 59 | copykit_obj <- runUmap(copykit_obj) 60 | } 61 | \author{ 62 | Darlan Conterno Minussi 63 | } 64 | -------------------------------------------------------------------------------- /man/runVst.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/runVst.R 3 | \name{runVst} 4 | \alias{runVst} 5 | \title{Variance Stabilizing Transformation} 6 | \usage{ 7 | runVst(scCNA, transformation = c("ft", "log"), assay = "bincounts") 8 | } 9 | \arguments{ 10 | \item{scCNA}{The scCNA object} 11 | 12 | \item{transformation}{A character indicating the variance stabilization 13 | transformation to be performed. See \link{runVst} details.} 14 | 15 | \item{assay}{A character indicating the assay slot to extract the bincounts 16 | for variance stabilization} 17 | } 18 | \value{ 19 | A slot into the scCNA object containing the variance stabilized matrix. 20 | } 21 | \description{ 22 | Performs variance stabilization transformation of the bin counts 23 | } 24 | \details{ 25 | \code{runVst} performs variance stabilization to reduce the overdispersion 26 | from the negative binomial distribution nature of the bin counts and reduce 27 | technical bias. 
The argument \code{transformation} controls the choice of the transformation 28 | allowing either the Freeman-Tukey transformation by using the option 'ft' (recommended) 29 | or a logarithmic transformation with the option 'log'. Using a 'log' transformation 30 | may result in long segmentation times for a few cells with large breakpoint counts. 31 | } 32 | \examples{ 33 | copykit_obj <- mock_bincounts(ncells = 10) 34 | copykit_obj <- runVst(copykit_obj) 35 | } 36 | \references{ 37 | Freeman, M. F.; Tukey, J. W. (1950), "Transformations related to the angular 38 | and the square root", The Annals of Mathematical Statistics, 39 | 21 (4), pp. 607–611, doi:10.1214/aoms/1177729756, JSTOR 2236611 40 | } 41 | -------------------------------------------------------------------------------- /snakemake_pipelines/bowtie2_markdup/README.md: -------------------------------------------------------------------------------- 1 | # Alignment and Mark Duplicates 2 | 3 | This [snakemake](https://snakemake.readthedocs.io/en/stable/) pipeline aligns reads from fastq files and marks PCR duplicates. 4 | 5 | # Output 6 | 7 | A sorted `.bam` file for each aligned sample with duplicates marked. 8 | 9 | # Installation 10 | 11 | Make sure to install and properly run snakemake, clone this repository and run the snakefile. 12 | 13 | # Usage 14 | 15 | All `.fastq.gz` files must be inside a folder called `fastq`. After that run: 16 | 17 | ``` 18 | snakemake --snakefile /PATH/TO/SNAKEFILE/bowtie2_markdup.smk --cores 60 19 | ``` 20 | 21 | For paired-end reads, run: 22 | ``` 23 | snakemake --snakefile /PATH/TO/SNAKEFILE/bowtie2_markdup_pairend.smk --cores 60 24 | ``` 25 | 26 | 27 | # Dependencies 28 | 29 | - snakemake 30 | - sambamba 31 | 32 | # Pipeline steps 33 | 34 | This pipeline uses `bowtie2` to align reads to hg38, `samtools` to convert, sort and index the alignments, and `sambamba` to mark duplicates.
35 | 36 | -------------------------------------------------------------------------------- /snakemake_pipelines/bowtie2_markdup/bowtie2_markdup.smk: -------------------------------------------------------------------------------- 1 | samples, = glob_wildcards("fastq/{sample}.fastq.gz") 2 | samtools_path="samtools-1.13/samtools" 3 | 4 | rule all: 5 | input: 6 | expand('marked/{sample}.bam', sample=samples) 7 | 8 | 9 | rule bowtie2: 10 | input: 11 | r1 = "fastq/{sample}.fastq.gz", 12 | output: 13 | temp("mapped/{sample}.bam") 14 | log: 15 | "logs/bowtie2/{sample}.log" 16 | params: 17 | bowtie2_path="bowtie2-2.4.4/bowtie2", 18 | bowtie2_index="Homo_sapiens/UCSC/hg38/Sequence/Bowtie2_2.4.4_Index/hg38", 19 | threads: 8 20 | shell: 21 | "({params.bowtie2_path} -x {params.bowtie2_index} -p {threads} -U {input.r1} | {samtools_path} view -Sb -@ {threads} > {output}) 2> {log}" 22 | 23 | rule sort: 24 | input: 25 | "mapped/{sample}.bam" 26 | output: 27 | temp("sort/{sample}.bam") 28 | threads: 4 29 | shell: 30 | "{samtools_path} sort {input} -@ {threads} -o {output}" 31 | 32 | rule index: 33 | input: 34 | "sort/{sample}.bam" 35 | output: 36 | temp("sort/{sample}.bam.bai") 37 | shell: 38 | "{samtools_path} index {input}" 39 | 40 | rule sambamba_markdup: 41 | input: 42 | "sort/{sample}.bam", 43 | output: 44 | "marked/{sample}.bam" 45 | threads: 4 46 | shell: 47 | "sambamba/sambamba-0.7.0-linux-static markdup -t {threads} {input} {output}" 48 | 49 | -------------------------------------------------------------------------------- /snakemake_pipelines/bowtie2_markdup/bowtie2_markdup_pairend.smk: -------------------------------------------------------------------------------- 1 | (samples,reads,) = glob_wildcards("fastq/{sample}_{read}.fastq.gz") 2 | reads=["R1","R2"] 3 | samples=list(set(samples)) 4 | samtools_path="samtools/samtools-1.13/samtools" 5 | 6 | rule all: 7 | input: 8 | expand('marked/{sample}.bam', sample=samples) 9 | 10 | 11 | rule bowtie2: 12 | input: 13 | r1 = 
"fastq/{sample}_R1.fastq.gz", 14 | r2 = "fastq/{sample}_R2.fastq.gz" 15 | output: 16 | temp("mapped/{sample}.bam") 17 | log: 18 | "logs/bowtie2/{sample}.log" 19 | params: 20 | bowtie2_path="Bowtie/bowtie2-2.4.2-linux-x86_64/bowtie2", 21 | bowtie2_index="Homo_sapiens/UCSC/hg38/Sequence/Bowtie2_2.3.5.1_Index/hg38", 22 | threads: 8 23 | shell: 24 | "({params.bowtie2_path} -x {params.bowtie2_index} -p {threads} -1 {input.r1} -2 {input.r2} | {samtools_path} view -Sb -@ {threads} > {output}) 2> {log}" 25 | 26 | rule sort: 27 | input: 28 | "mapped/{sample}.bam" 29 | output: 30 | temp("sort/{sample}.bam") 31 | threads: 4 32 | shell: 33 | "{samtools_path} sort {input} -@ {threads} -o {output}" 34 | 35 | rule index: 36 | input: 37 | "sort/{sample}.bam" 38 | output: 39 | temp("sort/{sample}.bam.bai") 40 | shell: 41 | "{samtools_path} index {input}" 42 | 43 | rule sambamba_markdup: 44 | input: 45 | "sort/{sample}.bam", 46 | output: 47 | "marked/{sample}.bam" 48 | threads: 4 49 | shell: 50 | "sambamba/sambamba-0.8.1-linux-amd64-static markdup -t {threads} {input} {output}" 51 | 52 | -------------------------------------------------------------------------------- /snakemake_pipelines/markdup/markdup.smk: -------------------------------------------------------------------------------- 1 | samples, = glob_wildcards("bam/{sample}.bam") 2 | samtools_path="/PATH/TO/SAMTOOLS/" 3 | 4 | rule all: 5 | input: 6 | expand('marked/{sample}.bam', sample=samples) 7 | 8 | rule index: 9 | input: 10 | "bam/{sample}.bam" 11 | output: 12 | temp("bam/{sample}.bam.bai") 13 | shell: 14 | "{samtools_path} index {input}" 15 | 16 | rule sambamba_markdup: 17 | input: 18 | bai="bam/{sample}.bam.bai", 19 | bam="bam/{sample}.bam" 20 | output: 21 | "marked/{sample}.bam" 22 | threads: 4 23 | shell: 24 | "/PATH/TO/sambamba/sambamba-0.7.0-linux-static markdup -t {threads} {input.bam} {output}" 25 | 26 | -------------------------------------------------------------------------------- /tests/spelling.R: 
--------------------------------------------------------------------------------
1 | # Run the package spell check only when the optional 'spelling' package is
2 | # installed. `error = FALSE` asks spell_check_test() not to raise an error
3 | # for the words it reports.
4 | if (requireNamespace("spelling", quietly = TRUE)) {
5 |   spelling::spell_check_test(
6 |     vignettes = TRUE,
7 |     error = FALSE,
8 |     skip_on_cran = TRUE
9 |   )
10 | }
11 | 
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # Test entry point: attach testthat and copykit, then run the package tests.
2 | library(testthat)
3 | library(copykit)
4 | 
5 | test_check("copykit")
6 | 
--------------------------------------------------------------------------------
/tests/testthat/setup.R:
--------------------------------------------------------------------------------
1 | # Creating a test setup
2 | 
--------------------------------------------------------------------------------
/tests/testthat/test-dimred-plots.R:
--------------------------------------------------------------------------------
1 | # setup: with a fixed seed, take columns 1..200 of the filtered example
2 | # object in shuffled order, then run findClusters() and runPca() on them.
3 | # NOTE(review): no runUmap() call is made in this file, so plotUmap()
4 | # presumably relies on an embedding already stored in the example object --
5 | # confirm.
6 | set.seed(1000)
7 | copykit_obj <- copykit_example_filtered()
8 | copykit_obj <- copykit_obj[, sample(200)]
9 | copykit_obj <- runPca(findClusters(copykit_obj))
10 | 
11 | # tests
12 | test_that("Testing reduced embedding plots: ", {
13 |   # UMAP: default call, then labelled by "subclones"
14 |   expect_s3_class(p <- plotUmap(copykit_obj), "ggplot")
15 |   expect_s3_class(p <- plotUmap(copykit_obj, label = "subclones"), "ggplot")
16 | 
17 |   # PCA: default call, then labelled by "subclones"
18 |   expect_s3_class(p <- plotPca(copykit_obj), "ggplot")
19 |   expect_s3_class(p <- plotPca(copykit_obj, label = "subclones"), "ggplot")
20 | })
21 | 
--------------------------------------------------------------------------------
/tests/testthat/test-plotHeatmap.R:
--------------------------------------------------------------------------------
1 | # setup: the example object is loaded before the seed is set; columns 1..70
2 | # are then taken in shuffled order and passed through findClusters(),
3 | # calcConsensus() and runConsensusPhylo(), in that order.
4 | copykit_obj <- copykit_example_filtered()
5 | set.seed(1000)
6 | copykit_obj <- copykit_obj[, sample(70)]
7 | copykit_obj <- runConsensusPhylo(calcConsensus(findClusters(copykit_obj)))
8 | 
9 | # tests
10 | test_that("Testing plotting heatmap: ", {
11 |   # default call
12 |   expect_s4_class(ht <- plotHeatmap(copykit_obj), "Heatmap")
13 | 
14 |   # checking visual parameters
15 |   expect_s4_class(
16 |     ht <- plotHeatmap(copykit_obj, label = "subclones"),
17 |     "Heatmap"
18 |   )
19 | 
20 |   # gene annotation
21 |   expect_s4_class(
22 |     ht <- plotHeatmap(copykit_obj, genes = c("MYC", "TP53")),
23 |     "Heatmap"
24 |   )
25 | })
26 | 
--------------------------------------------------------------------------------
/tests/testthat/test-runSegmentation.R:
--------------------------------------------------------------------------------
1 | # setup
2 | 
3 | # Build one mock bin-count object at the requested resolution. Every object
4 | # in this file uses the same arguments apart from `resolution`, and is
5 | # created with run_segmentation = FALSE and run_lognorm = FALSE.
6 | make_mock <- function(resolution) {
7 |   mock_bincounts(
8 |     ncells = 10,
9 |     ncells_diploid = 5,
10 |     position_gain = 1:50,
11 |     position_del = 200:250,
12 |     resolution = resolution,
13 |     run_segmentation = FALSE,
14 |     run_lognorm = FALSE
15 |   )
16 | }
17 | 
18 | # NOTE(review): the object names are nominal; the string passed as
19 | # `resolution` is the value actually used (e.g. `_50kb` is built with "55kb").
20 | # NOTE(review): only the first object is additionally passed through
21 | # runVst(); confirm whether the others need it as well.
22 | copykit_obj_50kb <- runVst(make_mock("55kb"))
23 | copykit_obj_100kb <- make_mock("110kb")
24 | copykit_obj_175kb <- make_mock("195kb")
25 | copykit_obj_200kb <- make_mock("220kb")
26 | copykit_obj_250kb <- make_mock("280kb")
27 | copykit_obj_500kb <- make_mock("500kb")
28 | copykit_obj_1Mb <- make_mock("1Mb")
29 | copykit_obj_2Mb <- make_mock("2.8Mb")
30 | 
31 | 
32 | # test: runSegmentation() must return a CopyKit object for every mock input
33 | test_that("Testing CopyKit runSegmentation for different resolutions: ", {
34 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_50kb), "CopyKit")
35 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_100kb), "CopyKit")
36 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_175kb), "CopyKit")
37 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_200kb), "CopyKit")
38 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_250kb), "CopyKit")
39 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_500kb), "CopyKit")
40 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_1Mb), "CopyKit")
41 |   expect_s4_class(copykit_obj <- runSegmentation(copykit_obj_2Mb), "CopyKit")
42 | })
43 | 
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 
--------------------------------------------------------------------------------
/vignettes/CopyKit_UserGuide.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "CopyKit_UserGuide"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 |   %\VignetteIndexEntry{CopyKit_UserGuide}
6 |   %\VignetteEngine{knitr::rmarkdown}
7 |   %\VignetteEncoding{UTF-8}
8 | ---
9 | 
10 | Please go to [https://github.com/navinlabcode/CopyKit-UserGuide](https://github.com/navinlabcode/CopyKit-UserGuide) for the complete user guide.
11 | 
12 | ```{r}
13 | sessionInfo()
14 | ```
15 | 
16 | 
--------------------------------------------------------------------------------