├── .github └── .gitignore ├── vignettes └── .gitignore ├── data ├── variants.rda ├── variants.ref.rda └── submod_13r_3w.rda ├── _pkgdown.yml ├── man ├── figures │ ├── MPTevol.jpg │ └── MPTevol_Structure.jpg ├── write.fasta.Rd ├── set.colors.Rd ├── pipe.Rd ├── plotCNAProfile.Rd ├── tree2timescape.Rd ├── Seg-class.Rd ├── getKaKs.Rd ├── MPTevol-package.Rd ├── readCNAProfile.Rd ├── splitSegment.Rd ├── getClinSites.Rd ├── viewTrees.Rd ├── plotCNAtree.Rd ├── maf2variants.Rd ├── plotVafCluster.Rd ├── calPropDriver.Rd ├── calKaKs.Rd ├── plotMutTree.Rd ├── readMaf.Rd ├── calRoutines.Rd ├── plotCNA.Rd └── inferClonalTrees.Rd ├── docs ├── reference │ ├── figures │ │ ├── MPTevol.jpg │ │ └── MPTevol_Structure.jpg │ ├── Seg-class.html │ ├── set.colors.html │ ├── write.fasta.html │ ├── pipe.html │ ├── getKaKs.html │ ├── plotCNAProfile.html │ ├── MPTevol-package.html │ ├── tree2timescape.html │ ├── readCNAProfile.html │ └── index.html ├── pkgdown.yml ├── deps │ └── data-deps.txt ├── link.svg ├── sitemap.xml ├── pkgdown.js ├── 404.html ├── news │ └── index.html └── authors.html ├── NEWS.md ├── .Rbuildignore ├── .gitignore ├── R ├── readSegment.R ├── utils-pipe.R ├── classS4.R ├── MPTevol-package.R ├── auxiliary.R ├── plotMutTree.R ├── readCNAProfile.R ├── plotCNAProfile.R ├── getClinSites.R ├── maf2variants.R ├── viewTrees.R ├── calPropDriver.R ├── tree2timescape.R ├── plotVafCluster.R ├── readMaf.R ├── validation.R ├── plotCNATree.R ├── calKaKs.R └── splitSegment.R ├── MPTevol.Rproj ├── Makefile ├── inst └── extdata │ ├── meskit.split1.clinical.txt │ ├── meskit.split.clinical.txt │ └── tree_final.dist ├── NAMESPACE ├── DESCRIPTION ├── README.md └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | 
rsconnect 4 | MPTevol_cache 5 | -------------------------------------------------------------------------------- /data/variants.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/data/variants.rda -------------------------------------------------------------------------------- /data/variants.ref.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/data/variants.ref.rda -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://qingjian1991.github.io/MPTevol/ 2 | template: 3 | bootstrap: 5 4 | 5 | -------------------------------------------------------------------------------- /data/submod_13r_3w.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/data/submod_13r_3w.rda -------------------------------------------------------------------------------- /man/figures/MPTevol.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/man/figures/MPTevol.jpg -------------------------------------------------------------------------------- /man/figures/MPTevol_Structure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/man/figures/MPTevol_Structure.jpg -------------------------------------------------------------------------------- /docs/reference/figures/MPTevol.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/docs/reference/figures/MPTevol.jpg 
-------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # MPTevol 0.0.0.9000 2 | 3 | * Package initialization. 4 | * Added a `NEWS.md` file to track changes to the package. 5 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.11.4 2 | pkgdown: 2.0.2 3 | pkgdown_sha: ~ 4 | articles: {} 5 | last_built: 2022-03-29T08:58Z 6 | 7 | -------------------------------------------------------------------------------- /docs/reference/figures/MPTevol_Structure.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingjian1991/MPTevol/HEAD/docs/reference/figures/MPTevol_Structure.jpg -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^MPTevol\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^README\.Rmd$ 5 | ^_pkgdown\.yml$ 6 | ^docs$ 7 | ^pkgdown$ 8 | ^Makefile$ 9 | ^Test.R$ 10 | ^documents$ 11 | ^\.github$ 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | analysis 4 | tmp 5 | Met2 6 | Met 7 | medicc 8 | vignettes/Met/ 9 | vignettes/MPTevol_cache 10 | vignettes/rsconnect 11 | documents/Met/ 12 | documents/MPTevol_cache 13 | documents/rsconnect 14 | documents/MPTevol_files 15 | documents/MPTevol.html 16 | Test.R 17 | -------------------------------------------------------------------------------- /docs/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 
-------------------------------------------------------------------------------- /R/readSegment.R: -------------------------------------------------------------------------------- 1 | 2 | #' readSegment 3 | #' 4 | #' Read tab-delimited Segment (can be plain text or *.gz compressed) file along with sample information file. 5 | #' 6 | #' 7 | #' 8 | #' @param segFile The segment file 9 | #' @param min.seg.size The smallest size of segments. Default 500. 10 | #' 11 | #' 12 | #' 13 | 14 | #' To Do 15 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | #' @param lhs A value or the magrittr placeholder. 12 | #' @param rhs A function call using the magrittr semantics. 13 | #' @return The result of calling `rhs(lhs)`. 14 | NULL 15 | -------------------------------------------------------------------------------- /man/write.fasta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/splitSegment.R 3 | \name{write.fasta} 4 | \alias{write.fasta} 5 | \title{write.fasta} 6 | \usage{ 7 | write.fasta( 8 | merge_A, 9 | major = "major", 10 | out.dir = "data", 11 | project.names = "tumor" 12 | ) 13 | } 14 | \arguments{ 15 | \item{project.names}{the project names used in the output.} 16 | } 17 | \description{ 18 | Prepare the format of MEDICC input. 
19 | } 20 | -------------------------------------------------------------------------------- /man/set.colors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/auxiliary.R 3 | \name{set.colors} 4 | \alias{set.colors} 5 | \title{Color setting} 6 | \usage{ 7 | set.colors(n = 36, rev = FALSE, random = FALSE) 8 | } 9 | \arguments{ 10 | \item{n}{number of colors to select. A total of 36 colors are saved.} 11 | 12 | \item{rev}{reverse output the colors.} 13 | 14 | \item{random}{random select number of colors.} 15 | } 16 | \description{ 17 | Color setting 18 | } 19 | -------------------------------------------------------------------------------- /MPTevol.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \arguments{ 10 | \item{lhs}{A value or the magrittr placeholder.} 11 | 12 | \item{rhs}{A function call using the magrittr semantics.} 13 | } 14 | \value{ 15 | The result of calling \code{rhs(lhs)}. 
16 | } 17 | \description{ 18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/plotCNAProfile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotCNAProfile.R 3 | \name{plotCNAProfile} 4 | \alias{plotCNAProfile} 5 | \title{Visualize CNA profile} 6 | \usage{ 7 | plotCNAProfile(cnaqc.list, min_length_show = 1e+05) 8 | } 9 | \arguments{ 10 | \item{cnaqc.list}{cnaqc.list} 11 | 12 | \item{min_length_show}{the minimal length of CNVs to show.} 13 | } 14 | \description{ 15 | This function plots the allele-specific CNAs of multiple samples. 16 | See \code{\link[=readCNAProfile]{readCNAProfile()}} for examples. 17 | } 18 | -------------------------------------------------------------------------------- /R/classS4.R: -------------------------------------------------------------------------------- 1 | # Define the S4 Class 2 | 3 | #' Segment Class 4 | #' @slot data `data.table` of segment file containing CNA information. 5 | #' @slot sample.info `data.frame` of sample information per patient. 6 | #' @slot ref.build human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'. 7 | #' @slot allele Indicate whether this is allele-specific CNAs. Default: TRUE. 
8 | #' @export 9 | 10 | Segment <- setClass( 11 | Class = "Seg", 12 | slots = c( 13 | data = "data.table", 14 | sample.info = "data.frame", 15 | ref.build = "character", 16 | allele = "character" 17 | ) 18 | ) 19 | -------------------------------------------------------------------------------- /R/MPTevol-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | ## usethis namespace: start 5 | #' @importFrom data.table .BY 6 | #' @importFrom data.table .EACHI 7 | #' @importFrom data.table .GRP 8 | #' @importFrom data.table .I 9 | #' @importFrom data.table .N 10 | #' @importFrom data.table .NGRP 11 | #' @importFrom data.table .SD 12 | #' @importFrom data.table := 13 | #' @importFrom data.table data.table 14 | #' @importFrom grDevices dev.off pdf 15 | #' @importFrom methods new 16 | #' @importFrom stats as.dist rnorm setNames 17 | #' @importFrom utils data read.delim read.table write.table 18 | ## usethis namespace: end 19 | NULL 20 | -------------------------------------------------------------------------------- /man/tree2timescape.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tree2timescape.R 3 | \name{tree2timescape} 4 | \alias{tree2timescape} 5 | \title{tree2timescape} 6 | \usage{ 7 | tree2timescape(results, samples = NULL) 8 | } 9 | \arguments{ 10 | \item{results}{the clonal trees generated by \code{\link[=inferClonalTrees]{inferClonalTrees()}}.} 11 | 12 | \item{samples}{the samples to show in the fish plot.} 13 | } 14 | \description{ 15 | This function generates the input of timescape to visualize the fish plot of 16 | clonal evolution by using the results of \code{\link[=inferClonalTrees]{inferClonalTrees()}}. 
17 | } 18 | -------------------------------------------------------------------------------- /man/Seg-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classS4.R 3 | \docType{class} 4 | \name{Seg-class} 5 | \alias{Seg-class} 6 | \alias{Segment} 7 | \title{Segment Class} 8 | \description{ 9 | Segment Class 10 | } 11 | \section{Slots}{ 12 | 13 | \describe{ 14 | \item{\code{data}}{\code{data.table} of segment file containing CNA information.} 15 | 16 | \item{\code{sample.info}}{\code{data.frame} of sample information per patient.} 17 | 18 | \item{\code{ref.build}}{human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.} 19 | 20 | \item{\code{allele}}{Indicate whether this is allele-specific CNAs. Default: TRUE.} 21 | }} 22 | 23 | -------------------------------------------------------------------------------- /man/getKaKs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calKaKs.R 3 | \name{getKaKs} 4 | \alias{getKaKs} 5 | \title{getKaKs compares Ka/Ks between different groups} 6 | \usage{ 7 | getKaKs(df, vaf.cutoff = 0.05) 8 | } 9 | \arguments{ 10 | \item{df}{data. Six columns are required to calculate the Ka/Ks, 11 | including "Tumor_Sample_Barcode","Chromosome","Start_Position", 12 | "Reference_Allele","Tumor_Seq_Allele2" and "VAF".} 13 | 14 | \item{vaf.cutoff}{VAF cutoff. 
Removing mutations with low variant allele frequency (VAF).} 15 | } 16 | \description{ 17 | getKaKs compares Ka/Ks between different groups 18 | } 19 | \details{ 20 | The Ka/Ks is calculated by \code{\link[dndscv:dndscv]{dndscv::dndscv()}} 21 | } 22 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # h/t to @jimhester and @yihui for this parse block: 2 | # https://github.com/yihui/knitr/blob/dc5ead7bcfc0ebd2789fe99c527c7d91afb3de4a/Makefile#L1-L4 3 | # Note the portability change as suggested in the manual: 4 | # https://cran.r-project.org/doc/manuals/r-release/R-exts.html#Writing-portable-packages 5 | PKGNAME = `sed -n "s/Package: *\([^ ]*\)/\1/p" DESCRIPTION` 6 | PKGVERS = `sed -n "s/Version: *\([^ ]*\)/\1/p" DESCRIPTION` 7 | 8 | 9 | all: check 10 | 11 | build: install_deps 12 | R CMD build . 
13 | 14 | check: build 15 | R CMD check --no-manual $(PKGNAME)_$(PKGVERS).tar.gz 16 | 17 | install_deps: 18 | Rscript \ 19 | -e 'if (!requireNamespace("remotes")) install.packages("remotes")' \ 20 | -e 'remotes::install_deps(dependencies = TRUE)' 21 | 22 | install: build 23 | R CMD INSTALL $(PKGNAME)_$(PKGVERS).tar.gz 24 | 25 | clean: 26 | @rm -rf $(PKGNAME)_$(PKGVERS).tar.gz $(PKGNAME).Rcheck 27 | -------------------------------------------------------------------------------- /man/MPTevol-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MPTevol-package.R 3 | \docType{package} 4 | \name{MPTevol-package} 5 | \alias{MPTevol} 6 | \alias{MPTevol-package} 7 | \title{MPTevol: Clonal Evolutionary History and Metastatic Routines Analysis for Multiple Primary Tumors} 8 | \description{ 9 | Provides a practical computation framework for dissecting the evolution of multiple primary tumors (MPT), reducing analysis complexity with modular design. 
10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://qingjian1991.github.io/MPTevol/} 15 | } 16 | 17 | } 18 | \author{ 19 | \strong{Maintainer}: Qinjian Chen \email{chenqingjian2010@163.com} (\href{https://orcid.org/xxx}{ORCID}) 20 | 21 | Authors: 22 | \itemize{ 23 | \item Shixiang Wang \email{w_shixiang@163.com} (\href{https://orcid.org/0000-0001-9855-7357}{ORCID}) 24 | } 25 | 26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /man/readCNAProfile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readCNAProfile.R 3 | \name{readCNAProfile} 4 | \alias{readCNAProfile} 5 | \title{Read CNA Profiles} 6 | \usage{ 7 | readCNAProfile(maf, seg, Patient_ID = NULL, purity = 1, ref = "hg19") 8 | } 9 | \arguments{ 10 | \item{maf}{Maf or MafList object generated by \code{readMaf()} function} 11 | 12 | \item{seg}{seg or seglist.} 13 | 14 | \item{Patient_ID}{Patient_ID: select the specific patients. 15 | IF not indicate, the input is Maf and seg, or the input is MafList and segList.} 16 | 17 | \item{purity}{purity information for each samples.} 18 | 19 | \item{ref}{human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.} 20 | } 21 | \value{ 22 | cnaqc.list for cnaqc initiation. 23 | } 24 | \description{ 25 | We used a \code{CNAqc} object, containing a set of mutations, CNA calls and tumor purity values. 26 | The CNAqc was used to deal with the allele-specific CNAs. 27 | } 28 | \details{ 29 | This code reads the CNA Profiles for each patient. 30 | The tumor names of maf and seg are required to match each other. 
31 | } 32 | -------------------------------------------------------------------------------- /inst/extdata/meskit.split1.clinical.txt: -------------------------------------------------------------------------------- 1 | Tumor_Sample_Barcode Tumor_ID Patient_ID Tumor_Sample_Label 2 | BRCA_1 BRCA BRCA BRCA_1 3 | BRCA_2 BRCA BRCA BRCA_2 4 | BRCA_3 BRCA BRCA BRCA_3 5 | BRCA_4 BRCA BRCA BRCA_4 6 | BRCA_5 BRCA BRCA BRCA_5 7 | LNET_1 LNET LNET LNET_1 8 | LNET_2 LNET LNET LNET_2 9 | LNET_3 LNET LNET LNET_3 10 | LNET_4 LNET LNET LNET_4 11 | LNET_5 LNET LNET LNET_5 12 | READ_1 READ Met1 READ_1 13 | READ_2 READ Met1 READ_2 14 | READ_3 READ Met1 READ_3 15 | READ_4 READ Met1 READ_4 16 | READ_5 READ Met1 READ_5 17 | OvaryLM_1 OvaryLM Met1 OvaryLM_1 18 | OvaryLM_2 OvaryLM Met1 OvaryLM_2 19 | OvaryLM_3 OvaryLM Met1 OvaryLM_3 20 | OvaryLM_4 OvaryLM Met1 OvaryLM_4 21 | OvaryLM_5 OvaryLM Met1 OvaryLM_5 22 | OvaryRM_1 OvaryRM Met1 OvaryRM_1 23 | OvaryRM_2 OvaryRM Met1 OvaryRM_2 24 | OvaryRM_3 OvaryRM Met1 OvaryRM_3 25 | OvaryRM_4 OvaryRM Met1 OvaryRM_4 26 | OvaryRM_5 OvaryRM Met1 OvaryRM_5 27 | OvaryRM_6 OvaryRM Met1 OvaryRM_6 28 | UterusM_1 UterusM Met1 UterusM_1 29 | UterusM_2 UterusM Uterus UterusM_2 30 | UterusM_3 UterusM Met1 UterusM_3 31 | UterusM_4 UterusM Uterus UterusM_4 32 | UterusM_5 UterusM Uterus UterusM_5 33 | UterusM_6 UterusM Uterus UterusM_6 34 | UterusM_7 UterusM Uterus UterusM_7 35 | -------------------------------------------------------------------------------- /inst/extdata/meskit.split.clinical.txt: -------------------------------------------------------------------------------- 1 | Tumor_Sample_Barcode Tumor_ID Patient_ID Tumor_Sample_Label 2 | BRCA_1 BRCA BRCA BRCA_1 3 | BRCA_2 BRCA BRCA BRCA_2 4 | BRCA_3 BRCA BRCA BRCA_3 5 | BRCA_4 BRCA BRCA BRCA_4 6 | BRCA_5 BRCA BRCA BRCA_5 7 | LNET_1 LNET LNET LNET_1 8 | LNET_2 LNET LNET LNET_2 9 | LNET_3 LNET LNET LNET_3 10 | LNET_4 LNET LNET LNET_4 11 | LNET_5 LNET LNET LNET_5 12 | READ_1 READ READ READ_1 13 | READ_2 
READ READ READ_2 14 | READ_3 READ READ READ_3 15 | READ_4 READ READ READ_4 16 | READ_5 READ READ READ_5 17 | OvaryLM_1 OvaryLM OvaryLM OvaryLM_1 18 | OvaryLM_2 OvaryLM OvaryLM OvaryLM_2 19 | OvaryLM_3 OvaryLM OvaryLM OvaryLM_3 20 | OvaryLM_4 OvaryLM OvaryLM OvaryLM_4 21 | OvaryLM_5 OvaryLM OvaryLM OvaryLM_5 22 | OvaryRM_1 OvaryRM OvaryRM OvaryRM_1 23 | OvaryRM_2 OvaryRM OvaryRM OvaryRM_2 24 | OvaryRM_3 OvaryRM OvaryRM OvaryRM_3 25 | OvaryRM_4 OvaryRM OvaryRM OvaryRM_4 26 | OvaryRM_5 OvaryRM OvaryRM OvaryRM_5 27 | OvaryRM_6 OvaryRM OvaryRM OvaryRM_6 28 | UterusM_1 UterusM UterusM UterusM_1 29 | UterusM_2 UterusM UterusM UterusM_2 30 | UterusM_3 UterusM UterusM UterusM_3 31 | UterusM_4 UterusM UterusM UterusM_4 32 | UterusM_5 UterusM UterusM UterusM_5 33 | UterusM_6 UterusM UterusM UterusM_6 34 | UterusM_7 UterusM UterusM UterusM_7 35 | -------------------------------------------------------------------------------- /R/auxiliary.R: -------------------------------------------------------------------------------- 1 | #' Color setting 2 | #' 3 | #' @param n number of colors to select. A total of 36 colors are saved. 4 | #' @param rev reverse output the colors. 5 | #' @param random random select number of colors. 
6 | #' 7 | #' @export 8 | set.colors <- function(n = 36, rev = FALSE, random = FALSE) { 9 | # set certain colors 10 | colorScale <- c( 11 | "#3C5488FF", "#00A087FF", "#F39B7fFF", 12 | "#8491B4FF", "#E64B35FF", "#4DBBD5FF", 13 | "#E41A1C", "#377EB8", "#7F0000", 14 | "#35978f", "#FC8D62", "#2166ac", 15 | "#E78AC3", "#A6D854", "#FFD92F", 16 | "#E5C494", "#8DD3C7", "#6E016B", 17 | "#BEBADA", "#e08214", "#80B1D3", 18 | "#d6604d", "#ffff99", "#FCCDE5", 19 | "#FF6A5A", "#BC80BD", "#CCEBC5", 20 | "#fb9a99", "#B6646A", "#9F994E", 21 | "#7570B3", "#c51b7d", "#66A61E", 22 | "#E6AB02", "#003c30", "#666666" 23 | ) 24 | 25 | if (random) { 26 | return(colorScale[sample(1:36, n, replace = TRUE)]) 27 | } 28 | 29 | if (n <= 36 & n > 0) { 30 | if (rev) { 31 | colors <- colorScale[36:(36 - n + 1)] 32 | } else { 33 | colors <- colorScale[1:n] 34 | } 35 | } else if (n > 36) { 36 | if (rev) { 37 | colors <- rev(colorScale) 38 | } else { 39 | colors <- colorScale 40 | } 41 | } else { 42 | colors <- character(0) 43 | } 44 | 45 | colors 46 | } 47 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export("%>%") 4 | export(calKaKs) 5 | export(calPropDriver) 6 | export(calRoutines) 7 | export(getClinSites) 8 | export(getKaKs) 9 | export(inferClonalTrees) 10 | export(maf2variants) 11 | export(plotCNA) 12 | export(plotCNAProfile) 13 | export(plotCNAtree) 14 | export(plotMutTree) 15 | export(plotVafCluster) 16 | export(readCNAProfile) 17 | export(readMaf) 18 | export(set.colors) 19 | export(splitSegment) 20 | export(tree2timescape) 21 | export(viewTrees) 22 | exportClasses(Seg) 23 | import(CNAqc) 24 | import(ComplexHeatmap) 25 | import(GenomicRanges) 26 | import(ape) 27 | import(clonevol) 28 | import(ggrepel) 29 | import(ggtree) 30 | import(methods) 31 | import(phangorn) 32 | import(treeio) 33 | 
importFrom(data.table,":=") 34 | importFrom(data.table,.BY) 35 | importFrom(data.table,.EACHI) 36 | importFrom(data.table,.GRP) 37 | importFrom(data.table,.I) 38 | importFrom(data.table,.N) 39 | importFrom(data.table,.NGRP) 40 | importFrom(data.table,.SD) 41 | importFrom(data.table,data.table) 42 | importFrom(data.table,fread) 43 | importFrom(data.table,setkey) 44 | importFrom(grDevices,dev.off) 45 | importFrom(grDevices,pdf) 46 | importFrom(magrittr,"%>%") 47 | importFrom(methods,new) 48 | importFrom(stats,as.dist) 49 | importFrom(stats,qnorm) 50 | importFrom(stats,rnorm) 51 | importFrom(stats,setNames) 52 | importFrom(utils,data) 53 | importFrom(utils,read.delim) 54 | importFrom(utils,read.table) 55 | importFrom(utils,write.table) 56 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MPTevol 2 | Title: Clonal Evolutionary History and Metastatic Routines Analysis for 3 | Multiple Primary Tumors 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Qinjian", "Chen", , "chenqingjian2010@163.com", role = c("aut", "cre"), 7 | comment = c(ORCID = "xxx")), 8 | person("Shixiang", "Wang", , "w_shixiang@163.com", role = "aut", 9 | comment = c(ORCID = "0000-0001-9855-7357")) 10 | ) 11 | Description: Provides a practical computation framework for dissecting the 12 | evolution of multiple primary tumors (MPT), reducing analysis 13 | complexity with modular design. 
14 | License: GPL (>= 3) 15 | Depends: 16 | R (>= 4.0) 17 | Imports: 18 | ape, 19 | clonevol, 20 | CNAqc, 21 | ComplexHeatmap, 22 | data.table, 23 | dndscv, 24 | dplyr, 25 | GenomicRanges, 26 | ggplot2, 27 | ggrepel, 28 | ggtree, 29 | gtools, 30 | magrittr, 31 | MesKit, 32 | methods, 33 | phangorn, 34 | pio, 35 | purrr, 36 | stringr, 37 | tidyr, 38 | treeio 39 | Suggests: 40 | DT, 41 | easypar, 42 | ggpubr, 43 | IRanges, 44 | knitr, 45 | latex2exp, 46 | RColorBrewer, 47 | rmarkdown, 48 | tibble 49 | VignetteBuilder: 50 | knitr 51 | Remotes: 52 | github::caravagnalab/CNAqc, 53 | github::hdng/clonevol, 54 | github::im3sanger/dndscv 55 | biocViews: 56 | Encoding: UTF-8 57 | Roxygen: list(markdown = TRUE, roclets = c("collate", "namespace", "rd")) 58 | RoxygenNote: 7.1.2 59 | URL: https://qingjian1991.github.io/MPTevol/ 60 | -------------------------------------------------------------------------------- /man/splitSegment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/splitSegment.R 3 | \name{splitSegment} 4 | \alias{splitSegment} 5 | \title{Split the segment regions into several parts} 6 | \usage{ 7 | splitSegment( 8 | segfiles, 9 | sampleid, 10 | project.names = "tumor", 11 | out.dir = "data", 12 | N.baf = 30, 13 | cnv_min_length = 1e+05, 14 | max_CNt = 15, 15 | minLength = 1e+05, 16 | maxCNV = 4, 17 | medicc.py = "medicc.py", 18 | python = "python" 19 | ) 20 | } 21 | \arguments{ 22 | \item{segfiles}{The allele-specific copy number alterations files generated by \strong{sequenza}.} 23 | 24 | \item{sampleid}{the corresponding sample ids.} 25 | 26 | \item{project.names}{the project names used in the output.} 27 | 28 | \item{out.dir}{output dir.} 29 | 30 | \item{N.baf}{quality control for the sequenza output.} 31 | 32 | \item{cnv_min_length}{quality control for the sequenza output.} 33 | 34 | \item{max_CNt}{quality control for the sequenza output.} 
35 | 36 | \item{minLength}{output control: the min length of CNVs to output.} 37 | 38 | \item{maxCNV}{output control: the max CNV to output. When the raw CNV is greater than maxCNV, its value is set to maxCNV.} 39 | 40 | \item{medicc.py}{the position of medicc.py.} 41 | 42 | \item{python}{the position of the python executable.} 43 | } 44 | \description{ 45 | According to their shared status. 46 | The function first obtains the common shared regions across samples. 47 | The corresponding A allele and B allele are output as the format requirements of MEDICC. 48 | } 49 | \details{ 50 | This function takes the \strong{sequenza} results as the input and outputs 51 | the format requirements of MEDICC. 52 | } 53 | -------------------------------------------------------------------------------- /man/getClinSites.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/getClinSites.R 3 | \name{getClinSites} 4 | \alias{getClinSites} 5 | \title{getClinSites captures the clinical targetable sites of oncokb based on the mutation status} 6 | \usage{ 7 | getClinSites(maf, Patient_ID = NULL) 8 | } 9 | \arguments{ 10 | \item{maf}{Maf or MafList object generated by readMaf function} 11 | 12 | \item{Patient_ID}{Patient_ID. Select a specific patient.} 13 | } 14 | \description{ 15 | getClinSites captures the clinical targetable sites of oncokb based on the mutation status 16 | } 17 | \details{ 18 | We match the driver genes between maf files and clinical sites in oncokb. 19 | We only match the gene names, ignoring the cancer types and gene alterations. 20 | The main targetable alterations include gene fusions (like BCR-ABL1 fusion), 21 | Oncogenic mutations, Exon deletions/insertion, Amplifications, 22 | Deletions and Single-nucleotide mutations (BRAF V600E). 23 | Please manually check the mutation status. 
24 | } 25 | \examples{ 26 | library(MesKit) 27 | data.type <- "split1" 28 | 29 | maf <- readMaf( 30 | mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.mutation.txt", data.type)), 31 | ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.CCF.txt", data.type)), 32 | clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.clinical.txt", data.type)), 33 | refBuild = "hg19", 34 | ccf.conf.level = 0.95 35 | ) 36 | 37 | # see clinical targetable sites 38 | sites <- getClinSites(maf) 39 | 40 | # see one patient 41 | sites <- getClinSites(maf, Patient_ID = "BRCA") 42 | 43 | # View data 44 | DT::datatable(sites) 45 | } 46 | -------------------------------------------------------------------------------- /man/viewTrees.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/viewTrees.R 3 | \name{viewTrees} 4 | \alias{viewTrees} 5 | \title{Visualize the trees} 6 | \usage{ 7 | viewTrees( 8 | phyloTree, 9 | tree.format = "S4", 10 | normal.node = "NORMAL", 11 | group = NULL, 12 | group.colors = NULL, 13 | title = "Cancer", 14 | showBootstrap = TRUE, 15 | hexpand_ratio = 0.3 16 | ) 17 | } 18 | \arguments{ 19 | \item{phyloTree}{phyloTree: The tree is in Parenthetic format.} 20 | 21 | \item{tree.format}{the format of tree, S4 or list. Default is S4.} 22 | 23 | \item{normal.node}{the sample name of normal sample in the tree.} 24 | 25 | \item{group}{a list that used to indicate the sample groups.} 26 | 27 | \item{group.colors}{an array indicates the colors of sample groups.} 28 | 29 | \item{title}{title of the plot.} 30 | 31 | \item{showBootstrap}{whether showing the bootstrap values. Default is TRUE.} 32 | 33 | \item{hexpand_ratio}{hexpand ratio. 
see \code{\link[ggtree]{hexpand}}} 34 | } 35 | \value{ 36 | a ggtree object 37 | } 38 | \description{ 39 | Visualize the trees 40 | } 41 | \examples{ 42 | # This dist file is the output of MEDICC 43 | dist <- system.file(package = "MPTevol", "extdata", "tree_final.dist") 44 | 45 | # plot CNA trees without colored samples. 46 | plotCNAtree(dist = dist) 47 | 48 | # create a list to indicate the sample groups. 49 | grp <- list( 50 | NORMAL = "NORMAL", 51 | Breast = paste0("Breast_", 1:5), 52 | Coad = paste0("Coad_", 1:5), 53 | Lung = paste0("Lung_", 1:5), 54 | OveryLM = paste0("OveryLM_", 1:5), 55 | OveryRM = paste0("OveryRM_", 1:6), 56 | UterusM = paste0("UterusM_", c(1:7)) 57 | ) 58 | 59 | plotCNAtree(dist = dist, group = grp) 60 | } 61 | -------------------------------------------------------------------------------- /man/plotCNAtree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotCNATree.R 3 | \name{plotCNAtree} 4 | \alias{plotCNAtree} 5 | \title{plotCNAtree plots phylogenetic trees of CNAs} 6 | \usage{ 7 | plotCNAtree( 8 | dist, 9 | bootstrap.rep.num = 500, 10 | group = NULL, 11 | group.colors = NULL, 12 | title = "Cancer", 13 | normal.node = "NORMAL", 14 | hexpand_ratio = 0.3 15 | ) 16 | } 17 | \arguments{ 18 | \item{dist}{dist files that generated by MEDICC.} 19 | 20 | \item{bootstrap.rep.num}{number of bootstrap steps.} 21 | 22 | \item{group}{a list that used to indicate the sample groups} 23 | 24 | \item{group.colors}{an array indicates the colors of sample groups.} 25 | 26 | \item{title}{title of the plot.} 27 | 28 | \item{normal.node}{the sample name of normal sample in the tree.} 29 | 30 | \item{hexpand_ratio}{hexpand ratio. see \code{\link[ggtree]{hexpand}}} 31 | } 32 | \description{ 33 | The CNAs trees were constructed by MEDICC. 34 | } 35 | \examples{ 36 | # read samples distances. 
37 | # This dist file is the output of MEDICC 38 | dist <- system.file(package = "MPTevol", "extdata", "tree_final.dist") 39 | 40 | # set group information 41 | group <- list( 42 | NORMAL = "NORMAL", 43 | Breast = paste0("Breast_", 1:5), 44 | Coad = paste0("Coad_", 1:5), 45 | Lung = paste0("Lung_", 1:5), 46 | OveryLM = paste0("OveryLM_", 1:5), 47 | OveryRM = paste0("OveryRM_", 1:6), 48 | UterusM = paste0("UterusM_", c(1:7)) 49 | ) 50 | 51 | # set group colors 52 | group.colors <- setNames(set.colors(n = length(group)), nm = names(group)) 53 | 54 | # built trees 55 | tree <- plotCNAtree( 56 | dist = dist, 57 | group = group, 58 | group.colors = group.colors 59 | ) 60 | 61 | tree$plot 62 | } 63 | -------------------------------------------------------------------------------- /man/maf2variants.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/maf2variants.R 3 | \name{maf2variants} 4 | \alias{maf2variants} 5 | \title{maf2variants} 6 | \usage{ 7 | maf2variants(maf, patient.id = NULL, ccf.cutoff = 0.1, extract.VAF = FALSE) 8 | } 9 | \arguments{ 10 | \item{maf}{Maf or MafList object generated by \code{readMaf()} function} 11 | 12 | \item{patient.id}{Select the specific patients. Default \code{NULL}, all patients are included.} 13 | 14 | \item{ccf.cutoff}{Removing low-CCF mutations (default: 0.1).} 15 | 16 | \item{extract.VAF}{Whether extract the VAF information. Default \code{FALSE}: extract CCF rather than VAF.} 17 | } 18 | \description{ 19 | Change the maf object into variants data frame for the subclonal structures analysis. 20 | } 21 | \details{ 22 | This function extracts the \code{Cluster} information from the CCF data. Therefore, the \code{Cluster} column is required in the ccfFile. 23 | 24 | For the output \code{variants}, the first five columns are Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster. 
The remaining columns indicate variant cellular prevalence for each sample. 25 | } 26 | \examples{ 27 | data.type <- "split1" 28 | 29 | maf1 <- readMaf( 30 | mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.mutation.txt", data.type)), 31 | ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.CCF.txt", data.type)), 32 | clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.clinical.txt", data.type)), 33 | refBuild = "hg19", 34 | ccf.conf.level = 0.95 35 | ) 36 | 37 | ccfs = maf2variants(maf1, patient.id = "Met1") 38 | 39 | #extract VAF rather than CCF. 40 | vafs = maf2variants(maf1, patient.id = "Met1", extract.VAF = TRUE) 41 | 42 | } 43 | -------------------------------------------------------------------------------- /man/plotVafCluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotVafCluster.R 3 | \name{plotVafCluster} 4 | \alias{plotVafCluster} 5 | \title{plotVafCluster} 6 | \usage{ 7 | plotVafCluster( 8 | variants, 9 | cluster.col.name = "cluster", 10 | vaf.col.names, 11 | clone.colors = NULL, 12 | violin = FALSE, 13 | box = TRUE, 14 | jitter = TRUE, 15 | founding.cluster = 1, 16 | output.file = NULL, 17 | highlight = NULL, 18 | highlight.note.col.name = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{variants}{data frame of the variants. 23 | At least cluster column and VAF or CCF columns are required.
24 | Cluster column should contain cluster identities as continuous integer values 25 | starting from 1.} 26 | 27 | \item{cluster.col.name}{the column names that containing cluster 28 | information (Default = "cluster").} 29 | 30 | \item{vaf.col.names}{the column names of samples containing VAF.} 31 | 32 | \item{clone.colors}{setting clone colors.} 33 | 34 | \item{violin}{whether plotting violin (Default = FALSE).} 35 | 36 | \item{box}{whether plotting box (Default = TRUE).} 37 | 38 | \item{jitter}{whether plotting jitter plot (Default = TRUE).} 39 | 40 | \item{founding.cluster}{the name of founding clones, one of the most important parameters. For most of circumstances, the founding cluster is the cluster with the highest average CCF cluster.} 41 | 42 | \item{output.file}{the output file name (Default = NULL)} 43 | 44 | \item{highlight}{column name to indicate whether highlight the sites (TRUE or FALSE).} 45 | 46 | \item{highlight.note.col.name}{highlight context.} 47 | } 48 | \value{ 49 | a ggplot object 50 | } 51 | \description{ 52 | Plot variant clustering in each sample by using combination of box, 53 | violin and jitter plots. 
54 | } 55 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /authors.html 8 | 9 | 10 | /index.html 11 | 12 | 13 | /LICENSE.html 14 | 15 | 16 | /news/index.html 17 | 18 | 19 | /reference/calKaKs.html 20 | 21 | 22 | /reference/calRoutines.html 23 | 24 | 25 | /reference/getClinSites.html 26 | 27 | 28 | /reference/getKaKs.html 29 | 30 | 31 | /reference/index.html 32 | 33 | 34 | /reference/inferClonalTrees.html 35 | 36 | 37 | /reference/MPTevol-package.html 38 | 39 | 40 | /reference/pipe.html 41 | 42 | 43 | /reference/plotCNAProfile.html 44 | 45 | 46 | /reference/plotCNAtree.html 47 | 48 | 49 | /reference/plotMutTree.html 50 | 51 | 52 | /reference/plotVafCluster.html 53 | 54 | 55 | /reference/readCNAProfile.html 56 | 57 | 58 | /reference/Seg-class.html 59 | 60 | 61 | /reference/set.colors.html 62 | 63 | 64 | /reference/splitSegment.html 65 | 66 | 67 | /reference/tree2timescape.html 68 | 69 | 70 | /reference/viewTrees.html 71 | 72 | 73 | /reference/write.fasta.html 74 | 75 | 76 | -------------------------------------------------------------------------------- /man/calPropDriver.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calPropDriver.R 3 | \name{calPropDriver} 4 | \alias{calPropDriver} 5 | \title{calPropDriver calculates the proportions of driver mutation for mutation sets.} 6 | \usage{ 7 | calPropDriver( 8 | maf, 9 | patient.id = NULL, 10 | driverGene, 11 | class = "SP", 12 | classByTumor = FALSE, 13 | vaf.cutoff = 0.01, 14 | silent.columns = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{maf}{Maf or MafList object generated by \code{readMaf()} function} 19 | 20 | \item{patient.id}{Select the specific patients. 
Default \code{NULL}, all patients are included.} 21 | 22 | \item{driverGene}{The driver Gene names (Genes Symbols)} 23 | 24 | \item{class}{The class which would be represented. 25 | "SP" (Shared pattern: Public/Shared/Private), other options: "CS" (Clonal status: Clonal/Subclonl) 26 | and "SPCS". see \code{\link[MesKit:classifyMut]{MesKit::classifyMut()}}.} 27 | 28 | \item{classByTumor}{Logical (Default: \code{FALSE}). Classify mutations based on "Tumor_ID".} 29 | 30 | \item{vaf.cutoff}{Removing mutations of low variant allele frequency (VAF).} 31 | 32 | \item{silent.columns}{The \code{Variant_Classification} field in the MAF files that indicates the silent mutations. Defaults: c("Silent", "3'Flank", "IGR", "Intron", "RNA")} 33 | } 34 | \description{ 35 | The mutations are classified by \code{classifyMut()} internally. 36 | } 37 | \examples{ 38 | 39 | # Get the driver gene. 40 | driverGene <- read.delim(system.file(package = "MPTevol", "extdata", "IntOGen-Drivers-Cancer_Genes.tsv"), header = T) \%>\% 41 | filter(CANCER_TYPE \%in\% c("BRCA", "COREAD", "LUAD", "LUSC")) \%>\% 42 | pull(SYMBOL) \%>\% 43 | unique() 44 | 45 | prop = calPropDriver(maf, patient.id = "BRCA", driverGene = driverGene) 46 | 47 | prop$BRCA$plot 48 | 49 | 50 | } 51 | -------------------------------------------------------------------------------- /man/calKaKs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calKaKs.R 3 | \name{calKaKs} 4 | \alias{calKaKs} 5 | \title{calKaKs calculates the Ka/Ks of each group} 6 | \usage{ 7 | calKaKs( 8 | maf, 9 | patient.id = NULL, 10 | class = "SP", 11 | classByTumor = FALSE, 12 | vaf.cutoff = 0.05, 13 | parallel = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{maf}{Maf or MafList object generated by \code{readMaf()} function} 18 | 19 | \item{patient.id}{Select the specific patients. 
Default \code{NULL}, all patients are included.} 20 | 21 | \item{class}{The class which would be represented. 22 | "SP" (Shared pattern: Public/Shared/Private), other options: "CS" (Clonal status: Clonal/Subclonl) 23 | and "SPCS". see \code{\link[MesKit:classifyMut]{MesKit::classifyMut()}}.} 24 | 25 | \item{vaf.cutoff}{Removing mutations with low variant allele frequency (VAF).} 26 | 27 | \item{parallel}{If \code{TRUE} (default), run in parallel.} 28 | } 29 | \description{ 30 | The mutations are classified by \code{classifyMut()} internally. 31 | } 32 | \examples{ 33 | library(MesKit) 34 | data.type <- "split1" 35 | 36 | maf <- readMaf( 37 | mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.mutation.txt", data.type)), 38 | ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.CCF.txt", data.type)), 39 | clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.clinical.txt", data.type)), 40 | refBuild = "hg19", 41 | ccf.conf.level = 0.95 42 | ) 43 | 44 | # calKaKas 45 | kaks <- calKaKs(maf, patient.id = "Breast", class = "SP", parallel = TRUE, vaf.cutoff = 0.05) 46 | kaks 47 | kaks <- calKaKs(maf, patient.id = "Breast", class = "CS", parallel = TRUE, vaf.cutoff = 0.05) 48 | kaks 49 | kaks <- calKaKs(maf, class = "SP", parallel = TRUE, vaf.cutoff = 0.05) 50 | kaks 51 | } 52 | -------------------------------------------------------------------------------- /man/plotMutTree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotMutTree.R 3 | \name{plotMutTree} 4 | \alias{plotMutTree} 5 | \title{plotMutTree plots phylogenetic tree of mutations} 6 | \usage{ 7 | plotMutTree( 8 | maf, 9 | patient.id, 10 | method = "NJ", 11 | min.vaf = 0.02, 12 | bootstrap.rep.num = 500, 13 | group = NULL, 14 | group.colors = NULL, 15 | title = "cancer", 16 | hexpand_ratio = 0.3, 17 | ... 
18 | ) 19 | } 20 | \arguments{ 21 | \item{maf}{Maf or MafList object generated by readMaf function.} 22 | 23 | \item{patient.id}{Select the specific patients.} 24 | 25 | \item{method}{Approach to construct phylogenetic trees. 26 | Choose one of "NJ"(Neighbor-Joining), "MP"(maximum parsimony), 27 | "ML"(maximum likelihood), "FASTME.ols" or "FASTME.bal".} 28 | 29 | \item{min.vaf}{The minimum value of vaf. Default 0.02.} 30 | 31 | \item{bootstrap.rep.num}{Bootstrap iterations. Default 500.} 32 | 33 | \item{group}{a list that used to indicate the sample groups} 34 | 35 | \item{group.colors}{an array indicates the colors of sample groups.} 36 | 37 | \item{title}{title of the plot.} 38 | 39 | \item{...}{parameters in the getPhyloTree function.} 40 | 41 | \item{hexpand_ratio}{hexpand ratio. see \code{\link[ggtree]{hexpand}}} 42 | } 43 | \description{ 44 | plotMutTree plots phylogenetic tree of mutations 45 | } 46 | \examples{ 47 | library(MesKit) 48 | data.type <- "split1" 49 | 50 | maf <- readMaf( 51 | mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.mutation.txt", data.type)), 52 | ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.CCF.txt", data.type)), 53 | clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.clinical.txt", data.type)), 54 | refBuild = "hg19", 55 | ccf.conf.level = 0.95 56 | ) 57 | 58 | # construct a group 59 | group <- list( 60 | Coad = paste0("Coad_", 1:5), 61 | OveryLM = paste0("OveryLM_", 1:5), 62 | OveryRM = paste0("OveryRM_", 1:6), 63 | UterusM = paste0("UterusM_", c(1, 3)) 64 | ) 65 | 66 | plotMutTree(maf, patient.id = "Met1", group = group, title = "CRC Met") 67 | } 68 | -------------------------------------------------------------------------------- /man/readMaf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/readMaf.R 3 | \name{readMaf} 4 |
\alias{readMaf} 5 | \title{readMaf} 6 | \usage{ 7 | readMaf( 8 | mafFile, 9 | clinicalFile, 10 | ccfFile = NULL, 11 | adjusted.VAF = FALSE, 12 | nonSyn.vc = NULL, 13 | use.indel.ccf = FALSE, 14 | ccf.conf.level = 0.95, 15 | remove.empty.VAF = TRUE, 16 | refBuild = "hg19" 17 | ) 18 | } 19 | \arguments{ 20 | \item{mafFile}{A tab delimited MAF file (plain text or *.gz compressed). Required.} 21 | 22 | \item{clinicalFile}{A clinical data file includes Tumor_Sample_Barcode, Tumor_ID, Patient_ID. Tumor_Sample_Label is optional. Default NULL.} 23 | 24 | \item{ccfFile}{A CCF file of somatic mutations. Default NULL.} 25 | 26 | \item{adjusted.VAF}{Whether adjusted VAF is included in mafFile. Default FALSE.} 27 | 28 | \item{nonSyn.vc}{List of Variant classifications which are considered as non-silent. Default NULL, use Variant Classifications with "Frame_Shift_Del","Frame_Shift_Ins","Splice_Site","Translation_Start_Site","Nonsense_Mutation","Nonstop_Mutation","In_Frame_Del","In_Frame_Ins","Missense_Mutation"} 29 | 30 | \item{use.indel.ccf}{Whether include indels in ccfFile. Default FALSE.} 31 | 32 | \item{ccf.conf.level}{The confidence level of CCF to identify clonal or subclonal. 33 | Only works when "CCF_std" or "CCF_CI_high" is provided in ccfFile. Default 0.95.} 34 | 35 | \item{remove.empty.VAF}{Whether removing the mutations with VAF=0. When making the comparison of pair-wide CCF, retained mutations with VAF=0.} 36 | 37 | \item{refBuild}{Human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.} 38 | } 39 | \value{ 40 | an object of Maf or MafList. 41 | } 42 | \description{ 43 | Read tab delimited MAF (can be plain text or *.gz compressed) file along with sample information file. 
44 | } 45 | \examples{ 46 | maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit") 47 | clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit") 48 | ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit") 49 | maf <- readMaf(mafFile=maf.File,clinicalFile = clin.File, refBuild="hg19") 50 | maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19") 51 | } 52 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # MPTevol 5 | 6 | 7 | 8 | 9 | Multiple primary tumors (MPT) is a special and rare cancer type, defined 10 | as more than two primary tumors presenting at the diagnosis in a single 11 | patient. The molecular characteristics and tumorigenesis of MPT remain 12 | unclear due to insufficient approaches. 13 | 14 | Here, we present `MPTevol`, a practical computational framework for 15 | comprehensively exploring the MPT from **multiregional sequencing 16 | (MRS)** experiments. `MPTevol` facilitates comparison genomic profiles 17 | across multiple primary tumor samples, detection of clonal evolutionary 18 | history and metastatic routines in MPT, and quantification of metastatic 19 | history. This package incorporates multiple cancer evolution analyses, 20 | for a one-stop solution of MPT analysis.The goal of MPTevol is to 21 | provide a practical computation framework for dissecting the evolution 22 | of multiple primary tumors (MPT), reducing analysis complexity with 23 | modular design. 24 | 25 | ## Package Overview 26 | 27 | ![](man/figures/MPTevol.jpg) 28 | 29 | ## Citations 30 | 31 | If you are using the MPTevol in academic research, please cite our 32 | paper: 33 | 34 | **Chen, Q., Wu, Q.-N., Rong, Y.-M., Wang, S., Zuo, Z., Bai, L., . . . 35 | Zhao, Q. (2022). 
[Deciphering clonal dynamics and metastatic routines in 36 | a rare patient of synchronous triple-primary tumors and multiple 37 | metastases with MPTevol](https://doi.org/10.1093/bib/bbac175). Briefings 38 | in Bioinformatics. doi:10.1093/bib/bbac175** 39 | 40 | ## Installation 41 | 42 | You can install the development version of MPTevol from 43 | [GitHub](https://github.com/) with: 44 | 45 | ``` r 46 | # install.packages("remotes") 47 | remotes::install_github("qingjian1991/MPTevol") 48 | ``` 49 | 50 | ## Documentation 51 | 52 | An overview of usage can be found at [MPTevol online 53 | vignette](https://rpubs.com/cqj_00/MPTevol). 54 | 55 | ## Package structure 56 | 57 | ![](man/figures/MPTevol_Structure.jpg) 58 | 59 | ## Authors 60 | 61 | This software is developed by: 62 | 63 | - Qingjian Chen, , Sun Yat-Sen University 64 | Cancer Center. 65 | - Shixiang Wang, , Sun Yat-Sen University Cancer 66 | Center. 67 | 68 | ## Supervised by 69 | 70 | - [Qi Zhao](mailto:zhaoqi@sysucc.org.cn) from Bioinformatic Center of Sun 71 | Yat-sen University Cancer Center. 72 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # MPTevol 17 | 18 | 19 | 20 | 21 | Multiple primary tumors (MPT) is a special and rare cancer type, defined as more than two primary tumors presenting at the diagnosis in a single patient. The molecular characteristics and tumorigenesis of MPT remain unclear due to insufficient approaches. 22 | 23 | Here, we present `MPTevol`, a practical computational framework for comprehensively exploring the MPT from **multiregional sequencing (MRS)** experiments.
`MPTevol` facilitates comparison of genomic profiles across multiple primary tumor samples, detection of clonal evolutionary history and metastatic routines in MPT, and quantification of metastatic history. This package incorporates multiple cancer evolution analyses, for a one-stop solution of MPT analysis. The goal of MPTevol is to provide a practical computation framework for dissecting the evolution of multiple primary tumors (MPT), reducing analysis complexity with modular design. 24 | 25 | ## Package Overview 26 | 27 | ![](man/figures/MPTevol.jpg) 28 | 29 | ## Citations 30 | 31 | If you are using the MPTevol in academic research, please cite our paper: 32 | 33 | **Chen, Q., Wu, Q.-N., Rong, Y.-M., Wang, S., Zuo, Z., Bai, L., . . . Zhao, Q. (2022). [Deciphering clonal dynamics and metastatic routines in a rare patient of synchronous triple-primary tumors and multiple metastases with MPTevol](https://doi.org/10.1093/bib/bbac175). Briefings in Bioinformatics. doi:10.1093/bib/bbac175** 34 | 35 | ## Installation 36 | 37 | You can install the development version of MPTevol from [GitHub](https://github.com/) with: 38 | 39 | ``` r 40 | # install.packages("remotes") 41 | remotes::install_github("qingjian1991/MPTevol") 42 | ``` 43 | 44 | ## Documentation 45 | 46 | An overview of usage can be found at [MPTevol online vignette](https://rpubs.com/cqj_00/MPTevol). 47 | 48 | ## Package structure 49 | 50 | ![](man/figures/MPTevol_Structure.jpg) 51 | 52 | ## Authors 53 | 54 | This software is developed by: 55 | 56 | * Qingjian Chen, , Sun Yat-Sen University Cancer Center. 57 | * Shixiang Wang, , Sun Yat-Sen University Cancer Center. 58 | 59 | ## Supervised by 60 | 61 | * [Qi Zhao](mailto:zhaoqi@sysucc.org.cn) from Bioinformatic Center of Sun Yat-sen University Cancer Center.
62 | 63 | 64 | -------------------------------------------------------------------------------- /R/plotMutTree.R: -------------------------------------------------------------------------------- 1 | #' plotMutTree plots phylogenetic tree of mutations 2 | #' 3 | #' @param maf Maf or MafList object generated by readMaf function. 4 | #' @param patient.id Select the specific patients. 5 | #' @param method Approach to construct phylogenetic trees. 6 | #' Choose one of "NJ"(Neighbor-Joining), "MP"(maximum parsimony), 7 | #' "ML"(maximum likelihood), "FASTME.ols" or "FASTME.bal". 8 | #' @param bootstrap.rep.num Bootstrap iterations. Default 500. 9 | #' @param min.vaf The minimum value of vaf. Default 0.02. 10 | #' @param group a list that used to indicate the sample groups 11 | #' @param group.colors an array indicates the colors of sample groups. 12 | #' @param title title of the plot. 13 | #' @param hexpand_ratio hexpand ratio. see \code{\link[ggtree]{hexpand}} 14 | #' 15 | #' @examples 16 | #' library(MesKit) 17 | #' data.type <- "split1" 18 | #' 19 | #' maf <- readMaf( 20 | #' mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)), 21 | #' ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)), 22 | #' clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)), 23 | #' refBuild = "hg19", 24 | #' ccf.conf.level = 0.95 25 | #' ) 26 | #' 27 | #' # construct a group 28 | #' group <- list( 29 | #' Coad = paste0("Coad_", 1:5), 30 | #' OveryLM = paste0("OveryLM_", 1:5), 31 | #' OveryRM = paste0("OveryRM_", 1:6), 32 | #' UterusM = paste0("UterusM_", c(1, 3)) 33 | #' ) 34 | #' 35 | #' plotMutTree(maf, patient.id = "Met1", group = group, title = "CRC Met") 36 | #' @param ... parameters in the getPhyloTree function.
37 | #' @return a list with two elements: \code{phyloTree}, the value returned by \code{MesKit::getPhyloTree()}, and \code{plot}, the value returned by \code{viewTrees()}. 38 | #' @export 39 | 40 | plotMutTree <- function(maf, 41 | patient.id, 42 | method = "NJ", 43 | min.vaf = 0.02, 44 | bootstrap.rep.num = 500, 45 | group = NULL, 46 | group.colors = NULL, 47 | title = "cancer", 48 | hexpand_ratio = 0.3, 49 | ...) { 50 | message("Building trees") 51 | phyloTree <- MesKit::getPhyloTree(maf, 52 | patient.id = patient.id, 53 | method = method, min.vaf = min.vaf, 54 | bootstrap.rep.num = bootstrap.rep.num, 55 | ... 56 | ) 57 | 58 | message("Viewing trees") 59 | 60 | p_trees <- viewTrees( 61 | phyloTree = phyloTree, 62 | group = group, 63 | group.colors = group.colors, 64 | title = title, 65 | hexpand_ratio = hexpand_ratio 66 | ) 67 | 68 | return( 69 | list( 70 | phyloTree = phyloTree, 71 | plot = p_trees 72 | ) 73 | ) 74 | } 75 | -------------------------------------------------------------------------------- /R/readCNAProfile.R: -------------------------------------------------------------------------------- 1 | #' Read CNA Profiles 2 | #' 3 | #' We used a `CNAqc` object, containing a set of mutations, CNA calls and tumor purity values. 4 | #' The CNAqc was used to deal with the allele-specific CNAs. 5 | #' 6 | #' @details 7 | #' This code reads the CNA Profiles for each patient. 8 | #' The tumor names of maf and seg are required to match each other. 9 | #' 10 | #' @param Patient_ID select a specific patient. 11 | #' If `NULL` (default), `maf` and `seg` are used as given (a single Maf and seg); otherwise `maf[[Patient_ID]]` and `seg[[Patient_ID]]` are used (MafList and segList). 12 | #' @param maf Maf or MafList object generated by `readMaf()` function 13 | #' @param seg seg or seglist. 14 | #' @param purity tumor purity. An unnamed single value is used for every sample; a named vector is looked up by `Tumor_Sample_Label`. 15 | #' @param ref human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'. 16 | #' @return a list of the objects created by `CNAqc::init()`, one per tumor sample, named by `Tumor_Sample_Label`.
17 | #' @note An unnamed `purity` must have length 1 or one value per tumor sample (in sorted `Tumor_Sample_Label` order); a named `purity` must cover every tumor sample. 18 | #' @export 19 | 20 | readCNAProfile <- function(maf, 21 | seg, 22 | Patient_ID = NULL, 23 | purity = 1, 24 | ref = "hg19") { 25 | if (!is.null(Patient_ID)) { 26 | maf <- maf[[Patient_ID]] 27 | seg <- seg[[Patient_ID]] 28 | } 29 | 30 | # read mutations. 31 | snvs <- maf@data %>% 32 | dplyr::mutate(DP = Ref_allele_depth + Alt_allele_depth) %>% 33 | dplyr::rename( 34 | chr = Chromosome, from = Start_Position, to = End_Position, 35 | ref = Reference_Allele, 36 | alt = Tumor_Seq_Allele2, 37 | NV = Alt_allele_depth 38 | ) %>% 39 | dplyr::select(chr, from, to, ref, alt, DP, NV, VAF, dplyr::everything()) 40 | 41 | # read CNAs. 42 | cna <- seg %>% 43 | dplyr::rename( 44 | chr = Chromosome, 45 | from = Start_Position, 46 | to = End_Position, 47 | Major = Major_CN, 48 | minor = Minor_CN 49 | ) %>% 50 | dplyr::select(chr, from, to, Major, minor, dplyr::everything()) 51 | 52 | if (!identical( 53 | unique(sort(snvs$Tumor_Sample_Label)), 54 | unique(sort(cna$Tumor_Sample_Label)) 55 | # TODO 56 | # What if snv has more samples or cnv has more samples? 57 | # a filter operation before asserting is a better option. 58 | )) { 59 | stop("The Tumor_Sample_Labels of SNV and CNA are not identical") 60 | } 61 | 62 | TumorSamples <- unique(sort(snvs$Tumor_Sample_Label)) 63 | cnaqc.list <- list() 64 | 65 | # Get purity info 66 | if (is.null(names(purity))) { 67 | # Unnamed purity: one value shared by all samples, or one value per sample in the order of TumorSamples. 68 | if (!(length(purity) %in% c(1L, length(TumorSamples)))) { 69 | stop("An unnamed purity must have length 1 or one value per tumor sample") 70 | } 71 | purity <- setNames(rep_len(purity, length(TumorSamples)), nm = TumorSamples) 72 | } 73 | 74 | # purity is indexed by sample label below; a label without an entry would give NA. 75 | if (!all(TumorSamples %in% names(purity))) stop("purity has no value for some tumor samples")
76 | for (i in TumorSamples) { 77 | cnaqc.list[[i]] <- CNAqc::init( 78 | snvs %>% dplyr::filter(Tumor_Sample_Label == i), 79 | cna %>% dplyr::filter(Tumor_Sample_Label == i), 80 | purity[i], 81 | ref = ref 82 | ) 83 | } 84 | 85 | return(cnaqc.list) 86 | } 87 | -------------------------------------------------------------------------------- /R/plotCNAProfile.R: -------------------------------------------------------------------------------- 1 | #' Visualize CNA profile 2 | #' 3 | #' This function plots the allele-specific CNAs of multiple-samples. 4 | #' Use [readCNAProfile()] to build `cnaqc.list`. 5 | #' 6 | #' @param cnaqc.list a list of CNAqc objects, one per sample; its names are used as track labels ("Sample 1", "Sample 2", ... when unnamed). 7 | #' @param min_length_show the minimal length (`to - from`) of CNA segments to show. 8 | #' @return a ggplot object with one track per sample. 9 | #' @import CNAqc 10 | #' 11 | #' @export 12 | 13 | plotCNAProfile <- function(cnaqc.list, min_length_show = 1e5) { 14 | L <- cnaqc.list 15 | Ln <- names(L) 16 | if (is.null(Ln)) { 17 | Ln <- paste0("Sample ", seq_along(L)) 18 | names(L) <- Ln # unnamed input: attach the labels so that L[[s]] below resolves 19 | } 20 | 21 | # Fixed karyotype palette; labels outside it are recoded to "other_AMP" below. 22 | # NOTE the CNAqc::: calls below are an invalid operation when submitted as CRAN/Bioc Package 23 | 24 | KARYO_colors <- setNames( 25 | c("white", "steelblue", "darkblue", "turquoise4", "#F3BA45", "#F7BCB4", "#EF7969"), 26 | nm = c("1:1", "1:0", "0:0", "2:0", "2:1", "2:2", "3:1") 27 | ) 28 | 29 | calls <- lapply(Ln, function(s) { 30 | W <- L[[s]]$cna %>% 31 | dplyr::mutate(label = paste(Major, minor, 32 | sep = ":" 33 | ), CN = minor + Major, sample = s) %>% 34 | dplyr::select(chr, from, to, label, CN, sample) 35 | CNAqc:::relative_to_absolute_coordinates(L[[s]], W) 36 | }) 37 | calls_flat <- suppressWarnings(Reduce( 38 | function(x, y) { 39 | dplyr::full_join(x, 40 | y, 41 | by = c("chr", "from", "to", "label", "CN", "sample") 42 | ) 43 | }, 44 | calls 45 | ) %>% dplyr::mutate(label = ifelse(label %in% names(KARYO_colors), 46 | label, "other_AMP" 47 | ))) 48 | KARYO_colors <- c(KARYO_colors, other_AMP = "#9A2414") 49 | chromosomes <-
calls_flat$chr %>% unique() 50 | reference_genome <- CNAqc:::get_reference(L[[1]]$reference_genome) %>% 51 | dplyr::filter(chr %in% chromosomes) 52 | low <- min(reference_genome$from) 53 | upp <- max(reference_genome$to) 54 | 55 | bl_genome <- suppressMessages( 56 | blank_genome( # NOTE(review): blank_genome is not defined in this file; confirm it is in scope (CNAqc internal?) 57 | ref = L[[1]]$reference_genome, 58 | chromosomes = chromosomes, 59 | label_chr = NA 60 | ) + ggplot2::labs(x = "", y = "") 61 | ) 62 | 63 | seg_id <- setNames(seq_along(Ln), nm = Ln) 64 | calls_flat$sample_id <- seg_id[calls_flat$sample] 65 | 66 | calls_flat <- calls_flat %>% 67 | dplyr::filter(label != "1:1") %>% 68 | dplyr::filter(to - from >= min_length_show) 69 | 70 | bl_genome + 71 | ggplot2::geom_segment( 72 | data = calls_flat, 73 | ggplot2::aes( 74 | x = from, xend = to, y = sample_id, 75 | yend = sample_id, color = label 76 | ), size = 5 77 | ) + 78 | ggplot2::scale_color_manual(values = KARYO_colors[2:length(KARYO_colors)]) + 79 | ggplot2::coord_polar(theta = "x", clip = "off") + 80 | ggplot2::guides(color = ggplot2::guide_legend("Karyotype", nrow = 1)) + 81 | ggplot2::ylim(-4, max(seg_id) + 1) + 82 | ggplot2::labs( 83 | title = "Comparative CNA", 84 | subtitle = paste0("Tracks: ", paste(Ln, collapse = ", ")) 85 | ) + 86 | ggplot2::theme( 87 | legend.key.height = ggplot2::unit(0.1, "cm"), axis.text.y = ggplot2::element_blank(), 88 | panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), 89 | panel.border = ggplot2::element_rect(size = 0.3) 90 | ) 91 | } 92 | -------------------------------------------------------------------------------- /man/calRoutines.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/calRoutines.R 3 | \name{calRoutines} 4 | \alias{calRoutines} 5 | \title{calRoutines calculates the H index and JSI index according to the pair-wise comparison of CCF} 6 | \usage{ 7 | calRoutines( 8 |
maf, 9 | PrimaryId, 10 | patient.id = NULL, 11 | CCF_cutoff = 0.1, 12 | pairByTumor = TRUE, 13 | use.tumorSampleLabel = FALSE, 14 | maf_drivers = NULL, 15 | subtitle = "both", 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{maf}{Maf or MafList object generated by \code{\link[MesKit:readMaf]{MesKit::readMaf()}} function.} 21 | 22 | \item{PrimaryId}{Primary tumor IDs to indicate the primary-metastases relationships.} 23 | 24 | \item{patient.id}{Select the specific patients. Default NULL, all patients are included.} 25 | 26 | \item{CCF_cutoff}{The minimal cutoffs for the present status. The mutations with CCF smaller than CCF_cutoff were consider as absent statuses.} 27 | 28 | \item{pairByTumor}{Compare JSI between different tumors. Default TRUE.} 29 | 30 | \item{use.tumorSampleLabel}{Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.} 31 | 32 | \item{maf_drivers}{Driver information. Two columns are required, including "Mut_ID" and "is.driver". If provided, add the driver mutations in the plot. "Mut_ID" = str_c(Chromosome, Start_Position, Reference_Allele , Tumor_Seq_Allele2, sep = ":")} 33 | 34 | \item{subtitle}{the information shows in the subtitle. Options including "JSI", "Hindex", "both" and "none". 
Default is "both"} 35 | 36 | \item{...}{Other options passed to \code{\link[MesKit:subMaf]{MesKit::subMaf()}}.} 37 | } 38 | \description{ 39 | calRoutines calculates the H index and JSI index according to the pair-wise comparison of CCF 40 | } 41 | \examples{ 42 | library(MesKit) 43 | data.type <- "split1" 44 | 45 | maf <- readMaf( 46 | mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.mutation.txt", data.type)), 47 | ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.CCF.txt", data.type)), 48 | clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.\%s.clinical.txt", data.type)), 49 | refBuild = "hg19", 50 | ccf.conf.level = 0.95 51 | ) 52 | cal <- calRoutines( 53 | maf = maf, 54 | patient.id = "Met1", 55 | PrimaryId = "Coad", 56 | pairByTumor = TRUE, 57 | use.tumorSampleLabel = TRUE, 58 | subtitle = "both" 59 | ) 60 | 61 | plot_grid(plotlist = cal$Met1$plist, nrow = 1) 62 | ## add driver information 63 | maf_driver <- data.frame( 64 | Mut_ID = c("5:112170777:CAGA:-", "1:147092680:-:C"), 65 | is.driver = c(TRUE, TRUE) 66 | ) 67 | cal <- calRoutines( 68 | maf = maf, 69 | patient.id = "Met1", 70 | PrimaryId = "Coad", 71 | pairByTumor = TRUE, 72 | use.tumorSampleLabel = TRUE, 73 | subtitle = "both", 74 | maf_drivers = maf_driver 75 | ) 76 | 77 | plot_grid(plotlist = cal$Met1$plist, nrow = 1) 78 | } 79 | \references{ 80 | \enumerate{ 81 | \item H index. Hu, Z., et al., Quantitative evidence for early metastatic seeding in colorectal cancer. Nat Genet, 2019. 51(7): p. 1113-1122. 82 | \item JSI index. Hu, Z., et al., Multi-cancer analysis of clonality and the timing of systemic spread in paired primary tumors and metastases. Nat Genet, 2020. 52(7): p. 701-708. 
83 | } 84 | } 85 | -------------------------------------------------------------------------------- /R/getClinSites.R: -------------------------------------------------------------------------------- 1 | #' getClinSites captures the clinical targetable sites of oncokb based on the mutation status 2 | #' @details 3 | #' We match the drivers genes between maf files and clinical sites in oncokb. 4 | #' We only match the gene names, whereas ignoring the cancer types and gene alterations. 5 | #' The main targertable alterations include gene fusions (like BCR-ABL1 fusion), 6 | #' Oncogenic mutations, Exon deletions/insertion, Amplifications, 7 | #' Deletions and Singles-nucleotide mutation (BRAC V600E). 8 | #' Please mannual check the mutation status. 9 | #' @param maf Maf or MafList object generated by readMaf function 10 | #' @param Patient_ID Patient_ID. See specific patient. 11 | #' 12 | #' @examples 13 | #' library(MesKit) 14 | #' data.type <- "split1" 15 | #' 16 | #' maf <- readMaf( 17 | #' mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)), 18 | #' ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)), 19 | #' clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)), 20 | #' refBuild = "hg19", 21 | #' ccf.conf.level = 0.95 22 | #' ) 23 | #' 24 | #' # see clinical targetable sites 25 | #' sites <- getClinSites(maf) 26 | #' 27 | #' # see one patinets 28 | #' sites <- getClinSites(maf, Patient_ID = "Breast") 29 | #' 30 | #' # View data 31 | #' DT::datatable(sites) 32 | #' @export 33 | #' 34 | getClinSites <- function(maf, Patient_ID = NULL) { 35 | 36 | # targets = read.table(system.file(package="MPTevol", "extdata", "oncokb_biomarker_drug_associations.tsv"), header = T, sep = "\t" ) 37 | 38 | message( 39 | " We match the drivers genes between maf files and clinical sites in oncokb. 
We only match the gene names, whereas ignoring the cancer types and gene alterations. The main targertable alterations include gene fusions (like BCR-ABL1 fusion), Oncogenic mutations, Exon deletions/insertion, Amplifications, Deletions and Singles-nucleotide mutation (BRAF V600E). Please mannual check the mutation status. 40 | Reference: oncokb.org 41 | " 42 | ) 43 | # Load the OncoKB biomarker/drug association table shipped in inst/extdata. 44 | targets <- read.table( 45 | system.file( 46 | package = "MPTevol", "extdata", 47 | "oncokb_biomarker_drug_associations.tsv" 48 | ), 49 | header = TRUE, sep = "\t" 50 | ) 51 | colnames(targets)[5] = "Drugs" 52 | # One patient -> a single data frame; all patients -> a list with one data frame per element of maf. 53 | if (!is.null(Patient_ID)) { 54 | maf <- maf[[Patient_ID]] 55 | Sites <- findSites(maf, targets = targets) 56 | # No DT::datatable() call here: its result was discarded, and callers render the returned table themselves. 57 | } else { 58 | Sites <- lapply(maf, findSites, targets = targets) 59 | } 60 | 61 | Sites 62 | } 63 | 64 | findSites <- function(tumor, targets) { 65 | select.columns <- c( 66 | "Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Variant_Classification", "Variant_Type", "Reference_Allele", "VAF", "Tumor_Sample_Barcode", "Tumor_ID", 67 | "Patient_ID", "Tumor_Sample_Label", "Tumor_Average_VAF" 68 | ) 69 | 70 | if ("Protein_Change" %in% colnames(tumor@data)) { 71 | select.columns <- c(select.columns, "Protein_Change") 72 | } 73 | 74 | if ("Clonal_Status" %in% colnames(tumor@data)) { 75 | select.columns <- c(select.columns, c("Clonal_Status", "Tumor_Average_CCF")) 76 | } 77 | 78 | tumor@data %>% 79 | dplyr::select(dplyr::all_of(select.columns)) %>% 80 | dplyr::inner_join( 81 | targets, 82 | by = c("Hugo_Symbol" = "Gene") 83 | ) 84 | } 85 | -------------------------------------------------------------------------------- /man/plotCNA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotCNA.R 3 | \name{plotCNA} 4 | \alias{plotCNA} 5 | \title{plotCNA} 6 | \usage{ 7 | plotCNA( 8 | seg, 9 | patient.id = NULL, 10 | sampleOrder = NULL, 
11 | chrSilent = NULL, 12 | refBuild = "hg19", 13 | sample.text.size = 11, 14 | chrom.text.size = 3, 15 | legend.text.size = 9, 16 | legend.title.size = 11, 17 | annot.text.size = 3, 18 | sample.bar.height = 0.5, 19 | chrom.bar.height = 0.5, 20 | showRownames = TRUE, 21 | removeEmptyChr = TRUE, 22 | showCytoband = FALSE, 23 | showGene = FALSE, 24 | GeneNames = NULL, 25 | use.tumorSampleLabel = FALSE, 26 | Type.name = "Type", 27 | Type.colors = setNames(c("#6baed6", "#084594", "#f4a582", "#d73027"), nm = c("Loss", 28 | "Deletion", "Gain", "Amplification")), 29 | rect.patients.size = 0 30 | ) 31 | } 32 | \arguments{ 33 | \item{seg}{Object generated by \code{\link{readSegment}} function.} 34 | 35 | \item{patient.id}{Select the specific patients. Default NULL, all patients are included.} 36 | 37 | \item{sampleOrder}{A named list which contains the sample order used in plotting the final profile. Default NULL.} 38 | 39 | \item{chrSilent}{Chromosomes excluded in the analysis. e.g, 1, 2, 3. Default NULL.} 40 | 41 | \item{refBuild}{Human reference genome versions of hg18, hg19 or hg38 by UCSC. Default "hg19".} 42 | 43 | \item{sample.text.size}{Fontsize of sample name. Default 11.} 44 | 45 | \item{chrom.text.size}{Fontsize of chromosome text. Default 3.} 46 | 47 | \item{legend.text.size}{Fontsize of legend text. Default 9.} 48 | 49 | \item{legend.title.size}{Fontsize of legend title. Default 11.} 50 | 51 | \item{annot.text.size}{Fontsize of cytoband or gene symbols. Default 3.} 52 | 53 | \item{sample.bar.height}{Bar height of each sample. Default 0.5.} 54 | 55 | \item{chrom.bar.height}{Bar height of each chromosome. Default 0.5.} 56 | 57 | \item{showRownames}{Logical (Default: TRUE). Show sample names of rows.} 58 | 59 | \item{removeEmptyChr}{Remove empty chromosomes that do not exist in all samples. Default TRUE.} 60 | 61 | \item{showCytoband}{Logical (Default: FALSE). Show cytobands on the plot. 
Only when the seg object is created with GISTIC results, this parameter can be TRUE.} 62 | 63 | \item{showGene}{Logical (Default: FALSE). Show gene symbols on the plot. Only when the seg object is created with txdb, this parameter can be TRUE.} 64 | 65 | \item{GeneNames}{The gene symbols to show. Show the selected genes rather than all genes.} 66 | 67 | \item{use.tumorSampleLabel}{Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' with 'Tumor_Sample_Label'.} 68 | 69 | \item{Type.name}{The column to plot the CNVs.} 70 | 71 | \item{Type.colors}{Set the colors.} 72 | 73 | \item{rect.patients.size}{Set the line size of rect to indicate the borders of patients} 74 | } 75 | \value{ 76 | a heatmap plot of CNA profile 77 | } 78 | \description{ 79 | plotCNA 80 | } 81 | \examples{ 82 | segFile <- system.file("extdata", "CRC_HZ.seg.txt", package = "MesKit") 83 | seg <- readSegment(segFile = segFile) 84 | plotCNA(seg) 85 | 86 | ## showCytoband 87 | gisticAmpGenesFile <- system.file("extdata", "COREAD_amp_genes.conf_99.txt", package = "MesKit") 88 | gisticDelGenesFile <- system.file("extdata", "COREAD_del_genes.conf_99.txt", package = "MesKit") 89 | gisticAllLesionsFile <- system.file("extdata", "COREAD_all_lesions.conf_99.txt", package = "MesKit") 90 | seg <- readSegment(segFile = segFile, 91 | gisticAmpGenesFile = gisticAmpGenesFile, 92 | gisticDelGenesFile = gisticDelGenesFile, 93 | gisticAllLesionsFile = gisticAllLesionsFile) 94 | plotCNA(seg, showCytoband = TRUE) 95 | 96 | 97 | } 98 | -------------------------------------------------------------------------------- /inst/extdata/tree_final.dist: -------------------------------------------------------------------------------- 1 | 19 2 | READ_1 0.000000 58.000000 60.000000 44.000000 66.000000 98.000000 96.000000 93.000000 97.000000 86.000000 126.000000 108.000000 122.000000 102.000000 105.000000 114.000000 75.000000 72.000000 44.000000 3 | READ_2 58.000000 0.000000 28.000000 60.000000 68.000000 100.000000 98.000000 
95.000000 99.000000 88.000000 128.000000 110.000000 124.000000 104.000000 107.000000 116.000000 77.000000 74.000000 60.000000 4 | READ_3 60.000000 28.000000 0.000000 62.000000 70.000000 102.000000 100.000000 97.000000 101.000000 90.000000 130.000000 112.000000 126.000000 106.000000 109.000000 118.000000 79.000000 76.000000 62.000000 5 | READ_4 44.000000 60.000000 62.000000 0.000000 68.000000 100.000000 98.000000 95.000000 99.000000 88.000000 128.000000 110.000000 124.000000 104.000000 107.000000 116.000000 77.000000 74.000000 34.000000 6 | READ_5 66.000000 68.000000 70.000000 68.000000 0.000000 86.000000 84.000000 81.000000 85.000000 74.000000 114.000000 96.000000 110.000000 90.000000 93.000000 102.000000 35.000000 32.000000 68.000000 7 | OvaryLM_1 98.000000 100.000000 102.000000 100.000000 86.000000 0.000000 44.000000 41.000000 45.000000 30.000000 56.000000 38.000000 52.000000 32.000000 35.000000 44.000000 95.000000 92.000000 100.000000 8 | OvaryLM_2 96.000000 98.000000 100.000000 98.000000 84.000000 44.000000 0.000000 23.000000 11.000000 32.000000 72.000000 54.000000 68.000000 48.000000 51.000000 60.000000 93.000000 90.000000 98.000000 9 | OvaryLM_3 93.000000 95.000000 97.000000 95.000000 81.000000 41.000000 23.000000 0.000000 24.000000 29.000000 69.000000 51.000000 65.000000 45.000000 48.000000 57.000000 90.000000 87.000000 95.000000 10 | OvaryLM_4 97.000000 99.000000 101.000000 99.000000 85.000000 45.000000 11.000000 24.000000 0.000000 33.000000 73.000000 55.000000 69.000000 49.000000 52.000000 61.000000 94.000000 91.000000 99.000000 11 | OvaryLM_5 86.000000 88.000000 90.000000 88.000000 74.000000 30.000000 32.000000 29.000000 33.000000 0.000000 58.000000 40.000000 54.000000 34.000000 37.000000 46.000000 83.000000 80.000000 88.000000 12 | OvaryRM_1 126.000000 128.000000 130.000000 128.000000 114.000000 56.000000 72.000000 69.000000 73.000000 58.000000 0.000000 36.000000 24.000000 42.000000 55.000000 34.000000 123.000000 120.000000 128.000000 13 | OvaryRM_2 
108.000000 110.000000 112.000000 110.000000 96.000000 38.000000 54.000000 51.000000 55.000000 40.000000 36.000000 0.000000 32.000000 24.000000 37.000000 24.000000 105.000000 102.000000 110.000000 14 | OvaryRM_3 122.000000 124.000000 126.000000 124.000000 110.000000 52.000000 68.000000 65.000000 69.000000 54.000000 24.000000 32.000000 0.000000 38.000000 51.000000 30.000000 119.000000 116.000000 124.000000 15 | OvaryRM_4 102.000000 104.000000 106.000000 104.000000 90.000000 32.000000 48.000000 45.000000 49.000000 34.000000 42.000000 24.000000 38.000000 0.000000 31.000000 30.000000 99.000000 96.000000 104.000000 16 | OvaryRM_5 105.000000 107.000000 109.000000 107.000000 93.000000 35.000000 51.000000 48.000000 52.000000 37.000000 55.000000 37.000000 51.000000 31.000000 0.000000 43.000000 102.000000 99.000000 107.000000 17 | OvaryRM_6 114.000000 116.000000 118.000000 116.000000 102.000000 44.000000 60.000000 57.000000 61.000000 46.000000 34.000000 24.000000 30.000000 30.000000 43.000000 0.000000 111.000000 108.000000 116.000000 18 | UterusM_1 75.000000 77.000000 79.000000 77.000000 35.000000 95.000000 93.000000 90.000000 94.000000 83.000000 123.000000 105.000000 119.000000 99.000000 102.000000 111.000000 0.000000 25.000000 77.000000 19 | UterusM_3 72.000000 74.000000 76.000000 74.000000 32.000000 92.000000 90.000000 87.000000 91.000000 80.000000 120.000000 102.000000 116.000000 96.000000 99.000000 108.000000 25.000000 0.000000 74.000000 20 | diploid 44.000000 60.000000 62.000000 34.000000 68.000000 100.000000 98.000000 95.000000 99.000000 88.000000 128.000000 110.000000 124.000000 104.000000 107.000000 116.000000 77.000000 74.000000 0.000000 21 | -------------------------------------------------------------------------------- /R/maf2variants.R: -------------------------------------------------------------------------------- 1 | 2 | #' maf2variants 3 | #' @description Change the maf object into variants data frame for the subclonal structures analysis. 
4 | 5 | #' @param maf Maf or MafList object generated by `readMaf()` function 6 | #' @param patient.id Select the specific patients. Default `NULL`, all patients are included. 7 | #' @param ccf.cutoff Removing low-CCF mutations (default: 0.1). When `extract.VAF = TRUE`, mutations with VAF below `ccf.cutoff / 2` are removed instead. 8 | #' @param extract.VAF Whether extract the VAF information. Default `FALSE`: extract CCF rather than VAF. 9 | #' 10 | #' @details 11 | #' 12 | #' This function extracts the `Cluster` information from the CCF data. Therefore, the `Cluster` column is required in the ccfFile. 13 | #' 14 | #' For the output `variants`, the first five columns are Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster. The remaining columns indicate variant cellular prevalence for each sample. 15 | #' 16 | #' @return A wide-format data frame when `patient.id` is given, otherwise a named list with one such data frame per patient. 17 | #' @examples 18 | #' data.type <- "split1" 19 | #' 20 | #' maf1 <- readMaf( 21 | #' mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)), 22 | #' ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)), 23 | #' clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)), 24 | #' refBuild = "hg19", 25 | #' ccf.conf.level = 0.95 26 | #' ) 27 | #' 28 | #' ccfs = maf2variants(maf1, patient.id = "Met1") 29 | #' 30 | #' #extract VAF rather than CCF. 31 | #' vafs = maf2variants(maf1, patient.id = "Met1", extract.VAF = T) 32 | #' 33 | #' @export 34 | #' 35 | 36 | maf2variants <- function( 37 | maf, 38 | patient.id = NULL, 39 | ccf.cutoff = 0.1, 40 | extract.VAF = FALSE 41 | ){ 42 | 43 | processMaf2Vars = function(m) { 44 | 45 | maf_data <- MesKit::getMafData(m) 46 | patient <- MesKit::getMafPatient(m) 47 | 48 | #Check whether the Cluster column exists in CCF data. 
49 | if(!"Cluster" %in% colnames(maf_data)){ 50 | stop("The Cluster column is missing in maf data, stop.") 51 | } 52 | 53 | if(extract.VAF){ 54 | 55 | #VAF 56 | message("Extract VAF rather than CCF") 57 | mut_standardcol = c("Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification","Tumor_Sample_Barcode","Tumor_ID", "Patient_ID", "Tumor_Sample_Label", "VAF", "Cluster") 58 | 59 | vars = maf_data %>% 60 | dplyr::select(dplyr::all_of(mut_standardcol)) %>% 61 | dplyr::mutate(Mutid = stringr::str_c(Chromosome, Start_Position, End_Position, Reference_Allele,Tumor_Seq_Allele2, sep = ":")) %>% 62 | # Choose the scale from the whole column: a row-wise test would also multiply percent-scale values <= 1 by 100. 63 | dplyr::mutate(VAF = if (all(VAF <= 1, na.rm = TRUE)) VAF * 100 else VAF) %>% 64 | dplyr::filter(VAF >= ccf.cutoff*100/2) %>% 65 | dplyr::filter(!is.na(Cluster) & Cluster >=1 ) %>% 66 | tidyr::pivot_wider( 67 | id_cols = c(Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster), 68 | names_from = c(Tumor_Sample_Label), 69 | values_from = c(VAF), 70 | values_fill = 0 71 | ) 72 | }else{ 73 | 74 | #CCF 75 | mut_standardcol = c("Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification","Tumor_Sample_Barcode","Tumor_ID", "Patient_ID", "Tumor_Sample_Label", "CCF", "Cluster") 76 | 77 | vars = maf_data %>% 78 | dplyr::select(dplyr::all_of(mut_standardcol)) %>% 79 | dplyr::mutate(Mutid = stringr::str_c(Chromosome, Start_Position, End_Position, Reference_Allele,Tumor_Seq_Allele2, sep = ":")) %>% 80 | # Choose the scale from the whole column: a row-wise test would also multiply percent-scale values <= 1 by 100. 81 | dplyr::mutate(CCF = if (all(CCF <= 1, na.rm = TRUE)) CCF * 100 else CCF) %>% 82 | dplyr::filter(CCF >= ccf.cutoff*100) %>% 83 | dplyr::filter(!is.na(Cluster) & Cluster >=1 ) %>% 84 | tidyr::pivot_wider( 85 | id_cols = c(Mutid, Hugo_Symbol, Variant_Classification, Patient_ID, Cluster), 86 | names_from = c(Tumor_Sample_Label), 87 | values_from = c(CCF), 88 | values_fill = 0 89 | ) 90 | 91 | } 92 | 93 | message(sprintf("Patient %s has the following 
cluster: %s", patient, 94 | str_c(sort(unique(vars$Cluster)), collapse = "; ") )) 95 | 96 | vars 97 | } 98 | 99 | 100 | if(is.null(patient.id)){ 101 | Vars <- lapply(maf, processMaf2Vars) 102 | names(Vars) <- names(maf) 103 | }else{ 104 | Vars = processMaf2Vars(maf[[patient.id]]) 105 | } 106 | 107 | return( 108 | Vars 109 | ) 110 | 111 | } 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /man/inferClonalTrees.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/inferClonalTrees.R 3 | \name{inferClonalTrees} 4 | \alias{inferClonalTrees} 5 | \title{inferClonalTrees} 6 | \usage{ 7 | inferClonalTrees( 8 | project.names, 9 | variants, 10 | vaf.col.names = NULL, 11 | ccf.col.names = NULL, 12 | sample.groups = NULL, 13 | founding.cluster = 1, 14 | ignore.clusters = NULL, 15 | cluster.col.name = "cluster", 16 | clone.colors = NULL, 17 | subclonal.test.model = "non-parametric", 18 | cancer.initiation.model = "monoclonal", 19 | sum.p = 0.05, 20 | alpha = 0.05, 21 | weighted = FALSE, 22 | consensus.tree = TRUE, 23 | plot.models = TRUE, 24 | plot.pairwise.CCF = F, 25 | highlight.note.col.name = NULL, 26 | highlight = "is.driver", 27 | highlight.CCF = FALSE 28 | ) 29 | } 30 | \arguments{ 31 | \item{project.names}{the project names used in the output.} 32 | 33 | \item{variants}{data frame of the variants. 34 | At least cluster column and VAF or CCF columns are required. 35 | Cluster column should contain cluster identities as continuous integer values 36 | starting from 1.} 37 | 38 | \item{vaf.col.names}{the column names of samples containing VAF.} 39 | 40 | \item{ccf.col.names}{the column names of samples containing CCF. 
41 | Note: either setting \strong{vaf.col.names} or \strong{ccf.col.names}.} 42 | 43 | \item{sample.groups}{indicate the samples groups. 44 | An example is setNames(c("Primary","Primary","Met","Met"), nm = c("P1","P2","M1","M1") )} 45 | 46 | \item{founding.cluster}{the name of founding clones, one of the most important parameters. For most of circumstances, the founding cluster is the cluster with the highest average CCF cluster.} 47 | 48 | \item{ignore.clusters}{the clusters that ignores to analysis. 49 | For some clusters, especially clusters that have low vafs in all samples, 50 | were probably false-positive clusters.} 51 | 52 | \item{cluster.col.name}{the column names that containing cluster information.} 53 | 54 | \item{clone.colors}{setting clone colors.} 55 | 56 | \item{subclonal.test.model}{What model to use when generating the bootstrap values 57 | are: c('non-parametric', 'normal', 'normal-truncated', 'beta', 'beta-binomial'). 58 | (Default = "non-parametric")} 59 | 60 | \item{cancer.initiation.model}{cancer evolution model to use, c('monoclonal', 'polyclonal'). 61 | Monoclonal model assumes the orginal tumor (eg. primary tumor) arises from a 62 | single normal cell; polyclonal model assumes the original tumor can 63 | arise from multiple cells (ie. multiple founding clones). 
64 | In the polyclonal model, the total VAF of the separate founding clones 65 | must not exceed 0.5.} 66 | 67 | \item{sum.p}{min probability that the cluster is non-negative in a sample(Default = 0.05).} 68 | 69 | \item{alpha}{alpha level in confidence interval estimate for the cluster (Default = 0.05).} 70 | 71 | \item{weighted}{weighted model (default = FALSE)} 72 | 73 | \item{consensus.tree}{whether build the consensus tree (Default = TRUE).} 74 | 75 | \item{plot.models}{whether plot the models (Default = TRUE).} 76 | 77 | \item{plot.pairwise.CCF}{whether plot pairwise CCF comparison (Default = FALSE).} 78 | 79 | \item{highlight.note.col.name}{highlight context.} 80 | 81 | \item{highlight}{column name to indicate whether highlight the sites (TRUE or FALSE).} 82 | 83 | \item{highlight.CCF}{highlight is CCF or VAF (Default = FALSE).} 84 | } 85 | \description{ 86 | This function have two main modules, 87 | including inferring the clonal trees and plot the clonal models. 88 | } 89 | \details{ 90 | Inferring the clonal trees is the central process in clonal construction. 91 | However, users always find that it is difficult to build clonal trees. 92 | Therefore, we should check the cluster structures before building clonal trees. 93 | Here are some suggestions about building clonal trees. 94 | \enumerate{ 95 | \item chose the optimal clustering methods. Before do mutation clustering. 96 | We should removing the low-quality mutations. 97 | The indels are suggested to be removed. 98 | The mutations in the LOH regions are suggested to be removed. 99 | The mutations in the cnv-regions are should be carefully checked. 100 | \item chose the right founding cluster. 101 | \item ignore some false-negative clusters. For some clusters, 102 | especially clusters that have low vafs in all samples, 103 | were probably false-positive clusters. Removing clusters that having too few mutations. 104 | \item try different cutoffs. 
The two parameters \strong{sum.p} 105 | and \strong{alpha} are used to determine whether a cluster is in a sample. 106 | A relaxed cutoffs (small values of the two parameters) enables more 107 | clusters are though to be present in the sample. 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('nav.navbar').headroom(); 6 | 7 | Toc.init({ 8 | $nav: $("#toc"), 9 | $scope: $("main h2, main h3, main h4, main h5, main h6") 10 | }); 11 | 12 | if ($('#toc').length) { 13 | $('body').scrollspy({ 14 | target: '#toc', 15 | offset: $("nav.navbar").outerHeight() + 1 16 | }); 17 | } 18 | 19 | // Activate popovers 20 | $('[data-bs-toggle="popover"]').popover({ 21 | container: 'body', 22 | html: true, 23 | trigger: 'focus', 24 | placement: "top", 25 | sanitize: false, 26 | }); 27 | 28 | $('[data-bs-toggle="tooltip"]').tooltip(); 29 | 30 | /* Clipboard --------------------------*/ 31 | 32 | function changeTooltipMessage(element, msg) { 33 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 34 | element.setAttribute('data-original-title', msg); 35 | $(element).tooltip('show'); 36 | element.setAttribute('data-original-title', tooltipOriginalTitle); 37 | } 38 | 39 | if(ClipboardJS.isSupported()) { 40 | $(document).ready(function() { 41 | var copyButton = ""; 42 | 43 | $("div.sourceCode").addClass("hasCopyButton"); 44 | 45 | // Insert copy buttons: 46 | $(copyButton).prependTo(".hasCopyButton"); 47 | 48 | // Initialize tooltips: 49 | $('.btn-copy-ex').tooltip({container: 'body'}); 50 | 51 | // Initialize clipboard: 52 | var clipboard = new ClipboardJS('[data-clipboard-copy]', { 53 | text: function(trigger) { 54 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 55 | } 56 | }); 57 | 58 | 
clipboard.on('success', function(e) { 59 | changeTooltipMessage(e.trigger, 'Copied!'); 60 | e.clearSelection(); 61 | }); 62 | 63 | clipboard.on('error', function() { 64 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 65 | }); 66 | 67 | }); 68 | } 69 | 70 | /* Search marking --------------------------*/ 71 | var url = new URL(window.location.href); 72 | var toMark = url.searchParams.get("q"); 73 | var mark = new Mark("div.col-md-9"); 74 | if (toMark) { 75 | mark.mark(toMark, { 76 | accuracy: { 77 | value: "complementary", 78 | limiters: [",", ".", ":", "/"], 79 | } 80 | }); 81 | } 82 | 83 | /* Search --------------------------*/ 84 | /* Adapted from https://github.com/rstudio/bookdown/blob/2d692ba4b61f1e466c92e78fd712b0ab08c11d31/inst/resources/bs4_book/bs4_book.js#L25 */ 85 | // Initialise search index on focus 86 | var fuse; 87 | $("#search-input").focus(async function(e) { 88 | if (fuse) { 89 | return; 90 | } 91 | 92 | $(e.target).addClass("loading"); 93 | var response = await fetch($("#search-input").data("search-index")); 94 | var data = await response.json(); 95 | 96 | var options = { 97 | keys: ["what", "text", "code"], 98 | ignoreLocation: true, 99 | threshold: 0.1, 100 | includeMatches: true, 101 | includeScore: true, 102 | }; 103 | fuse = new Fuse(data, options); 104 | 105 | $(e.target).removeClass("loading"); 106 | }); 107 | 108 | // Use algolia autocomplete 109 | var options = { 110 | autoselect: true, 111 | debug: true, 112 | hint: false, 113 | minLength: 2, 114 | }; 115 | var q; 116 | async function searchFuse(query, callback) { 117 | await fuse; 118 | 119 | var items; 120 | if (!fuse) { 121 | items = []; 122 | } else { 123 | q = query; 124 | var results = fuse.search(query, { limit: 20 }); 125 | items = results 126 | .filter((x) => x.score <= 0.75) 127 | .map((x) => x.item); 128 | if (items.length === 0) { 129 | items = [{dir:"Sorry 😿",previous_headings:"",title:"No results found.",what:"No results 
found.",path:window.location.href}]; 130 | } 131 | } 132 | callback(items); 133 | } 134 | $("#search-input").autocomplete(options, [ 135 | { 136 | name: "content", 137 | source: searchFuse, 138 | templates: { 139 | suggestion: (s) => { 140 | if (s.title == s.what) { 141 | return `${s.dir} >
${s.title}
`; 142 | } else if (s.previous_headings == "") { 143 | return `${s.dir} >
${s.title}
> ${s.what}`; 144 | } else { 145 | return `${s.dir} >
${s.title}
> ${s.previous_headings} > ${s.what}`; 146 | } 147 | }, 148 | }, 149 | }, 150 | ]).on('autocomplete:selected', function(event, s) { 151 | window.location.href = s.path + "?q=" + q + "#" + s.id; 152 | }); 153 | }); 154 | })(window.jQuery || window.$) 155 | 156 | 157 | -------------------------------------------------------------------------------- /R/viewTrees.R: -------------------------------------------------------------------------------- 1 | #' Visualize the trees 2 | #' 3 | #' @param phyloTree phyloTree: The tree is in Parenthetic format. 4 | #' @param tree.format the format of tree, S4 or list. Default is S4. 5 | #' @param normal.node the sample name of normal sample in the tree. 6 | #' @param group a list that used to indicate the sample groups. 7 | #' @param group.colors an array indicates the colors of sample groups. 8 | #' @param showBootstrap whether showing the bootstrap values. Default is TRUE. 9 | #' @param title title of the plot. 10 | #' @param hexpand_ratio hexpand ratio. see \code{\link[ggtree]{hexpand}} 11 | #' 12 | #' @examples 13 | #' # This dist file is the output of MEDICC 14 | #' dist <- system.file(package = "MPTevol", "extdata", "tree_final.dist") 15 | #' 16 | #' # plot CNA trees without colored samples. 17 | #' plotCNAtree(dist = dist) 18 | #' 19 | #' # create a list to indicate the sample groups. 
20 | #' grp <- list( 21 | #' NORMAL = "NORMAL", 22 | #' Breast = paste0("Breast_", 1:5), 23 | #' Coad = paste0("Coad_", 1:5), 24 | #' Lung = paste0("Lung_", 1:5), 25 | #' OveryLM = paste0("OveryLM_", 1:5), 26 | #' OveryRM = paste0("OveryRM_", 1:6), 27 | #' UterusM = paste0("UterusM_", c(1:7)) 28 | #' ) 29 | #' 30 | #' plotCNAtree(dist = dist, grp = grp) 31 | #' @return a ggtree object 32 | #' @export 33 | viewTrees <- function(phyloTree, 34 | tree.format = "S4", 35 | normal.node = "NORMAL", 36 | group = NULL, 37 | group.colors = NULL, 38 | title = "Cancer", 39 | showBootstrap = TRUE, 40 | hexpand_ratio = 0.3) { 41 | if (tree.format == "S4") { 42 | mtree <- phyloTree@tree 43 | } else { 44 | mtree <- phyloTree$tree 45 | } 46 | 47 | # all.length = mtree$edge.length 48 | root.length <- rev(mtree$edge.length)[1] 49 | 50 | # set outgroup and removing the Normal 51 | mtree <- treeio::root(mtree, outgroup = normal.node) 52 | mtree <- treeio::drop.tip(mtree, normal.node) 53 | 54 | # grp <- list( 55 | # ACA = mtree$tip.label[grepl("Aca", mtree$tip.label)] , 56 | # NEC = mtree$tip.label[grepl("Nec", mtree$tip.label)] 57 | # ) 58 | 59 | # combined bootstrap values. 60 | # ifelse() returns a value shaped like its test, so a scalar test kept only the first bootstrap value; branch with if/else instead. 61 | if (showBootstrap) { 62 | bootstrap.value <- if (tree.format == "S4") { 63 | phyloTree@bootstrap.value 64 | } else { 65 | phyloTree$bootstrap.value 66 | } 67 | bp2 <- data.frame( 68 | node = 1:treeio::Nnode(mtree) + treeio::Ntip(mtree), 69 | bootstrap = bootstrap.value 70 | ) 71 | mtree <- dplyr::full_join(mtree, bp2, by = "node") 72 | } 73 | 74 | p_trees <- ggtree::ggtree(mtree, size = 1) + 75 | ggtree::geom_tiplab(size = 4) + 76 | # add scale bars. 
77 | ggtree::geom_treescale(fontsize = 4, linesize = 1, x = 0.1) + 78 | # set root length 79 | ggtree::geom_rootedge(rootedge = root.length, size = 1, colour = "grey40") + 80 | ggtree::hexpand(hexpand_ratio, direction = 1) 81 | 82 | if (showBootstrap) { 83 | p_trees <- p_trees + 84 | ggtree::geom_nodepoint(ggplot2::aes(fill = cut(bootstrap, c(0, 70, 90, 100))), 85 | shape = 21, size = 2 86 | ) + 87 | ggtree::geom_nodelab(ggplot2::aes(label = round(bootstrap)), hjust = -0.2, size = 3.5) + 88 | ggplot2::labs(title = title) + 89 | ggplot2::scale_fill_manual( 90 | values = c("black", "grey", "white"), guide = "legend", 91 | name = "Bootstrap Percentage(BP)", 92 | breaks = c("(90,100]", "(70,90]", "(0,70]"), 93 | labels = expression(BP >= 90, 70 <= BP * " < 90", BP < 70) 94 | ) 95 | } else { 96 | p_trees <- p_trees + 97 | ggplot2::labs(title = title) 98 | } 99 | 100 | # color the groups 101 | if (!is.null(group)) { 102 | # check samples ids between trees and grp; tips are read from the plot data because mtree has no @phylo slot when showBootstrap = FALSE 103 | if (!identical(sort(purrr::reduce(group, c)), sort(p_trees$data$label[p_trees$data$isTip]))) { 104 | stop("the samplenames in grp were not identical to sample names in the tree") 105 | } 106 | 107 | p_trees <- ggtree::groupOTU(p_trees, group, "Sites") 108 | 109 | # get levels 110 | Sites <- levels(p_trees$data$Sites) 111 | 112 | if (!is.null(group.colors)) { 113 | if (!identical(sort(names(group.colors)), sort(levels(p_trees$data$Sites)))) { 114 | # get levels that were not in the group.colors 115 | Site1 <- setdiff(levels(p_trees$data$Sites), names(group.colors)) 116 | 117 | Site.colors <- setNames( 118 | c(set.colors(n = length(Site1), rev = T), group.colors), 119 | nm = c(Site1, names(group.colors)) 120 | ) 121 | 122 | Site.colors <- Site.colors[levels(p_trees$data$Sites)] 123 | } else { 124 | Site.colors <- group.colors[levels(p_trees$data$Sites)] 125 | } 126 | } else { 127 | Site.colors <- set.colors(n = length(Sites)) 128 | } 129 | 130 | p_trees <- p_trees + ggplot2::aes(color = Sites) + 131 | 
ggplot2::scale_colour_manual( 132 | values = Site.colors 133 | ) 134 | } 135 | 136 | p_trees <- p_trees + 137 | ggtree::theme_tree( 138 | plot.title = ggplot2::element_text(hjust = 0.5) 139 | ) 140 | 141 | p_trees 142 | } 143 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • MPTevol 9 | 10 | 11 | 12 | 13 | 14 | 18 | 19 | 20 | Skip to contents 21 | 22 | 23 |
53 |
54 |
58 | 59 | Content not found. Please use links in the navbar. 60 | 61 |
62 |
63 | 64 | 65 |
69 | 70 | 74 | 75 |
76 |
77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /docs/news/index.html: -------------------------------------------------------------------------------- 1 | 2 | Changelog • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
40 | 41 |
42 |

MPTevol 0.0.0.9000

43 |
  • Package initialization.
  • 44 |
  • Added a NEWS.md file to track changes to the package.
  • 45 |
46 |
47 | 48 | 49 |
52 | 53 | 56 | 57 |
58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 |
--------------------------------------------------------------------------------
/R/calPropDriver.R:
--------------------------------------------------------------------------------
#' Calculate the proportion of driver mutations for each mutation class
#'
#' For every patient, non-silent mutations are grouped by tumor (`Tumor_ID`)
#' and mutation class (`Mutation_Type`), and the fraction of mutations that hit
#' a gene listed in `driverGene` is computed for each group.
#' The mutations are classified by [MesKit::classifyMut()] internally.
#'
#' @param maf Maf or MafList object generated by `readMaf()` function
#' @param patient.id Select the specific patients. Default `NULL`, all patients are included.
#' @param driverGene The driver gene names (gene symbols).
#' @param vaf.cutoff Mutations with a variant allele frequency (VAF) below this
#' value are removed before counting. Rows with a missing VAF are kept.
#' Use `NULL` to disable the filter.
#' @param class The class which would be represented.
#' "SP" (Shared pattern: Public/Shared/Private), other options: "CS" (Clonal status: Clonal/Subclonal)
#' and "SPCS". see [MesKit::classifyMut()].
#' @param classByTumor Logical (Default: `FALSE`). Classify mutations based on "Tumor_ID".
#' @param silent.columns The `Variant_Classification` values in the MAF file that indicate
#' silent mutations. Defaults: c("Silent", "3'Flank", "IGR", "Intron", "RNA")
#'
#' @return A list named by patient. Each element is a list with `maf.prop`
#' (the per-tumor, per-class table with columns `num`, `num_total` and `prop`)
#' and `plot` (a ggplot bar chart of `prop`).
#'
#' @examples
#' \dontrun{
#' # `maf` is a Maf/MafList object returned by readMaf().
#' # `driver.file` is a table of driver genes, e.g. the IntOGen driver list.
#' driverGene <- read.delim(driver.file, header = TRUE) %>%
#'   dplyr::filter(CANCER_TYPE %in% c("BRCA", "COREAD", "LUAD", "LUSC")) %>%
#'   dplyr::pull(SYMBOL) %>%
#'   unique()
#'
#' prop <- calPropDriver(maf, patient.id = "BRCA", driverGene = driverGene)
#'
#' prop$BRCA$plot
#' }
#'
#' @export
#'
calPropDriver <- function(maf,
                          patient.id = NULL,
                          driverGene,
                          class = "SP",
                          classByTumor = FALSE,
                          vaf.cutoff = 0.01,
                          silent.columns = NULL) {

  # To do: be careful about the samples and tumors.

  # Preferred display order of the mutation classes for each classification scheme.
  class.levels <- switch(class,
    SP = c("Public", "Shared", "Private"),
    CS = c("Clonal", "Subclonal"),
    SPCS = c("Public_Clonal", "Shared_Clonal", "Shared_Subclonal", "Private_Subclonal"),
    NULL
  )

  if (is.null(silent.columns)) {
    silent.columns <- c("Silent", "3'Flank", "IGR", "Intron", "RNA")
  }

  ######################################################################

  # Compute the driver proportion table and the bar plot for one patient.
  estProp <- function(patient.id, maf_input, maf_class) {
    message(patient.id)

    # Merge the maf input and the mutation class (natural join on shared columns).
    maf_merge <- maf_input[[patient.id]] %>%
      dplyr::mutate(Mut_ID = stringr::str_c(Hugo_Symbol, Chromosome, Start_Position,
        Reference_Allele, Tumor_Seq_Allele2,
        sep = ":"
      )) %>%
      dplyr::left_join(
        maf_class[[patient.id]]
      ) %>%
      dplyr::select(
        Hugo_Symbol, Chromosome, Start_Position, End_Position,
        Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode,
        Mutation_Type, Patient_ID, Tumor_ID, Variant_Classification, VAF
      )

    # Apply the documented VAF cutoff (previously the argument was ignored).
    if (!is.null(vaf.cutoff)) {
      maf_merge <- dplyr::filter(maf_merge, is.na(VAF) | VAF >= vaf.cutoff)
    }

    maf_data <- maf_merge %>%
      # only keep non-silent mutations
      dplyr::filter(!Variant_Classification %in% silent.columns) %>%
      dplyr::mutate(is.driver = Hugo_Symbol %in% driverGene) %>%
      dplyr::group_by(Tumor_ID, Mutation_Type, is.driver) %>%
      dplyr::summarise(num = dplyr::n()) %>%
      dplyr::group_by(Tumor_ID, Mutation_Type) %>%
      dplyr::mutate(num_total = sum(num)) %>%
      dplyr::filter(is.driver) %>%
      # Ungroup before touching Mutation_Type: dplyr does not allow a
      # grouping column to be modified inside mutate().
      dplyr::ungroup() %>%
      dplyr::mutate(prop = num / num_total)

    # Order the classes as in class.levels; any label not listed there is
    # appended instead of being silently turned into NA.
    observed.types <- unique(as.character(maf_data$Mutation_Type))
    maf_data$Mutation_Type <- factor(
      maf_data$Mutation_Type,
      levels = union(class.levels, observed.types)
    )

    p1 <- ggplot2::ggplot(
      maf_data,
      ggplot2::aes(x = Tumor_ID, y = prop, fill = Mutation_Type)
    ) +
      ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge(width = 0.90)) +
      ggplot2::theme_bw() +
      ggplot2::labs(x = NULL, y = latex2exp::TeX("Prop of driver mutations")) +
      ggplot2::scale_fill_manual(values = set.colors(length(unique(maf_data$Mutation_Type)))) +
      ggplot2::theme(
        axis.title.x = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_text(angle = 0, hjust = 0.5, size = 14),
        axis.title.y = ggplot2::element_text(size = 16)
      )

    list(
      maf.prop = maf_data,
      plot = p1
    )
  }
  ##########################################################################
  # running

  # Get the mutation groups.
  maf_input <- MesKit::subMaf(maf, patient.id = patient.id, mafObj = FALSE, use.tumorSampleLabel = TRUE)

  # get mutation classifications.
  maf_class <- MesKit::classifyMut(maf, patient.id = patient.id, class = class, classByTumor = classByTumor)

  # Note the different format between maf_input and maf_class when the
  # patient.id is a single value: wrap the result so it can be indexed by patient.
  # NOTE(review): for several patient ids classifyMut() is assumed to return a
  # list named by patient, as it does for patient.id = NULL -- confirm.
  if (length(patient.id) == 1) {
    maf_class <- stats::setNames(list(maf_class), patient.id)
  }

  prop <- lapply(names(maf_input), estProp, maf_input, maf_class)
  names(prop) <- names(maf_input)

  prop
}

--------------------------------------------------------------------------------
/R/tree2timescape.R:
--------------------------------------------------------------------------------
#' tree2timescape
#'
#' This function generates the input of timescape to visualize the fish plot of
#' clonal evolution by using the results of [inferClonalTrees()].
#'
#' @param results the clonal trees generated by [inferClonalTrees()].
#' @param samples the samples to show in the fish plot. Default `NULL`,
#' all samples in `results$models` are used.
#'
#' @return A list with three elements, each holding one entry per matched
#' model: `clonal_prev` (data frame with `clone_id`, `timepoint`,
#' `clonal_prev`), `tree_edges` (data frame with `source`, `target`) and
#' `clone_colours` (data frame with `clone_id`, `colour`).
#'
#' @import clonevol
#' @export
tree2timescape <- function(results, samples = NULL) {
  if (is.null(samples)) {
    samples <- names(results$models)
  } else {
    unknown.samples <- samples[!samples %in% names(results$models)]
    if (length(unknown.samples) > 0) {
      stop("check input samplesNames : ", paste(unknown.samples, collapse = ", "))
    }
  }

  # store the clonevol results in a list
  res <- list(
    samples = samples, clonevol.clone.names = NULL, clonevol.clone.colors = NULL,
    timepoints = seq(1, length(samples)), num.models = nrow(results$matched$index),
    parents = list(), cell.fractions = list(), all = list()
  )

  clonevol.clone.names <- NULL
  clone.nums <- NULL
  clonevol.clone.colors <- NULL

  # create the needed inputs to fishplot
  for (i in seq_len(nrow(results$matched$index))) {
    vv <- NULL
    for (s in samples) {
      v <- results$models[[s]][[results$matched$index[i, s]]]
      v <- clonevol:::rescale.vaf(v)
      v <- v[, c("lab", "vaf", "parent", "color")]

      ## scale vaf and make cell.frac
      max.vaf <- max(v$vaf)
      scale <- 0.5 / max.vaf * 2 * 100
      v$vaf <- v$vaf * scale
      v$vaf[v$vaf > 100] <- 100 # safeguard against rounding error making some vaf slightly > 100

      colnames(v) <- c("clone", s, "parent", "color")
      v <- v[!is.na(v$parent) & v$clone != "0", ]
      if (is.null(vv)) {
        vv <- v
      } else {
        vv <- merge(vv, v, all = TRUE)
      }
    }
    # a clone absent from a sample has fraction 0 there
    for (s in samples) {
      vv[is.na(vv[[s]]), s] <- 0
    }
    vv <- vv[order(as.integer(vv$clone)), ]
    vv$parent[vv$parent == "-1"] <- 0
    rownames(vv) <- vv$clone

    ## fishplot requires clones to be named in sequential order. Do that, but
    ## store the clonevol-generated names and colors for pass-through
    if (is.null(clone.nums)) {
      clone.nums <- c(0, seq(1, nrow(vv)))
      names(clone.nums) <- c(0, vv$clone)

      clonevol.clone.names <- names(clone.nums)
      names(clonevol.clone.names) <- as.character(clone.nums)
      res$clonevol.clone.names <- clonevol.clone.names[-1]

      clonevol.clone.colors <- c("white", vv$color)
      names(clonevol.clone.colors) <- as.character(clone.nums)
      res$clonevol.clone.colors <- clonevol.clone.colors[-1]
    }
    vv$clone <- clone.nums[vv$clone]
    vv$parent <- clone.nums[vv$parent]

    par <- vv$parent
    frac <- vv[, samples]
    res$parents[[i]] <- par
    res$cell.fractions[[i]] <- as.matrix(frac)
    res$all[[i]] <- vv
  }

  ##############################
  # for timescape input

  times <- list(
    clonal_prev = list(),
    tree_edges = list(),
    clone_colours = list()
  )

  for (i in seq_len(res$num.models)) {
    # Long format: one row per (clone, sample). The table is converted to a
    # data.table first because data.table::melt() only has a data.table
    # method, then back to a data.frame so downstream code sees the same type.
    prev_long <- res$all[[i]] %>%
      dplyr::mutate(clone = rownames(.)) %>%
      data.table::as.data.table() %>%
      data.table::melt(
        id.vars = c("clone", "parent"),
        measure.vars = samples
      ) %>%
      as.data.frame()

    # re-set ancestor clonal prev
    # get, for every clone, the summed prevalence of its direct children.
    clonal_prev_ancestor <- prev_long %>%
      dplyr::group_by(variable, parent) %>%
      dplyr::summarise(sumvalue = sum(value)) %>%
      dplyr::ungroup() %>%
      dplyr::rename(clone = parent) %>%
      dplyr::mutate(clone = as.character(clone))

    # prev = current - children
    times$clonal_prev[[i]] <- prev_long %>%
      dplyr::left_join(clonal_prev_ancestor, by = c("clone", "variable")) %>%
      dplyr::mutate(
        sumvalue = ifelse(is.na(sumvalue), 0, sumvalue),
        value1 = value - sumvalue,
        # set value1 = 0 if value1 <= 0
        value1 = ifelse(value1 < 0, 0, value1)
      ) %>%
      dplyr::select(clone, variable, value1) %>%
      dplyr::rename(
        clone_id = clone,
        timepoint = variable,
        clonal_prev = value1
      ) %>% # set arrange of samples.
      dplyr::mutate(timepoint = factor(timepoint, levels = samples)) %>%
      dplyr::arrange(timepoint) %>%
      dplyr::mutate(timepoint = as.character(timepoint))

    # re-mapping clone ids (sequential number -> clonevol clone name).
    cloneNames <- setNames(
      c(0, rownames(res$all[[i]])),
      nm = c(0, res$all[[i]]$clone)
    )

    times$tree_edges[[i]] <- data.frame(
      source = cloneNames[as.character(res$parents[[i]])],
      target = rownames(res$all[[i]])
    ) %>%
      dplyr::filter(source != "0")

    times$clone_colours[[i]] <- data.frame(
      clone_id = rownames(res$all[[i]]),
      colour = res$all[[i]]$color
    )
  }

  times
}

--------------------------------------------------------------------------------
/docs/reference/Seg-class.html:
--------------------------------------------------------------------------------
1 | 2 | Segment Class — Seg-class • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
41 | 42 |
43 |

Segment Class

44 |
45 | 46 | 47 |
48 |

Slots

49 | 50 | 51 |
data
52 |

data.table of segment file containing CNA information.

53 | 54 | 55 |
sample.inof
56 |

data.frame of sample information per patient.

57 | 58 | 59 |
ref.build
60 |

human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.

61 | 62 | 63 |
allele
64 |

Indicates whether the CNAs are allele-specific. Default: TRUE.

65 | 66 | 67 |
68 | 69 |
70 | 71 | 72 |
75 | 76 | 79 | 80 |
81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /docs/reference/set.colors.html: -------------------------------------------------------------------------------- 1 | 2 | Color setting — set.colors • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
41 | 42 |
43 |

Color setting

44 |
45 | 46 |
47 |

Usage

48 |
set.colors(n = 36, rev = FALSE, random = FALSE)
49 |
50 | 51 |
52 |

Arguments

53 |
n
54 |

number of colors to select. A total of 36 colors are saved.

55 |
rev
56 |

output the colors in reverse order.

57 |
random
58 |

randomly select the requested number of colors.

59 |
60 | 61 |
63 | 64 | 65 |
68 | 69 | 72 | 73 |
74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /docs/reference/write.fasta.html: -------------------------------------------------------------------------------- 1 | 2 | write.fasta — write.fasta • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
41 | 42 |
43 |

Prepare the input format for MEDICC.

44 |
45 | 46 |
47 |

Usage

48 |
write.fasta(
49 |   merge_A,
50 |   major = "major",
51 |   out.dir = "data",
52 |   project.names = "tumor"
53 | )
54 |
55 | 56 |
57 |

Arguments

58 |
project.names
59 |

the project names used in the output.

60 |
61 | 62 |
64 | 65 | 66 |
69 | 70 | 73 | 74 |
75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /docs/reference/pipe.html: -------------------------------------------------------------------------------- 1 | 2 | Pipe operator — %>% • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
41 | 42 |
43 |

See magrittr::%>% for details.

44 |
45 | 46 |
47 |

Usage

48 |
lhs %>% rhs
49 |
50 | 51 |
52 |

Arguments

53 |
lhs
54 |

A value or the magrittr placeholder.

55 |
rhs
56 |

A function call using the magrittr semantics.

57 |
58 |
59 |

Value

60 |

The result of calling rhs(lhs).

61 |
62 | 63 |
65 | 66 | 67 |
70 | 71 | 74 | 75 |
76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 |
--------------------------------------------------------------------------------
/R/plotVafCluster.R:
--------------------------------------------------------------------------------
#' plotVafCluster
#'
#' Plot variant clustering in each sample by using combination of box,
#' violin and jitter plots.
#'
#' @param variants data frame of the variants.
#' At least cluster column and VAF or CCF columns are required.
#' Cluster column should contain cluster identities as continuous integer values
#' starting from 1.
#' @param cluster.col.name the name of the column containing cluster
#' information (Default = "cluster").
#' @param vaf.col.names the column names of samples containing VAF.
#' @param violin whether plotting violin (Default = FALSE).
#' @param box whether plotting box (Default = TRUE).
#' @param jitter whether plotting jitter plot (Default = TRUE).
#' @param founding.cluster the name of founding clones, one of the most important parameters. For most of circumstances, the founding cluster is the cluster with the highest average CCF cluster.
#' @param clone.colors setting clone colors. Default `NULL`, a built-in palette
#' of 27 colors is used.
#' @param highlight name of a logical column (TRUE or FALSE) indicating which
#' sites to highlight. Default `NULL`, nothing is highlighted.
#' @param highlight.note.col.name name of the column holding the label shown
#' for each highlighted site in the annotation panel. Default `NULL`, the
#' column "gene_site" is used.
#' @param output.file the output PDF file name (Default = NULL, no file is written)
#' @import clonevol
#' @return a ggplot object
#' @export
plotVafCluster <- function(variants,
                           cluster.col.name = "cluster",
                           vaf.col.names,
                           clone.colors = NULL,
                           violin = FALSE,
                           box = TRUE,
                           jitter = TRUE,
                           founding.cluster = 1,
                           output.file = NULL,
                           highlight = NULL,
                           highlight.note.col.name = NULL) {
  if (is.null(clone.colors)) {
    # Default palette for the variant clusters.
    default.palette <- c(
      "#C6C6C6", "#6FDCBF", "#5AA36E", "#E99692",
      "#B4D985", "#EA7D23", "#E53CFB", "#4B85B7",
      "#8439FA", "#BD8736", "#B3B371", "#A7C7DE",
      "#EE97FC", "#57C222", "#BFABD0", "#44589B",
      "#794C18",
      RColorBrewer::brewer.pal(n = 10, name = "Paired")
    )

    # `[[` returns a plain vector for data.frame, tibble and data.table alike.
    n.clusters <- length(unique(variants[[cluster.col.name]]))
    clone.colors <- default.palette[seq_len(n.clusters)]
  }

  pp <- clonevol::plot.variant.clusters(variants,
    show.cluster.size = FALSE,
    show.cluster.label = FALSE,
    cluster.col.name = cluster.col.name,
    vaf.col.names = vaf.col.names,
    violin = violin,
    box = box,
    jitter = jitter,
    jitter.shape = 1,
    variant.class.col.name = cluster.col.name,
    jitter.color = clone.colors,
    jitter.size = 1.2,
    jitter.alpha = 1,
    jitter.width = 0.2,
    jitter.center.method = "median",
    jitter.center.size = 1,
    jitter.center.color = "darkgray",
    jitter.center.display.value = "none",
    display.plot = FALSE,
    horizontal = TRUE,
    order.by.total.vaf = FALSE,
    highlight = highlight,
    highlight.shape = 21,
    highlight.color = "blue",
    highlight.fill.color = "green",
    highlight.size = 2.5,
    # Notes are not drawn by clonevol; they go into the separate
    # annotation panel built below.
    highlight.note.col.name = NULL,
    highlight.note.size = 2,
    highlight.note.color = "blue",
    highlight.note.angle = 0,
    founding.cluster = founding.cluster,
    ccf = FALSE
  )

  # add annotation of the highlighted sites (e.g. driver genes).
  if (!is.null(highlight) && any(as.logical(variants[[highlight]]), na.rm = TRUE)) {
    note.col <- if (is.null(highlight.note.col.name)) "gene_site" else highlight.note.col.name

    # NOTE(review): assumes the data of the first panel still carries the
    # columns of `variants` -- confirm against clonevol.
    labels <- as.data.frame(pp[[1]]$data)
    if (!note.col %in% colnames(labels)) {
      stop("Column '", note.col, "' is required to label the highlighted sites.")
    }
    labels <- labels[which(as.logical(labels[[highlight]])), , drop = FALSE]

    # Colors are named by cluster so each point gets the color of its own
    # cluster, whatever the row order of the highlighted sites.
    hl.clusters <- sort(unique(labels[[cluster.col.name]]))
    clone.colors.sel <- stats::setNames(clone.colors[hl.clusters], as.character(hl.clusters))

    labels$cluster_y <- labels[[cluster.col.name]]
    labels$cluster_1 <- factor(labels[[cluster.col.name]], levels = hl.clusters)
    labels$note_label <- labels[[note.col]]

    pp1 <- ggplot2::ggplot(labels, ggplot2::aes(y = cluster_y, x = 1, label = note_label)) +
      ggplot2::theme_classic() +
      ggplot2::geom_point(ggplot2::aes(color = cluster_1), size = 3) +
      ggrepel::geom_text_repel(
        nudge_x = 0.15,
        direction = "y",
        hjust = 0,
        segment.size = 0.2,
        size = 3
      ) +
      ggplot2::ylim(0, length(clone.colors) + 1) +
      ggplot2::xlim(1, 0.8) +
      ggplot2::scale_color_manual(values = clone.colors.sel, guide = "none") +
      ggplot2::theme(
        axis.line = ggplot2::element_blank(),
        axis.ticks = ggplot2::element_blank(),
        axis.text = ggplot2::element_blank(),
        axis.title.x = ggplot2::element_blank(),
        axis.title.y = ggplot2::element_blank(),
        plot.title = ggplot2::element_text(hjust = 0.5)
      )

    pp[[length(pp) + 1]] <- pp1
  }

  combined <- ggpubr::ggarrange(plotlist = pp, ncol = length(pp), align = "h")

  if (!is.null(output.file)) {
    grDevices::pdf(output.file, width = 2 * length(pp), height = 4)
    # Inside a function a plot is only drawn when printed explicitly;
    # the device is closed even if drawing fails.
    tryCatch(print(combined), finally = grDevices::dev.off())
  }

  combined
}

--------------------------------------------------------------------------------
/docs/authors.html:
--------------------------------------------------------------------------------
1 | 2 | Authors and Citation • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
39 | 40 |
41 |

Authors

42 | 43 |
  • 44 |

    Qinjian Chen. Author, maintainer. 45 |

    46 |
  • 47 |
  • 48 |

    Shixiang Wang. Author. 49 |

    50 |
  • 51 |
52 | 53 |
54 |

Citation

55 |

56 | 57 |

Chen Q, Wang S (2022). 58 | MPTevol: Clonal Evolutionary History and Metastatic Routines Analysis for 59 | Multiple Primary Tumors. 60 | R package version 0.1.0. 61 |

62 |
@Manual{,
63 |   title = {MPTevol: Clonal Evolutionary History and Metastatic Routines Analysis for
64 | Multiple Primary Tumors},
65 |   author = {Qinjian Chen and Shixiang Wang},
66 |   year = {2022},
67 |   note = {R package version 0.1.0},
68 | }
69 |
70 |
72 | 73 | 74 |
77 | 78 | 81 | 82 |
83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /docs/reference/getKaKs.html: -------------------------------------------------------------------------------- 1 | 2 | getKaKs compares Ka/Ks between different groups — getKaKs • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
41 | 42 |
43 |

getKaKs compares Ka/Ks between different groups

44 |
45 | 46 |
47 |

Usage

48 |
getKaKs(df, vaf_cutoff = 0.05)
49 |
50 | 51 |
52 |

Arguments

53 |
df
54 |

data. Six columns are required to calculate the Ka/Ks, 55 | including "Tumor_Sample_Barcode","Chromosome","Start_Position", 56 | "Reference_Allele","Tumor_Seq_Allele2" and "VAF".

57 |
vaf_cutoff
58 |

VAF cutoff. Removing mutations with low variant allele frequency (VAF).

59 |
60 | 61 |
63 | 64 | 65 |
68 | 69 | 72 | 73 |
74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /docs/reference/plotCNAProfile.html: -------------------------------------------------------------------------------- 1 | 2 | Visualize CNA profile — plotCNAProfile • MPTevol 8 | Skip to contents 9 | 10 | 11 |
37 |
38 |
43 | 44 |
45 |

This function plots the allele-specific CNAs of multiple samples. 46 | See readCNAProfile() for examples.

47 |
48 | 49 |
50 |

Usage

51 |
plotCNAProfile(cnaqc.list, min_length_show = 1e+05)
52 |
53 | 54 |
55 |

Arguments

56 |
cnaqc.list
57 |

cnaqc.list

58 |
min_length_show
59 |

the minimal length of CNVs to show.

60 |
61 | 62 |
64 | 65 | 66 |
69 | 70 | 73 | 74 |
75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /docs/reference/MPTevol-package.html: -------------------------------------------------------------------------------- 1 | 2 | MPTevol: Clonal Evolutionary History and Metastatic Routines Analysis for Multiple Primary Tumors — MPTevol-package • MPTevol 6 | Skip to contents 7 | 8 | 9 |
35 |
36 |
41 | 42 |
43 |

Provides a practical computation framework for dissecting the evolution of multiple primary tumors (MPT), reducing analysis complexity with modular design.

44 |
45 | 46 | 47 |
48 |

Author

49 |

Maintainer: Qinjian Chen chenqingjian2010@163.com (ORCID)

50 |

Authors:

52 | 53 |
54 | 55 | 56 |
59 | 60 | 63 | 64 |
65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | -------------------------------------------------------------------------------- /docs/reference/tree2timescape.html: -------------------------------------------------------------------------------- 1 | 2 | tree2timescape — tree2timescape • MPTevol 8 | Skip to contents 9 | 10 | 11 |
37 |
38 |
43 | 44 |
45 |

This function generates the input of timescape to visualize the fish plot of 46 | clonal evolution by using the results of inferClonalTrees().

47 |
48 | 49 |
50 |

Usage

51 |
tree2timescape(results, samples = NULL)
52 |
53 | 54 |
55 |

Arguments

56 |
results
57 |

the clonal trees generated by inferClonalTrees().

58 |
samples
59 |

the samples to show in the fish plot.

60 |
61 | 62 |
64 | 65 | 66 |
69 | 70 | 73 | 74 |
75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 |
--------------------------------------------------------------------------------
/R/readMaf.R:
--------------------------------------------------------------------------------
#' readMaf
#' @description Read tab delimited MAF (can be plain text or *.gz compressed) file along with sample information file.
#'
#' @param mafFile A tab delimited MAF file (plain text or *.gz compressed). Required.
#' @param clinicalFile A clinical data file includes Tumor_Sample_Barcode, Tumor_ID, Patient_ID. Tumor_Sample_Label is optional. Required.
#' @param ccfFile A CCF file of somatic mutations. Default NULL.
#' @param adjusted.VAF Whether adjusted VAF is included in mafFile. Default FALSE.
#' @param nonSyn.vc List of Variant classifications which are considered as non-silent. Default NULL, use Variant Classifications with "Frame_Shift_Del","Frame_Shift_Ins","Splice_Site","Translation_Start_Site","Nonsense_Mutation","Nonstop_Mutation","In_Frame_Del","In_Frame_Ins","Missense_Mutation"
#' @param use.indel.ccf Whether include indels in ccfFile. Default FALSE.
#' @param ccf.conf.level The confidence level of CCF to identify clonal or subclonal.
#' Only works when "CCF_std" or "CCF_CI_high" is provided in ccfFile. Default 0.95.
#' @param remove.empty.VAF Whether to remove the mutations with VAF=0. Default TRUE.
#' Set to FALSE to retain them when making pair-wise comparisons of CCF.
#' @param refBuild Human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.
#'
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File,clinicalFile = clin.File, refBuild="hg19")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' @return an object of Maf or MafList.
#' @import methods
#' @importFrom data.table fread setkey
#' @importFrom stats qnorm
#' @export readMaf

## read.maf main function
readMaf <- function(
  mafFile,
  clinicalFile,
  ccfFile = NULL,
  adjusted.VAF = FALSE,
  nonSyn.vc = NULL,
  use.indel.ccf = FALSE,
  ccf.conf.level = 0.95,
  remove.empty.VAF = TRUE,
  refBuild = "hg19"
) {

  refBuild <- match.arg(refBuild, choices = c('hg18', 'hg19', 'hg38'), several.ok = FALSE)

  ## get non-silent mutation types
  if (is.null(nonSyn.vc)) {
    nonSyn.vc <- c(
      "Frame_Shift_Del",
      "Frame_Shift_Ins",
      "Splice_Site",
      "Translation_Start_Site",
      "Nonsense_Mutation",
      "Nonstop_Mutation",
      "In_Frame_Del",
      "In_Frame_Ins",
      "Missense_Mutation"
    )
  }

  maf_data <- data.table::fread(
    file = mafFile,
    quote = "",
    header = TRUE,
    data.table = TRUE,
    fill = TRUE,
    sep = '\t',
    skip = "Hugo_Symbol",
    stringsAsFactors = FALSE
  )

  clin_data <- data.table::fread(
    file = clinicalFile,
    quote = "",
    header = TRUE,
    data.table = TRUE,
    fill = TRUE,
    sep = '\t',
    stringsAsFactors = FALSE
  )

  ## merge maf data and clinical data
  ## Columns present in both tables (other than the join key) are taken from
  ## the clinical data, so they are dropped from the MAF first.
  is_col <- setdiff(
    intersect(colnames(maf_data), colnames(clin_data)),
    "Tumor_Sample_Barcode"
  )
  if (length(is_col) > 0) {
    maf_data <- dplyr::select(maf_data, -dplyr::all_of(is_col))
  }
  maf_data <- dplyr::left_join(
    maf_data,
    clin_data,
    by = c(
      "Tumor_Sample_Barcode"
    )
  )

  # check maf data
  maf_data <- validMaf(maf_data, remove.empty.VAF)

  ## calculate Total_allele_depth
  maf_data <- maf_data %>%
    dplyr::mutate(Total_allele_depth = .data$Ref_allele_depth + .data$Alt_allele_depth) %>%
    as.data.frame()

  if (adjusted.VAF) {
    maf_data$VAF_adj <- maf_data$VAF
  }

  ## read ccf files
  if (!is.null(ccfFile)) {
    ccf_data <- suppressWarnings(data.table::fread(
      ccfFile,
      quote = "",
      header = TRUE,
      fill = TRUE,
      sep = '\t',
      stringsAsFactors = FALSE
    ))
    ## check ccf_data
    ccf_data <- validCCF(ccf_data, maf_data, use.indel.ccf = use.indel.ccf)
    ## merge ccf_data to maf_data
    ## NOTE(review): `sample.info` is not defined at this point; the call only
    ## works as long as MesKit:::readCCF never evaluates that argument -- confirm.
    maf_data <- MesKit:::readCCF(maf_data, ccf_data, ccf.conf.level, sample.info, adjusted.VAF, use.indel.ccf = use.indel.ccf)
  }

  ## calculate average adjust VAF (depth-weighted mean per mutation and tumor)
  if ("VAF_adj" %in% colnames(maf_data)) {
    maf_data <- maf_data %>%
      dplyr::group_by(.data$Patient_ID, .data$Tumor_ID, .data$Chromosome,
                      .data$Start_Position, .data$Reference_Allele, .data$Tumor_Seq_Allele2) %>%
      dplyr::mutate(Tumor_Average_VAF = round(
        sum(.data$VAF_adj * .data$Total_allele_depth) /
          sum(.data$Total_allele_depth)
        , 3))
  }

  maf_data <- maf_data %>%
    dplyr::ungroup() %>%
    dplyr::select(-"Total_allele_depth") %>%
    as.data.frame()

  ## build one Maf object per patient
  data_list <- split(maf_data, maf_data$Patient_ID)
  maf_patient_list <- list()
  for (data in data_list) {
    patient <- unique(data$Patient_ID)
    sample.info <- data %>%
      dplyr::select("Tumor_Sample_Barcode", "Tumor_ID") %>%
      dplyr::distinct(.data$Tumor_Sample_Barcode, .keep_all = TRUE)
    if (nrow(sample.info) < 2) {
      n <- nrow(sample.info)
      stop(paste0(patient, " has only ", n, " tumor samples. ",
                  "A minimum of two tumor samples are required for each patient."))
    }
    ## set Maf
    maf <- MesKit:::Maf(
      data = data.table::setDT(data),
      sample.info = as.data.frame(sample.info),
      nonSyn.vc = nonSyn.vc,
      ref.build = refBuild
    )
    maf_patient_list[[patient]] <- maf
  }

  if (length(data_list) > 1) {
    ## set MafList
    maf_list <- MesKit:::MafList(maf_patient_list)
    return(maf_list)
  } else {
    return(maf_patient_list[[1]])
  }
}

--------------------------------------------------------------------------------
/R/validation.R:
--------------------------------------------------------------------------------
# Check and normalise a merged MAF table.
#
# maf_data: data.frame or data.table holding the MAF merged with clinical data.
# remove.empty.VAF: if TRUE, rows with VAF == 0 are dropped.
# Stops when a required column is missing. Returns the table with key columns
# coerced to character, mitochondrial mutations removed and VAF on a 0-1 scale.
validMaf <- function(maf_data, remove.empty.VAF = TRUE){
  ## check required columns
  maf_standardcol <- c("Hugo_Symbol","Chromosome","Start_Position","End_Position",
                       "Variant_Classification", "Variant_Type", "Reference_Allele",
                       "Tumor_Seq_Allele2","Ref_allele_depth","Alt_allele_depth",
                       "VAF", "Tumor_Sample_Barcode","Patient_ID","Tumor_ID")

  if(!all(maf_standardcol %in% colnames(maf_data))){
    missing_fileds_maf <- maf_standardcol[!maf_standardcol %in% colnames(maf_data)]
    info <- paste(missing_fileds_maf, collapse = ",")
    stop(paste0("Missing ", info, " from mafFile"))
  }

  maf_data$Chromosome <- as.character(maf_data$Chromosome)
  maf_data$Tumor_Sample_Barcode <- as.character(maf_data$Tumor_Sample_Barcode)
  maf_data$Patient_ID <- as.character(maf_data$Patient_ID)
  maf_data$Tumor_ID <- as.character(maf_data$Tumor_ID)

  ## make sure VAF is numeric before any numeric comparison is made on it
  maf_data$VAF <- as.numeric(as.character(maf_data$VAF))

  #***** modify by CQJ ******
  # retaining mutations with VAF==0 (remove.empty.VAF = FALSE) keeps the
  # pair-wise comparisons of CCF complete.

  if(remove.empty.VAF){
    ## remove VAF = 0
    ## which() + explicit row index works for data.frame and data.table alike
    ## and drops rows with a missing VAF.
    maf_data <- maf_data[which(maf_data$VAF != 0), ]
  }

  ## remove mutation in chromosome M and chromosome MT
  maf_data <- maf_data[which(!maf_data$Chromosome %in% c("M", "MT")), ]

  ## sort HugoSymbol
  # maf_data <- preprocess_HugoSymbol(maf_data)

  ## Rescale vaf column to 0-1 when it is given in percent
  if(max(maf_data$VAF, na.rm = TRUE) > 1){
    maf_data$VAF <- maf_data$VAF/100
  }

  return(maf_data)
}


# Check a CCF table against the MAF table and keep only the columns used later.
#
# ccf_data: table read from the CCF file.
# maf_data: validated MAF table.
# use.indel.ccf: if TRUE, the allele columns are required and kept as well.
# Warns about patients / samples of the MAF that are absent from the CCF data,
# stops when a required column is missing.
validCCF <- function(ccf_data, maf_data, use.indel.ccf){

  patient_setdiff <- setdiff(unique(maf_data$Patient_ID), unique(ccf_data$Patient_ID))
  if(length(patient_setdiff) > 0){
    # only warn when something is really missing from the CCF data
    warning("Patient: ",paste0(paste(sort(patient_setdiff), collapse = ", "), " are not in ccf data"))
  }

  tsb_setdiff <- setdiff(unique(maf_data$Tumor_Sample_Barcode), unique(ccf_data$Tumor_Sample_Barcode))
  if(length(tsb_setdiff) > 0){
    warning("Tumor sample barcodes: ",paste0(paste(sort(tsb_setdiff), collapse = ", "), " are not in ccf data"))
  }

  ccf_standardcol <- c("Patient_ID", "Tumor_Sample_Barcode", "Chromosome", "Start_Position", "CCF")

  #***** modify by CQJ ******
  #* add the Cluster information in CCF data.

  if("Cluster" %in% colnames(ccf_data)){

    ccf_data$Cluster = as.integer(ccf_data$Cluster)

    if( all(is.na(ccf_data$Cluster))) {
      warning("Please checking the Cluster column, the Cluster should be integer")
    }else{
      ccf_standardcol = c(ccf_standardcol, "Cluster")
    }
  }

  if(!all(ccf_standardcol %in% colnames(ccf_data))){
    missing_fileds_ccf <- ccf_standardcol[!ccf_standardcol %in% colnames(ccf_data)]
    info <- paste(missing_fileds_ccf, collapse = ", ")
    stop(paste0("Missing ", info, " from ccfFile"))
  }

  if(use.indel.ccf){
    indel.ccf.col <- c("Reference_Allele", "Tumor_Seq_Allele2")
    if(!all(indel.ccf.col %in% colnames(ccf_data))){
      missing_fileds_ccf <- indel.ccf.col[!indel.ccf.col %in% colnames(ccf_data)]
      info <- paste(missing_fileds_ccf, collapse = ", ")
      stop(paste0("Missing ", info, " from ccfFile when use.indel.ccf is TRUE"))
    }
  }else{
    indel.ccf.col <- NULL
  }

  ccf_data$Patient_ID <- as.character(ccf_data$Patient_ID)
  ccf_data$Tumor_Sample_Barcode <- as.character(ccf_data$Tumor_Sample_Barcode)
  ccf_data$Chromosome <- as.character(ccf_data$Chromosome)
  ccf_data$CCF <- as.numeric(ccf_data$CCF)

  ## optional columns are kept when present
  keep.columns = c("Patient_ID", "Tumor_Sample_Barcode", "Chromosome", "Start_Position", "CCF")

  if("CCF_Std" %in% colnames(ccf_data)){
    keep.columns = c(keep.columns, "CCF_Std")
  }

  if("Cluster" %in% colnames(ccf_data)){
    keep.columns = c(keep.columns, "Cluster")
  }

  ccf_data <- dplyr::select(ccf_data,
                            dplyr::all_of(keep.columns),
                            dplyr::all_of(indel.ccf.col))

  return(ccf_data)
}

# Check a segment table and normalise its columns.
#
# Stops when a required column is missing. Strips the 'chr' prefix, recodes
# X/Y as 23/24 and coerces ids to character and positions to numeric.
validSeg <- function(seg){
  seg_standardcol <- c("Patient_ID","Tumor_Sample_Barcode",
                       "Chromosome","Start_Position",
                       "End_Position")
  if(!all(seg_standardcol %in% colnames(seg))){
    missing_fileds_seg <- seg_standardcol[!seg_standardcol %in% colnames(seg)]
    info <- paste(missing_fileds_seg, collapse = ", ")
    stop(paste0("Missing ", info, " from segFile"))
  }
  seg$Chromosome = gsub(pattern = 'chr', replacement = '', x = seg$Chromosome, fixed = TRUE)
  seg$Chromosome = gsub(pattern = 'X', replacement = '23', x = seg$Chromosome, fixed = TRUE)
  seg$Chromosome = gsub(pattern = 'Y', replacement = '24', x = seg$Chromosome, fixed = TRUE)

  seg$Patient_ID <- as.character(seg$Patient_ID)
  seg$Tumor_Sample_Barcode <- as.character(seg$Tumor_Sample_Barcode)
  seg$Start_Position <- as.numeric(seg$Start_Position)
  seg$End_Position <- as.numeric(seg$End_Position)

  return(seg)
}


# Check that the clinical data describes every sample of the MAF exactly once.
#
# Stops when a Tumor_Sample_Barcode is duplicated in the clinical data or when
# a barcode of the MAF has no clinical record. Returns clin_data unchanged.
validClinicalData <- function(clin_data, maf_data){
  ## check Tumor_Sample_Barcode of maf data and clinical data
  clin_tb_count <- table(clin_data$Tumor_Sample_Barcode)
  if(length(which(clin_tb_count > 1)) > 0){
    rep_tb <- names(clin_tb_count)[which(clin_tb_count > 1)]
    stop(paste0("There are more than one ", paste(rep_tb, collapse = ", "), " in clinical data!"))
  }

  maf_tb <- unique(maf_data$Tumor_Sample_Barcode)
  clin_tb <- unique(clin_data$Tumor_Sample_Barcode)
  tb_setdiff <- setdiff(maf_tb, clin_tb)

  if(length(tb_setdiff) > 0){
    stop(paste0("Information about Tumor_Sample_Barcode ", paste(tb_setdiff, collapse = ", "), " cannot be found in clinical data!"))
  }

  return(clin_data)

}

--------------------------------------------------------------------------------
/R/plotCNATree.R:
--------------------------------------------------------------------------------
1 | # library(ggtree) 2 | # library(treeio) 3 | # library(phangorn) 4 | # library(MEDICCquant) 5 | 6 | 7 | #' plotCNAtree plots phylogenetic trees of CNAs 8 | #' 9 | #' The CNAs trees were constructed by MEDICC. 10 | #' 11 | #' @param dist dist files that generated by MEDICC. 12 | #' @param bootstrap.rep.num number of bootstrap steps. 13 | #' @param group a list that used to indicate the sample groups 14 | #' @param group.colors an array indicates the colors of sample groups. 15 | #' @param title title of the plot. 16 | #' @param normal.node the sample name of normal sample in the tree. 17 | #' @param hexpand_ratio hexpand ratio. see \code{\link[ggtree]{hexpand}} 18 | #' 19 | #' @examples 20 | #' # read samples distances.
#' # This dist file is the output of MEDICC
#' dist <- system.file(package = "MPTevol", "extdata", "tree_final.dist")
#'
#' # set group information
#' group <- list(
#'   NORMAL = "NORMAL",
#'   Breast = paste0("Breast_", 1:5),
#'   Coad = paste0("Coad_", 1:5),
#'   Lung = paste0("Lung_", 1:5),
#'   OveryLM = paste0("OveryLM_", 1:5),
#'   OveryRM = paste0("OveryRM_", 1:6),
#'   UterusM = paste0("UterusM_", c(1:7))
#' )
#'
#' # set group colors
#' group.colors <- setNames(set.colors(n = length(group)), nm = names(group))
#'
#' # built trees
#' tree <- plotCNAtree(
#'   dist = dist,
#'   group = group,
#'   group.colors = group.colors
#' )
#'
#' tree$plot
#' @return A list with two elements: `phyloTree` (the list returned by the
#'   internal `bootstrap.trees()` helper: `tree`, `bootstrap.value`, `patientID`)
#'   and `plot` (the ggtree plot).
#' @import ggtree
#' @import treeio
#' @import phangorn
#' @import ape
#' @export
plotCNAtree <- function(dist,
                        bootstrap.rep.num = 500,
                        group = NULL,
                        group.colors = NULL,
                        title = "Cancer",
                        normal.node = "NORMAL",
                        hexpand_ratio = 0.3) {
  message("Calculate the bootstraps")

  # Build the NJ tree and estimate bootstrap support from resampled distances.
  phyloTree <- bootstrap.trees(
    dist = dist,
    title = title,
    bootstrap.rep.num = bootstrap.rep.num
  )

  mtree <- phyloTree$tree
  bootstrap.value <- phyloTree$bootstrap.value

  # The last edge length of the NJ tree is drawn as the root edge below.
  root.length <- rev(mtree$edge.length)[1]

  # Use the normal sample as outgroup, then remove it from the displayed tree.
  mtree <- treeio::root(mtree, outgroup = normal.node)
  mtree <- treeio::drop.tip(mtree, normal.node)

  # Attach the bootstrap values to the internal nodes.
  bp2 <- data.frame(
    node = 1:(treeio::Nnode(mtree)) + treeio::Ntip(mtree),
    bootstrap = bootstrap.value
  )
  mtree <- dplyr::full_join(mtree, bp2, by = "node")

  p_trees <- ggtree::ggtree(mtree, size = 1) +
    ggtree::geom_tiplab(size = 4) +
    ggtree::geom_treescale(fontsize = 6, linesize = 1, offset = 1) +
    # set root length
    ggtree::geom_rootedge(rootedge = root.length, size = 1, colour = "grey40") +
    ggtree::hexpand(hexpand_ratio, direction = 1)

  # Bootstrap classes. Left-closed bins with the top value included, i.e.
  # [0,70), [70,90), [90,100], so that a support of exactly 0 is not turned
  # into NA and the bins agree with the legend text (BP < 70, 70 <= BP < 90,
  # BP >= 90).
  bp.classes <- c("low", "mid", "high")

  p_trees <- p_trees +
    ggtree::geom_nodepoint(
      ggplot2::aes(fill = cut(bootstrap,
        breaks = c(0, 70, 90, 100),
        labels = bp.classes,
        right = FALSE,
        include.lowest = TRUE
      )),
      shape = 21, size = 2.5
    ) +
    ggplot2::scale_fill_manual(
      values = c(high = "black", mid = "grey", low = "white"),
      guide = "legend",
      name = "Bootstrap Percentage(BP)",
      breaks = c("high", "mid", "low"),
      labels = expression(BP >= 90, 70 <= BP * " < 90", BP < 70)
    ) +
    ggtree::theme_tree(legend.position = c(0.8, 0.25))

  if (!is.null(group)) {
    # The normal sample has been dropped from the tree, so it must not take
    # part in the sample-name check (nor be passed on as an empty group).
    group <- lapply(group, function(ids) ids[!ids %in% normal.node])
    group <- group[lengths(group) > 0]

    # check samples ids between trees and group
    group.ids <- unlist(group, use.names = FALSE)
    if (!identical(sort(group.ids), sort(mtree@phylo$tip.label))) {
      stop("the samplenames in group were not identical to sample names in the tree")
    }

    p_trees <- ggtree::groupOTU(p_trees, group, "Sites")

    # get levels
    Sites <- levels(p_trees$data$Sites)

    if (is.null(group.colors)) {
      Site.colors <- set.colors(n = length(Sites))
    } else {
      # Levels without a user-supplied color get generated ones.
      missing.sites <- setdiff(Sites, names(group.colors))
      if (length(missing.sites) > 0) {
        extra.colors <- setNames(
          set.colors(n = length(missing.sites), rev = TRUE),
          nm = missing.sites
        )
        Site.colors <- c(extra.colors, group.colors)[Sites]
      } else {
        Site.colors <- group.colors[Sites]
      }
    }

    p_trees <- p_trees +
      ggplot2::aes(color = Sites) +
      ggplot2::scale_colour_manual(
        values = Site.colors
      )
  }

  p_trees <- p_trees +
    ggplot2::labs(title = title) +
    ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))

  return(
    list(
      phyloTree = phyloTree,
      plot = p_trees
    )
  )
}



########################################################################################

# Supporting Functions


# Resample a distance matrix to obtain bootstrap replicates.
#
# Every entry of the lower triangle (diagonal included) is redrawn from a
# normal distribution with mean d and standard deviation sqrt(d), mirrored to
# the upper triangle, truncated at 0 and rounded.
#
# D:     distance matrix (anything `as.matrix()` accepts).
# niter: number of resampled matrices to generate; 0 yields an empty list.
#
# Returns a list of `niter` objects of class "dist".
medicc.resample.distance.matrices <- function(D, niter = 100) {
  D <- as.matrix(D)
  n <- nrow(D)
  result <- vector("list", niter)

  # seq_len() keeps the loops empty for niter = 0 or a 0-row matrix, where
  # 1:0 would iterate over c(1, 0).
  for (s in seq_len(niter)) {
    Dnew <- D
    for (i in seq_len(n)) {
      for (j in seq_len(i)) {
        Dnew[i, j] <- stats::rnorm(1, mean = Dnew[i, j], sd = sqrt(Dnew[i, j]))
        Dnew[j, i] <- Dnew[i, j]
      }
    }
    Dnew[Dnew < 0] <- 0
    Dnew <- round(Dnew)
    result[[s]] <- stats::as.dist(Dnew)
  }
  return(result)
}


# Build the neighbor-joining tree from a MEDICC distance file and estimate
# bootstrap support.
#
# dist:              path to the distance file; the first line is skipped and
#                    the first column is used as row names. One row must be
#                    named "diploid".
# bootstrap.rep.num: number of resampled distance matrices.
# title:             stored as `patientID` in the result.
#
# Returns a list with `tree` (phylo object, "diploid" tip renamed to
# "NORMAL"), `bootstrap.value` (support in percent, last value removed) and
# `patientID`.
bootstrap.trees <- function(dist, bootstrap.rep.num = 1000, title = "Cancer") {

  # using NJ to create a new tree rooted at the diploid sample.
  getTrees <- function(D) {
    matTree <- ape::nj(D)
    root_num <- which(matTree$tip.label == "diploid")
    treeio::root(matTree, root_num)
  }

  D <- as.matrix(utils::read.table(dist, row.names = 1, skip = 1))
  colnames(D) <- rownames(D)

  # Rooting needs the diploid sample; fail early with a readable message.
  if (!"diploid" %in% rownames(D)) {
    stop("the dist file does not contain the 'diploid' sample that is required to root the tree")
  }

  matTree <- getTrees(D)

  # bootstrap: trees rebuilt from resampled distance matrices.
  resampled <- medicc.resample.distance.matrices(D, bootstrap.rep.num)

  bootstrap.value <- ape::prop.clades(
    matTree,
    lapply(resampled, getTrees),
    rooted = ape::is.rooted(matTree)
  ) / bootstrap.rep.num * 100

  # for MesKit to plot trees.
  matTree$tip.label[which(matTree$tip.label == "diploid")] <- "NORMAL"

  phyloTree <- list(
    tree = matTree,
    # The last value is removed here; plotCNAtree() pairs the remaining ones
    # with the internal nodes left after it drops the normal tip.
    bootstrap.value = bootstrap.value[seq_len(length(bootstrap.value) - 1)],
    patientID = title
  )

  phyloTree
}

#' calKaKs calculates the Ka/Ks of each group
#'
#' The mutations are classified by `classifyMut()` internally.
#'
#' @param maf Maf or MafList object generated by `readMaf()` function
#' @param patient.id Select the specific patients. Default `NULL`, all patients are included.
#' @param vaf.cutoff Removing mutations with low variant allele frequency (VAF).
#' @param class The class which would be represented.
#' "SP" (Shared pattern: Public/Shared/Private), other options: "CS" (Clonal status: Clonal/Subclonal)
#' and "SPCS". see [MesKit::classifyMut()].
#' @param parallel If `TRUE` (default), run in parallel.
#' @param classByTumor Passed on to [MesKit::classifyMut()]. Default `FALSE`.
#'
#' @return A named list with one element per patient. Each element is a list
#'   with `KaKs_data` (the per-group dN/dS table plus `type`, `Tumor_ID` and
#'   `Type` columns) and `plot` (a bar chart of the `wall` estimate). Both are
#'   `NULL` for a patient in which no group could be estimated.
#'
#' @examples
#' library(MesKit)
#' data.type <- "split1"
#'
#' maf <- readMaf(
#'   mafFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.mutation.txt", data.type)),
#'   ccfFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.CCF.txt", data.type)),
#'   clinicalFile = system.file(package = "MPTevol", "extdata", sprintf("meskit.%s.clinical.txt", data.type)),
#'   refBuild = "hg19",
#'   ccf.conf.level = 0.95
#' )
#'
#' # calKaKs
#' kaks <- calKaKs(maf, patient.id = "Breast", class = "SP", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' kaks <- calKaKs(maf, patient.id = "Breast", class = "CS", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' kaks <- calKaKs(maf, class = "SP", parallel = TRUE, vaf.cutoff = 0.05)
#' kaks
#' @export
calKaKs <- function(maf,
                    patient.id = NULL,
                    class = "SP",
                    classByTumor = FALSE,
                    vaf.cutoff = 0.05,
                    parallel = TRUE) {

  # To do: be careful about the samples and tumors.

  # Display order of the mutation classes. Groups whose class is not listed
  # here become NA in the factor below and are removed.
  # NOTE(review): the "SPCS" list has four entries only; confirm that no other
  # combination returned by MesKit::classifyMut() should be kept.
  class.levels <- switch(class,
    SP = c("Public", "Shared", "Private"),
    # was "Subclonl", which made every subclonal group NA and dropped it
    CS = c("Clonal", "Subclonal"),
    SPCS = c("Public_Clonal", "Shared_Clonal", "Shared_Subclonal", "Private_Subclonal"),
    NULL
  )

  ######################################################################

  # Estimate Ka/Ks for every Tumor_ID x mutation-class group of one patient.
  estKaKs <- function(pid, maf_input, maf_class) {
    message(pid)

    # Merge the maf input and mutation class.
    # The join relies on the columns the two tables have in common.
    maf_merge <- maf_input[[pid]] %>%
      dplyr::mutate(Mut_ID = stringr::str_c(Hugo_Symbol, Chromosome, Start_Position,
        Reference_Allele, Tumor_Seq_Allele2,
        sep = ":"
      )) %>%
      dplyr::left_join(
        maf_class[[pid]]
      ) %>%
      dplyr::select(
        Hugo_Symbol, Chromosome, Start_Position, End_Position,
        Reference_Allele, Tumor_Seq_Allele2, Tumor_Sample_Barcode,
        Mutation_Type, Patient_ID, Tumor_ID, Variant_Classification, VAF
      )

    maf_list <- maf_merge %>%
      split(paste(maf_merge$Tumor_ID, maf_merge$Mutation_Type, sep = ":"))

    # We use the easypar to do parallel calculations.
    if (parallel) {
      maf_KaKs <- easypar::run(
        FUN = getKaKs,
        PARAMS = lapply(seq_along(maf_list), function(x) {
          list(df = maf_list[[x]], vaf.cutoff = vaf.cutoff)
        }),
        parallel = TRUE,
        outfile = NULL,
        export = NULL,
        packages = "tidyverse",
        filter_errors = FALSE
      )
    } else {
      # Same contract as the parallel branch: honour vaf.cutoff and keep a
      # failing group as an error object instead of aborting the whole run.
      maf_KaKs <- lapply(maf_list, function(df) {
        tryCatch(getKaKs(df, vaf.cutoff = vaf.cutoff), error = function(e) e)
      })
    }

    names(maf_KaKs) <- names(maf_list)

    # Removing groups that failed to calculate the KaKs (their result has no rows).
    succeeded <- vapply(maf_KaKs, function(x) is.numeric(nrow(x)), logical(1))
    maf_KaKs <- maf_KaKs[succeeded]

    if (length(maf_KaKs) == 0) {
      warning(sprintf("Ka/Ks could not be estimated for any group of patient %s", pid))
      return(list(KaKs_data = NULL, plot = NULL))
    }

    KaKs_data <- lapply(names(maf_KaKs), function(i) {
      dplyr::mutate(maf_KaKs[[i]], type = i)
    })
    KaKs_data <- purrr::reduce(KaKs_data, rbind)

    # `type` is "<Tumor_ID>:<Mutation_Type>".
    type_parts <- stringr::str_split(KaKs_data$type, ":")

    KaKs_data <- KaKs_data %>%
      dplyr::mutate(
        Tumor_ID = vapply(type_parts, function(x) x[1], character(1)),
        Type = vapply(type_parts, function(x) x[2], character(1))
      ) %>%
      dplyr::mutate(
        Type = factor(Type, levels = class.levels)
      ) %>%
      dplyr::filter(!is.na(Type))

    p1 <- KaKs_data %>%
      dplyr::filter(name %in% c("wall")) %>%
      dplyr::mutate(name = factor(name, levels = c("wall"))) %>%
      ggplot2::ggplot(ggplot2::aes(x = Tumor_ID, y = mle, fill = Type)) +
      ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge(width = 0.90)) +
      # dN/dS = 1 reference line
      ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1) +
      ggplot2::labs(x = NULL, y = latex2exp::TeX("Dn/Ds ($\\omega_{all}$)")) +
      ggplot2::scale_fill_manual(values = set.colors(length(unique(KaKs_data$Type)))) +
      ggplot2::theme(
        axis.title.x = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_text(angle = 0, hjust = 0.5, size = 14)
      )

    list(
      KaKs_data = KaKs_data,
      plot = p1
    )
  }
  ##########################################################################
  # running

  # Get the mutation groups.
  maf_input <- MesKit::subMaf(maf, patient.id = patient.id, mafObj = FALSE, use.tumorSampleLabel = TRUE)

  # get mutation classifications.
  maf_class <- MesKit::classifyMut(maf, patient.id = patient.id, class = class, classByTumor = classByTumor)

  # Note the different format between maf_input and maf_class when only one
  # patient is involved: the classification then comes back unwrapped, so it
  # is wrapped into a one-element named list here. A patient.id vector with
  # several entries is left as returned.
  if (length(patient.id) == 1) {
    maf_class <- stats::setNames(list(maf_class), patient.id)
  } else if (is.data.frame(maf_class)) {
    maf_class <- stats::setNames(list(maf_class), names(maf_input)[1])
  }

  kaks <- lapply(names(maf_input), estKaKs, maf_input, maf_class)
  names(kaks) <- names(maf_input)

  return(
    kaks
  )
}




#' getKaKs compares Ka/Ks between different groups
#'
#' @param df data. Six columns are required to calculate the Ka/Ks,
#' including "Tumor_Sample_Barcode","Chromosome","Start_Position",
#' "Reference_Allele","Tumor_Seq_Allele2" and "VAF".
#' @param vaf.cutoff VAF cutoff. Removing mutations with low variant allele frequency (VAF).
#'
#' @details The Ka/Ks is calculated by [dndscv::dndscv()]
#'
#' @return The `globaldnds` element of the [dndscv::dndscv()] result.
#'   An error is raised when 40 or fewer mutations pass the VAF filter.
#'
#' @export
getKaKs <- function(df, vaf.cutoff = 0.05) {
  # Load the substitution model into this function's own environment; the
  # target environment is given explicitly so the call does not depend on
  # where data() would put the object by default.
  utils::data(
    list = sprintf("submod_%s", "13r_3w"),
    package = "dndscv",
    envir = environment()
  )

  # Keep the five columns dndscv expects, after removing low-VAF mutations.
  dndscv.cols <- c(
    "Tumor_Sample_Barcode", "Chromosome", "Start_Position",
    "Reference_Allele", "Tumor_Seq_Allele2"
  )

  mutations <- df %>%
    dplyr::filter(VAF >= vaf.cutoff) %>%
    dplyr::select(dplyr::all_of(dndscv.cols))

  if (nrow(mutations) <= 40) {
    stop("without enough mutations")
  }

  kaks <- dndscv::dndscv(
    mutations = mutations,
    # no per-gene / per-sample mutation caps
    max_muts_per_gene_per_sample = Inf,
    max_coding_muts_per_sample = Inf,
    sm = submod_13r_3w,
    outp = 1
  )

  kaks$globaldnds
}
37 |
38 |
43 | 44 |
45 |

We used a CNAqc object, containing a set of mutations, CNA calls and tumor purity values. 46 | The CNAqc was used to deal with the allele-specific CNAs.

47 |
48 | 49 |
50 |

Usage

51 |
readCNAProfile(maf, seg, Patient_ID = NULL, purity = 1, ref = "hg19")
52 |
53 | 54 |
55 |

Arguments

56 |
maf
57 |

Maf or MafList object generated by readMaf() function

58 |
seg
59 |

seg or seglist.

60 |
Patient_ID
61 |

Patient_ID: select the specific patients. 62 | If not specified, the input should be either a Maf and a seg, or a MafList and a segList.

63 |
purity
64 |

purity information for each sample.

65 |
ref
66 |

human reference genome version. Default 'hg19'. Optional: 'hg18' or 'hg38'.

67 |
68 |
69 |

Value

70 |

cnaqc.list for cnaqc initiation.

71 |
72 |
73 |

Details

74 |

This code reads the CNA Profiles for each patient. 75 | The tumor names of maf and seg are required to match each other.

76 |
77 | 78 |
80 | 81 | 82 |
85 | 86 | 89 | 90 |
91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /R/splitSegment.R: -------------------------------------------------------------------------------- 1 | #' Split the segment regions into several parts 2 | #' 3 | #' According to their shared status. 4 | #' The function first obtains the common shared regions across samples. 5 | #' The corresponding A allele and B allele are output as the format requirements of MEDICC. 6 | #' 7 | #' @param segfiles The allele-specific copy number alterations files generated by **sequenza**. 8 | #' @param sampleid the corresponding sample ids. 9 | #' @param out.dir output dir. 10 | #' @param project.names the project names used in the output. 11 | #' @param N.baf quality control for the sequenza output. 12 | #' @param cnv_min_length quality control for the sequenza output. 13 | #' @param max_CNt quality control for the sequenza output. 14 | #' @param minLength output control: the min length of CNVs to output. 15 | #' @param maxCNV output control: the max CNV to output. When the raw CNV greater than maxCNV, then its value was set to maxCNV. 16 | #' @param medicc.py the position of meidcc.py. 17 | #' @param python the position of meidcc.py. 18 | #' 19 | #' @details 20 | #' This function takes the **sequenza** results as the input and outputs 21 | #' the format requirements of MEDICC. 
#'
#' @return A list with `major` and `minor` (wide data frames: `seqnames`,
#'   `start`, `end`, `width`, then one copy-number column per sample) and
#'   `plist` (the two ComplexHeatmap objects, `major` and `minor`).
#'
#' @import GenomicRanges
#' @import ComplexHeatmap
#' @export
splitSegment <- function(segfiles,
                         sampleid,
                         project.names = "tumor",
                         out.dir = "data",
                         N.baf = 30, cnv_min_length = 1e5, max_CNt = 15,
                         minLength = 1e5, maxCNV = 4,
                         medicc.py = "medicc.py",
                         python = "python") {
  if (length(segfiles) != length(sampleid)) {
    stop("segfiles and sampleid must have the same length")
  }

  # read segs, tagging every row with its sample id
  seglist <- lapply(seq_along(sampleid), function(i) {
    read.delim(file = segfiles[i], header = TRUE, stringsAsFactors = FALSE) %>%
      dplyr::mutate(sample = sampleid[i])
  })

  # change low-confidence regions into 2,1,1 (CNt, A, B) instead of removing them
  seglist <- base::Reduce(rbind, seglist) %>%
    dplyr::mutate(keep = ifelse(N.BAF >= N.baf &
      (end.pos - start.pos) >= cnv_min_length &
      CNt <= max_CNt, TRUE, FALSE)) %>%
    dplyr::mutate(
      CNt = ifelse(keep, CNt, 2),
      A = ifelse(keep, A, 1),
      B = ifelse(keep, B, 1)
    )

  gseg <- GenomicRanges::GRanges(
    seqnames = seglist$chromosome,
    ranges = IRanges::IRanges(seglist$start.pos, seglist$end.pos),
    strand = "+"
  )

  # metadata columns can be added to a GRanges object
  GenomicRanges::mcols(gseg) <- seglist

  # split regions into small, non-overlapping regions.
  segdis <- GenomicRanges::disjoin(gseg)
  # add region infor to data
  GenomicRanges::mcols(segdis) <- data.frame(segdis)

  # get overlaps regions
  overlaps <- GenomicRanges::findOverlaps(segdis, gseg)

  # combined information: coordinates of the disjoint piece + copy numbers of
  # the original segment that covers it.
  # NOTE(review): queryHits()/subjectHits() are used unqualified and must be
  # visible on the search path or through the package imports.
  merge <- cbind(
    GenomicRanges::mcols(segdis[queryHits(overlaps), ]),
    GenomicRanges::mcols(gseg[subjectHits(overlaps), ])
  ) %>%
    base::as.data.frame() %>%
    # filter seg length
    dplyr::filter(width >= minLength) %>%
    dplyr::select(seqnames, start, end, width, CNt, A, B, sample)

  # Remove uninformative regions: pieces on which all samples carry the same
  # (CNt, A, B) state, the neutral 2/1/1 state included.
  state.cols <- c("seqnames", "start", "end", "width", "CNt", "A", "B")

  merge_summary <- merge %>%
    dplyr::group_by(seqnames, start, end, width, CNt, A, B) %>%
    dplyr::summarise(num = dplyr::n(), .groups = "drop") %>%
    dplyr::filter(num == length(sampleid))

  merge <- dplyr::left_join(merge, merge_summary, by = state.cols) %>%
    dplyr::filter(is.na(num)) %>%
    dplyr::mutate(num = NULL)

  if (nrow(merge) == 0) {
    stop("no segment differs between the samples after filtering; nothing to output")
  }

  # Cap the copy numbers of a wide table (columns 5.. are samples) at maxCNV.
  cap_copy_number <- function(wide) {
    cn <- wide[, 5:ncol(wide), drop = FALSE]
    cn[cn >= maxCNV] <- maxCNV
    cbind(wide[, 1:4], cn)
  }

  # major info: A. Samples without a row for a piece are filled with 1.
  merge_A <- merge %>%
    dplyr::select(seqnames, start, end, width, A, sample) %>%
    tidyr::spread(key = sample, value = A, fill = 1)
  merge_A <- cap_copy_number(merge_A)

  # minor info: B
  merge_B <- merge %>%
    dplyr::select(seqnames, start, end, width, B, sample) %>%
    tidyr::spread(key = sample, value = B, fill = 1)
  merge_B <- cap_copy_number(merge_B)

  # Create the output directory without going through the shell.
  if (!dir.exists(out.dir)) {
    dir.create(out.dir, recursive = TRUE)
  }

  # output
  write.table(merge_A, file = sprintf("%s/%s.major.txt", out.dir, project.names), quote = F, row.names = F, sep = "\t")
  write.table(merge_B, file = sprintf("%s/%s.minor.txt", out.dir, project.names), quote = F, row.names = F, sep = "\t")

  write.fasta(merge_A = merge_A, major = "major", out.dir = out.dir, project.names = project.names)
  write.fasta(merge_A = merge_B, major = "minor", out.dir = out.dir, project.names = project.names)

  # plot the heatmaps to provide better quality controls
  to_heatmap_matrix <- function(wide) {
    wide %>%
      dplyr::mutate(seq = stringr::str_c(seqnames, start, end, sep = "_")) %>%
      tibble::column_to_rownames(var = "seq") %>%
      dplyr::mutate(seqnames = NULL, start = NULL, end = NULL, width = NULL) %>%
      as.matrix()
  }

  plist <- list()
  plist$major <- ComplexHeatmap::Heatmap(to_heatmap_matrix(merge_A),
    row_names_gp = grid::gpar(fontsize = 6)
  )

  plist$minor <- ComplexHeatmap::Heatmap(to_heatmap_matrix(merge_B),
    row_names_gp = grid::gpar(fontsize = 6)
  )

  # Print the MEDICC command line; it is not executed here.
  message(
    sprintf("nohup %s %s %s/%s.descr.txt %s/%s.run -v >%s.run.info.txt &", python, medicc.py, out.dir, project.names, out.dir, project.names, project.names)
  )

  return(list(
    major = merge_A,
    minor = merge_B,
    plist = plist
  ))
}

#' write.fasta
#'
#' Prepare the format of MEDICC input.
#'
#' @param project.names the project names used in the output.
#' @param merge_A data frame whose first four columns are `seqnames`, `start`,
#'   `end` and `width`, followed by one copy-number column per sample.
#' @param major label used in the FASTA file names, e.g. "major" or "minor".
#' @param out.dir output dir; created when it does not exist.
#'
#' @details One FASTA file per chromosome (chrX and chrY are skipped) is
#'   written as `<out.dir>/<project.names>_<major>_<chr>.fasta`. Each file
#'   holds a `diploid` record of 1s followed by one record per sample. The
#'   description file `<out.dir>/<project.names>.descr.txt` always lists the
#'   "major" and "minor" file names of every chromosome.
write.fasta <- function(merge_A, major = "major", out.dir = "data", project.names = "tumor") {
  # No shell involved: works on every platform and is silent when the
  # directory already exists.
  dir.create(out.dir, showWarnings = FALSE, recursive = TRUE)

  seqnames <- as.character(merge_A$seqnames)
  chrs <- unique(seqnames)
  chrs <- chrs[!(chrs %in% c("chrX", "chrY"))]

  # Sample columns start after seqnames/start/end/width.
  sample.cols <- seq_len(max(ncol(merge_A) - 4, 0)) + 4

  for (chr in chrs) {
    chr.seg <- merge_A[which(seqnames == chr), , drop = FALSE]

    # Reference record: one "1" per segment.
    records <- c(">diploid", paste0(rep(1, nrow(chr.seg)), collapse = ""))

    # One header + one copy-number string per sample.
    for (j in sample.cols) {
      records <- c(
        records,
        sprintf(">%s", colnames(chr.seg)[j]),
        paste0(chr.seg[[j]], collapse = "")
      )
    }

    write.table(records,
      file = sprintf("%s/%s_%s_%s.fasta", out.dir, project.names, major, chr),
      quote = F, col.names = F, row.names = F
    )
  }

  # write desc: "<chr> <major fasta> <minor fasta>" per chromosome.
  # sprintf() is vectorised, so no chromosome simply yields no line.
  chrinfo <- sprintf(
    "%s %s_%s_%s.fasta %s_%s_%s.fasta",
    chrs,
    project.names, "major", chrs,
    project.names, "minor", chrs
  )
  write.table(chrinfo, file = sprintf("%s/%s.descr.txt", out.dir, project.names), quote = F, col.names = F, row.names = F)
}
35 |
36 |
39 | 40 |
41 |

All functions

42 | 43 | 44 | 45 | 46 |
47 | 48 | 49 | 50 | 51 |
52 | 53 | calKaKs() 54 |
55 |
calKaKs calculates the Ka/Ks of each group
56 |
57 | 58 | calRoutines() 59 |
60 |
calRoutines calculates the H index and JSI index according to the pair-wise comparison of CCF
61 |
62 | 63 | getClinSites() 64 |
65 |
getClinSites captures the clinically targetable sites from OncoKB based on the mutation status
66 |
67 | 68 | getKaKs() 69 |
70 |
getKaKs compares Ka/Ks between different groups
71 |
72 | 73 | inferClonalTrees() 74 |
75 |
inferClonalTrees
76 |
77 | 78 | plotCNAProfile() 79 |
80 |
Visualize CNA profile
81 |
82 | 83 | plotCNAtree() 84 |
85 |
plotCNAtree plots phylogenetic trees of CNAs
86 |
87 | 88 | plotMutTree() 89 |
90 |
plotMutTree plots phylogenetic tree of mutations
91 |
92 | 93 | plotVafCluster() 94 |
95 |
plotVafCluster
96 |
97 | 98 | readCNAProfile() 99 |
100 |
Read CNA Profiles
101 |
102 | 103 | Seg-class 104 |
105 |
Segment Class
106 |
107 | 108 | set.colors() 109 |
110 |
Color setting
111 |
112 | 113 | splitSegment() 114 |
115 |
Split the segment regions into several parts
116 |
117 | 118 | tree2timescape() 119 |
120 |
tree2timescape
121 |
122 | 123 | viewTrees() 124 |
125 |
Visualize the trees
126 |
127 | 128 | write.fasta() 129 |
130 |
write.fasta
131 |
132 |
133 | 134 | 135 |
144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | --------------------------------------------------------------------------------