├── .DS_Store ├── data ├── .DS_Store ├── svpluscnv_data.rda └── datalist ├── figure ├── .DS_Store ├── plot_readme_1-1.png ├── plot_readme_10-1.png ├── plot_readme_2-1.png ├── plot_readme_3-1.png ├── plot_readme_3b-1.png ├── plot_readme_3c-1.png ├── plot_readme_3d-1.png ├── plot_readme_4-1.png ├── plot_readme_5-1.png ├── plot_readme_6-1.png ├── plot_readme_7-1.png ├── plot_readme_8-1.png └── plot_readme_9-1.png ├── vignettes ├── .DS_Store └── figure │ ├── plot_vignette_1-1.png │ ├── plot_vignette_2-1.png │ ├── plot_vignette_3-1.png │ └── plot_vignette_4-1.png ├── man ├── .Rapp.history ├── nbl_svdat.Rd ├── cnv_blacklist_regions.Rd ├── nbl_segdat.Rd ├── svdat_lung_ccle.Rd ├── segdat_lung_ccle.Rd ├── refseq_hg19.Rd ├── refseq_hg38.Rd ├── extract.bins-methods.Rd ├── IQM.Rd ├── freq.threshold-methods.Rd ├── chr.sort.Rd ├── refSeqDat-class.Rd ├── IQSD.Rd ├── d3gb.chr.lim.Rd ├── map2color.Rd ├── genecnv-class.Rd ├── get.genesgr.Rd ├── ave.segmean.Rd ├── svcnvio-class.Rd ├── gene.symbol.info-methods.Rd ├── hbd.mat-methods.Rd ├── chromosome.limit.coords.Rd ├── cnvfreq-class.Rd ├── break.iqm-class.Rd ├── null.freq-class.Rd ├── createRandomString.Rd ├── validate.cnv.Rd ├── med.segmean.Rd ├── merge2lists.Rd ├── breaks-class.Rd ├── upgr.Rd ├── dngr.Rd ├── svc.breaks.Rd ├── geneBreakOverlap.Rd ├── chr.arm.cnv.Rd ├── hot.spot.samples.Rd ├── amp.del.Rd ├── shattered.eval.Rd ├── segment.gap.Rd ├── bed2chromo.reg.Rd ├── pct.genome.changed.Rd ├── get.chr.bins.Rd ├── match.breaks.Rd ├── gene.cnv.Rd ├── validate.svc.Rd ├── shattered.map.plot.Rd ├── clean.cnv.artifact.Rd ├── break.density.Rd ├── cnv.breaks.Rd ├── freq.p.test.Rd ├── circ.wg.plot.Rd ├── cnv.freq.Rd ├── break.annot-class.Rd ├── gene.track.view.Rd ├── chromo.regs-class.Rd ├── svc.break.annot.Rd ├── shattered.regions.cnv.Rd ├── circ.chromo.plot.Rd ├── brk.burden.iqm.Rd ├── sv.model.view.Rd ├── cnv.break.annot.Rd └── shattered.regions.Rd ├── svpluscnv-manual.pdf ├── R ├── get.genesgr.r ├── pct.genome.changed.r ├── 
hot.spot.samples.R ├── segment.means.r ├── chr.arm.cnv.r ├── svpluscnv.data.r ├── shattered.map.plot.r ├── gene.cnv.r ├── freq.p.test.r ├── internal_functions.r ├── clean.cnv.artifact.r ├── validate.input.data.r ├── gene.track.view.r ├── cnv.freq.plot.r ├── shattered.regions.cnv.r ├── brk.burden.iqm.r ├── circular.plot.r ├── sv.model.view.r └── breakpoint.density.r ├── DESCRIPTION └── NAMESPACE /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/.DS_Store -------------------------------------------------------------------------------- /data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/data/.DS_Store -------------------------------------------------------------------------------- /figure/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/.DS_Store -------------------------------------------------------------------------------- /vignettes/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/.DS_Store -------------------------------------------------------------------------------- /man/.Rapp.history: -------------------------------------------------------------------------------- 1 | load("/Users/lopezg16/Box Sync/git/svpluscnv/data/hg19.rda") 2 | refseq_hg19 3 | -------------------------------------------------------------------------------- /svpluscnv-manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/svpluscnv-manual.pdf -------------------------------------------------------------------------------- /data/svpluscnv_data.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/data/svpluscnv_data.rda -------------------------------------------------------------------------------- /figure/plot_readme_1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_1-1.png -------------------------------------------------------------------------------- /figure/plot_readme_10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_10-1.png -------------------------------------------------------------------------------- /figure/plot_readme_2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_2-1.png -------------------------------------------------------------------------------- /figure/plot_readme_3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3-1.png -------------------------------------------------------------------------------- /figure/plot_readme_3b-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3b-1.png -------------------------------------------------------------------------------- /figure/plot_readme_3c-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3c-1.png -------------------------------------------------------------------------------- /figure/plot_readme_3d-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3d-1.png -------------------------------------------------------------------------------- /figure/plot_readme_4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_4-1.png -------------------------------------------------------------------------------- /figure/plot_readme_5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_5-1.png -------------------------------------------------------------------------------- /figure/plot_readme_6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_6-1.png -------------------------------------------------------------------------------- /figure/plot_readme_7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_7-1.png -------------------------------------------------------------------------------- /figure/plot_readme_8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_8-1.png -------------------------------------------------------------------------------- /figure/plot_readme_9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_9-1.png -------------------------------------------------------------------------------- /vignettes/figure/plot_vignette_1-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_1-1.png -------------------------------------------------------------------------------- /vignettes/figure/plot_vignette_2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_2-1.png -------------------------------------------------------------------------------- /vignettes/figure/plot_vignette_3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_3-1.png -------------------------------------------------------------------------------- /vignettes/figure/plot_vignette_4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_4-1.png -------------------------------------------------------------------------------- /data/datalist: -------------------------------------------------------------------------------- 1 | cnv_blacklist_regions 2 | hg19: refseq_hg19 3 | hg38: refseq_hg38 4 | nbl_target_cnv: nbl_segdat 5 | nbl_target_sv: nbl_svdat 6 | segdat_lung_ccle 7 | svdat_lung_ccle 8 | -------------------------------------------------------------------------------- /man/nbl_svdat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{nbl_svdat} 5 | \alias{nbl_svdat} 6 | \title{TARGET Neuroblastoma SVC} 7 | \format{ 8 | An object of class \code{data.frame} with 7366 rows and 8 columns. 
9 | } 10 | \usage{ 11 | nbl_svdat 12 | } 13 | \description{ 14 | TARGET CGI structural variants: https://target-data.nci.nih.gov/ 15 | } 16 | \keyword{SVs} 17 | -------------------------------------------------------------------------------- /man/cnv_blacklist_regions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{cnv_blacklist_regions} 5 | \alias{cnv_blacklist_regions} 6 | \title{Low coverage regions} 7 | \format{ 8 | An object of class \code{data.frame} with 60 rows and 3 columns. 9 | } 10 | \usage{ 11 | cnv_blacklist_regions 12 | } 13 | \description{ 14 | Low coverage regions 15 | } 16 | \keyword{CNV} 17 | \keyword{segmentation} 18 | -------------------------------------------------------------------------------- /man/nbl_segdat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{nbl_segdat} 5 | \alias{nbl_segdat} 6 | \title{TARGET Neuroblastoma CNV} 7 | \format{ 8 | An object of class \code{data.frame} with 17680 rows and 6 columns. 9 | } 10 | \usage{ 11 | nbl_segdat 12 | } 13 | \description{ 14 | TARGET CNV segmentation: https://target-data.nci.nih.gov/ 15 | } 16 | \keyword{CNV} 17 | \keyword{SVs} 18 | \keyword{segmentation,} 19 | -------------------------------------------------------------------------------- /man/svdat_lung_ccle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{svdat_lung_ccle} 5 | \alias{svdat_lung_ccle} 6 | \title{Lung CCLE SVC data} 7 | \format{ 8 | An object of class \code{data.frame} with 23040 rows and 8 columns. 
9 | } 10 | \usage{ 11 | svdat_lung_ccle 12 | } 13 | \description{ 14 | CCLE translocation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/ 15 | } 16 | \keyword{SVs} 17 | -------------------------------------------------------------------------------- /man/segdat_lung_ccle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{segdat_lung_ccle} 5 | \alias{segdat_lung_ccle} 6 | \title{Lung CCLE CNV data} 7 | \format{ 8 | An object of class \code{data.frame} with 134358 rows and 6 columns. 9 | } 10 | \usage{ 11 | segdat_lung_ccle 12 | } 13 | \description{ 14 | CCLE CNV segmentation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/ 15 | } 16 | \keyword{CNV} 17 | \keyword{segmentation} 18 | -------------------------------------------------------------------------------- /man/refseq_hg19.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{refseq_hg19} 5 | \alias{refseq_hg19} 6 | \title{Reference transcript and exon annotations for hg19} 7 | \format{ 8 | An object of class \code{refSeqDat} of length 1. 
9 | } 10 | \usage{ 11 | refseq_hg19 12 | } 13 | \description{ 14 | refSeq annotations for hg19 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables) 15 | } 16 | \keyword{exons} 17 | \keyword{genes,} 18 | \keyword{transcripts,} 19 | -------------------------------------------------------------------------------- /man/refseq_hg38.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{data} 4 | \name{refseq_hg38} 5 | \alias{refseq_hg38} 6 | \title{Reference transcript and exon annotations for hg38} 7 | \format{ 8 | An object of class \code{refSeqDat} of length 1. 9 | } 10 | \usage{ 11 | refseq_hg38 12 | } 13 | \description{ 14 | refSeq annotations for hg38 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables) 15 | } 16 | \keyword{exons} 17 | \keyword{genes,} 18 | \keyword{transcripts,} 19 | -------------------------------------------------------------------------------- /man/extract.bins-methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \docType{methods} 4 | \name{extract.bins} 5 | \alias{extract.bins} 6 | \alias{extract.bins,chromo.regs-method} 7 | \title{Return the genomicRanges object containing the genomic bins} 8 | \usage{ 9 | extract.bins(object) 10 | 11 | \S4method{extract.bins}{chromo.regs}(object) 12 | } 13 | \arguments{ 14 | \item{object}{(chromo.regs) An object of class chromo.regs} 15 | } 16 | \value{ 17 | an genomicRanges object with defined genomic bins 18 | } 19 | \description{ 20 | Return the genomicRanges object containing the genomic bins 21 | } 22 | -------------------------------------------------------------------------------- /man/IQM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by 
roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.r 3 | \name{IQM} 4 | \alias{IQM} 5 | \title{Inter-quantile mean} 6 | \usage{ 7 | IQM(x, lowQ = 0.1, upQ = 0.9) 8 | } 9 | \arguments{ 10 | \item{x}{numeric vector to compute interquantile average} 11 | 12 | \item{lowQ}{lower quantile} 13 | 14 | \item{upQ}{upper quantile} 15 | } 16 | \value{ 17 | (numeric) the IQM value 18 | } 19 | \description{ 20 | Obtains interquantile mean for a defined 'x' vector and both lower and upper quantiles 21 | } 22 | \examples{ 23 | 24 | x <- rnorm(100) 25 | IQM(x) 26 | } 27 | \keyword{interquartile} 28 | \keyword{statistics,} 29 | -------------------------------------------------------------------------------- /man/freq.threshold-methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/freq.p.test.r 3 | \docType{methods} 4 | \name{freq.threshold} 5 | \alias{freq.threshold} 6 | \alias{freq.threshold,null.freq-method} 7 | \title{Return frequency threshold from null.freq object} 8 | \usage{ 9 | freq.threshold(object) 10 | 11 | \S4method{freq.threshold}{null.freq}(object) 12 | } 13 | \arguments{ 14 | \item{object}{(null.freq) An object of class null.freq} 15 | } 16 | \value{ 17 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes 18 | } 19 | \description{ 20 | Return frequency threshold from null.freq object 21 | } 22 | -------------------------------------------------------------------------------- /man/chr.sort.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validate.input.data.r 3 | \name{chr.sort} 4 | \alias{chr.sort} 5 | \title{Chromosome ordering} 6 | \usage{ 7 | chr.sort(chrlist) 8 | } 9 | \arguments{ 10 | \item{chrlist}{(character): a vector containing chromosome names (chr1, 
chr2...chrX,chrY )} 11 | } 12 | \value{ 13 | a character vector of sorted chromosomes 14 | } 15 | \description{ 16 | A function to order a list of chromosomes 17 | } 18 | \examples{ 19 | 20 | chrlist <- paste("chr",c("X","Y",sample(1:22)),sep="") 21 | chr_sorted <- chr.sort(chrlist) 22 | } 23 | \keyword{CNV,} 24 | \keyword{genes} 25 | \keyword{segmentation,} 26 | -------------------------------------------------------------------------------- /man/refSeqDat-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{class} 4 | \name{refSeqDat-class} 5 | \alias{refSeqDat-class} 6 | \alias{refSeqDat} 7 | \title{Data class refSeqDat} 8 | \arguments{ 9 | \item{data}{(data.table): transcript information} 10 | 11 | \item{exonStarts}{(list): every transcript exonic end position} 12 | 13 | \item{genome.v}{(character): the genome version encoding transcript data} 14 | } 15 | \value{ 16 | an instance of the class 'refSeqDat' containing transcript exonic coordinates 17 | } 18 | \description{ 19 | Class to store refseq data from UCSC containing exon level info for known transcripts 20 | } 21 | -------------------------------------------------------------------------------- /man/IQSD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.r 3 | \name{IQSD} 4 | \alias{IQSD} 5 | \title{Inter-quantile standard deviation} 6 | \usage{ 7 | IQSD(x, lowQ = 0.1, upQ = 0.9) 8 | } 9 | \arguments{ 10 | \item{x}{numeric vector to compute interquantile standard deviation} 11 | 12 | \item{lowQ}{lower quantile} 13 | 14 | \item{upQ}{upper quantile} 15 | } 16 | \value{ 17 | (numeric) the IQSD value 18 | } 19 | \description{ 20 | Obtains inter quantile standard deviation for a defined 'x' vector and both lower and upper 
quantiles 21 | } 22 | \examples{ 23 | 24 | x <- rnorm(100) 25 | IQSD(x) 26 | } 27 | \keyword{interquartile} 28 | \keyword{statistics,} 29 | -------------------------------------------------------------------------------- /man/d3gb.chr.lim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.r 3 | \name{d3gb.chr.lim} 4 | \alias{d3gb.chr.lim} 5 | \title{Chromosome start and end} 6 | \usage{ 7 | d3gb.chr.lim(genome.v) 8 | } 9 | \arguments{ 10 | \item{genome.v}{(character) reference genome version to retrieve gene annotations (hg19 or GRCh37 and hg38 or GRCh38)} 11 | } 12 | \value{ 13 | (data.table) a table containing start and end positions for each chromosome 14 | } 15 | \description{ 16 | Obtains chromosome start and end positions from a reference genome version 17 | } 18 | \examples{ 19 | 20 | d3gb.chr.lim(genome.v="hg19") 21 | 22 | } 23 | \keyword{CNV,} 24 | \keyword{genes} 25 | \keyword{segmentation,} 26 | -------------------------------------------------------------------------------- /man/map2color.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.r 3 | \name{map2color} 4 | \alias{map2color} 5 | \title{Color map from numeric vector} 6 | \usage{ 7 | map2color(x, pal = NULL, limits = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{numeric vector} 11 | 12 | \item{pal}{color palette} 13 | 14 | \item{limits}{numeric limit for color mapping} 15 | } 16 | \value{ 17 | a color vector graded according to x 18 | } 19 | \description{ 20 | Produces a vector of colors based on a given palette. 
The colors are defined by the input vector 21 | } 22 | \examples{ 23 | 24 | x <- rnorm(100) 25 | x_color <- map2color(x) 26 | head(x_color) 27 | } 28 | \keyword{color,} 29 | \keyword{number} 30 | -------------------------------------------------------------------------------- /man/genecnv-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gene.cnv.r 3 | \docType{class} 4 | \name{genecnv-class} 5 | \alias{genecnv-class} 6 | \alias{genecnv} 7 | \title{Data class cnvmat} 8 | \arguments{ 9 | \item{cnvmat}{(data.frame): matrix containing average CNV per gene (rows) for each sample (columns)} 10 | 11 | \item{genesgr}{(S4): a GenomicRanges object with genomic feature annotations such as gene coordinates} 12 | 13 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv} 14 | 15 | \item{param}{(list):} 16 | } 17 | \value{ 18 | an instance of the class 'genecnv' containing gene level copy number info 19 | } 20 | \description{ 21 | Class to store breakpoint annotations 22 | } 23 | -------------------------------------------------------------------------------- /man/get.genesgr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get.genesgr.r 3 | \name{get.genesgr} 4 | \alias{get.genesgr} 5 | \title{Genes GRanges} 6 | \usage{ 7 | get.genesgr(genome.v = "hg19", chrlist = NULL) 8 | } 9 | \arguments{ 10 | \item{genome.v}{(hg19 or GRCh37 and hg38 or GRCh38) reference genome version to retrieve gene annotations} 11 | 12 | \item{chrlist}{(character)} 13 | } 14 | \value{ 15 | a GRanges class object from the specified human genome version 16 | } 17 | \description{ 18 | Retrieves a GRanges object containing gene annotations for a specified genome version 19 | } 20 | \examples{ 21 | 22 | 
get.genesgr(genome.v = "hg19",chrlist=NULL) 23 | 24 | } 25 | \keyword{CNV,} 26 | \keyword{genes} 27 | \keyword{segmentation,} 28 | -------------------------------------------------------------------------------- /man/ave.segmean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/segment.means.r 3 | \name{ave.segmean} 4 | \alias{ave.segmean} 5 | \title{Average sample CNV} 6 | \usage{ 7 | ave.segmean(cnv) 8 | } 9 | \arguments{ 10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 11 | } 12 | \value{ 13 | (numeric) a vector containing the weighted average logR from segmented data 14 | } 15 | \description{ 16 | Obtain the weighted average segment mean log2 ratios from each sample within a CNV segmentation data.frame 17 | } 18 | \examples{ 19 | 20 | ## validate input CNV data.frames 21 | cnv <- validate.cnv(segdat_lung_ccle) 22 | 23 | ave_seg_mean <- ave.segmean(cnv) 24 | head(ave_seg_mean) 25 | } 26 | \keyword{CNV,} 27 | \keyword{segmentation} 28 | -------------------------------------------------------------------------------- /man/svcnvio-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validate.input.data.r 3 | \docType{class} 4 | \name{svcnvio-class} 5 | \alias{svcnvio-class} 6 | \alias{svcnvio} 7 | \title{Data class svcnvio} 8 | \arguments{ 9 | \item{data}{(data.table): cnv or svc data.table to be validated by 'validate.cnv' or 'validate.svc' respectively} 10 | 11 | \item{type}{(character): the data type "cnv" or "svc" defined by "validate.cnv" or "validate.svc" respectively} 12 | } 13 | \value{ 14 | an instance of the class 'svcnvio' containing SV data derived from CNV or SVC data types; A unique id (uid) column is also added 15 | } 16 | \description{ 17 | Class to 
store CNV segmentation data 18 | } 19 | \seealso{ 20 | Additional data format information in the man pages of validate.cnv and validate.svc 21 | } 22 | -------------------------------------------------------------------------------- /man/gene.symbol.info-methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/svpluscnv.data.r 3 | \docType{methods} 4 | \name{gene.symbol.info} 5 | \alias{gene.symbol.info} 6 | \alias{gene.symbol.info,refSeqDat-method} 7 | \title{Return coordinates of a specified gene} 8 | \usage{ 9 | gene.symbol.info(object, symbol) 10 | 11 | \S4method{gene.symbol.info}{refSeqDat}(object, symbol) 12 | } 13 | \arguments{ 14 | \item{object}{(refSeqDat) An object of class refSeqDat containing gene transcript mapping. svpluscnv includes two self-loaded objects: refseq_hg19 & refseq_hg38} 15 | 16 | \item{symbol}{(character) a valid HGNC gene symbol included in the refseq object} 17 | } 18 | \value{ 19 | A list containing chr, start, end coordinates 20 | } 21 | \description{ 22 | Return coordinates of a specified gene 23 | } 24 | -------------------------------------------------------------------------------- /man/hbd.mat-methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \docType{methods} 4 | \name{hbd.mat} 5 | \alias{hbd.mat} 6 | \alias{hbd.mat,chromo.regs-method} 7 | \title{Return the binary matrix containing high confidence high-breakpoint-density region definitions} 8 | \usage{ 9 | hbd.mat(object, conf = "hc") 10 | 11 | \S4method{hbd.mat}{chromo.regs}(object, conf = "hc") 12 | } 13 | \arguments{ 14 | \item{object}{(chromo.regs) An object of class chromo.regs} 15 | 16 | \item{conf}{(character) Either "hc" for high confidence HBD or else include all} 17 | } 18 | \value{ 19 | an 
instance of the class 'chromo.regs' containing breakpoint mapping onto genes 20 | } 21 | \description{ 22 | Return the binary matrix containing high confidence high-breakpoint-density region definitions 23 | } 24 | -------------------------------------------------------------------------------- /man/chromosome.limit.coords.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validate.input.data.r 3 | \name{chromosome.limit.coords} 4 | \alias{chromosome.limit.coords} 5 | \title{Chromosome limit map} 6 | \usage{ 7 | chromosome.limit.coords(cnv) 8 | } 9 | \arguments{ 10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 11 | } 12 | \value{ 13 | data.table indicating start and end mapped positions of each chromosome 14 | } 15 | \description{ 16 | Obtain chromosome start and end positions based on mapped regions from CNV segmentation data 17 | } 18 | \examples{ 19 | 20 | ## validate input data.frame 21 | cnv <- validate.cnv(segdat_lung_ccle) 22 | 23 | chr.lim <- chromosome.limit.coords(cnv) 24 | } 25 | \keyword{CNV,} 26 | \keyword{mapping} 27 | \keyword{segmentation,} 28 | -------------------------------------------------------------------------------- /man/cnvfreq-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.freq.plot.r 3 | \docType{class} 4 | \name{cnvfreq-class} 5 | \alias{cnvfreq-class} 6 | \alias{cnvfreq} 7 | \title{Data class cnvfreq} 8 | \arguments{ 9 | \item{freqsum}{(data.table): the frequency of gains and losses in each defined genomic bin} 10 | 11 | \item{chrlimits}{(data.frame): a table containing the chromosome limit coordinates and global genomic coordinates} 12 | 13 | \item{bin.mat}{(numeric): a matrix of genomic bins versus samples} 14 | 15 | 
\item{plot}{(graphical): a recorded plot object} 16 | 17 | \item{param}{(list): a list of parameters provided} 18 | } 19 | \value{ 20 | an instance of the class 'cnvfreq' 21 | } 22 | \description{ 23 | Class to store breakpoint annotations in association with genomic features (e.g. gene loci) 24 | } 25 | -------------------------------------------------------------------------------- /man/break.iqm-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brk.burden.iqm.r 3 | \docType{class} 4 | \name{break.iqm-class} 5 | \alias{break.iqm-class} 6 | \alias{break.iqm} 7 | \title{Data class break.iqm} 8 | \arguments{ 9 | \item{summary}{(data.table): the frequency of gains and losses in each defined genomic bin} 10 | 11 | \item{brk.mat}{(numeric): a matrix of genomic bins versus samples} 12 | 13 | \item{chrlimits}{(data.frame): a table containing the chromosome limit coordinates and global genomic coordinates} 14 | 15 | \item{plot}{(graphical): a recorded plot object} 16 | 17 | \item{param}{(list): a list of parameters provided} 18 | } 19 | \value{ 20 | an instance of the class 'cnvfreq' 21 | } 22 | \description{ 23 | Class to store breakpoint annotations in association with genomic features (e.g. 
gene loci) 24 | } 25 | -------------------------------------------------------------------------------- /man/null.freq-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/freq.p.test.r 3 | \docType{class} 4 | \name{null.freq-class} 5 | \alias{null.freq-class} 6 | \alias{null.freq} 7 | \title{Data class null.freq} 8 | \arguments{ 9 | \item{freq.cut}{(numeric): the value from observed distribution that satisfies a certain p-value cutoff} 10 | 11 | \item{pvalues}{(numeric): a vector containing the total number of breakpoints in each sample} 12 | 13 | \item{observed}{(numeric): vector of observed distribution} 14 | 15 | \item{null}{(numeric): vector of null distribution} 16 | 17 | \item{param}{(list): a list of parameters provided} 18 | } 19 | \value{ 20 | an instance of the class 'freq.cut' 21 | } 22 | \description{ 23 | Class to store observed and null distributions as well as empirical corrected p-values associated with observed values 24 | } 25 | -------------------------------------------------------------------------------- /man/createRandomString.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.r 3 | \name{createRandomString} 4 | \alias{createRandomString} 5 | \title{Unique random string generator} 6 | \usage{ 7 | createRandomString(n = 1, strlen = 10) 8 | } 9 | \arguments{ 10 | \item{n}{the number of unique random strings to return} 11 | 12 | \item{strlen}{random string length} 13 | } 14 | \value{ 15 | a vector of unique random character strings 16 | } 17 | \description{ 18 | Generates n unique random character strings of a given length. 
Note that the length must be big enough in order to avoid offsetting the number n of strings requested 19 | } 20 | \examples{ 21 | 22 | # To ensure reproducibility make sure to set the seed 23 | set.seed(123456789) 24 | 25 | createRandomString(1, 10) 26 | } 27 | \keyword{random} 28 | \keyword{string} 29 | -------------------------------------------------------------------------------- /man/validate.cnv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/validate.input.data.r 3 | \name{validate.cnv} 4 | \alias{validate.cnv} 5 | \title{Initialization of CNV data} 6 | \usage{ 7 | validate.cnv(cnv.df) 8 | } 9 | \arguments{ 10 | \item{cnv.df}{(data.frame) segmentation data with at least 6 columns: sample, chromosome, start, end, probes, segment_mean} 11 | } 12 | \value{ 13 | an instance of the class 'svcnvio' containing segmentation data derived from CNV data type; A unique id (uid) column is also added 14 | } 15 | \description{ 16 | This function validates and reformats the CNV segmentation data type containing copy number log-ratios. It is used internally by 'svpluscnv' functions that require this type of data.
17 | } 18 | \examples{ 19 | 20 | validate.cnv(segdat_lung_ccle) 21 | } 22 | \keyword{CNV,} 23 | \keyword{segmentation} 24 | -------------------------------------------------------------------------------- /man/med.segmean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/segment.means.r 3 | \name{med.segmean} 4 | \alias{med.segmean} 5 | \title{Median sample CNV} 6 | \usage{ 7 | med.segmean(cnv) 8 | } 9 | \arguments{ 10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 11 | } 12 | \value{ 13 | (numeric) a vector containing the median logR value of a segmented data.frame 14 | } 15 | \description{ 16 | Obtain the median weighted segment mean from a segmentation file; The weighted median refers to the logR that occupies a center of all segments ordered by their log ratio 17 | } 18 | \examples{ 19 | 20 | ## validate input CNV data.frames 21 | cnv <- validate.cnv(segdat_lung_ccle) 22 | 23 | med_seg_mean <- med.segmean(cnv) 24 | head(med_seg_mean) 25 | } 26 | \keyword{CNV,} 27 | \keyword{segmentation} 28 | -------------------------------------------------------------------------------- /man/merge2lists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal_functions.r 3 | \name{merge2lists} 4 | \alias{merge2lists} 5 | \title{Merge two lists} 6 | \usage{ 7 | merge2lists(x, y, fun = "unique") 8 | } 9 | \arguments{ 10 | \item{x}{(list): input list 1} 11 | 12 | \item{y}{(list): input list 2} 13 | 14 | \item{fun}{(character): Either 'unique' or 'intersect' are accepted} 15 | } 16 | \value{ 17 | (list) merged list from x and y 18 | } 19 | \description{ 20 | Merge of 2 lists into one that contains unique or intersect vectors for each list entry with shared names 21 | } 22 | \examples{ 23
| 24 | x <- sapply(letters[1:10], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE ) 25 | y <- sapply(letters[5:15], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE ) 26 | merge2lists(x,y) 27 | } 28 | \keyword{lists} 29 | \keyword{merge} 30 | -------------------------------------------------------------------------------- /man/breaks-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/breakpoint.density.r 3 | \docType{class} 4 | \name{breaks-class} 5 | \alias{breaks-class} 6 | \alias{breaks} 7 | \title{Data class breaks} 8 | \arguments{ 9 | \item{breaks}{(data.table): the breakpoint info containing data.table, this will be occupied by the CNV segmentation data in the case of cnv.break.annot or SV for sv.break.annot. Unique random string rownames are added to the returned breaks data.frame.} 10 | 11 | \item{burden}{(numeric): a vector containing the total number of breakpoints in each sample} 12 | 13 | \item{param}{(list): a list of parameters provided} 14 | } 15 | \value{ 16 | an instance of the class 'breaks' containing breakpoint and breakpoint burden information 17 | } 18 | \description{ 19 | Class to store breakpoint annotations in association with genomic features (e.g. gene loci) 20 | } 21 | -------------------------------------------------------------------------------- /man/upgr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/break.annot.r 3 | \name{upgr} 4 | \alias{upgr} 5 | \title{Generate GRanges of upstream regions} 6 | \usage{ 7 | upgr(ggr, upstr = 50000) 8 | } 9 | \arguments{ 10 | \item{ggr}{(S4) a GenomicRanges object containing gene annotations. It is crucial that the genome version 'genesgr' and the input 'sv' are the same.
The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).} 11 | 12 | \item{upstr}{(numeric) size in base pairs to define gene upstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of upstream regions.} 13 | } 14 | \value{ 15 | (S4) a GRanges object of upstream regions 16 | } 17 | \description{ 18 | Generate GRanges of upstream regions 19 | } 20 | -------------------------------------------------------------------------------- /man/dngr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/break.annot.r 3 | \name{dngr} 4 | \alias{dngr} 5 | \title{Generate GRanges of downstream regions} 6 | \usage{ 7 | dngr(ggr, dnstr = 50000) 8 | } 9 | \arguments{ 10 | \item{ggr}{(S4) a GenomicRanges object containing gene annotations. It is crucial that the genome version 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).} 11 | 12 | \item{dnstr}{(numeric) size in base pairs to define gene downstream region onto which breakpoint overlaps will be identified.
The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of downstream regions.} 13 | } 14 | \value{ 15 | (S4) a GRanges object of downstream regions 16 | } 17 | \description{ 18 | Generate GRanges of downstream regions 19 | } 20 | -------------------------------------------------------------------------------- /man/svc.breaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/breakpoint.density.r 3 | \name{svc.breaks} 4 | \alias{svc.breaks} 5 | \title{Identify SVC breakpoints} 6 | \usage{ 7 | svc.breaks(svc, chrlist = NULL, low.cov = NULL) 8 | } 9 | \arguments{ 10 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc} 11 | 12 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...} 13 | 14 | \item{low.cov}{(data.table) a data.table (chrom, start, end) indicating low coverage regions to exclude from the analysis} 15 | } 16 | \value{ 17 | an instance of the class 'breaks' containing breakpoint and breakpoint burden information 18 | } 19 | \description{ 20 | Transform structural variant (SVC) data.frame into a 'breaks' object 21 | } 22 | \examples{ 23 | 24 | ## Obtain breakpoints from SV calls data 25 | svc <- validate.svc(svdat_lung_ccle) 26 | 27 | svc.breaks(svc) 28 | } 29 | \keyword{Structural} 30 | \keyword{variants} 31 | -------------------------------------------------------------------------------- /man/geneBreakOverlap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/break.annot.r 3 | \name{geneBreakOverlap} 4 | \alias{geneBreakOverlap} 5 | \title{Find overlaps between genomic features and breakpoints} 6 | \usage{ 7 | geneBreakOverlap(ggr, svgr) 8 | } 9 | \arguments{ 10 | \item{ggr}{(S4) a GenomicRanges object
containing gene annotations. It is crucial that the genome version 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).} 11 | 12 | \item{svgr}{(S4) a GenomicRanges object containing SV breakpoint ends. Metadata must contain 'rowid' and 'sampleid' fields. Seqnames are expected in the format (chr1, chr2, ...). Used by 'svc.break.annot' and 'cnv.break.annot'} 13 | } 14 | \value{ 15 | a list containing two lists: geneBreaks, geneSamples 16 | } 17 | \description{ 18 | Find overlaps between genomic features and breakpoints 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/chr.arm.cnv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/chr.arm.cnv.r 3 | \name{chr.arm.cnv} 4 | \alias{chr.arm.cnv} 5 | \title{Chromosome arm mean CNV} 6 | \usage{ 7 | chr.arm.cnv(cnv, genome.v = "hg19", verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv} 11 | 12 | \item{genome.v}{(character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres} 13 | 14 | \item{verbose}{(logical) whether to return internal messages} 15 | } 16 | \value{ 17 | a matrix of chromosome arms (rows) versus samples (cols) with average segment logRs per cell 18 | } 19 | \description{ 20 | Obtains a matrix with the weighted average CN per chromosome arm 21 | } 22 | \examples{ 23 | 24 | # initialize CNV data 25 | cnv <- validate.cnv(segdat_lung_ccle) 26 | 27 | arm_mat <- chr.arm.cnv(cnv, genome.v="hg19") 28 | dim(arm_mat) 29 | } 30 | \keyword{CNV,} 31 | \keyword{arm} 32 | \keyword{chromosome} 33 | \keyword{segmentation,} 34 |
-------------------------------------------------------------------------------- /man/hot.spot.samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hot.spot.samples.R 3 | \name{hot.spot.samples} 4 | \alias{hot.spot.samples} 5 | \title{Hot-spot sample retrieval} 6 | \usage{ 7 | hot.spot.samples(chromo.regs.obj, freq.cut) 8 | } 9 | \arguments{ 10 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs} 11 | 12 | \item{freq.cut}{(numeric) the hot spot threshold above which peaks are defined for sample ID retrieval} 13 | } 14 | \value{ 15 | a list comprising two lists: peakRegions, peakRegionsSamples 16 | } 17 | \description{ 18 | Collects sample ids with shattered regions detected at hot-spots based on certain p-value cutoff 19 | } 20 | \examples{ 21 | # validate input data.frames 22 | cnv <- validate.cnv(segdat_lung_ccle) 23 | svc <- validate.svc(svdat_lung_ccle) 24 | 25 | chromo.regs.obj <- shattered.regions(cnv,svc) 26 | mat<-hbd.mat(chromo.regs.obj) 27 | 28 | pcut.obj <- freq.p.test(mat,plot=FALSE) 29 | pcut <- freq.threshold(pcut.obj) 30 | 31 | res <- hot.spot.samples(chromo.regs.obj,pcut) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /man/amp.del.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gene.cnv.r 3 | \name{amp.del} 4 | \alias{amp.del} 5 | \title{Amplifications and deletions} 6 | \usage{ 7 | amp.del(genecnv.obj, logr.cut = 2) 8 | } 9 | \arguments{ 10 | \item{genecnv.obj}{(genecnv) an instance of the class 'genecnv' containing gene level copy number info} 11 | 12 | \item{logr.cut}{(numeric) the log-ratio cutoff above which genes are considered amplified (e.g 2 = 8 copies for amplification and 0.5 copies for deep deletions, in diploid regions)} 13 
| } 14 | \value{ 15 | (list) A list of lists including amplified.list, amplified.rank, deepdel.list and deepdel.rank 16 | } 17 | \description{ 18 | Retrieve amplification and deletion events from a 'genecnv.obj' generated by 'gene.cnv' function 19 | } 20 | \examples{ 21 | 22 | ## validate input data.frames 23 | cnv <- validate.cnv(segdat_lung_ccle) 24 | 25 | genecnv.obj <- gene.cnv(cnv) 26 | 27 | geneampdel <- amp.del(genecnv.obj, logr.cut = 2) 28 | lapply(geneampdel,head) 29 | } 30 | \keyword{CNV,} 31 | \keyword{genes} 32 | \keyword{segmentation,} 33 | -------------------------------------------------------------------------------- /man/shattered.eval.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \name{shattered.eval} 4 | \alias{shattered.eval} 5 | \title{Evaluate true catastrophic events 6 | Evaluate shattered regions based on interleaved breaks and breakpoint dispersion parameters in order to identify true catastrophic chromosomal alterations} 7 | \usage{ 8 | shattered.eval( 9 | chromo.regs.obj, 10 | interleaved.cut = 0.5, 11 | dist.iqm.cut = 1e+05, 12 | verbose = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs} 17 | 18 | \item{interleaved.cut}{(numeric) the percentage of non interleaved structural variant calls} 19 | 20 | \item{dist.iqm.cut}{(numeric) interquantile average of the distance between breakpoints within a shattered region} 21 | 22 | \item{verbose}{(logical)} 23 | } 24 | \value{ 25 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes 26 | } 27 | \description{ 28 | Evaluate true catastrophic events 29 | Evaluate shattered regions based on interleaved breaks and breakpoint dispersion parameters in order to identify true catastrophic chromosomal alterations 30 | } 31 | 
-------------------------------------------------------------------------------- /man/segment.gap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.cnv.artifact.r 3 | \name{segment.gap} 4 | \alias{segment.gap} 5 | \title{CNV segmentation gap filling} 6 | \usage{ 7 | segment.gap(cnv, minsize = 5000, chrlist = NULL, verbose = FALSE) 8 | } 9 | \arguments{ 10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 11 | 12 | \item{minsize}{(numeric) the minimum gap size required to fill the gap} 13 | 14 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...} 15 | 16 | \item{verbose}{(logical) whether to return internal messages} 17 | } 18 | \value{ 19 | a data.frame containing CNV data 20 | } 21 | \description{ 22 | Fills the gaps in a segmentation data.frame. Chromosome limits are defined for the complete segmentation dataset then segments fill the missing terminal regions. 23 | The CN log-ratio of the added segments is set to the average of the closest neighbours in each sample. 
24 | } 25 | \examples{ 26 | 27 | ## validate input data.frames 28 | cnv <- validate.cnv(segdat_lung_ccle) 29 | 30 | cnv2 <- segment.gap(cnv) 31 | cnv2 32 | } 33 | \keyword{CNV,} 34 | \keyword{segmentation} 35 | -------------------------------------------------------------------------------- /man/bed2chromo.reg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \name{bed2chromo.reg} 4 | \alias{bed2chromo.reg} 5 | \title{Transforms a bed format data.frame containing genomic regions into a matrix of n samples versus m defined genomic bins where bins overlapping with bed segments take value = 1} 6 | \usage{ 7 | bed2chromo.reg( 8 | bed, 9 | bingr = NULL, 10 | genome.v = "hg19", 11 | window.size = 10, 12 | slide.size = 2 13 | ) 14 | } 15 | \arguments{ 16 | \item{bed}{(data.frame) A data.frame} 17 | 18 | \item{bingr}{(S4) a GenomicRanges object containing the genomic bins} 19 | 20 | \item{genome.v}{(hg19 or hg38) reference genome version to generate genomic bins (ignored if bingr is not NULL)} 21 | 22 | \item{window.size}{(numeric) size in megabases to generate genomic bins} 23 | 24 | \item{slide.size}{(numeric) size in megabases of the sliding genomic window; slide.size must be <= 1} 25 | } 26 | \value{ 27 | an instance of the class 'chromo.regs' containing information about shattered regions 28 | } 29 | \description{ 30 | Transforms a bed format data.frame containing genomic regions into a matrix of n samples versus m defined genomic bins where bins overlapping with bed segments take value = 1 31 | } 32 | -------------------------------------------------------------------------------- /man/pct.genome.changed.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pct.genome.changed.r 3 | \name{pct.genome.changed} 4 |
\alias{pct.genome.changed} 5 | \title{Percent genome change calculation} 6 | \usage{ 7 | pct.genome.changed(cnv, fc.pct = 0.2, discard.sex = TRUE) 8 | } 9 | \arguments{ 10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 11 | 12 | \item{fc.pct}{(numeric) percentage CNV gain/loss for a segment to be considered changed (e.g. 0.2 = 20 percent change: segmean < 0.8 || segmean > 1.2)} 13 | 14 | \item{discard.sex}{(logical) whether sex chromosomes should be discarded} 15 | } 16 | \value{ 17 | (numeric) vector containing percent genome changed values (0-1) 18 | } 19 | \description{ 20 | Calculates the percentage of genome changed using CNV segmentation profiles. Genome change is defined based on the fold change CNV log-ratio between a sample and a reference. 21 | } 22 | \examples{ 23 | 24 | ## validate input CNV data.frames 25 | cnv <- validate.cnv(segdat_lung_ccle) 26 | 27 | pct_changed <- pct.genome.changed(cnv) 28 | head(pct_changed) 29 | } 30 | \seealso{ 31 | Additional data format information in the man pages of validate.cnv 32 | } 33 | \keyword{CNV,} 34 | \keyword{segmentation} 35 | -------------------------------------------------------------------------------- /man/get.chr.bins.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \name{get.chr.bins} 4 | \alias{get.chr.bins} 5 | \title{Generates a GenomicRanges object containing genomic bins based on a given bin size.
If a cnv (svcnvio) object is provided the chromosome limits 6 | will be obtained from mapped regions, otherwise chromosome limits will be obtained from the database (D3GB)} 7 | \usage{ 8 | get.chr.bins(cnv = NULL, genome.v = "hg19", window.size = 10, slide.size = 2) 9 | } 10 | \arguments{ 11 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 12 | 13 | \item{genome.v}{(hg19 or hg38) reference genome version to generate genomic bins (ignored if cnv is not NULL)} 14 | 15 | \item{window.size}{(numeric) size in megabases to generate genomic bins} 16 | 17 | \item{slide.size}{(numeric) size in megabases of the sliding genomic window; slide.size must be <= 1} 18 | } 19 | \value{ 20 | an instance of the class 'chromo.regs' containing information about shattered regions 21 | } 22 | \description{ 23 | Generates a GenomicRanges object containing genomic bins based on a given bin size. If a cnv (svcnvio) object is provided the chromosome limits 24 | will be obtained from mapped regions, otherwise chromosome limits will be obtained from the database (D3GB) 25 | } 26 | -------------------------------------------------------------------------------- /man/match.breaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/breakpoint.density.r 3 | \name{match.breaks} 4 | \alias{match.breaks} 5 | \title{Breakpoint matching} 6 | \usage{ 7 | match.breaks(brk1, brk2, maxgap = 1e+05, verbose = FALSE, plot = TRUE) 8 | } 9 | \arguments{ 10 | \item{brk1}{(S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks`} 11 | 12 | \item{brk2}{(S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks` to compare against brk1} 13 | 14 | \item{maxgap}{(numeric) distance (base pairs) limit for breakpoints to be considered colocalized} 15 | 16 | \item{verbose}{(logical) whether to return internal messages} 17 |
18 | \item{plot}{(logical) whether to plot into open device} 19 | } 20 | \value{ 21 | an object containing co-localizing breakpoints from two input 'breaks' 22 | } 23 | \description{ 24 | Match common breakpoints from two different datasets or data types based on their co-localization in the genome. 25 | } 26 | \examples{ 27 | 28 | # initialize CNV and SVC data 29 | cnv <- validate.cnv(segdat_lung_ccle) 30 | svc <- validate.svc(svdat_lung_ccle) 31 | 32 | ## Obtain breakpoints from CNV and SVC 33 | brk1 <- cnv.breaks(cnv) 34 | brk2 <- svc.breaks(svc) 35 | 36 | common.brk <- match.breaks(brk1, brk2) 37 | 38 | } 39 | \keyword{CNV,} 40 | \keyword{SV,} 41 | \keyword{breakpoints} 42 | \keyword{genomic} 43 | -------------------------------------------------------------------------------- /man/gene.cnv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gene.cnv.r 3 | \name{gene.cnv} 4 | \alias{gene.cnv} 5 | \title{Gene-level CNV} 6 | \usage{ 7 | gene.cnv( 8 | cnv, 9 | genome.v = "hg19", 10 | genesgr = NULL, 11 | chrlist = NULL, 12 | fill.gaps = FALSE, 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 18 | 19 | \item{genome.v}{(hg19 or hg38) reference genome version to fetch gene annotations when 'genesgr=NULL'} 20 | 21 | \item{genesgr}{(S4) a GenomicRanges object containing genomic feature annotations (if not NULL overrides genome.v). It must contain 'strand' and a metadata field 'gene_id' with unique values.
#' Genes GRanges
#'
#' Retrieves a GRanges object containing gene annotations for a specified
#' human genome version. Entrez gene ids are translated into gene symbols;
#' genes without a symbol or located outside `chrlist` are dropped.
#'
#' @param genome.v (character) reference genome version to retrieve gene
#'   annotations: "hg19" or "GRCh37", "hg38" or "GRCh38"
#' @param chrlist (character) chromosomes to keep (chr1, chr2, ...); if NULL,
#'   chr1 to chr22, chrX and chrY are kept
#' @return a GRanges class object from the specified human genome version
#'   whose 'gene_id' metadata field holds gene symbols
#' @keywords CNV, segmentation, genes
#' @export
#' @examples
#'
#' get.genesgr(genome.v = "hg19",chrlist=NULL)
#'

get.genesgr <- function(genome.v = "hg19", chrlist = NULL) {

    # NULL, NA or multi-valued input would otherwise fail inside `if()` with a
    # cryptic base-R error; report it with the function's own message instead.
    if (length(genome.v) != 1L || is.na(genome.v)) {
        stop("Unspecified, or non available genome")
    }

    if (genome.v %in% c("hg19", "GRCh37")) {
        genesgr <- GenomicFeatures::genes(TxDb.Hsapiens.UCSC.hg19.knownGene, columns = "gene_id")
    } else if (genome.v %in% c("hg38", "GRCh38")) {
        genesgr <- GenomicFeatures::genes(TxDb.Hsapiens.UCSC.hg38.knownGene, columns = "gene_id")
    } else {
        stop("Unspecified, or non available genome")
    }

    if (is.null(chrlist)) {
        chrlist <- paste0("chr", c(1:22, "X", "Y"))
    }

    # Replace Entrez ids by gene symbols; the informational message emitted by
    # the id mapping is silenced.
    genesgr@elementMetadata$gene_id <- suppressMessages(
        mapIds(org.Hs.eg.db, genesgr@elementMetadata$gene_id, "SYMBOL", "ENTREZID")
    )

    # Drop genes without a symbol, empty entries and unwanted chromosomes.
    genesgr <- genesgr[!is.na(genesgr$gene_id)]
    genesgr <- genesgr[lengths(genesgr@elementMetadata$gene_id) > 0]
    genesgr <- genesgr[as.character(genesgr@seqnames) %in% chrlist]

    genesgr
}
17 | A few formatting rules are enforced: 18 | 1) The input must contain 8 columns in the following order (sample ID, chromosome of origin, strand of origin, position of origin, chromosome of destination, strand of destination, position of destination, SV class) 19 | 2) SV classes accepted: DEL(deletion), DUP(duplication), INS(insertion), TRA(translocation), INV(inversion) and BND(break end) 20 | 3) Any variant in which chromosome of origin and destination differ are encoded as TRA (translocation) 21 | 4) pos1 < pos2 is enforced for all variants in which chromosome of origin and destination are the same 22 | 5) The class BND can be used to operate with complex events as long as both break ends are the same chromosome 23 | } 24 | \examples{ 25 | 26 | validate.svc(svdat_lung_ccle) 27 | } 28 | \keyword{SV,} 29 | \keyword{structural} 30 | \keyword{variants} 31 | -------------------------------------------------------------------------------- /man/shattered.map.plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.map.plot.r 3 | \name{shattered.map.plot} 4 | \alias{shattered.map.plot} 5 | \title{Shattered regions genomic map} 6 | \usage{ 7 | shattered.map.plot( 8 | chromo.regs.obj, 9 | conf = "hc", 10 | genome.v = "hg19", 11 | chrlist = NULL, 12 | freq.cut = NULL, 13 | add.legend = "top" 14 | ) 15 | } 16 | \arguments{ 17 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs} 18 | 19 | \item{conf}{(character) either 'hc' for high confidence objects or else all included} 20 | 21 | \item{genome.v}{(character) reference genome version to draw chromosome limits and centromeres either hg19 or hg38 accepted} 22 | 23 | \item{chrlist}{(character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included} 24 | 25 | \item{freq.cut}{the value to draw a horizontal line; use
'freq.p.test' to obtain a threshold for statistically significant hot spots} 26 | 27 | \item{add.legend}{the position of the legend in the plot; if null, no legend will be drawn} 28 | } 29 | \value{ 30 | a plot into open device 31 | } 32 | \description{ 33 | Plots a genome wide map of shattered region frequencies 34 | } 35 | \examples{ 36 | 37 | 38 | ## validate input data.frames 39 | cnv <- validate.cnv(segdat_lung_ccle) 40 | svc <- validate.svc(svdat_lung_ccle) 41 | 42 | ## obtain shattered regions 43 | chromo.regs.obj <- shattered.regions(cnv,svc) 44 | 45 | shattered.map.plot(chromo.regs.obj) 46 | } 47 | \keyword{chromosome} 48 | \keyword{genome} 49 | \keyword{map} 50 | \keyword{shattering,} 51 | -------------------------------------------------------------------------------- /man/clean.cnv.artifact.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean.cnv.artifact.r 3 | \name{clean.cnv.artifact} 4 | \alias{clean.cnv.artifact} 5 | \title{CNV artifact detection and filtering} 6 | \usage{ 7 | clean.cnv.artifact( 8 | cnv, 9 | n.reps = 4, 10 | cnv.size = 2e+06, 11 | pc.overlap = 0.99, 12 | fill.gaps = TRUE, 13 | minsize = 5000, 14 | verbose = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv} 19 | 20 | \item{n.reps}{(numeric) number of samples with identical segment to consider artifact} 21 | 22 | \item{cnv.size}{(numeric) only smaller segments will be modified in the cnv data.frame} 23 | 24 | \item{pc.overlap}{(numeric) minimum percentage overlap for a pair of segments to be considered identical} 25 | 26 | \item{fill.gaps}{(logical) whether to fill gaps from the segmented file after filtering artifacts} 27 | 28 | \item{minsize}{(numeric) the minimum gap size required to fill the gap.
Only used if 'fill.gaps=TRUE'} 29 | 30 | \item{verbose}{(logical) whether to print internal messages} 31 | } 32 | \value{ 33 | a data.frame containing CNV data 34 | } 35 | \description{ 36 | Detects identical or near-identical CNV segments across multiple samples susceptible of representing common variants or technical artifacts. Then those segments CNV log-ratio is replaced by the flanking segments average 37 | } 38 | \examples{ 39 | 40 | ## validate input data.frame 41 | cnv <- validate.cnv(segdat_lung_ccle) 42 | 43 | cnvcl <- clean.cnv.artifact(cnv) 44 | cnvcl 45 | } 46 | \keyword{CNV,} 47 | \keyword{filter} 48 | \keyword{segmentation,} 49 | -------------------------------------------------------------------------------- /man/break.density.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/breakpoint.density.r 3 | \name{break.density} 4 | \alias{break.density} 5 | \title{Breakpoint density map} 6 | \usage{ 7 | break.density( 8 | brk, 9 | chr.lim = NULL, 10 | genome.v = "hg19", 11 | window.size = 10, 12 | slide.size = 2, 13 | verbose = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{brk}{(breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).} 18 | 19 | \item{chr.lim}{(data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. 
Also returned by the function svpluscnv::chromosome.limit.coords.} 20 | 21 | \item{genome.v}{(hg19 or hg38) reference genome version to draw chromosome limits and centromeres} 22 | 23 | \item{window.size}{(numeric) size in megabases of the genome bin onto which breakpoints will be mapped} 24 | 25 | \item{slide.size}{(numeric) size in megabases of the sliding genomic window; if slide.size < window.size the genomic bins will overlap} 26 | 27 | \item{verbose}{(logical) whether to return internal messages} 28 | } 29 | \value{ 30 | a matrix of samples (rows) and genomic bins (cols) with the number of breakpoints mapped in each cell 31 | } 32 | \description{ 33 | Generates a genomic map based on a defined bin size and sliding window and counts the number of breakpoints mapped onto each bin. This function is used internally by svpluscnv::shattered.regions and svpluscnv::shattered.regions.cnv 34 | } 35 | \examples{ 36 | 37 | # initialize CNV data 38 | cnv <- validate.cnv(segdat_lung_ccle) 39 | 40 | # obtain CNV breakpoints 41 | brk <- cnv.breaks(cnv) 42 | 43 | break.density(brk) 44 | } 45 | \keyword{CNV,} 46 | \keyword{segmentation} 47 | -------------------------------------------------------------------------------- /man/cnv.breaks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/breakpoint.density.r 3 | \name{cnv.breaks} 4 | \alias{cnv.breaks} 5 | \title{Identify CNV breakpoints} 6 | \usage{ 7 | cnv.breaks( 8 | cnv, 9 | fc.pct = 0.2, 10 | break.width = 10000, 11 | min.cnv.size = NULL, 12 | min.num.probes = NULL, 13 | chrlist = NULL, 14 | low.cov = NULL, 15 | clean.brk = NULL, 16 | verbose = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 21 | 22 | \item{fc.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents
a fold change of 0.8 or 1.2} 23 | 24 | \item{break.width}{(numeric) the maximum distance between a segment end and the subsequent segment start positions beyond which breakpoints are discarded} 25 | 26 | \item{min.cnv.size}{(numeric) The minimun segment size (in base pairs) to include in the analysis} 27 | 28 | \item{min.num.probes}{(numeric) The minimun number of probes per segment to include in the analysis} 29 | 30 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...} 31 | 32 | \item{low.cov}{(data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis} 33 | 34 | \item{clean.brk}{(numeric) identical breakpoints across multiple samples tend to be artifacts; remove breaks > N} 35 | 36 | \item{verbose}{(logical) whether to return} 37 | } 38 | \value{ 39 | an instance of the class 'breaks' containing breakpoint and breakpoint burden information 40 | } 41 | \description{ 42 | Identify CNV breakpoints filtered by the change in copy number log-ratio between contiguous segments 43 | } 44 | \examples{ 45 | 46 | # initialized CNV data 47 | cnv <- validate.cnv(segdat_lung_ccle) 48 | 49 | cnv.breaks(cnv) 50 | 51 | } 52 | \keyword{CNV,} 53 | \keyword{segmentation} 54 | -------------------------------------------------------------------------------- /man/freq.p.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/freq.p.test.r 3 | \name{freq.p.test} 4 | \alias{freq.p.test} 5 | \title{Frequency hot spot detection 6 | 7 | Obtains significance cutoff for the frequency of binary events encoded in a matrix such as that generated by shattered.regions and shattered.regions.cnv algorithms} 8 | \usage{ 9 | freq.p.test( 10 | mat, 11 | method = "fdr", 12 | p.cut = 0.05, 13 | iter = 100, 14 | zerofreq = TRUE, 15 | plot = TRUE, 16 | verbose = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | 
\item{mat}{(numeric matrix) a binary matrix where columns will be tested for their sum value compared to a permutated matrix} 21 | 22 | \item{method}{(character) the method to pass to p.adjust function} 23 | 24 | \item{p.cut}{(numeric) the cutoff for multiple hypothesis corrected p.value} 25 | 26 | \item{iter}{(numeric) Number of iterations to produce null distribution (note that null size will be iter*ncol(mat))} 27 | 28 | \item{zerofreq}{(logical) whether to remove bins with observed frequency = 0; It is recommended to set to TRUE when the bins span genomic regions of low coverage} 29 | 30 | \item{plot}{(logical) whether to generate a histogram comparing observed and null frequency distributions} 31 | 32 | \item{verbose}{(logical) whether to return messages} 33 | } 34 | \value{ 35 | an instance of the class 'freq.cut' 36 | } 37 | \description{ 38 | Frequency hot spot detection 39 | 40 | Obtains significance cutoff for the frequency of binary events encoded in a matrix such as that generated by shattered.regions and shattered.regions.cnv algorithms 41 | } 42 | \examples{ 43 | 44 | ## validate input data.frames 45 | cnv <- validate.cnv(segdat_lung_ccle) 46 | 47 | ## obtain a matrix of genomic bins vs samples indicating high density of breaks 48 | shatt.regions <- shattered.regions.cnv(cnv) 49 | mat <- shatt.regions@high.density.regions.hc 50 | 51 | freq.p.test(mat) 52 | } 53 | \keyword{empirical} 54 | \keyword{p.adjust} 55 | \keyword{p.value,} 56 | -------------------------------------------------------------------------------- /man/circ.wg.plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/circular.plot.r 3 | \name{circ.wg.plot} 4 | \alias{circ.wg.plot} 5 | \title{Circular visualization CNV and SVC} 6 | \usage{ 7 | circ.wg.plot( 8 | cnv, 9 | svc, 10 | sample.id = NULL, 11 | genome.v = "hg19", 12 | lrr.pct = 0.2, 13 | lrr.max = 4, 14 | 
chrlist = NULL, 15 | add.cnv.legend = "topleft", 16 | add.svc.legend = "toprigh", 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 22 | 23 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc} 24 | 25 | \item{sample.id}{(character) the id of the sample to be plotted} 26 | 27 | \item{genome.v}{(character) (hg19 or h38) reference genome version to draw chromosome limits and centromeres} 28 | 29 | \item{lrr.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2} 30 | 31 | \item{lrr.max}{(numeric) maximum CNV to be plotted} 32 | 33 | \item{chrlist}{(character) vector containing chromosomes to plot; by default all chromosomes plotted} 34 | 35 | \item{add.cnv.legend}{(x,y or coordinates) the position parameter passed to legend to plot CNV (outer tracks) description} 36 | 37 | \item{add.svc.legend}{(x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description} 38 | } 39 | \value{ 40 | circos plot into open device 41 | } 42 | \description{ 43 | Produces a circos plot combining CNV and SVC of the whole genome 44 | } 45 | \examples{ 46 | 47 | ## validate input data.frames 48 | cnv <- validate.cnv(segdat_lung_ccle) 49 | svc <- validate.svc(svdat_lung_ccle) 50 | 51 | ## select a random sample id 52 | id <- "A549_LUNG" 53 | 54 | circ.wg.plot(cnv, svc, sample.id=id) 55 | } 56 | \keyword{CNV,} 57 | \keyword{circular} 58 | \keyword{plot} 59 | \keyword{segmentation,} 60 | \keyword{structural} 61 | \keyword{variant,} 62 | \keyword{visualization,} 63 | -------------------------------------------------------------------------------- /man/cnv.freq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cnv.freq.plot.r 3 
| \name{cnv.freq} 4 | \alias{cnv.freq} 5 | \title{CNV frequency map} 6 | \usage{ 7 | cnv.freq( 8 | cnv, 9 | fc.pct = 0.2, 10 | genome.v = "hg19", 11 | ploidy = FALSE, 12 | g.bin = 1, 13 | sampleids = NULL, 14 | cex.axis = 1, 15 | cex.lab = 1, 16 | label.line = -1.2, 17 | plot = TRUE, 18 | verbose = TRUE 19 | ) 20 | } 21 | \arguments{ 22 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 23 | 24 | \item{fc.pct}{(numeric) percentage CNV gain/loss for a segment to be considered changed (i.e. 0.2 = 20 percent change 0.8 < segmean && segmean > 1.2)} 25 | 26 | \item{genome.v}{(character) (hg19 or h38) reference genome version to draw chromosome limits and centromeres} 27 | 28 | \item{ploidy}{(logical) whether to apply ploidy correction; the function med.segmean will be used to obtain each sample's ploidy logR then this value substracted to each sample's logR values} 29 | 30 | \item{g.bin}{(numeric) size in megabases of the genmome bin to compute break density} 31 | 32 | \item{sampleids}{(character) vector containing list of samples to include in plot. if set to NULL, all samples in the input will be used} 33 | 34 | \item{cex.axis, cex.lab, label.line}{(numeric) plot parameters} 35 | 36 | \item{plot}{(logical) whether produce a graphical output} 37 | 38 | \item{verbose}{(logical) whether to return internal messages} 39 | } 40 | \value{ 41 | an instance of the class 'cnvfreq' and optionally a plot into open device 42 | } 43 | \description{ 44 | Creates a map of CNVs using genome binning and plots CNV frequency across the genome. This function optionally returns text, graphical or both outputs. 
45 | Additionaly, calculates the proportion of samples with a given percentage of chromosome arm gained/lost 46 | } 47 | \examples{ 48 | 49 | ## validate input data.frame 50 | cnv <- validate.cnv(nbl_segdat) 51 | 52 | cnv.freq(cnv, genome.v = "hg19") 53 | } 54 | \keyword{CNV,} 55 | \keyword{plot} 56 | \keyword{segmentation,} 57 | -------------------------------------------------------------------------------- /man/break.annot-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/break.annot.r 3 | \docType{class} 4 | \name{break.annot-class} 5 | \alias{break.annot-class} 6 | \alias{break.annot} 7 | \title{break.annot class} 8 | \arguments{ 9 | \item{input}{(data.frame): the breakpoint info containing data.frame, this will be occupied by the CNV segmentation data in the case of cnv.break.annot or SV for sv.break.annot. Unique random string rownames are added to the provided data.frame.} 10 | 11 | \item{genesgr}{(GRanges): a GRanges object with genomic features (e.g. genes) to which breakpoints are mapped} 12 | 13 | \item{disruptSamples}{(list): a list which names correspond to genomic features and values correspond to sample ids harboring breakpoints overlapping with said features} 14 | 15 | \item{disruptBreaks}{(list): a list which names correspond to genomic features and values correspond to the ids of breakpount mapped onto them. 
Break ids are linked to the 'input' data.frame rownames} 16 | 17 | \item{upstreamSamples}{(list): a list which names correspond to genomic features and values correspond to sample ids harboring breakpoints overlapping with upstream region of said features} 18 | 19 | \item{upstreamBreaks}{(list): a list which names correspond to genomic features and values correspond to the ids of breakpount mapped onto upstream regions Break ids are linked to the 'input' data.frame rownames} 20 | 21 | \item{dnstreamSamples}{(list): a list which names correspond to genomic features and values correspond to sample ids harboring breakpoints overlapping with downstream region of said features} 22 | 23 | \item{dnstreamBreaks}{(list): a list which names correspond to genomic features and values correspond to the ids of breakpount mapped onto downstream regions Break ids are linked to the ''input' brk object} 24 | 25 | \item{param}{(list): a list of parametres provided for the annotation function} 26 | } 27 | \value{ 28 | an instance of the class 'break.annot' containing breakpoint mapping onto genes 29 | } 30 | \description{ 31 | Class instance to store breakpoint annotations in association with genomic features (e.g. 
gene loci) 32 | } 33 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: svpluscnv 2 | Title: svpluscnv: analysis and visualization of complex structural variation data 3 | Version: 0.99.1 4 | Author: 5 | person("Gonzalo", "Lopez", 6 | role = c("aut", "cre"), 7 | email = "gonzolgarcia@gmail.com", 8 | comment = c(ORCID = "0000-0002-5092-1284")) 9 | person("Laura", "Egolf", 10 | role = c("aut"), 11 | email = "laura.e.egolf@gmail.com", 12 | comment = c(ORCID = "0000-0002-7103-4801")) 13 | person("Federico", "Giorgi", 14 | role = c("ctb"), 15 | email = "federico.giorgi@gmail.com", 16 | comment = c(ORCID = "0000-0002-7325-9908")) 17 | Maintainer: 18 | Gonzalo Lopez 19 | Description: svpluscnv R package is a "swiss army knife" for the integration and interpretation of orthogonal datasets including copy number variant (CNV) segmentation profiles and sequencing-based structural variant calls (SVC). The package implements analysis and visualization tools to evaluate chromosomal instability and ploidy, identify genes harboring recurrent SVs and systematically characterize hot-spot genomic locations harboring complex rearrangements such as chromothripsis and chromoplexy. 
20 | License: GPL-3 21 | Encoding: UTF-8 22 | LazyData: true 23 | RoxygenNote: 7.1.1 24 | biocViews: StructuralVariation, VariantAnnotation, CopyNumberVariation, Sequencing, GenomicVariation 25 | Depends: R (>= 3.6) 26 | Imports: IRanges, GenomicRanges, tidyr, data.table, circlize, D3GB, shape, org.Hs.eg.db, TxDb.Hsapiens.UCSC.hg19.knownGene, TxDb.Hsapiens.UCSC.hg38.knownGene, methods, stats, graphics, utils, grDevices, taRifx, S4Vectors, AnnotationDbi,GenomicAlignments,GenomicFeatures,scales,plot3D 27 | Suggests: 28 | BiocStyle, 29 | knitr, 30 | rmarkdown 31 | Collate: 32 | validate.input.data.r internal_functions.r break.annot.r breakpoint.density.r brk.burden.iqm.r shattered.regions.r chr.arm.cnv.r segment.means.r circular.plot.r cnv.freq.plot.r clean.cnv.artifact.r freq.p.test.r gene.cnv.r gene.track.view.r get.genesgr.r hot.spot.samples.R pct.genome.changed.r shattered.map.plot.r shattered.regions.cnv.r sv.model.view.r svpluscnv.data.r 33 | VignetteBuilder: knitr 34 | git_url: https://github.com/ccbiolab/svpluscnv 35 | -------------------------------------------------------------------------------- /man/gene.track.view.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gene.track.view.r 3 | \name{gene.track.view} 4 | \alias{gene.track.view} 5 | \title{Gene track visualization} 6 | \usage{ 7 | gene.track.view( 8 | chrom = NULL, 9 | start = NULL, 10 | stop = NULL, 11 | symbol = NULL, 12 | upstr = NULL, 13 | dnstr = NULL, 14 | genome.v = "hg19", 15 | cex.text = 0.6, 16 | addtext = TRUE, 17 | plot = TRUE, 18 | summary = TRUE, 19 | ... 20 | ) 21 | } 22 | \arguments{ 23 | \item{chrom}{(character) Chromosome (e.g. 
chr9)} 24 | 25 | \item{start}{(numeric) Genomic coordinate from specified chromosome to start plotting} 26 | 27 | \item{stop}{(numeric) Genomic coordinate from specified chromosome to stop plotting} 28 | 29 | \item{symbol}{(character) Gene acceoted hgnc symbol to retrieve coordinates and area plotting ()} 30 | 31 | \item{upstr}{(numeric) Distance upstream specified gene to extend the area plotted} 32 | 33 | \item{dnstr}{(numeric) Distance downstream specified gene to extend the area plotted} 34 | 35 | \item{genome.v}{(character) Reference genome version to draw chromosome limits and centromeres (hg19 or hg38)} 36 | 37 | \item{cex.text}{(numeric) The magnification to be used for transcript RefSeq text added} 38 | 39 | \item{addtext}{(logic) Whether to include transcript RefSeq ids in the plot} 40 | 41 | \item{plot}{(logic) Whether to generate plot in open device} 42 | 43 | \item{summary}{(logic) Whether to produce a data.table output with transcript information} 44 | 45 | \item{...}{Additional graphical parameters} 46 | } 47 | \value{ 48 | A data.frame with gene isoform annotations and/or plot into open device 49 | } 50 | \description{ 51 | Creates a track visualization of a genomic region defined by gene boundaries or custom provided 52 | } 53 | \examples{ 54 | 55 | # obtain the coordinates of a desired genomic regionbased on a known gene locus 56 | refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD") 57 | chrom <- refSeqGene$chrom 58 | start <- refSeqGene$start - 150000; 59 | stop <- refSeqGene$stop + 50000; 60 | 61 | gene.track.view(symbol="PTPRD", genome.v="hg19") 62 | } 63 | \keyword{CNV,} 64 | \keyword{segmentation} 65 | -------------------------------------------------------------------------------- /man/chromo.regs-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \docType{class} 4 | \name{chromo.regs-class} 5 
| \alias{chromo.regs-class} 6 | \alias{chromo.regs} 7 | \title{Data class chromo.regs} 8 | \arguments{ 9 | \item{regions.summary}{(list): a list of data.frames sumarizing the information of shattered regions found in each sample} 10 | 11 | \item{high.density.regions}{(matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal)} 12 | 13 | \item{high.density.regions.hc}{(matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal). 14 | Only those bins that overlap with high confidence regions defined in regions.summary are set to = 1} 15 | 16 | \item{cnv.brk.dens}{(matrix): a numeric matrix representing the number of CNV segmentation breakpoints found in at genomic bins in each sample} 17 | 18 | \item{svc.brk.dens}{(matrix): a numeric matrix representing the number of SV breakpoints found at genomic bins in each sample} 19 | 20 | \item{cnv.brk.common.dens}{(matrix): a numeric matrix representing the number of CNV breakpoints colocalizing SV breakpoints found at genomic bins in each sample} 21 | 22 | \item{svc.brk.common.dens}{(matrix): a numeric matrix representing the number of SV breakpoints colocalizing CNV breakpoints found at genomic bins in each sample} 23 | 24 | \item{cnvbrk}{(S4): on object generated by cnv.breaks function} 25 | 26 | \item{svcbrk}{(S4): on object generated by svc.breaks function} 27 | 28 | \item{common.brk}{(list): on object generated by match.breaks function} 29 | 30 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv} 31 | 32 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' validated by validate.svc} 33 | 34 | \item{param}{(list): list of configuration parameters provided or set as default} 35 | } 36 | \value{ 37 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes 38 | } 39 | \description{ 40 | 
Class to store shattered regions and information produced by shattered.regions and shattered.regions.cnv functions 41 | } 42 | -------------------------------------------------------------------------------- /man/svc.break.annot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/break.annot.r 3 | \name{svc.break.annot} 4 | \alias{svc.break.annot} 5 | \title{Identification of recurrently altered genes using SVC data} 6 | \usage{ 7 | svc.break.annot( 8 | svc, 9 | genome.v = "hg19", 10 | genesgr = NULL, 11 | upstr = 50000, 12 | dnstr = 50000, 13 | svc.seg.size = 2e+05, 14 | verbose = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' validated by validate.svc} 19 | 20 | \item{genome.v}{(character): either 'hg19' or 'hg38' accepted; reference genome version to retrieve gene annotations including genomic coordinates and strand} 21 | 22 | \item{genesgr}{(S4) a GenomicRanges object containing gene annotations (if not NULL overides genome.v). It is crutial that the genome version 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).} 23 | 24 | \item{upstr}{(numeric) size in base pairs to define gene upstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of upstream regions.} 25 | 26 | \item{dnstr}{(numeric) size in base pairs to define gene downstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of downstream regions.} 27 | 28 | \item{svc.seg.size}{(numeric) base pairs for maximum allowed segmental variants (DEL, DUP, INV or INS) size. 
Larger segmental SVs are treated as translocations and only the breakpoint position will be overlapped with genomic features.} 29 | 30 | \item{verbose}{(logical) whether to return internal messages} 31 | } 32 | \value{ 33 | an instance of the class 'break.annot' containing breakpoint mapping onto genes 34 | } 35 | \description{ 36 | Identify recurrently altered genes by strutural variants. The function will identify overlaps between genomic features (e.g. genes) and SVs breakpoints. 37 | } 38 | \examples{ 39 | 40 | # Initialize SVC data 41 | svc <- validate.svc(svdat_lung_ccle) 42 | 43 | svc.break.annot(svc, genome.v="hg19") 44 | } 45 | \keyword{Structural} 46 | \keyword{annotation} 47 | \keyword{variants,} 48 | -------------------------------------------------------------------------------- /man/shattered.regions.cnv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.cnv.r 3 | \name{shattered.regions.cnv} 4 | \alias{shattered.regions.cnv} 5 | \title{CNV-only based shattered region detection} 6 | \usage{ 7 | shattered.regions.cnv( 8 | cnv, 9 | fc.pct = 0.2, 10 | min.cnv.size = 0, 11 | min.num.probes = 0, 12 | low.cov = NULL, 13 | clean.brk = NULL, 14 | window.size = 10, 15 | slide.size = 2, 16 | num.breaks = 10, 17 | num.sd = 5, 18 | dist.iqm.cut = 1e+05, 19 | chrlist = NULL, 20 | chr.lim = NULL, 21 | verbose = TRUE 22 | ) 23 | } 24 | \arguments{ 25 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 26 | 27 | \item{fc.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change} 28 | 29 | \item{min.cnv.size}{(numeric) The minimun segment size (in base pairs) to include in the analysis} 30 | 31 | \item{min.num.probes}{(numeric) The minimun number of probes per segment to include in the analysis} 32 | 33 | 
\item{low.cov}{(data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis} 34 | 35 | \item{clean.brk}{(numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied} 36 | 37 | \item{window.size}{(numeric) size in megabases of the genome bin to compute break density} 38 | 39 | \item{slide.size}{(numeric) size in megabases of the sliding genome window} 40 | 41 | \item{num.breaks}{(numeric) size in megabases of the genome bin to compute break density} 42 | 43 | \item{num.sd}{(numeric) size in megabases of the sliding genome window} 44 | 45 | \item{dist.iqm.cut}{(numeric) interquantile average of the distance between breakpoints within a shattered region} 46 | 47 | \item{chrlist}{(character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included} 48 | 49 | \item{verbose}{(logical)} 50 | } 51 | \value{ 52 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes 53 | } 54 | \description{ 55 | Caller for the identification of shattered genomic regions based on CNV breakpoint densities 56 | } 57 | \examples{ 58 | 59 | ## validate input data.frames 60 | cnv <- validate.cnv(segdat_lung_ccle) 61 | 62 | shattered.regions.cnv(cnv) 63 | } 64 | \keyword{CNV,} 65 | \keyword{segmentation} 66 | -------------------------------------------------------------------------------- /R/pct.genome.changed.r: -------------------------------------------------------------------------------- 1 | #' Percent genome change calculation 2 | #' 3 | #' Calculates the percentage of genome changed using CNV segmentation profiles. Genome change is defined based on the fold change CNV log-ratio between a sample and a reference. 
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param fc.pct (numeric) fractional CNV gain/loss for a segment to be considered changed (e.g. 0.2 = 20 percent change: segmean < log2(0.8) or segmean >= log2(1.2))
#' @param discard.sex (logical) whether sex chromosomes (chrX, chrY) should be excluded from the calculation
#' @return (numeric) named vector (names are sample ids) containing the fraction of genome changed (0-1); samples with at least one changed segment are listed first, followed by samples without any
#' @seealso Additional data format information in the man pages of validate.cnv
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' ## validate input CNV data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' pct_changed <- pct.genome.changed(cnv)
#' head(pct_changed)

pct.genome.changed <- function(cnv,
                               fc.pct=0.2,
                               discard.sex=TRUE){

    cnvdat <- cnv@data
    if(discard.sex == TRUE) cnvdat <- cnvdat[which(!cnvdat$chrom %in% c("chrX","chrY")),]

    # as.numeric (as done in ave.segmean and med.segmean) keeps the per-sample
    # sums from overflowing the integer range when coordinates are integers
    width <- as.numeric(cnvdat$end - cnvdat$start)
    segmean <- cnvdat$segmean
    sample <- as.character(cnvdat$sample)

    # segments without a width or a sample id cannot be attributed to a sample
    keep <- which(!is.na(width) & !is.na(sample))
    width <- width[keep]
    segmean <- segmean[keep]
    sample <- sample[keep]

    # a segment is changed when its log-ratio falls outside the fold change
    # band; segments with a missing log-ratio count as not changed
    changed <- segmean < log2(1-fc.pct) | segmean >= log2(1+fc.pct)
    changed[is.na(changed)] <- FALSE

    # per-sample totals computed without aggregate(), which stops with
    # "no rows to aggregate" when no segment (or every segment) is changed
    by.sample <- split(seq_along(width), sample)
    total.len <- vapply(by.sample, function(i) sum(width[i]), numeric(1))
    changed.len <- vapply(by.sample, function(i) sum(width[i][changed[i]]), numeric(1))
    any.changed <- vapply(by.sample, function(i) any(changed[i]), logical(1))

    pct.change <- changed.len/total.len

    # keep the historical ordering of the output: samples harboring changed
    # segments first, samples with no changed segment afterwards
    pct.change <- c(pct.change[any.changed], pct.change[!any.changed])

    return(pct.change)
}


-------------------------------------------------------------------------------- /man/circ.chromo.plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/circular.plot.r 3 | \name{circ.chromo.plot} 4 | \alias{circ.chromo.plot} 5 | \title{Circular visualization of shattered regions} 6 | \usage{ 7 | circ.chromo.plot( 8 | chromo.regs.obj, 9 | sample.id, 10 | print.name = TRUE, 11 | genome.v = "hg19", 12 | lrr.pct = 0.2, 13 | lrr.max = 4, 14 | high.conf = FALSE, 15 | chrlist = NULL, 16 | add.cnv.legend = "topleft", 17 | add.svc.legend = "toprigh", 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs} 23 | 24 | \item{sample.id}{(character) the id of a sample to be plotted within} 25 | 26 | \item{print.name}{(logical) whether to print the sample id in the center of the circular plot} 27 | 28 | \item{genome.v}{(character) (hg19 or h38) reference genome version to draw chromosome limits and centromeres} 29 | 30 | \item{lrr.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change} 31 | 32 | \item{lrr.max}{(numeric) CNV plot limit} 33 | 34 | \item{high.conf}{(logical) Whether to plot only high confidence shattered regions (see https://github.com/ccbiolab/svpluscnv#identification-of-shattered-regions for more information)} 35 | 36 | \item{chrlist}{(character) vector containing chromosomes to plot; by default only chromosomes with shattered regions are ploted} 37 | 38 | \item{add.cnv.legend}{(x,y or coordinates) the position parameter passed to legend to plot shattered regions and CNV (outer track) description} 39 | 
 40 | \item{add.svc.legend}{(x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description} 41 | 42 | \item{...}{Additional graphical parameters} 43 | } 44 | \value{ 45 | circos plot into open device 46 | } 47 | \description{ 48 | Produces a circos plot combining CNV and SVC data, zooming into the chromosomes harboring shattered regions 49 | } 50 | \examples{ 51 | 52 | ## validate input data.frames 53 | cnv <- validate.cnv(segdat_lung_ccle) 54 | svc <- validate.svc(svdat_lung_ccle) 55 | 56 | ## obtain shattered regions 57 | shatt.regions <- shattered.regions(cnv,svc) 58 | 59 | # select a random sample from the 60 | id <- "SCLC21H_LUNG" 61 | 62 | circ.chromo.plot(shatt.regions, sample.id = id) 63 | } 64 | \keyword{CNV,} 65 | \keyword{circular} 66 | \keyword{plot} 67 | \keyword{segmentation,} 68 | \keyword{structural} 69 | \keyword{variant,} 70 | \keyword{visualization,} 71 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(IQM) 4 | export(IQSD) 5 | export(amp.del) 6 | export(ave.segmean) 7 | export(bed2chromo.reg) 8 | export(break.density) 9 | export(brk.burden.iqm) 10 | export(chr.arm.cnv) 11 | export(chr.sort) 12 | export(chromosome.limit.coords) 13 | export(circ.chromo.plot) 14 | export(circ.wg.plot) 15 | export(clean.cnv.artifact) 16 | export(cnv.break.annot) 17 | export(cnv.breaks) 18 | export(cnv.freq) 19 | export(createRandomString) 20 | export(d3gb.chr.lim) 21 | export(dngr) 22 | export(extract.bins) 23 | export(freq.p.test) 24 | export(freq.threshold) 25 | export(gene.cnv) 26 | export(gene.symbol.info) 27 | export(gene.track.view) 28 | export(geneBreakOverlap) 29 | export(get.chr.bins) 30 | export(get.genesgr) 31 | export(hbd.mat) 32 | export(hot.spot.samples) 33 | export(map2color) 34 | export(match.breaks) 35 | export(med.segmean) 36 
| export(merge2lists) 37 | export(pct.genome.changed) 38 | export(segment.gap) 39 | export(shattered.eval) 40 | export(shattered.map.plot) 41 | export(shattered.regions) 42 | export(shattered.regions.cnv) 43 | export(sv.model.view) 44 | export(svc.break.annot) 45 | export(svc.breaks) 46 | export(upgr) 47 | export(validate.cnv) 48 | export(validate.svc) 49 | exportClasses(break.annot) 50 | exportClasses(break.iqm) 51 | exportClasses(breaks) 52 | exportClasses(chromo.regs) 53 | exportClasses(cnvfreq) 54 | exportClasses(genecnv) 55 | exportClasses(null.freq) 56 | exportClasses(refSeqDat) 57 | exportClasses(svcnvio) 58 | exportMethods(extract.bins) 59 | exportMethods(hbd.mat) 60 | # import 61 | import(data.table, except=c("shift")) 62 | import(GenomicRanges) 63 | import(tidyr) 64 | import(circlize) 65 | import(D3GB) 66 | import(shape) 67 | import(org.Hs.eg.db) 68 | import(TxDb.Hsapiens.UCSC.hg19.knownGene) 69 | import(TxDb.Hsapiens.UCSC.hg38.knownGene) 70 | import(methods) 71 | import(GenomicAlignments,except=c("first","last","second")) 72 | import(GenomicFeatures) 73 | import(scales) 74 | # importFrom 75 | importFrom("taRifx","remove.factors") 76 | importFrom("S4Vectors","queryHits","subjectHits") 77 | 78 | importFrom("grDevices", "colorRampPalette", "rainbow", "rgb") 79 | 80 | importFrom("graphics", "arrows", "axTicks", "axis", "barplot", "grid", 81 | "hist", "legend", "lines", "mtext", "par", "plot", "points", 82 | "polygon", "rect", "text") 83 | 84 | importFrom("stats", "aggregate", "na.omit", "p.adjust", "quantile", 85 | "sd", "setNames") 86 | 87 | importFrom("utils", "capture.output", "setTxtProgressBar", 88 | "txtProgressBar") 89 | importFrom("plot3D","colkey") 90 | importFrom("IRanges","IRanges") 91 | importFrom("AnnotationDbi","mapIds") 92 | #### 93 | -------------------------------------------------------------------------------- /R/hot.spot.samples.R: -------------------------------------------------------------------------------- 1 | #' Hot-spot sample 
retrieval 2 | #' 3 | #' Collects sample ids with shattered regions detected at hot-spots based on certain p-value cutoff 4 | #' 5 | #' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs 6 | #' @param freq.cut (numeric) the hot spot threshold; bins whose sample count is >= freq.cut are used as peaks for sample ID retrieval 7 | #' @return a list comprising two lists: peakRegions (for each merged hot-spot region, the bin(s) with the highest sample count) and peakRegionsSamples (the sample ids with a shattered region in those bins) 8 | #' @export 9 | #' @examples 10 | #' # validate input data.frames 11 | #' cnv <- validate.cnv(segdat_lung_ccle) 12 | #' svc <- validate.svc(svdat_lung_ccle) 13 | #' 14 | #' chromo.regs.obj <- shattered.regions(cnv,svc) 15 | #' mat<-hbd.mat(chromo.regs.obj) 16 | #' 17 | #' pcut.obj <- freq.p.test(mat,plot=FALSE) 18 | #' pcut <- freq.threshold(pcut.obj) 19 | #' 20 | #' res <- hot.spot.samples(chromo.regs.obj,pcut) 21 | #' 22 | 23 | 24 | hot.spot.samples <- function(chromo.regs.obj, freq.cut){ 25 | 26 | freq.matrix <- apply(chromo.regs.obj@high.density.regions.hc,2,sum) 27 | textRegions <- names(which(freq.matrix >= freq.cut)) 28 | hitRegions <- data.table(do.call(rbind,strsplit(textRegions," ")),textRegions) 29 | colnames(hitRegions) <- c("chr","start","end","regid") 30 | hitRegions$start <- as.numeric(hitRegions$start) 31 | hitRegions$end <- as.numeric(hitRegions$end) 32 | 33 | # seq_len(n)[-1] visits rows 2..n and is empty when fewer than two bins pass the cutoff (2:nrow() would count down to 1) 34 | # collapse contiguous bins into unique regions 35 | bins2remove <- c() 36 | for(i in seq_len(nrow(hitRegions))[-1]){ 37 | if(hitRegions[i]$chr == hitRegions[i-1]$chr){ 38 | if(hitRegions[i]$start < (hitRegions[i-1]$end)){ 39 | hitRegions[i]$start <- hitRegions[i-1]$start 40 | bins2remove <- c(bins2remove,textRegions[i-1]) 41 | } 42 | } 43 | } 44 | hitRegionsPost<- hitRegions[which(hitRegions$regid %in% setdiff(hitRegions$regid,bins2remove))] 45 | 46 | hitRegions_gr <- with(hitRegions, GRanges(chr, IRanges(start=start, end=end))) 47 | hitRegionsPost_gr <- with(hitRegionsPost, GRanges(chr, IRanges(start=start, end=end))) 48 | hits <-GenomicAlignments::findOverlaps(hitRegionsPost_gr,hitRegions_gr) 49 | 50 | regList 
<- list() 51 | for(i in unique(queryHits(hits))) regList[[hitRegionsPost[i]$regid]] <- textRegions[subjectHits(hits)[which(queryHits(hits) == i)]] 52 | 53 | # obtain the genomic bins with maximum number of samples 54 | peakRegions <- lapply(regList, function(x) 55 | names(which(freq.matrix[x] == max(freq.matrix[x])))) 56 | 57 | # collect samples with shattered region in the peaks 58 | peakRegionsSamples <- lapply(peakRegions, function(x) 59 | names(which(apply(cbind(chromo.regs.obj@high.density.regions.hc[,x]),1,sum) > 0))) 60 | 61 | return(list(peakRegions=peakRegions,peakRegionsSamples=peakRegionsSamples)) 62 | 63 | } 64 | 65 | -------------------------------------------------------------------------------- /R/segment.means.r: -------------------------------------------------------------------------------- 1 | #' Average sample CNV 2 | #' 3 | #' Obtain the weighted average segment mean log2 ratios from each sample within a CNV segmentation data.frame 4 | #' 5 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv 6 | #' @return (numeric) a vector containing the weighted average logR from segmented data 7 | #' @keywords CNV, segmentation 8 | #' @export 9 | #' @examples 10 | #' 11 | #' ## validate input CNV data.frames 12 | #' cnv <- validate.cnv(segdat_lung_ccle) 13 | #' 14 | #' ave_seg_mean <- ave.segmean(cnv) 15 | #' head(ave_seg_mean) 16 | 17 | 18 | #################### 19 | 20 | 21 | ave.segmean <- function(cnv){ 22 | 23 | stopifnot(cnv@type == "cnv") 24 | cnvdat <- cnv@data 25 | 26 | 27 | width <- as.numeric(cnvdat$end - cnvdat$start) 28 | sample <- cnvdat$sample 29 | segmean <- cnvdat$segmean 30 | 31 | df <- stats::aggregate(width~sample,data.table(sample,width),sum) 32 | glen <- df$width 33 | names(glen) <- df$sample 34 | 35 | w.segmean <- segmean*width/glen[sample] 36 | df2 <- stats::aggregate(w.segmean~sample,data.table(sample,w.segmean),sum) 37 | ave <- df2$w.segmean 38 | names(ave) <- df2$sample 39 | 
return(ave) 40 | 41 | } 42 | 43 | 44 | #' Median sample CNV 45 | #' 46 | #' Obtain the length-weighted median segment mean of each sample in a segmentation file; segments are ordered by their log ratio and the weighted median is the logR of the segment whose cumulative length fraction lies closest to 0.5 (ties are averaged) 47 | #' 48 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv 49 | #' @return (numeric) a named vector containing the median logR value of each sample of a segmented data.frame 50 | #' @keywords CNV, segmentation 51 | #' @export 52 | #' @examples 53 | #' 54 | #' ## validate input CNV data.frames 55 | #' cnv <- validate.cnv(segdat_lung_ccle) 56 | #' 57 | #' med_seg_mean <- med.segmean(cnv) 58 | #' head(med_seg_mean) 59 | 60 | 61 | #################### 62 | 63 | 64 | med.segmean <- function(cnv){ 65 | 66 | stopifnot(cnv@type == "cnv") 67 | cnvdat <- cnv@data 68 | 69 | glen <- as.numeric(cnvdat$end-cnvdat$start) 70 | sample <- cnvdat$sample 71 | segmean <- cnvdat$segmean 72 | dt <- data.table(sample,glen,segmean) 73 | out <-rep(NA,length(unique(dt$sample))) 74 | names(out) <- unique(dt$sample) 75 | 76 | for(i in unique(dt$sample)){ 77 | # order this sample's segments by segmean, then pick the segment(s) whose cumulative length fraction is nearest 0.5 78 | minidf <- dt[which(dt$sample == i)] 79 | miniord <-minidf[order(minidf$segmean)] 80 | medseg <- which(abs(cumsum(miniord$glen)/sum(miniord$glen) - 0.5) == min(abs(cumsum(miniord$glen)/sum(miniord$glen) - 0.5))) 81 | out[i] <- mean(miniord$segmean[medseg]) 82 | 83 | } 84 | return(out) 85 | } 86 | 87 | -------------------------------------------------------------------------------- /man/brk.burden.iqm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/brk.burden.iqm.r 3 | \name{brk.burden.iqm} 4 | \alias{brk.burden.iqm} 5 | \title{Evaluates the breakpoint burden based on a instance 'breaks' produced by svpluscnv::scv_breaks or svpluscnv::cnv_breaks. 
6 | Breakpoint densities are calculated for each chromosome arm and the inter quantile mean (svpluscnv::IQM) of all chromosome arms is reported for each sample. 7 | A Graphical output is generated indicating every sample's arm burden ordered by their IQM.} 8 | \usage{ 9 | brk.burden.iqm( 10 | brk, 11 | sample.col = NULL, 12 | min.arm.size = 2e+07, 13 | bp.unit = 1e+07, 14 | genome.v = "hg19", 15 | chr.lim = NULL, 16 | plot = TRUE, 17 | verbose = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{brk}{(breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).} 22 | 23 | \item{sample.col}{(character) A vector of valid colors. Names must match sample column from 'brk'. If null a gradient color based on breakpoint burden IQM will be used.} 24 | 25 | \item{min.arm.size}{(numeric) minimum size in base pairs for a chromosome arm to be included in the analysis. Size will be calculated based on the 'genome.v' centromere location (excluding centromere bands). Chromosome start and end locations can be provided in 'chr.lim'.} 26 | 27 | \item{bp.unit}{(numeric) The genomic size unit in base pairs to report breakpoint densities. This parameter is also used for the y axis of the plot.} 28 | 29 | \item{genome.v}{(hg19 or hg38) reference genome version to draw chromosome limits and centromeres} 30 | 31 | \item{chr.lim}{(data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.} 32 | 33 | \item{plot}{(logical) whether to produce a graphical output} 34 | 35 | \item{verbose}{(logical) whether to return internal messages} 36 | } 37 | \value{ 38 | an instance of the class 'cnvfreq' and optionally a plot into open device 39 | } 40 | \description{ 41 | Evaluates the breakpoint burden based on an instance 'breaks' produced by svpluscnv::svc.breaks or svpluscnv::cnv.breaks. 
42 | Breakpoint densities are calculated for each chromosome arm and the inter quantile mean (svpluscnv::IQM) of all chromosome arms is reported for each sample. 43 | A Graphical output is generated indicating every sample's arm burden ordered by their IQM. 44 | } 45 | \examples{ 46 | 47 | # initialize CNV data 48 | cnv <- validate.cnv(nbl_segdat) 49 | 50 | # obtain CNV breakpoints 51 | brk <- cnv.breaks(cnv) 52 | 53 | brk.burden.iqm(brk) 54 | } 55 | \keyword{burden,} 56 | \keyword{chromosomal} 57 | \keyword{instability} 58 | \keyword{mutational} 59 | \keyword{structural} 60 | \keyword{variants,} 61 | -------------------------------------------------------------------------------- /man/sv.model.view.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sv.model.view.r 3 | \name{sv.model.view} 4 | \alias{sv.model.view} 5 | \title{SV integrated visualization} 6 | \usage{ 7 | sv.model.view( 8 | cnv, 9 | svc, 10 | chrom, 11 | start, 12 | stop, 13 | sampleids = NULL, 14 | cnvlim = c(-2, 2), 15 | addlegend = "both", 16 | cex.legend = 1, 17 | interval = NULL, 18 | addtext = NULL, 19 | cex.text = 0.8, 20 | plot = TRUE, 21 | summary = TRUE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 27 | 28 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc} 29 | 30 | \item{chrom}{(character) chromosome (e.g chr9)} 31 | 32 | \item{start}{(numeric) genomic coordinate from specified chromosome to start plotting} 33 | 34 | \item{stop}{(numeric) genomic coordinate from specified chromosome to stop plotting} 35 | 36 | \item{sampleids}{(character) a vector containing a list of sample ids represented in svc and/or cnv objects to be plotted} 37 | 38 | \item{cnvlim}{(numeric) limits for color coding of background CNV log-ratios. 
Use to modify the CNV color contrast at different levels.} 39 | 40 | \item{addlegend}{(character) One of 'sv' (show SV type legend), 'cnv' (show CNV background color legend) or 'both'.} 41 | 42 | \item{cex.legend}{(numeric) The cex values for each legend} 43 | 44 | \item{interval}{(numeric) The axis interval in base pairs} 45 | 46 | \item{addtext}{(character) a vector indicating what SV types should include text labels indicating breakpoint partners' genomic locations. The added labels point to breakpoint locations outside the plot area. (e.g. c("TRA","INV") )} 47 | 48 | \item{cex.text}{(numeric) The magnification to be used for SV text info added} 49 | 50 | \item{plot}{(logical) whether to produce a graphical output} 51 | 52 | \item{summary}{(logical) whether the function should return CNV segment 'segbrk' and SV 'svbrk' breakpoints tabular output} 53 | 54 | \item{...}{additional plot parameters from graphics plot function} 55 | } 56 | \value{ 57 | a data.frame with CNV and SVC breakpoint annotations and/or plot into open device 58 | } 59 | \description{ 60 | Integrated visualization of SVC and CNV data for defined genomic locations. CNV and SVC data are overlaid into a sample-based track visualization map. 
61 | } 62 | \examples{ 63 | 64 | ## validate input data.frames 65 | cnv <- validate.cnv(segdat_lung_ccle) 66 | svc <- validate.svc(svdat_lung_ccle) 67 | 68 | # obtain the coordinates of a desired genomic region based on a known gene locus 69 | refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD") 70 | start <- refSeqGene$start - 150000; 71 | stop <- refSeqGene$stop+ 50000; 72 | chrom <- refSeqGene$chrom 73 | 74 | sv.model.view(cnv, svc, chrom, start, stop) 75 | 76 | } 77 | \keyword{CNV,} 78 | \keyword{segmentation} 79 | \keyword{structural} 80 | \keyword{variant,} 81 | -------------------------------------------------------------------------------- /R/chr.arm.cnv.r: -------------------------------------------------------------------------------- 1 | #' Chromosome arm mean CNV 2 | #' 3 | #' Obtains a matrix with the weighted average CN per chromosome arm 4 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv 5 | #' @param genome.v (character) reference genome version used to locate centromeres: 'hg19'/'GRCh37' or 'hg38'/'GRCh38'; any other value stops with an error 6 | #' @param verbose (logical) whether to print the name of each chromosome arm as it is processed 7 | #' @return a matrix of chromosome arms (rows) versus samples (cols) with average segment logRs per cell; cells stay NA where a sample has no segment overlapping the arm 8 | #' @keywords CNV, segmentation, chromosome arm 9 | #' @export 10 | #' @examples 11 | #' 12 | #' # initialize CNV data 13 | #' cnv <- validate.cnv(segdat_lung_ccle) 14 | #' 15 | #' arm_mat <- chr.arm.cnv(cnv, genome.v="hg19") 16 | #' dim(arm_mat) 17 | # NOTE(review): the per-arm loop binds arm.width into armdf twice (two data.table() calls), leaving a duplicated column name - confirm this is intended 18 | 19 | chr.arm.cnv <- function(cnv, 20 | genome.v="hg19", 21 | verbose=FALSE){ 22 | 23 | stopifnot(cnv@type == "cnv") 24 | cnvdat <- cnv@data 25 | 26 | if(genome.v %in% c("GRCh37","hg19")){ 27 | bands <- GRCh37.bands 28 | }else if(genome.v %in% c("GRCh38","hg38")){ 29 | bands <- GRCh38.bands 30 | }else{stop("Genome version not provided")} 31 | # the q-arm 'acen' band supplies the centromere: its start ends the p arm, its end begins the q arm 32 | centromeres_start <- bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"start"] 33 | centromeres_end <- 
bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"end"] 34 | names(centromeres_start) <- names(centromeres_end) <- paste("chr",bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"chr"],sep="") 35 | 36 | chr.lim <- chromosome.limit.coords(cnv) 37 | chrarms <- rbind(cbind(chr.lim$begin,centromeres_start[chr.lim$chrom]),cbind(centromeres_end[chr.lim$chrom],chr.lim$end)) 38 | chrarms <- data.table(rownames(chrarms),chrarms,c(paste(chr.lim$chrom,"p",sep=""), paste(chr.lim$chrom,"q",sep=""))) 39 | colnames(chrarms) <- c("chrom","start","end","arm") 40 | 41 | chrarms <- chrarms[which(chrarms$end -chrarms$start > 0),] 42 | 43 | chrarmsGR <- with(chrarms,GRanges(chrom, IRanges(start=start, end=end))) 44 | 45 | cnvdat_gr <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=end))) 46 | hits <- GenomicAlignments::findOverlaps(chrarmsGR,cnvdat_gr) 47 | 48 | armcnvmat <- matrix(ncol=length(unique(cnvdat$sample)), nrow=nrow(chrarms) ) 49 | colnames(armcnvmat) <- unique(cnvdat$sample) 50 | rownames(armcnvmat) <- chrarms$arm 51 | 52 | for(i in unique(queryHits(hits))){ 53 | arm <- chrarms[i,"arm"][[1]] 54 | 55 | if(verbose) cat("\r",arm) 56 | 57 | armdf <- cnvdat[subjectHits(hits)[which(queryHits(hits) == i)],] 58 | armdf[which(armdf$start < chrarms[i,"start"]),"start"] <- chrarms[i,"start"] 59 | armdf[which(armdf$end > chrarms[i,"end"]),"end"] <- chrarms[i,"end"] 60 | 61 | arm.width <- armdf$end - armdf$start 62 | armdf <- data.table(armdf,arm.width) 63 | armlength <- aggregate(arm.width~sample,armdf,sum)[,2] 64 | names(armlength) <- aggregate(arm.width~sample,armdf,sum)[,1] 65 | part <- armdf$segmean * armdf$arm.width / armlength[armdf$sample] 66 | 67 | armdf <- data.table(armdf,arm.width,part,armlength[armdf$sample]) 68 | 69 | meanArmSegment <- aggregate(part~sample,armdf,sum) 70 | 71 | num <- as.numeric(meanArmSegment[,2]) 72 | names(num) <- as.character(meanArmSegment[,1]) 73 | armcnvmat[arm,names(num)] <- num 74 | } 75 | return(armcnvmat) 
76 | } 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /man/cnv.break.annot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/break.annot.r 3 | \name{cnv.break.annot} 4 | \alias{cnv.break.annot} 5 | \title{Identification of recurrently altered genes using CNV data 6 | Identify recurrently altered genes by CNV. The function will identify overlaps between genomic features (e.g. genes) and CNV breakpoints. As opposed to 'gene.cnv' function that returns the overal CNV of each gene, this function allows identifying sub-genic events and may help detecting other rearrangements.} 7 | \usage{ 8 | cnv.break.annot( 9 | cnv, 10 | fc.pct = 0.2, 11 | genome.v = "hg19", 12 | genesgr = NULL, 13 | upstr = 150000, 14 | dnstr = 150000, 15 | break.width = 10000, 16 | min.cnv.size = NULL, 17 | min.num.probes = NULL, 18 | low.cov = NULL, 19 | clean.brk = NULL, 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv} 25 | 26 | \item{fc.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2.} 27 | 28 | \item{genome.v}{(character): either 'hg19' or 'hg38' accepted; reference genome version to retrieve gene annotations including genomic coordinates and strand} 29 | 30 | \item{genesgr}{(S4) a GenomicRanges object containing gene annotations (if not NULL overides genome.v). It is crutial that the genome version 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).} 31 | 32 | \item{upstr}{(numeric) size in base pairs to define gene upstream region onto which breakpoint overlaps will be identified. 
The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of upstream regions.} 33 | 34 | \item{dnstr}{(numeric) size in base pairs to define gene downstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of downstream regions.} 35 | 36 | \item{break.width}{(numeric) maximum breakpoint size to be considered} 37 | 38 | \item{min.cnv.size}{(numeric) The minimum segment size (in base pairs) to include in the analysis} 39 | 40 | \item{min.num.probes}{(numeric) The minimum number of probes per segment to include in the analysis} 41 | 42 | \item{low.cov}{(data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis} 43 | 44 | \item{clean.brk}{(numeric) Removal of identical segments when present above a given number. Identical CNV segments across multiple samples may represent artifact of common germline variants; this is particularly relevant when the segmentation data was generated with a non-paired reference. For paired datasets (e.g. tumor vs. normal) better leave as NULL.} 45 | 46 | \item{verbose}{(logical) whether to return internal messages} 47 | } 48 | \value{ 49 | an instance of the class 'break.annot' containing breakpoint mapping onto genes 50 | } 51 | \description{ 52 | Identification of recurrently altered genes using CNV data 53 | Identify recurrently altered genes by CNV. The function will identify overlaps between genomic features (e.g. genes) and CNV breakpoints. As opposed to 'gene.cnv' function that returns the overall CNV of each gene, this function allows identifying sub-genic events and may help detecting other rearrangements. 
54 | } 55 | \examples{ 56 | 57 | # Initialize CNV data 58 | cnv <- validate.cnv(segdat_lung_ccle) 59 | 60 | cnv.break.annot(cnv) 61 | } 62 | \keyword{CNV,} 63 | \keyword{segmentation} 64 | -------------------------------------------------------------------------------- /R/svpluscnv.data.r: -------------------------------------------------------------------------------- 1 | #' Data class refSeqDat 2 | #' 3 | #' Class to store refseq data from UCSC containing exon level info for known transcripts 4 | #' 5 | #' @param data (data.table): transcript information 6 | #' @param exonStarts (list): every transcript exonic start position 7 | #' @param exonEnds (list): every transcript exonic end position 8 | #' @param genome.v (character): the genome version encoding transcript data 9 | #' @return an instance of the class 'refSeqDat' containing transcript exonic coordinates 10 | #' @export 11 | 12 | refSeqDat <- setClass("refSeqDat", representation( 13 | data = "data.table", 14 | exonStarts = "list", 15 | exonEnds= "list", 16 | genome.v="character" 17 | )) 18 | 19 | setMethod("show","refSeqDat",function(object){ 20 | writeLines(paste("An object of class refSeqDat from svpluscnv with ",nrow(object@data),"transcipts from",object@genome.v,"genome version")) 21 | }) 22 | 23 | 24 | #' 25 | #' Return coordinates of a specified gene 26 | #' 27 | #' @param object (refSeqDat) An object of class refSeqDat containing gene transcript mapping. 
svpluscnv includes two selfloaded objects: refseq_hg19 & refseq_hg38 28 | #' @param symbol (character) a valid HGNC gene symbol included in the refseq object 29 | #' @export 30 | #' @docType methods 31 | #' @return A list with elements chrom (unique chromosome values), start (minimum txStart) and stop (maximum txEnd) taken over the transcripts whose name2 equals symbol 32 | #' @rdname gene.symbol.info-methods 33 | 34 | setGeneric("gene.symbol.info", function(object, symbol) standardGeneric("gene.symbol.info")) 35 | 36 | #' @rdname gene.symbol.info-methods 37 | setMethod("gene.symbol.info", "refSeqDat", function(object, symbol){ 38 | DT <- object@data[which(object@data$name2 == symbol)] 39 | return(list( 40 | chrom = unique(DT$chrom), 41 | start = min(DT$txStart), 42 | stop = max(DT$txEnd) 43 | )) 44 | }) 45 | 46 | 47 | utils::globalVariables(c("refseq_hg19", "refseq_hg38")) 48 | 49 | #' Reference transcript and exon annotations for hg19 50 | #' 51 | #' refSeq annotations for hg19 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables) 52 | #' 53 | #' @name refseq_hg19 54 | #' @docType data 55 | #' @keywords genes, transcripts, exons 56 | #' 57 | "refseq_hg19" 58 | 59 | 60 | #' Reference transcript and exon annotations for hg38 61 | #' 62 | #' refSeq annotations for hg38 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables) 63 | #' 64 | #' @name refseq_hg38 65 | #' @docType data 66 | #' @keywords genes, transcripts, exons 67 | #' 68 | "refseq_hg38" 69 | 70 | 71 | utils::globalVariables(c("segdat_lung_ccle", "svdat_lung_ccle","cnv_blacklist_regions","nbl_segdat","nbl_svdat")) 72 | 73 | #' Lung CCLE CNV data 74 | #' 75 | #' CCLE CNV segmentation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/ 76 | #' @name segdat_lung_ccle 77 | #' @docType data 78 | #' @keywords CNV segmentation 79 | "segdat_lung_ccle" 80 | 81 | #' Lung CCLE SVC data 82 | #' 83 | #' CCLE translocation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/ 84 | #' @name svdat_lung_ccle 85 | #' @docType data 86 | #' @keywords SVs 87 | 
"svdat_lung_ccle" 88 | 89 | #' Low coverage regions 90 | #' 91 | #' @name cnv_blacklist_regions 92 | #' @docType data 93 | #' @keywords CNV segmentation 94 | "cnv_blacklist_regions" 95 | 96 | #' TARGET Neuroblastoma CNV 97 | #' 98 | #' TARGET CNV segmentation: https://target-data.nci.nih.gov/ 99 | #' @name nbl_segdat 100 | #' @docType data 101 | #' @keywords CNV segmentation, SVs 102 | "nbl_segdat" 103 | 104 | #' TARGET Neuroblastoma SVC 105 | #' 106 | #' TARGET CGI structural variants: https://target-data.nci.nih.gov/ 107 | #' 108 | #' @name nbl_svdat 109 | #' @docType data 110 | #' @keywords SVs 111 | "nbl_svdat" 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /man/shattered.regions.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shattered.regions.r 3 | \name{shattered.regions} 4 | \alias{shattered.regions} 5 | \title{Shattered region detection} 6 | \usage{ 7 | shattered.regions( 8 | cnv, 9 | svc, 10 | fc.pct = 0.2, 11 | min.cnv.size = 0, 12 | min.num.probes = 0, 13 | low.cov = NULL, 14 | clean.brk = NULL, 15 | window.size = 10, 16 | slide.size = 2, 17 | num.cnv.breaks = 6, 18 | num.cnv.sd = 5, 19 | num.svc.breaks = 6, 20 | num.svc.sd = 5, 21 | num.common.breaks = 3, 22 | num.common.sd = 3, 23 | maxgap = 10000, 24 | chrlist = NULL, 25 | chr.lim = NULL, 26 | interleaved.cut = 0.33, 27 | dist.iqm.cut = 1e+05, 28 | verbose = TRUE 29 | ) 30 | } 31 | \arguments{ 32 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv} 33 | 34 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc} 35 | 36 | \item{fc.pct}{(numeric) inherited from cnv.breaks(); copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2} 37 | 38 | \item{min.cnv.size}{(numeric) 
inherited from cnv.breaks(); The minimum segment size (in base pairs) to include in the analysis} 39 | 40 | \item{min.num.probes}{(numeric) inherited from cnv.breaks(); The minimum number of probes per segment to include in the analysis} 41 | 42 | \item{low.cov}{(data.frame) inherited from cnv.breaks(), svc.breaks() and match.breaks(); a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis} 43 | 44 | \item{clean.brk}{(numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied} 45 | 46 | \item{window.size}{(numeric) size in megabases of the genome bin to compute break density} 47 | 48 | \item{slide.size}{(numeric) size in megabases of the sliding genome window} 49 | 50 | \item{num.cnv.breaks}{(numeric) number of segmentation breakpoints per segments to be considered high-density break} 51 | 52 | \item{num.cnv.sd}{(numeric) number of standard deviations above the sample average for num.cnv.breaks} 53 | 54 | \item{num.svc.breaks}{(numeric) number of svc breakpoints per segments to be considered high-density break} 55 | 56 | \item{num.svc.sd}{(numeric) number of standard deviations above the sample average for num.svc.breaks} 57 | 58 | \item{num.common.breaks}{(numeric) number of common SV and segmentation breakpoints per segments to be considered high-density break} 59 | 60 | \item{num.common.sd}{(numeric) number of standard deviations above the sample average for num.common.breaks} 61 | 62 | \item{maxgap}{(numeric) inherited from match.breaks(); sets the maximum gap between co-localizing orthogonal breakpoints} 63 | 64 | \item{chrlist}{(character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included} 65 | 66 | \item{chr.lim}{(data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. 
Also returned by the function svpluscnv::chromosome.limit.coords.} 67 | 68 | \item{interleaved.cut}{(numeric) 0-1 value indicating percentage of interleaved (non-contiguous) SV breakpoint pairs} 69 | 70 | \item{dist.iqm.cut}{(numeric) interquantile average of the distance between breakpoints within a shattered region} 71 | 72 | \item{verbose}{(logical) whether to return internal messages} 73 | } 74 | \value{ 75 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes 76 | } 77 | \description{ 78 | Caller for the identification of shattered genomic regions based on CNV and SVC data 79 | } 80 | \examples{ 81 | 82 | ## validate input data.frames 83 | cnv <- validate.cnv(segdat_lung_ccle) 84 | svc <- validate.svc(svdat_lung_ccle) 85 | 86 | shattered.regions(cnv,svc) 87 | } 88 | \keyword{chromoplexy,} 89 | \keyword{chromosome} 90 | \keyword{chromothripsis,} 91 | \keyword{shattering} 92 | -------------------------------------------------------------------------------- /R/shattered.map.plot.r: -------------------------------------------------------------------------------- 1 | #' Shattered regions genomic map 2 | #' 3 | #' Plots a genome wide map of shattered region frequencies 4 | #' 5 | #' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs 6 | #' @param conf (character) 'hc' to plot high confidence regions only; any other value includes all regions 7 | #' @param genome.v (character) reference genome version to draw chromosome limits and centromeres; either hg19 or hg38 accepted 8 | #' @param chrlist (character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included 9 | #' @param freq.cut the value at which to draw a horizontal line; use 'freq.p.test' to obtain a threshold for statistically significant hot spots 10 | #' @param add.legend the position of the legend in the plot; if null, no legend will be drawn 11 | #' @return a plot into open device 12 | #' @keywords chromosome shattering, genome map 13 | #' @export 14 | #' 
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#'
#' ## obtain shattered regions
#' chromo.regs.obj <- shattered.regions(cnv,svc)
#'
#' shattered.map.plot(chromo.regs.obj)

shattered.map.plot <- function(chromo.regs.obj,
                               conf = "hc",
                               genome.v = "hg19",
                               chrlist = NULL,
                               freq.cut = NULL,
                               add.legend = "top") {

    # Fail early on an unsupported genome version instead of hitting an
    # "object 'bands' not found" error further down.
    stopifnot(genome.v %in% c("hg19", "hg38", "GRCh37", "GRCh38"))
    if (genome.v %in% c("hg19", "GRCh37")) {
        bands <- GRCh37.bands
    } else {
        bands <- GRCh38.bands
    }

    # Centromere coordinate per chromosome: start of the q-arm "acen" band.
    qcen <- intersect(which(bands$score == "acen"), grep("q", bands$name))
    centromeres <- bands[qcen, "start"]
    names(centromeres) <- paste("chr", bands[qcen, "chr"], sep = "")

    # Reference chromosome lengths: largest band end per chromosome.
    chrlengths <- vapply(unique(bands$chr), function(i) max(bands$end[which(bands$chr == i)]), 1)
    names(chrlengths) <- paste("chr", unique(bands$chr), sep = "")

    # Region ids (matrix column names) are "chrom start end" strings.
    if (is.null(chrlist)) {
        chrlist <- unique(do.call(rbind, strsplit(colnames(chromo.regs.obj@high.density.regions), " "))[, 1])
    }
    stopifnot(all(chrlist %in% names(chrlengths)))

    # Number of samples in which each region is flagged.
    if (conf == "hc") {
        regionFreq <- apply(chromo.regs.obj@high.density.regions.hc, 2, sum)
    } else {
        regionFreq <- apply(chromo.regs.obj@high.density.regions, 2, sum)
    }

    region   <- do.call(rbind, strsplit(names(regionFreq), " "))
    regChrom <- region[, 1]
    regStart <- as.numeric(region[, 2])
    regEnd   <- as.numeric(region[, 3])

    # Bar colours: regions reaching past the centromere are q-arm (salmon4);
    # regions starting before it are p-arm (salmon) and take precedence,
    # matching the legend drawn below.
    barCols <- rep("white", length(regChrom))
    barCols[which(regEnd - centromeres[regChrom] > 0)] <- "salmon4"
    barCols[which(regStart - centromeres[regChrom] < 0)] <- "salmon"

    # Restrict every per-region vector with the SAME index so coordinates,
    # frequencies and colours stay aligned when 'chrlist' is a subset.
    keep       <- which(regChrom %in% chrlist)
    regionFreq <- regionFreq[keep]
    regChrom   <- regChrom[keep]
    regStart   <- regStart[keep]
    regEnd     <- regEnd[keep]
    barCols    <- barCols[keep]

    # Chromosome widths used for the x layout: the last region end when the
    # chromosome has regions, the reference length otherwise.
    chrend <- chrlengths[chrlist]
    for (i in unique(regChrom)) chrend[i] <- max(regEnd[regChrom == i])

    # Cumulative x offset of each chromosome and the position of its label.
    chrstarts <- c(0, cumsum(chrend)[-length(chrend)])
    names(chrstarts) <- names(chrend)
    axislab <- chrstarts + chrend / 2

    xpos <- (regEnd + regStart) / 2 + chrstarts[regChrom]

    altcols  <- rep(c(rgb(0.1, 0.1, 0.1, alpha = 0.1), rgb(0.8, 0.8, 0.8, alpha = 0.1)), 12)
    altcols2 <- rep(c(rgb(0.1, 0.1, 0.1, alpha = 1), rgb(0.4, 0.4, 0.4, alpha = 1)), 12)

    plot(xpos, regionFreq, type = 'h', col = barCols, xaxt = 'n', lwd = 1.5, ylim = c(0, max(regionFreq) + 5),
         las = 1, bty = 'n', yaxt = 'n', family = "Arial", ylab = "", xlab = "")

    # Background stripes; the width is looked up by chromosome name so a
    # custom 'chrlist' does not pick up the length of another chromosome.
    chrwidth <- chrlengths[names(chrstarts)]
    for (i in seq_along(chrstarts)) {
        rect(chrstarts[i], 0, chrstarts[i] + chrwidth[i], 1000, col = altcols[i], border = NA)
    }
    mtext(gsub("chr", "", names(axislab)), side = 1, at = axislab, las = 1, col = altcols2, cex = c(rep(1, 17), rep(0.8, 5), 1))

    # Threshold line spans all plotted chromosomes (previously tied to chrX,
    # which yields NA coordinates when chrX is not plotted).
    if (!is.null(freq.cut)) {
        lines(c(0, max(chrstarts + chrwidth)), c(freq.cut, freq.cut), lty = 3, col = "black")
    }
    axis(2, las = 1, pos = 0, cex = 1.2)
    # Right axis: same ticks expressed as a fraction of the number of samples.
    axis(4, las = 1, pos = max(xpos) + 10000, cex = 1.2, at = axTicks(2),
         labels = sprintf("%.2f", axTicks(2) / dim(chromo.regs.obj@high.density.regions)[1]))
    mtext("Frequency", side = 4, line = 1.5)
    mtext("#samples", side = 2, line = 1)
    if (!is.null(add.legend)) {
        legend(add.legend, c("short (p) arm", "long (q) arm"), border = c("salmon", "salmon4"),
               fill = c("salmon", "salmon4"), bty = 'n', ncol = 2)
    }
}
# ---- /R/gene.cnv.r ----

#' Data class genecnv
#'
#' Class to store gene-level copy number data
#'
#' @param cnvmat (matrix): matrix containing average CNV per gene (rows) for each sample (columns)
#' @param genesgr (S4): a GenomicRanges object with genomic feature annotations such as gene coordinates
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
#' @param param (list): a list of the parameters provided
#' @return an instance of the class 'genecnv' containing gene level copy number info
#' @export

genecnv <- setClass("genecnv",
                    representation(
                        cnvmat = "matrix",
                        genesgr = "GRanges",
                        cnv = "svcnvio",
                        param = "list"
                    ))


# Console summary. The row count of 'cnvmat' is reported as the number of
# genes, since the matrix holds one row per annotated gene.
setMethod("show", "genecnv", function(object) {
    writeLines(paste("An object of class genecnv from svpluscnv containing gene level CNV data",
                     "\nNumber of samples=", ncol(object@cnvmat),
                     "\nNumber of genes=", nrow(object@cnvmat)))
})


#' Gene-level CNV
#'
#' Obtains a gene-level copy number matrix from a segmentation profile.
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param genome.v (hg19 or hg38) reference genome version to fetch gene annotations when 'genesgr=NULL'
#' @param genesgr (S4) a GenomicRanges object containing genomic feature annotations (if not NULL overrides genome.v). It must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...)
#' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...
#' @param fill.gaps (logical) whether to fill the gaps in the segmentation file using gap neighbour segmean average as log ratio
#' @param verbose (logical)
#' @return an instance of the class 'genecnv' containing gene level copy number info
#' @keywords CNV, segmentation, genes
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' gene.cnv(cnv)

gene.cnv <- function(cnv,
                     genome.v = "hg19",
                     genesgr = NULL,
                     chrlist = NULL,
                     fill.gaps = FALSE,
                     verbose = TRUE) {

    stopifnot(cnv@type == "cnv")
    cnv2 <- cnv
    if (fill.gaps) cnv2 <- segment.gap(cnv2, chrlist = chrlist, verbose = verbose)
    cnvdat <- cnv2@data

    # NOTE(review): 'chrlist' is only normalised and stored in 'param'; the
    # segments are not filtered by it here. Confirm this is intended.
    if (is.null(chrlist)) chrlist <- unique(cnvdat$chrom)
    chrlist <- chr.sort(chrlist)

    if (!is.null(genesgr)) {
        if (anyDuplicated(genesgr@elementMetadata$gene_id) > 0) stop("The genesgr provided object contains duplicated gene_id values")
    } else {
        genesgr <- get.genesgr(genome.v = genome.v)
    }

    # Overlap every gene with every segment (all samples at once).
    cnvdat_gr <- with(cnvdat, GRanges(chrom, IRanges::IRanges(start = start, end = end)))
    hits <- GenomicAlignments::findOverlaps(genesgr, cnvdat_gr)

    overlaps_all <- pintersect(genesgr[queryHits(hits), ], cnvdat_gr[subjectHits(hits), ])
    width_overlap <- width(overlaps_all)

    df <- data.table(cnvdat[subjectHits(hits), c("sample", "segmean")],
                     genesgr[queryHits(hits)]@elementMetadata$gene_id,
                     width_overlap)
    colnames(df) <- c("sample", "segmean", "gene_id", "width")

    # Per sample, plain mean of the segmeans of all segments touching a gene.
    # NOTE(review): 'width' is computed but not used as a weight - confirm
    # whether an overlap-weighted mean was intended.
    per_sample <- sapply(unique(cnvdat$sample),
                         function(i) df[sample == i, .(CN = mean(segmean)), by = "gene_id"],
                         simplify = FALSE)

    # Turn each per-sample table into a gene-named vector aligned to genesgr;
    # genes without overlapping segments become NA.
    as_gene_vector <- function(dfi) {
        cn <- dfi$CN
        names(cn) <- dfi$gene_id
        cn
    }
    aligned <- lapply(per_sample, function(x) as_gene_vector(x)[genesgr@elementMetadata$gene_id])
    cnvmat <- do.call(cbind, aligned)
    rownames(cnvmat) <- genesgr@elementMetadata$gene_id

    genecnv(
        cnvmat = cnvmat,
        genesgr = genesgr,
        cnv = cnv,
        param = list(genome.v = genome.v,
                     chrlist = chrlist,
                     fill.gaps = fill.gaps,
                     verbose = verbose)
    )
}


#' Amplifications and deletions
#'
#' Retrieve amplification and deletion events from a 'genecnv.obj' generated by 'gene.cnv' function
#'
#' @param genecnv.obj (genecnv) an instance of the class 'genecnv' containing gene level copy number info
#' @param logr.cut (numeric) the log-ratio cutoff: genes with log-ratio >= logr.cut are considered amplified and genes with log-ratio <= -logr.cut deep deleted (e.g 2 = 8 copies for amplification and 0.5 copies for deep deletions, in diploid regions)
#' @return (list) A list of lists including amplified.list, amplified.rank, deepdel.list and deepdel.rank
#' @keywords CNV, segmentation, genes
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' genecnv.obj <- gene.cnv(cnv)
#'
#' geneampdel <- amp.del(genecnv.obj, logr.cut = 2)
#' lapply(geneampdel,head)

amp.del <- function(genecnv.obj, logr.cut = 2) {

    stopifnot(is.numeric(logr.cut), length(logr.cut) == 1)
    cnvmat <- genecnv.obj@cnvmat

    # For a logical gene x sample matrix, list the samples flagged for each
    # gene. lapply() always yields a list, whereas apply() collapses to a
    # vector or matrix whenever every gene has the same number of hits.
    samples_by_gene <- function(hit) {
        res <- lapply(seq_len(nrow(hit)), function(i) colnames(hit)[which(hit[i, ])])
        names(res) <- rownames(hit)
        res[lengths(res) > 0]
    }

    # The cutoff argument is honoured (it used to be hard-coded to 2 / -2).
    amp_list <- samples_by_gene(cnvmat >= logr.cut)
    amp_rank <- sort(lengths(amp_list), decreasing = TRUE)

    del_list <- samples_by_gene(cnvmat <= -logr.cut)
    del_rank <- sort(lengths(del_list), decreasing = TRUE)

    return(list(amplified.list = amp_list,
                amplified.rank = amp_rank,
                deepdel.list = del_list,
                deepdel.rank = del_rank))
}

# ---- /R/freq.p.test.r ----
#' Data class null.freq
#'
#' Class to store observed and null distributions as well as the empirical p-values associated with the observed values
#'
#' @param freq.cut (numeric): the value from observed distribution that satisfies certain p-value cutoff
#' @param pvalues (numeric): a vector containing the empirical p-value of each observed value
#' @param observed (numeric): vector of observed distribution
#' @param null (numeric): vector of null distribution
#' @param param (list): a list of parameters provided
#' @return an instance of the class 'null.freq'
#' @export
null.freq <- setClass("null.freq", representation(
    freq.cut = 'numeric',
    pvalues = 'numeric',
    observed = 'numeric',
    null='numeric',
    param = 'list'
))


# Console summary: prints a fixed one-line description of the object.
setMethod("show","null.freq",function(object){
    writeLines(paste("An object of class null.freq from svpluscnv"))
})


#'
#' Return frequency threshold from null.freq object
#'
#' @param object (null.freq) An object of class null.freq
#' @return (numeric) the frequency cutoff stored in the 'freq.cut' slot of the object
#' @export
#' @docType methods
#' @rdname freq.threshold-methods

setGeneric("freq.threshold", function(object) standardGeneric("freq.threshold"))

#' @rdname freq.threshold-methods

setMethod("freq.threshold", "null.freq", function(object) object@freq.cut)


#' Frequency hot spot detection
#'
#' Obtains significance cutoff for the frequency of binary events encoded in a matrix such as that generated by shattered.regions and shattered.regions.cnv algorithms
#'
#' @param mat (numeric matrix) a binary matrix where columns will be tested for their sum value compared to a permuted matrix
#' @param method (character) the method to pass to
#' p.adjust function
#' @param p.cut (numeric) the cutoff for multiple hypothesis corrected p.value
#' @param iter (numeric) Number of iterations to produce null distribution (note that null size will be iter*ncol(mat))
#' @param zerofreq (logical) whether to remove bins with observed frequency = 0; It is recommended to set to TRUE when the bins span genomic regions of low coverage
#' @param plot (logical) whether to generate a histogram comparing observed and null frequency distributions
#' @param verbose (logical) whether to return messages
#' @return an instance of the class 'null.freq'; its 'freq.cut' slot is Inf when no bin passes the significance cutoff
#' @keywords empirical p.value, p.adjust
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' ## obtain a matrix of genomic bins vs samples indicating high density of breaks
#' shatt.regions <- shattered.regions.cnv(cnv)
#' mat <- shatt.regions@high.density.regions.hc
#'
#' freq.p.test(mat)

freq.p.test <- function(mat,
                        method = "fdr",
                        p.cut = 0.05,
                        iter = 100,
                        zerofreq = TRUE,
                        plot = TRUE,
                        verbose = FALSE) {

    stopifnot(is.numeric(mat), is.matrix(mat))

    # obtain a frequency vector (number of samples flagged per bin)
    highDensitiBinsFreq <- apply(mat, 2, sum)

    if (zerofreq) {
        # Index by position and keep the matrix shape: selecting by name
        # fails without colnames, and a single remaining bin would
        # otherwise be dropped to a plain vector.
        nonzero <- which(highDensitiBinsFreq > 0)
        mat <- mat[, nonzero, drop = FALSE]
        highDensitiBinsFreq <- highDensitiBinsFreq[nonzero]
        if (verbose) message(paste("Testing ", dim(mat)[2], "non-zero bins in ", dim(mat)[1], "samples"))
    } else {
        if (verbose) message(paste("Testing ", dim(mat)[2], "bins in ", dim(mat)[1], "samples"))
    }
    if (ncol(mat) == 0) stop("No bins to test")

    # create null distribution by shuffling the bins within each sample (row)
    shuffle_rows <- function(m) {
        # Explicit index permutation: sample(x) on a length-1 row would
        # sample from 1:x instead of returning x.
        for (r in seq_len(nrow(m))) m[r, ] <- m[r, sample.int(ncol(m))]
        m
    }
    highDensitiBinsFreqRandomFreqNull <- unlist(
        lapply(seq_len(iter), function(k) apply(shuffle_rows(mat), 2, sum)),
        use.names = FALSE
    )
    # With zero-frequency bins removed from the observed data, null zeros are raised to 1.
    if (zerofreq) highDensitiBinsFreqRandomFreqNull[which(highDensitiBinsFreqRandomFreqNull == 0)] <- 1

    # empirical p-value: fraction of null frequencies strictly above the observed one
    pvalues <- vapply(highDensitiBinsFreq,
                      function(f) mean(highDensitiBinsFreqRandomFreqNull > f),
                      numeric(1))

    # obtain the frequency cutoff for statistical significance (e.g. FDR < 0.01);
    # Inf signals that no bin reached significance
    significant <- which(p.adjust(pvalues, method = method) < p.cut)
    freq.cut <- if (length(significant) > 0) min(highDensitiBinsFreq[significant]) else Inf

    if (plot) {
        xstart <- 1
        obsd  <- highDensitiBinsFreq[which(highDensitiBinsFreq >= xstart)]
        nulld <- highDensitiBinsFreqRandomFreqNull[which(highDensitiBinsFreqRandomFreqNull >= xstart)]

        max_freq <- max(c(obsd, nulld))

        h1 <- hist(obsd, breaks = seq(xstart, max_freq, 1), plot = FALSE)
        h2 <- hist(nulld, breaks = seq(xstart, max_freq, 1), plot = FALSE)

        max_density <- max(h1$density, h2$density)
        hist(obsd, breaks = seq(xstart, max_freq, 1), col = 'salmon', border = NA, xlim = c(0, max_freq), ylim = c(0, max_density),
             las = 1, cex.axis = 1.4, ylab = "", prob = TRUE, main = "", xlab = "n samples", right = TRUE)
        hist(nulld, breaks = seq(xstart, max_freq, 1), add = TRUE, col = scales::alpha('black', .5),
             border = NA, prob = TRUE)
        legend("topright", c("Observed", "Null"),
               fill = c('salmon', scales::alpha('black', .5)), border = NA, bty = 'n', cex = 1.1)
        if (is.finite(freq.cut)) {
            lines(c(freq.cut, freq.cut), c(0, max(h1$density) / 2)) # cutoff for statistical significance
            text(freq.cut + max_freq / 25, max(h1$density) / 3, paste(method, "<", p.cut), srt = 90)
        }
    }

    return(null.freq(
        freq.cut = freq.cut,
        pvalues = pvalues,
        observed = highDensitiBinsFreq,
        null = highDensitiBinsFreqRandomFreqNull,
        param = list(method = method, p.cut = p.cut, iter = iter)
    ))
}
# ---- /R/internal_functions.r ----

#' Inter-quantile mean
#'
#' Mean of the values of 'x' whose rank lies strictly between the lower and upper quantiles of the ranks
#'
#' @param x numeric vector to compute interquantile average
#' @param lowQ lower quantile
#' @param upQ upper quantile
#' @return (numeric) the IQM value
#' @keywords statistics, interquartile
#' @export
#' @examples
#'
#' x <- rnorm(100)
#' IQM(x)


IQM <- function(x, lowQ=0.1, upQ=0.9){

    stopifnot(is.numeric(x))

    # Ties are broken at random so every value gets a distinct rank.
    ranks <- rank(x, ties.method = 'random')
    lower <- quantile(ranks, lowQ)
    upper <- quantile(ranks, upQ)

    inside <- ranks > lower & ranks < upper
    mean(x[inside])
}


#' Inter-quantile standard deviation
#'
#' Standard deviation of the values of 'x' whose rank lies strictly between the lower and upper quantiles of the ranks
#'
#' @param x numeric vector to compute interquantile standard deviation
#' @param lowQ lower quantile
#' @param upQ upper quantile
#' @return (numeric) the IQSD value
#' @keywords statistics, interquartile
#' @export
#' @examples
#'
#' x <- rnorm(100)
#' IQSD(x)


IQSD <- function(x,lowQ=0.1,upQ=0.9){

    stopifnot(is.numeric(x))

    # Same rank-based trimming as IQM, followed by sd() instead of mean().
    ranks <- rank(x, ties.method = 'random')
    lower <- quantile(ranks, lowQ)
    upper <- quantile(ranks, upQ)

    inside <- ranks > lower & ranks < upper
    sd(x[inside])
}

#' Color map from numeric vector
#'
#' Produces a vector of colors based on a given palette.
#' The colors are defined by the input vector
#'
#' @param x numeric vector
#' @param pal color palette
#' @param limits numeric limit for color mapping
#' @return a color vector graded according to x; NA entries of x map to NA
#' @keywords color, number
#' @export
#' @examples
#'
#' x <- rnorm(100)
#' x_color <- map2color(x)
#' head(x_color)

map2color <- function(x, pal = NULL, limits = NULL) {
    # Ignore NAs when deriving the limits, so one missing value does not
    # make seq() fail.
    if (is.null(limits)) limits <- range(x, na.rm = TRUE)
    if (is.null(pal)) pal <- colorRampPalette(c("lightblue", "white", "salmon"))(256)
    # length(pal) equally sized intervals between the limits; values outside
    # the limits are clamped to the first/last colour (all.inside = TRUE).
    breaks <- seq(limits[1], limits[2], length.out = length(pal) + 1)
    return(pal[findInterval(x, breaks, all.inside = TRUE)])
}



#' Unique random string generator
#'
#' Generates n unique random character strings of a given length. Note that the length must be big enough in order to avoid offsetting the number n of strings requested
#'
#' @param n the number of unique random strings to return
#' @param strlen random string length
#' @return a vector of unique random character strings
#' @keywords random string
#' @export
#' @examples
#'
#' # To ensure reproducibility make sure to set the seed
#' set.seed(123456789)
#'
#' createRandomString(1, 10)


createRandomString <- function(n = 1, strlen = 10) {

    # Zero strings requested: return an empty vector (indexing with 1:0
    # used to return NA).
    if (n == 0) return(character(0))

    # Draw twice as many characters as needed and cut them into chunks of
    # 'strlen' characters; only the first n chunks are returned.
    strlenchain <- strlen * n * 2

    chain <- paste(sample(c(letters, LETTERS), strlenchain, replace = TRUE), collapse = "")
    idresult <- strsplit(gsub(paste("(.{", strlen, "})", sep = ""), "\\1 ", chain), " ")

    if (anyDuplicated(idresult[[1]]) != 0) stop("Repeated strings were produced; try modifying the 'seed' or increasing 'strlen'")

    return(idresult[[1]][seq_len(n)])
}



#' Chromosome start and end
#'
#' Obtains a chromosome start and end positions from a reference genome version
#'
#' @param genome.v (character) reference genome version to retrieve gene
#' annotations (hg19 or GRCh37 and hg38 or GRCh38)
#' @return (data.table) a table containing start and end positions for each chromosome
#' @keywords CNV, segmentation, genes
#' @export
#' @examples
#'
#' d3gb.chr.lim(genome.v="hg19")
#'

d3gb.chr.lim <- function(genome.v) {

    stopifnot(genome.v %in% c("hg19", "hg38", "GRCh37", "GRCh38"))

    if (genome.v %in% c("hg19", "GRCh37")) {
        bands <- GRCh37.bands
    } else {
        bands <- GRCh38.bands
    }

    # Largest band end per chromosome, numeric chromosomes first and the
    # remaining ones (non-numeric names) in alphabetical order.
    ends <- aggregate(end ~ chr, bands, max)
    ends <- ends[order(ends$chr), ]
    ends <- ends[suppressWarnings(order(as.numeric(as.character(ends$chr)))), ]

    # One 'begin' value per chromosome: length() of a data.frame is its
    # number of columns, so the row count must be used here.
    chr.lim <- data.table(paste("chr", ends$chr, sep = ""), rep(0, nrow(ends)), ends$end)
    colnames(chr.lim) <- c("chrom", "begin", "end")

    return(chr.lim)
}

#' Merge two lists
#'
#' Merge of 2 lists into one that contains unique or intersect vectors for each list entry with shared names
#'
#' @param x (list): input list 1
#' @param y (list): input list 2
#' @param fun (character): Either 'unique' or 'intersect' are accepted
#' @return (list) merged list from x and y; entries that end up empty are omitted
#' @keywords merge lists
#' @export
#' @examples
#'
#' x <- sapply(letters[1:10], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE )
#' y <- sapply(letters[5:15], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE )
#' merge2lists(x,y)

merge2lists <- function(x, y, fun = "unique") {

    mergedList <- list()

    if (fun == "unique") {
        # Union of the entries over every name present in either list.
        for (i in unique(c(names(x), names(y)))) {
            inX <- length(x[[i]]) > 0
            inY <- length(y[[i]]) > 0
            if (inX && inY) {
                mergedList[[i]] <- unique(c(x[[i]], y[[i]]))
            } else if (inX) {
                mergedList[[i]] <- x[[i]]
            } else if (inY) {
                mergedList[[i]] <- y[[i]]
            }
        }
    } else if (fun == "intersect") {
        # Only names present in both lists can have common elements.
        for (i in intersect(names(x), names(y))) {
            commonElements <- intersect(x[[i]], y[[i]])
            if (length(commonElements) > 0) {
                mergedList[[i]] <- commonElements
            }
        }
    } else {
        stop(paste("Unknown function:", fun))
    }

    return(mergedList)

}

# ---- /R/clean.cnv.artifact.r ----

#' CNV segmentation gap filling
#'
#' Fills the gaps in a segmentation data.frame. Chromosome limits are defined for the complete segmentation dataset then segments fill the missing terminal regions.
#' The CN log-ratio of the added segments is set to the average of the closest neighbours in each sample.
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param minsize (numeric) the minimum gap size required to fill the gap
#' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...
#' @param verbose (logical) whether to return internal messages
#' @return an instance of the class 'svcnvio' containing the original and the gap-filling CNV segments
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' cnv2 <- segment.gap(cnv)
#' cnv2

segment.gap <- function(cnv,
                        minsize = 5000,
                        chrlist = NULL,
                        verbose = FALSE) {

    stopifnot(cnv@type == "cnv")
    cnvdat_df <- data.frame(cnv@data)

    # NOTE(review): 'chrlist' is kept for interface compatibility; as in the
    # previous implementation it does not restrict the chromosomes processed.
    chrlims_df <- data.frame(chromosome.limit.coords(cnv))

    if (verbose) message("Filling gaps is the segmentation data.frame")

    n   <- nrow(cnvdat_df)
    smp <- cnvdat_df$sample
    chr <- cnvdat_df$chrom
    st  <- cnvdat_df$start
    en  <- cnvdat_df$end
    sm  <- cnvdat_df$segmean

    # A run is a stretch of consecutive rows from one sample on one
    # chromosome; this relies on the sample/chromosome/start ordering
    # applied by validate.cnv. Checking the sample as well as the chromosome
    # stops a gap being computed across two different samples.
    run_first <- c(TRUE, smp[-1] != smp[-n] | chr[-1] != chr[-n])
    run_last  <- c(run_first[-1], TRUE)

    lim       <- chrlims_df[match(chr, chrlims_df$chrom), ]
    prev_end  <- c(NA, en[-n])
    prev_mean <- c(NA, sm[-n])

    leading  <- which(run_first & st > lim$begin)                # chromosome begin -> first segment
    internal <- which(!run_first & (st - prev_end) > minsize)    # between consecutive segments
    trailing <- which(run_last & en < lim$end)                   # last segment -> chromosome end

    make_segments <- function(idx, start, end, segmean) {
        data.frame(sample = smp[idx], chrom = chr[idx], start = start, end = end,
                   probes = rep(0, length(idx)), segmean = segmean,
                   stringsAsFactors = FALSE)
    }

    # Terminal gaps take the log-ratio of their only neighbour, internal gaps
    # the average of both flanking segments. Building the three sets
    # separately keeps both gaps at a chromosome change (one used to
    # overwrite the other).
    newsegments <- rbind(
        make_segments(leading, lim$begin[leading], st[leading] - 1, sm[leading]),
        make_segments(internal, prev_end[internal] + 1, st[internal] - 1,
                      (sm[internal] + prev_mean[internal]) / 2),
        make_segments(trailing, en[trailing] + 1, lim$end[trailing], sm[trailing])
    )
    colnames(newsegments) <- colnames(cnvdat_df)[1:6]

    segout <- rbind(cnvdat_df[, 1:6], newsegments)
    out <- validate.cnv(segout)

    return(out)
}


#' CNV artifact detection and filtering
#'
#' Detects identical or near-identical CNV segments across multiple samples susceptible of representing common variants or technical artifacts. Then those segments CNV log-ratio is replaced by the flanking segments average
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
#' @param n.reps (numeric) number of samples with identical segment to consider artifact
#' @param cnv.size (numeric) only smaller segments will be modified in the cnv data.frame
#' @param pc.overlap (numeric) minimum percentage overlap for a pair of segments to be considered identical
#' @param fill.gaps (logical) whether to fill gaps from the segmented file after filtering artifacts
#' @param minsize (numeric) the minimum gap size required to fill the gap.
#' Only used if 'fill.gaps=TRUE'
#' @param verbose (logical) whether to print internal messages
#' @return an instance of the class 'svcnvio' containing the filtered CNV data
#' @keywords CNV, segmentation, filter
#' @export
#' @examples
#'
#' ## validate input data.frame
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' cnvcl <- clean.cnv.artifact(cnv)
#' cnvcl

clean.cnv.artifact <- function(cnv,
                               n.reps = 4,
                               cnv.size = 2000000,
                               pc.overlap = 0.99,
                               fill.gaps = TRUE,
                               minsize = 5000,
                               verbose = TRUE) {

    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data

    all_artifacts_l <- list()

    # Only segments shorter than 'cnv.size' are artifact candidates.
    cnvdat_short <- cnvdat[which(cnvdat$end - cnvdat$start < cnv.size), ]

    for (chr in unique(cnvdat$chrom)) {

        if (verbose) cat("\r", chr)

        segchr <- cnvdat_short[which(cnvdat_short$chrom == chr), ]
        # No candidate on this chromosome: skip it, since aggregate() fails
        # on an empty hit table.
        if (nrow(segchr) == 0) next

        segchr.gr <- with(segchr, GRanges(chrom, IRanges(start = start, end = end)))
        hits <- GenomicAlignments::findOverlaps(segchr.gr, segchr.gr)
        overlaps <- pintersect(segchr.gr[queryHits(hits)], segchr.gr[subjectHits(hits)])

        # Keep pairs overlapping reciprocally by at least 'pc.overlap'.
        percentOverlapA <- width(overlaps) / width(segchr.gr[queryHits(hits)])
        percentOverlapB <- width(overlaps) / width(segchr.gr[subjectHits(hits)])
        hits_p <- as.data.frame(hits[intersect(which(percentOverlapA >= pc.overlap), which(percentOverlapB >= pc.overlap)), ])

        # Each segment is described by the set of segments it matches;
        # a set that appears more than 'n.reps' times marks an artifact.
        reps <- aggregate(subjectHits ~ queryHits, hits_p, paste, simplify = FALSE)
        reps_list <- reps$subjectHits
        names(reps_list) <- reps$queryHits
        reps_list_collapse <- lapply(lapply(reps_list, sort), paste, collapse = " ")
        groups_a <- table(unlist(reps_list_collapse))
        all_artifacts <- as.numeric(unlist(strsplit(names(which(groups_a > n.reps)), " ")))
        all_artifacts_l[[chr]] <- segchr[all_artifacts, ]
    }

    all_artifacts <- do.call(rbind, unname(all_artifacts_l))

    # Segment ids built the same way on both sides; an empty artifact set
    # simply removes nothing.
    segment_id <- function(d) paste(d$sample, d$chrom, d$start, d$end, sep = ":")
    toremove <- if (is.null(all_artifacts)) character(0) else segment_id(all_artifacts)
    allsegids <- segment_id(cnvdat)
    cnvdat_clean <- svcnvio(data = cnvdat[which(!allsegids %in% toremove), ], type = "cnv")

    if (fill.gaps) {
        segclean_fill <- segment.gap(cnvdat_clean, minsize = minsize, verbose = verbose)
        return(segclean_fill)
    } else {
        return(cnvdat_clean)
    }

}

# ---- /R/validate.input.data.r ----

#' Data class svcnvio
#'
#' Class to store CNV segmentation or SV call data
#'
#' @param data (data.table): cnv or svc data.table to be validated by 'validate.cnv' or 'validate.svc' respectively
#' @param type (character): the data type "cnv" or "svc" defined by "validate.cnv" or "validate.svc" respectively
#' @seealso Additional data format information in the man pages of validate.cnv and validate.svc
#' @return an instance of the class 'svcnvio' containing SV data derived from CNV or SVC data types; A unique id (uid) column is also added
#' @export

svcnvio <- setClass("svcnvio", representation(
    data = "data.table",
    type = "character"
))

# Console summary: data type and number of distinct samples stored.
setMethod("show", "svcnvio", function(object) {
    writeLines(paste("An object of class svcnvio from svpluscnv storing", object@type, "data from", length(unique(object@data$sample)), "samples"))
})

#' Initialization of SVC data
#'
#' This function validates and reformats the SV (structural variant) calls input. It is used internally by 'svpluscnv' functions that require this type of data.
#' A few formatting rules are enforced:
#' 1) The input must contain 8 columns in the following order (sample ID, chromosome of origin, position of origin, strand of origin, chromosome of destination, position of destination, strand of destination, SV class)
#' 2) SV classes accepted: DEL(deletion), DUP(duplication), INS(insertion), TRA(translocation), INV(inversion) and BND(break end)
#' 3) Any variant in which chromosome of origin and destination differ are encoded as TRA (translocation)
#' 4) pos1 < pos2 is enforced for all variants in which chromosome of origin and destination are the same
#' 5) The class BND can be used to operate with complex events as long as both break ends are the same chromosome
#'
#' @param sv.df (data.frame) structural variant table including the following fields: sample, chrom1, pos1, strand1, chrom2, pos2, strand2, svclass
#' @return an instance of the class 'svcnvio' containing SV data derived from SVC data type; A unique id (uid) column is also added
#' @keywords SV, structural variants
#' @export
#' @examples
#'
#' validate.svc(svdat_lung_ccle)


validate.svc <- function(sv.df) {

    stopifnot(ncol(sv.df) >= 8)
    stopifnot(nrow(sv.df) > 0)

    uid <- paste("svc_", createRandomString(nrow(sv.df), 10), sep = "")
    svc <- data.table(remove.factors(sv.df[, 1:8]), uid)

    svc_cols <- c("sample", "chrom1", "pos1", "strand1", "chrom2", "pos2", "strand2", "svclass", "uid")
    colnames(svc) <- svc_cols
    # The "chr" prefix is added when the first record lacks it.
    if (length(grep("chr", svc[1]$chrom1)) == 0) svc$chrom1 <- paste("chr", svc$chrom1, sep = "")
    if (length(grep("chr", svc[1]$chrom2)) == 0) svc$chrom2 <- paste("chr", svc$chrom2, sep = "")

    stopifnot(is.numeric(svc$pos1))
    stopifnot(is.numeric(svc$pos2))
    stopifnot(is.character(svc$chrom1))
    stopifnot(is.character(svc$chrom2))
    stopifnot(is.character(svc$sample))

    # Normalise the SV class on a plain vector, which is safe when a pattern
    # matches no rows.
    svclass <- svc$svclass
    svclass[grep("INV", svclass)] <- "INV"
    svclass[grep("DUP", svclass)] <- "DUP"

    # Inter-chromosomal events are translocations. A direct comparison is
    # used because apply(..., 1, unique) collapses to a matrix when every
    # row is inter-chromosomal, hiding those events.
    interchr <- svc$chrom1 != svc$chrom2
    svclass[which(interchr)] <- "TRA"

    accepted <- c("DEL", "DUP", "TRA", "INV", "INS", "BND")
    wrong_class <- setdiff(unique(svclass), accepted)
    if (length(wrong_class) > 0) message(paste("SV classes not accepted:", paste(wrong_class, collapse = ","), "will be set as BND"))
    svclass[which(!svclass %in% accepted)] <- "BND"
    svc$svclass <- svclass

    # ensure that pos1 is upstream pos2 (position and strand are swapped together)
    intrachr_rev <- which(!interchr & svc$pos2 - svc$pos1 < 0)

    if (length(intrachr_rev) > 0) {
        svcrev <- svc[intrachr_rev, c(1, 2, 6, 7, 5, 3, 4, 8, 9)]
        colnames(svcrev) <- svc_cols
        svc <- rbind(svcrev, svc[setdiff(seq_len(nrow(svc)), intrachr_rev)])
    }

    stopifnot(nrow(svc) > 0)

    return(svcnvio(
        data = svc,
        type = "svc"
    ))

}

#' Chromosome ordering
#'
#' A function to order a list of chromosomes
#'
#' @param chrlist (character): a vector containing chromosome names (chr1, chr2...chrX,chrY )
#' @return a character vector of unique chromosomes: numeric names in increasing order followed by the non-numeric ones in alphabetical order
#' @keywords CNV, segmentation, genes
#' @export
#' @examples
#'
#' chrlist <- paste("chr",c("X","Y",sample(1:22)),sep="")
#' chr_sorted <- chr.sort(chrlist)


chr.sort <- function(chrlist) {
    # Alphabetical sort first, so the non-numeric names (NA after
    # as.numeric) keep a defined order under the stable numeric ordering.
    chrunique <- sort(gsub("chr", "", unique(chrlist)))
    numeric_order <- suppressWarnings(order(as.numeric(chrunique)))
    chrsort <- paste("chr", chrunique[numeric_order], sep = "")
    return(chrsort)
}


#' Initialization of CNV data
#'
#' This function validates and reformats the CNV segmentation data type containing copy number log-ratios. It is used internally by 'svpluscnv' functions that require this type of data.
#'
#' @param cnv.df (data.frame) segmentation data with at least 6 columns: sample, chromosome, start, end, probes, segment_mean
#' @return an instance of the class 'svcnvio' containing segmentation data derived from CNV data type; A unique id (uid) column is also added
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' validate.cnv(segdat_lung_ccle)


validate.cnv <- function(cnv.df){

    stopifnot(ncol(cnv.df) >= 6)

    # every segment receives a random identifier carrying the "cnv_" prefix
    uid <- paste0("cnv_", createRandomString(nrow(cnv.df), 10))
    cnvdat <- data.table(cnv.df[,1:6], uid)
    colnames(cnvdat) <- c("sample","chrom","start","end","probes","segmean","uid")

    # only the first row is inspected to decide whether the "chr" prefix is missing
    prefix.found <- length(grep("chr", cnvdat[1,2])) > 0
    if(!prefix.found) cnvdat[,"chrom"] <- paste0("chr", cnvdat$chrom)

    # column type checks, evaluated in this order
    stopifnot(
        is.numeric(cnvdat$start),
        is.numeric(cnvdat$end),
        is.numeric(cnvdat$segmean),
        is.character(cnvdat$sample),
        is.character(cnvdat$chrom)
    )

    chrlist <- chr.sort(unique(cnvdat$chrom))

    # three successive sorts (start, then chromosome, then sample); ties keep the
    # arrangement left by the previous pass
    cnvdat <- cnvdat[order(cnvdat$start),]
    cnvdat <- cnvdat[order(match(cnvdat$chrom, chrlist)),]
    cnvdat <- cnvdat[order(cnvdat$sample),]

    stopifnot(nrow(cnvdat) > 0)

    return(svcnvio(
        data=cnvdat,
        type="cnv"
    ))

}


#' Chromosome limit map
#'
#' Obtain chromosome start and end positions based on mapped regions from CNV segmentation data
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @keywords CNV, segmentation, mapping
#' @return data.table indicating start and end mapped positions of each chromosome
#' @export
#' @examples
#'
#' ## validate input data.frame
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' chr.lim <- chromosome.limit.coords(cnv)

chromosome.limit.coords <- function(cnv){

    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data

    chrlist <- chr.sort(unique(cnvdat$chrom))

    # only chromosomes literally present in the data contribute a limit
    mapped <- chrlist[chrlist %in% cnvdat$chrom]
    names(mapped) <- mapped

    # smallest segment start / largest segment end observed on each chromosome
    begin <- unlist(lapply(mapped, function(chr){
        rows <- which(cnvdat$chrom == chr)
        min(cnvdat$start[rows])
    }))
    end <- unlist(lapply(mapped, function(chr){
        rows <- which(cnvdat$chrom == chr)
        max(cnvdat$end[rows])
    }))

    chr.lim <- data.table(chrlist, begin, end)
    colnames(chr.lim) <- c("chrom","begin","end")
    return(chr.lim)
}




# --------------------------------------------------------------------------------
# /R/gene.track.view.r:
# --------------------------------------------------------------------------------
#' Gene track visualization
#'
#' Creates a track visualization of a genomic region defined by gene boundaries or custom provided
#'
#' @param chrom (character) Chromosome (e.g. chr9)
#' @param start (numeric) Genomic coordinate from specified chromosome to start plotting
#' @param stop (numeric) Genomic coordinate from specified chromosome to stop plotting
#' @param symbol (character) Gene accepted hgnc symbol to retrieve coordinates and area plotting
#' @param upstr (numeric) Distance upstream specified gene to extend the area plotted
#' @param dnstr (numeric) Distance downstream specified gene to extend the area plotted
#' @param genome.v (character) Reference genome version to draw chromosome limits and centromeres (hg19 or hg38)
#' @param addtext (logic) Whether to include transcript RefSeq ids in the plot
#' @param cex.text (numeric) The magnification to be used for transcript RefSeq text added
#' @param plot (logic) Whether to generate plot in open device
#' @param summary (logic) Whether to produce a data.table output with transcript information
#' @return A data.frame with gene isoform annotations and/or plot into
#' open device
#' @param ... Additional graphical parameters
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' # obtain the coordinates of a desired genomic region based on a known gene locus
#' refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD")
#' chrom <- refSeqGene$chrom
#' start <- refSeqGene$start - 150000;
#' stop <- refSeqGene$stop + 50000;
#'
#' gene.track.view(symbol="PTPRD", genome.v="hg19")


gene.track.view <- function(chrom=NULL, start=NULL, stop=NULL,
                            symbol=NULL,upstr=NULL,dnstr=NULL,
                            genome.v="hg19",
                            cex.text=0.6,
                            addtext=TRUE,
                            plot = TRUE,
                            summary=TRUE,
                            ...){

    # select the RefSeq annotation matching the requested genome build
    if(genome.v %in% c("hg19","GRCh37")){
        refseq <- refseq_hg19
    }else if(genome.v %in% c("hg38","GRCh38")){
        refseq <- refseq_hg38
    }else{stop("Unspecified, or non available genome")}

    # transcripts ordered by transcription start; 'chrom' inside with() is the annotation column
    refseq@data <- refseq@data[order(refseq@data$txStart),]
    refseq_gr <- with(refseq@data, GRanges(chrom, IRanges(start=txStart, end=txEnd), symbol=name2,transcript=name))

    # define genomic region to plot
    if(!is.null(symbol) && symbol %in% refseq@data$name2){
        sym.rows <- which(refseq@data$name2 == symbol)
        # isonames: gene symbol values named by transcript id
        isonames <- refseq@data$name2[sym.rows]
        names(isonames) <- refseq@data$name[sym.rows]

        strand <- refseq@data$strand[sym.rows][1]
        if(is.null(upstr)) upstr <- 10000
        if(is.null(dnstr)) dnstr <- 5000
        chrom <- unique(refseq@data$chrom[sym.rows])
        # upstream/downstream padding is swapped for genes on the minus strand
        if(strand == "-"){
            start <- min(refseq@data$txStart[sym.rows]) - dnstr
            stop <- max(refseq@data$txEnd[sym.rows]) + upstr
        }else{
            start <- min(refseq@data$txStart[sym.rows]) - upstr
            stop <- max(refseq@data$txEnd[sym.rows]) + dnstr
        }
    }else if(!is.null(chrom) && !is.null(start) && !is.null(stop)){
        coordgr <- with(data.frame(chrom,start,stop), GRanges(chrom, IRanges(start=start, end=stop)))
        in.region <- queryHits(GenomicAlignments::findOverlaps(refseq_gr,coordgr))
        isonames <- refseq_gr[in.region]@elementMetadata$symbol
        names(isonames) <- refseq_gr[in.region]@elementMetadata$transcript
        if(length(isonames) == 0) stop("There is no transcripts in specified coordinates!")
    }else{
        stop("Genomic coordinates or a valid hgnc gene symbol must be provided!")
    }

    # transcripts grouped by gene symbol, and exon start/end matrix per transcript
    isonames_list <- sapply(unique(isonames), function(i) names(which(isonames==i)),simplify = FALSE)
    exons_coord <- sapply(names(isonames), function(i) cbind(refseq@exonStarts[[i]],refseq@exonEnds[[i]]) ,simplify = FALSE)
    # NOTE: the former rownames(refseq_df) <- names(exons_coord) was dropped; the row
    # names were never read and the assignment fails when lengths differ or ids repeat
    refseq_df <- refseq@data[which(refseq@data$name %in% names(exons_coord)),]

    if(plot){
        geneRanges <- t(sapply(names(isonames), function(i)
            c(min(refseq@data$txStart[which(refseq@data$name == i)]),max(refseq@data$txEnd[which(refseq@data$name == i)]))))

        # the largest number of mutually overlapping transcripts sets the plot height
        hits <- findOverlaps(IRanges(geneRanges[,1],geneRanges[,2]))
        hitsNames <- data.frame(names(isonames)[queryHits(hits)],names(isonames)[subjectHits(hits)])
        maxOverlaps <- max(table(hitsNames[,1]))

        ylimit <- 1 + 0.5*maxOverlaps
        plot(x=NULL,y=NULL,xlim=range(c(start,stop)),ylim=range(c(-1.5,ylimit)),
             xaxt='n',yaxt='n',xlab='',ylab='',bty='n',...)

        # grey background panel above the x axis
        rect(-1e6,0,1e16,10000,col = "grey90")

        seqYpos <- rep(seq(0.1,ylimit,ylimit/maxOverlaps ),length(isonames_list)) +0.5
        isoct <- 0

        for(gene in names(isonames_list)){

            for(iso in isonames_list[[gene]]){
                isoct <- isoct +1
                ypos <- seqYpos[isoct]
                refseq_iso <- refseq_df[which(refseq_df$name == iso),]

                # number of direction arrows scales with transcript length relative to the window
                iso_length <- refseq_iso$txEnd - refseq_iso$txStart
                plot_length <- stop-start
                narrows <- ceiling(20*iso_length/plot_length)
                arrow_x <- seq(refseq_iso$txStart,refseq_iso$txEnd , iso_length/narrows)

                strandpos <- exons_coord[[iso]][1,1]-(stop-start)/100
                if(refseq_iso$strand == "-" ){
                    points(strandpos, ypos, pch="-", col="red")
                    arrows(arrow_x[2:(narrows+1)]+plot_length/200,rep(ypos,narrows), arrow_x[1:narrows],rep(ypos,narrows),length=0.1)
                }else if(refseq_iso$strand == "+" ){
                    points(strandpos,ypos,pch="+",cex=1,col="blue")
                    arrows(arrow_x[1:narrows]-plot_length/200,rep(ypos,narrows),arrow_x[2:(narrows+1)],rep(ypos,narrows),length=0.1)
                }

                lines(matrix(c(refseq_iso$txStart,refseq_iso$txEnd, ypos, ypos), 2, 2), lwd=2)

                # one box per exon
                bordercolor <- "black"; bgcolor<-"grey"
                for(i in seq_len(nrow(exons_coord[[iso]]))){
                    polygon(rbind(
                        c(exons_coord[[iso]][i,1],ypos+0.2),
                        c(exons_coord[[iso]][i,1],ypos-0.2),
                        c(exons_coord[[iso]][i,2],ypos-0.2),
                        c(exons_coord[[iso]][i,2],ypos+0.2)
                    ),lwd=1,col=bgcolor,border=bordercolor)
                }
                if(addtext){
                    text(refseq_iso$txEnd,ypos,label=iso,cex=cex.text,pos=4)
                }
            }
        }

        # tick spacing; floored at 1000 so that windows narrower than 2.5 kb do not
        # produce a zero step (seq() fails with by = 0)
        interval <- max(round((stop - start)/5000) * 1000, 1000)
        xlabs <- seq(floor(start/10000)*10000, ceiling(stop/10000)*10000,interval)
        axis(1, at = xlabs, lwd.ticks=1.5 ,pos=0, ...)
        mtext(gsub("chr","Chr ",chrom),side=2,las=1,...)

    }
    if(summary){
        return(
            refSeqDat(data=refseq@data[which(refseq@data$name %in% unlist(isonames_list)),],
                      exonStarts = refseq@exonStarts[unlist(isonames_list)],
                      exonEnds = refseq@exonEnds[unlist(isonames_list)],
                      genome.v=genome.v)
        )
    }
}

# --------------------------------------------------------------------------------
# /R/cnv.freq.plot.r:
# --------------------------------------------------------------------------------
#' Data class cnvfreq
#'
#' Class to store the CNV gain/loss frequency map produced by cnv.freq
#'
#' @param freqsum (data.table): the frequency of gains and losses in each defined genomic bin
#' @param chrlimits (data.frame): a table containing the chromosome limit coordinates and global genomic coordinates
#' @param bin.mat (numeric): a matrix of genomic bins versus samples
#' @param plot (graphical): a recorded plot object
#' @param param (list): a list of parameters provided
#' @return an instance of the class 'cnvfreq'
#' @export

cnvfreq <- setClass("cnvfreq", representation(
    freqsum  = "data.table",
    chrlimits = "data.frame",
    bin.mat = "matrix",
    plot = "recordedplot",
    param = "list"
))


# the header string is kept on one source line so that no indentation leaks into the output
setMethod("show","cnvfreq",function(object){
    writeLines(paste("An object of class cnvfreq from svpluscnv containing the following stats:\n\nNumber of samples=",ncol(object@bin.mat),
                     "\nNumber of genomic bins =",nrow(object@bin.mat)))
})


#' CNV frequency map
#'
#' Creates a map of CNVs using genome binning and plots CNV frequency across the genome. This function optionally returns text, graphical or both outputs.
#' Additionally, calculates the proportion of samples with a given percentage of chromosome arm gained/lost
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param fc.pct (numeric) percentage CNV gain/loss for a segment to be considered changed (i.e. 0.2 = 20 percent change; a bin is a gain when its value > log2(1 + fc.pct) and a loss when its value < log2(1 - fc.pct))
#' @param genome.v (character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
#' @param ploidy (logical) whether to apply ploidy correction; the function med.segmean will be used to obtain each sample's ploidy logR then this value subtracted to each sample's logR values
#' @param g.bin (numeric) size in megabases of the genome bin used to compute CNV frequencies
#' @param sampleids (character) vector containing list of samples to include in plot. if set to NULL, all samples in the input will be used
#' @param cex.axis,cex.lab,label.line (numeric) plot parameters
#' @param plot (logical) whether produce a graphical output
#' @param verbose (logical) whether to return internal messages
#' @return an instance of the class 'cnvfreq' and optionally a plot into open device
#' @keywords CNV, segmentation, plot
#' @export
#' @examples
#'
#' ## validate input data.frame
#' cnv <- validate.cnv(nbl_segdat)
#'
#' cnv.freq(cnv, genome.v = "hg19")

cnv.freq <- function(cnv,
                     fc.pct= 0.2,
                     genome.v= "hg19",
                     ploidy=FALSE,
                     g.bin= 1,
                     sampleids=NULL,
                     cex.axis= 1,
                     cex.lab= 1,
                     label.line= -1.2,
                     plot=TRUE,
                     verbose=TRUE){

    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data

    if(!is.null(sampleids)) cnvdat <- cnvdat[which(cnvdat$sample %in% sampleids),]

    if(ploidy){
        # subtract each sample's value returned by med.segmean from its segment means
        ploidy_val <- med.segmean(cnv)
        cnvdat$segmean <- cnvdat$segmean - ploidy_val[cnvdat$sample]
    }

    stopifnot(genome.v %in% c("hg19","hg38","GRCh37","GRCh38"))
    if(genome.v %in% c("hg19","GRCh37")){ bands <- GRCh37.bands
    }else if(genome.v %in% c("hg38","GRCh38")){ bands <- GRCh38.bands}

    # centromere position = start of the "acen" band located on the q arm
    qcen <- intersect(which(bands$score == "acen"),grep("q",bands$name))
    centromeres <- bands[qcen,"start"]
    names(centromeres) <- paste("chr",bands[qcen,"chr"],sep="")

    # define chromosome mapped limits and the global genome coordinates for each chromosome start
    chrlimits <- chromosome.limit.coords(cnv)
    offset <- c(0,vapply(seq_len(nrow(chrlimits)-1),
                         function(i) sum(chrlimits[seq_len(i),"end"]) + i*g.bin,1))
    chrlabelpos <- offset + chrlimits$end/2
    chrlimits <- data.frame(offset,as.data.frame(chrlimits),chrlabelpos)
    rownames(chrlimits) <- chrlimits$chrom

    g.bin.mb <- g.bin*1e6

    if(verbose) message("Generating binned genome map ")

    chrbins <- list()

    for(chr in rownames(chrlimits)){
        seqpos <- seq(chrlimits[chr,"begin"],chrlimits[chr,"end"]+g.bin.mb,g.bin.mb)
        ranges <- t( vapply(seq(2,length(seqpos)), function(i) c(seqpos[i-1],seqpos[i]),double(2)) )
        chrcol<- rep(chr,length(seqpos)-1)
        # bins ending at or before the centromere get the lighter colour, the rest the darker one;
        # bins of chromosomes without a centromere entry stay grey
        segcol_del <- segcol_gain <- rep("grey",length(chrcol))
        segcol_del[which(ranges[,2] <= centromeres[chr])] <- "lightblue"
        segcol_del[which(ranges[,2] > centromeres[chr])] <- "blue"
        segcol_gain[which(ranges[,2] <= centromeres[chr])] <- "salmon"
        segcol_gain[which(ranges[,2] > centromeres[chr])] <- "red"
        chrbins[[chr]] <- data.table(chrcol,ranges,segcol_del,segcol_gain)
    }

    chrbins.df <- do.call(rbind,unname(chrbins) )
    chrbins.df<- data.table(chrbins.df,unite(chrbins.df[,c(1,2,3)],paste)$paste)
    colnames(chrbins.df) <- c("chr","start","end","segcol_del","segcol_gain","binid")


    if(verbose) message("Calculating mean segmean per genomic bin")
    # find overlaps between bins and cnv segments
    binsGR <- with(chrbins.df, GRanges(chr, IRanges(start=start, end=end)))
    segGR <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=end)))
    hits <-GenomicAlignments::findOverlaps(binsGR,segGR)
    bin.idx <- queryHits(hits)
    seg.idx <- subjectHits(hits)

    # bins x samples matrix, initialised to NA (bins without any segment stay NA)
    outmat <- matrix(ncol=length(unique(cnvdat$sample)),nrow=nrow(chrbins.df))
    colnames(outmat) <- unique(cnvdat$sample)
    rownames(outmat) <- chrbins.df$binid

    for(i in seq_len(nrow(chrbins.df)) ){
        segtmp<- cnvdat[seg.idx[which(bin.idx == i)],]
        if(nrow(segtmp)>0){
            # NOTE(review): the message above announces a mean but segment means are
            # summed per sample and bin; kept as is, confirm the intended statistic
            a <- aggregate(segmean~sample,segtmp, sum)
            outmat[i,a$sample]<- a$segmean
        }
        # no else branch: the former one reused 'a' from a previous iteration and
        # failed with "object 'a' not found" when the first bin had no segment
    }

    if(verbose) message("Calculating gain/loss frequencies per genomic bin")
    outmat[which(is.na(outmat),arr.ind=TRUE)] <- 0

    outmat_gain<-outmat_loss<-outmat
    outmat_gain[]<-outmat_loss[]<-0
    nsamples <- ncol(outmat_gain)

    outmat_gain[which(outmat > log2(1+fc.pct), arr.ind=TRUE)] <- 1
    outmat_loss[which(outmat < log2(1-fc.pct), arr.ind=TRUE)] <- 1
    freq.gains <- apply(outmat_gain,1,sum)/nsamples
    freq.loss <- apply(outmat_loss,1,sum)/nsamples

    # global x coordinate of every bin; computed outside the plotting branch because
    # the returned summary needs it even when plot=FALSE
    bin.loc <- chrlimits[chrbins.df[names(freq.gains),on="binid"]$chr,"offset"] + chrbins.df[names(freq.gains),,on="binid"]$start

    if(plot){
        plot.end<- chrlimits$offset[nrow(chrlimits)]+chrlimits$end[nrow(chrlimits)]

        if(verbose) message("Plotting ...")
        altcols <- rep(c(rgb(0.1,0.1,0.1,alpha=0.1),rgb(0.8,0.8,0.8,alpha=0.1)),12)
        altcols2<- rep(c(rgb(0.1,0.1,0.1,alpha=1),rgb(0.4,0.4,0.4,alpha=1)),12)

        plot(x=NULL,y=NULL,xlim=c(0,plot.end),ylim=c(-1,1),bty='n',xaxt='n',yaxt='n',xlab="",ylab="")
        # alternating background stripe per chromosome
        for(i in seq_len(length(chrlimits$offset)) ) rect( chrlimits$offset[i],-1,chrlimits$offset[i]+chrlimits$end[i],1, col=altcols[i],border=NA )
        # gains are drawn upwards, losses downwards
        points(bin.loc,freq.gains,type='h',col=chrbins.df$segcol_gain)
        points(bin.loc,-freq.loss,type='h',col=chrbins.df$segcol_del)
        lines(c(0,plot.end),c(0,0),col="lightgrey")
        lines(c(0,plot.end),c(0.5,0.5),col="lightgrey",lty=3)
        lines(c(0,plot.end),c(-0.5,-0.5),col="lightgrey",lty=3)
        # chromosome labels alternate between the bottom and the top margin
        odd.chr <- seq(1,nrow(chrlimits),2)
        even.chr <- seq(2,nrow(chrlimits),2)
        mtext(gsub("chr","",rownames(chrlimits))[odd.chr],side=1,at=chrlimits$chrlabelpos[odd.chr],las=1,col=altcols2[odd.chr],line=label.line,cex=cex.lab)
        mtext(gsub("chr","",rownames(chrlimits))[even.chr],side=3,at=chrlimits$chrlabelpos[even.chr],las=1,col=altcols2[even.chr],line=label.line,cex=cex.lab)
        mtext("Frequency",side=4,line=1)
        mtext("#samples",side=2,line=1)
        axis(4,c(100,50,0,50,100),at=c(-1,-0.5,0,0.5,1),las=1,pos=plot.end, cex.axis=cex.axis)
        axis(2,c(nsamples,round(nsamples/2),0,round(nsamples/2),nsamples),at=c(-1,-0.5,0,0.5,1),las=1, pos=0, cex.axis=cex.axis)
        p <- recordPlot()
    }else{
        p <- recordPlot(load=NULL, attach=NULL)
    }


    summary <- data.table(chrbins.df[,c("chr","start","end")],bin.loc,freq.gains,freq.loss)

    return(cnvfreq(
        freqsum = summary,
        bin.mat = outmat,
        chrlimits = chrlimits,
        plot=p,
        param = list(
            fc.pct= fc.pct,
            genome.v= genome.v,
            g.bin= g.bin,
            sampleids=sampleids,
            cex.axis= cex.axis,
            cex.lab= cex.lab,
            label.line= label.line
        )
    )
    )
}


# --------------------------------------------------------------------------------
# /R/shattered.regions.cnv.r:
# --------------------------------------------------------------------------------
#' CNV-only based shattered region detection
#'
#' Caller for the identification of shattered genomic regions based on CNV breakpoint densities
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param fc.pct (numeric) copy
#' number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change
#' @param min.cnv.size (numeric) The minimum segment size (in base pairs) to include in the analysis
#' @param min.num.probes (numeric) The minimum number of probes per segment to include in the analysis (NOTE: currently only recorded in the returned parameters, it is not passed on to cnv.breaks)
#' @param low.cov (data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis
#' @param clean.brk (numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied
#' @param window.size (numeric) size in megabases of the genome bin to compute break density
#' @param slide.size (numeric) size in megabases of the sliding genome window
#' @param num.breaks (numeric) minimum number of breakpoints a genomic window must contain to be considered high density
#' @param num.sd (numeric) number of standard deviations above the sample's interquantile mean break density a window must exceed to be considered high density
#' @param dist.iqm.cut (numeric) interquantile average of the distance between breakpoints within a shattered region; regions below this cutoff are labelled 'lc' and removed from the 'hc' high density matrix
#' @param chrlist (character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included
#' @param chr.lim (data.frame) 3 column table (chrom, begin, end) with chromosome limits; if NULL it is obtained with chromosome.limit.coords(cnv)
#' @param verbose (logical) whether to return internal messages
#' @return an instance of the class 'chromo.regs' containing the regions summary, the high density region matrices and the CNV breakpoint data
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' shattered.regions.cnv(cnv)

shattered.regions.cnv <- function(cnv,
                                  fc.pct = 0.2,
                                  min.cnv.size = 0,
                                  min.num.probes=0,
                                  low.cov = NULL,
                                  clean.brk=NULL,
                                  window.size = 10,
                                  slide.size = 2,
                                  num.breaks = 10,
                                  num.sd = 5,
                                  dist.iqm.cut = 1e+05,
                                  chrlist=NULL,
                                  chr.lim=NULL,
                                  verbose=TRUE
){

    stopifnot(cnv@type == "cnv")

    if(is.null(chr.lim)){
        chr.lim <- chromosome.limit.coords(cnv)
    }else{
        stopifnot(ncol(chr.lim) == 3)
    }

    if(!is.null(chrlist)){
        # explicit row subsetting: works for both data.table and plain data.frame inputs
        chr.lim <- chr.lim[which(chr.lim$chrom %in% chrlist),]
    }

    # NOTE(review): min.num.probes is not forwarded here; confirm against the
    # cnv.breaks signature whether it should be
    cnvbrk <- cnv.breaks(cnv = cnv,
                         fc.pct = fc.pct,
                         min.cnv.size = min.cnv.size,
                         low.cov = low.cov,
                         clean.brk=clean.brk,
                         chrlist = chrlist,
                         verbose = verbose)

    if(verbose) message("Mapping CNV breakpoints across the genome:")
    cnv.brk.dens <- break.density(cnvbrk,
                                  chr.lim = chr.lim,
                                  window.size = window.size,
                                  slide.size = slide.size,
                                  verbose = verbose)

    # calculate inter quantile mean and standard deviation per sample
    iqmdata <- apply(cnv.brk.dens,1,IQM,lowQ=0.1,upQ=0.9)
    sddata <- apply(cnv.brk.dens,1,IQSD,lowQ=0.1,upQ=0.9)

    # a: windows exceeding IQM + num.sd * SD; b: windows holding at least num.breaks breakpoints
    a <- sapply(rownames(cnv.brk.dens),function(i) names(which(cnv.brk.dens[i,] > iqmdata[i]+num.sd*sddata[i] )),simplify=FALSE)
    b <- sapply(rownames(cnv.brk.dens),function(i) names(which(cnv.brk.dens[i,] >= num.breaks)),simplify=FALSE)

    # high density windows must satisfy both conditions
    res <- sapply(rownames(cnv.brk.dens),function(i) Reduce(intersect, list(b[[i]],a[[i]])) ,simplify=FALSE)

    highDensityRegions <- cnv.brk.dens
    highDensityRegions[] <- 0
    for(cl in rownames(cnv.brk.dens)) highDensityRegions[cl,res[[cl]]] <- 1

    res <- res[which(unlist(lapply(res,length)) >0)]

    if(verbose){
        message("Locating shattered regions by CNV only...")
        pb <- txtProgressBar(style=3)
        cc <-0
        tot <- length(res)
    }

    restab <- list()
    for(cl in names(res)){
        if(verbose) cc <- cc+1
        if(verbose) setTxtProgressBar(pb, cc/tot)

        # window ids are "chrom start end" strings
        tab <- data.table(do.call(rbind,strsplit(res[[cl]]," ")))
        colnames(tab) <- c("chrom","start","end")
        tab$start <- as.numeric(tab$start )
        tab$end <- as.numeric(tab$end )

        tabgr = with(tab, GRanges(chrom, IRanges(start=start, end=end)))
        hits = as.data.frame(GenomicAlignments::findOverlaps(tabgr,tabgr))

        # chain windows that overlap the previous group into a single region
        agg <- aggregate(subjectHits ~ queryHits, hits, paste,simplify=FALSE)
        prev<-c(); cnum <- 0
        agglist <- list()
        for(x in agg$subjectHits){
            if(length(intersect(x,prev)) > 0){
                agglist[[cnum]] <- unique(c(x,prev))
                prev <- agglist[[cnum]]
            }else{
                cnum <- cnum+1
                agglist[[cnum]]<- x
                prev <-agglist[[cnum]]
            }
        }
        agglistUniq <- list()
        for(i in seq_along(agglist)){
            chr <- as.character(unique(tab[as.numeric(agglist[[i]]),"chrom"]))
            start <-min( tab[as.numeric(agglist[[i]]),"start"])
            end <- max( tab[as.numeric(agglist[[i]]),"end"])
            segNum <- length(agglist[[i]])
            agglistUniq[[i]] <- data.table(chr,start,end,segNum)
        }
        tabmerged <- do.call(rbind,agglistUniq)
        colnames(tabmerged) <- c("chrom","start","end","nseg")
        restab[[cl]] <- tabmerged
    }
    if(verbose) close(pb)

    if(verbose){
        message("Evaluating shattered regions by CNV data only...")
        pb <- txtProgressBar(style=3)
        cc <-0
        tot <- length(restab)
    }
    for(cl in names(restab)){
        if(verbose) cc <- cc+1
        if(verbose) setTxtProgressBar(pb, cc/tot)
        regions <- restab[[cl]]
        br1 <- cnvbrk@breaks[which(cnvbrk@breaks$sample == cl),2:3]
        br1.gr <- with(br1, GRanges(chrom, IRanges(start=pos, end=pos)))
        regions_gr <- with(regions, GRanges(chrom, IRanges(start=start, end=end)))
        hits_1 = GenomicAlignments::findOverlaps(regions_gr,br1.gr)
        n.brk <- dist.iqm <- start <- end <- rep(0,nrow(regions))
        conf <- rep("HC",nrow(regions))
        for(i in seq_len(nrow(regions))){
            # distinct breakpoint positions falling inside region i
            sites <- sort(unique(br1[subjectHits(hits_1)[which(queryHits(hits_1) == i)]]$pos))
            # interquantile mean of the distances between consecutive breakpoints
            dist.iqm[i]  <- IQM(sites[2:length(sites)] - sites[1:(length(sites)-1) ],lowQ = 0.2,upQ = 0.8)
            n.brk[i] <- length(sites)
            start[i] <- min(sites)
            end[i] <- max(sites)
        }
        conf[which(dist.iqm < dist.iqm.cut )] <-"lc"
        chrom <- regions$chrom
        nbins <- regions$nseg
        restab[[cl]] <- data.table(chrom,start,end,nbins,dist.iqm,n.brk,conf)
    }
    if(verbose) close(pb)

    bins <- data.table(do.call(rbind,strsplit(colnames(highDensityRegions)," ")),colnames(highDensityRegions))
    colnames(bins) <- c("chrom","start","end","binid")
    bins$start <- as.numeric(bins$start)
    bins$end <- as.numeric(bins$end)


    # the 'hc' matrix is the high density matrix with every window overlapping an 'lc' region cleared
    binsGR <- with(bins, GRanges(chrom, IRanges(start=start, end=end)))
    highDensityRegionsHC <- highDensityRegions
    for(cl in names(restab)){
        lc <- restab[[cl]][which(restab[[cl]]$conf == "lc"),]
        if(nrow(lc) > 0){
            lcGR<- with(lc, GRanges(chrom, IRanges(start=start, end=end)))
            hits = GenomicAlignments::findOverlaps(binsGR,lcGR)
            # index by the 'binid' column (the former bins$bins did not exist and raised an error)
            highDensityRegionsHC[cl,bins$binid[unique(queryHits(hits))]] <- 0
        }
    }

    results <- chromo.regs(
        regions.summary = restab,
        high.density.regions = highDensityRegions,
        high.density.regions.hc = highDensityRegionsHC,
        cnv.brk.dens = cnv.brk.dens,
        svc.brk.dens = matrix(),
        cnv.brk.common.dens = matrix(),
        svc.brk.common.dens = matrix(),
        cnvbrk = cnvbrk,
        svcbrk = breaks(),
        common.brk = list(),
        cnv = cnv,
        svc = svcnvio(),
        param=list(
            fc.pct = fc.pct,
            min.cnv.size = min.cnv.size,
            min.num.probes=min.num.probes,
            low.cov = low.cov,
            clean.brk=clean.brk,
            window.size = window.size,
            slide.size = slide.size,
            num.breaks = num.breaks,
            num.sd = num.sd,
            dist.iqm.cut = dist.iqm.cut)
    )
    return(results)
}

# --------------------------------------------------------------------------------
# /R/brk.burden.iqm.r:
# --------------------------------------------------------------------------------
#' Data class break.iqm
#'
#' Class to store the per-sample breakpoint burden computed by brk.burden.iqm
#'
#' @param summary (data.table): per-sample summary of the breakpoint burden
#' @param brk.mat (numeric): a matrix of samples versus chromosome arms containing breakpoint densities
#' @param chrlimits (data.table): a table containing the chromosome limit coordinates
#' @param plot (graphical): a recorded plot object
#' @param param (list): a list of parameters provided
#' @return an instance of the class 'break.iqm'
#' @export

break.iqm <- setClass("break.iqm", representation(
    summary  = "data.table",
    brk.mat = "matrix",
    chrlimits = "data.table",
    plot = "recordedplot",
    param = "list"
))


# the header string is kept on one source line so that no indentation leaks into the output
setMethod("show","break.iqm",function(object){
    writeLines(paste("An object of class break.iqm from svpluscnv containing the following stats:\n\nNumber of samples=",nrow(object@brk.mat)))
})


#' Breakpoint burden evaluation
#'
#' Evaluates the breakpoint burden based on an instance 'breaks' produced by svpluscnv::svc.breaks or svpluscnv::cnv.breaks.
#' Breakpoint densities are calculated for each chromosome arm and the inter quantile mean (svpluscnv::IQM) of all chromosome arms is reported for each sample.
#' A Graphical output is generated indicating every sample's arm burden ordered by their IQM.
#'
#' @param brk (breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).
#' @param sample.col (character) A vector of valid colors. Names must match sample column from 'brk'. If null a gradient color based on breakpoint burden IQM will be used.
#' @param chr.lim (data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.
#' @param genome.v (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
#' @param min.arm.size (numeric) minimum size in base pairs for a chromosome arm to be included in the analysis. Size will be calculated based on the 'genome.v' centromere location (excluding centromere bands). Chromosome start and end locations can be provided in 'chr.lim'.
#' @param bp.unit (numeric) The genomic size unit in base pairs to report breakpoint densities. This parameter is also used for the y axis of the plot.
#' @param plot (logical) whether produce a graphical output
#' @param verbose (logical) whether to return internal messages
#' @return an instance of the class 'break.iqm' and optionally a plot into open device
#' @keywords structural variants, mutational burden, chromosomal instability
#' @export
#' @examples
#'
#' # initialize CNV data
#' cnv <- validate.cnv(nbl_segdat)
#'
#' # obtain CNV breakpoints
#' brk <- cnv.breaks(cnv)
#'
#' brk.burden.iqm(brk)


brk.burden.iqm <- function(brk,
                           sample.col = NULL,
                           min.arm.size = 2e7,
                           bp.unit=1e7,
                           genome.v="hg19",
                           chr.lim= NULL,
                           plot=TRUE,
                           verbose=TRUE){

    stopifnot(isS4(brk))

    # fetch cytogenetic bands from genome version (D3GB)
    if(genome.v %in% c("GRCh37","hg19")){
        bands <- remove.factors(GRCh37.bands)
    }else if(genome.v %in% c("GRCh38","hg38")){
        bands <- remove.factors(GRCh38.bands)
    }else{stop("Genome version not provided")}

    # define default chromosome arm boundaries; the centromere is the "acen" band on the q arm
    qcen <- intersect(which(bands$score == "acen"),grep("q",bands$name))
    centromeres_start <- bands[qcen,"start"]
    centromeres_end <- bands[qcen,"end"]
    chromosome_start <- sapply(as.character(unique(bands$chr)), function(i) min( bands$start[which(bands$chr == i)] ))
    chromosome_end <- sapply(as.character(unique(bands$chr)), function(i) max( bands$end[which(bands$chr == i)] ))
    names(chromosome_start) <- names(chromosome_end) <- names(centromeres_start) <- names(centromeres_end) <- paste("chr",bands[qcen,"chr"],sep="")

    # define chromosome arm boundaries based on provided chromosome limits
    if(!is.null(chr.lim)){
        centromeres_start <- centromeres_start[chr.lim$chrom]
        centromeres_end <- centromeres_end[chr.lim$chrom]
        chromosome_start <- chromosome_start[chr.lim$chrom]
        chromosome_end <- chromosome_end[chr.lim$chrom]
        chromosome_end[] <- chr.lim$end
        chromosome_start[] <- chr.lim$begin
    }else{
        # 'begin' is the chromosome start (it was previously filled with the centromere start)
        chr.lim<- data.table(names(centromeres_start),chromosome_start,chromosome_end)
        colnames(chr.lim) <- c("chrom","begin","end")
    }

    # obtain number of breakpoints per sample mapped onto chromosome arms;
    # arms shorter than min.arm.size are left out
    mapped_p <- names(which(centromeres_start -chromosome_start > min.arm.size))
    mapped_q <- names(which(chromosome_end -centromeres_end > min.arm.size))
    p.arm.df <- data.frame(mapped_p,chromosome_start[mapped_p],centromeres_start[mapped_p])
    q.arm.df <- data.frame(mapped_q,centromeres_end[mapped_q],chromosome_end[mapped_q])
    colnames(p.arm.df) <- colnames(q.arm.df) <- c("chrom","start","end")

    p.arm.gr <- with(p.arm.df,GRanges(chrom, IRanges(start=start, end=end)))
    q.arm.gr <- with(q.arm.df,GRanges(chrom, IRanges(start=start, end=end)))

    breaks.gr <- with(brk@breaks, GRanges(chrom, IRanges(start=pos,end=pos)))

    p.hits <- GenomicAlignments::findOverlaps(breaks.gr,p.arm.gr)
    q.hits <- GenomicAlignments::findOverlaps(breaks.gr,q.arm.gr)

    p.armname <- paste(mapped_p,"p",sep="")
    q.armname <- paste(mapped_q,"q",sep="")
    arm.size <- c(p.arm.df$end -p.arm.df$start, q.arm.df$end -q.arm.df$start)
    names(arm.size) <- c(p.armname,q.armname)

    # every sample starts from a zero density for every arm
    template <- rep(0, length(c(p.armname,q.armname)))
    names(template) <- c(p.armname,q.armname)
    arm.brk.dens <- sapply(unique(brk@breaks$sample), function(i) template, simplify=FALSE)

    # (sample, arm) pair for every mapped breakpoint
    p.hits.info <- data.table(brk@breaks$sample[queryHits(p.hits)],p.armname[subjectHits(p.hits)])
    q.hits.info <- data.table(brk@breaks$sample[queryHits(q.hits)],q.armname[subjectHits(q.hits)])

    total.brk <- list()
    for(sample.id in names(arm.brk.dens)){
        input <- c(table(p.hits.info$V2[which(p.hits.info$V1 == sample.id)]),
                   table(q.hits.info$V2[which(q.hits.info$V1 == sample.id)]))
        # breakpoints per bp.unit of arm length
        arm.brk.dens[[sample.id]][names(input)] <- input*bp.unit/arm.size[names(input)]
        total.brk[[sample.id]] <- sum(input)
    }

    # calculate IQM for each sample (reported as log10(1 + IQM), sorted increasingly)
    arm.brk.iqm <- log10(1+sort(unlist(lapply(arm.brk.dens,IQM))))

    # obtain gradient of default colors
    if(is.null(sample.col)){
        sample.col <- rep("green",length(unique(brk@breaks$sample)))
        names(sample.col) <- unique(brk@breaks$sample)
        sample.col.tmp <- map2color(arm.brk.iqm,colorRampPalette(c("darkgreen","orange","red"))(256))
        names(sample.col.tmp) <- names(arm.brk.iqm)
        sample.col[names(sample.col.tmp)] <- sample.col.tmp
    }

    # plot

    if(plot){
        # per sample (ordered by IQM) the arm densities sorted increasingly, on a log10(1 + x) scale
        datavector <- log10(1+unlist(lapply(arm.brk.dens[names(arm.brk.iqm)],sort)))
        datacolor <- unlist(sapply(names(arm.brk.iqm), function(i) rep(sample.col[i], length(template)),simplify=FALSE))
        names(datacolor) <- names(datavector)

        npoints <- length(template)
        plot(datavector,pch=20,xaxt='n',yaxt='n',col="white",xlab="",ylab='',
             bty='n',xlim=c(100,length(datavector)-100))
        # alternating background stripe per sample
        altcol<-"grey95"
        for(i in seq_along(arm.brk.dens)){
            rect((i-1)*npoints,-10,i*npoints,50,col=altcol,border=NA)
            if(altcol == "grey95"){ altcol <- "grey85"
            }else{altcol <- "grey95"}
        }
        abline(h=seq(-2,6,0.5),lty=1,lwd=.2,col="black")

        points(datavector,pch=20,cex=0.3,col=datacolor)
        axis(2,labels=sprintf("%.2f",10^(seq(-2,4,0.5))-1),at=seq(-2,4,0.5),family="Courier",font=1,line=0,cex.axis=1.2,las=1)

        mtext(paste("log10(1+breaks/",bp.unit,")",sep=""),side=2,line=4,cex=1.3)
        # NOTE(review): arm.brk.iqm is already log10(1 + IQM); the extra log2(1 + x) puts this
        # trace on a different scale than the points above - confirm whether this is intended
        lines(seq(npoints/2,length(datavector),length(datavector)/length(arm.brk.iqm)),log2(1+arm.brk.iqm) )
        p <- recordPlot()
    }else{
        p <- recordPlot(load=NULL, attach=NULL)
    }

    # create summary
    nbreaks <- table(brk@breaks$sample)[names(arm.brk.iqm)]
    nbreaks.map <- unlist(total.brk)[names(arm.brk.iqm)]
    brk.dens <- (nbreaks.map*bp.unit/sum(arm.size))[names(arm.brk.iqm)]

    summary <- data.table(names(arm.brk.iqm),
                          arm.brk.iqm,
                          sample.col[names(arm.brk.iqm)],
                          as.numeric(nbreaks),
                          nbreaks.map,
                          brk.dens )
    colnames(summary) <- c("sample","brk.iqm","color","total breaks","nbreaks mapped","overal density")


    return(break.iqm(
        summary = summary,
        brk.mat = do.call(rbind,arm.brk.dens),
        chrlimits = chr.lim,
        plot=p,
        param = list(
            min.arm.size= min.arm.size,
            bp.unit=bp.unit,
            genome.v= genome.v,
            verbose= verbose
        )
    )
    )
}




# --------------------------------------------------------------------------------
# /R/circular.plot.r:
# --------------------------------------------------------------------------------
#' Circular visualization of shattered regions
#'
#' Produces a circos plot combining CNV and SVC date sooming into the chromosomes harboring shattered regions
#'
#' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs
#' @param sample.id
#'   (character) the id of a sample within 'chromo.regs.obj' to be plotted
#' @param print.name (logical) whether to print the sample id in the center of the circular plot
#' @param genome.v (character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
#' @param lrr.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change
#' @param lrr.max (numeric) CNV plot limit
#' @param high.conf (logical) Whether to plot only high confidence shattered regions (see https://github.com/ccbiolab/svpluscnv#identification-of-shattered-regions for more information)
#' @param chrlist (character) vector containing chromosomes to plot; by default only chromosomes with shattered regions are plotted
#' @param add.cnv.legend (x,y or coordinates) the position parameter passed to legend to plot shattered regions and CNV (outer track) description
#' @param add.svc.legend (x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description
#' @param ... Additional graphical parameters passed to text() when the sample name is printed
#' @return circos plot into open device
#' @keywords CNV, segmentation, structural variant, visualization, circular plot
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#'
#' ## obtain shattered regions
#' shatt.regions <- shattered.regions(cnv,svc)
#'
#' # select a sample with shattered regions
#' id <- "SCLC21H_LUNG"
#'
#' circ.chromo.plot(shatt.regions, sample.id = id)

circ.chromo.plot <- function(chromo.regs.obj,
                             sample.id,
                             print.name=TRUE,
                             genome.v = "hg19",
                             lrr.pct = 0.2,
                             lrr.max = 4,
                             high.conf=FALSE,
                             chrlist=NULL,
                             add.cnv.legend="topleft",
                             add.svc.legend="topright",
                             ...){

    stopifnot(nrow(chromo.regs.obj@cnv@data) > 0 | nrow(chromo.regs.obj@svc@data) > 0)

    # restrict CNV and SVC tables to the requested sample; a sample absent from
    # either table yields a zero-row table instead of an undefined object
    cnv.all <- chromo.regs.obj@cnv@data
    svc.all <- chromo.regs.obj@svc@data
    cnvdat <- cnv.all[which(cnv.all$sample == sample.id),]
    svcdat <- svc.all[which(svc.all$sample == sample.id),]
    has.cnv <- nrow(cnvdat) > 0
    has.svc <- nrow(svcdat) > 0

    # shattered regions of the sample; [[ ]] returns NULL for unknown samples
    # and nrow(NULL) > 0 would slip through stopifnot, so test NULL explicitly
    regions <- chromo.regs.obj@regions.summary[[sample.id]]
    stopifnot(!is.null(regions))
    # row selection written with an explicit comma so it behaves the same on
    # data.frame and data.table inputs
    if(high.conf == TRUE) regions <- regions[which(regions$conf == "HC"),]
    stopifnot(nrow(regions) > 0)

    if(is.null(chrlist)) chrlist <- unique(regions$chrom)

    # SV class colours; defined unconditionally because the legend needs them
    map <- setNames(c("blue", "red", "orange","black","green","grey"), c("DEL", "DUP","INV","TRA","INS","BND"))

    if(has.svc){
        alllinks1 <- data.table(svcdat$chrom1,svcdat$pos1,svcdat$pos1 )
        alllinks2 <- data.table(svcdat$chrom2,svcdat$pos2,svcdat$pos2 )
        colnames(alllinks1) <- colnames(alllinks2) <- c("chr","start","end")
        alllinkcolors <- map[as.character(svcdat$svclass)]
        # keep only links with both ends on plotted chromosomes
        zoomchr <- intersect(which(alllinks1$chr %in% chrlist),which(alllinks2$chr %in% chrlist))
        links1 <- alllinks1[zoomchr,]
        links2 <- alllinks2[zoomchr,]
        linkcolors <- alllinkcolors[zoomchr]
    }

    if(has.cnv){
        # colour segments by gain/loss and clamp log-ratios to +/- log2(lrr.max)
        colores <- rep("black",nrow(cnvdat))
        colores[which(cnvdat$segmean < log2(1 - lrr.pct)) ] <- "blue"
        colores[which(cnvdat$segmean > log2(1 + lrr.pct)) ] <- "red"
        cnv.df <- data.frame(cnvdat[,c("chrom","start","end","segmean")],colores)
        cnv.df[,"colores"] <- as.character(cnv.df[,"colores"])
        cnv.df[which(cnv.df$segmean < log2(1/lrr.max) ),"segmean"] <- log2(1/lrr.max)
        cnv.df[which(cnv.df$segmean > log2(lrr.max)),"segmean"] <- log2(lrr.max)

        cnvlist <- list()
        for(i in chrlist) cnvlist[[i]] <- cnv.df[which(cnv.df$chrom == i),]
    }

    # shattered region track data, one data.frame per plotted chromosome
    reg.map <- setNames(c("pink", "purple"), c("lc", "HC"))
    reg.col <- unname(reg.map[regions$conf])
    value <- rep(0.1,nrow(regions))
    regions.plot <- as.data.frame(data.table(regions,reg.col,value))

    p.regions <- list()
    for(chr in chrlist){
        p.regions[[chr]] <- regions.plot[which(regions$chrom == chr),c("chrom","start","end","value","reg.col")]
        colnames(p.regions[[chr]]) <- c("chrom","start","end","value","color")
    }

    circos.initializeWithIdeogram(species=genome.v,chromosome.index=chrlist,plotType=c("axis","labels"), track.height=0.05, axis.labels.cex=0.4,labels.cex=1.3)
    circos.genomicIdeogram(track.height = 0.03)
    circos.genomicTrack(p.regions, bg.lwd =0.01, ylim=c(0,0.02), track.height=0.05,
                        panel.fun = function(region, value, ...) {
                            circos.genomicRect(region, value, ytop = 0.02, ybottom = 0, col = p.regions[[CELL_META$sector.index]][,"color"], border = NA, ...)
                            circos.lines(CELL_META$cell.xlim, c(0.01, 0.01), lty = 2, col = "#00000040")
                        })

    # the CNV track and the SV links are drawn only when the sample has such data
    if(has.cnv){
        circos.genomicTrackPlotRegion(cnvlist, bg.lwd =0.2, bg.col=rainbow(length(cnvlist),alpha=0.1),ylim=c(-2.5,2.5), track.height=0.2,
                                      panel.fun = function(region, value, ...) {
                                          circos.genomicLines(region, value, col=as.character(cnvlist[[CELL_META$sector.index]][,"colores"]), numeric.column = c(1), type="segment")
                                      })
    }
    if(has.svc) circos.genomicLink(links1, links2, col = linkcolors, border = NA)
    if(print.name == TRUE) text(0, 0, gsub("_","\n",sample.id),...)

    if(!is.null(add.cnv.legend)){
        legend(add.cnv.legend,c("shattered regions","CNV gain","CNV neutral","CNV loss"),fill=c("purple",NA,NA,NA),
               lty=c(2,1,1,1), col=c("black","red","black","blue"),border=NA, bty='n', title=expression(bold("CNV (outer)")))
    }

    # legend() rejects an empty label vector, so skip it when there are no SVs
    if(!is.null(add.svc.legend) && has.svc){
        map.legend <- map[sort(unique(as.character(svcdat$svclass)))]
        legend(add.svc.legend,names(map.legend),lty=1, col=map.legend, bty='n', title=expression(bold("SVC (center)")))
    }

}



#' Circular visualization CNV and SVC
#'
#' Produces a circos plot combining CNV and SVC of the whole genome
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
#' @param sample.id (character) the id of the sample to be plotted
#' @param genome.v (character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
#' @param lrr.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2
#' @param lrr.max (numeric) maximum CNV to be plotted
#' @param chrlist (character) vector containing chromosomes to plot; by default all chromosomes plotted
#' @param add.cnv.legend (x,y or coordinates) the position parameter passed to legend to plot CNV (outer tracks) description
#' @param add.svc.legend (x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description
#' @return circos plot into open device
#' @keywords CNV, segmentation, structural variant, visualization, circular plot
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#'
#' ## select a sample id
#' id <- "A549_LUNG"
#'
#' circ.wg.plot(cnv, svc, sample.id=id)


circ.wg.plot <- function(cnv,
                         svc,
                         sample.id=NULL,
                         genome.v = "hg19",
                         lrr.pct = 0.2,
                         lrr.max = 4,
                         chrlist=NULL,
                         add.cnv.legend="topleft",
                         add.svc.legend="topright",
                         ...){

    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data

    stopifnot(svc@type == "svc")
    svcdat <- svc@data

    # without an explicit id the two inputs must share exactly one sample
    if(is.null(sample.id)){
        sample.id <- intersect(cnvdat$sample,svcdat$sample)
        stopifnot(length(sample.id) == 1)
    }
    cnvdat <- cnvdat[which(cnvdat$sample == sample.id),]
    svcdat <- svcdat[which(svcdat$sample == sample.id),]

    if(is.null(chrlist)) chrlist <- chr.sort(unique(cnvdat$chrom))

    # only SVs with both ends on plotted chromosomes can be drawn as links
    # (same restriction circ.chromo.plot applies)
    svcdat <- svcdat[intersect(which(svcdat$chrom1 %in% chrlist),which(svcdat$chrom2 %in% chrlist)),]
    has.svc <- nrow(svcdat) > 0

    alllinks1 <- data.table(svcdat$chrom1,svcdat$pos1,svcdat$pos1 )
    alllinks2 <- data.table(svcdat$chrom2,svcdat$pos2,svcdat$pos2 )
    colnames(alllinks1) <- colnames(alllinks2) <- c("chr","start","end")
    map <- setNames(c("blue", "red", "orange","black","green","black"), c("DEL", "DUP","INV","TRA","INS","BND"))
    alllinkcolors <- map[as.character(svcdat$svclass)]

    # colour segments by gain/loss and clamp log-ratios to +/- log2(lrr.max)
    cnvcirc <- cnvdat[,c("chrom","start","end","segmean")]
    colores <- rep("black",nrow(cnvcirc))
    colores[which(cnvcirc$segmean < log2(1 - lrr.pct)) ] <- "blue"
    colores[which(cnvcirc$segmean > log2(1 + lrr.pct)) ] <- "red"
    cnvcirc <- data.table(cnvcirc,colores)
    cnvcirc[which(cnvcirc$segmean < log2(1/lrr.max) ),"segmean"] <- log2(1/lrr.max)
    cnvcirc[which(cnvcirc$segmean > log2(lrr.max)),"segmean"] <- log2(lrr.max)
    allcnvlist <- list()
    for(i in chrlist) allcnvlist[[i]] <- as.data.frame(cnvcirc[which(cnvcirc$chrom == i),])

    circos.initializeWithIdeogram(species=genome.v, chromosome.index=chrlist, plotType=c("ideogram","labels"))
    text(0, 0, gsub("_","\n",sample.id), cex = 1)
    circos.genomicTrackPlotRegion(allcnvlist, bg.lwd =0.2, bg.col=rainbow(length(allcnvlist),alpha=0.1),ylim=c(-2.4,2.4), track.height=0.2, panel.fun = function(region, value, ...) {
        circos.genomicLines(region, value, col=as.character(allcnvlist[[CELL_META$sector.index]][,"colores"]), numeric.column = c(1), type="segment")
    })
    if(has.svc) circos.genomicLink(alllinks1, alllinks2, col = alllinkcolors, border = NA)

    if(!is.null(add.cnv.legend)){
        legend(add.cnv.legend,c("CNV gain","CNV neutral","CNV loss"),lty=1, col=c("red","black","blue"),
               bty='n', title=expression(bold("CNV (outer)")))
    }

    # legend() rejects an empty label vector, so skip it when no SV is drawn;
    # svclass is converted to character so a factor cannot index 'map' by code
    if(!is.null(add.svc.legend) && has.svc){
        map.legend <- map[sort(unique(as.character(svcdat$svclass)))]
        legend(add.svc.legend,names(map.legend),lty=1, col=map.legend, bty='n', title=expression(bold("SVC (center)")))
    }

}






--------------------------------------------------------------------------------
/R/sv.model.view.r:
--------------------------------------------------------------------------------
#' SV integrated visualization
#'
#' Integrated visualization of SVC and CNV data for defined genomic locations. CNV and SVC data is overlayed into a sample-based track visualization map.
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
#' @param chrom (character) chromosome (e.g chr9)
#' @param start (numeric) genomic coordinate from specified chromosome to start plotting
#' @param stop (numeric) genomic coordinate from specified chromosome to stop plotting
#' @param sampleids (character) a vector containing a list of sample ids represented in svc and/or cnv objects to be plotted
#' @param cnvlim (numeric) limits for color coding of background CNV log-ratios. Use to modify the CNV color contrast at different levels.
#' @param addlegend (character) One of 'sv' (show SV type legend), 'cnv' (show CNV background color legend) or 'both'; NULL draws no legend.
#' @param cex.legend (numeric) The cex values for each legend
#' @param interval (numeric) The axis interval in base pairs
#' @param addtext (character) a vector indicating what SV types should include text labels indicating breakpoint partners genomic locations. The added labels point to breakpoint locations outside the plot area. (e.g. c("TRA","INV") )
#' @param cex.text (numeric) The magnification to be used for SV text info added
#' @param plot (logic) whether to produce a graphical output
#' @param summary (logic) whether the function should return CNV segment 'segbrk' and SV 'svbrk' breakpoints tabular output
#' @param ... additional plot parameters from graphics plot function
#' @return a list with the SV ('svbrk') and CNV segment ('segbrk') breakpoint tables found in the region and/or plot into open device
#' @keywords structural variant, CNV, segmentation
#' @export
#' @examples
#'
#' ## validate input data.frames
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#'
#' # obtain the coordinates of a desired genomic region based on a known gene locus
#' refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD")
#' start <- refSeqGene$start - 150000;
#' stop <- refSeqGene$stop+ 50000;
#' chrom <- refSeqGene$chrom
#'
#' sv.model.view(cnv, svc, chrom, start, stop)
#'



sv.model.view <- function(cnv, svc, chrom, start, stop,
                          sampleids=NULL,
                          cnvlim=c(-2,2),
                          addlegend='both',
                          cex.legend=1,
                          interval=NULL,
                          addtext=NULL,
                          cex.text=.8,
                          plot=TRUE,
                          summary=TRUE,
                          ...){


    stopifnot(!is.null(chrom) && !is.null(start) && !is.null(stop))

    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data

    stopifnot(svc@type == "svc")
    svcdat <- svc@data

    if(!is.null(sampleids)){
        missing.samples <- setdiff(sampleids,c(svcdat$sample,cnvdat$sample))
        if(length(missing.samples) == length(unique(sampleids))){
            stop("None of the samples provided were found in 'sv' and 'cnv' input data!")
        }else if(length(missing.samples) > 0){
            warning(paste("The following samples provided are not found in 'sv' and 'cnv' input data:", paste(missing.samples,collapse=" "),sep=" "))
        }
        svcdat <- svcdat[which(svcdat$sample %in% sampleids),]
        cnvdat <- cnvdat[which(cnvdat$sample %in% sampleids),]
    }

    genegr <- with(data.frame(chrom,start,stop), GRanges(chrom, IRanges(start=start, end=stop)))

    # Find samples with SV breaks within defined genomic region (either end)
    sv1gr <- with(svcdat, GRanges(chrom1, IRanges(start=pos1, end=pos1)))
    sv2gr <- with(svcdat, GRanges(chrom2, IRanges(start=pos2, end=pos2)))

    sv_hits1 <- GenomicAlignments::findOverlaps(sv1gr,genegr)
    sv_hits2 <- GenomicAlignments::findOverlaps(sv2gr,genegr)
    svtab <- svcdat[sort(unique(c(queryHits(sv_hits1),queryHits(sv_hits2)))),]
    svBreakSamples <- unique(svtab$sample)
    if(length(svBreakSamples) == 0) warning("Thre is no SV breakpoints in the defined genomic region")

    # obtain SVs for plotting with different colors for each svclass;
    # translocations are drawn as points, all other classes as boxes
    svcolormap <- setNames(c("blue", "red", "orange", "black", "green","grey20"),
                           c("DEL", "DUP", "INV", "TRA", "INS", "BND"))
    svcolor <- svcolormap[svtab$svclass]
    svtab_plot <- data.table(svtab,svcolor)
    svtab_plot_seg <- svtab_plot[which(svtab_plot$svclass != "TRA")]
    svtab_plot_tra <- svtab_plot[which(svtab_plot$svclass == "TRA")]

    # Find samples with CNV segment breaks within defined genomic region
    seg1br <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=start)))
    seg2br <- with(cnvdat, GRanges(chrom, IRanges(start=end, end=end)))
    seg_hits1 <- GenomicAlignments::findOverlaps(seg1br,genegr)
    seg_hits2 <- GenomicAlignments::findOverlaps(seg2br,genegr)
    seg.rows <- sort(unique(c(queryHits(seg_hits1),queryHits(seg_hits2))))
    segbrk <- cnvdat[seg.rows,]
    segBreakSamples <- unique(segbrk$sample)
    if(length(segBreakSamples) == 0) warning("Thre is no CNV segment breakpoints in the defined genomic region")

    if(plot==TRUE){
        # Find overlap between all CNV segments and the defined genomic region for plotting

        seggr <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=end)))
        hits_seg <- GenomicAlignments::findOverlaps(seggr,genegr)
        seg_plot <- cnvdat[queryHits(hits_seg),]
        segcolor <- map2color(seg_plot$segmean,
                              pal=colorRampPalette(c("lightblue","white","salmon"))(256),
                              limits=cnvlim)
        seg_plot <- data.table(seg_plot,segcolor)

        # one horizontal track per sample; seq_along keeps an empty sample set
        # empty instead of producing the c(1,0) sequence of 1:0
        track.ids <- if(!is.null(sampleids)) sampleids else unique(c(svBreakSamples,segBreakSamples))
        sample_order <- setNames(seq_along(track.ids), track.ids)

        # NULL means "no legend"; normalised once so later tests are scalar
        legend.kind <- if(is.null(addlegend)) "none" else addlegend

        if(!is.null(addlegend)){
            # reserve extra vertical room above the tracks for the legends
            plot_ylim <- length(sample_order)*10/100+length(sample_order)
            legend_ypos <- plot_ylim - length(sample_order)*3/100
            if(length(sample_order) < 10) plot_ylim <- length(sample_order) +1
        }else{
            plot_ylim <- length(sample_order)
        }

        plot(x=NULL,y=NULL,xlim=range(c(start,stop)),ylim=range(c(0,plot_ylim)),
             xaxt='n',yaxt='n',xlab='',ylab='',bty='n', ...)

        mtext(side=2,at=sample_order-0.5,text=names(sample_order),las=2,line = 0.5, ...)

        # grey background bar for every sample track
        for(sid in names(sample_order)){
            ypos <- sample_order[sid]
            polygon(rbind(
                c(start-1e7,ypos+0.02),
                c(start-1e7,ypos-0.98),
                c(stop+1e7,ypos-0.98),
                c(stop+1e7,ypos+0.02)),
                col="grey80",border=NA)
        }

        # CNV segments coloured by log-ratio; seq_len() skips samples that have
        # no segment in the region (1:nrow() iterated over c(1,0) for those)
        for(sid in names(sample_order)){
            seg_sample_plot <- seg_plot[which(seg_plot$sample == sid),]
            ypos <- sample_order[sid]
            for(i in seq_len(nrow(seg_sample_plot))){
                polygon(rbind(
                    c(seg_sample_plot[i]$start,ypos),
                    c(seg_sample_plot[i]$start,ypos-1),
                    c(seg_sample_plot[i]$end,ypos-1),
                    c(seg_sample_plot[i]$end,ypos)
                ),col=seg_sample_plot[i]$segcolor,border=NA)
            }
        }


        # translocations: a marker plus dotted line at each end that lies on
        # 'chrom', optionally labelled with the partner location
        for(sid in unique(svtab_plot_tra$sample)){
            svtab_plot_tra_i <- svtab_plot_tra[which(svtab_plot_tra$sample == sid),]
            ypos <- sample_order[sid]
            # vertical offsets so that stacked events do not overprint
            addrnorm <- rep(c(0,0.3,-0.3,0.1,-0.1,0.2,-0.2),nrow(svtab_plot_tra_i))
            for(i in seq_len(nrow(svtab_plot_tra_i))){
                if(svtab_plot_tra_i[i]$chrom2 == chrom){
                    points(svtab_plot_tra_i[i]$pos2,ypos-0.5+addrnorm[i],pch=10)
                    lines(c(svtab_plot_tra_i[i]$pos2,svtab_plot_tra_i[i]$pos2),c(ypos,ypos-1),lwd=1,lty=3)
                    if("TRA" %in% addtext){
                        text(svtab_plot_tra_i[i]$pos2,ypos-0.5+addrnorm[i],
                             paste("  ",svtab_plot_tra_i[i]$chrom1,":",svtab_plot_tra_i[i]$pos1,sep=""),
                             pos=4,offset=0,cex=cex.text)
                    }
                }
                if(svtab_plot_tra_i[i]$chrom1 == chrom){
                    points(svtab_plot_tra_i[i]$pos1,ypos-0.5+addrnorm[i],pch=10)
                    lines(c(svtab_plot_tra_i[i]$pos1,svtab_plot_tra_i[i]$pos1),c(ypos,ypos-1),lwd=1,lty=3)
                    if("TRA" %in% addtext) {
                        text(svtab_plot_tra_i[i]$pos1,ypos-0.5+addrnorm[i],
                             paste("  ",svtab_plot_tra_i[i]$chrom2,":",svtab_plot_tra_i[i]$pos2,sep=""),
                             pos=4,offset=0,cex=cex.text)
                    }
                }
            }
        }

        # all other SV classes: an open box spanning pos1-pos2
        for(sid in unique(svtab_plot_seg$sample)){
            svtab_plot_seg_i <- svtab_plot_seg[which(svtab_plot_seg$sample == sid)]
            ypos <- sample_order[sid]
            addrnorm <- rep(c(0,0.2,-0.2,0.1,-0.1,0.3,-0.3),nrow(svtab_plot_seg_i))
            for(i in seq_len(nrow(svtab_plot_seg_i))){
                polygon(rbind(
                    c(svtab_plot_seg_i[i]$pos1,ypos-0.4-addrnorm[i]),
                    c(svtab_plot_seg_i[i]$pos1,ypos-0.6-addrnorm[i]),
                    c(svtab_plot_seg_i[i]$pos2,ypos-0.6-addrnorm[i]),
                    c(svtab_plot_seg_i[i]$pos2,ypos-0.4-addrnorm[i])
                ),col=NA,border=svtab_plot_seg_i[i]$svcolor)

                # label ends that fall outside the plotted window
                if(svtab_plot_seg_i[i]$svclass %in% addtext){
                    if(svtab_plot_seg_i[i]$pos1 < start){
                        text(start,ypos-0.5-addrnorm[i],
                             paste("<-",svtab_plot_seg_i[i]$pos1,sep=""),
                             pos=4,offset=0,cex=cex.text)
                    }
                    if(svtab_plot_seg_i[i]$pos2 > stop){
                        text(stop,ypos-0.5-addrnorm[i],
                             paste(svtab_plot_seg_i[i]$pos2,"->",sep=""),
                             pos=2,offset=0,cex=cex.text)
                    }
                }
            }
        }

        if(is.null(interval)) interval <- round((stop - start)/5000) * 1000
        xlabs <- seq(floor(start/10000)*10000, ceiling(stop/10000)*10000,interval)
        axis(1, at = xlabs,labels=TRUE, lwd.ticks=1.5, pos=0,...)

        if(is.null(cex.legend)) cex.legend <- 1

        if(legend.kind %in% c("sv","both")) {
            fillx <- c("white", "white", "white", "white", "white",NA)
            borderx <- c("blue", "red","orange","green","grey20",NA)
            pchx <- c(NA,NA,NA,NA,NA,10)
            names(fillx) <- names(borderx) <- names(pchx) <- c("DEL", "DUP", "INV","INS","BND", "TRA")
            svclassin <- sort(unique(svtab_plot$svclass))
            # legend() rejects an empty label vector
            if(length(svclassin) > 0){
                legend(x= start, y =legend_ypos+0.2, legend = svclassin, bg=NA,
                       bty = "n", fill = fillx[svclassin], border=borderx[svclassin],
                       pch = pchx[svclassin], horiz = TRUE, x.intersp=0.2, cex = cex.legend)
            }
        }
        if(legend.kind %in% c("cnv","both")) {
            # the colour key spans the same limits used to colour the segments
            colkey(colorRampPalette(c("lightblue","white","salmon"))(256),clim = cnvlim,side=3,add=TRUE,side.clab=1,length=0.5,shift=0.2,lwd.ticks = 2, dist = -0.12)
        }
    }
    if(summary){
        # both tables are restricted to breakpoints inside the queried region
        return(list(svbrk=svtab,segbrk=segbrk))
    }
}



--------------------------------------------------------------------------------
/R/breakpoint.density.r:
--------------------------------------------------------------------------------
#' Data class breaks
#'
#' Class to store breakpoint annotations in association with genomic features (e.g. gene loci)
#'
#' @param breaks (data.table): the breakpoint info containing data.table, this will be occupied by the CNV segmentation data in the case of cnv.break.annot or SV for sv.break.annot. Unique random string rownames are added to the returned breaks data.frame.
#' @param burden (numeric): a vector containing the total number of breakpoints in each sample
#' @param param (list): a list of parameters provided
#' @return an instance of the class 'breaks' containing breakpoint and breakpoint burden information
#' @export
breaks <- setClass("breaks", representation(
    breaks = "data.table",
    burden = "numeric",
    param = "list"
))


# Console summary of a 'breaks' object: data type, number of samples (length of
# the burden vector) and number of breakpoints (rows of the breaks table).
# The text is assembled from single-line literals; the original literal ran
# across a source line break and printed that break and its indentation.
setMethod("show","breaks",function(object){
    writeLines(paste("An object of class breaks from svpluscnv containing",object@param$datatype,"breakpoints:",
                     "\nNumber of samples=",length(object@burden),
                     "\nTotal number of breakpoints =",nrow(object@breaks)))
})


#' Identify CNV breakpoints
#'
#' Identify CNV breakpoints filtered by the change in copy number log-ratio between contiguous segments
#'
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @param fc.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2
#' @param break.width (numeric) the maximum distance between a segment end and the subsequent segment start positions beyond which breakpoints are discarded
#' @param min.cnv.size (numeric) The minimum segment size (in base pairs) to include in the analysis
#' @param min.num.probes (numeric) The minimum number of probes per segment to include in the analysis
#' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...
#' @param low.cov (data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis
#' @param clean.brk (numeric) identical breakpoints across multiple samples tend to be artifacts; remove breaks > N
#' @param verbose (logical) whether to return internal messages
#' @return an instance of the class 'breaks' containing breakpoint and breakpoint burden information
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' # initialized CNV data
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' cnv.breaks(cnv)
#'


cnv.breaks <- function(cnv,
                       fc.pct = 0.2,
                       break.width = 10000,
                       min.cnv.size = NULL,
                       min.num.probes = NULL,
                       chrlist = NULL,
                       low.cov = NULL,
                       clean.brk = NULL,
                       verbose = TRUE){


    stopifnot(cnv@type == "cnv")
    cnvdat <- cnv@data

    # burden is initialised from every input sample so that samples left
    # without breakpoints after filtering still report 0
    brk.burden <- rep(0,length(unique(cnvdat$sample)))
    names(brk.burden) <- unique(cnvdat$sample)

    # restrict to the requested chromosomes (the original computed 'chrlist'
    # but never applied it); the default keeps every chromosome
    if(!is.null(chrlist)) cnvdat <- cnvdat[which(cnvdat$chrom %in% chrlist),]

    if(!is.null(min.cnv.size)) cnvdat <- cnvdat[which(cnvdat$end - cnvdat$start >= min.cnv.size),]
    if(!is.null(min.num.probes)) cnvdat <- cnvdat[which(cnvdat$probes >= min.num.probes),]

    # a breakpoint is defined between two consecutive rows
    lastrow <- nrow(cnvdat)
    stopifnot(lastrow > 1)
    upstream <- cnvdat[1:(lastrow-1),]
    dnstream <- cnvdat[2:lastrow,]

    # candidate breakpoint between every pair of consecutive rows: midpoint of
    # the gap, gap width and fold change between the two segment means
    pos <- round((as.numeric(dnstream$start) + as.numeric(upstream$end))/2)
    width <- dnstream$start - upstream$end
    FC <- (2^upstream$segmean) / (2^dnstream$segmean)
    uid <- paste("brk_",createRandomString(lastrow-1,8),sep="")
    breakpoints <- data.table(sample=dnstream$sample,
                              chrom=dnstream$chrom,
                              pos=pos,
                              width=width,
                              FC=FC,
                              uid=uid)

    # fold change beyond the cutoff in either direction
    break_idx <- c(which( log2(FC) >= log2(1+fc.pct)),which( log2(FC) < log2(1 - fc.pct)))

    # both segments must belong to the same chromosome and the same sample
    samechr <- which(upstream$chrom == dnstream$chrom)
    samesample <- which(upstream$sample == dnstream$sample)

    if(is.null(break.width)) break.width <- Inf
    brwidthin <- which(width < break.width)

    breakpoints <- breakpoints[Reduce(intersect, list(break_idx,samechr,samesample,brwidthin)),]


    if(!is.null(low.cov)){
        if(verbose) message("Filtering breakpoints in low coverage regions")
        low.cov.df <- data.table(low.cov[,1:3])
        colnames(low.cov.df) <- c("chrom","start","end")
        low_cov_GR <- with(low.cov.df, GRanges(chrom, IRanges(start=start, end=end)))
        # breakpoints are single positions: the table has 'pos' but no
        # 'start'/'end' columns (the original referenced the latter)
        breakpoints_GR <- with(breakpoints, GRanges(chrom, IRanges(start=pos, end=pos)))
        overlapgr <- GenomicAlignments::findOverlaps(breakpoints_GR,low_cov_GR,ignore.strand=TRUE)
        breakpoints <- breakpoints[setdiff(seq_len(nrow(breakpoints)),queryHits(overlapgr)),]
    }

    if(!is.null(clean.brk)){
        # breakpoints with identical chrom/pos/width seen in 'clean.brk' or
        # more rows are removed
        breakids <- unite(breakpoints[,c(2:4)],"newcol")$newcol
        breakids.freq <- sort(table(breakids),decreasing=TRUE)
        breakpoints <- breakpoints[which(breakids %in% names(which(breakids.freq < clean.brk))),]
    }

    brk.burden.sub <- table(breakpoints$sample)
    brk.burden[names(brk.burden.sub)] <- brk.burden.sub

    return(breaks(breaks=breakpoints,
                  burden=brk.burden,
                  param=list(
                      datatype=cnv@type,
                      fc.pct = fc.pct,
                      min.cnv.size = min.cnv.size,
                      min.num.probes=min.num.probes,
                      low.cov=low.cov,
                      clean.brk=clean.brk
                  )
    )
    )
}



#' Identify SVC breakpoints
#'
#' Transform structural variant (SVC) data.frame into a 'breaks' object
#'
#' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
#' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...
#' @param low.cov (data.table) a data.table (chrom, start, end) indicating low coverage regions to exclude from the analysis
#' @return an instance of the class 'breaks' containing breakpoint and breakpoint burden information
#' @keywords Structural variants
#' @export
#' @examples
#'
#' ## Obtain breakpoints from SV calls data
#' svc <- validate.svc(svdat_lung_ccle)
#'
#' svc.breaks(svc)



svc.breaks <- function(svc, chrlist=NULL,low.cov=NULL){

    stopifnot(svc@type == "svc")

    # keep only SVs with both ends on the requested chromosomes
    if(!is.null(chrlist) ){
        svcdat <- svc@data[intersect(which(svc@data$chrom1 %in% chrlist),which(svc@data$chrom2 %in% chrlist)),]
    }else{
        svcdat <- svc@data
    }
    stopifnot(nrow(svcdat) > 0)


    brk.burden <- rep(0,length(unique(svcdat$sample)))
    names(brk.burden) <- unique(svcdat$sample)


    # every SV contributes two breakpoints (one per end); 'svcuid' keeps the
    # id of the originating SV, 'uid' is unique per breakpoint
    uid <- paste("brk_",createRandomString(nrow(svcdat)*2,8),sep="")
    svcdat.breaks <- data.table(c(svcdat$sample,svcdat$sample),
                                c(svcdat$chrom1,svcdat$chrom2),
                                c(svcdat$pos1,svcdat$pos2),
                                c(svcdat$strand1,svcdat$strand2),
                                c(svcdat$svclass,svcdat$svclass),
                                c(svcdat$uid,svcdat$uid),
                                uid)

    colnames(svcdat.breaks) <- c("sample","chrom","pos","strand","svclass","svcuid","uid")
    if(!is.null(low.cov)){
        low.cov.df <- data.table(low.cov[,1:3])
        colnames(low.cov.df) <- c("chrom","start","end")

        svc_ranges <- with(svcdat.breaks, GRanges(chrom, IRanges(start=pos, end=pos)))
        low.cov_ranges <- with(low.cov.df, GRanges(chrom, IRanges(start=start, end=end)))

        low.cov_hits <- GenomicAlignments::findOverlaps(svc_ranges,low.cov_ranges)

        # queryHits() are row indices of svcdat.breaks; drop exactly those rows
        # (the original tested a non-existent 'id' column and removed every row)
        svcdat.breaks <- svcdat.breaks[setdiff(seq_len(nrow(svcdat.breaks)),queryHits(low.cov_hits)),]
    }

    brk.burden.sub <- table(svcdat.breaks$sample)
    brk.burden[names(brk.burden.sub)] <- brk.burden.sub


    return(breaks(breaks=svcdat.breaks,
                  burden=brk.burden,
                  param=list(
                      datatype=svc@type,
                      low.cov=low.cov
                  )
    )
    )

}




#' Breakpoint density map
#'
#' Generating a genomic map based on a defined bin size and sliding window and counts the number of breakpoints mapped onto each bin. This function is used internally by svpluscnv::shattered.regions and svpluscnv::shattered.regions.cnv
#'
#' @param brk (breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).
#' @param chr.lim (data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.
#' @param genome.v (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
#' @param window.size (numeric) size in megabases of the genome bin onto which breakpoints will be mapped
#' @param slide.size (numeric) size in megabases of the sliding genomic window; if slide.size < window.size the genomic bins will overlap
#' @param verbose (logical) whether to return internal messages
#' @return a matrix of samples (rows) and genomic bins (cols) with the number of breakpoints mapped in each cell
#' @keywords CNV, segmentation
#' @export
#' @examples
#'
#' # initialize CNV data
#' cnv <- validate.cnv(segdat_lung_ccle)
#'
#' # obtain CNV breakpoints
#' brk <- cnv.breaks(cnv)
#'
#' break.density(brk)


break.density <- function(brk,
                          chr.lim=NULL,
                          genome.v = "hg19",
                          window.size = 10,
                          slide.size=2,
                          verbose=TRUE){
    if(is.null(chr.lim)){
        chr.lim <- d3gb.chr.lim(genome.v=genome.v)
    }else{
        stopifnot(ncol(chr.lim) == 3)
    }

    chr.begin <- chr.lim$begin
    chr.end <- chr.lim$end
    names(chr.begin) <- names(chr.end) <- chr.lim$chrom

    # make sure both chr.lim and breaks have same chromosome names
    # (the column is 'chrom'; '$chr' only worked through partial matching)
    seqnames <- intersect(chr.lim$chrom,brk@breaks$chrom)
    stopifnot(length(seqnames) > 0)

    # a template vector to save breakpoint counts (one zero per sample)
    templatevector <- brk@burden
    templatevector[] <- 0

    # window and slide sizes in base pairs; 'offset' is the number of slide
    # steps that make up one window
    WS <- window.size * 1e+6
    SS <- slide.size * 1e+6
    offset <- window.size/slide.size

    chrlist <- chr.sort(chr.lim$chrom)

    # count breaks for each chromosome for each fragment
    fragment <- list()
    for(chr in chrlist){

        if(verbose) cat("\r",chr)

        chr_breaks <- brk@breaks[which(brk@breaks$chrom == chr),]
        break.position <- chr_breaks$pos
        frag <- seq(chr.begin[chr],chr.end[chr]+SS,SS)

        # a chromosome shorter than one window has no complete bin; without
        # this guard (1+offset):length(frag) would count downwards
        if(length(frag) <= offset) next

        for(i in (1+offset):length(frag)){
            bin.start <- frag[i - offset]
            bin.stop <- frag[i]
            bin.id <- paste(chr,bin.start,bin.stop)
            fragment[[bin.id]] <- templatevector
            # breakpoints strictly inside the bin, tallied per sample
            inbin <- which(break.position > bin.start & break.position < bin.stop)
            res_bp <- table(chr_breaks$sample[inbin])
            fragment[[bin.id]][names(res_bp)] <- res_bp
        }
    }
    if(verbose) cat("\nDone!\n")

    return( do.call(cbind,fragment))

}




#' Breakpoint matching
#'
#' Match common breakpoints from two different datasets or data types based on their co-localization in the genome.
289 | #' 290 | #' @param brk1 (S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks` 291 | #' @param brk2 (S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks` to compare against brk1 292 | #' @param maxgap (numeric) distance (base pairs) limit for nreakpoints to be consider colocalized 293 | #' @param plot (logical) whether to plot into open device 294 | #' @param verbose (logical) whether to return internal messages 295 | #' @return an object containing co-localizing breakpoints from two input 'breaks' 296 | #' @keywords CNV, SV, genomic breakpoints 297 | #' @export 298 | #' @examples 299 | #' 300 | #' # initialize CNV and SVC data 301 | #' cnv <- validate.cnv(segdat_lung_ccle) 302 | #' svc <- validate.svc(svdat_lung_ccle) 303 | #' 304 | #' ## Obtain breakpoints from CNV and SVC 305 | #' brk1 <- cnv.breaks(cnv) 306 | #' brk2 <- svc.breaks(svc) 307 | #' 308 | #' common.brk <- match.breaks(brk1, brk2) 309 | #' 310 | 311 | 312 | 313 | match.breaks <- function(brk1, 314 | brk2, 315 | maxgap=100000, 316 | verbose=FALSE, 317 | plot=TRUE){ 318 | 319 | common_samples <- intersect(names(brk1@burden),names(brk2@burden)) 320 | stopifnot(length(common_samples) > 0) 321 | 322 | brk1_match <- brk2_match <- res <- list() 323 | for(id in common_samples){ 324 | 325 | brk1_i <- brk1@breaks[which(brk1@breaks$sample == id),] 326 | brk_ranges1 <- with(brk1_i, GRanges(chrom, IRanges(start=pos, end=pos))) 327 | 328 | brk2_i <- brk2@breaks[which(brk2@breaks$sample == id),] 329 | brk_ranges2 <- with(brk2_i, GRanges(chrom, IRanges(start=pos, end=pos))) 330 | 331 | 332 | options(warn=-1) 333 | seg_seg = GenomicAlignments::findOverlaps(brk_ranges1, brk_ranges2, maxgap=maxgap) 334 | options(warn=0) 335 | 336 | brk_match1 <- sort(unique(queryHits(seg_seg))) 337 | brk_match2 <- sort(unique(subjectHits(seg_seg))) 338 | 339 | res[[id]] <- data.table(id,length(brk_match1), nrow(brk1_i), length(brk_match2), nrow(brk2_i)) 340 | colnames(res[[id]]) <- 
c("sample","matched.brk1", "total.brk1", "matched.brk2", "total.brk2") 341 | 342 | brk1_match[[id]] <- brk1_i[brk_match1,] 343 | brk2_match[[id]] <- brk2_i[brk_match2,] 344 | } 345 | 346 | restab <- do.call(rbind,res) 347 | 348 | if(plot){ 349 | def.par <- par(no.readonly = TRUE) 350 | par(mfrow=c(2,1)) 351 | restab <- restab[order(restab$total.brk2)] 352 | m2 <- sprintf("%.1f",100*mean(na.omit(restab$matched.brk2/restab$total.brk2))) 353 | barplot(rbind(restab$matched.brk2, restab$total.brk2 - restab$matched.brk2), 354 | border=NA,las=2,xlab="",horiz=FALSE,cex.main=.7,cex.names=.4, 355 | names=restab$sample,ylab="#samples" ) 356 | legend("top",paste(brk2@param$datatype," breaks matched by ", 357 | brk1@param$datatype, 358 | " breaks\n","Average = ",m2,"%",sep=""),bty='n') 359 | grid(ny=NULL,nx=NA) 360 | 361 | restab <- restab[order(restab$total.brk1)] 362 | m2 <- sprintf("%.1f",100*mean(na.omit(restab$matched.brk1/restab$total.brk1))) 363 | barplot(rbind(restab$matched.brk1, restab$total.brk1 - restab$matched.brk1), 364 | border=NA,las=2,xlab="",horiz=FALSE,cex.main=.7,cex.names=.4, 365 | names=restab$sample,ylab="#samples") 366 | legend("top",paste(brk1@param$datatype, 367 | " breaks matched by ",brk2@param$datatype, 368 | " breaks\n","Average = ",m2,"%",sep=""),bty='n') 369 | grid(ny=NULL,nx=NA) 370 | par(def.par) 371 | } 372 | 373 | return(list( 374 | brk1_match = do.call(rbind,brk1_match), 375 | brk2_match = do.call(rbind,brk2_match), 376 | restab= restab)) 377 | } 378 | 379 | --------------------------------------------------------------------------------