├── .DS_Store
├── data
    ├── .DS_Store
    ├── svpluscnv_data.rda
    └── datalist
├── figure
    ├── .DS_Store
    ├── plot_readme_1-1.png
    ├── plot_readme_10-1.png
    ├── plot_readme_2-1.png
    ├── plot_readme_3-1.png
    ├── plot_readme_3b-1.png
    ├── plot_readme_3c-1.png
    ├── plot_readme_3d-1.png
    ├── plot_readme_4-1.png
    ├── plot_readme_5-1.png
    ├── plot_readme_6-1.png
    ├── plot_readme_7-1.png
    ├── plot_readme_8-1.png
    └── plot_readme_9-1.png
├── vignettes
    ├── .DS_Store
    └── figure
    │   ├── plot_vignette_1-1.png
    │   ├── plot_vignette_2-1.png
    │   ├── plot_vignette_3-1.png
    │   └── plot_vignette_4-1.png
├── man
    ├── .Rapp.history
    ├── nbl_svdat.Rd
    ├── cnv_blacklist_regions.Rd
    ├── nbl_segdat.Rd
    ├── svdat_lung_ccle.Rd
    ├── segdat_lung_ccle.Rd
    ├── refseq_hg19.Rd
    ├── refseq_hg38.Rd
    ├── extract.bins-methods.Rd
    ├── IQM.Rd
    ├── freq.threshold-methods.Rd
    ├── chr.sort.Rd
    ├── refSeqDat-class.Rd
    ├── IQSD.Rd
    ├── d3gb.chr.lim.Rd
    ├── map2color.Rd
    ├── genecnv-class.Rd
    ├── get.genesgr.Rd
    ├── ave.segmean.Rd
    ├── svcnvio-class.Rd
    ├── gene.symbol.info-methods.Rd
    ├── hbd.mat-methods.Rd
    ├── chromosome.limit.coords.Rd
    ├── cnvfreq-class.Rd
    ├── break.iqm-class.Rd
    ├── null.freq-class.Rd
    ├── createRandomString.Rd
    ├── validate.cnv.Rd
    ├── med.segmean.Rd
    ├── merge2lists.Rd
    ├── breaks-class.Rd
    ├── upgr.Rd
    ├── dngr.Rd
    ├── svc.breaks.Rd
    ├── geneBreakOverlap.Rd
    ├── chr.arm.cnv.Rd
    ├── hot.spot.samples.Rd
    ├── amp.del.Rd
    ├── shattered.eval.Rd
    ├── segment.gap.Rd
    ├── bed2chromo.reg.Rd
    ├── pct.genome.changed.Rd
    ├── get.chr.bins.Rd
    ├── match.breaks.Rd
    ├── gene.cnv.Rd
    ├── validate.svc.Rd
    ├── shattered.map.plot.Rd
    ├── clean.cnv.artifact.Rd
    ├── break.density.Rd
    ├── cnv.breaks.Rd
    ├── freq.p.test.Rd
    ├── circ.wg.plot.Rd
    ├── cnv.freq.Rd
    ├── break.annot-class.Rd
    ├── gene.track.view.Rd
    ├── chromo.regs-class.Rd
    ├── svc.break.annot.Rd
    ├── shattered.regions.cnv.Rd
    ├── circ.chromo.plot.Rd
    ├── brk.burden.iqm.Rd
    ├── sv.model.view.Rd
    ├── cnv.break.annot.Rd
    └── shattered.regions.Rd
├── svpluscnv-manual.pdf
├── R
    ├── get.genesgr.r
    ├── pct.genome.changed.r
    ├── hot.spot.samples.R
    ├── segment.means.r
    ├── chr.arm.cnv.r
    ├── svpluscnv.data.r
    ├── shattered.map.plot.r
    ├── gene.cnv.r
    ├── freq.p.test.r
    ├── internal_functions.r
    ├── clean.cnv.artifact.r
    ├── validate.input.data.r
    ├── gene.track.view.r
    ├── cnv.freq.plot.r
    ├── shattered.regions.cnv.r
    ├── brk.burden.iqm.r
    ├── circular.plot.r
    ├── sv.model.view.r
    └── breakpoint.density.r
├── DESCRIPTION
└── NAMESPACE


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/.DS_Store


--------------------------------------------------------------------------------
/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/data/.DS_Store


--------------------------------------------------------------------------------
/figure/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/.DS_Store


--------------------------------------------------------------------------------
/vignettes/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/.DS_Store


--------------------------------------------------------------------------------
/man/.Rapp.history:
--------------------------------------------------------------------------------
1 | load("/Users/lopezg16/Box Sync/git/svpluscnv/data/hg19.rda")
2 | refseq_hg19
3 | 


--------------------------------------------------------------------------------
/svpluscnv-manual.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/svpluscnv-manual.pdf


--------------------------------------------------------------------------------
/data/svpluscnv_data.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/data/svpluscnv_data.rda


--------------------------------------------------------------------------------
/figure/plot_readme_1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_1-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_10-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_2-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_3b-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3b-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_3c-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3c-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_3d-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_3d-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_4-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_5-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_6-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_7-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_8-1.png


--------------------------------------------------------------------------------
/figure/plot_readme_9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/figure/plot_readme_9-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plot_vignette_1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_1-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plot_vignette_2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_2-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plot_vignette_3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_3-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plot_vignette_4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ccbiolab/svpluscnv/HEAD/vignettes/figure/plot_vignette_4-1.png


--------------------------------------------------------------------------------
/data/datalist:
--------------------------------------------------------------------------------
1 | cnv_blacklist_regions
2 | hg19: refseq_hg19
3 | hg38: refseq_hg38
4 | nbl_target_cnv: nbl_segdat
5 | nbl_target_sv: nbl_svdat
6 | segdat_lung_ccle
7 | svdat_lung_ccle
8 | 


--------------------------------------------------------------------------------
/man/nbl_svdat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{nbl_svdat}
 5 | \alias{nbl_svdat}
 6 | \title{TARGET Neuroblastoma SVC}
 7 | \format{
 8 | An object of class \code{data.frame} with 7366 rows and 8 columns.
 9 | }
10 | \usage{
11 | nbl_svdat
12 | }
13 | \description{
14 | TARGET CGI structural variants: https://target-data.nci.nih.gov/
15 | }
16 | \keyword{SVs}
17 | 


--------------------------------------------------------------------------------
/man/cnv_blacklist_regions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{cnv_blacklist_regions}
 5 | \alias{cnv_blacklist_regions}
 6 | \title{Low coverage regions}
 7 | \format{
 8 | An object of class \code{data.frame} with 60 rows and 3 columns.
 9 | }
10 | \usage{
11 | cnv_blacklist_regions
12 | }
13 | \description{
14 | Low coverage regions
15 | }
16 | \keyword{CNV}
17 | \keyword{segmentation}
18 | 


--------------------------------------------------------------------------------
/man/nbl_segdat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{nbl_segdat}
 5 | \alias{nbl_segdat}
 6 | \title{TARGET Neuroblastoma CNV}
 7 | \format{
 8 | An object of class \code{data.frame} with 17680 rows and 6 columns.
 9 | }
10 | \usage{
11 | nbl_segdat
12 | }
13 | \description{
14 | TARGET CNV segmentation: https://target-data.nci.nih.gov/
15 | }
16 | \keyword{CNV}
17 | \keyword{SVs}
18 | \keyword{segmentation,}
19 | 


--------------------------------------------------------------------------------
/man/svdat_lung_ccle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{svdat_lung_ccle}
 5 | \alias{svdat_lung_ccle}
 6 | \title{Lung CCLE SVC data}
 7 | \format{
 8 | An object of class \code{data.frame} with 23040 rows and 8 columns.
 9 | }
10 | \usage{
11 | svdat_lung_ccle
12 | }
13 | \description{
14 | CCLE translocation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/
15 | }
16 | \keyword{SVs}
17 | 


--------------------------------------------------------------------------------
/man/segdat_lung_ccle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{segdat_lung_ccle}
 5 | \alias{segdat_lung_ccle}
 6 | \title{Lung CCLE CNV data}
 7 | \format{
 8 | An object of class \code{data.frame} with 134358 rows and 6 columns.
 9 | }
10 | \usage{
11 | segdat_lung_ccle
12 | }
13 | \description{
14 | CCLE CNV segmentation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/
15 | }
16 | \keyword{CNV}
17 | \keyword{segmentation}
18 | 


--------------------------------------------------------------------------------
/man/refseq_hg19.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{refseq_hg19}
 5 | \alias{refseq_hg19}
 6 | \title{Reference transcript and exon annotations for hg19}
 7 | \format{
 8 | An object of class \code{refSeqDat} of length 1.
 9 | }
10 | \usage{
11 | refseq_hg19
12 | }
13 | \description{
14 | refSeq annotations for hg19 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables)
15 | }
16 | \keyword{exons}
17 | \keyword{genes,}
18 | \keyword{transcripts,}
19 | 


--------------------------------------------------------------------------------
/man/refseq_hg38.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{data}
 4 | \name{refseq_hg38}
 5 | \alias{refseq_hg38}
 6 | \title{Reference transcript and exon annotations for hg38}
 7 | \format{
 8 | An object of class \code{refSeqDat} of length 1.
 9 | }
10 | \usage{
11 | refseq_hg38
12 | }
13 | \description{
14 | refSeq annotations for hg38 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables)
15 | }
16 | \keyword{exons}
17 | \keyword{genes,}
18 | \keyword{transcripts,}
19 | 


--------------------------------------------------------------------------------
/man/extract.bins-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \docType{methods}
 4 | \name{extract.bins}
 5 | \alias{extract.bins}
 6 | \alias{extract.bins,chromo.regs-method}
 7 | \title{Return the GenomicRanges object containing the genomic bins}
 8 | \usage{
 9 | extract.bins(object)
10 | 
11 | \S4method{extract.bins}{chromo.regs}(object)
12 | }
13 | \arguments{
14 | \item{object}{(chromo.regs) An object of class chromo.regs}
15 | }
16 | \value{
17 | a GenomicRanges object with defined genomic bins
18 | }
19 | \description{
20 | Return the GenomicRanges object containing the genomic bins
21 | }
22 | 


--------------------------------------------------------------------------------
/man/IQM.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal_functions.r
 3 | \name{IQM}
 4 | \alias{IQM}
 5 | \title{Inter-quantile mean}
 6 | \usage{
 7 | IQM(x, lowQ = 0.1, upQ = 0.9)
 8 | }
 9 | \arguments{
10 | \item{x}{numeric vector to compute interquantile average}
11 | 
12 | \item{lowQ}{lower quantile}
13 | 
14 | \item{upQ}{upper quantile}
15 | }
16 | \value{
17 | (numeric) the IQM value
18 | }
19 | \description{
20 | Obtains interquantile mean for a defined 'x' vector and both lower and upper quantiles
21 | }
22 | \examples{
23 | 
24 | x <- rnorm(100)
25 | IQM(x)
26 | }
27 | \keyword{interquartile}
28 | \keyword{statistics,}
29 | 


--------------------------------------------------------------------------------
/man/freq.threshold-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/freq.p.test.r
 3 | \docType{methods}
 4 | \name{freq.threshold}
 5 | \alias{freq.threshold}
 6 | \alias{freq.threshold,null.freq-method}
 7 | \title{Return frequency threshold from null.freq object}
 8 | \usage{
 9 | freq.threshold(object)
10 | 
11 | \S4method{freq.threshold}{null.freq}(object)
12 | }
13 | \arguments{
14 | \item{object}{(null.freq) An object of class null.freq}
15 | }
16 | \value{
17 | (numeric) the frequency threshold stored in the null.freq object
18 | }
19 | \description{
20 | Return frequency threshold from null.freq object
21 | }
22 | 


--------------------------------------------------------------------------------
/man/chr.sort.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate.input.data.r
 3 | \name{chr.sort}
 4 | \alias{chr.sort}
 5 | \title{Chromosome ordering}
 6 | \usage{
 7 | chr.sort(chrlist)
 8 | }
 9 | \arguments{
10 | \item{chrlist}{(character): a vector containing chromosome names (chr1, chr2...chrX,chrY  )}
11 | }
12 | \value{
13 | a character vector of sorted chromosomes
14 | }
15 | \description{
16 | A function to order a list of chromosomes
17 | }
18 | \examples{
19 | 
20 | chrlist <- paste("chr",c("X","Y",sample(1:22)),sep="")
21 | chr_sorted <- chr.sort(chrlist)
22 | }
23 | \keyword{CNV,}
24 | \keyword{genes}
25 | \keyword{segmentation,}
26 | 


--------------------------------------------------------------------------------
/man/refSeqDat-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{class}
 4 | \name{refSeqDat-class}
 5 | \alias{refSeqDat-class}
 6 | \alias{refSeqDat}
 7 | \title{Data class refSeqDat}
 8 | \arguments{
 9 | \item{data}{(data.table): transcript information}
10 | 
11 | \item{exonStarts}{(list): every transcript exonic start position}
12 | 
13 | \item{genome.v}{(character): the genome version encoding transcript data}
14 | }
15 | \value{
16 | an instance of the class 'refSeqDat' containing transcript exonic coordinates
17 | }
18 | \description{
19 | Class to store refseq data from UCSC containing exon level info for known transcripts
20 | }
21 | 


--------------------------------------------------------------------------------
/man/IQSD.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal_functions.r
 3 | \name{IQSD}
 4 | \alias{IQSD}
 5 | \title{Inter-quantile standard deviation}
 6 | \usage{
 7 | IQSD(x, lowQ = 0.1, upQ = 0.9)
 8 | }
 9 | \arguments{
10 | \item{x}{numeric vector to compute interquantile standard deviation}
11 | 
12 | \item{lowQ}{lower quantile}
13 | 
14 | \item{upQ}{upper quantile}
15 | }
16 | \value{
17 | (numeric) the IQSD value
18 | }
19 | \description{
20 | Obtains inter quantile standard deviation for a defined 'x' vector and both lower and upper quantiles
21 | }
22 | \examples{
23 | 
24 | x <- rnorm(100)
25 | IQSD(x)
26 | }
27 | \keyword{interquartile}
28 | \keyword{statistics,}
29 | 


--------------------------------------------------------------------------------
/man/d3gb.chr.lim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal_functions.r
 3 | \name{d3gb.chr.lim}
 4 | \alias{d3gb.chr.lim}
 5 | \title{Chromosome start and end}
 6 | \usage{
 7 | d3gb.chr.lim(genome.v)
 8 | }
 9 | \arguments{
10 | \item{genome.v}{(character) reference genome version to retrieve gene annotations (hg19 or GRCh37 and hg38 or GRCh38)}
11 | }
12 | \value{
13 | (data.table) a table containing start and end positions for each chromosome
14 | }
15 | \description{
16 | Obtains chromosome start and end positions from a reference genome version
17 | }
18 | \examples{
19 | 
20 | d3gb.chr.lim(genome.v="hg19")
21 | 
22 | }
23 | \keyword{CNV,}
24 | \keyword{genes}
25 | \keyword{segmentation,}
26 | 


--------------------------------------------------------------------------------
/man/map2color.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal_functions.r
 3 | \name{map2color}
 4 | \alias{map2color}
 5 | \title{Color map from numeric vector}
 6 | \usage{
 7 | map2color(x, pal = NULL, limits = NULL)
 8 | }
 9 | \arguments{
10 | \item{x}{numeric vector}
11 | 
12 | \item{pal}{color palette}
13 | 
14 | \item{limits}{numeric limit for color mapping}
15 | }
16 | \value{
17 | a color vector graded according to x
18 | }
19 | \description{
20 | Produces a vector of colors based on a given palette. The colors are defined by the input vector
21 | }
22 | \examples{
23 | 
24 | x <- rnorm(100)
25 | x_color <- map2color(x)
26 | head(x_color)
27 | }
28 | \keyword{color,}
29 | \keyword{number}
30 | 


--------------------------------------------------------------------------------
/man/genecnv-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gene.cnv.r
 3 | \docType{class}
 4 | \name{genecnv-class}
 5 | \alias{genecnv-class}
 6 | \alias{genecnv}
 7 | \title{Data class genecnv}
 8 | \arguments{
 9 | \item{cnvmat}{(data.frame): matrix containing average CNV per gene (rows) for each sample (columns)}
10 | 
11 | \item{genesgr}{(S4): a GenomicRanges object with genomic feature annotations such as gene coordinates}
12 | 
13 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv}
14 | 
15 | \item{param}{(list):}
16 | }
17 | \value{
18 | an instance of the class 'genecnv' containing gene level copy number info
19 | }
20 | \description{
21 | Class to store gene level copy number data
22 | }
23 | 


--------------------------------------------------------------------------------
/man/get.genesgr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get.genesgr.r
 3 | \name{get.genesgr}
 4 | \alias{get.genesgr}
 5 | \title{Genes GRanges}
 6 | \usage{
 7 | get.genesgr(genome.v = "hg19", chrlist = NULL)
 8 | }
 9 | \arguments{
10 | \item{genome.v}{(hg19 or GRCh37 and hg38 or GRCh38) reference genome version to retrieve gene annotations}
11 | 
12 | \item{chrlist}{(character)}
13 | }
14 | \value{
15 | a GRanges class object from the specified human genome version
16 | }
17 | \description{
18 | Retrieves a GRanges object containing gene annotations for a specified genome version
19 | }
20 | \examples{
21 | 
22 | get.genesgr(genome.v = "hg19",chrlist=NULL)
23 | 
24 | }
25 | \keyword{CNV,}
26 | \keyword{genes}
27 | \keyword{segmentation,}
28 | 


--------------------------------------------------------------------------------
/man/ave.segmean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/segment.means.r
 3 | \name{ave.segmean}
 4 | \alias{ave.segmean}
 5 | \title{Average sample CNV}
 6 | \usage{
 7 | ave.segmean(cnv)
 8 | }
 9 | \arguments{
10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
11 | }
12 | \value{
13 | (numeric) a vector containing the weighted average logR from segmented data
14 | }
15 | \description{
16 | Obtain the weighted average segment mean log2 ratios from each sample within a CNV segmentation data.frame
17 | }
18 | \examples{
19 | 
20 | ## validate input CNV data.frames
21 | cnv <- validate.cnv(segdat_lung_ccle)
22 | 
23 | ave_seg_mean <- ave.segmean(cnv)
24 | head(ave_seg_mean)
25 | }
26 | \keyword{CNV,}
27 | \keyword{segmentation}
28 | 


--------------------------------------------------------------------------------
/man/svcnvio-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate.input.data.r
 3 | \docType{class}
 4 | \name{svcnvio-class}
 5 | \alias{svcnvio-class}
 6 | \alias{svcnvio}
 7 | \title{Data class svcnvio}
 8 | \arguments{
 9 | \item{data}{(data.table): cnv or svc data.table to be validated by 'validate.cnv' or 'validate.svc' respectively}
10 | 
11 | \item{type}{(character): the data type "cnv" or "svc" defined by "validate.cnv" or "validate.svc" respectively}
12 | }
13 | \value{
14 | an instance of the class 'svcnvio' containing SV data derived from CNV or SVC data types;  A unique id (uid) column is also added
15 | }
16 | \description{
17 | Class to store CNV segmentation or SVC data
18 | }
19 | \seealso{
20 | Additional data format information in the man pages of validate.cnv and validate.svc
21 | }
22 | 


--------------------------------------------------------------------------------
/man/gene.symbol.info-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/svpluscnv.data.r
 3 | \docType{methods}
 4 | \name{gene.symbol.info}
 5 | \alias{gene.symbol.info}
 6 | \alias{gene.symbol.info,refSeqDat-method}
 7 | \title{Return coordinates of a specified gene}
 8 | \usage{
 9 | gene.symbol.info(object, symbol)
10 | 
11 | \S4method{gene.symbol.info}{refSeqDat}(object, symbol)
12 | }
13 | \arguments{
14 | \item{object}{(refSeqDat) An object of class refSeqDat containing gene transcript mapping. svpluscnv includes two selfloaded objects: refseq_hg19 & refseq_hg38}
15 | 
16 | \item{symbol}{(character) a valid HGNC gene symbol included in the refseq object}
17 | }
18 | \value{
19 | A list containing chr, start, end coordinates
20 | }
21 | \description{
22 | Return coordinates of a specified gene
23 | }
24 | 


--------------------------------------------------------------------------------
/man/hbd.mat-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \docType{methods}
 4 | \name{hbd.mat}
 5 | \alias{hbd.mat}
 6 | \alias{hbd.mat,chromo.regs-method}
 7 | \title{Return the binary matrix containing high confidence high-breakpoint-density region definitions}
 8 | \usage{
 9 | hbd.mat(object, conf = "hc")
10 | 
11 | \S4method{hbd.mat}{chromo.regs}(object, conf = "hc")
12 | }
13 | \arguments{
14 | \item{object}{(chromo.regs) An object of class chromo.regs}
15 | 
16 | \item{conf}{(character) Either "hc" for high confidence HBD or else include all}
17 | }
18 | \value{
19 | a binary matrix containing high-breakpoint-density region definitions
20 | }
21 | \description{
22 | Return the binary matrix containing high confidence high-breakpoint-density region definitions
23 | }
24 | 


--------------------------------------------------------------------------------
/man/chromosome.limit.coords.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate.input.data.r
 3 | \name{chromosome.limit.coords}
 4 | \alias{chromosome.limit.coords}
 5 | \title{Chromosome limit map}
 6 | \usage{
 7 | chromosome.limit.coords(cnv)
 8 | }
 9 | \arguments{
10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
11 | }
12 | \value{
13 | data.table indicating start and end mapped positions of each chromosome
14 | }
15 | \description{
16 | Obtain chromosome start and end positions based on mapped regions from CNV segmentation data
17 | }
18 | \examples{
19 | 
20 | ## validate input data.frame
21 | cnv <- validate.cnv(segdat_lung_ccle)
22 | 
23 | chr.lim <- chromosome.limit.coords(cnv)
24 | }
25 | \keyword{CNV,}
26 | \keyword{mapping}
27 | \keyword{segmentation,}
28 | 


--------------------------------------------------------------------------------
/man/cnvfreq-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cnv.freq.plot.r
 3 | \docType{class}
 4 | \name{cnvfreq-class}
 5 | \alias{cnvfreq-class}
 6 | \alias{cnvfreq}
 7 | \title{Data class cnvfreq}
 8 | \arguments{
 9 | \item{freqsum}{(data.table): the frequency of gains and losses in each defined genomic bin}
10 | 
11 | \item{chrlimits}{(data.frame): a table containing the chromosome limit coordinates and global genomic coordinates}
12 | 
13 | \item{bin.mat}{(numeric): a matrix of genomic bins versus samples}
14 | 
15 | \item{plot}{(graphical): a recorded plot object}
16 | 
17 | \item{param}{(list): a list of parameters provided}
18 | }
19 | \value{
20 | an instance of the class 'cnvfreq'
21 | }
22 | \description{
23 | Class to store the frequency of CNV gains and losses across genomic bins
24 | }
25 | 


--------------------------------------------------------------------------------
/man/break.iqm-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/brk.burden.iqm.r
 3 | \docType{class}
 4 | \name{break.iqm-class}
 5 | \alias{break.iqm-class}
 6 | \alias{break.iqm}
 7 | \title{Data class break.iqm}
 8 | \arguments{
 9 | \item{summary}{(data.table): the frequency of gains and losses in each defined genomic bin}
10 | 
11 | \item{brk.mat}{(numeric): a matrix of genomic bins versus samples}
12 | 
13 | \item{chrlimits}{(data.frame): a table containing the chromosome limit coordinates and global genomic coordinates}
14 | 
15 | \item{plot}{(graphical): a recorded plot object}
16 | 
17 | \item{param}{(list): a list of parameters provided}
18 | }
19 | \value{
20 | an instance of the class 'break.iqm'
21 | }
22 | \description{
23 | Class to store breakpoint annotations in association with genomic features (e.g. gene loci)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/null.freq-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/freq.p.test.r
 3 | \docType{class}
 4 | \name{null.freq-class}
 5 | \alias{null.freq-class}
 6 | \alias{null.freq}
 7 | \title{Data class null.freq}
 8 | \arguments{
 9 | \item{freq.cut}{(numeric): the value from observed distribution that satisfies certain p-value cutoff}
10 | 
11 | \item{pvalues}{(numeric): a vector containing the total number of breakpoints in each sample}
12 | 
13 | \item{observed}{(numeric): vector of observed distribution}
14 | 
15 | \item{null}{(numeric): vector of null distribution}
16 | 
17 | \item{param}{(list): a list of parameters provided}
18 | }
19 | \value{
20 | an instance of the class 'null.freq'
21 | }
22 | \description{
23 | Class to store observed and null distributions as well as empirical corrected p-values associated with observed values
24 | }
25 | 


--------------------------------------------------------------------------------
/man/createRandomString.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal_functions.r
 3 | \name{createRandomString}
 4 | \alias{createRandomString}
 5 | \title{Unique random string generator}
 6 | \usage{
 7 | createRandomString(n = 1, strlen = 10)
 8 | }
 9 | \arguments{
10 | \item{n}{the number of unique random strings to return}
11 | 
12 | \item{strlen}{random string length}
13 | }
14 | \value{
15 | a vector of unique random character strings
16 | }
17 | \description{
18 | Generates n unique random character strings of a given length. Note that the length must be big enough in order to avoid offsetting the number n of strings requested
19 | }
20 | \examples{
21 | 
22 | # To ensure reproducibility make sure to set the seed
23 | set.seed(123456789)
24 | 
25 | createRandomString(1, 10)
26 | }
27 | \keyword{random}
28 | \keyword{string}
29 | 


--------------------------------------------------------------------------------
/man/validate.cnv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate.input.data.r
 3 | \name{validate.cnv}
 4 | \alias{validate.cnv}
 5 | \title{Initialization of CNV data}
 6 | \usage{
 7 | validate.cnv(cnv.df)
 8 | }
 9 | \arguments{
10 | \item{cnv.df}{(data.frame) segmentation data with at least 6 columns: sample, chromosome, start, end, probes, segment_mean}
11 | }
12 | \value{
13 | an instance of the class 'svcnvio' containing segmentation data derived from CNV data type;  A unique id (uid) column is also added
14 | }
15 | \description{
16 | This function validates and reformats the CNV segmentation data type containing copy number log-ratios. It is used internally by 'svpluscnv' functions that require this type of data.
17 | }
18 | \examples{
19 | 
20 | validate.cnv(segdat_lung_ccle)
21 | }
22 | \keyword{CNV,}
23 | \keyword{segmentation}
24 | 


--------------------------------------------------------------------------------
/man/med.segmean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/segment.means.r
 3 | \name{med.segmean}
 4 | \alias{med.segmean}
 5 | \title{Median sample CNV}
 6 | \usage{
 7 | med.segmean(cnv)
 8 | }
 9 | \arguments{
10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
11 | }
12 | \value{
13 | (numeric) a vector containing the median logR value of a segmented data.frame
14 | }
15 | \description{
16 | Obtain the median weighted segment mean from a segmentation file; the weighted median refers to the logR that occupies the center of all segments ordered by their log ratio
17 | }
18 | \examples{
19 | 
20 | ## validate input CNV data.frames
21 | cnv <- validate.cnv(segdat_lung_ccle)
22 | 
23 | med_seg_mean <- med.segmean(cnv)
24 | head(med_seg_mean)
25 | }
26 | \keyword{CNV,}
27 | \keyword{segmentation}
28 | 


--------------------------------------------------------------------------------
/man/merge2lists.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal_functions.r
 3 | \name{merge2lists}
 4 | \alias{merge2lists}
 5 | \title{Merge two lists}
 6 | \usage{
 7 | merge2lists(x, y, fun = "unique")
 8 | }
 9 | \arguments{
10 | \item{x}{(list): input list 1}
11 | 
12 | \item{y}{(list): input list 2}
13 | 
14 | \item{fun}{(character): Either 'unique' or 'intersect' are accepted}
15 | }
16 | \value{
17 | (list) merged list from x and y
18 | }
19 | \description{
20 | Merge of 2 lists into one that contains unique or intersect vectors for each list entry with shared names
21 | }
22 | \examples{
23 | 
24 | x <- sapply(letters[1:10], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE )
25 | y <- sapply(letters[5:15], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE )
26 | merge2lists(x,y)
27 | }
28 | \keyword{lists}
29 | \keyword{merge}
30 | 


--------------------------------------------------------------------------------
/man/breaks-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/breakpoint.density.r
 3 | \docType{class}
 4 | \name{breaks-class}
 5 | \alias{breaks-class}
 6 | \alias{breaks}
 7 | \title{Data class breaks}
 8 | \arguments{
 9 | \item{breaks}{(data.table): the breakpoint info containing data.table, this will be occupied by the CNV segmentation data in the case of cnv.break.annot or SV for sv.break.annot. Unique random string rownames are added to the returned breaks data.frame.}
10 | 
11 | \item{burden}{(numeric): a vector containing the total number of breakpoints in each sample}
12 | 
13 | \item{param}{(list): a list of parameters provided}
14 | }
15 | \value{
16 | an instance of the class 'breaks' containing breakpoint and breakpoint burden information
17 | }
18 | \description{
19 | Class to store breakpoint annotations in association with genomic features (e.g. gene loci)
20 | }
21 | 


--------------------------------------------------------------------------------
/man/upgr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/break.annot.r
 3 | \name{upgr}
 4 | \alias{upgr}
 5 | \title{Generate GRanges of upstream regions}
 6 | \usage{
 7 | upgr(ggr, upstr = 50000)
 8 | }
 9 | \arguments{
10 | \item{ggr}{(S4) a GenomicRanges object containing gene annotations. It is crucial that the genome version of 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).}
11 | 
12 | \item{upstr}{(numeric) size in base pairs to define gene upstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of upstream regions.}
13 | }
14 | \value{
15 | (S4) a GRanges object of upstream regions
16 | }
17 | \description{
18 | Generate GRanges of upstream regions
19 | }
20 | 


--------------------------------------------------------------------------------
/man/dngr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/break.annot.r
 3 | \name{dngr}
 4 | \alias{dngr}
 5 | \title{Generate GRanges of downstream regions}
 6 | \usage{
 7 | dngr(ggr, dnstr = 50000)
 8 | }
 9 | \arguments{
10 | \item{ggr}{(S4) a GenomicRanges object containing gene annotations. It is crucial that the genome version of 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).}
11 | 
12 | \item{dnstr}{(numeric) size in base pairs to define gene downstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of downstream regions.}
13 | }
14 | \value{
15 | (S4) a GRanges object of downstream regions
16 | }
17 | \description{
18 | Generate GRanges of downstream regions
19 | }
20 | 


--------------------------------------------------------------------------------
/man/svc.breaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/breakpoint.density.r
 3 | \name{svc.breaks}
 4 | \alias{svc.breaks}
 5 | \title{Identify SVC breakpoints}
 6 | \usage{
 7 | svc.breaks(svc, chrlist = NULL, low.cov = NULL)
 8 | }
 9 | \arguments{
10 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc}
11 | 
12 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...}
13 | 
14 | \item{low.cov}{(data.table) a data.table (chrom, start, end) indicating low coverage regions to exclude from the analysis}
15 | }
16 | \value{
17 | an instance of the class 'breaks' containing breakpoint and breakpoint burden information
18 | }
19 | \description{
20 | Transform structural variant (SVC) data.frame into a 'breaks' object
21 | }
22 | \examples{
23 | 
24 | ## Obtain breakpoints from SV calls data
25 | svc <- validate.svc(svdat_lung_ccle)
26 | 
27 | svc.breaks(svc)
28 | }
29 | \keyword{Structural}
30 | \keyword{variants}
31 | 


--------------------------------------------------------------------------------
/man/geneBreakOverlap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/break.annot.r
 3 | \name{geneBreakOverlap}
 4 | \alias{geneBreakOverlap}
 5 | \title{Find overlaps between genomic features and breakpoints}
 6 | \usage{
 7 | geneBreakOverlap(ggr, svgr)
 8 | }
 9 | \arguments{
10 | \item{ggr}{(S4) a GenomicRanges object containing gene annotations. It is crucial that the genome version of 'genesgr' and the input 'sv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).}
11 | 
12 | \item{svgr}{(S4) a GenomicRanges object containing SV breakpoint ends. Metadata must contain 'rowid' and 'sampleid' fields. Seqnames are expected in the format (chr1, chr2, ...). Used by 'svc.break.annot' and 'cnv.break.annot'}
13 | }
14 | \value{
15 | a list containing two lists: geneBreaks, geneSamples
16 | }
17 | \description{
18 | Find overlaps between genomic features and breakpoints
19 | }
20 | \keyword{internal}
21 | 


--------------------------------------------------------------------------------
/man/chr.arm.cnv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/chr.arm.cnv.r
 3 | \name{chr.arm.cnv}
 4 | \alias{chr.arm.cnv}
 5 | \title{Chromosome arm mean CNV}
 6 | \usage{
 7 | chr.arm.cnv(cnv, genome.v = "hg19", verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv}
11 | 
12 | \item{genome.v}{(character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres}
13 | 
14 | \item{verbose}{(logical) whether to return internal messages}
15 | }
16 | \value{
17 | a matrix of chromosome arms (rows) versus samples (cols) with average segment logRs per cell
18 | }
19 | \description{
20 | Obtains a matrix with the weighted average CN per chromosome arm
21 | }
22 | \examples{
23 | 
24 | # initialize CNV data
25 | cnv <- validate.cnv(segdat_lung_ccle)
26 | 
27 | arm_mat <- chr.arm.cnv(cnv, genome.v="hg19")
28 | dim(arm_mat)
29 | }
30 | \keyword{CNV,}
31 | \keyword{arm}
32 | \keyword{chromosome}
33 | \keyword{segmentation,}
34 | 


--------------------------------------------------------------------------------
/man/hot.spot.samples.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/hot.spot.samples.R
 3 | \name{hot.spot.samples}
 4 | \alias{hot.spot.samples}
 5 | \title{Hot-spot sample retrieval}
 6 | \usage{
 7 | hot.spot.samples(chromo.regs.obj, freq.cut)
 8 | }
 9 | \arguments{
10 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs}
11 | 
12 | \item{freq.cut}{(numeric) the hot spot threshold above which peaks are defined for sample ID retrieval}
13 | }
14 | \value{
15 | a list comprising two lists: peakRegions, peakRegionsSamples
16 | }
17 | \description{
18 | Collects sample ids with shattered regions detected at hot-spots based on certain p-value cutoff
19 | }
20 | \examples{
21 | # validate input data.frames
22 | cnv <- validate.cnv(segdat_lung_ccle)
23 | svc <- validate.svc(svdat_lung_ccle)
24 | 
25 | chromo.regs.obj <- shattered.regions(cnv,svc)
26 | mat<-hbd.mat(chromo.regs.obj)
27 | 
28 | pcut.obj <- freq.p.test(mat,plot=FALSE)
29 | pcut <- freq.threshold(pcut.obj)
30 | 
31 | res <- hot.spot.samples(chromo.regs.obj,pcut)
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/man/amp.del.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gene.cnv.r
 3 | \name{amp.del}
 4 | \alias{amp.del}
 5 | \title{Amplifications and deletions}
 6 | \usage{
 7 | amp.del(genecnv.obj, logr.cut = 2)
 8 | }
 9 | \arguments{
10 | \item{genecnv.obj}{(genecnv) an instance of the class 'genecnv' containing gene level copy number info}
11 | 
12 | \item{logr.cut}{(numeric) the log-ratio cutoff above which genes are considered amplified (e.g 2 = 8 copies for amplification and 0.5 copies for deep deletions, in diploid regions)}
13 | }
14 | \value{
15 | (list) A list of lists including amplified.list, amplified.rank, deepdel.list and deepdel.rank
16 | }
17 | \description{
18 | Retrieve amplification and deletion events from a 'genecnv.obj' generated by 'gene.cnv' function
19 | }
20 | \examples{
21 | 
22 | ## validate input data.frames
23 | cnv <- validate.cnv(segdat_lung_ccle)
24 | 
25 | genecnv.obj <- gene.cnv(cnv)
26 | 
27 | geneampdel <- amp.del(genecnv.obj, logr.cut = 2)
28 | lapply(geneampdel,head)
29 | }
30 | \keyword{CNV,}
31 | \keyword{genes}
32 | \keyword{segmentation,}
33 | 


--------------------------------------------------------------------------------
/man/shattered.eval.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \name{shattered.eval}
 4 | \alias{shattered.eval}
 5 | \title{Evaluate true catastrophic events
 6 | Evaluate shattered regions based on interleaved breaks and breakpoint dispersion parameters in order to identify true catastrophic chromosomal alterations}
 7 | \usage{
 8 | shattered.eval(
 9 |   chromo.regs.obj,
10 |   interleaved.cut = 0.5,
11 |   dist.iqm.cut = 1e+05,
12 |   verbose = TRUE
13 | )
14 | }
15 | \arguments{
16 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs}
17 | 
18 | \item{interleaved.cut}{(numeric) the percentage of non interleaved structural variant calls}
19 | 
20 | \item{dist.iqm.cut}{(numeric) interquartile mean of the distance between breakpoints within a shattered region}
21 | 
22 | \item{verbose}{(logical) whether to return internal messages}
23 | }
24 | \value{
25 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
26 | }
27 | \description{
28 | Evaluate true catastrophic events
29 | Evaluate shattered regions based on interleaved breaks and breakpoint dispersion parameters in order to identify true catastrophic chromosomal alterations
30 | }
31 | 


--------------------------------------------------------------------------------
/man/segment.gap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/clean.cnv.artifact.r
 3 | \name{segment.gap}
 4 | \alias{segment.gap}
 5 | \title{CNV segmentation gap filling}
 6 | \usage{
 7 | segment.gap(cnv, minsize = 5000, chrlist = NULL, verbose = FALSE)
 8 | }
 9 | \arguments{
10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
11 | 
12 | \item{minsize}{(numeric) the minimum gap size required to fill the gap}
13 | 
14 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...}
15 | 
16 | \item{verbose}{(logical) whether to return internal messages}
17 | }
18 | \value{
19 | a data.frame containing CNV data
20 | }
21 | \description{
22 | Fills the gaps in a segmentation data.frame. Chromosome limits are defined for the complete segmentation dataset then segments fill the missing terminal regions. 
23 | The CN log-ratio of the added segments is set to the average of the closest neighbours in each sample.
24 | }
25 | \examples{
26 | 
27 | ## validate input data.frames
28 | cnv <- validate.cnv(segdat_lung_ccle)
29 | 
30 | cnv2 <- segment.gap(cnv)
31 | cnv2
32 | }
33 | \keyword{CNV,}
34 | \keyword{segmentation}
35 | 


--------------------------------------------------------------------------------
/man/bed2chromo.reg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \name{bed2chromo.reg}
 4 | \alias{bed2chromo.reg}
 5 | \title{Transforms a bed format data.frame containing genomic regions into a matrix of n samples versus m defined genomic bins where bins overlapping with bed segments take value = 1}
 6 | \usage{
 7 | bed2chromo.reg(
 8 |   bed,
 9 |   bingr = NULL,
10 |   genome.v = "hg19",
11 |   window.size = 10,
12 |   slide.size = 2
13 | )
14 | }
15 | \arguments{
16 | \item{bed}{(data.frame) a bed format data.frame containing genomic regions}
17 | 
18 | \item{bingr}{(S4) a GenomicRanges object containing the genomic bins; if NULL, bins are generated according to genome.v}
19 | 
20 | \item{genome.v}{(hg19 or hg38) reference genome version to generate genomic bins (ignored if bingr is not NULL)}
21 | 
22 | \item{window.size}{(numeric) size in megabases to generate genomic bins}
23 | 
24 | \item{slide.size}{(numeric) size in megabases of the sliding genomic window; slide.size must be <= 1}
25 | }
26 | \value{
27 | an instance of the class 'chromo.regs' containing information about shattered regions
28 | }
29 | \description{
30 | Transforms a bed format data.frame containing genomic regions into a matrix of n samples versus m defined genomic bins where bins overlapping with bed segments take value = 1
31 | }
32 | 


--------------------------------------------------------------------------------
/man/pct.genome.changed.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pct.genome.changed.r
 3 | \name{pct.genome.changed}
 4 | \alias{pct.genome.changed}
 5 | \title{Percent genome change calculation}
 6 | \usage{
 7 | pct.genome.changed(cnv, fc.pct = 0.2, discard.sex = TRUE)
 8 | }
 9 | \arguments{
10 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
11 | 
12 | \item{fc.pct}{(numeric) percentage CNV gain/loss for a segment to be considered changed (e.g. 0.2 = 20 percent change: segmean < 0.8 || segmean > 1.2)}
13 | 
14 | \item{discard.sex}{(logical) whether sex chromosomes should be discarded}
15 | }
16 | \value{
17 | (numeric) vector containing percent genome changed values (0-1)
18 | }
19 | \description{
20 | Calculates the percentage of genome changed using CNV segmentation profiles. Genome change is defined based on the fold change CNV log-ratio between a sample and a reference.
21 | }
22 | \examples{
23 | 
24 | ## validate input CNV data.frames
25 | cnv <- validate.cnv(segdat_lung_ccle)
26 | 
27 | pct_changed <- pct.genome.changed(cnv)
28 | head(pct_changed)
29 | }
30 | \seealso{
31 | Additional data format information in the man pages of validate.cnv
32 | }
33 | \keyword{CNV,}
34 | \keyword{segmentation}
35 | 


--------------------------------------------------------------------------------
/man/get.chr.bins.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \name{get.chr.bins}
 4 | \alias{get.chr.bins}
 5 | \title{Generates a GenomicRanges object containing genomic bins based on a given bin size. If a cnv (svcnvio) object is provided the chromosome limits 
 6 | will be obtained from mapped regions, otherwise chromosome limits will be obtained from the database (D3GB)}
 7 | \usage{
 8 | get.chr.bins(cnv = NULL, genome.v = "hg19", window.size = 10, slide.size = 2)
 9 | }
10 | \arguments{
11 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
12 | 
13 | \item{genome.v}{(hg19 or hg38) reference genome version to generate genomic bins (ignored if cnv is not NULL)}
14 | 
15 | \item{window.size}{(numeric) size in megabases to generate genomic bins}
16 | 
17 | \item{slide.size}{(numeric) size in megabases of the sliding genomic window; slide.size must be <= 1}
18 | }
19 | \value{
20 | a GenomicRanges object containing the genomic bins
21 | }
22 | \description{
23 | Generates a GenomicRanges object containing genomic bins based on a given bin size. If a cnv (svcnvio) object is provided the chromosome limits 
24 | will be obtained from mapped regions, otherwise chromosome limits will be obtained from the database (D3GB)
25 | }
26 | 


--------------------------------------------------------------------------------
/man/match.breaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/breakpoint.density.r
 3 | \name{match.breaks}
 4 | \alias{match.breaks}
 5 | \title{Breakpoint matching}
 6 | \usage{
 7 | match.breaks(brk1, brk2, maxgap = 1e+05, verbose = FALSE, plot = TRUE)
 8 | }
 9 | \arguments{
10 | \item{brk1}{(S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks`}
11 | 
12 | \item{brk2}{(S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks` to compare against brk1}
13 | 
14 | \item{maxgap}{(numeric) distance (base pairs) limit for breakpoints to be considered colocalized}
15 | 
16 | \item{verbose}{(logical) whether to return internal messages}
17 | 
18 | \item{plot}{(logical) whether to plot into open device}
19 | }
20 | \value{
21 | an object containing co-localizing breakpoints from two input 'breaks'
22 | }
23 | \description{
24 | Match common breakpoints from two different datasets or data types based on their co-localization in the genome.
25 | }
26 | \examples{
27 | 
28 | # initialize CNV and SVC data
29 | cnv <- validate.cnv(segdat_lung_ccle)
30 | svc <- validate.svc(svdat_lung_ccle)
31 | 
32 | ## Obtain breakpoints from CNV and SVC
33 | brk1 <- cnv.breaks(cnv)
34 | brk2 <- svc.breaks(svc)
35 | 
36 | common.brk <- match.breaks(brk1, brk2)
37 | 
38 | }
39 | \keyword{CNV,}
40 | \keyword{SV,}
41 | \keyword{breakpoints}
42 | \keyword{genomic}
43 | 


--------------------------------------------------------------------------------
/man/gene.cnv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gene.cnv.r
 3 | \name{gene.cnv}
 4 | \alias{gene.cnv}
 5 | \title{Gene-level CNV}
 6 | \usage{
 7 | gene.cnv(
 8 |   cnv,
 9 |   genome.v = "hg19",
10 |   genesgr = NULL,
11 |   chrlist = NULL,
12 |   fill.gaps = FALSE,
13 |   verbose = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
18 | 
19 | \item{genome.v}{(hg19 or hg38) reference genome version to fetch gene annotations when 'genesgr=NULL'}
20 | 
21 | \item{genesgr}{(S4) a GenomicRanges object containing genomic feature annotations (if not NULL overrides genome.v). It must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...)}
22 | 
23 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...}
24 | 
25 | \item{fill.gaps}{(logical) whether to fill the gaps in the segmentation file using gap neighbour segmean average as log ratio}
26 | 
27 | \item{verbose}{(logical) whether to return internal messages}
28 | }
29 | \value{
30 | an instance of the class 'genecnv' containing gene level copy number info
31 | }
32 | \description{
33 | Obtains a gene-level copy number matrix from a segmentation profile.
34 | }
35 | \examples{
36 | 
37 | ## validate input data.frames
38 | cnv <- validate.cnv(segdat_lung_ccle)
39 | 
40 | gene.cnv(cnv)
41 | }
42 | \keyword{CNV,}
43 | \keyword{genes}
44 | \keyword{segmentation,}
45 | 


--------------------------------------------------------------------------------
/R/get.genesgr.r:
--------------------------------------------------------------------------------
 1 | #' Genes GRanges
 2 | #'
 3 | #' Retrieves a GRanges object containing gene annotations for a specified genome version
 4 | #'
 5 | #' @param genome.v (character) reference genome version to retrieve gene annotations; either hg19 (or GRCh37) or hg38 (or GRCh38)
 6 | #' @param chrlist (character) chromosomes to keep (chr1, chr2, etc...); if NULL, chr1 to chr22, chrX and chrY are kept
 7 | #' @return a GRanges class object from the specified human genome version; the 'gene_id' metadata field holds gene symbols and genes without a symbol are dropped
 8 | #' @keywords CNV, segmentation, genes
 9 | #' @export
10 | #' @examples
11 | #'
12 | #' get.genesgr(genome.v = "hg19",chrlist=NULL)
13 | #'
14 | 
15 | get.genesgr <- function(genome.v="hg19",chrlist=NULL){
16 | 
17 |     # Pick the UCSC knownGene annotation that matches the requested genome version
18 |     if(genome.v %in% c("hg19","GRCh37")){
19 |         genesgr <- GenomicFeatures::genes(TxDb.Hsapiens.UCSC.hg19.knownGene, columns="gene_id")
20 |     }else if(genome.v %in% c("hg38","GRCh38")){
21 |         genesgr <- GenomicFeatures::genes(TxDb.Hsapiens.UCSC.hg38.knownGene, columns="gene_id")
22 |     }else{stop("Unspecified, or non available genome")}
23 | 
24 |     # Default to the autosomes plus both sex chromosomes
25 |     if(is.null(chrlist)){
26 |         chrlist <- paste0("chr",c(1:22,"X","Y"))
27 |     }
28 | 
29 |     # Replace Entrez ids by gene symbols; messages raised during the mapping are
30 |     # muffled as conditions instead of sinking the whole message stream
31 |     genesgr@elementMetadata$gene_id <- suppressMessages(
32 |         mapIds(org.Hs.eg.db, genesgr@elementMetadata$gene_id, 'SYMBOL', 'ENTREZID')
33 |     )
34 | 
35 |     # Drop genes without a symbol, then restrict to the requested chromosomes
36 |     genesgr <- genesgr[which(!is.na(genesgr$gene_id))]
37 |     genesgr <- genesgr[which(lengths(genesgr@elementMetadata$gene_id) > 0)]
38 |     genesgr <- genesgr[which(as.character(genesgr@seqnames) %in% chrlist)]
39 | 
40 |     return(genesgr)
41 | }
42 | 


--------------------------------------------------------------------------------
/man/validate.svc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/validate.input.data.r
 3 | \name{validate.svc}
 4 | \alias{validate.svc}
 5 | \title{Initialization of SVC data}
 6 | \usage{
 7 | validate.svc(sv.df)
 8 | }
 9 | \arguments{
10 | \item{sv.df}{(data.frame) structural variant table including the following fields: sample, chrom1, pos1, strand1, chrom2, pos2, strand2, svclass}
11 | }
12 | \value{
13 | an instance of the class 'svcnvio' containing SV data derived from SVC data type;  A unique id (uid) column is also added
14 | }
15 | \description{
16 | This function validates and reformats the SV (structural variant) calls input. It is used internally by 'svpluscnv' functions that require this type of data.
17 | A few formatting rules are enforced:
18 | 1) The input must contain 8 columns in the following order (sample ID, chromosome of origin, strand of origin, position of origin, chromosome of destination, strand of destination, position of destination, SV class)
19 | 2) SV classes accepted: DEL(deletion), DUP(duplication), INS(insertion), TRA(translocation), INV(inversion) and BND(break end)
20 | 3) Any variant in which chromosome of origin and destination differ are encoded as TRA (translocation)
21 | 4) pos1 < pos2 is enforced for all variants in which chromosome of origin and destination are the same
22 | 5) The class BND can be used to operate with complex events as long as both break ends are the same chromosome
23 | }
24 | \examples{
25 | 
26 | validate.svc(svdat_lung_ccle)
27 | }
28 | \keyword{SV,}
29 | \keyword{structural}
30 | \keyword{variants}
31 | 


--------------------------------------------------------------------------------
/man/shattered.map.plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.map.plot.r
 3 | \name{shattered.map.plot}
 4 | \alias{shattered.map.plot}
 5 | \title{Shattered regions genomic map}
 6 | \usage{
 7 | shattered.map.plot(
 8 |   chromo.regs.obj,
 9 |   conf = "hc",
10 |   genome.v = "hg19",
11 |   chrlist = NULL,
12 |   freq.cut = NULL,
13 |   add.legend = "top"
14 | )
15 | }
16 | \arguments{
17 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs}
18 | 
19 | \item{conf}{(character) either 'hc' for high confidence objects or else all included}
20 | 
21 | \item{genome.v}{(character)  reference genome version to draw chromosome limits and centromeres either hg19 or hg38 accepted}
22 | 
23 | \item{chrlist}{(character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included}
24 | 
25 | \item{freq.cut}{the value at which to draw a horizontal line; use 'freq.p.test' to obtain a threshold for statistically significant hot spots}
26 | 
27 | \item{add.legend}{the position of the legend in the plot; if NULL, no legend will be drawn}
28 | }
29 | \value{
30 | a plot into open device
31 | }
32 | \description{
33 | Plots a genome wide map of shattered region frequencies
34 | }
35 | \examples{
36 | 
37 | 
38 | ## validate input data.frames
39 | cnv <- validate.cnv(segdat_lung_ccle)
40 | svc <- validate.svc(svdat_lung_ccle)
41 | 
42 | ## obtain shattered regions
43 | chromo.regs.obj <- shattered.regions(cnv,svc)
44 | 
45 | shattered.map.plot(chromo.regs.obj)
46 | }
47 | \keyword{chromosome}
48 | \keyword{genome}
49 | \keyword{map}
50 | \keyword{shattering,}
51 | 


--------------------------------------------------------------------------------
/man/clean.cnv.artifact.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/clean.cnv.artifact.r
 3 | \name{clean.cnv.artifact}
 4 | \alias{clean.cnv.artifact}
 5 | \title{CNV artifact detection and filtering}
 6 | \usage{
 7 | clean.cnv.artifact(
 8 |   cnv,
 9 |   n.reps = 4,
10 |   cnv.size = 2e+06,
11 |   pc.overlap = 0.99,
12 |   fill.gaps = TRUE,
13 |   minsize = 5000,
14 |   verbose = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv}
19 | 
20 | \item{n.reps}{(numeric) number of samples with identical segment to consider artifact}
21 | 
22 | \item{cnv.size}{(numeric) only smaller segments will be modified in the cnv data.frame}
23 | 
24 | \item{pc.overlap}{(numeric) minimum percentage overlap for a pair of segments to be considered identical}
25 | 
26 | \item{fill.gaps}{(logical) whether to fill gaps from the segmented file after filtering artifacts}
27 | 
28 | \item{minsize}{(numeric) the minimum gap size required to fill the gap. Only used if 'fill.gaps=TRUE'}
29 | 
30 | \item{verbose}{(logical) whether to print internal messages}
31 | }
32 | \value{
33 | a data.frame containing CNV data
34 | }
35 | \description{
36 | Detects identical or near-identical CNV segments across multiple samples, which are likely to represent common variants or technical artifacts. The CNV log-ratio of those segments is then replaced by the average of the flanking segments
37 | }
38 | \examples{
39 | 
40 | ## validate input data.frame
41 | cnv <- validate.cnv(segdat_lung_ccle)
42 | 
43 | cnvcl <- clean.cnv.artifact(cnv)
44 | cnvcl
45 | }
46 | \keyword{CNV,}
47 | \keyword{filter}
48 | \keyword{segmentation,}
49 | 


--------------------------------------------------------------------------------
/man/break.density.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/breakpoint.density.r
 3 | \name{break.density}
 4 | \alias{break.density}
 5 | \title{Breakpoint density map}
 6 | \usage{
 7 | break.density(
 8 |   brk,
 9 |   chr.lim = NULL,
10 |   genome.v = "hg19",
11 |   window.size = 10,
12 |   slide.size = 2,
13 |   verbose = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{brk}{(breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).}
18 | 
19 | \item{chr.lim}{(data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.}
20 | 
21 | \item{genome.v}{(hg19 or hg38) reference genome version to draw chromosome limits and centromeres}
22 | 
23 | \item{window.size}{(numeric) size in megabases of the genome bin onto which breakpoints will be mapped}
24 | 
25 | \item{slide.size}{(numeric) size in megabases of the sliding genomic window; if slide.size < window.size the genomic bins will overlap}
26 | 
27 | \item{verbose}{(logical) whether to return internal messages}
28 | }
29 | \value{
30 | a matrix of samples (rows) and genomic bins (cols) with the number of breakpoints mapped in each cell
31 | }
32 | \description{
33 | Generates a genomic map based on a defined bin size and sliding window and counts the number of breakpoints mapped onto each bin. This function is used internally by svpluscnv::shattered.regions and svpluscnv::shattered.regions.cnv
34 | }
35 | \examples{
36 | 
37 | # initialize CNV data
38 | cnv <- validate.cnv(segdat_lung_ccle)
39 | 
40 | # obtain CNV breakpoints
41 | brk <- cnv.breaks(cnv)
42 | 
43 | break.density(brk)
44 | }
45 | \keyword{CNV,}
46 | \keyword{segmentation}
47 | 


--------------------------------------------------------------------------------
/man/cnv.breaks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/breakpoint.density.r
 3 | \name{cnv.breaks}
 4 | \alias{cnv.breaks}
 5 | \title{Identify CNV breakpoints}
 6 | \usage{
 7 | cnv.breaks(
 8 |   cnv,
 9 |   fc.pct = 0.2,
10 |   break.width = 10000,
11 |   min.cnv.size = NULL,
12 |   min.num.probes = NULL,
13 |   chrlist = NULL,
14 |   low.cov = NULL,
15 |   clean.brk = NULL,
16 |   verbose = TRUE
17 | )
18 | }
19 | \arguments{
20 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
21 | 
22 | \item{fc.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2}
23 | 
24 | \item{break.width}{(numeric) the maximum distance between a segment end and the subsequent segment start positions beyond which breakpoints are discarded}
25 | 
26 | \item{min.cnv.size}{(numeric) The minimum segment size (in base pairs) to include in the analysis}
27 | 
28 | \item{min.num.probes}{(numeric) The minimum number of probes per segment to include in the analysis}
29 | 
30 | \item{chrlist}{(character) list of chromosomes to include chr1, chr2, etc...}
31 | 
32 | \item{low.cov}{(data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis}
33 | 
34 | \item{clean.brk}{(numeric) identical breakpoints across multiple samples tend to be artifacts; remove breaks > N}
35 | 
36 | \item{verbose}{(logical) whether to return internal messages}
37 | }
38 | \value{
39 | an instance of the class 'breaks' containing breakpoint and breakpoint burden information
40 | }
41 | \description{
42 | Identify CNV breakpoints filtered by the change in copy number log-ratio between contiguous segments
43 | }
44 | \examples{
45 | 
46 | # initialized CNV data
47 | cnv <- validate.cnv(segdat_lung_ccle)
48 | 
49 | cnv.breaks(cnv)
50 | 
51 | }
52 | \keyword{CNV,}
53 | \keyword{segmentation}
54 | 


--------------------------------------------------------------------------------
/man/freq.p.test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/freq.p.test.r
 3 | \name{freq.p.test}
 4 | \alias{freq.p.test}
 5 | \title{Frequency hot spot detection
 6 |  
 7 | Obtains significance cutoff for the frequency of binary events encoded in a matrix such as that generated by shattered.regions and shattered.regions.cnv algorithms}
 8 | \usage{
 9 | freq.p.test(
10 |   mat,
11 |   method = "fdr",
12 |   p.cut = 0.05,
13 |   iter = 100,
14 |   zerofreq = TRUE,
15 |   plot = TRUE,
16 |   verbose = FALSE
17 | )
18 | }
19 | \arguments{
20 | \item{mat}{(numeric matrix) a binary matrix where columns will be tested for their sum value compared to a permutated matrix}
21 | 
22 | \item{method}{(character) the method to pass to p.adjust function}
23 | 
24 | \item{p.cut}{(numeric) the cutoff for multiple hypothesis corrected p.value}
25 | 
26 | \item{iter}{(numeric) Number of iterations to produce null distribution (note that null size will be iter*ncol(mat))}
27 | 
28 | \item{zerofreq}{(logical) whether to remove bins with observed frequency = 0; It is recommended to set to TRUE when the bins span genomic regions of low coverage}
29 | 
30 | \item{plot}{(logical) whether to generate a histogram comparing observed and null frequency distributions}
31 | 
32 | \item{verbose}{(logical) whether to return messages}
33 | }
34 | \value{
35 | an instance of the class 'freq.cut'
36 | }
37 | \description{
38 | Frequency hot spot detection
39 |  
40 | Obtains significance cutoff for the frequency of binary events encoded in a matrix such as that generated by shattered.regions and shattered.regions.cnv algorithms
41 | }
42 | \examples{
43 | 
44 | ## validate input data.frames
45 | cnv <- validate.cnv(segdat_lung_ccle)
46 | 
47 | ## obtain a matrix of genomic bins vs samples indicating high density of breaks
48 | shatt.regions <- shattered.regions.cnv(cnv)
49 | mat <- shatt.regions@high.density.regions.hc
50 | 
51 | freq.p.test(mat)
52 | }
53 | \keyword{empirical}
54 | \keyword{p.adjust}
55 | \keyword{p.value,}
56 | 


--------------------------------------------------------------------------------
/man/circ.wg.plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/circular.plot.r
 3 | \name{circ.wg.plot}
 4 | \alias{circ.wg.plot}
 5 | \title{Circular visualization CNV and SVC}
 6 | \usage{
 7 | circ.wg.plot(
 8 |   cnv,
 9 |   svc,
10 |   sample.id = NULL,
11 |   genome.v = "hg19",
12 |   lrr.pct = 0.2,
13 |   lrr.max = 4,
14 |   chrlist = NULL,
15 |   add.cnv.legend = "topleft",
16 |   add.svc.legend = "toprigh",
17 |   ...
18 | )
19 | }
20 | \arguments{
21 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
22 | 
23 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc}
24 | 
25 | \item{sample.id}{(character) the id of the sample to be plotted}
26 | 
27 | \item{genome.v}{(character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres}
28 | 
29 | \item{lrr.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2}
30 | 
31 | \item{lrr.max}{(numeric) maximum CNV to be plotted}
32 | 
33 | \item{chrlist}{(character) vector containing chromosomes to plot; by default all chromosomes plotted}
34 | 
35 | \item{add.cnv.legend}{(x,y or coordinates) the position parameter passed to legend to plot CNV (outer tracks) description}
36 | 
37 | \item{add.svc.legend}{(x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description}
38 | }
39 | \value{
40 | circos plot into open device
41 | }
42 | \description{
43 | Produces a circos plot combining CNV and SVC of the whole genome
44 | }
45 | \examples{
46 | 
47 | ## validate input data.frames
48 | cnv <- validate.cnv(segdat_lung_ccle)
49 | svc <- validate.svc(svdat_lung_ccle)
50 | 
51 | ## select a random sample id
52 | id <- "A549_LUNG"
53 | 
54 | circ.wg.plot(cnv, svc, sample.id=id)
55 | }
56 | \keyword{CNV,}
57 | \keyword{circular}
58 | \keyword{plot}
59 | \keyword{segmentation,}
60 | \keyword{structural}
61 | \keyword{variant,}
62 | \keyword{visualization,}
63 | 


--------------------------------------------------------------------------------
/man/cnv.freq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cnv.freq.plot.r
 3 | \name{cnv.freq}
 4 | \alias{cnv.freq}
 5 | \title{CNV frequency map}
 6 | \usage{
 7 | cnv.freq(
 8 |   cnv,
 9 |   fc.pct = 0.2,
10 |   genome.v = "hg19",
11 |   ploidy = FALSE,
12 |   g.bin = 1,
13 |   sampleids = NULL,
14 |   cex.axis = 1,
15 |   cex.lab = 1,
16 |   label.line = -1.2,
17 |   plot = TRUE,
18 |   verbose = TRUE
19 | )
20 | }
21 | \arguments{
22 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
23 | 
24 | \item{fc.pct}{(numeric) percentage CNV gain/loss for a segment to be considered changed (i.e. 0.2 = 20 percent change 0.8 < segmean && segmean > 1.2)}
25 | 
26 | \item{genome.v}{(character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres}
27 | 
28 | \item{ploidy}{(logical) whether to apply ploidy correction; the function med.segmean will be used to obtain each sample's ploidy logR, and this value is then subtracted from each sample's logR values}
29 | 
30 | \item{g.bin}{(numeric) size in megabases of the genome bin to compute break density}
31 | 
32 | \item{sampleids}{(character) vector containing list of samples to include in plot. if set to NULL, all samples in the input will be used}
33 | 
34 | \item{cex.axis, cex.lab, label.line}{(numeric) plot parameters}
35 | 
36 | \item{plot}{(logical) whether produce a graphical output}
37 | 
38 | \item{verbose}{(logical) whether to return internal messages}
39 | }
40 | \value{
41 | an instance of the class 'cnvfreq' and optionally a plot into open device
42 | }
43 | \description{
44 | Creates a map of CNVs using genome binning and plots CNV frequency across the genome. This function optionally returns text, graphical or both outputs.
45 | Additionally, calculates the proportion of samples with a given percentage of chromosome arm gained/lost
46 | }
47 | \examples{
48 | 
49 | ## validate input data.frame
50 | cnv <- validate.cnv(nbl_segdat)
51 | 
52 | cnv.freq(cnv, genome.v = "hg19")
53 | }
54 | \keyword{CNV,}
55 | \keyword{plot}
56 | \keyword{segmentation,}
57 | 


--------------------------------------------------------------------------------
/man/break.annot-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/break.annot.r
 3 | \docType{class}
 4 | \name{break.annot-class}
 5 | \alias{break.annot-class}
 6 | \alias{break.annot}
 7 | \title{break.annot class}
 8 | \arguments{
 9 | \item{input}{(data.frame): the breakpoint info containing data.frame, this will be occupied by the CNV segmentation data in the case of cnv.break.annot or SV for sv.break.annot. Unique random string rownames are added to the provided data.frame.}
10 | 
11 | \item{genesgr}{(GRanges): a GRanges object with genomic features (e.g. genes) to which breakpoints are mapped}
12 | 
13 | \item{disruptSamples}{(list): a list which names correspond to genomic features and values correspond to sample ids harboring breakpoints overlapping with said features}
14 | 
15 | \item{disruptBreaks}{(list): a list which names correspond to genomic features and values correspond to the ids of breakpoints mapped onto them. Break ids are linked to the 'input' data.frame rownames}
16 | 
17 | \item{upstreamSamples}{(list): a list which names correspond to genomic features and values correspond to sample ids harboring breakpoints overlapping with upstream region of said features}
18 | 
19 | \item{upstreamBreaks}{(list): a list which names correspond to genomic features and values correspond to the ids of breakpoints mapped onto upstream regions. Break ids are linked to the 'input' data.frame rownames}
20 | 
21 | \item{dnstreamSamples}{(list): a list which names correspond to genomic features and values correspond to sample ids harboring breakpoints overlapping with downstream region of said features}
22 | 
23 | \item{dnstreamBreaks}{(list): a list which names correspond to genomic features and values correspond to the ids of breakpoints mapped onto downstream regions. Break ids are linked to the 'input' brk object}
24 | 
25 | \item{param}{(list): a list of parameters provided for the annotation function}
26 | }
27 | \value{
28 | an instance of the class 'break.annot' containing breakpoint mapping onto genes
29 | }
30 | \description{
31 | Class instance to store breakpoint annotations in association with genomic features (e.g. gene loci)
32 | }
33 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: svpluscnv
 2 | Title: svpluscnv: analysis and visualization of complex structural variation data
 3 | Version: 0.99.1
 4 | Author:
 5 | 	person("Gonzalo", "Lopez",
 6 | 		role = c("aut", "cre"),
 7 | 		email = "gonzolgarcia@gmail.com",
 8 | 		comment = c(ORCID = "0000-0002-5092-1284"))
 9 | 	person("Laura", "Egolf",
10 | 		role = c("aut"),
11 | 		email = "laura.e.egolf@gmail.com",
12 | 		comment = c(ORCID = "0000-0002-7103-4801"))
13 | 	person("Federico", "Giorgi",
14 | 		role = c("ctb"),
15 | 		email = "federico.giorgi@gmail.com",
16 | 		comment = c(ORCID = "0000-0002-7325-9908"))
17 | Maintainer: 
18 |     Gonzalo Lopez <gonzolgarcia@gmail.com>
19 | Description: svpluscnv R package is a "swiss army knife" for the integration and interpretation of orthogonal datasets including copy number variant (CNV) segmentation profiles and sequencing-based structural variant calls (SVC). The package implements analysis and visualization tools to evaluate chromosomal instability and ploidy, identify genes harboring recurrent SVs and systematically characterize hot-spot genomic locations harboring complex rearrangements such as chromothripsis and chromoplexia.
20 | License: GPL-3
21 | Encoding: UTF-8
22 | LazyData: true
23 | RoxygenNote: 7.1.1
24 | biocViews: StructuralVariation, VariantAnnotation, CopyNumberVariation, Sequencing, GenomicVariation
25 | Depends: R (>= 3.6)
26 | Imports: IRanges, GenomicRanges, tidyr, data.table, circlize, D3GB, shape, org.Hs.eg.db, TxDb.Hsapiens.UCSC.hg19.knownGene, TxDb.Hsapiens.UCSC.hg38.knownGene, methods, stats, graphics, utils, grDevices, taRifx, S4Vectors, AnnotationDbi,GenomicAlignments,GenomicFeatures,scales,plot3D
27 | Suggests: 
28 | 	BiocStyle,
29 | 	knitr,
30 | 	rmarkdown
31 | Collate: 
32 |     validate.input.data.r internal_functions.r break.annot.r breakpoint.density.r brk.burden.iqm.r shattered.regions.r chr.arm.cnv.r  segment.means.r circular.plot.r cnv.freq.plot.r  clean.cnv.artifact.r  freq.p.test.r gene.cnv.r gene.track.view.r get.genesgr.r hot.spot.samples.R pct.genome.changed.r shattered.map.plot.r shattered.regions.cnv.r  sv.model.view.r svpluscnv.data.r
33 | VignetteBuilder: knitr
34 | git_url: https://github.com/ccbiolab/svpluscnv
35 | 


--------------------------------------------------------------------------------
/man/gene.track.view.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gene.track.view.r
 3 | \name{gene.track.view}
 4 | \alias{gene.track.view}
 5 | \title{Gene track visualization}
 6 | \usage{
 7 | gene.track.view(
 8 |   chrom = NULL,
 9 |   start = NULL,
10 |   stop = NULL,
11 |   symbol = NULL,
12 |   upstr = NULL,
13 |   dnstr = NULL,
14 |   genome.v = "hg19",
15 |   cex.text = 0.6,
16 |   addtext = TRUE,
17 |   plot = TRUE,
18 |   summary = TRUE,
19 |   ...
20 | )
21 | }
22 | \arguments{
23 | \item{chrom}{(character) Chromosome (e.g. chr9)}
24 | 
25 | \item{start}{(numeric) Genomic coordinate from specified chromosome to start plotting}
26 | 
27 | \item{stop}{(numeric) Genomic coordinate from specified chromosome to stop plotting}
28 | 
29 | \item{symbol}{(character) Accepted HGNC gene symbol used to retrieve the coordinates of the area to plot}
30 | 
31 | \item{upstr}{(numeric) Distance upstream specified gene to extend the area plotted}
32 | 
33 | \item{dnstr}{(numeric) Distance downstream specified gene to extend the area plotted}
34 | 
35 | \item{genome.v}{(character) Reference genome version to draw chromosome limits and centromeres (hg19 or hg38)}
36 | 
37 | \item{cex.text}{(numeric) The magnification to be used for transcript RefSeq text added}
38 | 
39 | \item{addtext}{(logic) Whether to include transcript RefSeq ids in the plot}
40 | 
41 | \item{plot}{(logic) Whether to generate plot in open device}
42 | 
43 | \item{summary}{(logic) Whether to produce a data.table output with transcript information}
44 | 
45 | \item{...}{Additional graphical parameters}
46 | }
47 | \value{
48 | A data.frame with gene isoform annotations and/or plot into open device
49 | }
50 | \description{
51 | Creates a track visualization of a genomic region defined by gene boundaries or custom provided
52 | }
53 | \examples{
54 | 
55 | # obtain the coordinates of a desired genomic region based on a known gene locus 
56 | refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD")
57 | chrom <- refSeqGene$chrom
58 | start <- refSeqGene$start - 150000;
59 | stop <- refSeqGene$stop + 50000;
60 | 
61 | gene.track.view(symbol="PTPRD", genome.v="hg19")
62 | }
63 | \keyword{CNV,}
64 | \keyword{segmentation}
65 | 


--------------------------------------------------------------------------------
/man/chromo.regs-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \docType{class}
 4 | \name{chromo.regs-class}
 5 | \alias{chromo.regs-class}
 6 | \alias{chromo.regs}
 7 | \title{Data class chromo.regs}
 8 | \arguments{
 9 | \item{regions.summary}{(list): a list of data.frames summarizing the information of shattered regions found in each sample}
10 | 
11 | \item{high.density.regions}{(matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal)}
12 | 
13 | \item{high.density.regions.hc}{(matrix): a numeric matrix representing high breakpoint density genomic bins in each sample (values 1 = high density break; 0 = normal). 
14 | Only those bins that overlap with high confidence regions defined in regions.summary are set to = 1}
15 | 
16 | \item{cnv.brk.dens}{(matrix): a numeric matrix representing the number of CNV segmentation breakpoints found at genomic bins in each sample}
17 | 
18 | \item{svc.brk.dens}{(matrix): a numeric matrix representing the number of SV breakpoints found at genomic bins in each sample}
19 | 
20 | \item{cnv.brk.common.dens}{(matrix): a numeric matrix representing the number of CNV breakpoints colocalizing SV breakpoints found at genomic bins in each sample}
21 | 
22 | \item{svc.brk.common.dens}{(matrix): a numeric matrix representing the number of SV breakpoints colocalizing CNV breakpoints found at genomic bins in each sample}
23 | 
24 | \item{cnvbrk}{(S4): an object generated by cnv.breaks function}
25 | 
26 | \item{svcbrk}{(S4): an object generated by svc.breaks function}
27 | 
28 | \item{common.brk}{(list): an object generated by match.breaks function}
29 | 
30 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv}
31 | 
32 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' validated by validate.svc}
33 | 
34 | \item{param}{(list): list of configuration parameters provided or set as default}
35 | }
36 | \value{
37 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
38 | }
39 | \description{
40 | Class to store shattered regions and information produced by shattered.regions and shattered.regions.cnv functions
41 | }
42 | 


--------------------------------------------------------------------------------
/man/svc.break.annot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/break.annot.r
 3 | \name{svc.break.annot}
 4 | \alias{svc.break.annot}
 5 | \title{Identification of recurrently altered genes using SVC data}
 6 | \usage{
 7 | svc.break.annot(
 8 |   svc,
 9 |   genome.v = "hg19",
10 |   genesgr = NULL,
11 |   upstr = 50000,
12 |   dnstr = 50000,
13 |   svc.seg.size = 2e+05,
14 |   verbose = TRUE
15 | )
16 | }
17 | \arguments{
18 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' validated by validate.svc}
19 | 
20 | \item{genome.v}{(character): either 'hg19' or 'hg38' accepted; reference genome version to retrieve gene annotations including genomic coordinates and strand}
21 | 
22 | \item{genesgr}{(S4) a GenomicRanges object containing gene annotations (if not NULL overrides genome.v). It is crucial that the genome version of 'genesgr' and the input 'svc' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).}
23 | 
24 | \item{upstr}{(numeric) size in base pairs to define gene upstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of upstream regions.}
25 | 
26 | \item{dnstr}{(numeric) size in base pairs to define gene downstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of downstream regions.}
27 | 
28 | \item{svc.seg.size}{(numeric) base pairs for maximum allowed segmental variants (DEL, DUP, INV or INS) size. Larger segmental SVs are treated as translocations and only the breakpoint position will be overlapped with genomic features.}
29 | 
30 | \item{verbose}{(logical) whether to return internal messages}
31 | }
32 | \value{
33 | an instance of the class 'break.annot' containing breakpoint mapping onto genes
34 | }
35 | \description{
36 | Identify genes recurrently altered by structural variants. The function will identify overlaps between genomic features (e.g. genes) and SV breakpoints.
37 | }
38 | \examples{
39 | 
40 | # Initialize SVC data
41 | svc <- validate.svc(svdat_lung_ccle)
42 | 
43 | svc.break.annot(svc, genome.v="hg19")
44 | }
45 | \keyword{Structural}
46 | \keyword{annotation}
47 | \keyword{variants,}
48 | 


--------------------------------------------------------------------------------
/man/shattered.regions.cnv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.cnv.r
 3 | \name{shattered.regions.cnv}
 4 | \alias{shattered.regions.cnv}
 5 | \title{CNV-only based shattered region detection}
 6 | \usage{
 7 | shattered.regions.cnv(
 8 |   cnv,
 9 |   fc.pct = 0.2,
10 |   min.cnv.size = 0,
11 |   min.num.probes = 0,
12 |   low.cov = NULL,
13 |   clean.brk = NULL,
14 |   window.size = 10,
15 |   slide.size = 2,
16 |   num.breaks = 10,
17 |   num.sd = 5,
18 |   dist.iqm.cut = 1e+05,
19 |   chrlist = NULL,
20 |   chr.lim = NULL,
21 |   verbose = TRUE
22 | )
23 | }
24 | \arguments{
25 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
26 | 
27 | \item{fc.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change}
28 | 
29 | \item{min.cnv.size}{(numeric) The minimun segment size (in base pairs) to include in the analysis}
30 | 
31 | \item{min.num.probes}{(numeric) The minimun number of probes per segment to include in the analysis}
32 | 
33 | \item{low.cov}{(data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis}
34 | 
35 | \item{clean.brk}{(numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied}
36 | 
37 | \item{window.size}{(numeric) size in megabases of the genome bin to compute break density}
38 | 
39 | \item{slide.size}{(numeric) size in megabases of the sliding genome window}
40 | 
41 | \item{num.breaks}{(numeric) minimum number of breakpoints mapped onto a genomic bin for the bin to be considered of high breakpoint density}
42 | 
43 | \item{num.sd}{(numeric) number of standard deviations above the sample average breakpoint density required for a genomic bin to be considered of high breakpoint density}
44 | 
45 | \item{dist.iqm.cut}{(numeric) interquantile average of the distance between breakpoints within a shattered region}
46 | 
47 | \item{chrlist}{(character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included}
48 | 
49 | \item{verbose}{(logical) whether to return internal messages}
50 | }
51 | \value{
52 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
53 | }
54 | \description{
55 | Caller for the identification of shattered genomic regions based on CNV breakpoint densities
56 | }
57 | \examples{
58 | 
59 | ## validate input data.frames
60 | cnv <- validate.cnv(segdat_lung_ccle)
61 | 
62 | shattered.regions.cnv(cnv)
63 | }
64 | \keyword{CNV,}
65 | \keyword{segmentation}
66 | 


--------------------------------------------------------------------------------
/R/pct.genome.changed.r:
--------------------------------------------------------------------------------
 1 | #' Percent genome change calculation
 2 | #' 
 3 | #' Calculates the proportion of the genome changed using CNV segmentation profiles. A segment is counted as changed when its CNV log-ratio (segmean) is below log2(1-fc.pct) or at least log2(1+fc.pct); the proportion is the summed width of changed segments divided by the summed width of all segments of each sample.
 4 | #' 
 5 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
 6 | #' @param fc.pct (numeric) percentage CNV gain/loss for a segment to be considered changed (e.g. 0.2 = 20 percent change: segmean < log2(0.8) or segmean >= log2(1.2))
 7 | #' @param discard.sex (logical) whether sex chromosomes (chrX and chrY) should be excluded from the calculation
 8 | #' @return (numeric) named vector (names are sample ids) containing percent genome changed values (0-1)
 9 | #' @seealso Additional data format information in the man pages of validate.cnv
10 | #' @keywords CNV, segmentation
11 | #' @export
12 | #' @examples
13 | #' 
14 | #' ## validate input CNV data.frames
15 | #' cnv <- validate.cnv(segdat_lung_ccle)
16 | #' 
17 | #' pct_changed <- pct.genome.changed(cnv)
18 | #' head(pct_changed)
19 | 
20 | pct.genome.changed <- function(cnv, 
21 |                                fc.pct=0.2, 
22 |                                discard.sex=TRUE){
23 | 
24 | cnvdat <- cnv@data
25 | if(discard.sex == TRUE) cnvdat <- cnvdat[which(!cnvdat$chrom %in% c("chrX","chrY")),]
26 | 
27 | width <- cnvdat$end - cnvdat$start
28 | segmean <- cnvdat$segmean
29 | # as.character prevents factor sample ids from turning into integer codes when used as names
30 | sample <- as.character(cnvdat$sample)
31 | df <- data.table(sample,width,segmean)
32 | # which() drops NA comparisons, so segments with missing segmean are counted as unchanged
33 | idx_changed <- which(df$segmean < log2(1-fc.pct) | df$segmean >= log2(1+fc.pct))
34 | idx_normal <- setdiff(seq_len(nrow(df)),idx_changed)
35 | df_normal <- df[idx_normal,]
36 | df_changed <-  df[idx_changed,]
37 |   
38 | # aggregate() stops with "no rows to aggregate" on empty input (no segment, or every
39 | # segment, passing the threshold); fall back to an empty per-sample summary instead
40 | empty_df <- data.frame(sample=character(0),width=numeric(0),stringsAsFactors=FALSE)
41 | length_changed_df <- if(nrow(df_changed) > 0) aggregate(width~sample ,df_changed,sum) else empty_df
42 | length_normal_df <- if(nrow(df_normal) > 0) aggregate(width~sample ,df_normal,sum) else empty_df
43 |   
44 | nochange <- setdiff(length_normal_df$sample,length_changed_df$sample)
45 | fullchange <- setdiff(length_changed_df$sample,length_normal_df$sample)
46 | length_changed <- c(length_changed_df[,2],rep(0,length(nochange)))
47 | names(length_changed)<- c(length_changed_df[,1],nochange)
48 |   
49 | length_normal <- c(length_normal_df[,2],rep(0,length(fullchange)))
50 | names(length_normal)<- c(length_normal_df[,1],fullchange)
51 |   
52 | pct.change<- length_changed/(length_normal[names(length_changed)] + length_changed)
53 |   
54 | return(pct.change)
55 | }
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/man/circ.chromo.plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/circular.plot.r
 3 | \name{circ.chromo.plot}
 4 | \alias{circ.chromo.plot}
 5 | \title{Circular visualization of shattered regions}
 6 | \usage{
 7 | circ.chromo.plot(
 8 |   chromo.regs.obj,
 9 |   sample.id,
10 |   print.name = TRUE,
11 |   genome.v = "hg19",
12 |   lrr.pct = 0.2,
13 |   lrr.max = 4,
14 |   high.conf = FALSE,
15 |   chrlist = NULL,
16 |   add.cnv.legend = "topleft",
17 |   add.svc.legend = "toprigh",
18 |   ...
19 | )
20 | }
21 | \arguments{
22 | \item{chromo.regs.obj}{(chromo.regs) An object of class chromo.regs}
23 | 
24 | \item{sample.id}{(character) the id of a sample to be plotted within}
25 | 
26 | \item{print.name}{(logical) whether to print the sample id  in the center of the circular plot}
27 | 
28 | \item{genome.v}{(character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres}
29 | 
30 | \item{lrr.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change}
31 | 
32 | \item{lrr.max}{(numeric) CNV plot limit}
33 | 
34 | \item{high.conf}{(logical) Whether to plot only high confidence shattered regions (see https://github.com/ccbiolab/svpluscnv#identification-of-shattered-regions for more information)}
35 | 
36 | \item{chrlist}{(character) vector containing chromosomes to plot; by default only chromosomes with shattered regions are plotted}
37 | 
38 | \item{add.cnv.legend}{(x,y or coordinates) the position parameter passed to legend to plot shattered regions and CNV (outer track) description}
39 | 
40 | \item{add.svc.legend}{(x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description}
41 | 
42 | \item{...}{Additional graphical parameters}
43 | }
44 | \value{
45 | circos plot into open device
46 | }
47 | \description{
48 | Produces a circos plot combining CNV and SVC data zooming into the chromosomes harboring shattered regions
49 | }
50 | \examples{
51 | 
52 | ## validate input data.frames
53 | cnv <- validate.cnv(segdat_lung_ccle)
54 | svc <- validate.svc(svdat_lung_ccle)
55 | 
56 | ## obtain shattered regions
57 | shatt.regions <- shattered.regions(cnv,svc)
58 | 
59 | # select the id of a sample to plot
60 | id <- "SCLC21H_LUNG"
61 | 
62 | circ.chromo.plot(shatt.regions, sample.id = id)
63 | }
64 | \keyword{CNV,}
65 | \keyword{circular}
66 | \keyword{plot}
67 | \keyword{segmentation,}
68 | \keyword{structural}
69 | \keyword{variant,}
70 | \keyword{visualization,}
71 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(IQM)
 4 | export(IQSD)
 5 | export(amp.del)
 6 | export(ave.segmean)
 7 | export(bed2chromo.reg)
 8 | export(break.density)
 9 | export(brk.burden.iqm)
10 | export(chr.arm.cnv)
11 | export(chr.sort)
12 | export(chromosome.limit.coords)
13 | export(circ.chromo.plot)
14 | export(circ.wg.plot)
15 | export(clean.cnv.artifact)
16 | export(cnv.break.annot)
17 | export(cnv.breaks)
18 | export(cnv.freq)
19 | export(createRandomString)
20 | export(d3gb.chr.lim)
21 | export(dngr)
22 | export(extract.bins)
23 | export(freq.p.test)
24 | export(freq.threshold)
25 | export(gene.cnv)
26 | export(gene.symbol.info)
27 | export(gene.track.view)
28 | export(geneBreakOverlap)
29 | export(get.chr.bins)
30 | export(get.genesgr)
31 | export(hbd.mat)
32 | export(hot.spot.samples)
33 | export(map2color)
34 | export(match.breaks)
35 | export(med.segmean)
36 | export(merge2lists)
37 | export(pct.genome.changed)
38 | export(segment.gap)
39 | export(shattered.eval)
40 | export(shattered.map.plot)
41 | export(shattered.regions)
42 | export(shattered.regions.cnv)
43 | export(sv.model.view)
44 | export(svc.break.annot)
45 | export(svc.breaks)
46 | export(upgr)
47 | export(validate.cnv)
48 | export(validate.svc)
49 | exportClasses(break.annot)
50 | exportClasses(break.iqm)
51 | exportClasses(breaks)
52 | exportClasses(chromo.regs)
53 | exportClasses(cnvfreq)
54 | exportClasses(genecnv)
55 | exportClasses(null.freq)
56 | exportClasses(refSeqDat)
57 | exportClasses(svcnvio)
58 | exportMethods(extract.bins)
59 | exportMethods(hbd.mat)
60 | # import
61 | import(data.table, except=c("shift"))
62 | import(GenomicRanges)
63 | import(tidyr)
64 | import(circlize)
65 | import(D3GB)
66 | import(shape)
67 | import(org.Hs.eg.db)
68 | import(TxDb.Hsapiens.UCSC.hg19.knownGene)
69 | import(TxDb.Hsapiens.UCSC.hg38.knownGene)
70 | import(methods)
71 | import(GenomicAlignments,except=c("first","last","second"))
72 | import(GenomicFeatures)
73 | import(scales)
74 | # importFrom
75 | importFrom("taRifx","remove.factors")
76 | importFrom("S4Vectors","queryHits","subjectHits")
77 | 
78 | importFrom("grDevices", "colorRampPalette", "rainbow", "rgb")
79 | 
80 | importFrom("graphics", "arrows", "axTicks", "axis", "barplot", "grid",
81 |            "hist", "legend", "lines", "mtext", "par", "plot", "points",
82 |            "polygon", "rect", "text")
83 | 
84 | importFrom("stats", "aggregate", "na.omit", "p.adjust", "quantile",
85 |            "sd", "setNames")
86 | 
87 | importFrom("utils", "capture.output", "setTxtProgressBar",
88 |            "txtProgressBar")
89 | importFrom("plot3D","colkey")
90 | importFrom("IRanges","IRanges")
91 | importFrom("AnnotationDbi","mapIds")
92 | ####
93 | 


--------------------------------------------------------------------------------
/R/hot.spot.samples.R:
--------------------------------------------------------------------------------
 1 | #' Hot-spot sample retrieval
 2 | #' 
 3 | #' Collects sample ids with shattered regions detected at hot-spots based on certain p-value cutoff
 4 | #' 
 5 | #' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs 
 6 | #' @param freq.cut (numeric) the hot spot threshold above which peaks are defined for sample ID retrieval
 7 | #' @return a list comprising two lists: peakRegions, peakRegionsSamples
 8 | #' @export
 9 | #' @examples
10 | #' # validate input data.frames
11 | #' cnv <- validate.cnv(segdat_lung_ccle)
12 | #' svc <- validate.svc(svdat_lung_ccle)
13 | #' 
14 | #' chromo.regs.obj <- shattered.regions(cnv,svc)
15 | #' mat<-hbd.mat(chromo.regs.obj)
16 | #' 
17 | #' pcut.obj <- freq.p.test(mat,plot=FALSE)
18 | #' pcut <- freq.threshold(pcut.obj)
19 | #' 
20 | #' res <- hot.spot.samples(chromo.regs.obj,pcut)
21 | #' 
22 | 
23 | 
24 | hot.spot.samples <- function(chromo.regs.obj, freq.cut){
25 | 
26 |     # Per-bin count of samples carrying a high-confidence shattered region
27 |     hc.mat <- chromo.regs.obj@high.density.regions.hc
28 |     freq.matrix <- apply(hc.mat,2,sum)
29 |     textRegions <- names(which(freq.matrix >= freq.cut))
30 | 
31 |     # No bin reaches the threshold: return empty lists instead of failing below
32 |     if(length(textRegions) == 0){
33 |         return(list(peakRegions=list(),peakRegionsSamples=list()))
34 |     }
35 | 
36 |     # Bin names are split on spaces into chromosome, start and end
37 |     coords <- do.call(rbind,strsplit(textRegions," "))
38 |     chr <- coords[,1]
39 |     start <- as.numeric(coords[,2])
40 |     end <- as.numeric(coords[,3])
41 | 
42 |     # Collapse contiguous bins into unique regions: when a bin starts before the
43 |     # previous bin of the same chromosome ends, it inherits that bin's start and
44 |     # the previous bin is dropped. seq_along(...)[-1] is empty when a single bin
45 |     # is selected, so the loop is skipped in that case.
46 |     absorbed <- rep(FALSE,length(textRegions))
47 |     for(i in seq_along(textRegions)[-1]){
48 |         if(chr[i] == chr[i-1] && start[i] < end[i-1]){
49 |             start[i] <- start[i-1]
50 |             absorbed[i-1] <- TRUE
51 |         }
52 |     }
53 |     keep <- which(!absorbed)
54 |     hitRegions <- data.table(chr=chr,start=start,end=end,regid=textRegions)
55 |     hitRegionsPost <- hitRegions[keep]
56 | 
57 |     # Map every collapsed region back to the original bins it overlaps
58 |     hitRegions_gr <- GRanges(hitRegions$chr, IRanges(start=hitRegions$start, end=hitRegions$end))
59 |     hitRegionsPost_gr <- GRanges(hitRegionsPost$chr, IRanges(start=hitRegionsPost$start, end=hitRegionsPost$end))
60 |     hits <- GenomicAlignments::findOverlaps(hitRegionsPost_gr,hitRegions_gr)
61 |     qhits <- queryHits(hits)
62 |     shits <- subjectHits(hits)
63 | 
64 |     regList <- list()
65 |     for(i in unique(qhits)) regList[[hitRegionsPost$regid[i]]] <- textRegions[shits[qhits == i]]
66 | 
67 |     # obtain the genomic bins with maximum number of samples
68 |     peakRegions <- lapply(regList, function(x)
69 |         names(which(freq.matrix[x] == max(freq.matrix[x]))))
70 | 
71 |     # collect samples with shattered region in the peaks; drop=FALSE keeps the
72 |     # sample (row) names when only one sample or one bin is selected
73 |     peakRegionsSamples <- lapply(peakRegions, function(x)
74 |         names(which(apply(hc.mat[,x,drop=FALSE],1,sum) > 0)))
75 | 
76 |     return(list(peakRegions=peakRegions,peakRegionsSamples=peakRegionsSamples))
77 | }
78 | 
79 | 


--------------------------------------------------------------------------------
/R/segment.means.r:
--------------------------------------------------------------------------------
 1 | #' Average sample CNV
 2 | #' 
 3 | #' Obtain the weighted average segment mean log2 ratios from each sample within a CNV segmentation data.frame
 4 | #' 
 5 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
 6 | #' @return (numeric) a vector containing the weighted average logR from segmented data
 7 | #' @keywords CNV, segmentation
 8 | #' @export
 9 | #' @examples
10 | #' 
11 | #' ## validate input CNV data.frames
12 | #' cnv <- validate.cnv(segdat_lung_ccle)
13 | #' 
14 | #' ave_seg_mean <- ave.segmean(cnv)
15 | #' head(ave_seg_mean)
16 | 
17 | 
18 | ####################
19 | 
20 | 
21 | ave.segmean <- function(cnv){
22 | 
23 |     # Only CNV segmentation objects are accepted
24 |     stopifnot(cnv@type == "cnv")
25 |     segs <- cnv@data
26 | 
27 |     sample <- segs$sample
28 |     width <- as.numeric(segs$end - segs$start)
29 | 
30 |     # Total segmented length per sample, indexed by sample id
31 |     totals <- stats::aggregate(width~sample,data.table(sample,width),sum)
32 |     glen <- stats::setNames(totals$width,totals$sample)
33 | 
34 |     # Length-weighted contribution of every segment to its sample average
35 |     w.segmean <- segs$segmean*width/glen[sample]
36 |     weighted <- stats::aggregate(w.segmean~sample,data.table(sample,w.segmean),sum)
37 | 
38 |     ave <- weighted$w.segmean
39 |     names(ave) <- weighted$sample
40 |     return(ave)
41 | }
42 | 
43 | 
44 | #' Median sample CNV
45 | #' 
46 | #' Obtain the median weighted segment mean from a segmentation file; the weighted median refers to the logR that occupies the center of all segments ordered by their log ratio
47 | #' 
48 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
49 | #' @return (numeric) a vector containing the median logR value of a segmented data.frame
50 | #' @keywords CNV, segmentation
51 | #' @export
52 | #' @examples
53 | #' 
54 | #' ## validate input CNV data.frames
55 | #' cnv <- validate.cnv(segdat_lung_ccle)
56 | #' 
57 | #' med_seg_mean <- med.segmean(cnv)
58 | #' head(med_seg_mean)
59 | 
60 | 
61 | ####################
62 | 
63 | 
64 | med.segmean <- function(cnv){
65 | 
66 |     stopifnot(cnv@type == "cnv")
67 |     segs <- cnv@data
68 | 
69 |     dt <- data.table(sample = segs$sample,
70 |                      glen = as.numeric(segs$end-segs$start),
71 |                      segmean = segs$segmean)
72 |     ids <- unique(dt$sample)
73 |     out <- rep(NA,length(ids))
74 |     names(out) <- ids
75 | 
76 |     for(id in ids){
77 |         # segments of this sample sorted by increasing log-ratio
78 |         miniord <- dt[which(dt$sample == id)]
79 |         miniord <- miniord[order(miniord$segmean)]
80 |         # distance of each cumulative length fraction to the half-way point
81 |         offset <- abs(cumsum(miniord$glen)/sum(miniord$glen) - 0.5)
82 |         # segments tied for the smallest distance are averaged
83 |         out[id] <- mean(miniord$segmean[which(offset == min(offset))])
84 |     }
85 |     return(out)
86 | }
87 | 


--------------------------------------------------------------------------------
/man/brk.burden.iqm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/brk.burden.iqm.r
 3 | \name{brk.burden.iqm}
 4 | \alias{brk.burden.iqm}
 5 | \title{Evaluates the breakpoint burden based on an instance of 'breaks' produced by svpluscnv::svc.breaks or svpluscnv::cnv.breaks.
 6 | Breakpoint densities are calculated for each chromosome arm and the inter quantile mean (svpluscnv::IQM) of all chromosome arms is reported for each sample.
 7 | A graphical output is generated indicating every sample's arm burden ordered by their IQM.}
 8 | \usage{
 9 | brk.burden.iqm(
10 |   brk,
11 |   sample.col = NULL,
12 |   min.arm.size = 2e+07,
13 |   bp.unit = 1e+07,
14 |   genome.v = "hg19",
15 |   chr.lim = NULL,
16 |   plot = TRUE,
17 |   verbose = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{brk}{(breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).}
22 | 
23 | \item{sample.col}{(character) A vector of valid colors. Names must match sample column from 'brk'. If NULL, a gradient color based on breakpoint burden IQM will be used.}
24 | 
25 | \item{min.arm.size}{(numeric) minimum size in base pairs for a chromosome arm to be included in the analysis. Size will be calculated based on the 'genome.v' centromere location (excluding centromere bands). Chromosome start and end locations can be provided in 'chr.lim'.}
26 | 
27 | \item{bp.unit}{(numeric) The genomic size unit in base pairs to report breakpoint densities. This parameter is also used for the y axis of the plot.}
28 | 
29 | \item{genome.v}{(hg19 or hg38) reference genome version to draw chromosome limits and centromeres}
30 | 
31 | \item{chr.lim}{(data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.}
32 | 
33 | \item{plot}{(logical) whether produce a graphical output}
34 | 
35 | \item{verbose}{(logical) whether to return internal messages}
36 | }
37 | \value{
38 | an instance of the class 'cnvfreq' and optionally a plot into open device
39 | }
40 | \description{
41 | Evaluates the breakpoint burden based on an instance of 'breaks' produced by svpluscnv::svc.breaks or svpluscnv::cnv.breaks.
42 | Breakpoint densities are calculated for each chromosome arm and the inter quantile mean (svpluscnv::IQM) of all chromosome arms is reported for each sample.
43 | A Graphical output is generated indicating every sample's arm burden ordered by their IQM.
44 | }
45 | \examples{
46 | 
47 | # initialize CNV data
48 | cnv <- validate.cnv(nbl_segdat)
49 | 
50 | # obtain CNV breakpoints
51 | brk <- cnv.breaks(cnv)
52 | 
53 | brk.burden.iqm(brk)
54 | }
55 | \keyword{burden,}
56 | \keyword{chromosomal}
57 | \keyword{instability}
58 | \keyword{mutational}
59 | \keyword{structural}
60 | \keyword{variants,}
61 | 


--------------------------------------------------------------------------------
/man/sv.model.view.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sv.model.view.r
 3 | \name{sv.model.view}
 4 | \alias{sv.model.view}
 5 | \title{SV integrated visualization}
 6 | \usage{
 7 | sv.model.view(
 8 |   cnv,
 9 |   svc,
10 |   chrom,
11 |   start,
12 |   stop,
13 |   sampleids = NULL,
14 |   cnvlim = c(-2, 2),
15 |   addlegend = "both",
16 |   cex.legend = 1,
17 |   interval = NULL,
18 |   addtext = NULL,
19 |   cex.text = 0.8,
20 |   plot = TRUE,
21 |   summary = TRUE,
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
27 | 
28 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc}
29 | 
30 | \item{chrom}{(character) chromosome (e.g chr9)}
31 | 
32 | \item{start}{(numeric) genomic coordinate from specified chromosome to start plotting}
33 | 
34 | \item{stop}{(numeric) genomic coordinate from specified chromosome to stop plotting}
35 | 
36 | \item{sampleids}{(character) a vector containing a list of sample ids represented in svc and/or cnv objects to be plotted}
37 | 
38 | \item{cnvlim}{(numeric) limits for color coding of background CNV log-ratios. Use to modify the CNV color contrast at different levels.}
39 | 
40 | \item{addlegend}{(character) One of 'sv' (show SV type legend), 'cnv' (show CNV background color legend) or 'both'.}
41 | 
42 | \item{cex.legend}{(numeric) The cex values for each legend}
43 | 
44 | \item{interval}{(numeric) The axis interval in base pairs}
45 | 
46 | \item{addtext}{(character) a vector indicating what SV types should include text labels indicating breakpoint partners genomic locations. The added labels point to breakpoint locations outside the plot area. (e.g. c("TRA","INV") )}
47 | 
48 | \item{cex.text}{(numeric) The magnification to be used for SV text info added}
49 | 
50 | \item{plot}{(logical) whether to produce a graphical output}
51 | 
52 | \item{summary}{(logical) whether the function should return CNV segment 'segbrk' and SV 'svbrk' breakpoints tabular output}
53 | 
54 | \item{...}{additional plot parameters from graphics plot function}
55 | }
56 | \value{
57 | a data.frame with CNV and SV breakpoint annotations and/or plot into open device
58 | }
59 | \description{
60 | Integrated visualization of SVC and CNV data for defined genomic locations. CNV and SVC data are overlaid into a sample-based track visualization map.
61 | }
62 | \examples{
63 | 
64 | ## validate input data.frames
65 | cnv <- validate.cnv(segdat_lung_ccle)
66 | svc <- validate.svc(svdat_lung_ccle)
67 | 
68 | # obtain the coordinates of a desired genomic region based on a known gene locus
69 | refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD")
70 | start <- refSeqGene$start - 150000;
71 | stop <- refSeqGene$stop+ 50000;
72 | chrom <- refSeqGene$chrom
73 | 
74 | sv.model.view(cnv, svc, chrom, start, stop)
75 | 
76 | }
77 | \keyword{CNV,}
78 | \keyword{segmentation}
79 | \keyword{structural}
80 | \keyword{variant,}
81 | 


--------------------------------------------------------------------------------
/R/chr.arm.cnv.r:
--------------------------------------------------------------------------------
 1 | #' Chromosome arm mean CNV
 2 | #'
 3 | #' Obtains a matrix with the weighted average CN per chromosome arm 
 4 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
 5 | #' @param genome.v (character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
 6 | #' @param verbose (logical) whether to return internal messages
 7 | #' @return a matrix of chromosome arms (rows) versus samples (cols) with average segment logRs per cell
 8 | #' @keywords CNV, segmentation, chromosome arm
 9 | #' @export 
10 | #' @examples
11 | #' 
12 | #' # initialize CNV data
13 | #' cnv <- validate.cnv(segdat_lung_ccle)
14 | #' 
15 | #' arm_mat <- chr.arm.cnv(cnv, genome.v="hg19")
16 | #' dim(arm_mat)
17 | 
18 | 
19 | chr.arm.cnv <- function(cnv,
20 |                     genome.v="hg19",
21 |                     verbose=FALSE){
22 |     # Refuse objects that are not flagged as CNV data
23 |     stopifnot(cnv@type == "cnv")
24 |     cnvdat <- cnv@data
25 |     # Pick the band table (defined outside this function) for the requested genome version
26 |     if(genome.v %in% c("GRCh37","hg19")){ 
27 |         bands <- GRCh37.bands
28 |     }else if(genome.v %in% c("GRCh38","hg38")){ 
29 |         bands <- GRCh38.bands
30 |     }else{stop("Genome version not provided")}
31 |     # Start and end of the q-side "acen" band per chromosome, named with a "chr" prefix
32 |     centromeres_start <- bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"start"]
33 |     centromeres_end <- bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"end"]
34 |     names(centromeres_start) <-  names(centromeres_end) <- paste("chr",bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"chr"],sep="")
35 |     # Arm intervals: p = chr.lim begin to centromere start, q = centromere end to chr.lim end
36 |     chr.lim <- chromosome.limit.coords(cnv)
37 |     chrarms <- rbind(cbind(chr.lim$begin,centromeres_start[chr.lim$chrom]),cbind(centromeres_end[chr.lim$chrom],chr.lim$end))
38 |     chrarms <- data.table(rownames(chrarms),chrarms,c(paste(chr.lim$chrom,"p",sep=""), paste(chr.lim$chrom,"q",sep="")))
39 |     colnames(chrarms) <- c("chrom","start","end","arm")
40 |     # Keep only arms with positive length
41 |     chrarms <- chrarms[which(chrarms$end -chrarms$start > 0),]
42 |     # Pair every arm with the CNV segments overlapping it
43 |     chrarmsGR <- with(chrarms,GRanges(chrom, IRanges(start=start, end=end)))
44 | 
45 |     cnvdat_gr <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=end)))
46 |     hits <- GenomicAlignments::findOverlaps(chrarmsGR,cnvdat_gr)
47 |     # Output matrix (arms x samples); cells stay NA unless filled in the loop below
48 |     armcnvmat <- matrix(ncol=length(unique(cnvdat$sample)), nrow=nrow(chrarms) )
49 |     colnames(armcnvmat) <- unique(cnvdat$sample)
50 |     rownames(armcnvmat) <- chrarms$arm
51 |     # Fill one row per arm that has overlapping segments
52 |     for(i in unique(queryHits(hits))){ 
53 |         arm <- chrarms[i,"arm"][[1]]
54 |     
55 |         if(verbose) cat("\r",arm)
56 |         # Segments overlapping this arm, with coordinates clipped to the arm limits
57 |         armdf <- cnvdat[subjectHits(hits)[which(queryHits(hits) == i)],]
58 |         armdf[which(armdf$start < chrarms[i,"start"]),"start"] <- chrarms[i,"start"]
59 |         armdf[which(armdf$end > chrarms[i,"end"]),"end"] <- chrarms[i,"end"]
60 |         # Segment weight = clipped width / total clipped width of the same sample on this arm
61 |         arm.width <- armdf$end - armdf$start
62 |         armdf <- data.table(armdf,arm.width)
63 |         armlength <- aggregate(arm.width~sample,armdf,sum)[,2]
64 |         names(armlength) <- aggregate(arm.width~sample,armdf,sum)[,1]
65 |         part <- armdf$segmean * armdf$arm.width / armlength[armdf$sample]
66 |     
67 |         armdf <- data.table(armdf,arm.width,part,armlength[armdf$sample])
68 |         # Per-sample sum of the weighted segment means gives the arm value
69 |         meanArmSegment <- aggregate(part~sample,armdf,sum)
70 |     
71 |         num <-  as.numeric(meanArmSegment[,2])
72 |         names(num) <- as.character(meanArmSegment[,1])
73 |         armcnvmat[arm,names(num)] <- num
74 |     }
75 |     return(armcnvmat)
76 | }
77 | 
78 | 
79 | 
80 | 


--------------------------------------------------------------------------------
/man/cnv.break.annot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/break.annot.r
 3 | \name{cnv.break.annot}
 4 | \alias{cnv.break.annot}
 5 | \title{Identification of recurrently altered genes using CNV data
 6 | Identify recurrently altered genes by CNV. The function will identify overlaps between genomic features (e.g. genes) and CNV breakpoints. As opposed to 'gene.cnv' function that returns the overall CNV of each gene, this function allows identifying sub-genic events and may help detecting other rearrangements.}
 7 | \usage{
 8 | cnv.break.annot(
 9 |   cnv,
10 |   fc.pct = 0.2,
11 |   genome.v = "hg19",
12 |   genesgr = NULL,
13 |   upstr = 150000,
14 |   dnstr = 150000,
15 |   break.width = 10000,
16 |   min.cnv.size = NULL,
17 |   min.num.probes = NULL,
18 |   low.cov = NULL,
19 |   clean.brk = NULL,
20 |   verbose = TRUE
21 | )
22 | }
23 | \arguments{
24 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv}
25 | 
26 | \item{fc.pct}{(numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2.}
27 | 
28 | \item{genome.v}{(character): either 'hg19' or 'hg38' accepted; reference genome version to retrieve gene annotations including genomic coordinates and strand}
29 | 
30 | \item{genesgr}{(S4) a GenomicRanges object containing gene annotations (if not NULL overrides genome.v). It is crucial that the genome version of 'genesgr' and the input 'cnv' are the same. The GRanges object must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...).}
31 | 
32 | \item{upstr}{(numeric) size in base pairs to define gene upstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of upstream regions.}
33 | 
34 | \item{dnstr}{(numeric) size in base pairs to define gene downstream region onto which breakpoint overlaps will be identified. The strand value, start and stop positions defined in genesgr will be used to create a GRanges object of downstream regions.}
35 | 
36 | \item{break.width}{(numeric) maximum breakpoint size to be considered}
37 | 
38 | \item{min.cnv.size}{(numeric) The minimum segment size (in base pairs) to include in the analysis}
39 | 
40 | \item{min.num.probes}{(numeric) The minimum number of probes per segment to include in the analysis}
41 | 
42 | \item{low.cov}{(data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis}
43 | 
44 | \item{clean.brk}{(numeric) Removes identical segments when they are present in more than the given number of samples. Identical CNV segments across multiple samples may represent artifacts of common germline variants; this is particularly relevant when the segmentation data was generated with a non-paired reference. For paired datasets (e.g. tumor vs. normal) better leave as NULL.}
45 | 
46 | \item{verbose}{(logical) whether to return internal messages}
47 | }
48 | \value{
49 | an instance of the class 'break.annot' containing breakpoint mapping onto genes
50 | }
51 | \description{
52 | Identification of recurrently altered genes using CNV data
53 | Identify recurrently altered genes by CNV. The function will identify overlaps between genomic features (e.g. genes) and CNV breakpoints. As opposed to 'gene.cnv' function that returns the overall CNV of each gene, this function allows identifying sub-genic events and may help detecting other rearrangements.
54 | }
55 | \examples{
56 | 
57 | # Initialize CNV data
58 | cnv <- validate.cnv(segdat_lung_ccle)
59 | 
60 | cnv.break.annot(cnv)
61 | }
62 | \keyword{CNV,}
63 | \keyword{segmentation}
64 | 


--------------------------------------------------------------------------------
/R/svpluscnv.data.r:
--------------------------------------------------------------------------------
  1 | #' Data class refSeqDat
  2 | #' 
  3 | #' Class to store refseq data from UCSC containing exon level info for known transcripts
  4 | #' 
  5 | #' @param data (data.table): transcript information
  6 | #' @param exonStarts (list): every transcript exonic start position
  7 | #' @param exonEnds (list): every transcript exonic end position
  8 | #' @param genome.v (character): the genome version encoding transcript data
  9 | #' @return an instance of the class 'refSeqDat' containing transcript exonic coordinates
 10 | #' @export
 11 | 
 12 | refSeqDat <- setClass(
 13 |     "refSeqDat",  # S4 class holding a transcript table plus per-transcript exon coordinates
 14 |     representation(
 15 |         data = "data.table", exonStarts = "list",
 16 |         exonEnds = "list", genome.v = "character"
 17 |     ))
 18 | 
 19 | # Console summary of the object: number of transcripts stored and their genome version
 20 | setMethod("show","refSeqDat",function(object){
 21 |     writeLines(paste("An object of class refSeqDat from svpluscnv with",nrow(object@data),"transcripts from",object@genome.v,"genome version"))
 22 | })
 23 | 
 24 | #'
 25 | #' Return coordinates of a specified gene
 26 | #' 
 27 | #' @param object (refSeqDat) An object of class refSeqDat containing gene transcript mapping. svpluscnv includes two selfloaded objects: refseq_hg19 & refseq_hg38
 28 | #' @param symbol (character) a valid HGNC gene symbol included in the refseq object
 29 | #' @export
 30 | #' @docType methods
 31 | #' @return A list containing chr, start, end coordinates
 32 | #' @rdname gene.symbol.info-methods
 33 | 
 34 | setGeneric(
 35 |     "gene.symbol.info",
 36 |     function(object, symbol) standardGeneric("gene.symbol.info")
 37 | )
 38 | 
 39 | #' @rdname gene.symbol.info-methods
 40 | setMethod("gene.symbol.info", "refSeqDat", function(object, symbol){
 41 |     # Transcripts whose name2 field equals the requested gene symbol
 42 |     hits <- object@data[which(object@data$name2 == symbol)]
 43 |     # Span from the smallest transcript start to the largest transcript end
 44 |     list(chrom = unique(hits$chrom), start = min(hits$txStart), stop = max(hits$txEnd))
 45 | })
 46 | 
 47 | utils::globalVariables(c("refseq_hg19", "refseq_hg38"))
 48 | 
 49 | #' Reference transcript and exon annotations for hg19 
 50 | #' 
 51 | #' refSeq annotations for hg19 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables)
 52 | #' 
 53 | #' @name refseq_hg19
 54 | #' @docType data
 55 | #' @keywords genes, transcripts, exons
 56 | #' 
 57 | "refseq_hg19"
 58 | 
 59 | 
 60 | #' Reference transcript and exon annotations for hg38
 61 | #' 
 62 | #' refSeq annotations for hg38 version from UCSC (http://genome.ucsc.edu/cgi-bin/hgTables)
 63 | #' 
 64 | #' @name refseq_hg38
 65 | #' @docType data
 66 | #' @keywords genes, transcripts, exons
 67 | #' 
 68 | "refseq_hg38"
 69 | 
 70 | 
 71 | utils::globalVariables(c("segdat_lung_ccle", "svdat_lung_ccle","cnv_blacklist_regions","nbl_segdat","nbl_svdat"))
 72 | 
 73 | #' Lung CCLE CNV data
 74 | #' 
 75 | #' CCLE CNV segmentation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/
 76 | #' @name segdat_lung_ccle
 77 | #' @docType data
 78 | #' @keywords CNV segmentation
 79 | "segdat_lung_ccle"
 80 | 
 81 | #' Lung CCLE SVC data
 82 | #' 
 83 | #' CCLE translocation data from LUNG tissue cell lines (DepMap): https://depmap.org/portal/download/
 84 | #' @name svdat_lung_ccle
 85 | #' @docType data
 86 | #' @keywords SVs
 87 | "svdat_lung_ccle"
 88 | 
 89 | #' Low coverage regions
 90 | #' 
 91 | #' @name cnv_blacklist_regions
 92 | #' @docType data
 93 | #' @keywords CNV segmentation
 94 | "cnv_blacklist_regions"
 95 | 
 96 | #' TARGET Neuroblastoma CNV
 97 | #' 
 98 | #' TARGET CNV segmentation: https://target-data.nci.nih.gov/
 99 | #' @name nbl_segdat
100 | #' @docType data
101 | #' @keywords CNV segmentation, SVs
102 | "nbl_segdat"
103 | 
104 | #' TARGET Neuroblastoma SVC
105 | #' 
106 | #' TARGET CGI structural variants: https://target-data.nci.nih.gov/
107 | #' 
108 | #' @name nbl_svdat
109 | #' @docType data
110 | #' @keywords  SVs
111 | "nbl_svdat"
112 | 
113 | 
114 | 
115 | 


--------------------------------------------------------------------------------
/man/shattered.regions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/shattered.regions.r
 3 | \name{shattered.regions}
 4 | \alias{shattered.regions}
 5 | \title{Shattered region detection}
 6 | \usage{
 7 | shattered.regions(
 8 |   cnv,
 9 |   svc,
10 |   fc.pct = 0.2,
11 |   min.cnv.size = 0,
12 |   min.num.probes = 0,
13 |   low.cov = NULL,
14 |   clean.brk = NULL,
15 |   window.size = 10,
16 |   slide.size = 2,
17 |   num.cnv.breaks = 6,
18 |   num.cnv.sd = 5,
19 |   num.svc.breaks = 6,
20 |   num.svc.sd = 5,
21 |   num.common.breaks = 3,
22 |   num.common.sd = 3,
23 |   maxgap = 10000,
24 |   chrlist = NULL,
25 |   chr.lim = NULL,
26 |   interleaved.cut = 0.33,
27 |   dist.iqm.cut = 1e+05,
28 |   verbose = TRUE
29 | )
30 | }
31 | \arguments{
32 | \item{cnv}{(S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv}
33 | 
34 | \item{svc}{(S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc}
35 | 
36 | \item{fc.pct}{(numeric) inherited from cnv.breaks(); copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2}
37 | 
38 | \item{min.cnv.size}{(numeric) inherited from cnv.breaks(); The minimum segment size (in base pairs) to include in the analysis}
39 | 
40 | \item{min.num.probes}{(numeric) inherited from cnv.breaks(); The minimum number of probes per segment to include in the analysis}
41 | 
42 | \item{low.cov}{(data.frame) inherited from cnv.breaks(), svc.breaks() and match.breaks; a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis}
43 | 
44 | \item{clean.brk}{(numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied}
45 | 
46 | \item{window.size}{(numeric) size in megabases of the genome bin to compute break density}
47 | 
48 | \item{slide.size}{(numeric) size in megabases of the sliding genome window}
49 | 
50 | \item{num.cnv.breaks}{(numeric) number of segmentation breakpoints per segments to be considered high-density break}
51 | 
52 | \item{num.cnv.sd}{(numeric) number of standard deviations above the sample average for num.cnv.breaks}
53 | 
54 | \item{num.svc.breaks}{(numeric) number of svc breakpoints per segments to be considered high-density break}
55 | 
56 | \item{num.svc.sd}{(numeric) number of standard deviations above the sample average for num.svc.breaks}
57 | 
58 | \item{num.common.breaks}{(numeric) number of common SV and segmentation breakpoints per segments to be considered high-density break}
59 | 
60 | \item{num.common.sd}{(numeric) number of standard deviations above the sample average for num.common.breaks}
61 | 
62 | \item{maxgap}{(numeric) inherited from match.breaks(); sets the maximum gap between co-localizing orthogonal breakpoints}
63 | 
64 | \item{chrlist}{(character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included}
65 | 
66 | \item{chr.lim}{(data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.}
67 | 
68 | \item{interleaved.cut}{(numeric) 0-1 value indicating percentage of interleaved (non-contiguous) SV breakpoint pairs}
69 | 
70 | \item{dist.iqm.cut}{(numeric) interquantile average of the distance between breakpoints within a shattered region}
71 | 
72 | \item{verbose}{(logical) whether to return internal messages}
73 | }
74 | \value{
75 | an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
76 | }
77 | \description{
78 | Caller for the identification of shattered genomic regions based on CNV and SVC data
79 | }
80 | \examples{
81 | 
82 | ## validate input data.frames
83 | cnv <- validate.cnv(segdat_lung_ccle)
84 | svc <- validate.svc(svdat_lung_ccle)
85 | 
86 | shattered.regions(cnv,svc)
87 | }
88 | \keyword{chromoplexy,}
89 | \keyword{chromosome}
90 | \keyword{chromothripsis,}
91 | \keyword{shattering}
92 | 


--------------------------------------------------------------------------------
/R/shattered.map.plot.r:
--------------------------------------------------------------------------------
 1 | #' Shattered regions genomic map
 2 | #' 
 3 | #' Plots a genome wide map of shattered region frequencies
 4 | #' 
 5 | #' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs 
 6 | #' @param conf (character) either 'hc' for high confidence objects or else all included
 7 | #' @param genome.v (character)  reference genome version to draw chromosome limits and centromeres either hg19 or hg38 accepted
 8 | #' @param chrlist (character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included
 9 | #' @param freq.cut the value at which to draw a horizontal line; use 'freq.p.test' to obtain a threshold for statistically significant hot spots
10 | #' @param add.legend the position of the legend in the plot; if NULL, no legend will be drawn
11 | #' @return a plot into open device
12 | #' @keywords chromosome shattering, genome map
13 | #' @export
14 | #' @examples
15 | #' 
16 | #' 
17 | #' ## validate input data.frames
18 | #' cnv <- validate.cnv(segdat_lung_ccle)
19 | #' svc <- validate.svc(svdat_lung_ccle)
20 | #' 
21 | #' ## obtain shattered regions
22 | #' chromo.regs.obj <- shattered.regions(cnv,svc)
23 | #' 
24 | #' shattered.map.plot(chromo.regs.obj)
25 | 
26 | shattered.map.plot <- function(chromo.regs.obj,
27 |                           conf="hc",
28 |                           genome.v = "hg19",
29 |                           chrlist=NULL,
30 |                           freq.cut=NULL,
31 |                           add.legend="top"){
32 | 
33 | 
34 |     if(genome.v %in% c("hg19","GRCh37")){ bands <- GRCh37.bands
35 |     }else if(genome.v %in% c("hg38","GRCh38")){ bands <- GRCh38.bands}
36 |     
37 |     centromeres <- bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"start"]
38 |     names(centromeres) <- paste("chr",bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"chr"],sep="")
39 | 
40 |     chrlengths <- vapply(unique(bands$chr), function(i) max(bands$end[which(bands$chr == i)]), 1)
41 |     names(chrlengths) <- paste("chr",unique(bands$chr),sep="")
42 |     
43 |     if(is.null(chrlist)){
44 |         chrlist <- unique(do.call(rbind,strsplit(colnames(chromo.regs.obj@high.density.regions)," "))[,1])
45 |     }
46 |     stopifnot( length(which(!chrlist %in% names(chrlengths))) == 0 )
47 |     
48 |     if(conf == "hc") {
49 |         highDensitiRegionsFreq <- apply(chromo.regs.obj@high.density.regions.hc,2,sum)
50 |     }else{
51 |         highDensitiRegionsFreq <- apply(chromo.regs.obj@high.density.regions,2,sum)
52 |     }
53 | 
54 | p_chrcols  <- rep(c("salmon4","salmon4"),12)
55 | q_chrcols  <- rep(c("salmon","salmon"),12)
56 | names(p_chrcols) <- names(q_chrcols) <- chrlist
57 | chrom <- do.call(rbind,strsplit(names(highDensitiRegionsFreq)," "))[,1]
58 | coloresBarplot  <- rep("white",length(chrom))
59 | 
60 | parm <- which(as.numeric(do.call(rbind,strsplit(names(highDensitiRegionsFreq)," "))[,3]) - centromeres[chrom] > 0)
61 | qarm <- which(as.numeric(do.call(rbind,strsplit(names(highDensitiRegionsFreq)," "))[,2]) - centromeres[chrom] < 0)
62 | coloresBarplot[parm] <- p_chrcols[names(parm)]
63 | coloresBarplot[qarm] <- q_chrcols[names(qarm)]
64 | 
65 | 
66 | axislab <- chrstarts<-  chrend <- chrlengths[chrlist]
67 | tab <- data.table(do.call(rbind,strsplit(names(highDensitiRegionsFreq)," ")),names(highDensitiRegionsFreq))
68 | colnames(tab) <- c("chrom","start","end","regid")
69 | tab <- tab[which(tab$chrom %in% chrlist)]
70 | highDensitiRegionsFreq <- highDensitiRegionsFreq[which(tab$chrom %in% chrlist)]
71 | tab$start <-as.numeric(tab$start)
72 | tab$end <-as.numeric(tab$end)
73 | 
74 | for(i in unique(tab$chrom)) chrend[i] <- max(tab[which(tab[,1] == i),3])
75 | for(i in 0:(length(chrend)-1) ) axislab[i+1] <- chrend[i+1]/2 + sum( chrend[0:i])
76 | for(i in 0:(length(chrend)-1) ) chrstarts[i+1] <- sum(chrend[0:i])
77 | data <- cbind( (tab$end + tab$start) / 2 + chrstarts[tab$chrom], highDensitiRegionsFreq)
78 | 
79 | 
80 | altcols <- rep(c(rgb(0.1,0.1,0.1,alpha=0.1),rgb(0.8,0.8,0.8,alpha=0.1)),12)
81 | altcols2<- rep(c(rgb(0.1,0.1,0.1,alpha=1),rgb(0.4,0.4,0.4,alpha=1)),12)
82 | ctrmr <- chrstarts+centromeres[names(chrstarts)]
83 | 
84 | plot(data[,1:2],type='h',col=coloresBarplot,xaxt='n',lwd=1.5,ylim=c(0, max(data[,2])+5),
85 |      las=1,bty='n',yaxt='n',family="Arial",ylab="",xlab="")
86 | for(i in 1:length(chrstarts) ) rect( chrstarts[i],0,chrstarts[i]+chrlengths[i],1000, col=altcols[i],border=NA )
87 | mtext(gsub("chr","",names(axislab)),side=1,at=axislab,las=1,col=altcols2,cex=c(rep(1,17),rep(0.8,5),1) )
88 | if(!is.null(freq.cut)) lines(c(0,chrstarts["chrX"]+chrlengths["chrX"]),c(freq.cut,freq.cut),lty=3,col="black")    
89 | axis(2,las=1,pos= 0, cex=1.2)
90 | axis(4,las=1,pos= max(data[,1])+10000, cex=1.2, at=axTicks(2), labels=sprintf("%.2f",axTicks(2)/dim(chromo.regs.obj@high.density.regions)[1]) )
91 | mtext("Frequency",side=4,line=1.5)
92 | mtext("#samples",side=2,line=1)
93 | if(!is.null(add.legend)) legend(add.legend,c("short (p) arm","long (q) arm"),border=c("salmon","salmon4"),fill=c("salmon","salmon4"),bty='n',ncol=2)
94 | }
95 | 
96 | 


--------------------------------------------------------------------------------
/R/gene.cnv.r:
--------------------------------------------------------------------------------
  1 | #' Data class genecnv
  2 | #' 
  3 | #' Class to store gene-level copy number data
  4 | #' 
  5 | #' @param cnvmat (matrix): matrix containing average CNV per gene (rows) for each sample (columns)
  6 | #' @param genesgr (S4): a GenomicRanges object with genomic feature annotations such as gene coordinates
  7 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
  8 | #' @param param (list): a list of the parameters provided to gene.cnv
  9 | #' @return an instance of the class 'genecnv' containing gene level copy number info
 10 | #' @export
 11 | 
 12 | genecnv <- setClass("genecnv",
 13 |                         representation(
 14 |                             cnvmat  = "matrix",
 15 |                             genesgr = "GRanges",
 16 |                             cnv = "svcnvio",
 17 |                             param = "list"
 18 |                         ))
 19 | 
 20 | 
 21 | setMethod("show","genecnv",function(object){  # prints the sample (column) and gene (row) counts of cnvmat
 22 |     writeLines(paste("An object of class genecnv from svpluscnv containing gene level CNV data
 23 |                 \nNumber of samples=",ncol(object@cnvmat),
 24 |                 "\nAltered genes=",nrow(object@cnvmat)))
 25 | })
 26 | 
 27 | 
 28 | #' Gene-level CNV
 29 | #' 
 30 | #' Obtains a gene-level copy number matrix from a segmentation profile.
 31 | #'  
 32 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
 33 | #' @param genome.v (hg19 or hg38) reference genome version to fetch gene annotations when 'genesgr=NULL'
 34 | #' @param genesgr (S4) a GenomicRanges object containing genomic feature annotations (if not NULL overrides genome.v). It must contain 'strand' and a metadata field 'gene_id' with unique values. Seqnames are expected in the format (chr1, chr2, ...) 
 35 | #' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...
 36 | #' @param fill.gaps (logical) whether to fill the gaps in the segmentation file using gap neighbour segmean average as log ratio
 37 | #' @param verbose (logical) passed on to segment.gap when 'fill.gaps=TRUE'
 38 | #' @return an instance of the class 'genecnv' containing gene level copy number info
 39 | #' @keywords CNV, segmentation, genes
 40 | #' @export
 41 | #' @examples
 42 | #' 
 43 | #' ## validate input data.frames
 44 | #' cnv <- validate.cnv(segdat_lung_ccle)
 45 | #' 
 46 | #' gene.cnv(cnv)
 47 | 
 48 | gene.cnv <- function(cnv, 
 49 |                      genome.v="hg19",
 50 |                      genesgr=NULL,
 51 |                      chrlist=NULL, 
 52 |                      fill.gaps=FALSE,
 53 |                      verbose=TRUE){
 54 | 
 55 | stopifnot(cnv@type == "cnv")
 56 | cnv2<-cnv
 57 | if(fill.gaps) cnv2 <- segment.gap(cnv2, chrlist=chrlist, verbose=verbose)  # gaps are filled on a copy; the untouched 'cnv' is what gets stored in the output
 58 | cnvdat <- cnv2@data
 59 |     
 60 | if(is.null(chrlist)) chrlist <- unique(cnvdat$chrom)
 61 | chrlist <- chr.sort(chrlist)
 62 | 
 63 | 
 64 | 
 65 | 
 66 | if(!is.null(genesgr)){
 67 |     if(anyDuplicated(genesgr@elementMetadata$gene_id) > 0) stop("The genesgr provided object contains duplicated gene_id values")
 68 | }else{
 69 |     genesgr <- get.genesgr(genome.v=genome.v)
 70 | }
 71 | 
 72 | cnvdat_gr <- with(cnvdat, GRanges(chrom, IRanges::IRanges(start=start, end=end)))
 73 | 
 74 | hits <- GenomicAlignments::findOverlaps(genesgr,cnvdat_gr)  # query = genes, subject = CNV segments
 75 | 
 76 | overlaps_all <- pintersect(genesgr[queryHits(hits),], cnvdat_gr[subjectHits(hits),])
 77 | width_overlap <- width(overlaps_all)  # width of each gene/segment intersection
 78 | 
 79 | df <- data.table(cnvdat[subjectHits(hits),c("sample","segmean")],genesgr[queryHits(hits)]@elementMetadata$gene_id,width_overlap)
 80 | colnames(df) <- c("sample","segmean","gene_id","width")
 81 | 
 82 | a <- sapply(unique(cnvdat$sample), 
 83 |           function(i) df[sample == i, .(CN=mean(segmean)), by = "gene_id"],  # unweighted mean over every segment overlapping the gene; 'width' is not used here
 84 |           simplify=FALSE)
 85 | 
 86 | newfunc <- function(dfi) {  # turns a per-sample (gene_id, CN) table into a CN vector named by gene_id
 87 |     cn<- dfi$CN
 88 |     names(cn) <- dfi$gene_id
 89 |     return(cn)
 90 |     }
 91 | 
 92 | b<- lapply(a, function(x) newfunc(x)[genesgr@elementMetadata$gene_id] )  # reindex by every gene_id in genesgr; genes without an overlapping segment become NA
 93 | cnvmat <- do.call(cbind,b)
 94 | rownames(cnvmat) <- genesgr@elementMetadata$gene_id
 95 | 
 96 | out <- genecnv(
 97 |     cnvmat=cnvmat,
 98 |     genesgr=genesgr,
 99 |     cnv=cnv,
100 |     param=list(genome.v=genome.v,
101 |                chrlist=chrlist, 
102 |                fill.gaps=fill.gaps,
103 |                verbose=verbose
104 |     )
105 | )
106 | return(out)
107 | 
108 | }
109 | 
110 | 
111 | 
112 | 
113 | #' Amplifications and deletions
114 | #' 
115 | #' Retrieve amplification and deletion events from a 'genecnv.obj' generated by 'gene.cnv' function
116 | #' 
117 | #' @param genecnv.obj (genecnv) an instance of the class 'genecnv' containing gene level copy number info
118 | #' @param logr.cut (numeric) the log-ratio cutoff: genes with log-ratio >= logr.cut are considered amplified and genes with log-ratio <= -logr.cut deep deleted (e.g 2 = 8 copies for amplification and 0.5 copies for deep deletions, in diploid regions)
119 | #' @return (list) A list of lists including amplified.list, amplified.rank, deepdel.list and deepdel.rank
120 | #' @keywords CNV, segmentation, genes
121 | #' @export
122 | #' @examples
123 | #' 
124 | #' ## validate input data.frames
125 | #' cnv <- validate.cnv(segdat_lung_ccle)
126 | #' 
127 | #' genecnv.obj <- gene.cnv(cnv)
128 | #' 
129 | #' geneampdel <- amp.del(genecnv.obj, logr.cut = 2)
130 | #' lapply(geneampdel,head)
131 | 
132 | amp.del <- function(genecnv.obj, logr.cut=2){
133 |     
134 |     amp_list <- apply(genecnv.obj@cnvmat, 1, function(x) names(which(x >= logr.cut)))  # per gene: samples at or above the cutoff (NA entries are dropped by which)
135 |     amp_list <- amp_list[which(unlist(lapply(amp_list,length)) > 0)]  # keep only genes amplified in at least one sample
136 |     amp_rank <- sort(unlist(lapply(amp_list,length)),decreasing=TRUE)  # number of samples per gene, most recurrent first
137 | 
138 |     del_list <- apply(genecnv.obj@cnvmat, 1, function(x) names(which(x <= -logr.cut)))  # symmetric cutoff for deep deletions
139 |     del_list <- del_list[which(unlist(lapply(del_list,length)) > 0)]
140 |     del_rank <- sort(unlist(lapply(del_list,length)),decreasing=TRUE)
141 |     
142 |     return(list(amplified.list = amp_list,
143 |                 amplified.rank = amp_rank,
144 |                 deepdel.list = del_list,
145 |                 deepdel.rank = del_rank))
146 | }
147 | 
148 | 
149 | 


--------------------------------------------------------------------------------
/R/freq.p.test.r:
--------------------------------------------------------------------------------
  1 | #' Data class null.freq
  2 | #' 
  3 | #' Class to store observed and null distributions as well as the empirical p-values associated with observed values 
  4 | #' 
  5 | #' @param freq.cut (numeric): the value from observed distribution that satisfies certain p-value cutoff
  6 | #' @param pvalues (numeric): a vector of empirical p-values, one for each observed value 
  7 | #' @param observed (numeric): vector of observed distribution 
  8 | #' @param null (numeric): vector of null distribution
  9 | #' @param param (list): a list of the parameters provided 
 10 | #' @return an instance of the class 'null.freq'
 11 | #' @export
 12 | null.freq <- setClass("null.freq", representation(
 13 |     freq.cut  = 'numeric',
 14 |     pvalues = 'numeric',
 15 |     observed = 'numeric',
 16 |     null='numeric',
 17 |     param = 'list'
 18 | ))
 19 | 
 20 | 
 21 | setMethod("show","null.freq",function(object){  # prints a fixed one-line description; no slot is read
 22 |     writeLines(paste("An object of class null.freq from svpluscnv"))
 23 | })
 24 | 
 25 | 
 26 | #' 
 27 | #' Return frequency threshold from null.freq object
 28 | #' 
 29 | #' @param object (null.freq) An object of class null.freq 
 30 | #' @return (numeric) the value stored in the 'freq.cut' slot of the object
 31 | #' @export
 32 | #' @docType methods
 33 | #' @rdname freq.threshold-methods
 34 | 
 35 | setGeneric("freq.threshold", function(object) standardGeneric("freq.threshold"))
 36 | 
 37 | #' @rdname freq.threshold-methods
 38 | 
 39 | setMethod("freq.threshold", "null.freq", function(object) object@freq.cut)
 40 | 
 41 | 
 42 | #' Frequency hot spot detection
 43 | #'  
 44 | #' Obtains significance cutoff for the frequency of binary events encoded in a matrix such as that generated by shattered.regions and shattered.regions.cnv algorithms
 45 | #' 
 46 | #' @param mat (numeric matrix) a binary matrix where columns will be tested for their sum value compared to a permuted matrix
 47 | #' @param method (character) the method to pass to p.adjust function
 48 | #' @param p.cut (numeric) the cutoff for multiple hypothesis corrected p.value  
 49 | #' @param iter (numeric) Number of iterations to produce null distribution (note that null size will be iter*ncol(mat))
 50 | #' @param zerofreq (logical) whether to remove bins with observed frequency = 0; It is recommended to set to TRUE when the bins span genomic regions of low coverage   
 51 | #' @param plot (logical) whether to generate a histogram comparing observed and null frequency distributions   
 52 | #' @param verbose (logical) whether to return messages   
 53 | #' @return an instance of the class 'null.freq'
 54 | #' @keywords empirical p.value, p.adjust  
 55 | #' @export
 56 | #' @examples
 57 | #' 
 58 | #' ## validate input data.frames
 59 | #' cnv <- validate.cnv(segdat_lung_ccle)
 60 | #' 
 61 | #' ## obtain a matrix of genomic bins vs samples indicating high density of breaks
 62 | #' shatt.regions <- shattered.regions.cnv(cnv)
 63 | #' mat <- shatt.regions@high.density.regions.hc
 64 | #' 
 65 | #' freq.p.test(mat)
 66 | 
 67 | 
 68 | 
 69 | freq.p.test <- function(mat, 
 70 |                         method="fdr", 
 71 |                         p.cut= 0.05,
 72 |                         iter=100,
 73 |                         zerofreq=TRUE,
 74 |                         plot=TRUE,
 75 |                         verbose=FALSE){
 76 | 
 77 | stopifnot(is.numeric(mat))
 78 | 
 79 | # obtain a frequency vector
 80 | highDensitiBinsFreq <- apply(mat,2,sum)
 81 | 
 82 | if(zerofreq){
 83 |     bins.nozero <- names(which(highDensitiBinsFreq > 0))
 84 |     mat <- mat[,bins.nozero]  # NOTE(review): drops to a vector if only one bin is non-zero - confirm callers never hit this
 85 |     highDensitiBinsFreq <- highDensitiBinsFreq[bins.nozero]
 86 |     if(verbose) message( paste("Testing ",dim(mat)[2],"non-zero bins in ",dim(mat)[1], "samples") )
 87 | }else{
 88 |     if(verbose) message( paste("Testing ",dim(mat)[2],"bins in ",dim(mat)[1], "samples") )
 89 | }
 90 | 
 91 | 
 92 | # create null distribution by sample shuffling
 93 | highDensitiBinsFreqRandomFreq<-list()
 94 | for(i in 1:iter){
 95 |     highDensitiBinsRandom<- t(apply(mat,1,sample))  # each row is permuted independently across columns
 96 |     highDensitiBinsFreqRandomFreq[[i]] <- apply(highDensitiBinsRandom,2,sum)
 97 |     }
 98 | highDensitiBinsFreqRandomFreqNull <- unlist(highDensitiBinsFreqRandomFreq)
 99 | if(zerofreq)highDensitiBinsFreqRandomFreqNull[which(highDensitiBinsFreqRandomFreqNull == 0)] <- 1  # null zeros are counted as frequency 1 when zero bins were removed
100 | 
101 | # obtain the frequency cutoff for statistical significance (e.g. FDR < 0.01)
102 | pvalues <- highDensitiBinsFreq
103 | for(i in 0:max(highDensitiBinsFreq)){
104 |     pvalues[which(highDensitiBinsFreq == i)] <- length(which(highDensitiBinsFreqRandomFreqNull >i))/ length(highDensitiBinsFreqRandomFreqNull)  # fraction of null values strictly greater than i
105 |     }
106 | 
107 | freq.cut <- min(highDensitiBinsFreq[names(which(p.adjust(pvalues, method=method) < p.cut))])  # lowest observed frequency passing the adjusted cutoff; min() of an empty set is Inf (with a warning)
108 | 
109 | if(plot){
110 |     xstart<- 1
111 |     obsd <- highDensitiBinsFreq[which(highDensitiBinsFreq >= xstart)]
112 |     nulld<- highDensitiBinsFreqRandomFreqNull[which(highDensitiBinsFreqRandomFreqNull >= xstart)]
113 | 
114 |     max_freq <- max(as.numeric(names(table(obsd))),as.numeric(names(table(nulld))))
115 |     
116 |     h1 <- hist(obsd, breaks=seq(xstart,max_freq,1),plot=FALSE)  # computed only to size the y axis
117 |     h2 <- hist(nulld, breaks=seq(xstart,max_freq,1),plot=FALSE)
118 |     
119 |     max_density <- max(h1$density,h2$density)
120 |     hist(obsd, breaks=seq(xstart,max_freq,1),col='salmon', border=NA, xlim=c(0,max_freq), ylim=c(0,max_density),
121 |          las=1,cex.axis=1.4,ylab="",prob = TRUE ,main="",xlab="n samples",right = TRUE)
122 |     hist(nulld, breaks=seq(xstart,max_freq,1), add=TRUE,col=scales::alpha('black',.5), 
123 |          border=NA, prob = TRUE)
124 |     legend("topright",c("Observed","Null"),
125 |            fill=c('salmon',scales::alpha('black',.5)),border=NA,bty='n',cex=1.1)
126 |     lines(c(freq.cut,freq.cut),c(0,max(h1$density)/2))   # cutoff for statistical significance
127 |     text(freq.cut+max_freq/25,max(h1$density)/3,paste(method, "<", p.cut),srt=90)
128 | }
129 | 
130 | return(null.freq(
131 |     freq.cut = freq.cut,
132 |     pvalues = pvalues,
133 |     observed = highDensitiBinsFreq,
134 |     null = highDensitiBinsFreqRandomFreqNull,
135 |     param = list(method=method, p.cut= p.cut, iter=iter)
136 |     ))
137 | }
138 | 
139 | 
140 | 


--------------------------------------------------------------------------------
/R/internal_functions.r:
--------------------------------------------------------------------------------
  1 | #' Inter-quantile mean
  2 | #' 
  3 | #' Obtains the mean of the values of 'x' whose ranks lie strictly between the lower and upper quantiles of the ranks
  4 | #' 
  5 | #' @param x numeric vector to compute interquantile average
  6 | #' @param lowQ lower quantile (default 0.1)
  7 | #' @param upQ upper quantile (default 0.9)
  8 | #' @return (numeric) the IQM value
  9 | #' @keywords statistics, interquartile 
 10 | #' @export
 11 | #' @examples
 12 | #' 
 13 | #' x <- rnorm(100)
 14 | #' IQM(x)
 15 | 
 16 | 
 17 | IQM <- function(x, lowQ=0.1, upQ=0.9){
 18 |     
 19 |     stopifnot(is.numeric(x))
 20 |     
 21 |     rx <- rank(x,ties.method ='random')  # ties are broken at random, so every element gets a distinct rank
 22 |     qt1<-quantile(rx,lowQ)  # the quantiles are taken on the ranks, not on the values
 23 |     qt2<-quantile(rx,upQ)
 24 |     
 25 |     inter_quantile_mean <- mean(x[intersect(which(rx > qt1),which(rx < qt2))])  # both bounds are exclusive
 26 |     
 27 |     return(inter_quantile_mean)
 28 | }
 29 | 
 30 | 
 31 | #' Inter-quantile standard deviation
 32 | #' 
 33 | #' Obtains the standard deviation of the values of 'x' whose ranks lie strictly between the lower and upper quantiles of the ranks
 34 | #' 
 35 | #' @param x numeric vector to compute interquantile standard deviation
 36 | #' @param lowQ lower quantile (default 0.1)
 37 | #' @param upQ upper quantile (default 0.9)
 38 | #' @return (numeric) the IQSD value
 39 | #' @keywords statistics, interquartile 
 40 | #' @export
 41 | #' @examples
 42 | #' 
 43 | #' x <- rnorm(100)
 44 | #' IQSD(x)
 45 | 
 46 | 
 47 | IQSD <- function(x,lowQ=0.1,upQ=0.9){
 48 |     stopifnot(is.numeric(x))
 49 |     
 50 |     rx <- rank(x,ties.method ='random')  # ties are broken at random, so every element gets a distinct rank
 51 |     qt1<-quantile(rx,lowQ)  # the quantiles are taken on the ranks, not on the values
 52 |     qt2<-quantile(rx,upQ)
 53 |     
 54 |     inter_quantile_mean <- sd(x[intersect(which(rx > qt1),which(rx < qt2))])  # despite its name this variable holds the standard deviation
 55 |     return(inter_quantile_mean)
 56 |     
 57 | }
 58 | 
 59 | #' Color map from numeric vector
 60 | #' 
 61 | #' Produces a vector of colors based on a given palette. The colors are defined by the input vector
 62 | #' 
 63 | #' @param x numeric vector 
 64 | #' @param pal color palette; if NULL a 256 color ramp from lightblue through white to salmon is used
 65 | #' @param limits numeric limits (lower, upper) for color mapping; if NULL the range of x is used
 66 | #' @return a color vector graded according to x
 67 | #' @keywords color, number 
 68 | #' @export
 69 | #' @examples
 70 | #' 
 71 | #' x <- rnorm(100)
 72 | #' x_color <- map2color(x)
 73 | #' head(x_color)
 74 | 
 75 | map2color <- function(x, pal=NULL, limits=NULL){
 76 |     if(is.null(limits)) limits = range(x)
 77 |     if(is.null(pal)) pal <- colorRampPalette(c("lightblue","white","salmon"))(256)
 78 |     return(pal[findInterval(x, seq(limits[1], limits[2], length.out = length(pal)+1), all.inside=TRUE)])  # all.inside=TRUE maps values outside 'limits' to the first or last color
 79 | }
 80 | 
 81 | 
 82 | 
 83 | #' Unique random string generator
 84 | #' 
 85 | #' Generates n unique random character strings of a given length. Note that the length must be big enough to make repeated strings unlikely; an error is raised if any repeated string is produced
 86 | #' 
 87 | #' @param n the number of unique random strings to return
 88 | #' @param strlen random string length
 89 | #' @return a vector of unique random character strings
 90 | #' @keywords random string
 91 | #' @export
 92 | #' @examples
 93 | #' 
 94 | #' # To ensure reproducibility make sure to set the seed
 95 | #' set.seed(123456789)
 96 | #' 
 97 | #' createRandomString(1, 10)
 98 | 
 99 | 
100 | createRandomString <- function(n=1, strlen=10){
101 |     
102 |     strlenchain <- strlen*n*2  # twice as many characters as the n strings need are drawn
103 |     
104 |     chain <- paste(sample(c(letters, LETTERS),strlenchain, replace=TRUE),collapse="")
105 |     idresult <- strsplit(gsub(paste("(.{",strlen,"})",sep=""), "\\1 ", chain)," ")  # insert a space after every 'strlen' characters, then split on spaces
106 |     
107 |     if(anyDuplicated(idresult[[1]]) != 0) stop("Repeated strings were produced; try modifying the 'seed' or increasing 'strlen'")  # checked over all 2*n chunks, not only the n returned
108 |     
109 |     return(idresult[[1]][1:n])
110 | }
111 | 
112 | 
113 | 
114 | #' Chromosome start and end
115 | #' 
116 | #' Obtains a chromosome start and end positions from a reference genome version
117 | #' 
118 | #' @param genome.v (character) reference genome version to retrieve chromosome bands (hg19 or GRCh37 and hg38 or GRCh38) 
119 | #' @return (data.table) a table with columns 'chrom', 'begin' (always 0) and 'end' for each chromosome
120 | #' @keywords chromosome limits, reference genome
121 | #' @export
122 | #' @examples
123 | #' 
124 | #' d3gb.chr.lim(genome.v="hg19")
125 | #' 
126 | 
127 | d3gb.chr.lim <- function(genome.v){
128 |     
129 |     stopifnot(genome.v %in% c("hg19","hg38","GRCh37","GRCh38"))
130 |     
131 |     if(genome.v %in% c("hg19","GRCh37")){ bands <- GRCh37.bands
132 |     }else if(genome.v %in% c("hg38","GRCh38")){ bands <- GRCh38.bands}
133 |     
134 |     ends<- aggregate(end ~ chr, bands, max)  # largest band end per chromosome
135 |     ends<- ends[order(ends$chr),]
136 |     ends<- ends[suppressWarnings(order(as.numeric(as.character(ends$chr)) )),]  # numeric chromosomes first; non-numeric names become NA and sort last
137 |     
138 |     chr.lim <- data.table(paste("chr",ends$chr,sep=""),rep(0,nrow(ends)),ends$end)  # one 'begin' per row: length() of a data.frame is its column count
139 |     colnames(chr.lim) <-c("chrom","begin","end")
140 |     
141 |     return(chr.lim)
142 | }
143 | 
144 | #' Merge two lists
145 | #' 
146 | #' Merges 2 lists into one whose entries hold, for each name, either the unique union (over all names in x or y) or the intersection (over names shared by x and y) of the two vectors. Entries that would be empty are omitted 
147 | #' 
148 | #' @param x (list): input list 1
149 | #' @param y (list): input list 2
150 | #' @param fun (character): Either 'unique' or 'intersect' are accepted; any other value raises an error
151 | #' @return (list) merged list from x and y 
152 | #' @keywords merge lists 
153 | #' @export
154 | #' @examples
155 | #' 
156 | #' x <- sapply(letters[1:10], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE )
157 | #' y <- sapply(letters[5:15], function(i) sample(1:10)[1:sample(2:10)[1]], simplify=FALSE )
158 | #' merge2lists(x,y)
159 | 
160 | merge2lists <- function(x,y,fun="unique"){
161 | 
162 | mergedList <- list()
163 | 
164 | if(fun == "unique"){
165 |     for(i in unique(c(names(x),names(y)))){
166 |         if(length(y[[i]]) == 0 & length(x[[i]]) > 0){  # a name missing from a list yields NULL, whose length is 0
167 |             mergedList[[i]] <- x[[i]]
168 |         }else if(length(y[[i]]) > 0 & length(x[[i]]) == 0){
169 |             mergedList[[i]] <- y[[i]]
170 |         }else if(length(y[[i]]) > 0 & length(x[[i]]) > 0){
171 |             mergedList[[i]] <- unique(c(x[[i]],y[[i]]))
172 |         }
173 |     }
174 | }else if(fun == "intersect"){
175 |     for(i in intersect(names(x),names(y)) ){  # only names present in both lists are considered
176 |         commonElements <- intersect(x[[i]],y[[i]])
177 |         if(length(commonElements) > 0){
178 |             mergedList[[i]] <- commonElements
179 |         }
180 |     }
181 | }else{
182 |     stop(paste("Unknown function:",fun) )
183 | }
184 | 
185 | return(mergedList)
186 | 
187 | }
188 | 
189 | 
190 | 
191 | 
192 | 


--------------------------------------------------------------------------------
/R/clean.cnv.artifact.r:
--------------------------------------------------------------------------------
  1 | #' CNV segmentation gap filling
  2 | #' 
  3 | #' Fills the gaps in a segmentation data.frame. Chromosome limits are defined for the complete segmentation dataset then segments fill the missing terminal regions. 
  4 | #' The CN log-ratio of the added segments is set to the average of the closest neighbours in each sample; terminal gaps take the log-ratio of their single flanking segment.
  5 | #' 
  6 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
  7 | #' @param minsize (numeric) the minimum gap size required to fill a gap between two consecutive segments
  8 | #' @param chrlist (character) list of chromosomes to include chr1, chr2, etc... (currently not used to filter the output)
  9 | #' @param verbose (logical) whether to return internal messages
 10 | #' @return the gap-filled segmentation data re-validated by validate.cnv
 11 | #' @keywords CNV, segmentation
 12 | #' @export
 13 | #' @examples
 14 | #' 
 15 | #' ## validate input data.frames
 16 | #' cnv <- validate.cnv(segdat_lung_ccle)
 17 | #' 
 18 | #' cnv2 <- segment.gap(cnv)
 19 | #' cnv2
 20 | 
 21 | segment.gap <- function(cnv,
 22 |                         minsize=5000,
 23 |                         chrlist=NULL,
 24 |                         verbose=FALSE){
 25 |     
 26 |     stopifnot(cnv@type == "cnv")
 27 |     cnvdat <- cnv@data
 28 |     
 29 |     chrlims <- chromosome.limit.coords(cnv)
 30 |     if(is.null(chrlist)) chrlist <- chrlims$chrom
 31 |     
 32 |     chrlims_df<- data.frame(chrlims)
 33 |     rownames(chrlims_df) <- chrlims_df$chrom
 34 |     
 35 |     cnvdat_df <- data.frame(cnvdat)
 36 |     
 37 |     if(verbose){
 38 |         message("Filling gaps is the segmentation data.frame")
 39 |         pb <- txtProgressBar(style=3)
 40 |         cc <-0
 41 |         tot <- nrow(cnvdat_df)
 42 |     }
 43 |     newsegments<-list()
 44 |     if(cnvdat_df[1,"start"] > chrlims_df[cnvdat_df[1,"chrom"],"begin"]){
 45 |         newsegments[["1"]] <- data.frame(cnvdat_df[1,c("sample","chrom")],chrlims_df[cnvdat_df[1,"chrom"],"begin"],cnvdat_df[1,"start"]-1,0,cnvdat_df[1,"segmean"])
 46 |     }
 47 |     nseg <- nrow(cnvdat_df)  # index of the last segment; needed after the loop even when the loop body never runs
 48 |     for(i in seq_len(nseg)[-1]){  # assumes rows are ordered by sample, chromosome and position - TODO confirm against validate.cnv
 49 |         if(cnvdat_df[i,"chrom"] == cnvdat_df[i-1,"chrom"] && cnvdat_df[i,"sample"] == cnvdat_df[i-1,"sample"]){  # consecutive segments of the same profile
 50 |             if( cnvdat_df[i,"start"] - cnvdat_df[i-1,"end"] > minsize){
 51 |                 newsegments[[as.character(i)]] <- data.frame(cnvdat_df[i,c("sample","chrom")],cnvdat_df[i-1,"end"]+1,cnvdat_df[i,"start"]-1,0,mean(cnvdat_df[c(i,i-1),"segmean"]) )
 52 |             }
 53 |         }else{
 54 |             if(cnvdat_df[i,"start"] > chrlims_df[cnvdat_df[i,"chrom"],"begin"]){ 
 55 |                 newsegments[[as.character(i)]] <- data.frame(cnvdat_df[i,c("sample","chrom")],chrlims_df[cnvdat_df[i,"chrom"],"begin"],cnvdat_df[i,"start"]-1,0,cnvdat_df[i,"segmean"])
 56 |             }
 57 |             if(cnvdat_df[i-1,"end"] < chrlims_df[cnvdat_df[i-1,"chrom"],"end"]){
 58 |                 newsegments[[paste0(i-1,"_end")]] <- data.frame(cnvdat_df[i-1,c("sample","chrom")],cnvdat_df[i-1,"end"]+1,chrlims_df[cnvdat_df[i-1,"chrom"],"end"],0,cnvdat_df[i-1,"segmean"])  # own key so the start fill above is not overwritten
 59 |             }
 60 |         }
 61 |         if(verbose) cc <- cc+1
 62 |         if(verbose) setTxtProgressBar(pb, cc/tot)
 63 |     }
 64 |     if(cnvdat_df[nseg,"end"] < chrlims_df[cnvdat_df[nseg,"chrom"],"end"]) newsegments[[paste0(nseg,"_end")]] <- data.frame(cnvdat_df[nseg,c("sample","chrom")],cnvdat_df[nseg,"end"]+1,chrlims_df[cnvdat_df[nseg,"chrom"],"end"],0,cnvdat_df[nseg,"segmean"])
 65 |     if(verbose) close(pb)
 66 |     
 67 |     newsegments <- lapply(newsegments, setNames, colnames(cnvdat_df)[1:6])
 68 |     
 69 |     segout <- rbind(cnvdat_df[,1:6], do.call(rbind,newsegments))
 70 |     out <- validate.cnv(segout)
 71 |     
 72 |     return(out)
 73 | }
 74 | 
 75 | 
 76 | #' CNV artifact detection and filtering
 77 | #' 
 78 | #' Detects identical or near-identical CNV segments across multiple samples susceptible of representing common variants or technical artifacts. Those segments are removed and, when 'fill.gaps=TRUE', the resulting gaps are filled by segment.gap 
 79 | #' 
 80 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' validated by validate.cnv
 81 | #' @param n.reps (numeric) groups of more than n.reps near-identical segments are considered artifacts
 82 | #' @param cnv.size (numeric) only segments smaller than this size are evaluated as candidate artifacts
 83 | #' @param pc.overlap (numeric) minimum reciprocal overlap (0-1) for a pair of segments to be considered identical 
 84 | #' @param fill.gaps (logical) whether to fill gaps in the segmented data after filtering artifacts
 85 | #' @param minsize (numeric) the minimum gap size required to fill the gap. Only used if 'fill.gaps=TRUE'
 86 | #' @param verbose  (logical) whether to print internal messages
 87 | #' @return an instance of the class 'svcnvio' with the artifact segments removed; when 'fill.gaps=TRUE' the output of segment.gap is returned instead
 88 | #' @keywords CNV, segmentation, filter
 89 | #' @export
 90 | #' @examples
 91 | #' 
 92 | #' ## validate input data.frame
 93 | #' cnv <- validate.cnv(segdat_lung_ccle)
 94 | #' 
 95 | #' cnvcl <- clean.cnv.artifact(cnv)
 96 | #' cnvcl
 97 | 
 98 | clean.cnv.artifact<- function(cnv,
 99 |                               n.reps=4,
100 |                               cnv.size=2000000,
101 |                               pc.overlap=0.99,
102 |                               fill.gaps=TRUE,
103 |                               minsize=5000,
104 |                               verbose=TRUE){
105 | 
106 |     stopifnot(cnv@type == "cnv")
107 |     cnvdat <- cnv@data
108 |     
109 | all_artifacts_l <-list()
110 | 
111 | cnvdat_short <- cnvdat[which(cnvdat$end - cnvdat$start < cnv.size),]  # only short segments are candidates
112 | 
113 | for(chr in unique(cnvdat$chrom)){
114 | 
115 |   if(verbose) cat("\r",chr)
116 | 
117 |   segchr <- cnvdat_short[which(cnvdat_short$chrom == chr),]
118 |   segchr.gr <- with(segchr, GRanges(chrom, IRanges(start=start, end=end)))
119 |   hits = GenomicAlignments::findOverlaps(segchr.gr,segchr.gr)  # all-against-all overlaps within the chromosome
120 |   overlaps <- pintersect(segchr.gr[queryHits(hits)], segchr.gr[subjectHits(hits)])
121 |   
122 |   percentOverlapA <- width(overlaps) / width(segchr.gr[queryHits(hits)])
123 |   percentOverlapB <- width(overlaps) / width(segchr.gr[subjectHits(hits)])
124 |   hits_p <- as.data.frame(hits[intersect(which(percentOverlapA >= pc.overlap),which(percentOverlapB >= pc.overlap)),])  # keep reciprocal overlaps only
125 |   reps <- aggregate(subjectHits~queryHits,hits_p,paste,simplify=FALSE)
126 |   reps_list <- reps$subjectHits
127 |   names(reps_list) <- reps$queryHits
128 |   reps_list_collapse <- lapply(lapply(reps_list,sort),paste,collapse=" ")  # one key per segment: its sorted set of matching segments
129 |   groups_a <- table(unlist(reps_list_collapse))  # how many segments share exactly the same set of matches
130 |   all_artifacts <- as.numeric(unlist(strsplit(names(which(groups_a > n.reps))," ")))  # row indices in segchr of groups seen more than n.reps times
131 |   all_artifacts_l[[chr]] <- segchr[all_artifacts,]
132 | }
133 | 
134 | all_artifacts <- do.call(rbind,unname(all_artifacts_l))
135 | toremove <- unite(all_artifacts, "newcol", c("sample","chrom","start","end"), remove=FALSE,sep=":")$newcol  # segments are matched by a sample:chrom:start:end key
136 | allsegids <- unite(cnvdat, "newcol", c("sample","chrom","start","end"), remove=FALSE,sep=":")$newcol
137 | cnvdat_clean <- svcnvio(data = cnvdat[which(!allsegids %in% toremove),],type = "cnv")
138 | 
139 | if(fill.gaps){ 
140 |     segclean_fill <-  segment.gap(cnvdat_clean, minsize=minsize, verbose=verbose)
141 |     return(segclean_fill)
142 |   }else{
143 |     return(cnvdat_clean)
144 |   }
145 | 
146 | }
147 | 
148 | 
149 | 
150 | 


--------------------------------------------------------------------------------
/R/validate.input.data.r:
--------------------------------------------------------------------------------
  1 | #' Data class svcnvio
  2 | #' 
  3 | #' Class to store validated CNV segmentation ('cnv') or structural variant ('svc') data
  4 | #' 
  5 | #' @param data (data.table): cnv or svc data.table to be validated by 'validate.cnv' or 'validate.svc' respectively
  6 | #' @param type (character): the data type  "cnv" or "svc" defined by "validate.cnv" or "validate.svc" respectively
  7 | #' @seealso Additional data format information in the man pages of validate.cnv and validate.svc
  8 | #' @return an instance of the class 'svcnvio' containing SV data derived from CNV or SVC data types;  A unique id (uid) column is also added
  9 | #' @export
 10 | 
 11 | svcnvio <- setClass("svcnvio", representation(
 12 |     data  = "data.table",
 13 |     type = "character"
 14 | ))
 15 | 
 16 | setMethod("show","svcnvio",function(object){
 17 |     writeLines(paste("An object of class svcnvio from svpluscnv storing",object@type,"data from",length(unique(object@data$sample)),"samples"))
 18 | })
 19 | 
 20 | #' Initialization of SVC data
 21 | #' 
 22 | #' This function validates and reformats the SV (structural variant) calls input. It is used internaly by 'svpluscnv' functions that require this type of data.
 23 | #' A few formatting rules are enforced:
 24 | #' 1) The input must obtain 8 columns in the following order(sample ID, chromosome of origin, strand of origin, position of origin,, chromosome of destination, strand of destination, position of destination, SV class)
 25 | #' 2) SV classes accepted: DEL(deletion), DUP(duplication), INS(insertion), TRA(translocation), INV(inversion) and BND(break end)
 26 | #' 3) Any variant in which chromosome of origin and destination differ are encoded as TRA (translocation)
 27 | #' 4) pos1 < pos2 is enforced for all variants in which chromosome of origin and destination are the same
 28 | #' 5) The class BND can be used to operate with complex events as long as both break ends are the same chromosome
 29 | #' 
 30 | #' @param sv.df (data.frame) structural variant table including the following fields: sample, chrom1, pos1, strand1, chrom2, pos2, strand2, svclass
 31 | #' @return an instance of the class 'svcnvio' containing SV data derived from SVC data type;  A unique id (uid) column is also added
 32 | #' @keywords SV, structural variants
 33 | #' @export
 34 | #' @examples
 35 | #' 
 36 | #' validate.svc(svdat_lung_ccle)
 37 | 
 38 | 
 39 | validate.svc <- function(sv.df){
 40 |     
 41 |     stopifnot(ncol(sv.df) >= 8)
 42 |     uid <-  paste("svc_",createRandomString(nrow(sv.df),10),sep="")
 43 |     svc <- data.table(remove.factors(sv.df[,1:8]),uid)
 44 |     
 45 |     colnames(svc) <- c("sample","chrom1","pos1","strand1","chrom2","pos2","strand2","svclass","uid")
 46 |     if(length(grep("chr",svc[1]$chrom1)) == 0) svc$chrom1 <- paste("chr",svc$chrom1,sep="")
 47 |     if(length(grep("chr",svc[1]$chrom2)) == 0) svc$chrom2 <- paste("chr",svc$chrom2,sep="")
 48 |     
 49 |     stopifnot(is.numeric(svc$pos1))
 50 |     stopifnot(is.numeric(svc$pos2))
 51 |     stopifnot(is.character(svc$chrom1))
 52 |     stopifnot(is.character(svc$chrom2))
 53 |     stopifnot(is.character(svc$sample))
 54 |     
 55 |     svc[grep("INV",svc$svclass)]$svclass <- "INV"
 56 |     svc[grep("DUP",svc$svclass)]$svclass <- "DUP"
 57 |     
 58 |     extrachr <- which(unlist(lapply(apply(svc[,c("chrom1","chrom2")],1,unique),length)) == 2) 
 59 |     svc[extrachr]$svclass <- "TRA"
 60 |     
 61 |     wrong_class <- setdiff(unique(svc$svclass),c("DEL","DUP","TRA","INV","INS","BND"))
 62 |     try(if(length(wrong_class) > 0) message(paste("SV classes not accepted:", paste(wrong_class,collapse=","), "will be set as BND") ))
 63 |     svc[which(!svc$svclass %in% c("DEL","DUP","TRA","INV","INS","BND"))]$svclass <- "BND"
 64 |     
 65 |     # ensure that pos1 is upstream pos2
 66 |     intrachr <- which(unlist(lapply(apply(svc[,c("chrom1","chrom2")],1,unique),length)) == 1) 
 67 |     intrachr_rev <- intersect(which(svc$pos2 -svc$pos1 < 0),intrachr)
 68 |     
 69 |     
 70 |     if(length(intrachr_rev) > 0){
 71 |         svcrev <- svc[intrachr_rev,c(1,2,6,7,5,3,4,8,9)]
 72 |         colnames(svcrev) <- c("sample","chrom1","pos1","strand1","chrom2","pos2","strand2","svclass","uid")
 73 |         svc <- rbind(svcrev,svc[setdiff(1:nrow(svc),intrachr_rev)])
 74 |     }
 75 |     
 76 |     stopifnot(nrow(svc) > 0)
 77 |     
 78 |     return(svcnvio(
 79 |         data=svc,
 80 |         type="svc"
 81 |     ))
 82 |     
 83 | }
 84 | 
 85 | #' Chromosome ordering
 86 | #' 
 87 | #' A function to order a list of chromosomes 
 88 | #' 
 89 | #' @param chrlist (character): a vector containing chromosome names (chr1, chr2...chrX,chrY  ) 
 90 | #' @return a character vector of sorted chromosomes
 91 | #' @keywords CNV, segmentation, genes
 92 | #' @export
 93 | #' @examples
 94 | #' 
 95 | #' chrlist <- paste("chr",c("X","Y",sample(1:22)),sep="")
 96 | #' chr_sorted <- chr.sort(chrlist)
 97 | 
 98 | 
 99 | chr.sort <- function(chrlist){ 
100 |     chrunique <- sort(gsub("chr","",unique(chrlist)))
101 |     chrsort <- paste("chr",chrunique[suppressWarnings(order(as.numeric(chrunique) ))],sep="")
102 |     return(chrsort)
103 | }
104 | 
105 | 
#' Initialization of CNV data
#' 
#' This function validates and reformats the CNV segmentation data type containing copy number log-ratios. It is used internally by 'svpluscnv' functions that require this type of data.
#' The first 6 columns are used and renamed to sample, chrom, start, end, probes and segmean; the "chr" prefix is added to the chromosome column when its first value lacks it; segments are returned sorted by sample, chromosome and start position.
#' 
#' @param cnv.df (data.frame) segmentation data with at least 6 columns: sample, chromosome, start, end, probes, segment_mean. Sample and chromosome may be character or factor; start, end and segment_mean must be numeric
#' @return an instance of the class 'svcnvio' containing segmentation data derived from CNV data type;  A unique id (uid) column is also added
#' @keywords CNV, segmentation
#' @export
#' @examples
#' 
#' validate.cnv(segdat_lung_ccle)


validate.cnv <- function(cnv.df){
    
    stopifnot(ncol(cnv.df) >= 6)
    uid <-  paste("cnv_",createRandomString(nrow(cnv.df),10),sep="")
    cnvdat <- data.table(cnv.df[,1:6],uid)
    
    colnames(cnvdat) <- c("sample","chrom","start","end","probes","segmean","uid")
    
    # Factor identifiers are converted to character (validate.svc handles factors
    # as well). This must happen before the prefix test below, which would not
    # recognise a factor value and would produce names such as "chrchr1".
    if(is.factor(cnvdat$sample)) cnvdat$sample <- as.character(cnvdat$sample)
    if(is.factor(cnvdat$chrom)) cnvdat$chrom <- as.character(cnvdat$chrom)
    
    if(length(grep("chr",cnvdat[1,2])) == 0) cnvdat[,"chrom"] <- paste("chr",cnvdat$chrom,sep="")
    stopifnot(is.numeric(cnvdat$start))
    stopifnot(is.numeric(cnvdat$end))
    stopifnot(is.numeric(cnvdat$segmean))
    stopifnot(is.character(cnvdat$sample))
    stopifnot(is.character(cnvdat$chrom))
    
    chrlist <- chr.sort(unique(cnvdat$chrom))
        
    # successive sorts from the least to the most significant key; the final
    # order is sample, then chromosome (chr.sort order), then start
    cnvdat <- cnvdat[order(cnvdat$start),]
    cnvdat <- cnvdat[order(match(cnvdat$chrom, chrlist)),]
    cnvdat <- cnvdat[order(cnvdat$sample),]
    
    stopifnot(nrow(cnvdat) > 0)
    
    return(svcnvio(
        data=cnvdat,
        type="cnv"
    ))
    
}
147 | 
148 | 
#' Chromosome limit map
#' 
#' Derives, for every chromosome present in the CNV segmentation data, the lowest segment start and the highest segment end
#' 
#' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
#' @keywords CNV, segmentation, mapping
#' @return data.table with columns chrom, begin and end indicating the first and last mapped position of each chromosome
#' @export
#' @examples
#' 
#' ## validate input data.frame
#' cnv <- validate.cnv(segdat_lung_ccle)
#' 
#' chr.lim <- chromosome.limit.coords(cnv)

chromosome.limit.coords <- function(cnv){
    
    stopifnot(cnv@type == "cnv")
    segs <- cnv@data
    
    chrlist <- chr.sort(unique(segs$chrom))
    
    # chromosomes to summarise, named so that the results carry the chromosome name
    present <- unique(chrlist[chrlist %in% segs$chrom])
    names(present) <- present
    
    begin <- unlist(lapply(present, function(chr) min(segs$start[which(segs$chrom == chr)])))
    end <- unlist(lapply(present, function(chr) max(segs$end[which(segs$chrom == chr)])))
    
    chr.lim <- data.table(chrlist,begin,end)
    colnames(chr.lim) <- c("chrom","begin","end")
    return(chr.lim)
}
183 | 
184 | 
185 | 
186 | 
187 | 


--------------------------------------------------------------------------------
/R/gene.track.view.r:
--------------------------------------------------------------------------------
  1 | #' Gene track visualization
  2 | #' 
  3 | #' Creates a track visualization of a genomic region defined by gene boundaries or custom provided
  4 | #' 
  5 | #' @param chrom (character) Chromosome (e.g. chr9)
  6 | #' @param start (numeric) Genomic coordinate from specified chromosome to start plotting
  7 | #' @param stop (numeric) Genomic coordinate from specified chromosome to stop plotting
  8 | #' @param symbol (character) Gene acceoted hgnc symbol to retrieve coordinates and area plotting ()
  9 | #' @param upstr (numeric) Distance upstream specified gene to extend the area plotted
 10 | #' @param dnstr (numeric) Distance downstream specified gene to extend the area plotted
 11 | #' @param genome.v (character) Reference genome version to draw chromosome limits and centromeres (hg19 or hg38) 
 12 | #' @param addtext (logic) Whether to include transcript RefSeq ids in the plot
 13 | #' @param cex.text (numeric) The magnification to be used for transcript RefSeq text added
 14 | #' @param plot (logic) Whether to generate plot in open device
 15 | #' @param summary (logic) Whether to produce a data.table output with transcript information
 16 | #' @return A data.frame with gene isoform annotations and/or plot into open device
 17 | #' @param ... Additional graphical parameters
 18 | #' @keywords CNV, segmentation
 19 | #' @export
 20 | #' @examples
 21 | #' 
 22 | #' # obtain the coordinates of a desired genomic regionbased on a known gene locus 
 23 | #' refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD")
 24 | #' chrom <- refSeqGene$chrom
 25 | #' start <- refSeqGene$start - 150000;
 26 | #' stop <- refSeqGene$stop + 50000;
 27 | #' 
 28 | #' gene.track.view(symbol="PTPRD", genome.v="hg19")
 29 | 
 30 | 
 31 | gene.track.view <- function(chrom=NULL, start=NULL, stop=NULL, 
 32 |                        symbol=NULL,upstr=NULL,dnstr=NULL,
 33 |                        genome.v="hg19",
 34 |                        cex.text=0.6,
 35 |                        addtext=TRUE,
 36 |                        plot = TRUE,
 37 |                        summary=TRUE,
 38 |                        ...){
 39 |     
 40 |     if(genome.v %in% c("hg19","GRCh37")){
 41 |         refseq <- refseq_hg19
 42 |         refseq@data <- refseq@data[order(refseq@data$txStart)]
 43 |         refseq_gr <- with(refseq@data, GRanges(chrom, IRanges(start=txStart, end=txEnd), symbol=name2,transcript=name)) 
 44 |     }else if(genome.v %in% c("hg38","GRCh38")){
 45 |         refseq <- refseq_hg38
 46 |         refseq@data <- refseq@data[order(refseq@data$txStart),]
 47 |         refseq_gr <- with(refseq@data, GRanges(chrom, IRanges(start=txStart, end=txEnd), symbol=name2,transcript=name)) 
 48 |     }else{stop("Unspecified, or non available genome")}
 49 |     
 50 |     # define genomic region to plot
 51 |     if(!is.null(symbol) && symbol %in% refseq@data$name2){
 52 |         isonames <- refseq@data$name2[which(refseq@data$name2 == symbol)]
 53 |         names(isonames) <- refseq@data$name[which(refseq@data$name2 == symbol)]
 54 |         
 55 |         strand <- refseq@data$strand[which(refseq@data$name2 == symbol)][1]
 56 |         if(is.null(upstr)) upstr= 10000
 57 |         if(is.null(dnstr)) dnstr= 5000
 58 |         chrom <- unique(refseq@data$chrom[which(refseq@data$name2 == symbol)])
 59 |         if(strand == "-"){
 60 |             start <- min(refseq@data$txStart[which(refseq@data$name2 == symbol)]) - dnstr
 61 |             stop <- max(refseq@data$txEnd[which(refseq@data$name2 == symbol)]) + upstr
 62 |         }else{
 63 |             start <- min(refseq@data$txStart[which(refseq@data$name2 == symbol)]) - upstr
 64 |             stop <- max(refseq@data$txEnd[which(refseq@data$name2 == symbol)]) + dnstr
 65 |         }
 66 |     }else if(!is.null(chrom) && !is.null(start) && !is.null(stop)){
 67 |         coordgr <- with(data.frame(chrom,start,stop), GRanges(chrom, IRanges(start=start, end=stop))) 
 68 |         isonames <- refseq_gr[queryHits(GenomicAlignments::findOverlaps(refseq_gr,coordgr))]@elementMetadata$symbol
 69 |         names(isonames) <- refseq_gr[queryHits(GenomicAlignments::findOverlaps(refseq_gr,coordgr))]@elementMetadata$transcript
 70 |         if(length(isonames) == 0) stop("There is no transcripts in specified coordinates!")        
 71 |     }else{
 72 |         stop("Genomic coordinates or a valid hgnc gene symbol must be provided!")
 73 |     }
 74 |     
 75 |     isonames_list <- sapply(unique(isonames), function(i) names(which(isonames==i)),simplify = FALSE)
 76 |     exons_coord <- sapply(names(isonames), function(i) cbind(refseq@exonStarts[[i]],refseq@exonEnds[[i]]) ,simplify = FALSE)
 77 |     refseq_df <- refseq@data[which(refseq@data$name %in% names(exons_coord)),]
 78 |     rownames(refseq_df) <- names(exons_coord)
 79 | if(plot){
 80 |     geneRanges <- t(sapply(names(isonames), function(i)
 81 |         c(min(refseq@data$txStart[which(refseq@data$name == i)]),max(refseq@data$txEnd[which(refseq@data$name == i)]))))
 82 | 
 83 |     hits <- findOverlaps(IRanges(geneRanges[,1],geneRanges[,2]))
 84 |     hitsNames  <- data.frame(names(isonames)[queryHits(hits)],names(isonames)[subjectHits(hits)])
 85 |     maxOverlaps <- max(table(hitsNames[,1]))
 86 |     
 87 |     ylimit <- 1 + 0.5*maxOverlaps
 88 |     plot(x=NULL,y=NULL,xlim=range(c(start,stop)),ylim=range(c(-1.5,ylimit)),
 89 |          xaxt='n',yaxt='n',xlab='',ylab='',bty='n',...)
 90 |     
 91 |     
 92 |     rect(-1e6,0,1e16,10000,col = "grey90")
 93 |     
 94 |     seqYpos <- rep(seq(0.1,ylimit,ylimit/maxOverlaps ),length(isonames_list)) +0.5
 95 |     isoct <- 0
 96 | 
 97 |     for(gene in names(isonames_list)){
 98 |         
 99 |         for(iso in isonames_list[[gene]]){
100 |             isoct <- isoct +1
101 |             ypos <- seqYpos[isoct]
102 |             refseq_iso <-  refseq_df[which(refseq_df$name == iso)]
103 |             
104 |             iso_length <- refseq_iso$txEnd - refseq_iso$txStart
105 |             plot_length <-  stop-start
106 |             narrows <- ceiling(20*iso_length/plot_length)
107 |             arrow_x <- seq(refseq_iso$txStart,refseq_iso$txEnd , iso_length/narrows)
108 |             
109 |             strandpos <- exons_coord[[iso]][1,1]-(stop-start)/100
110 |             if(refseq_iso$strand == "-" ){ 
111 |                 points(strandpos, ypos, pch="-", col="red")
112 |                 arrows(arrow_x[2:(narrows+1)]+plot_length/200,rep(ypos,narrows), arrow_x[1:narrows],rep(ypos,narrows),length=0.1)
113 |             }else if(refseq_iso$strand == "+" ){
114 |                 points(strandpos,ypos,pch="+",cex=1,col="blue")
115 |                 arrows(arrow_x[1:narrows]-plot_length/200,rep(ypos,narrows),arrow_x[2:(narrows+1)],rep(ypos,narrows),length=0.1)
116 |             }
117 |             
118 |             lines(matrix(c(refseq_iso$txStart,refseq_iso$txEnd, ypos, ypos), 2, 2), lwd=2)
119 |             
120 |             bordercolor <- "black"; bgcolor<-"grey"
121 |             for(i in 1:nrow(exons_coord[[iso]])){ 
122 |                 polygon(rbind(
123 |                     c(exons_coord[[iso]][i,1],ypos+0.2),
124 |                     c(exons_coord[[iso]][i,1],ypos-0.2),
125 |                     c(exons_coord[[iso]][i,2],ypos-0.2),
126 |                     c(exons_coord[[iso]][i,2],ypos+0.2)
127 |                 ),lwd=1,col=bgcolor,border=bordercolor)
128 |             }
129 |             if(addtext){
130 |                 text(refseq_iso$txEnd,ypos,label=iso,cex=cex.text,pos=4)
131 |             }
132 |         }
133 |     }
134 |     
135 |     interval <- round((stop - start)/5000) * 1000
136 |     xlabs <- seq(floor(start/10000)*10000, ceiling(stop/10000)*10000,interval)
137 |     axis(1, at = xlabs, lwd.ticks=1.5 ,pos=0, ...)
138 |     mtext(gsub("chr","Chr ",chrom),side=2,las=1,...)
139 |     
140 | }
141 |     if(summary){
142 |         return(
143 |             refSeqDat(data=refseq@data[which(refseq@data$name %in% unlist(isonames_list))],
144 |                 exonStarts = refseq@exonStarts[unlist(isonames_list)],
145 |                 exonEnds = refseq@exonEnds[unlist(isonames_list)],
146 |                 genome.v=genome.v)
147 |                )
148 |     }
149 | }
150 | 
151 | 


--------------------------------------------------------------------------------
/R/cnv.freq.plot.r:
--------------------------------------------------------------------------------
  1 | #' Data class cnvfreq
  2 | #' 
  3 | #' Class to store breakpoint annotations in association with genomic features (e.g. gene loci)
  4 | #' 
  5 | #' @param freqsum (data.table): the frequency of gains and losses in each defined genomic bin
  6 | #' @param chrlimits (data.frame): a table containing the chromosome limit coordinates and global genomic coordinates
  7 | #' @param bin.mat (numeric): a matrix of genomic bins versus samples
  8 | #' @param plot (graphical): a recorded plot object
  9 | #' @param param (list): a list of parametres provided 
 10 | #' @return an instance of the class 'cnvfreq' 
 11 | #' @export
 12 | 
 13 | cnvfreq <- setClass("cnvfreq", representation(
 14 |     freqsum  = "data.table",
 15 |     chrlimits = "data.frame",
 16 |     bin.mat = "matrix",
 17 |     plot = "recordedplot",
 18 |     param = "list"
 19 | ))
 20 | 
 21 | 
 22 | setMethod("show","cnvfreq",function(object){
 23 |     writeLines(paste("An object of class cnvfreq from svpluscnv containing the following stats:
 24 |                 \nNumber of samples=",ncol(object@bin.mat),
 25 |                 "\nNumber of genomic bins =",nrow(object@bin.mat)))
 26 | })
 27 | 
 28 | 
 29 | #' CNV frequency map
 30 | #' 
 31 | #' Creates a map of CNVs using genome binning and plots CNV frequency across the genome. This function optionally returns text, graphical or both outputs.
 32 | #' Additionaly, calculates the proportion of samples with a given percentage of chromosome arm gained/lost 
 33 | #' 
 34 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
 35 | #' @param fc.pct (numeric) percentage CNV gain/loss for a segment to be considered changed (i.e. 0.2 = 20 percent change 0.8 < segmean && segmean > 1.2)
 36 | #' @param genome.v (character) (hg19 or h38) reference genome version to draw chromosome limits and centromeres
 37 | #' @param ploidy (logical) whether to apply ploidy correction; the function med.segmean will be used to obtain each sample's ploidy logR then this value substracted to each sample's logR values
 38 | #' @param g.bin (numeric) size in megabases of the genmome bin to compute break density 
 39 | #' @param sampleids (character) vector containing list of samples to include in plot. if set to NULL, all samples in the input will be used
 40 | #' @param cex.axis,cex.lab,label.line (numeric) plot parameters
 41 | #' @param plot (logical) whether produce a graphical output
 42 | #' @param verbose (logical) whether to return internal messages
 43 | #' @return an instance of the class 'cnvfreq' and optionally a plot into open device
 44 | #' @keywords CNV, segmentation, plot
 45 | #' @export
 46 | #' @examples
 47 | #' 
 48 | #' ## validate input data.frame
 49 | #' cnv <- validate.cnv(nbl_segdat)
 50 | #' 
 51 | #' cnv.freq(cnv, genome.v = "hg19")
 52 | 
 53 | cnv.freq <- function(cnv,
 54 |                      fc.pct= 0.2,
 55 |                      genome.v= "hg19",
 56 |                      ploidy=FALSE,
 57 |                      g.bin= 1,
 58 |                      sampleids=NULL,
 59 |                      cex.axis= 1,
 60 |                      cex.lab= 1,
 61 |                      label.line= -1.2,
 62 |                      plot=TRUE,
 63 |                      verbose=TRUE){
 64 |   
 65 | stopifnot(cnv@type == "cnv")
 66 | cnvdat <- cnv@data
 67 |     
 68 | if(!is.null(sampleids)) cnvdat <- cnvdat[which(cnvdat$sample %in% sampleids),]
 69 |   
 70 | if(ploidy){
 71 |     ploidy_val <- med.segmean(cnv)
 72 |     cnvdat$segmean <- cnvdat$segmean - ploidy_val[cnvdat$sample]
 73 | }
 74 | 
 75 | stopifnot(genome.v %in% c("hg19","hg38","GRCh37","GRCh38"))
 76 | if(genome.v %in% c("hg19","GRCh37")){ bands <- GRCh37.bands
 77 | }else if(genome.v %in% c("hg38","GRCh38")){ bands <- GRCh38.bands}
 78 | 
 79 | centromeres <- bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"start"]
 80 | names(centromeres) <- paste("chr",bands[intersect(which(bands$score == "acen"),grep("q",bands$name)),"chr"],sep="")
 81 | 
 82 | # define chromosome mapped limits and the global genome coordinates for each chromosome start
 83 | chrlimits <-   chromosome.limit.coords(cnv)
 84 | offset <- c(0,vapply(seq_len(nrow(chrlimits)-1), 
 85 |                      function(i) sum(chrlimits[seq_len(i),"end"]) + i*g.bin,1))
 86 | chrlabelpos <- offset + chrlimits$end/2
 87 | chrlimits <- data.frame(offset,as.data.frame(chrlimits),chrlabelpos)
 88 | rownames(chrlimits) <- chrlimits$chrom
 89 | 
 90 | g.bin.mb <- g.bin*1e6
 91 |   
 92 | if(verbose) message("Generating binned genome map ")
 93 | 
 94 | chrbins <- list()
 95 | 
 96 | for(chr in rownames(chrlimits)){
 97 |     seqpos <- seq(chrlimits[chr,"begin"],chrlimits[chr,"end"]+g.bin.mb,g.bin.mb)
 98 |     ranges <-  t( vapply(seq(2,length(seqpos)), function(i) c(seqpos[i-1],seqpos[i]),double(2)) )
 99 |     chrcol<- rep(chr,length(seqpos)-1)
100 |     segcol_del <- segcol_gain <- rep("grey",length(chrcol))
101 |     segcol_del[which(ranges[,2] <= centromeres[chr])] <- "lightblue"
102 |     segcol_del[which(ranges[,2] > centromeres[chr])] <- "blue"
103 |     segcol_gain[which(ranges[,2] <= centromeres[chr])] <- "salmon"
104 |     segcol_gain[which(ranges[,2] > centromeres[chr])] <- "red"
105 |     chrbins[[chr]] <- data.table(chrcol,ranges,segcol_del,segcol_gain)
106 | }
107 |   
108 |   chrbins.df <- do.call(rbind,unname(chrbins) )
109 |   chrbins.df<- data.table(chrbins.df,unite(chrbins.df[,c(1,2,3)],paste)$paste)
110 |   colnames(chrbins.df) <- c("chr","start","end","segcol_del","segcol_gain","binid")
111 | 
112 | 
113 |   if(verbose) message("Calculating mean segmean per genomic bin")
114 |   # find overlaps between bins and cnv segments
115 |   binsGR <- with(chrbins.df, GRanges(chr, IRanges(start=start, end=end)))
116 |   segGR <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=end)))
117 |   hits <-GenomicAlignments::findOverlaps(binsGR,segGR)
118 |   
119 |   outmat <- matrix(ncol=length(unique(cnvdat$sample)),nrow=nrow(chrbins.df))
120 |   colnames(outmat) <- unique(cnvdat$sample)
121 |   rownames(outmat) <- chrbins.df$binid
122 | 
123 |   for(i in seq_len(nrow(chrbins.df)) ){
124 |     segtmp<- cnvdat[subjectHits(hits)[which(queryHits(hits) == i)],]
125 |     if(nrow(segtmp)>0){
126 |       a <- aggregate(segmean~sample,segtmp, sum)  
127 |       outmat[i,a$sample]<- a$segmean
128 |     }else{
129 |       outmat[i,a$sample]<- NA
130 |     }
131 |   }
132 | 
133 |   if(verbose) message("Calculating gain/loss frequencies per genomic bin")
134 |   outmat[which(is.na(outmat),arr.ind=TRUE)] <- 0
135 |   
136 |   outmat_gain<-outmat_loss<-outmat
137 |   outmat_gain[]<-outmat_loss[]<-0
138 |   nsamples <- ncol(outmat_gain)
139 |   
140 |   outmat_gain[which(outmat > log2(1+fc.pct), arr.ind=TRUE)] <-  1
141 |   outmat_loss[which(outmat < log2(1-fc.pct), arr.ind=TRUE)] <-  1
142 |   freq.gains <- apply(outmat_gain,1,sum)/nsamples
143 |   freq.loss <- apply(outmat_loss,1,sum)/nsamples
144 | 
145 | if(plot){
146 |     plot.end<- chrlimits$offset[nrow(chrlimits)]+chrlimits$end[nrow(chrlimits)]
147 |     bin.loc <- chrlimits[chrbins.df[names(freq.gains),on="binid"]$chr,"offset"] + chrbins.df[names(freq.gains),,on="binid"]$start
148 | 
149 |     if(verbose) message("Plotting ...")
150 |     altcols <- rep(c(rgb(0.1,0.1,0.1,alpha=0.1),rgb(0.8,0.8,0.8,alpha=0.1)),12)
151 |     altcols2<- rep(c(rgb(0.1,0.1,0.1,alpha=1),rgb(0.4,0.4,0.4,alpha=1)),12)
152 |   
153 |     plot(x=NULL,y=NULL,xlim=c(0,plot.end),ylim=c(-1,1),bty='n',xaxt='n',yaxt='n',xlab="",ylab="")
154 |     for(i in seq_len(length(chrlimits$offset)) ) rect( chrlimits$offset[i],-1,chrlimits$offset[i]+chrlimits$end[i],1, col=altcols[i],border=NA )
155 |     points(bin.loc,freq.gains,type='h',col=chrbins.df$segcol_gain)
156 |     points(bin.loc,-freq.loss,type='h',col=chrbins.df$segcol_del)
157 |     lines(c(0,plot.end),c(0,0),col="lightgrey")
158 |     lines(c(0,plot.end),c(0.5,0.5),col="lightgrey",lty=3)
159 |     lines(c(0,plot.end),c(-0.5,-0.5),col="lightgrey",lty=3)
160 |     mtext(gsub("chr","",rownames(chrlimits))[seq(1,nrow(chrlimits),2)],side=1,at=chrlimits$chrlabelpos[seq(1,nrow(chrlimits),2)],las=1,col=altcols2[seq(1,nrow(chrlimits),2)],line=label.line,cex=cex.lab)
161 |     mtext(gsub("chr","",rownames(chrlimits))[seq(2,nrow(chrlimits),2)],side=3,at=chrlimits$chrlabelpos[seq(2,nrow(chrlimits),2)],las=1,col=altcols2[seq(2,nrow(chrlimits),2)],line=label.line,cex=cex.lab)
162 |     mtext("Frequency",side=4,line=1)
163 |     mtext("#samples",side=2,line=1)
164 |     axis(4,c(100,50,0,50,100),at=c(-1,-0.5,0,0.5,1),las=1,pos=plot.end, cex.axis=cex.axis)
165 |     axis(2,c(nsamples,round(nsamples/2),0,round(nsamples/2),nsamples),at=c(-1,-0.5,0,0.5,1),las=1, pos=0, cex.axis=cex.axis)
166 |     p <- recordPlot()
167 | }else{
168 |   p <- recordPlot(load=NULL, attach=NULL)
169 | }
170 | 
171 | 
172 | summary <- data.table(chrbins.df[,c("chr","start","end")],bin.loc,freq.gains,freq.loss)
173 | 
174 | return(cnvfreq(
175 |             freqsum = summary,
176 |             bin.mat = outmat,
177 |             chrlimits = chrlimits,
178 |             plot=p,
179 |             param = list(
180 |                 fc.pct= fc.pct,
181 |                 genome.v= genome.v,
182 |                 g.bin= g.bin,
183 |                 sampleids=sampleids,
184 |                 cex.axis= cex.axis,
185 |                 cex.lab= cex.lab,
186 |                 label.line= label.line   
187 |                 )
188 |             )
189 |        )
190 | }
191 | 
192 | 
193 | 


--------------------------------------------------------------------------------
/R/shattered.regions.cnv.r:
--------------------------------------------------------------------------------
  1 | #' CNV-only based shattered region detection
  2 | #' 
  3 | #' Caller for the identification of shattered genomic regions based on CNV breakpoint densities
  4 | #' 
  5 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
  6 | #' @param fc.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change
  7 | #' @param min.cnv.size (numeric) The minimun segment size (in base pairs) to include in the analysis 
  8 | #' @param min.num.probes (numeric) The minimun number of probes per segment to include in the analysis 
  9 | #' @param low.cov (data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis
 10 | #' @param clean.brk (numeric) inherited from cnv.breaks(); n cutoff for redundant breakpoints to filter out; if NULL, no filter will be applied
 11 | #' @param window.size (numeric) size in megabases of the genmome bin to compute break density 
 12 | #' @param slide.size (numeric) size in megabases of the sliding genmome window
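 13 | #' @param num.breaks (numeric) minimum number of breakpoints a genomic window must contain to be considered a high breakpoint density window
 14 | #' @param num.sd (numeric) number of standard deviations above the sample's interquantile mean break density that a window must exceed to be considered a high breakpoint density window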
 15 | #' @param dist.iqm.cut (numeric) interquantile average of the distance between breakpoints within a shattered region
 16 | #' @param chrlist (character) vector containing chromosomes to include in the analysis; if NULL all chromosomes available in the input will be included
 17 | #' @param verbose (logical)
 18 | #' @return an instance of the class 'chromo.regs' containing breakpoint mapping onto genes
 19 | #' @keywords CNV, segmentation
 20 | #' @export
 21 | #' @examples
 22 | #' 
 23 | #' ## validate input data.frames
 24 | #' cnv <- validate.cnv(segdat_lung_ccle)
 25 | #' 
 26 | #' shattered.regions.cnv(cnv)
 27 | 
 28 | shattered.regions.cnv <- function(cnv,
 29 |                               fc.pct = 0.2,
 30 |                               min.cnv.size = 0,
 31 |                               min.num.probes=0, 
 32 |                               low.cov = NULL,
 33 |                               clean.brk=NULL,
 34 |                               window.size = 10,
 35 |                               slide.size = 2,
 36 |                               num.breaks = 10,
 37 |                               num.sd = 5,
 38 |                               dist.iqm.cut = 1e+05,
 39 |                               chrlist=NULL,
 40 |                               chr.lim=NULL,
 41 |                               verbose=TRUE
 42 |                               ){
 43 |   
 44 |   stopifnot(cnv@type == "cnv")
 45 |   cnvdat <- cnv@data
 46 | 
 47 |   if(is.null(chr.lim)){
 48 |     chr.lim <- chromosome.limit.coords(cnv)
 49 |   }else{
 50 |     stopifnot(ncol(chr.lim) == 3)   
 51 |   }
 52 | 
 53 |   if(!is.null(chrlist)){
 54 |     chr.lim <- chr.lim[which(chr.lim$chrom %in% chrlist)]
 55 |   }
 56 |   
 57 |   cnvbrk <- cnv.breaks(cnv = cnv, 
 58 |                        fc.pct = fc.pct, 
 59 |                        min.cnv.size = min.cnv.size, 
 60 |                        low.cov = low.cov, 
 61 |                        clean.brk=clean.brk,
 62 |                        chrlist = chrlist,
 63 |                        verbose = verbose)
 64 |   
 65 |   if(verbose) message("Mapping CNV breakpoints across the genome:")
 66 |   cnv.brk.dens <- break.density(cnvbrk, 
 67 |                                 chr.lim = chr.lim, 
 68 |                                 window.size = window.size, 
 69 |                                 slide.size = slide.size,
 70 |                                 verbose = verbose)
 71 |   
 72 | 
 73 |   
 74 |   # calculate inter quantile mean and standard deviation per sample
 75 |   iqmdata1<- sddata<- cnvbrk@burden
 76 |   iqmdata1[] <- sddata[] <- 0
 77 |   
 78 |   iqmdata <- apply(cnv.brk.dens,1,IQM,lowQ=0.1,upQ=0.9)
 79 |   sddata <- apply(cnv.brk.dens,1,IQSD,lowQ=0.1,upQ=0.9)
 80 | 
 81 |   a <- sapply(rownames(cnv.brk.dens),function(i) names(which(cnv.brk.dens[i,] > iqmdata[i]+num.sd*sddata[i] )),simplify=FALSE)
 82 |   b <- sapply(rownames(cnv.brk.dens),function(i) names(which(cnv.brk.dens[i,] >= num.breaks)),simplify=FALSE)
 83 |   
 84 |   # condition for chromothripsis: at least n=breaks > 6 (svc SND cnv)  AND n-breaks > u+2*sd (svc AND cnv) 
 85 |   res <- sapply(rownames(cnv.brk.dens),function(i) Reduce(intersect, list(b[[i]],a[[i]])) ,simplify=FALSE)
 86 |   
 87 |   highDensityRegions <- cnv.brk.dens
 88 |   highDensityRegions[] <- 0
 89 |   for(cl in rownames(cnv.brk.dens)) highDensityRegions[cl,res[[cl]]] <- 1
 90 |   
 91 |   res <- res[which(unlist(lapply(res,length)) >0)]
 92 | 
 93 |   if(verbose){
 94 |     message("Locating shattered regions by CNV only...")
 95 |     pb <- txtProgressBar(style=3)
 96 |     cc <-0
 97 |     tot <- length(res)
 98 |   }
 99 |   
100 |   restab <- list()
101 |   for(cl in names(res)){
102 |     if(verbose) cc <- cc+1
103 |     if(verbose) setTxtProgressBar(pb, cc/tot)
104 | 
105 |       tab <- data.table(do.call(rbind,strsplit(res[[cl]]," ")))
106 |       colnames(tab) <- c("chrom","start","end")
107 |       tab$start <- as.numeric(tab$start )
108 |       tab$end <- as.numeric(tab$end )
109 |       
110 |       tabgr = with(tab, GRanges(chrom, IRanges(start=start, end=end))) 
111 |       hits = as.data.frame(GenomicAlignments::findOverlaps(tabgr,tabgr))
112 |       
113 |       agg <- aggregate(subjectHits ~ queryHits, hits, paste,simplify=FALSE)
114 |       prev<-c(); cnum <- 0
115 |       agglist <- list()
116 |       for(x in agg$subjectHits){
117 |         if(length(intersect(x,prev) > 0)){
118 |           agglist[[cnum]] <- unique(c(x,prev))
119 |           prev <- agglist[[cnum]]
120 |         }else{
121 |           cnum <- cnum+1
122 |           agglist[[cnum]]<- x
123 |           prev <-agglist[[cnum]]
124 |         }
125 |       }
126 |       agglistUniq <- list()
127 |       for(i in 1:length(agglist)){
128 |         chr <- as.character(unique(tab[as.numeric(agglist[[i]]),"chrom"]))
129 |         start <-min( tab[as.numeric(agglist[[i]]),"start"])
130 |         end <- max( tab[as.numeric(agglist[[i]]),"end"])
131 |         segNum <- length(agglist[[i]])
132 |         agglistUniq[[i]] <-  data.table(chr,start,end,segNum)
133 |       }
134 |       tabmerged <- do.call(rbind,agglistUniq)
135 |       colnames(tabmerged) <- c("chrom","start","end","nseg")
136 |       restab[[cl]] <- tabmerged
137 |   }
138 |   if(verbose) close(pb)
139 |   
140 |   if(verbose){
141 |     message("Evaluating shattered regions by CNV data only...")
142 |     pb <- txtProgressBar(style=3)
143 |     cc <-0
144 |     tot <- length(restab)
145 |   }
146 |   for(cl in names(restab)){
147 |     if(verbose) cc <- cc+1
148 |     if(verbose) setTxtProgressBar(pb, cc/tot)
149 |     regions <-   restab[[cl]]
150 |     br1 <- cnvbrk@breaks[which(cnvbrk@breaks$sample == cl),2:3]
151 |     br1.gr <- with(br1, GRanges(chrom, IRanges(start=pos, end=pos)))
152 |     regions_gr <- with(regions, GRanges(chrom, IRanges(start=start, end=end)))
153 |     hits_1 = GenomicAlignments::findOverlaps(regions_gr,br1.gr)
154 |     n.brk <- dist.iqm <- start <- end <- rep(0,nrow(regions))
155 |     conf <- rep("HC",nrow(regions))
156 |     for(i in 1:nrow(regions)){
157 |       sites <- sort(unique(br1[subjectHits(hits_1)[which(queryHits(hits_1) == i)]]$pos))
158 |       dist.iqm[i]  <- IQM(sites[2:length(sites)] - sites[1:(length(sites)-1) ],lowQ = 0.2,upQ = 0.8)
159 |       n.brk[i] <- length(sites)
160 |       start[i] <- min(sites)
161 |       end[i] <- max(sites)
162 |     }
163 |     conf[which(dist.iqm < dist.iqm.cut )] <-"lc"
164 |     chrom <- regions$chrom
165 |     nbins <- regions$nseg
166 |     restab[[cl]] <- data.table(chrom,start,end,nbins,dist.iqm,n.brk,conf)
167 |   }
168 |   if(verbose) close(pb)
169 |   
170 |   bins <- data.table(do.call(rbind,strsplit(colnames(highDensityRegions)," ")),colnames(highDensityRegions))
171 |   colnames(bins) <- c("chrom","start","end","binid")
172 |   bins$start <- as.numeric(bins$start)
173 |   bins$end <- as.numeric(bins$end)
174 |   
175 |   
176 |   binsGR <- with(bins, GRanges(chrom, IRanges(start=start, end=end)))
177 |   highDensityRegionsHC <- highDensityRegions
178 |   for(cl in names(restab)){
179 |     lc <- restab[[cl]][which(restab[[cl]]$conf == "lc"),]
180 |     if(nrow(lc) > 0){
181 |       lcGR<- with(lc, GRanges(chrom, IRanges(start=start, end=end)))
182 |       hits = GenomicAlignments::findOverlaps(binsGR,lcGR)
183 |       highDensityRegionsHC[cl,bins$bins[unique(queryHits(hits)),]] <- 0
184 |     }
185 |   }
186 |   
187 |   results <- chromo.regs(
188 |     regions.summary = restab,
189 |     high.density.regions = highDensityRegions,
190 |     high.density.regions.hc = highDensityRegionsHC,
191 |     cnv.brk.dens = cnv.brk.dens,
192 |     svc.brk.dens = matrix(),
193 |     cnv.brk.common.dens = matrix(),
194 |     svc.brk.common.dens = matrix(),
195 |     cnvbrk = cnvbrk,
196 |     svcbrk = breaks(),
197 |     common.brk = list(),
198 |     cnv = cnv,
199 |     svc = svcnvio(),
200 |     param=list(
201 |         fc.pct = fc.pct,
202 |         min.cnv.size = min.cnv.size,
203 |         min.num.probes=min.num.probes, 
204 |         low.cov = low.cov,
205 |         clean.brk=clean.brk,
206 |         window.size = window.size,
207 |         slide.size = slide.size,
208 |         num.breaks = num.breaks,
209 |         num.sd = num.sd,
210 |         dist.iqm.cut = dist.iqm.cut)
211 |   )
212 | return(results)
213 | }
214 | 
215 | 


--------------------------------------------------------------------------------
/R/brk.burden.iqm.r:
--------------------------------------------------------------------------------
  1 | #' Data class break.iqm
  2 | #' 
  3 | #' Class to store the per-sample breakpoint burden returned by brk.burden.iqm
  4 | #' 
  5 | #' @param summary (data.table): one row per sample with the burden IQM, plotting color, breakpoint counts and overall density
  6 | #' @param brk.mat (numeric): a matrix of samples (rows) versus chromosome arms (columns) containing breakpoint densities
  7 | #' @param chrlimits (data.table): a table containing the chromosome limit coordinates (chrom, begin, end)
  8 | #' @param plot (graphical): a recorded plot object
  9 | #' @param param (list): a list of parameters provided 
 10 | #' @return an instance of the class 'break.iqm' 
 11 | #' @export
 12 | 
 13 | break.iqm <- setClass("break.iqm", slots = c(
 14 |     summary = "data.table",
 15 |     brk.mat = "matrix",
 16 |     chrlimits = "data.table",
 17 |     plot = "recordedplot",
 18 |     param = "list"
 19 | ))
 20 | 
 21 | 
 22 | setMethod("show", "break.iqm", function(object) {
 23 |     # header text followed by the sample count, i.e. the number of rows of 'brk.mat'
 24 |     header <- "An object of class break.iqm from svpluscnv containing the following stats:\n                \nNumber of samples="
 25 |     cat(header, " ", nrow(object@brk.mat), "\n", sep = "")
 26 | })
 27 | 
 28 | #' Breakpoint burden per sample
 29 | #' 
 30 | #' Evaluates the breakpoint burden based on an instance 'breaks' produced by svpluscnv::svc.breaks or svpluscnv::cnv.breaks. 
 31 | #' Breakpoint densities are calculated for each chromosome arm and the inter quantile mean (svpluscnv::IQM) of all chromosome arms is reported for each sample.
 32 | #' A graphical output is generated indicating every sample's arm burden ordered by their IQM. 
 33 | #' 
 34 | #' @param brk (breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks).
 35 | #' @param sample.col (character) A vector of valid colors. Names must match sample column from 'brk'. If null a gradient color based on breakpoint burden IQM will be used. 
 36 | #' @param chr.lim (data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.
 37 | #' @param genome.v (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
 38 | #' @param min.arm.size (numeric) minimum size in base pairs for a chromosome arm to be included in the analysis. Size will be calculated based on the 'genome.v' centromere location (excluding centromere bands). Chromosome start and end locations can be provided in 'chr.lim'.
 39 | #' @param bp.unit (numeric) The genomic size unit in base pairs to report breakpoint densities. This parameter is also used for the y axis of the plot. 
 40 | #' @param plot (logical) whether produce a graphical output
 41 | #' @param verbose (logical) whether to return internal messages (currently only stored in the 'param' slot of the result)
 42 | #' @return an instance of the class 'break.iqm' and optionally a plot into open device
 43 | #' @keywords structural variants, mutational burden, chromosomal instability
 44 | #' @export
 45 | #' @examples
 46 | #' 
 47 | #' # initialize CNV data
 48 | #' cnv <- validate.cnv(nbl_segdat)
 49 | #' 
 50 | #' # obtain CNV breakpoints
 51 | #' brk <- cnv.breaks(cnv)
 52 | #' 
 53 | #' brk.burden.iqm(brk)
 54 | 
 55 | brk.burden.iqm <- function(brk,
 56 |                            sample.col = NULL,
 57 |                            min.arm.size = 2e7,
 58 |                            bp.unit=1e7,
 59 |                            genome.v="hg19",
 60 |                            chr.lim= NULL,
 61 |                            plot=TRUE,
 62 |                            verbose=TRUE){
 63 | 
 64 |     stopifnot(isS4(brk))
 65 | 
 66 |     # fetch cytogenetic bands from genome version (D3GB)
 67 |     if(genome.v %in% c("GRCh37","hg19")){ 
 68 |         bands <- remove.factors(GRCh37.bands)
 69 |     }else if(genome.v %in% c("GRCh38","hg38")){
 70 |         bands <- remove.factors(GRCh38.bands)
 71 |     }else{stop("Genome version not provided")}
 72 | 
 73 |     # define default chromosome arm boundaries; centromere coordinates come from the 'acen' band whose name contains "q"
 74 |     acen.q <- intersect(which(bands$score == "acen"),grep("q",bands$name))
 75 |     centromeres_start <- bands[acen.q,"start"]
 76 |     centromeres_end <- bands[acen.q,"end"]
 77 |     chromosome_start <- sapply(as.character(unique(bands$chr)), function(i) min( bands$start[which(bands$chr == i)] ))
 78 |     chromosome_end <- sapply(as.character(unique(bands$chr)), function(i) max( bands$end[which(bands$chr == i)] ))
 79 |     # NOTE(review): assumes unique(bands$chr) lists chromosomes in the same order as the 'acen' rows -- confirm
 80 |     names(chromosome_start) <-  names(chromosome_end) <-  names(centromeres_start) <-  names(centromeres_end) <- paste("chr",bands[acen.q,"chr"],sep="")
 81 | 
 82 |     # define chromosome arm boundaries based on provided chromosome limits
 83 |     if(!is.null(chr.lim)){
 84 |         # coerce to data.table (class of the 'chrlimits' slot) and index by name even when 'chrom' is a factor
 85 |         chr.lim <- data.table(chr.lim)
 86 |         lim.chrom <- as.character(chr.lim$chrom)
 87 |         centromeres_start <- centromeres_start[lim.chrom]
 88 |         centromeres_end <- centromeres_end[lim.chrom]
 89 |         chromosome_start <- chromosome_start[lim.chrom]
 90 |         chromosome_end <- chromosome_end[lim.chrom]
 91 |         chromosome_end[] <- chr.lim$end
 92 |         chromosome_start[] <- chr.lim$begin
 93 |     }else{
 94 |         # 'begin' must hold the chromosome start; the centromere start was stored here before
 95 |         chr.lim<- data.table(names(chromosome_start),chromosome_start,chromosome_end)
 96 |         colnames(chr.lim) <- c("chrom","begin","end")
 97 |     }
 98 | 
 99 |     # obtain number of breakpoints per sample mapped onto chromosome arms 
100 |     mapped_p <- names(which(centromeres_start -chromosome_start > min.arm.size))
101 |     mapped_q <- names(which(chromosome_end -centromeres_end > min.arm.size))
102 |     p.arm.df <- data.frame(mapped_p,chromosome_start[mapped_p],centromeres_start[mapped_p])
103 |     q.arm.df <- data.frame(mapped_q,centromeres_end[mapped_q],chromosome_end[mapped_q])
104 |     colnames(p.arm.df) <- colnames(q.arm.df) <- c("chrom","start","end")
105 | 
106 |     p.arm.gr <- with(p.arm.df,GRanges(chrom, IRanges(start=start, end=end)))
107 |     q.arm.gr <- with(q.arm.df,GRanges(chrom, IRanges(start=start, end=end)))
108 |     breaks.gr <- with(brk@breaks, GRanges(chrom, IRanges(start=pos,end=pos)))
109 | 
110 |     p.hits <- GenomicAlignments::findOverlaps(breaks.gr,p.arm.gr)
111 |     q.hits <- GenomicAlignments::findOverlaps(breaks.gr,q.arm.gr)
112 | 
113 |     p.armname <- paste(mapped_p,"p",sep="")
114 |     q.armname <- paste(mapped_q,"q",sep="")
115 |     arm.size <- c(p.arm.df$end -p.arm.df$start, q.arm.df$end -q.arm.df$start)
116 |     names(arm.size) <- c(p.armname,q.armname)
117 | 
118 |     # every sample starts from a zero density for each retained arm
119 |     template <- rep(0, length(arm.size))
120 |     names(template) <- names(arm.size)
121 |     samples <- unique(brk@breaks$sample)
122 |     arm.brk.dens <- sapply(samples, function(i) template, simplify=FALSE)
123 | 
124 |     p.hits.info <- data.table(brk@breaks$sample[queryHits(p.hits)],p.armname[subjectHits(p.hits)])
125 |     q.hits.info <- data.table(brk@breaks$sample[queryHits(q.hits)],q.armname[subjectHits(q.hits)])
126 | 
127 |     total.brk <- list()
128 |     for(sample.id  in names(arm.brk.dens)){
129 |         input <- c(table(p.hits.info$V2[which(p.hits.info$V1 == sample.id)]),
130 |                    table(q.hits.info$V2[which(q.hits.info$V1 == sample.id)]))
131 |         # density = breakpoint count per arm scaled to 'bp.unit' base pairs of arm length
132 |         arm.brk.dens[[sample.id]][names(input)] <- input*bp.unit/arm.size[names(input)]
133 |         total.brk[[sample.id]] <- sum(input)
134 |     }
135 | 
136 |     # calculate IQM for each sample; stored as log10(1+IQM) in increasing order
137 |     arm.brk.iqm <- log10(1+sort(unlist(lapply(arm.brk.dens,IQM))))
138 | 
139 |     # obtain gradient of default colors
140 |     if(is.null(sample.col)){
141 |         sample.col <- rep("green",length(samples))
142 |         names(sample.col) <- samples
143 |         sample.col.tmp <- map2color(arm.brk.iqm,pal=colorRampPalette(c("darkgreen","orange","red"))(256))
144 |         names(sample.col.tmp) <- names(arm.brk.iqm)
145 |         sample.col[names(sample.col.tmp)] <- sample.col.tmp
146 |     }
147 | 
148 |     if(plot){
149 |         datavector <- log10(1+unlist(lapply(arm.brk.dens[names(arm.brk.iqm)],sort)))
150 |         datacolor <- unlist(sapply(names(arm.brk.iqm), function(i) rep(sample.col[i], length(template)),simplify=FALSE))
151 |         names(datacolor) <- names(datavector)
152 | 
153 |         npoints <- length(template)
154 |         plot(datavector,pch=20,xaxt='n',yaxt='n',col="white",xlab="",ylab='',
155 |              bty='n',xlim=c(100,length(datavector)-100))
156 |         # alternate the background shade, one stripe per sample
157 |         altcol<-"grey95"
158 |         for(i in seq_along(arm.brk.dens)){ 
159 |             rect((i-1)*npoints,-10,i*npoints,50,col=altcol,border=NA)
160 |             if(altcol == "grey95"){ altcol <- "grey85"
161 |             }else{altcol <- "grey95"}
162 |         }
163 |         abline(h=seq(-2,6,0.5),lty=1,lwd=.2,col="black")
164 | 
165 |         points(datavector,pch=20,cex=0.3,col=datacolor)
166 |         axis(2,labels=sprintf("%.2f",10^(seq(-2,4,0.5))-1),at=seq(-2,4,0.5),family="Courier",font=1,line=0,cex.axis=1.2,las=1)
167 | 
168 |         mtext(paste("log10(1+breaks/",bp.unit,")",sep=""),side=2,line=4,cex=1.3)
169 |         # 'arm.brk.iqm' is already log10(1+IQM), the y-axis scale, so it is drawn without further transformation
170 |         lines(seq(npoints/2,length(datavector),length(datavector)/length(arm.brk.iqm)),arm.brk.iqm )
171 |         p <- recordPlot()
172 |     }else{
173 |         p <- recordPlot(load=NULL, attach=NULL)
174 |     }
175 | 
176 |     # create summary 
177 |     nbreaks <- table(brk@breaks$sample)[names(arm.brk.iqm)]
178 |     nbreaks.map <- unlist(total.brk)[names(arm.brk.iqm)]
179 |     brk.dens <- (nbreaks.map*bp.unit/sum(arm.size))[names(arm.brk.iqm)]
180 | 
181 |     summary <- data.table(names(arm.brk.iqm), 
182 |                           arm.brk.iqm, 
183 |                           sample.col[names(arm.brk.iqm)], 
184 |                           as.numeric(nbreaks), 
185 |                           nbreaks.map, 
186 |                           brk.dens )
187 |     colnames(summary) <- c("sample","brk.iqm","color","total breaks","nbreaks mapped","overal density")
188 | 
189 |     return(break.iqm(
190 |         summary = summary,
191 |         brk.mat = do.call(rbind,arm.brk.dens),
192 |         chrlimits = chr.lim,
193 |         plot=p,
194 |         param = list(min.arm.size= min.arm.size, bp.unit=bp.unit, genome.v= genome.v, verbose= verbose)
195 |     ))
196 | }
197 | 
198 | 
199 | 
200 | 


--------------------------------------------------------------------------------
/R/circular.plot.r:
--------------------------------------------------------------------------------
  1 | #' Circular visualization of shattered regions
  2 | #' 
  3 | #' Produces a circos plot combining CNV and SVC data zooming into the chromosomes harboring shattered regions 
  4 | #' 
  5 | #' @param chromo.regs.obj (chromo.regs) An object of class chromo.regs 
  6 | #' @param sample.id (character) the id of a sample to be plotted within 'chromo.regs.obj'
  7 | #' @param print.name (logical) whether to print the sample id  in the center of the circular plot
  8 | #' @param genome.v (character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
  9 | #' @param lrr.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents 20 percent fold change
 10 | #' @param lrr.max (numeric) CNV plot limit
 11 | #' @param high.conf (logical) Whether to plot only high confidence shattered regions (see https://github.com/ccbiolab/svpluscnv#identification-of-shattered-regions for more information)
 12 | #' @param chrlist (character) vector containing chromosomes to plot; by default only chromosomes with shattered regions are plotted
 13 | #' @param add.cnv.legend (x,y or coordinates) the position parameter passed to legend to plot shattered regions and CNV (outer track) description; NULL omits the legend
 14 | #' @param add.svc.legend (x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description; NULL omits the legend, which is also skipped when the sample has no SVC data
 15 | #' @param ... Additional graphical parameters passed to text() when printing the sample id
 16 | #' @return circos plot into open device
 17 | #' @keywords CNV, segmentation, structural variant, visualization, circular plot
 18 | #' @export
 19 | #' @examples
 20 | #' 
 21 | #' ## validate input data.frames
 22 | #' cnv <- validate.cnv(segdat_lung_ccle)
 23 | #' svc <- validate.svc(svdat_lung_ccle)
 24 | #' 
 25 | #' ## obtain shattered regions
 26 | #' shatt.regions <- shattered.regions(cnv,svc)
 27 | #' 
 28 | #' # select a sample with shattered regions
 29 | #' id <- "SCLC21H_LUNG"
 30 | #' 
 31 | #' circ.chromo.plot(shatt.regions, sample.id = id)
 32 | 
 33 | circ.chromo.plot <- function(chromo.regs.obj, 
 34 |                              sample.id,
 35 |                              print.name=TRUE,
 36 |                              genome.v = "hg19",
 37 |                              lrr.pct = 0.2,
 38 |                              lrr.max = 4,
 39 |                              high.conf=FALSE,
 40 |                              chrlist=NULL,
 41 |                              add.cnv.legend="topleft",
 42 |                              add.svc.legend="topright",
 43 |                              ...){
 44 |     stopifnot(nrow(chromo.regs.obj@cnv@data) > 0 | nrow(chromo.regs.obj@svc@data) >  0)
 45 | 
 46 |     # per-sample CNV and SVC tables; either one may be empty (e.g. an object built from CNV data only)
 47 |     if(sample.id %in% chromo.regs.obj@cnv@data$sample){
 48 |         cnvdat <- chromo.regs.obj@cnv@data[which(chromo.regs.obj@cnv@data$sample == sample.id),]
 49 |     }else{
 50 |         cnvdat <- data.table()
 51 |     }
 52 |     if(sample.id %in% chromo.regs.obj@svc@data$sample){
 53 |         svcdat <- chromo.regs.obj@svc@data[which(chromo.regs.obj@svc@data$sample == sample.id),]
 54 |     }else{
 55 |         svcdat <- data.table()
 56 |     }
 57 | 
 58 |     regions <- chromo.regs.obj@regions.summary[[sample.id]]
 59 |     if(is.null(regions)) stop("No shattered regions found for sample: ", sample.id)
 60 |     if(high.conf == TRUE) regions <-  regions[which(regions$conf == "HC")] 
 61 |     stopifnot(nrow(regions) > 0)
 62 |     if(is.null(chrlist)) chrlist <- unique(regions$chrom)
 63 | 
 64 |     # SV class colors; defined unconditionally because the SVC legend needs them as well
 65 |     map = setNames(c("blue", "red", "orange","black","green","grey"), c("DEL", "DUP","INV","TRA","INS","BND"))
 66 |     if(nrow(svcdat) >  0){
 67 |         alllinks1 <- data.table(svcdat$chrom1,svcdat$pos1,svcdat$pos1 )
 68 |         alllinks2 <- data.table(svcdat$chrom2,svcdat$pos2,svcdat$pos2 )
 69 |         colnames(alllinks1) <- colnames(alllinks2) <- c("chr","start","end")
 70 |         alllinkcolors <- map[as.character(svcdat$svclass)]
 71 |         zoomchr <- intersect(which(alllinks1$chr %in% chrlist),which(alllinks2$chr %in% chrlist))
 72 |         links1<-alllinks1[zoomchr,]
 73 |         links2<-alllinks2[zoomchr,]
 74 |         linkcolors<-alllinkcolors[zoomchr]
 75 |     }
 76 | 
 77 |     # CNV segments colored by gain/loss and capped at +/- log2(lrr.max) for display
 78 |     cnvlist <- list()
 79 |     if(nrow(cnvdat) >  0){
 80 |         colores <- rep("black",nrow(cnvdat))
 81 |         colores[which(cnvdat$segmean < log2(1 - lrr.pct)) ] <- "blue"
 82 |         colores[which(cnvdat$segmean > log2(1 + lrr.pct)) ] <- "red"
 83 |         cnv.df <- data.frame(cnvdat[,c("chrom","start","end","segmean")],colores)
 84 |         cnv.df[,"colores"] <- as.character(cnv.df[,"colores"])
 85 |         cnv.df[which(cnv.df$segmean < log2(1/lrr.max) ),"segmean"] <- log2(1/lrr.max) 
 86 |         cnv.df[which(cnv.df$segmean > log2(lrr.max)),"segmean"] <- log2(lrr.max)
 87 |         for(i in chrlist) cnvlist[[i]] <- cnv.df[which(cnv.df$chrom == i),]
 88 |     }
 89 | 
 90 |     reg.map = setNames(c("pink", "purple"), c("lc", "HC"))
 91 |     reg.col <- unname(reg.map[regions$conf])
 92 |     value <- rep(0.1,nrow(regions))
 93 |     regions.plot <- as.data.frame(data.table(regions,reg.col,value))
 94 | 
 95 |     p.regions <- list()
 96 |     for(chr in chrlist){
 97 |         p.regions[[chr]] <- regions.plot[which(regions$chrom == chr),c("chrom","start","end","value","reg.col")]
 98 |         colnames(p.regions[[chr]]) <- c("chrom","start","end","value","color")
 99 |     }
100 | 
101 |     circos.initializeWithIdeogram(species=genome.v,chromosome.index=chrlist,plotType=c("axis","labels"), track.height=0.05, axis.labels.cex=0.4,labels.cex=1.3)
102 |     circos.genomicIdeogram(track.height = 0.03)
103 |     circos.genomicTrack(p.regions, bg.lwd =0.01, ylim=c(0,0.02), track.height=0.05,
104 |                         panel.fun = function(region, value, ...) {
105 |                           circos.genomicRect(region, value, ytop = 0.02, ybottom = 0, col = p.regions[[CELL_META$sector.index]][,"color"],  border = NA, ...)
106 |                           circos.lines(CELL_META$cell.xlim, c(0.01, 0.01), lty = 2, col = "#00000040")
107 |                         })
108 | 
109 |     # the CNV track is skipped when the sample has no CNV segments
110 |     if(length(cnvlist) > 0){
111 |         circos.genomicTrackPlotRegion(cnvlist, bg.lwd =0.2, bg.col=rainbow(length(cnvlist),alpha=0.1),ylim=c(-2.5,2.5), track.height=0.2, 
112 |                                       panel.fun = function(region, value, ...) {
113 |                                         circos.genomicLines(region, value, col=as.character(cnvlist[[CELL_META$sector.index]][,"colores"]), numeric.column = c(1), type="segment")
114 |                                       })
115 |     }
116 |     if(nrow(svcdat) >  0 && nrow(links1) > 0) circos.genomicLink(links1, links2, col = linkcolors, border = NA)
117 |     if(print.name == TRUE) text(0, 0,  gsub("_","\n",sample.id),...)
118 | 
119 |     if(!is.null(add.cnv.legend)){
120 |         legend(add.cnv.legend,c("shattered regions","CNV gain","CNV neutral","CNV loss"),fill=c("purple",NA,NA,NA),
121 |                lty=c(2,1,1,1), col=c("black","red","black","blue"),border=NA, bty='n', title=expression(bold("CNV (outer)")))
122 |     }
123 |     # the SVC legend lists the classes present in the sample, so it requires SVC data
124 |     if(!is.null(add.svc.legend) && nrow(svcdat) > 0){
125 |         map.legend <- map[sort(unique(as.character(svcdat$svclass)))]
126 |         legend(add.svc.legend,names(map.legend),lty=1, col=map.legend, bty='n', title=expression(bold("SVC (center)")))
127 |     }
128 | }
129 | 
130 | #' Circular visualization CNV and SVC
131 | #' 
132 | #' Produces a circos plot combining CNV and SVC of the whole genome  
133 | #' 
134 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
135 | #' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
136 | #' @param sample.id (character) the id of the sample to be plotted; may be omitted when 'cnv' and 'svc' share exactly one sample
137 | #' @param genome.v (character) (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
138 | #' @param lrr.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2
139 | #' @param lrr.max (numeric) maximum CNV to be plotted
140 | #' @param chrlist (character) vector containing chromosomes to plot; by default all chromosomes with CNV data are plotted
141 | #' @param add.cnv.legend (x,y or coordinates) the position parameter passed to legend to plot CNV (outer tracks) description; NULL omits the legend
142 | #' @param add.svc.legend (x,y or coordinates) the position parameter passed to legend to plot SVC (central track) description; NULL omits the legend
143 | #' @param ... currently unused
144 | #' @return circos plot into open device
145 | #' @keywords CNV, segmentation, structural variant, visualization, circular plot
146 | #' @export
147 | #' @examples
148 | #' 
149 | #' ## validate input data.frames
150 | #' cnv <- validate.cnv(segdat_lung_ccle)
151 | #' svc <- validate.svc(svdat_lung_ccle)
152 | #' 
153 | #' ## select a sample id
154 | #' id <- "A549_LUNG"
155 | #' 
156 | #' circ.wg.plot(cnv, svc, sample.id=id)
157 | 
158 | circ.wg.plot <- function(cnv, 
159 |                          svc, 
160 |                          sample.id=NULL,
161 |                          genome.v = "hg19",
162 |                          lrr.pct = 0.2,
163 |                          lrr.max = 4,
164 |                          chrlist=NULL,
165 |                          add.cnv.legend="topleft",
166 |                          add.svc.legend="topright",
167 |                          ...){
168 |     
169 |     stopifnot(cnv@type == "cnv")
170 |     cnvdat <- cnv@data
171 |     
172 |     stopifnot(svc@type == "svc")
173 |     svcdat <- svc@data
174 |     
175 |     if(is.null(sample.id)){ 
176 |         sample.id <- intersect(cnvdat$sample,svcdat$sample)
177 |         stopifnot(length(sample.id) == 1)
178 |     }
179 |     cnvdat <- cnvdat[which(cnvdat$sample == sample.id),]
180 |     svcdat <- svcdat[which(svcdat$sample == sample.id),]
181 |     
182 |     if(is.null(chrlist)) chrlist <- chr.sort(unique(cnvdat$chrom))
183 |     
184 |     alllinks1 <- data.table(svcdat$chrom1,svcdat$pos1,svcdat$pos1 )
185 |     alllinks2 <- data.table(svcdat$chrom2,svcdat$pos2,svcdat$pos2 )
186 |     colnames(alllinks1) <- colnames(alllinks2) <- c("chr","start","end")
187 |     map = setNames(c("blue", "red", "orange","black","green","black"), c("DEL", "DUP","INV","TRA","INS","BND"))
188 |     alllinkcolors <- map[as.character(svcdat$svclass)]
189 |     # keep links with both ends in 'chrlist' (as circ.chromo.plot does); presumably other sectors cannot be linked -- confirm
190 |     inchr <- intersect(which(alllinks1$chr %in% chrlist),which(alllinks2$chr %in% chrlist))
191 |     links1 <- alllinks1[inchr,]
192 |     links2 <- alllinks2[inchr,]
193 |     linkcolors <- alllinkcolors[inchr]
194 |     
195 |     cnvcirc <- cnvdat[,c("chrom","start","end","segmean")]
196 |     colores <- rep("black",nrow(cnvcirc))
197 |     colores[which(cnvcirc$segmean < log2(1 - lrr.pct)) ] <- "blue"
198 |     colores[which(cnvcirc$segmean > log2(1 + lrr.pct)) ] <- "red"
199 |     cnvcirc <- data.table(cnvcirc,colores)
200 |     cnvcirc[which(cnvcirc$segmean < log2(1/lrr.max) ),"segmean"] <- log2(1/lrr.max) 
201 |     cnvcirc[which(cnvcirc$segmean > log2(lrr.max)),"segmean"] <- log2(lrr.max)
202 |     allcnvlist <- list()
203 |     for(i in chrlist) allcnvlist[[i]] <- as.data.frame(cnvcirc[which(cnvcirc$chrom == i),])
204 |     
205 |     circos.initializeWithIdeogram(species=genome.v, chromosome.index=chrlist, plotType=c("ideogram","labels"))
206 |     text(0, 0,  gsub("_","\n",sample.id), cex = 1)
207 |     circos.genomicTrackPlotRegion(allcnvlist, bg.lwd =0.2, bg.col=rainbow(length(allcnvlist),alpha=0.1),ylim=c(-2.4,2.4), track.height=0.2, panel.fun = function(region, value, ...) {
208 |         circos.genomicLines(region, value, col=as.character(allcnvlist[[CELL_META$sector.index]][,"colores"]), numeric.column = c(1), type="segment")
209 |     })
210 |     if(nrow(links1) > 0) circos.genomicLink(links1, links2, col = linkcolors, border = NA)
211 |     
212 |     if(!is.null(add.cnv.legend)){
213 |       legend(add.cnv.legend,c("CNV gain","CNV neutral","CNV loss"),lty=1, col=c("red","black","blue"),
214 |              bty='n', title=expression(bold("CNV (outer)")))
215 |     }
216 |     
217 |     # legend() stops on an empty label vector, so the SVC legend needs at least one SV
218 |     if(!is.null(add.svc.legend) && nrow(svcdat) > 0){
219 |       map.legend <- map[sort(unique(as.character(svcdat$svclass)))]
220 |       legend(add.svc.legend,names(map.legend),lty=1, col=map.legend, bty='n', title=expression(bold("SVC (center)")))
221 |     }
222 | }
223 | 
224 | 


--------------------------------------------------------------------------------
/R/sv.model.view.r:
--------------------------------------------------------------------------------
  1 | #' SV integrated visualization
  2 | #' 
  3 | #' Integrated visualization of SVC and CNV data for defined genomic locations. CNV and SVC data is overlayed into a sample-based track visualization map.
  4 | #' 
  5 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
  6 | #' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
  7 | #' @param chrom (character) chromosome (e.g chr9)
  8 | #' @param start (numeric) genomic coordinate from specified chromosome to start plotting
  9 | #' @param stop (numeric) genomic coordinate from specified chromosome to stop plotting
 10 | #' @param sampleids (character) a vector containing a list of sample ids represented in svc and/or cnv objects to be plotted
 11 | #' @param cnvlim (numeric) limits for color coding of background CNV log-ratios. Use to modify the CNV color contrast at different levels.
 12 | #' @param addlegend (character) One of 'sv' (show SV type legend), 'cnv' (show CNV background color legend) or 'both'.
 13 | #' @param cex.legend (numeric) The cex values for each legend
 14 | #' @param interval (numeric) The axis interval in base pairs
 15 | #' @param addtext (character) a vector indicating which SV types should include text labels giving the genomic location of the breakpoint partner. The added labels point to breakpoint locations outside the plot area. (e.g. c("TRA","INV") )
 16 | #' @param cex.text (numeric) The magnification to be used for SV text info added
 17 | #' @param plot (logic) whether to produce a graphical output
 18 | #' @param summary (logic) whether the function should return CNV segment 'segbrk' and SV 'svbrk' breakpoints tabular output
 19 | #' @param ... additional plot parameters from graphics plot function 
 20 | #' @return a data.frame with CNV and SV breakpoint annotations and/or plot into open device
 21 | #' @keywords structural variant, CNV, segmentation
 22 | #' @export
 23 | #' @examples
 24 | #' 
 25 | #' ## validate input data.frames
 26 | #' cnv <- validate.cnv(segdat_lung_ccle)
 27 | #' svc <- validate.svc(svdat_lung_ccle)
 28 | #'
 29 | #' # obtain the coordinates of a desired genomic region based on a known gene locus 
 30 | #' refSeqGene <- gene.symbol.info(refseq_hg19,"PTPRD")
 31 | #' start <- refSeqGene$start - 150000;
 32 | #' stop <- refSeqGene$stop+ 50000;
 33 | #' chrom <- refSeqGene$chrom
 34 | #' 
 35 | #' sv.model.view(cnv, svc, chrom, start, stop)
 36 | #' 
 37 | 
 38 | 
 39 | 
 40 | sv.model.view <- function(cnv, svc, chrom, start, stop, 
 41 |                           sampleids=NULL,
 42 |                           cnvlim=c(-2,2), 
 43 |                           addlegend='both',
 44 |                           cex.legend=1,
 45 |                           interval=NULL,
 46 |                           addtext=NULL,
 47 |                           cex.text=.8,
 48 |                           plot=TRUE,
 49 |                           summary=TRUE,
 50 |                           ...){
 51 |     
 52 | 
 53 |  stopifnot(!is.null(chrom) && !is.null(start) && !is.null(stop))
 54 | 
 55 |     stopifnot(cnv@type == "cnv")
 56 |     cnvdat <- cnv@data
 57 |     
 58 |     stopifnot(svc@type == "svc")
 59 |     svcdat <- svc@data
 60 |     
 61 |     if(!is.null(sampleids)){
 62 |         missing.samples <- setdiff(sampleids,c(svcdat$sample,cnvdat$sample))
 63 |         if(length(missing.samples) == length(unique(sampleids))){
 64 |             stop("None of the samples provided were found in 'sv' and 'cnv' input data!")
 65 |         }else if(length(missing.samples) > 0){
 66 |             warning(paste("The following samples provided are not found in 'sv' and 'cnv' input data:", paste(missing.samples,collapse=" "),sep=" "))
 67 |         }
 68 |         svcdat<-svcdat[which(svcdat$sample %in% intersect(sampleids,svcdat$sample)),]
 69 |         cnvdat<-cnvdat[which(cnvdat$sample %in% intersect(sampleids,cnvdat$sample)),]
 70 |     }
 71 | 
 72 |     genegr <- with(data.frame(chrom,start,stop), GRanges(chrom, IRanges(start=start, end=stop))) 
 73 |     
 74 |     # Find samples with SV breaks within defined genomic region
 75 |     sv1gr = with(svcdat, GRanges(chrom1, IRanges(start=pos1, end=pos1))) 
 76 |     sv2gr = with(svcdat, GRanges(chrom2, IRanges(start=pos2, end=pos2))) 
 77 |     
 78 |     sv_hits1 = GenomicAlignments::findOverlaps(sv1gr,genegr)
 79 |     sv_hits2 = GenomicAlignments::findOverlaps(sv2gr,genegr)
 80 |     svtab <- svcdat[sort(unique(c(queryHits(sv_hits1),queryHits(sv_hits2)))),]
 81 |     svBreakSamples <- unique(svtab$sample)
 82 |     if(length(svBreakSamples) == 0) warning("Thre is no SV breakpoints in the defined genomic region")
 83 |         
 84 |     # obtain SVs for plotting with different colors for each svclass
 85 |     svcolormap = setNames(c("blue", "red", "orange", "black", "green","grey20"), 
 86 |                    c("DEL", "DUP", "INV", "TRA", "INS", "BND"))
 87 |     svcolor <- svcolormap[svtab$svclass]
 88 |     svtab_plot <- data.table(svtab,svcolor)
 89 |     svtab_plot_seg <- svtab_plot[which(svtab_plot$svclass != "TRA")]
 90 |     svtab_plot_tra <- svtab_plot[which(svtab_plot$svclass == "TRA")]
 91 |     
 92 |     # Find samples with CNV segment breaks within defined genomic region
 93 |     seg1br  = with(cnvdat, GRanges(chrom, IRanges(start=start, end=start))) 
 94 |     seg2br  = with(cnvdat, GRanges(chrom, IRanges(start=end, end=end))) 
 95 |     seg_hits1 = GenomicAlignments::findOverlaps(seg1br,genegr)
 96 |     seg_hits2 = GenomicAlignments::findOverlaps(seg2br,genegr)
 97 |     segBreakSamples <- unique(cnvdat[sort(unique(c(queryHits(seg_hits1),queryHits(seg_hits2))))]$sample)
 98 |     if(length(segBreakSamples) == 0) warning("Thre is no CNV segment breakpoints in the defined genomic region")    
 99 |     segbrk <- cnvdat[sort(unique(c(queryHits(seg_hits1),queryHits(seg_hits2))))]
100 |     
101 |     if(plot==TRUE){
102 |         # Find overlap between all CNV segments and the defined genomic region for plotting
103 | 
104 |         seggr <- with(cnvdat, GRanges(chrom, IRanges(start=start, end=end))) 
105 |         hits_seg = GenomicAlignments::findOverlaps(seggr,genegr)
106 |         seg_plot <- cnvdat[queryHits(hits_seg)]
107 |         segcolor <- map2color(seg_plot$segmean,
108 |                   pal=colorRampPalette(c("lightblue","white","salmon"))(256),
109 |                   limits=cnvlim)
110 |         seg_plot <- data.table(seg_plot,segcolor)
111 |     
112 |         if(!is.null(sampleids)){
113 |             sample_order <- 1:length(sampleids)
114 |             names(sample_order) <- sampleids
115 |         }else{
116 |             sample_order <- 1:length(unique(c(svBreakSamples,segBreakSamples)))
117 |             names(sample_order) <- unique(c(svBreakSamples,segBreakSamples))
118 |         }
119 |     
120 |         if(!is.null(addlegend)){
121 |             plot_ylim <- length(sample_order)*10/100+length(sample_order)
122 |             legend_ypos <- plot_ylim - length(sample_order)*3/100 
123 |             if(length(sample_order) < 10) plot_ylim <- length(sample_order) +1
124 |         }else{
125 |             plot_ylim <- length(sample_order)
126 |         }
127 |         
128 |         plot(x=NULL,y=NULL,xlim=range(c(start,stop)),ylim=range(c(0,plot_ylim)),
129 |              xaxt='n',yaxt='n',xlab='',ylab='',bty='n', ...)
130 |     
131 |         mtext(side=2,at=sample_order-0.5,text=names(sample_order),las=2,line = 0.5, ...)
132 |     
133 |         for(sid in names(sample_order)){
134 |             ypos <- sample_order[sid]
135 |             polygon(rbind(
136 |                 c(start-1e7,ypos+0.02),
137 |                 c(start-1e7,ypos-0.98),
138 |                 c(stop+1e7,ypos-0.98),
139 |                 c(stop+1e7,ypos+0.02)),
140 |                 col=rep(c("grey80","grey80"),length(sample_order))[ypos],border=NA)
141 |         }
142 |         
143 |         for(sid in names(sample_order)){
144 |             seg_sample_plot <- seg_plot[which(seg_plot$sample == sid),]
145 |             ypos <- sample_order[sid]
146 |             for(i in 1:nrow(seg_sample_plot)){
147 |                 polygon(rbind(
148 |                     c(seg_sample_plot[i]$start,ypos),
149 |                     c(seg_sample_plot[i]$start,ypos-1),
150 |                     c(seg_sample_plot[i]$end,ypos-1),
151 |                     c(seg_sample_plot[i]$end,ypos)
152 |                 ),col=seg_sample_plot[i]$segcolor,border=NA)
153 |             }
154 |         }
155 |     
156 |     
157 |         for(sid in unique(svtab_plot_tra$sample)){
158 |             svtab_plot_tra_i <- svtab_plot_tra[which(svtab_plot_tra$sample == sid),]
159 |             ypos <- sample_order[sid]
160 |             addrnorm <- rep(c(0,0.3,-0.3,0.1,-0.1,0.2,-0.2),nrow(svtab_plot_tra_i))
161 |             for(i in 1:nrow(svtab_plot_tra_i)){
162 |                 if(svtab_plot_tra_i[i]$chrom2 == chrom){ 
163 |                     points(svtab_plot_tra_i[i]$pos2,ypos-0.5+addrnorm[i],pch=10)
164 |                     lines(c(svtab_plot_tra_i[i]$pos2,svtab_plot_tra_i[i]$pos2),c(ypos,ypos-1),lwd=1,lty=3)
165 |                     if("TRA" %in% addtext){
166 |                         text(svtab_plot_tra_i[i]$pos2,ypos-0.5+addrnorm[i],
167 |                              paste("  ",svtab_plot_tra_i[i]$chrom1,":",svtab_plot_tra_i[i]$pos1,sep=""),
168 |                              pos=4,offset=0,cex=cex.text)
169 |                     }
170 |                 }            
171 |                 if(svtab_plot_tra_i[i,"chrom1"] == chrom){
172 |                     points(svtab_plot_tra_i[i]$pos1,ypos-0.5+addrnorm[i],pch=10)
173 |                     lines(c(svtab_plot_tra_i[i]$pos1,svtab_plot_tra_i[i]$pos1),c(ypos,ypos-1),lwd=1,lty=3)
174 |                     if("TRA" %in% addtext) {
175 |                         text(svtab_plot_tra_i[i]$pos1,ypos-0.5+addrnorm[i],
176 |                              paste("  ",svtab_plot_tra_i[i]$chrom2,":",svtab_plot_tra_i[i]$pos2,sep=""),
177 |                              pos=4,offset=0,cex=cex.text)
178 |                     }
179 |                 }
180 |             }
181 |         }
182 |         
183 |         for(sid in unique(svtab_plot_seg$sample)){
184 |             svtab_plot_seg_i <- svtab_plot_seg[which(svtab_plot_seg$sample == sid)]
185 |             ypos <- sample_order[sid]
186 |             addrnorm <- rep(c(0,0.2,-0.2,0.1,-0.1,0.3,-0.3),nrow(svtab_plot_seg_i))
187 |             for(i in 1:nrow(svtab_plot_seg_i)){
188 |                 polygon(rbind(
189 |                     c(svtab_plot_seg_i[i]$pos1,ypos-0.4-addrnorm[i]),
190 |                     c(svtab_plot_seg_i[i]$pos1,ypos-0.6-addrnorm[i]),
191 |                     c(svtab_plot_seg_i[i]$pos2,ypos-0.6-addrnorm[i]),
192 |                     c(svtab_plot_seg_i[i]$pos2,ypos-0.4-addrnorm[i])
193 |                 ),col=NA,border=svtab_plot_seg_i[i]$svcolor)
194 |                 
195 |                 if(svtab_plot_seg_i[i]$svclass %in% addtext){
196 |                     if(svtab_plot_seg_i[i]$pos1 < start){
197 |                         text(start,ypos-0.5-addrnorm[i],
198 |                              paste("<-",svtab_plot_seg_i[i]$pos1,sep=""),
199 |                              pos=4,offset=0,cex=cex.text)
200 |                     }
201 |                     if(svtab_plot_seg_i[i]$pos2 > stop){
202 |                         text(stop,ypos-0.5-addrnorm[i],
203 |                              paste(svtab_plot_seg_i[i]$pos2,"->",sep=""),
204 |                              pos=2,offset=0,cex=cex.text)
205 |                     }
206 |                 }
207 |             }
208 |         }
209 |         
210 |         if(is.null(interval)) interval <- round((stop - start)/5000) * 1000
211 |         xlabs <- seq(floor(start/10000)*10000, ceiling(stop/10000)*10000,interval)
212 |         axis(1, at = xlabs,labels=TRUE, lwd.ticks=1.5, pos=0,...)
213 | 
214 |         if(is.null(cex.legend)) cex.legend <- 1
215 |         
216 |         if(addlegend %in% c("sv","both")) {
217 |             fillx <- c("white", "white", "white", "white", "white",NA)
218 |             borderx <- c("blue", "red","orange","green","grey20",NA)
219 |             pchx <- c(NA,NA,NA,NA,NA,10)
220 |             names(fillx) <- names(borderx) <- names(pchx) <- c("DEL", "DUP", "INV","INS","BND", "TRA")
221 |             svclassin <- sort(unique(svtab_plot$svclass))
222 |             legend(x= start, y =legend_ypos+0.2, legend = svclassin, bg=NA,
223 |                    bty = "n", fill = fillx[svclassin], border=borderx[svclassin], 
224 |                    pch = pchx[svclassin], horiz = TRUE, x.intersp=0.2, cex = cex.legend)
225 |         }
226 |         if(addlegend %in% c("cnv","both")) {
227 |             colkey(colorRampPalette(c("lightblue","white","salmon"))(256),clim = c(-4,4),side=3,add=TRUE,side.clab=1,length=0.5,shift=0.2,lwd.ticks = 2, dist = -0.12)
228 |         }
229 |     }
230 |     if(summary){
231 |         return(list(svbrk=svcdat,segbrk=segbrk))
232 |     }
233 | }
234 | 
235 | 
236 | 
237 | 


--------------------------------------------------------------------------------
/R/breakpoint.density.r:
--------------------------------------------------------------------------------
  1 | #' Data class breaks
  2 | #' 
  3 | #' Class to store breakpoint annotations in association with genomic features (e.g. gene loci)
  4 | #' 
  5 | #' @param breaks (data.table): the breakpoint info containing data.table, this will be occupied by the CNV segmentation data in the case of cnv.break.annot or SV for sv.break.annot. Unique random string rownames are added to the returned breaks data.frame.
  6 | #' @param burden (numeric): a vector containing the total number of breakpoints in each sample 
  7 | #' @param param (list): a list of parametres provided 
  8 | #' @return an instance of the class 'breaks' containing breakpoint and breakpoint burden information
  9 | #' @export
 10 | breaks <- setClass("breaks", representation(
 11 |                             breaks  = "data.table",
 12 |                             burden = "numeric",
 13 |                             param = "list"
 14 |                         ))
 15 | 
 16 | 
 17 | setMethod("show","breaks",function(object){
 18 |     writeLines(paste("An object of class breaks from svpluscnv containing",object@param$datatype,"breakpoints:
 19 |                 \nNumber of samples=",length(object@burden),
 20 |                 "\nTotal number of breakpoints =",nrow(object@breaks)))
 21 | })
 22 | 
 23 | 
 24 | #' Identify CNV breakpoints
 25 | #' 
 26 | #' Identify CNV breakpoints filtered by the change in copy number log-ratio between contiguous segments
 27 | #' 
 28 | #' @param cnv (S4) an object of class svcnvio containing data type 'cnv' initialized by validate.cnv
 29 | #' @param fc.pct (numeric) copy number change between 2 consecutive segments: i.e (default) cutoff = 0.2 represents a fold change of 0.8 or 1.2
 30 | #' @param break.width (numeric) the maximum distance between a segment end and the subsequent segment start positions beyond which breakpoints are discarded
 31 | #' @param min.cnv.size (numeric) The minimun segment size (in base pairs) to include in the analysis 
 32 | #' @param min.num.probes (numeric) The minimun number of probes per segment to include in the analysis 
 33 | #' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...
 34 | #' @param low.cov (data.frame) a data.frame (chr, start, end) indicating low coverage regions to exclude from the analysis
 35 | #' @param clean.brk (numeric) identical breakpoints across multiple samples tend to be artifacts; remove breaks > N 
 36 | #' @param verbose (logical) whether to return  
 37 | #' @return an instance of the class 'breaks' containing breakpoint and breakpoint burden information
 38 | #' @keywords CNV, segmentation
 39 | #' @export
 40 | #' @examples
 41 | #' 
 42 | #' # initialized CNV data
 43 | #' cnv <- validate.cnv(segdat_lung_ccle)
 44 | #' 
 45 | #' cnv.breaks(cnv)
 46 | #' 
 47 | 
 48 | 
 49 | cnv.breaks <- function(cnv,
 50 |                        fc.pct = 0.2,
 51 |                        break.width = 10000,
 52 |                        min.cnv.size = NULL,
 53 |                        min.num.probes = NULL,
 54 |                        chrlist = NULL,
 55 |                        low.cov = NULL, 
 56 |                        clean.brk = NULL,
 57 |                        verbose = TRUE){
 58 |     
 59 | 
 60 | stopifnot(cnv@type == "cnv")
 61 | cnvdat <- cnv@data
 62 |     
 63 | if(is.null(chrlist)) chrlist <- unique(cnvdat$chrom)
 64 | chrlist <- chr.sort(chrlist)
 65 |     
 66 | brk.burden <- rep(0,length(unique(cnvdat$sample)))
 67 | names(brk.burden) <- unique(cnvdat$sample)
 68 |     
 69 | if(!is.null(min.cnv.size)) cnvdat <- cnvdat[which(cnvdat$end - cnvdat$start >= min.cnv.size),]
 70 | if(!is.null(min.num.probes)) cnvdat <- cnvdat[which(cnvdat$probes  >= min.num.probes),]
 71 |     
 72 | lastrow <- nrow(cnvdat)
 73 | pos <- round(apply(cbind(cnvdat[2:(lastrow),"start"], cnvdat[1:(lastrow-1),"end"]),1,mean))
 74 | chrom <- cnvdat[2:(lastrow),"chrom"]
 75 | sample <- cnvdat[2:(lastrow),"sample"]
 76 | width <- cnvdat[2:(lastrow),"start"] - cnvdat[1:(lastrow-1),"end"]
 77 | FC <-  (2^cnvdat[1:(lastrow-1),"segmean"]) / (2^cnvdat[2:lastrow,"segmean"])
 78 | uid <- paste("brk_",createRandomString(nrow(cnvdat)-1,8),sep="")
 79 | breakpoints <- data.table(sample,chrom,pos,width,FC,uid)
 80 | colnames(breakpoints) <- c("sample","chrom","pos","width","FC","uid")
 81 | 
 82 | break_idx <- c(which( log2(FC) >= log2(1+fc.pct)),which( log2(FC) < log2(1 - fc.pct)))
 83 |     
 84 | samechr <- which(apply(cbind(cnvdat[1:(lastrow-1),"chrom"],cnvdat[2:(lastrow),"chrom"]),1,anyDuplicated) == 2)
 85 | 
 86 | samesample <-  which(apply(cbind(cnvdat[1:(lastrow-1),"sample"],cnvdat[2:(lastrow),"sample"]),1,anyDuplicated) == 2)
 87 | 
 88 | if(is.null(break.width)) break.width <- Inf
 89 | brwidthin <- which(width < break.width)
 90 |     
 91 | breakpoints <- breakpoints[Reduce(intersect, list(break_idx,samechr,samesample,brwidthin)),]
 92 |     
 93 | 
 94 | if(!is.null(low.cov)){
 95 |     message("Filtering breakpoints in low coverage regiomns")
 96 |     colnames(low.cov) <- c("chrom","start","end")
 97 |     low_cov_GR = with(low.cov, GRanges(chrom, IRanges(start=start, end=end)))
 98 |     breakpoints_GR = with(breakpoints, GRanges(chrom, IRanges(start=start, end=end)))
 99 |     overlapgr <- GenomicAlignments::findOverlaps(breakpoints_GR,low_cov_GR,ignore.strand=TRUE)
100 |     breakpoints <- breakpoints[setdiff(1:nrow(breakpoints),queryHits(overlapgr)),]
101 | }
102 |     
103 | if(!is.null(clean.brk)){
104 |     breakids <- unite(breakpoints[,c(2:4)],"newcol")$newcol
105 |     breakids.freq <- sort(table(breakids),decreasing=TRUE)
106 |     breakpoints <- breakpoints[which(breakids %in% names(which(breakids.freq < clean.brk))),]
107 | }
108 |     
109 | brk.burden.sub <- table(breakpoints$sample)
110 | brk.burden[names(brk.burden.sub)] <- brk.burden.sub
111 |     
112 | return(breaks(breaks=breakpoints,
113 |             burden=brk.burden,
114 |             param=list(
115 |                 datatype=cnv@type,
116 |                 fc.pct = fc.pct,
117 |                 min.cnv.size = min.cnv.size,
118 |                 min.num.probes=min.num.probes,
119 |                 low.cov=low.cov, 
120 |                 clean.brk=clean.brk
121 |             )
122 |             )
123 |        )
124 | }
125 | 
126 | 
127 | 
#' Identify SVC breakpoints
#' 
#' Transform structural variant (SVC) data into a 'breaks' object; every variant contributes two breakpoints (chrom1:pos1 and chrom2:pos2)
#' 
#' @param svc (S4) an object of class svcnvio containing data type 'svc' initialized by validate.svc
#' @param chrlist (character) list of chromosomes to include chr1, chr2, etc...; variants are kept only when both ends map to the listed chromosomes
#' @param low.cov (data.table) a data.table (chrom, start, end) indicating low coverage regions to exclude from the analysis; only the first three columns are used
#' @return an instance of the class 'breaks' containing breakpoint and breakpoint burden information
#' @keywords Structural variants
#' @export
#' @examples
#' 
#' ## Obtain breakpoints from SV calls data
#' svc <- validate.svc(svdat_lung_ccle)
#' 
#' svc.breaks(svc)



svc.breaks <- function(svc, chrlist=NULL,low.cov=NULL){

    stopifnot(svc@type == "svc")

    if(!is.null(chrlist) ){
        svcdat <- svc@data[intersect(which(svc@data$chrom1 %in% chrlist),which(svc@data$chrom2 %in% chrlist))]
    }else{
        svcdat <- svc@data
    }
    stopifnot(nrow(svcdat) > 0)

    brk.burden <- rep(0,length(unique(svcdat$sample)))
    names(brk.burden) <- unique(svcdat$sample)

    # stack both ends of every variant: first all (chrom1,pos1), then all (chrom2,pos2)
    uid <- paste("brk_",createRandomString(nrow(svcdat)*2,8),sep="")
    svcdat.breaks <- data.table(c(svcdat$sample,svcdat$sample),
                               c(svcdat$chrom1,svcdat$chrom2),
                               c(svcdat$pos1,svcdat$pos2),
                               c(svcdat$strand1,svcdat$strand2),
                               c(svcdat$svclass,svcdat$svclass),
                               c(svcdat$uid,svcdat$uid),
                               uid)
    colnames(svcdat.breaks) <- c("sample","chrom","pos","strand","svclass","svcuid","uid")

    if(!is.null(low.cov)){
        low.cov.df <- data.table(low.cov[,1:3])
        colnames(low.cov.df) <- c("chrom","start","end")

        svc_ranges <- with(svcdat.breaks, GRanges(chrom, IRanges(start=pos, end=pos)))
        low.cov_ranges <- with(low.cov.df, GRanges(chrom, IRanges(start=start, end=end)))

        low.cov_hits <- GenomicAlignments::findOverlaps(svc_ranges,low.cov_ranges)

        # queryHits are row indices of svcdat.breaks; drop exactly those rows
        keep_rows <- setdiff(seq_len(nrow(svcdat.breaks)),queryHits(low.cov_hits))
        svcdat.breaks <- svcdat.breaks[keep_rows,]
    }

    brk.burden.sub <- table(svcdat.breaks$sample)
    brk.burden[names(brk.burden.sub)] <- brk.burden.sub

    return(breaks(breaks=svcdat.breaks,
                burden=brk.burden,
                param=list(
                    datatype=svc@type,
                    low.cov=low.cov
                )
            )
        )

}
201 | 
202 | 
203 | 
204 | 
#' Breakpoint density map
#' 
#' Generates a genomic map based on a defined bin size and sliding window and counts the number of breakpoints mapped onto each bin. This function is used internally by svpluscnv::shattered.regions and svpluscnv::shattered.regions.cnv
#' 
#' @param brk (breaks) An instance of the class 'breaks' obtained from CNV segmentation data (svpluscnv::cnv.breaks) or Structural Variant calls (svpluscnv::svc.breaks). 
#' @param chr.lim (data.frame) 3 column table (chrom, begin, end) indicating the chromosome most distal coordinates with coverage. Also returned by the function svpluscnv::chromosome.limit.coords.
#' @param genome.v (hg19 or hg38) reference genome version to draw chromosome limits and centromeres
#' @param window.size (numeric) size in megabases of the genome bin onto which breakpoints will be mapped 
#' @param slide.size (numeric) size in megabases of the sliding genomic window; if slide.size < window.size the genomic bins will overlap
#' @param verbose (logical) whether to return internal messages
#' @return a matrix of samples (rows) and genomic bins (cols) with the number of breakpoints mapped in each cell
#' @keywords CNV, segmentation
#' @export
#' @examples
#' 
#' # initialize CNV data
#' cnv <- validate.cnv(segdat_lung_ccle)
#' 
#' # obtain CNV breakpoints
#' brk <- cnv.breaks(cnv)
#' 
#' break.density(brk)


break.density <- function(brk, 
                          chr.lim=NULL, 
                          genome.v = "hg19",
                          window.size = 10, 
                          slide.size=2,
                          verbose=TRUE){

    if(is.null(chr.lim)){
        chr.lim <- d3gb.chr.lim(genome.v=genome.v)
    }else{
        stopifnot(ncol(chr.lim) == 3)
    }

    chr.begin <- chr.lim$begin
    chr.end <- chr.lim$end
    names(chr.begin) <- names(chr.end) <- chr.lim$chrom

    # make sure both chr.lim and breaks have same chromosome names
    # (full column name: no reliance on partial matching of '$')
    seqnames <- intersect(chr.lim$chrom,brk@breaks$chrom)
    stopifnot(length(seqnames) > 0)

    # a template vector (one zero per sample) to save breakpoint counts
    templatevector <- brk@burden
    templatevector[] <- 0

    SS <- slide.size * 1e+6
    # number of slide steps spanned by one window
    offset <- window.size/slide.size

    chrlist <- chr.sort(chr.lim$chrom)

    # count breaks for each chromosome for each fragment
    fragment <- list()
    for(chr in chrlist){

        if(verbose) cat("\r",chr)

        chr_breaks <- brk@breaks[which(brk@breaks$chrom == chr),]
        break.position <- chr_breaks$pos
        frag <- seq(chr.begin[chr],chr.end[chr]+SS,SS)

        # chromosome too short to hold a single window: the index sequence
        # below would run backwards and create bins with missing coordinates
        if(length(frag) < 1 + offset) next

        for(i in (1+offset):length(frag)){
            start <- frag[i - offset]
            stop <- frag[i]
            bin_id <- paste(chr,start,stop)
            bin_counts <- templatevector
            # breakpoints strictly inside the bin, tabulated by sample
            in_bin <- intersect(which(break.position > start),which(break.position < stop))
            res_bp <- table(chr_breaks[in_bin,"sample"])
            bin_counts[names(res_bp)] <- res_bp
            fragment[[bin_id]] <- bin_counts
        }
    }
    if(verbose) cat("\nDone!\n")

    return( do.call(cbind,fragment))

}
282 | 
283 | 
284 | 
285 | 
#' Breakpoint matching
#' 
#' Match common breakpoints from two different datasets or data types based on their co-localization in the genome. 
#' 
#' @param brk1 (S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks`
#' @param brk2 (S4) an object of class breaks as returned by `svc.breaks` and `cnv.breaks` to compare against brk1
#' @param maxgap (numeric) distance (base pairs) limit for breakpoints to be considered colocalized 
#' @param verbose (logical) whether to return internal messages (currently not used by this function)
#' @param plot (logical) whether to plot into open device
#' @return a list with the breakpoints of brk1 matched by brk2 (brk1_match), the breakpoints of brk2 matched by brk1 (brk2_match) and a per-sample table of matched and total breakpoint counts (restab)
#' @keywords CNV, SV, genomic breakpoints
#' @export
#' @examples
#' 
#' # initialize CNV and SVC data
#' cnv <- validate.cnv(segdat_lung_ccle)
#' svc <- validate.svc(svdat_lung_ccle)
#' 
#' ## Obtain breakpoints from CNV and SVC
#' brk1 <- cnv.breaks(cnv)
#' brk2 <- svc.breaks(svc)
#' 
#' common.brk <- match.breaks(brk1, brk2)
#' 



match.breaks <- function(brk1, 
                         brk2, 
                         maxgap=100000,
                         verbose=FALSE,
                         plot=TRUE){

    common_samples <- intersect(names(brk1@burden),names(brk2@burden))
    stopifnot(length(common_samples) > 0) 

    brk1_match <- brk2_match <- res <- list()
    for(id in common_samples){

        brk1_i <- brk1@breaks[which(brk1@breaks$sample == id),]
        brk_ranges1 <- with(brk1_i, GRanges(chrom, IRanges(start=pos, end=pos)))

        brk2_i <- brk2@breaks[which(brk2@breaks$sample == id),]
        brk_ranges2 <- with(brk2_i, GRanges(chrom, IRanges(start=pos, end=pos)))

        # warnings from the overlap search are silenced for this call only,
        # leaving the caller's global 'warn' option untouched
        seg_seg <- suppressWarnings(
            GenomicAlignments::findOverlaps(brk_ranges1, brk_ranges2, maxgap=maxgap)
        )

        brk_match1 <- sort(unique(queryHits(seg_seg)))
        brk_match2 <- sort(unique(subjectHits(seg_seg)))

        res[[id]] <- data.table(id,length(brk_match1), nrow(brk1_i), length(brk_match2), nrow(brk2_i))
        colnames(res[[id]]) <- c("sample","matched.brk1", "total.brk1", "matched.brk2", "total.brk2")

        brk1_match[[id]] <- brk1_i[brk_match1,]
        brk2_match[[id]] <- brk2_i[brk_match2,]
    }

    restab <- do.call(rbind,res)

    if(plot){
        # graphical parameters are restored on exit, even if plotting fails
        def.par <- par(no.readonly = TRUE)
        on.exit(par(def.par), add = TRUE)
        par(mfrow=c(2,1))

        # upper panel: brk2 breakpoints matched by brk1, samples sorted by total brk2
        restab <- restab[order(restab$total.brk2)]
        m2 <- sprintf("%.1f",100*mean(na.omit(restab$matched.brk2/restab$total.brk2))) 
        barplot(rbind(restab$matched.brk2, restab$total.brk2 - restab$matched.brk2),
                border=NA,las=2,xlab="",horiz=FALSE,cex.main=.7,cex.names=.4, 
                names.arg=restab$sample,ylab="#samples" )
        legend("top",paste(brk2@param$datatype," breaks matched by ",
                           brk1@param$datatype,
                           " breaks\n","Average = ",m2,"%",sep=""),bty='n')
        grid(ny=NULL,nx=NA)

        # lower panel: brk1 breakpoints matched by brk2, samples sorted by total brk1
        restab <- restab[order(restab$total.brk1)]
        m2 <- sprintf("%.1f",100*mean(na.omit(restab$matched.brk1/restab$total.brk1))) 
        barplot(rbind(restab$matched.brk1, restab$total.brk1 - restab$matched.brk1),
                border=NA,las=2,xlab="",horiz=FALSE,cex.main=.7,cex.names=.4, 
                names.arg=restab$sample,ylab="#samples")
        legend("top",paste(brk1@param$datatype,
                           " breaks matched by ",brk2@param$datatype,
                           " breaks\n","Average = ",m2,"%",sep=""),bty='n')
        grid(ny=NULL,nx=NA)
    }

    return(list(
        brk1_match = do.call(rbind,brk1_match),
        brk2_match = do.call(rbind,brk2_match),
        restab= restab))
}
378 | 
379 | 


--------------------------------------------------------------------------------