├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── R
├── LMC_interpretation.R
├── MeDeCom-package.R
├── MeDeComSet-class.R
├── RcppExports.R
├── allGlobals.R
├── contribution_interpretation.R
├── factorizations.R
├── frontend.R
├── gapStatisitics.R
├── markerSelection.R
├── matching.R
├── plotting.R
├── reference.R
├── simulation.R
└── utilities.R
├── README
├── README.md
├── data
├── datalist
├── example.MeDeComSet.RData
├── example.cg.annotation.RData
├── example.dataset.RData
└── small.example.RnBSet.RData
├── exec
└── cluster.script.sge.R
├── inst
└── unitTests
│ └── test_general.R
├── man
├── AVAIL.REFS.Rd
├── MeDeCom-package.Rd
├── MeDeComSet-class.Rd
├── as.MeDeComSet.Rd
├── cluster.refbased.Rd
├── example.MeDeComSet.Rd
├── example.cg.annotation.Rd
├── example.dataset.Rd
├── factorize.alternate.Rd
├── factorize.regr.Rd
├── getLMCs-methods.Rd
├── getProportions-methods.Rd
├── getStatistics-methods.Rd
├── greedymatch.Rd
├── lmc.annotation.enrichment.Rd
├── lmc.annotation.plots.tables.Rd
├── lmc.go.enrichment.Rd
├── lmc.go.plots.tables.Rd
├── lmc.lola.enrichment.Rd
├── lmc.lola.plots.tables.Rd
├── load.lola.for.medecom.Rd
├── locus_plot.Rd
├── matchLMCs.Rd
├── plotLMC.reference.Rd
├── plotLMCs.Rd
├── plotParameters.Rd
├── plotProportions.Rd
├── run.refbased.Rd
├── run.trait.association.Rd
├── run.trait.association.single.Rd
└── runMeDeCom.Rd
├── src
├── HCLasso.cpp
├── Makevars
├── Makevars.win
├── QuadHC.cpp
├── RProjSplxBox.cpp
├── RQuadSimplex.cpp
├── RQuadSimplexBox.cpp
├── RcppExports.cpp
├── cppTAfact.cpp
└── dynblas.h
├── tests
└── runTests.R
└── vignettes
├── MeDeCom.Rmd
├── MeDeCom.html
├── MeDeCom.md
├── MeDeCom_files
└── MathJax.js
└── figure
├── unnamed-chunk-10-1.png
├── unnamed-chunk-11-1.png
├── unnamed-chunk-12-1.png
├── unnamed-chunk-13-1.png
├── unnamed-chunk-14-1.png
├── unnamed-chunk-17-1.png
├── unnamed-chunk-18-1.png
├── unnamed-chunk-19-1.png
├── unnamed-chunk-20-1.png
├── unnamed-chunk-22-1.png
├── unnamed-chunk-7-1.png
└── unnamed-chunk-8-1.png
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | src/*.o
6 | src/*.so
7 | src/*.dll
8 | *.Rproj
9 | .Rbuildignore
10 | .gitignore
11 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: R
2 | sudo: false
3 | cache: packages
4 | r: bioc-release
5 | warnings_are_errors: false
6 | #before_install:
7 | # - R -e "install.packages('knitr', 'rmarkdown'); remotes::install_deps(dependencies = TRUE)"
8 | #install: travis_wait 30 R CMD build
9 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: MeDeCom
2 | Title: Decomposition of heterogeneous methylomes
3 | Description: Decomposition of heterogeneous methylomes with non-negative matrix factorization.
4 | Maintainer: Pavlo Lutsik
5 | Authors@R: c(
6 | person ( "Pavlo", "Lutsik", email="p.lutsik@dkfz.de", role = c("cre")),
7 | person ( "Martin", "Slawski", email="ms@cs.uni-saarland.de", role = c("aut")),
8 | person ( "Nik", "Vedeneev", email="nikitavedeneev@gmail.com", role = c("aut")),
9 | person ( "Gilles", "Gasparoni", email="gillesgasparoni@gmail.com", role = c("aut")),
10 | person ( "Michael", "Scherer", email="mscherer@mpi-inf.mpg.de", role = c("aut")),
11 | person ( "Matthias", "Hein", email="hein@cs.uni-saarland.de", role = c("aut")),
12 | person ( "Joern", "Walter", email="j.walter@mx.uni-saarland.de", role = c("aut"))
13 | )
14 | Date: 2020-05-13
15 | Version: 1.0.1
16 | Depends:
17 | R (>= 3.2.0),
18 | Rcpp,
19 | pracma,
20 | gtools,
21 | gplots,
22 | parallel,
23 | RUnit,
24 | RnBeads
25 | Suggests:
26 | BiocStyle,
27 | knitr,
28 | rmarkdown,
29 | LOLA,
30 | GOstats,
31 | qvalue,
32 | simpleCache,
33 | igraph
34 | Imports:
35 | RcppEigen (>= 0.3)
36 | LinkingTo:
37 | Rcpp,
38 | RcppEigen
39 | VignetteBuilder: knitr
40 | License: GPL-3
41 | RoxygenNote: 7.1.0
42 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(MeDeComSet)
4 | export(as.MeDeComSet)
5 | export(cluster.refbased)
6 | export(factorize.alternate)
7 | export(factorize.regr)
8 | export(greedymatch)
9 | export(lmc.annotation.enrichment)
10 | export(lmc.annotation.plots.tables)
11 | export(lmc.go.enrichment)
12 | export(lmc.go.plots.tables)
13 | export(lmc.lola.enrichment)
14 | export(lmc.lola.plots.tables)
15 | export(load.lola.for.medecom)
16 | export(matchLMCs)
17 | export(plotLMC.reference)
18 | export(plotLMCs)
19 | export(plotParameters)
20 | export(plotProportions)
21 | export(run.refbased)
22 | export(run.trait.association)
23 | export(run.trait.association.single)
24 | export(runMeDeCom)
25 | exportClasses(MeDeComSet)
26 | exportMethods(getLMCs)
27 | exportMethods(getProportions)
28 | exportMethods(getStatistics)
29 | useDynLib(MeDeCom)
30 |
--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
1 | MeDeCom 1.0.1
2 | =============
3 |
4 | * Implemented functionality for genomic annotation enrichment
5 |
6 | MeDeCom 1.0.0
7 | =============
8 |
9 | * Bumped version 1.0.0
10 |
11 | MeDeCom 0.3.3
12 | =============
13 |
14 | * Fixed bug in case that NFOLDS=1
15 | * Further smaller bugfixes and documentation updates
16 |
17 | MeDeCom 0.3
18 | =============
19 |
20 | * New functionality for gene-centric and interval-based functional annotation of LMCs as well
21 | as inference on LMC proportions (LOLA, GO-stats, association of proportions with phenotypic traits)
22 | * MeDeCom now fully integrates with RnBeads, DecompPipeline and FactorViz
23 | * New functions for exploring LMC distributions
24 | * Multiple bugfixes
25 |
26 | MeDeCom 0.2.3
27 | =============
28 |
29 | * Added conversion of RefFreeCellMix (https://cran.r-project.org/web/packages/RefFreeEWAS/index.html) to MeDeComSet
30 |
31 | MeDeCom 0.2
32 | =============
33 |
34 | * Added improved optimization routines (cppTAfact by Nik Vedeneev) which are now used by default
35 | * Improved R-side parallelization in the single machine mode
36 | * Multiple minor fixes and improvements
37 |
38 | MeDeCom 0.1
39 | =============
40 |
41 | * Initial release
42 |
--------------------------------------------------------------------------------
/R/MeDeCom-package.R:
--------------------------------------------------------------------------------
1 | #' MeDeCom: Methylome DeComposition using regularized constrained matrix factorization
2 | #'
3 | #' MeDeCom is an R-package that discovers and quantifies latent components
4 | #' in the DNA methylomes of heterogeneous samples
5 | #'
6 | #' @references TBA
7 | #' @docType package
8 | #' @name MeDeCom-package
9 | #' @useDynLib MeDeCom
10 | NULL
11 |
12 | #' Example CG annotation
13 | #'
14 | #' An data frame containing CpGs with their corresponding annotation in the genome to be used for the analysis. You can provide a similar
15 | #' \code{data.frame} for your own analysis.
16 | #'
17 | #' @docType data
18 | #' @keywords datasets
19 | #' @name example.cg.annotation
20 | #' @format \code{cg.ann} is a \code{data.frame} containing identifiers and postitions of CpG sites to be analyzed
21 | #' @author Michael Scherer
22 | NULL
23 |
24 | #' Example dataset
25 | #'
26 | #' Contains a data set of methylation values as a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples), a reference of
27 | #' contributions of 5 cell types in the samples, and the reference methylomes of the samples as a matrix with 10,000 rows and 5
28 | #' columns.
29 | #'
30 | #' \itemize{
31 | #' \item D, a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples) representing a potential input methylation
32 | #' matrix
33 | #' \item Aref, a reference of contributions of 5 cell types in the samples
34 | #' \item Tref, reference methylomes of the samples as a matrix with 10,000 rows and 5 columns
35 | #' }
36 | #'
37 | #' @docType data
38 | #' @keywords datasets
39 | #' @name example.dataset
40 | #' @usage data(example.dataset)
41 | #' @author Michael Scherer
42 | NULL
43 |
44 | #' Example result object (MeDeComSet)
45 | #'
46 | #' An example result of applying \pkg{MeDeCom} for a deconvolution experiment.
47 | #'
48 | #' @docType data
49 | #' @keywords datasets
50 | #' @name example.MeDeComSet
51 | #' @format \code{MeDeComSet}, the result of a deconvolution experiment and input to any follow-up analysis or to retrieve latent
52 | #' methylation components (LMCs)
53 | #' @author Michael Scherer
54 | NULL
--------------------------------------------------------------------------------
/R/MeDeComSet-class.R:
--------------------------------------------------------------------------------
1 | #' MeDeComSet Class
2 | #'
3 | #' Stores the result of a methylation deconvolution experiment
4 | #'
5 | #' @section Slots:
6 | #' \describe{
7 | #' \item{\code{dataset_info}}{\code{list} with information about the input data set.}
8 | #' \item{\code{parameters}}{\code{list} containing parameters of the deconvolution experiment.}
9 | #' \item{\code{outputs}}{\code{list} of deconvolution products for each combination of MeDeCom parameters.}
10 | #' }
11 | #' @section Methods and functions:
12 | #' \describe{
13 | #' \item{\code{\link[=getStatistics,MeDeComSet-method]{getStatistics}}}{Returns the value of one goodness-of-fit statistics for a given parameter combination.}
14 | #' \item{\code{\link[=getLMCs,MeDeComSet-method]{getLMCs}}}{Returns a matrix of LMCs for a given parameter combination.}
15 | #' \item{\code{\link[=getProportions,MeDeComSet-method]{getProportions}}}{Returns a matrix of mixing proportions for a given parameter combination.}
16 | #' \item{\code{\link{plotParameters}}}{Create a parameter selection plot.}
17 | #' \item{\code{\link{plotLMCs}}}{Visualize the LMCs.}
18 | #' \item{\code{\link{plotProportions}}}{Visualize the mixing proportions.}
19 | #' }
20 | #'
21 | #'
22 | #' @name MeDeComSet-class
23 | #' @rdname MeDeComSet-class
24 | #' @author Pavlo Lutsik
25 | #' @exportClass MeDeComSet
26 | setClass("MeDeComSet",
27 | representation(
28 | dataset_info="list",
29 | parameters="list",
30 | outputs="list"
31 | ),
32 | prototype(
33 | dataset_info=list(),
34 | parameters=list(),
35 | outputs=list()
36 | ),
37 | package = "MeDeCom")
38 | ########################################################################################################################
39 | #setMethod("initialize", "MeDeComSet",
40 | # function(.Object,
41 | # outputs=list(),
42 | # parameters=list()
43 | # ) {
44 | #
45 | # .Object@outputs<-outputs
46 | # .Object@parameters<-parameters
47 | # .Object
48 | #})
49 | ########################################################################################################################
50 | #' MeDeComSet
51 | #'
52 | #' Wrapper function MeDeComSet
53 | #'
54 | #'
55 | #' @param parameters \code{list} of MeDeCom parameters with elements \code{K}, integer vector of k values, \code{lambdas}, numeric vector of lambda values.
56 | #' @param outputs \code{list} of MeDeCom resutls with one element per each used CpG subset.
57 | #' @param dataset_info \code{list} with information about the input data set.
58 | #'
59 | #' @return an object of class MeDeComSet
60 | #'
61 | #' @name MeDeComSet
62 | #' @rdname MeDeComSet-class
63 | #' @aliases initialize,MeDeComSet-method
64 | #' @export
65 | MeDeComSet<-function(
66 | parameters,
67 | outputs,
68 | dataset_info=list()){
69 |
70 | object<-new("MeDeComSet",
71 | dataset_info = dataset_info,
72 | parameters = parameters,
73 | outputs = outputs)
74 | object
75 | }
76 | ########################################################################################################################
77 | if(!isGeneric("getStatistics")) setGeneric("getStatistics",
78 | function(object, ...) standardGeneric("getStatistics"))
79 |
80 | #' getStatistics-methods
81 | #'
82 | #' Methylation sites object information for which is present in the \code{RnBSet} object.
83 | #'
84 | #' @param object object returned by \link{runMeDeCom}
85 | #' @param Ks numbers of LMCs
86 | #' @param lambdas regularlization parameters
87 | #' @param cg_subset used CpG subset, defaults to the full data set
88 | #' @param statistic \code{character} of length 1 specifying goodness of fit statistics
89 | #'
90 | #' @details
91 | #' Currently the following values for \code{statistics} can be supplied: \code{objective}, \code{RMSE}, \code{CVE}.
92 | #'
93 | #' @return A numeric \code{matrix} or \code{vector} with the requested statistics
94 | #'
95 | #' @rdname getStatistics-methods
96 | #' @docType methods
97 | #' @aliases getStatistics
98 | #' @aliases getStatistics,MeDeComSet-method
99 | #' @export
100 | #' @examples
101 | #' \donttest{
102 | #' data(example.data)
103 | #' getStatistics(example_MeDeComSet, K=2, lambda=0.001)
104 | #' }
105 | setMethod("getStatistics", signature(object="MeDeComSet"),
106 | function(object, Ks=object@parameters$Ks, lambdas=object@parameters$lambdas, cg_subset=1, statistic="cve"){
107 | check_inputs(object, cg_subset, Ks, lambda=lambdas)
108 | elt<-c(
109 | "objective"="Fval", "Fval"="Fval",
110 | "rmse"="rmse", "RMSE"="rmse",
111 | "CVE"="cve", "cve"="cve",
112 | "MAEA"="maeA", "maeA"="maeA",
113 | "RMSET"="rmseT","rmseT"="rmseT",
114 | "deviance"="Deviance", "Deviance"="Deviance",
115 | "rss"="RSS","RSS"="RSS"
116 | )[statistic]
117 | return(as.numeric(object@outputs[[cg_subset]][[elt]][match(Ks, object@parameters$Ks), match(lambdas, object@parameters$lambdas)]))
118 | })
119 | ########################################################################################################################
120 | if(!isGeneric("getLMCs")) setGeneric("getLMCs",
121 | function(object, ...) standardGeneric("getLMCs"))
122 | #'
123 | #' getLMCs-methods
124 | #'
125 | #' Return a matrix of LMCs
126 | #'
127 | #' @param object object returned by \link{runMeDeCom}
128 | #' @param K number of LMCs
129 | #' @param lambda regularlization parameter
130 | #' @param cg_subset used CpG subset, defaults to the full data set
131 | #' @param statistic statistic to be used in returning
132 | #'
133 | #' @rdname getLMCs-methods
134 | #' @docType methods
135 | #' @aliases getLMCs
136 | #' @aliases getLMCs,MeDeComSet-method
137 | #' @export
138 | #' @examples
139 | #' \donttest{
140 | #' data(example.data)
141 | #' getLMCs(example_MeDeComSet, K=2, lambda=0.001)
142 | #' }
143 | setMethod("getLMCs", signature(object="MeDeComSet"),
144 | function(object, K=object@parameters$Ks[1], lambda=object@parameters$lambdas[1], cg_subset=1, statistic="cve"){
145 | check_inputs(object, cg_subset, K, lambda)
146 | return(object@outputs[[cg_subset]]$T[[match(K, object@parameters$Ks), match(lambda, object@parameters$lambdas)]])
147 | })
148 | ########################################################################################################################
149 | if(!isGeneric("getProportions")) setGeneric("getProportions",
150 | function(object, ...) standardGeneric("getProportions"))
151 | #'
152 | #' getProportions-methods
153 | #'
154 | #' Return a matrix of LMCs
155 | #'
156 | #' @param object object returned by \link{runMeDeCom}
157 | #' @param K number of LMCs
158 | #' @param lambda regularlization parameter
159 | #' @param cg_subset used CpG subset, defaults to the full data set
160 | #' @param statistic statistic to be used in returning
161 | #'
162 | #' @rdname getProportions-methods
163 | #' @docType methods
164 | #' @aliases getProportions
165 | #' @aliases getProportions,MeDeComSet-method
166 | #' @export
167 | #' @examples
168 | #' \donttest{
169 | #' data(example.data)
170 | #' getProportions(example_MeDeComSet, K=2, lambda=0.001)
171 | #' }
172 | setMethod("getProportions", signature(object="MeDeComSet"),
173 | function(object, K=object@parameters$Ks[1], lambda=object@parameters$lambdas[1], cg_subset=1, statistic="cve"){
174 | check_inputs(object, cg_subset, K, lambda)
175 | Ahat<-object@outputs[[cg_subset]]$A[[match(K, object@parameters$Ks), match(lambda, object@parameters$lambdas)]]
176 | if(!is.null(dim(Ahat))){
177 | rownames(Ahat)<-sprintf("LMC%d", 1:nrow(Ahat))
178 | if(!is.null(object@dataset_info$sample_names)){
179 | colnames(Ahat)<-object@dataset_info$sample_names
180 | }
181 | }
182 | return(Ahat)
183 | })
184 | ########################################################################################################################
185 | #
186 | # Check inputs for the get* methods
187 | #
188 | check_inputs<-function(MeDeComSet, cg_subset, K, lambda){
189 | if(!all(cg_subset %in% MeDeComSet@parameters$cg_subsets)){
190 | stop("wrong cg subset supplied")
191 | }
192 |
193 | if(!all(K %in% MeDeComSet@parameters$Ks)){
194 | stop("wrong K value supplied")
195 | }
196 |
197 | if(!all(lambda %in% MeDeComSet@parameters$lambdas)){
198 | stop("wrong lambda value supplied")
199 | }
200 | }
201 | #' as.MeDeComSet
202 | #'
203 | #' Function to convert object of type RefFreeCellMix to MeDeComSet
204 | #'
205 | #' @param object An object of class \code{RefFreeCellMix} containing cell type deconvolution information, or a list of such objects.
206 | #' @param cg_subsets The indeces of the CpG subsets used in the analysis.
207 | #' @param Ks The values of K used in the analysis. If NULL, K is determined by the size of the matrices.
208 | #' @param deviances Optional argument specifying the deviances as computed with \code{RefFreeCellMixArrayDevianceBoots}.
209 | #' @param rss Optional argument specifying the residual sum of sqaures
210 | #' @param m.orig The original number of rows (CpGs) in the methylation matrix.
211 | #' @param n.orig The original number of columns (samples) in the methylation matrix.
212 | #' @return An object of type \code{MeDeComSet}
213 | #' @details Since \code{RefFreeCellMix} only contains information on a single value for K, and does not contain any regularization
214 | #' (lambda), the corresponding parameters in the MeDeComSet are set to single numeric values. Furthermore, no information
215 | #' on goodness of fit (CVE, Fval) can be stored. If \code{cg_subsets} is not of length 1, an object containing multiple
216 | #' subsets is creared.
217 | #' @export
218 | as.MeDeComSet <- function(object,cg_subsets=1,Ks=NULL,deviances=NULL,rss=NULL,m.orig=NULL,n.orig=NULL){
219 | c.obj <- class(object)
220 | if(c.obj=="list"){
221 | c.obj <- class(object[[1]])
222 | if(c.obj == "list"){
223 | c.obj <- class(object[[1]][[1]])
224 | #object <- object[[1]]
225 | }
226 | }
227 | if(!(c.obj=="RefFreeCellMix" | c.obj=="list")){
228 | stop(paste("Cannot convert object of type",c.obj,"to MeDeComSet"))
229 | }
230 | if(c.obj == "RefFreeCellMix"){
231 | output <- list()
232 | if(is.null(Ks)){
233 | Ks <- "1"
234 | all.Ks <- ncol(object$Omega)
235 | object <- list("1"=object)
236 | if(!is.null(deviances)){
237 | deviances <- list("1"=deviances)
238 | }
239 | }else{
240 | all.Ks <- Ks
241 | Ks <- as.character(Ks)
242 | }
243 | # if(length(cg_subsets)==1){
244 | # object <- list(object)
245 | # if(!is.null(deviances)){
246 | # deviances <- list(deviances)
247 | # }
248 | # }
249 | for(ssets in cg_subsets){
250 | sel.sset <- object[[ssets]]
251 | lambda <- 0
252 | T.all <- list()
253 | A.all <- list()
254 | for(i in 1:length(Ks)){
255 | K <- Ks[i]
256 | sel.object <- sel.sset[[K]]
257 | A <- sel.object$Omega
258 | T <- sel.object$Mu
259 | K <- ncol(A)
260 | T.all[[i]] <- T
261 | if(!is.null(A)){
262 | A.all[[i]] <- t(A)
263 | }else{
264 | A.all[[i]] <- NULL
265 | }
266 | }
267 | T.all <- matrix(T.all,nrow=length(Ks))
268 | row.names(T.all) <- paste("K",Ks,sep="_")
269 | colnames(T.all) <- paste("lambda",lambda,sep = "_")
270 | A.all <- matrix(A.all,nrow = length(Ks))
271 | row.names(A.all) <- paste("K",Ks,sep="_")
272 | colnames(A.all) <- paste("lambda",lambda,sep = "_")
273 | if(is.null(deviances)){
274 | output[[ssets]] <- list(T=T.all,A=A.all)
275 | }else{
276 | deviances.all <- matrix(deviances[[ssets]],nrow = length(Ks))
277 | row.names(deviances.all) <- paste("K",Ks,sep="_")
278 | colnames(deviances.all) <- paste("lambda",lambda,sep = "_")
279 | output[[ssets]] <- list(T=T.all,A=A.all,Deviance=deviances.all)
280 | }
281 | }
282 | parameters <- list(cg_subsets=cg_subsets,
283 | Ks=all.Ks,
284 | lambdas=0)
285 | if(is.null(m.orig)){
286 | m <- nrow(T)
287 | }else{
288 | m <- m.orig
289 | }
290 | if(is.null(n.orig)){
291 | n <- ncol(A)
292 | }else{
293 | n <- n.orig
294 | }
295 | d.info <- list(m=m,n=n,TYPE="RefFreeCellMix")
296 | new.obj <- MeDeComSet(parameters = parameters,
297 | outputs = output,
298 | dataset_info = d.info)
299 | }else if(c.obj=="list"){
300 | output <- list()
301 | if(is.null(Ks)){
302 | Ks <- "1"
303 | all.Ks <- ncol(object$T[[1]][[1]])
304 | object <- list("1"=object)
305 | if(!is.null(rss)){
306 | rss <- list("1"=rss)
307 | }
308 | }else{
309 | all.Ks <- Ks
310 | Ks <- as.character(Ks)
311 | }
312 | for(ssets in cg_subsets){
313 | sel.sset <- object[[ssets]]
314 | lambda <- 0
315 | T.all <- sel.sset$T
316 | A.all <- sel.sset$A
317 | T.all <- matrix(T.all,nrow=length(Ks))
318 | row.names(T.all) <- paste("K",Ks,sep="_")
319 | colnames(T.all) <- paste("lambda",lambda,sep = "_")
320 | A.all <- matrix(A.all,nrow = length(Ks))
321 | row.names(A.all) <- paste("K",Ks,sep="_")
322 | colnames(A.all) <- paste("lambda",lambda,sep = "_")
323 | if(is.null(rss)){
324 | output[[ssets]] <- list(T=T.all,A=A.all)
325 | }else{
326 | rss.all <- matrix(rss[[ssets]],nrow = length(Ks))
327 | row.names(rss.all) <- paste("K",Ks,sep="_")
328 | colnames(rss.all) <- paste("lambda",lambda,sep = "_")
329 | output[[ssets]] <- list(T=T.all,A=A.all,RSS=rss.all)
330 | }
331 | }
332 | parameters <- list(cg_subsets=cg_subsets,
333 | Ks=all.Ks,
334 | lambdas=0)
335 | if(is.null(m.orig)){
336 | m <- nrow(T)
337 | }else{
338 | m <- m.orig
339 | }
340 | if(is.null(n.orig)){
341 | n <- ncol(A)
342 | }else{
343 | n <- n.orig
344 | }
345 | d.info <- list(m=m,n=n,TYPE="EDec")
346 | new.obj <- MeDeComSet(parameters = parameters,
347 | outputs = output,
348 | dataset_info = d.info)
349 |
350 | }
351 | return(new.obj)
352 | }
353 | ########################################################################################################################
354 | setMethod("show", "MeDeComSet", function(object){
355 | cat("An object of class MeDeComSet\n")
356 | cat("Input data set:\n")
357 | cat(sprintf("\t%d CpGs\n", object@dataset_info$m))
358 | cat(sprintf("\t%d methylomes\n", object@dataset_info$n))
359 | cat("Experimental parameters:\n")
360 | #cat(sprintf("\tCpG subsets: %s\n", paste(object@parameters$lambdas, collapse=", ")))
361 | cat(sprintf("\tk values: %s\n", paste(object@parameters$Ks, collapse=", ")))
362 | cat(sprintf("\tlambda values: %s\n", paste(object@parameters$lambdas, collapse=", ")))
363 |
364 | })
365 | ########################################################################################################################
--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | RHLasso <- function(Ginp, Winp, Ainp, l) {
5 | .Call('_MeDeCom_RHLasso', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, l)
6 | }
7 |
8 | RQuadHC <- function(Ginp, Winp, Ainp, otol, lconstr, uconstr) {
9 | .Call('_MeDeCom_RQuadHC', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, otol, lconstr, uconstr)
10 | }
11 |
12 | RProjSplxBox <- function(Xinp, linp, uinp) {
13 | .Call('_MeDeCom_RProjSplxBox', PACKAGE = 'MeDeCom', Xinp, linp, uinp)
14 | }
15 |
16 | RQuadSimplex <- function(Ginp, Winp, Ainp, ot) {
17 | .Call('_MeDeCom_RQuadSimplex', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, ot)
18 | }
19 |
20 | RQuadSimplexBox <- function(Ginp, Winp, Ainp, linp, uinp, ot) {
21 | .Call('_MeDeCom_RQuadSimplexBox', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, linp, uinp, ot)
22 | }
23 |
24 | cppTAfact <- function(mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda = 0.0, itersMax = 1000L, tol = 1e-8, tolA = 1e-7, tolT = 1e-7) {
25 | .Call('_MeDeCom_cppTAfact', PACKAGE = 'MeDeCom', mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda, itersMax, tol, tolA, tolT)
26 | }
27 |
28 |
--------------------------------------------------------------------------------
/R/allGlobals.R:
--------------------------------------------------------------------------------
1 |
2 | #######################################################################################################################
3 | # GLOBALS
4 | #######################################################################################################################
5 |
6 | ALGORITHMS<-c(
7 | "truth",
8 | "regression",
9 | "houseman2012",
10 | "houseman2016",
11 | "MeDeCom",
12 | "MeDeCom.quadPen",
13 | "MeDeCom.cppTAfact",
14 | "HLasso",
15 | "IntFac",
16 | "IntEmpirical",
17 | "Resample",
18 | "VertexSearch")
19 |
20 | T_METHODS<-c(
21 | NA,
22 | NA,
23 | NA,
24 | NA,
25 | "quadPen",
26 | "quadPen",
27 | "cppTAfact",
28 | "Hlasso",
29 | "integer",
30 | "empirical",
31 | "resample",
32 | NA)
33 |
34 | names(T_METHODS)=ALGORITHMS
35 |
36 | ALGORITHM.COLS=c(
37 | "truth"="black",
38 | "regression"="blue",
39 | "houseman2012"="deepskyblue",
40 | "houseman2016"="skyblue",
41 | "MeDeCom"="red",
42 | "MeDeCom.quadPen"="red",
43 | "MeDeCom.cppTAfact"="tomato",
44 | "HLasso"="orange",
45 | "IntFac"="green",
46 | "IntEmpirical"="plum",
47 | "Resample"="magenta",
48 | "VertexSearch"="brown")
49 |
50 | ALGORITHM.PCH=c(15,0,0,0,2,2,2,3,4,5,6,1)
51 |
52 | names(ALGORITHM.PCH)<-ALGORITHMS
53 |
54 | # a basis for the lambda parameter grid
55 | RELGRID = c(
56 | 1E-10, 1E-5, 1E-4, 5E-4, 1E-3, 2E-3, 5E-3,
57 | 1E-2, 2E-2, 5E-2, 1E-1, 2E-1, 5E-1, 1)
58 |
59 | PERFORMANCE_MEASURES<-c("Objective"="Fval", "RMSE"="rmse", "CV error"="cve", "RMSE, T"="rmseT", "MDC, T"="dist2C", "MAE, A"="maeA")
60 |
61 |
62 | #######################################################################################################################
63 |
--------------------------------------------------------------------------------
/R/gapStatisitics.R:
--------------------------------------------------------------------------------
1 | #######################################################################################################################
2 | #
3 | # Gap statistics for selection of parameter r (number of underlying components)
4 | #
5 | # author: Pavlo Lutsik
6 | #
7 | #######################################################################################################################
8 |
9 | #
10 | # select.r.gap
11 | #
12 | # Procedure for selecting the best r using the modified gap statistics
13 | #
14 | # @details The gap statistics for a given value of parameter r
15 | # is calculated as a difference between the "null" RMSE of
16 | # and the RMSE of the actual data factorization.
17 | # The "null" RMSE is obtained by averaging the factorization RMSE of the
18 | # the randomly permuted input matrix
19 | #
20 | #
21 | # @author Pavlo Lutsik
22 | #
23 | select.r.gap<-function(D, method, minr=1, maxr=5, nperm=100, return.all=TRUE, plot=TRUE, ...){
24 |
25 | rmse.orig<-rep(NA, length(minr: maxr))
26 | rmse.perm<-rep(NA, length(minr: maxr))
27 | sd.rmse.perm<-rep(NA, length(minr: maxr))
28 |
29 | perms<-lapply(1:nperm, function(np){
30 |
31 | sample.int(ncol(D)*nrow(D), ncol(D)*nrow(D))
32 |
33 | })
34 |
35 | for(ix in 1:length(minr:maxr)){
36 |
37 | kk<-(minr:maxr)[ix]
38 | ## Original factorization
39 | if(method %in% ALGORITHMS[2:6]){
40 |
41 | fr.res<-factorize.alternate(D, k=kk, t.method=T_METHODS[method],...)
42 | rmse.orig[ix]<-fr.res$rmse
43 |
44 | }
45 |
46 | curr.rmses<-rep(NA, length(nperm))
47 | for(pp in 1:nperm){
48 |
49 | fr.res<-factorize.alternate(matrix(D[perms[[pp]]], ncol=ncol(D)), k=kk, t.method=T_METHODS[method],...)
50 | curr.rmses[pp]<-fr.res$rmse
51 |
52 | }
53 |
54 | rmse.perm[ix]<-mean(curr.rmses)
55 | sd.rmse.perm[ix]<-sd(curr.rmses)
56 |
57 | }
58 |
59 |
60 | if(plot){
61 |
62 | layout(matrix(1:2, ncol=2))
63 | x<-minr:maxr
64 | plot(x,rmse.perm, col="red", pch=0, type="l", ylim=c(0, max(c(rmse.orig, rmse.perm))), main="RMSE")
65 | segments(x, rmse.perm-sd.rmse.perm,x, rmse.perm+sd.rmse.perm, col="red")
66 | epsilon = 0.02
67 | segments(x-epsilon,rmse.perm+sd.rmse.perm,x+epsilon,rmse.perm+sd.rmse.perm, col="red")
68 | segments(x-epsilon,rmse.perm-sd.rmse.perm,x+epsilon,rmse.perm-sd.rmse.perm, col="red")
69 |
70 | lines(rmse.orig, col="green", pch=1, type="o")
71 |
72 |
73 | plot(rmse.perm-rmse.orig, col="black", pch=1, type="o", main="Gap")
74 |
75 |
76 | }
77 |
78 | if(return.all){
79 |
80 | return(list(Rs=minr:maxr, RMSE=rmse.orig, RMSE.perm=rmse.perm, sd.RMSE.perm=sd.rmse.perm))
81 |
82 | }else{
83 |
84 | return((minr:maxr)[which.max(rmse.perm-rmse.orig)])
85 | }
86 | }
87 |
88 | #######################################################################################################################
89 |
--------------------------------------------------------------------------------
/R/markerSelection.R:
--------------------------------------------------------------------------------
1 | # Routines for selecting cell type-specific quantitative markers
2 | #
3 | # Author: Pavlo Lutsik
4 | ###############################################################################
5 |
6 |
7 | #
8 | #
9 | filter.by.mean.diff<-function(ref.data, min.diff=0.25, cell.types=colnames(ref.data)){
10 |
11 | if(length(cell.types)!=ncol(ref.data)){
12 | stop("invalid value for cell.types")
13 | }
14 |
15 | meandiff<-sapply(unique(cell.types), function(t){
16 |
17 | meandiff<-rowMeans(as.matrix(ref.data[,cell.types==t]))-rowMeans(ref.data[,cell.types!=t])
18 | meandiff
19 |
20 | })
21 |
22 |
23 | candidates<-lapply(unique(cell.types), function(t){
24 |
25 | mdiff<-meandiff[,t][abs(meandiff[,t])>min.diff]
26 | names(mdiff[order(abs(mdiff), decreasing=T)])
27 |
28 | })
29 |
30 | names(candidates)<-unique(cell.types)
31 |
32 | candidates
33 |
34 | }
35 |
36 | #
37 | #
38 | filter.by.ttest<-function(ref.data, n.cand=10, preselected=NULL, cell.types=colnames(ref.data)){
39 |
40 | if(length(cell.types)!=ncol(ref.data)){
41 | stop("invalid value for cell.types")
42 | }
43 |
44 |
45 | if(!is.null(preselected)){
46 | if(!(is.list(preselected) && all(sapply(preselected, is.character)) && length(preselected)==length(unique(cell.types)) )){
47 | stop("invalid argument for preselected: expected list ")
48 | }
49 |
50 | }
51 |
52 | ttest.stat<-lapply(unique(cell.types), function(t){
53 | apply(ref.data[preselected[[t]],],1,function(r) {
54 | if(length(r[cell.types==t])>1) {
55 | t.test(r[cell.types==t], r[cell.types!=t])$statistic
56 | }else{
57 | t.test(mu=r[cell.types==t], r[cell.types!=t])$statistic
58 | }
59 | })
60 | })
61 |
62 | names(ttest.stat)<-unique(cell.types)
63 |
64 | candidates<-sapply(unique(cell.types),function(t){
65 |
66 | ord<-order(ttest.stat[[t]], decreasing=T)
67 |
68 | names(ttest.stat[[t]])[ord][1:n.cand]
69 |
70 | })
71 |
72 | colnames(candidates)<-unique(cell.types)
73 |
74 | candidates
75 | }
76 |
77 | #
78 | #
79 | filter.by.linearity<-function(candidates, data.set){
80 |
81 | cell.types<-colnames(candidates)
82 |
83 | #minAICcounts<-list()
84 |
85 | markers<-lapply(cell.types, function(ct){
86 |
87 | cand<-intersect(rownames(data.set),candidates[,ct])
88 | models<-paste("resp", cand, sep="~")
89 | models<-c(models, "resp~1")
90 |
91 | predictors<-t(data.frame(data.set)[cand,])
92 |
93 | design<-list()
94 | lapply(1:dim(predictors)[2], function(i){
95 | el(design, where=colnames(predictors)[i])<<-predictors[,i]; return(NULL)
96 | })
97 |
98 | design$resp<-t(data.set)
99 | logliks<-sapply(models, function(f) apply(residuals(lm(f, design)),2,logLik2))
100 |
101 | model.size<-sapply(strsplit(models,"+", fixed=T), length)
102 | model.size[grep("~1", models)]<-0
103 | aics<-sapply(1:length(models), function(i) -2*logliks[,i]+2*model.size[i])
104 |
105 | bestModel<-models[apply(aics,1,which.min)]
106 | names(bestModel)<-rownames(aics)
107 |
108 | counts<-table(bestModel)
109 | #el(minAICcounts, where=ct)<<-counts
110 |
111 | counts["resp~1"]<-0
112 | counts<-counts[order(-counts)]
113 | print("Best models for:")
114 | print(ct)
115 | print(counts)
116 |
117 | return(strsplit(names(counts)[1], "~")[[1]][2])
118 |
119 | })
120 |
121 | markers
122 |
123 | }
124 |
125 |
126 |
--------------------------------------------------------------------------------
/R/matching.R:
--------------------------------------------------------------------------------
1 | #######################################################################################################################
2 | ##
3 | ## Matching the recovered latent components to the true (experimentally obtained) ones
4 | ##
5 | #######################################################################################################################
6 |
7 | match.matrix<-function(TT, Tref, method="pearson"){
8 |
9 | if(method=="pearson"){
10 |
11 | matchmat<-cor(TT,Tref, use="pairwise.complete.obs")
12 |
13 | }else if(method=="spearman"){
14 |
15 | matchmat<-cor(TT,Tref, method="spearman", use="pairwise.complete.obs")
16 |
17 | }else if(method=="anova"){
18 |
19 | fitsmat<-matrix(0L, nrow=ncol(TT), ncol=ncol(Tref))
20 | signsmat<-matrix(1L, nrow=ncol(TT), ncol=ncol(Tref))
21 |
22 | for(tt in 1:ncol(TT)){
23 | fits<-list()
24 | signs<-integer()
25 | for(i in 1:ncol(Tref)){
26 | signsmat[tt,i]<-sign(mean(Tref[TT[,tt]==range(TT)[2],i])-mean(Tref[TT[,tt]==range(TT)[1],i]))
27 | fitsmat[tt,i]<-summary(aov(ct~comp1, data=data.frame(ct=Tref[,i], comp1=TT[,tt])))[[1]]$`F value`[1]
28 | }
29 | }
30 |
31 | matchmat<-signsmat*fitsmat
32 | }
33 | }
34 |
35 | #corrmatch<-function(TT, Tref, method="pearson",return.vals=F){
36 | #
37 | # if(method=="pearson"){
38 | #
39 | # values<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,max)
40 | # indices<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,which.max)
41 | #
42 | # }else if(method=="spearman"){
43 | #
44 | # values<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,max)
45 | # indices<-apply(cor(TT,Tref, method="spearman", use="pairwise.complete.obs"),1,which.max)
46 | #
47 | # }else if(method=="anova"){
48 | #
49 | # indices<-integer()
50 | # values<-numeric()
51 | # for(tt in 1:ncol(TT)){
52 | # fits<-list()
53 | # signs<-integer()
54 | # for(i in 1:ncol(Tref)){
55 | # signs<-c(signs, sign(mean(Tref[TT[,tt]==range(TT)[2],i])-mean(Tref[TT[,tt]==range(TT)[1],i])))
56 | # fits[[length(fits)+1]]<-aov(ct~comp1, data=data.frame(ct=Tref[,i], comp1=TT[,tt]))
57 | # }
58 | # fvals<-sapply(fits, function(fit) summary(fit)[[1]]$`F value`[1])
59 | # fvals<-fvals*signs
60 | # indices<-c(indices,(which.max(fvals)))
61 | # values<-c(values,(max(fvals)))
62 | # }
63 | #
64 | # }
65 | # if(return.vals){
66 | # return(values)
67 | # }else{
68 | # return(indices)
69 | # }
70 | #
71 | #}
72 |
73 |
74 | corrmatch<-function(TT, Tref, method="pearson",return.vals=F){
75 |
76 | mmat<-match.matrix(TT,Tref,method)
77 | ## a very bad solution for the degenerate cases
78 | mmat[is.na(mmat)]<-0
79 | if(return.vals){
80 | return(apply(mmat, 1, max))
81 | }else{
82 | return(apply(mmat, 1, which.max))
83 | }
84 | }
85 |
86 | #######################################################################################################################
87 | #'
88 | #' greedymatch
89 | #'
90 | #' Matching of latent components
91 | #'
92 | #'
93 | #' @details suppose Tstar contains K* topics (columns)
94 | #' 1. select the K* most popular topics of That
95 | #' 2. use a greedy method to match them to those of Tstar (w.r.t L2 norm)
96 | #' and correspondingly permute rows of Ahat
97 | #'
98 | #' @author Martin Slawski
99 | #' @author R port by Pavlo Lutsik
100 | #'
101 | #' @export
102 | #'
103 | greedymatch<-function(Tstar,That,Ahat){
104 |
105 | ### 1. select K* most popular topics of That, get submatrix T, A
106 | k <- size(That,2) ;
107 | kstar <- size(Tstar,2);
108 |
109 | if (k < kstar){
110 | # pad zeros rows and columns to T, A when k < K*
111 | Delta <- kstar - k;
112 | That <- cbind(That, matrix(0, nrow=nrow(That), ncol=Delta));
113 | Ahat <- rbind(Ahat, matrix(0, nrow=Delta, ncol=ncol(Ahat)));
114 | }
115 |
116 | pop <- rowSums(Ahat) / sum(Ahat)
117 | poporder <- order(pop, decreasing=TRUE)
118 | TT <- That[,poporder[1:kstar],drop=FALSE]
119 | A <- Ahat[poporder[1:kstar],,drop=FALSE]
120 |
121 | ### matching submatrix
122 | idx <- 1:kstar
123 | Tm <- zeros(nrow(TT), ncol(TT))
124 | Am <- zeros(nrow(A), ncol(A))
125 | for(i in 1:kstar){
126 |
127 | topic <- Tstar[,i,drop=FALSE]
128 | dist <- zeros(length(idx),1)
129 | for(j in 1:length(idx)){
130 | dist[j,1] = norm(topic-TT[,idx[j],drop=FALSE],"2")
131 | }
132 | ii = which.min(dist)
133 | Tm[,i] = TT[,idx[ii]]
134 | Am[i,] = A[idx[ii],]
135 | idx<-idx[-ii]
136 |
137 | }
138 |
139 | return(list(Tm=Tm, Am=Am))
140 | }
141 |
142 | #######################################################################################################################
143 | #'
144 | #' match.components
145 | #'
146 | #' Matching the recovered components to the reference
147 | #'
148 | #' @param That recovered components from factorization
149 | #' @param Tref reference components
150 | #' @param method one of "corrmatch" and "greedymatch"
151 | #' @param Ahat for method "greedymatch": recovered mixing proportions
152 | #' @param check for method "corrmatch": a flag specifying whether to check uniquenes of the match
153 | #'
154 | #'
155 | #' @details Wrapper function for component matching methods
156 | #'
157 | #' @return a vector of indices. The length and the order of the vector corresponds to the columns of \code{TT}
158 | #' and the indices specify the columns of Tref
159 | #'
160 | #' @export
161 | #'
162 | matchLMCs<-function(
163 | That,
164 | Tref,
165 | method="corrmatch",
166 | check=TRUE,
167 | Ahat=NULL
168 | ){
169 |
170 | if(method=="corrmatch"){
171 |
172 | mr<-corrmatch(That, Tref, method="pearson", return.vals=F)
173 |
174 | if(check && length(unique(mr))nsites(rnb.set)){
112 | warning("most.var bigger than number of sites, reduced to maximum")
113 | most.var <- nsites(rnb.set)
114 | }
115 | rem.sites[queryHits(op)][ordered[1:most.var]] <- FALSE
116 | #rem.sites[sample(1:nsites(rnb.set),most.var)] <- FALSE
117 | rnb.set <- remove.sites(rnb.set,rem.sites)
118 | if(save.restricted.sites){
119 | save.rnb.set(rnb.set,file.path(temp.dir,"restrictedRnBSet"))
120 | }
121 | anno.set <- anno.set[!rem.sites]
122 | op <- findOverlaps(anno.set,anno.ref)
123 | }
124 | meth.ref <- meth(ref.set)[subjectHits(op),]
125 | meth.ref <- rnb.execute.imputation(meth.ref)
126 | colnames(meth.ref) <- pheno(ref.set)[,id.col]
127 | medecom.result <- runMeDeCom(rnb.set,
128 | Ks=Ks,
129 | lambdas=lambdas,
130 | cg_subsets=cg_subsets,
131 | opt.method=opt.method,
132 | temp.dir=temp.dir,
133 | NCORES=NCORES,
134 | cluster.settings=cluster.settings)
135 | if(!is.null(cg_subsets)){
136 | medecom.result@parameters$GROUP_LISTS <- cg_subsets
137 | }
138 | return(list("MeDeComSet"=medecom.result,"RefMeth"=meth.ref))
139 | }
140 |
141 | #' cluster.refbased
142 | #'
143 | #' This routine performs general hierachical clustering as in the \code{\link{plotLMCs}} and adds reference methylome profiles to this
144 | #' clustering.
145 | #'
146 | #' @param ref.run A result of \code{\link{run.refbased}} with the results of \code{\link{runMeDeCom}} and the reference data matrix
147 | #' @param K Selected number of components
148 | #' @param lambda Selected regularization parameter
149 | #' @param cg_subset Numeric vector representing the CpG sites selected for analysis from the orignial MeDeComSet
150 | #' @param plot.type Type of plot to be created, is passed to \code{\link{plotLMCs}}
151 | #' @param ... Further arguments passed to plotLMCs
152 | #'
153 | #' @return A plot object displaying the clustering
154 | #'
155 | #' @author Michael Scherer
156 | #'
157 | #' @export
158 | cluster.refbased <- function(ref.run,
159 | K,
160 | lambda,
161 | cg_subset=1,
162 | plot.type="dendrogram",
163 | ...){
164 | if(!is.list(ref.run) || length(ref.run)<2){
165 | stop("Argument needs to be the results obtained from 'run.refbased'")
166 | }
167 | medecom.result <- ref.run$MeDeComSet
168 | meth.ref <- ref.run$RefMeth
169 | if(!is.null(medecom.result@parameters$GROUP_LISTS)){
170 | sset <- medecom.result@parameters$GROUP_LISTS[[cg_subset]]
171 | meth.ref <- meth.ref[sset,]
172 | }
173 | plot <- plotLMCs(medecom.result,K=K,lambda=lambda,cg_subset=cg_subset,type=plot.type,Tref=meth.ref,...)
174 | return(plot)
175 | }
176 |
177 | #' plotLMC.reference
178 | #'
179 | #' This routine uses a reference methylome, together with an annotation, and calls plotLMCs for the matching postions
180 | #'
181 | #' @param medecom.set An object of type MeDeComSet
182 | #' @param ref.meth Reference methylome in the form of a matrix of a RnBSet
183 | #' @param ann.md Genomic annotation for the CpG sites present in medecom.set
184 | #' @param ann.ref Genomic annotation for the CpG sites in the referenc methylome. Can be omitted, if ref.meth is an RnBSet object
185 | #' @param cg_subset The cg_subset of interest
186 | #' @param K K value
187 | #' @param lambda lambda value
188 | #' @param type Plot type, see \code{\link{plotLMCs}}
189 | #' @param chrom.col The chromosome column name in ann.md and ann.ref
190 | #' @param start.col The start column name in ann.md and ann.ref
191 | #' @param end.col The end column name in ann.md and ann.ref
192 | #' @param ct.color.column A column name in the phenotypic information of ref.meth to be shown in the clustering
193 | #' @return NULL
194 | #' @author Michael Scherer
195 | #' @export
196 |
197 | plotLMC.reference <- function(medecom.set,
198 | ref.meth,
199 | ann.md,
200 | cg_subset=medecom.set@parameters$cg_subsets[1],
201 | K=medecom.set@parameters$Ks[1],
202 | lambda=medecom.set@parameters$lambdas[1],
203 | ann.ref=NULL,
204 | type="dendrogram",
205 | chrom.col="Chromosome",
206 | start.col="Start",
207 | end.col="End",
208 | ct.color.column="cell_type"){
209 | #require("RnBeads")
210 | if(!inherits(medecom.set,"MeDeComSet")){
211 | stop("Invalid value for medecom.set")
212 | }
213 | if(!inherits(ref.meth,"RnBSet") && !is.matrix(ref.meth)){
214 | stop("Invalid value for ref.meth; needs to be RnBSet or matrix")
215 | }
216 | if(is.matrix(ref.meth) && is.null(ann.ref)){
217 | stop("Genomic annotation needs to be provided for the reference methylome")
218 | }
219 | if(inherits(ref.meth,"RnBSet")){
220 | ann.ref <- annotation(ref.meth)
221 | meth.ref <- meth(ref.meth)
222 | colnames(meth.ref) <- pheno(ref.meth)[,ct.color.column]
223 | ref.meth <- meth.ref
224 | rm(meth.ref)
225 | }
226 | ann.md <- GRanges(Rle(ann.md[,chrom.col]),IRanges(start=ann.md[,start.col],end=ann.md[,end.col]))
227 | ann.ref <- GRanges(Rle(ann.ref[,chrom.col]),IRanges(start=ann.ref[,start.col],end=ann.ref[,end.col]))
228 | op <- findOverlaps(ann.md,ann.ref)
229 | if(any(!(1:length(ann.md) %in% queryHits(op)))){
230 | stop("Insuffiecient information present in reference methylome")
231 | }
232 | ref.meth <- ref.meth[subjectHits(op),]
233 | ref.meth <- ref.meth[medecom.set@parameters$GROUP_LISTS[[cg_subset]],]
234 | ref.meth <- rnb.execute.imputation(ref.meth,method="knn")
235 | plotLMCs(medecom.set,Tref=ref.meth,K=K,lambda=lambda,cg_subset=cg_subset,type = type)
236 | }
237 | #' load.ref.set
238 | #'
239 | #' This functions loads a reference data base and return the corresponding \code{RnBSet}.
240 | #'
241 | #' @param ref.base Reference base to be used. See \code{\link{run.refbased}} for further information.
242 | #' @param temp.dir Temporary directory to store the object.
243 | #'
244 | #' @return List of two elements \itemize{
245 | #' \item ID sample identifier column of the refernce data set used for plotting
246 | #' \item rnb.set \code{RnBSet} object containing the reference methylation profiles
247 | #' }
248 | #'
249 | #' @author Michael Scherer
250 | #'
251 | #' @noRd
252 |
253 | load.ref.set <- function(ref.base,temp.dir=NULL){
254 | #require("RnBeads")
255 | if(!ref.base %in% AVAIL.REFS){
256 | stop(paste("Unsupported reference data base, must be one of",AVAIL.REFS))
257 | }
258 | if(is.null(temp.dir)){
259 | temp.dir <- tempdir()
260 | }
261 | if(ref.base=="local") return(NULL)
262 | id.col <- "sample_id"
263 | if(ref.base=="reinius"){
264 | location <- file.path(temp.dir,"Reinius_Blood_Reference.zip")
265 | if(!file.exists(location)){
266 | cat("Downloading Reinius reference set \n")
267 | downloaded <- tryCatch(download.file("http://rnbeads.mpi-inf.mpg.de/publication/Reinius_Blood_Reference.zip",destfile = location),error=function(e){
268 | if(inherits(e,"Error")){
269 | stop("Failed to download reference data set. Check internet connection.")
270 | }
271 | })
272 | }
273 | ref.set <- load.rnb.set(location)
274 | id.col <- "Cell/Tissue"
275 | }
276 | return(list(ID=id.col,rnb.set=ref.set))
277 | }
278 |
--------------------------------------------------------------------------------
/R/simulation.R:
--------------------------------------------------------------------------------
1 | #######################################################################################################################
2 | #
3 | # Routines for simulation of the DNA methylation profiles
4 | #
5 | # @author Pavlo Lutsik
6 | #######################################################################################################################
7 |
8 | #
9 | # simulate.source.profile
10 | #
11 | # Generate the source DNA methylation profile
12 | #
13 | simulate.source.profile<-function(m, vals=c(0,1), probs=c(0.5,0.5)){
14 |
15 | sample(vals, m, replace=TRUE)
16 |
17 | }
18 | #
19 | # simulate.ct.profile
20 | #
21 | # Generate the characteristic DNA methylation profiles of cell types
22 | #
23 | simulate.ct.profiles<-function(source, similarities){
24 |
25 | CT.AVG<-sapply(similarities,function(sim){
26 |
27 | idx<-sample.int(length(source),floor((1-sim)*length(source)))
28 | CT.M<-source
29 | CT.M[idx]<-sample(c(0,1), floor((1-sim)*length(source)), replace=TRUE)
30 | CT.M
31 |
32 | })
33 |
34 | CT.AVG
35 |
36 | }
37 |
38 | # simulate.ind.profiles
39 | #
40 | # Generate the characteristic DNA methylation profiles of cell types
41 | #
42 |
43 | simulate.ind.profiles<-function(
44 | ctm,
45 | n,
46 | method="binom",
47 | vars=NULL,
48 | chi.sq.df=0.11,
49 | success.prob=NULL,
50 | alpha=0.1,
51 | beta=10,
52 | vals=c(0,1)){
53 |
54 | # if(is.null(CHI.SQ.DF)){
55 | # CHI.SQ.DF=(20/n)
56 | # }
57 |
58 |
59 | if(method=="normal"){
60 |
61 | if(is.null(vars)){
62 | sds<-sqrt(rchisq(nrow(ctm), df=chi.sq.df)/n)
63 | }
64 |
65 | profiles<-lapply(1:ncol(ctm), function(iid){
66 |
67 | cell.sample<-sapply(1:m, function(cgi){
68 |
69 | cg.vector<-rnorm(n, mean=ctm[cgi,iid], sd=sds[cgi]*scale)
70 |
71 | })
72 | cell.sample<-MeDeCom:::projectV(cell.sample, vals)
73 | t(cell.sample)
74 |
75 | })
76 | }else if(method=="binom"){
77 |
78 | #success.prob<-scale*rchisq(nrow(ctm), df=chi.sq.df)/n
79 |
80 | #success.prob[success.prob>0.5]<-0.5
81 |
82 | profiles<-lapply(1:ncol(ctm), function(iid){
83 |
84 | if(is.null(success.prob))
85 | success.prob<-rbeta(nrow(ctm), shape1=alpha, shape2=beta)
86 |
87 | ind.prof<-repmat(ctm[,iid,drop=F],m=n,n=1)
88 |
89 | change.v<-t(sapply(success.prob, function(sp) rbinom(n,1,prob=sp)))
90 |
91 | ind.prof[change.v==1]<-sign(1-ind.prof[change.v==1])
92 |
93 | ind.prof
94 | })
95 |
96 | }
97 |
98 |
99 | profiles<-lapply(1:n, function(i) sapply(1:ncol(ctm), function(j) profiles[[j]][,i] ))
100 |
101 |
102 | profiles
103 |
104 | }
105 |
106 | # prepare.average.profile
107 | #
108 | # Prepare individual profiles from existing data
109 | #
110 |
111 | prepare.average.profile<-function(pure.ct,
112 | samp,
113 | o=0.45,
114 | mean=F){
115 |
116 | n<-nrow(pure.ct)
117 |
118 | sds.samp<-apply(pure.ct[samp,,drop=F],1,sd)
119 |
120 | proj.profile.samp<-pure.ct[samp,,drop=F]
121 | if(mean){
122 | proj.profile.samp<-matrix(rowMeans(proj.profile.samp))
123 | }
124 | proj.profile.samp[proj.profile.samp1-o]<-1
126 | proj.profile.samp[proj.profile.samp>=o & proj.profile.samp<=1-o]<-0.5
127 |
128 |
129 |
130 | return(list(proj.profile.samp, sds.samp))
131 |
132 | }
133 |
134 | # simulate.populations
135 | #
136 | # Generate the cell populations
137 | #
138 | simulate.populations<-function(ind.profiles,
139 | sds.samp=NULL,
140 | vals=c(0,1),
141 | method="binom",
142 | success.prob=NULL,
143 | m=nrow(ind.profiles),
144 | nc=1000,
145 | alpha=0.1,
146 | beta=10){
147 |
148 | if(method=="normal")
149 | {
150 | profiles<-lapply(1:ncol(ind.profiles), function(iid){
151 |
152 | cell.sample<-sapply(1:m, function(cgi){
153 |
154 | cg.vector<-rnorm(nc, mean=ind.profiles[cgi,iid], sd=sds.samp[cgi]*scale)
155 |
156 | })
157 | cell.sample<-MeDeCom:::projectV(cell.sample, vals)
158 | t(cell.sample)
159 |
160 | })
161 |
162 | }else if(method=="binom"){
163 |
164 | #success.prob<-scale*rchisq(nrow(ind.profiles), df=chi.sq.df)/length(ind.profiles)
165 |
166 | if(is.null(success.prob))
167 | success.prob<-rbeta(nrow(ind.profiles), shape1=alpha, shape2=beta)
168 |
169 | #success.prob[success.prob>0.5]<-0.5
170 |
171 | profiles<-lapply(1:ncol(ind.profiles), function(iid){
172 |
173 | pop.prof<-repmat(ind.profiles[,iid,drop=F],m=nc,n=1)
174 |
175 | change.v<-t(sapply(success.prob, function(sp) rbinom(nc,1,prob=sp)))
176 |
177 | pop.prof[change.v==1]<-sign(1-pop.prof[change.v==1])
178 |
179 | pop.prof
180 |
181 | })
182 |
183 | }
184 | profiles
185 | }
186 |
187 |
188 | # mix.populations
189 | #
190 | # Mix the cell populations according in proportions,
191 | # given by the mixing matrix
192 | #
193 | mix.populations<-function(
194 | populations
195 | ,mixing.matrix
196 | ,cell.subset.size=NULL
197 | ,noize=0){
198 |
199 | if(!is.list(populations))
200 | stop("Invalid value for populations")
201 |
202 |
203 | if(ncol(mixing.matrix)!=length(populations))
204 | stop("The second dimension of the mixing matrix has to the number of cell types")
205 |
206 | nt<-length(populations[[1]])
207 |
208 | results<-vector("list", ncol(mixing.matrix))
209 |
210 | if(length(populations[[1]])==nrow(mixing.matrix)){
211 |
212 | for(j in 1:ncol(mixing.matrix)){
213 | nc<-ncol(populations[[j]][[1]])
214 |
215 | if(is.null(cell.subset.size))
216 | css<-nc
217 | else
218 | css<-cell.subset.size
219 |
220 | subs.sizes<-sapply(mixing.matrix[,j], function(fr) floor(css*fr))
221 | cell.subsets<-lapply(1:nt,function(x) sample.int(ncol(populations[[j]][[nt]]), subs.sizes[x]))
222 |
223 |
224 | mix<-list()
225 | for(i in 1:nt){
226 | mix[[i]]<-populations[[j]][[i]][,cell.subsets[[i]]]
227 | }
228 |
229 | results[[j]]<-do.call("cbind", mix)
230 | rm(mix)
231 | }
232 |
233 |
234 | }else{
235 | warning("Not implemented yet")
236 | results<-NULL
237 |
238 | }
239 |
240 | return(results)
241 |
242 | }
243 |
244 | # introduce.imprinting
245 | #
246 | # Simulate genomic imprinting
247 | #
248 |
249 | introduce.imprinting<-function(populations,
250 | fraction=0.02,
251 | ixx=NULL){
252 |
253 | ixx<-sample.int(nrow(populations[[1]]),floor(fraction*nrow(populations[[1]])))
254 |
255 |
256 | for(i in 1:length(populations))
257 | {
258 | populations[[i]][ixx,]<-0.5
259 | }
260 |
261 | populations
262 |
263 | }
264 |
265 | # introduce.asm
266 | #
267 | # Simulate allele-specific methylation
268 | #
269 | introduce.asm<-function(populations,
270 | fraction=0.1,
271 | sites=NULL){
272 |
273 |
274 | for(i in 1:length(populations))
275 | {
276 | ixx<-sample.int(nrow(populations[[i]]),floor(fraction*nrow(populations[[i]])))
277 | snp.vals<-sample(c(0,0.5,1), length(ixx), replace=TRUE)
278 |
279 | populations[[i]][ixx,]<-snp.vals
280 |
281 | }
282 |
283 | populations
284 | }
285 |
286 | # introduce.effects
287 | #
288 | # Simulate true biological effects
289 | #
290 | introduce.effects<-function(populations,
291 | types,
292 | sites,
293 | signs,
294 | means,
295 | sd.mean.frac=0.1){
296 |
297 |
298 | for(i in 1:length(populations)){
299 |
300 | for(j in types){
301 |
302 | effects<-rnorm(length(sites[[j]]), mean=means[j], sd=sd.mean.frac*means[j])
303 | if(is.null(signs[[j]]))
304 | signs[[j]]<-sign(rnorm(length(sites[[j]])))
305 |
306 | new.values<-sapply(1:length(sites[[j]]), function(si){
307 |
308 | if(signs[[j]][si]==1){
309 |
310 | new.value<-sapply(populations[[i]][[j]][sites[[j]][si],], function(value){
311 | if(value %in% c(0,0.5)){
312 | if(runif(1)1){
92 | if(verbose) print(paste("Extracting data from:", table.name))
93 | present<-intersect(ids,keys(table2))
94 | result<-data.frame(ID=present, V=as.character(as.list(table2[present])))
95 | cn<-c(cn,strsplit(table.name,"IlluminaHumanMethylation450k")[[1]][2])
96 | all.results<-merge(all.results, result, by="ID", all.x=T, sort=F)
97 | colnames(all.results)<-cn
98 | }
99 | }
100 |
101 | }
102 |
103 |
104 | return(all.results[match(ids,all.results$ID),])
105 |
106 | }
107 | ###############################################################################
108 | fetch.by.interval<-function(chromosome, start, end, strand="*", type="probes450"){
109 |
110 | if(!type %in% c("probes450", "hg19", "mm9")){
111 | stop("unsupported annotation type")
112 | }
113 |
114 | ann<-rnb.get.annotation(type)
115 | ann<-ann[match(chromosome,names(ann))]
116 |
117 | interval<-GRanges(chromosome,IRanges(start=start, end=end), strand=strand)
118 |
119 | olap<-findOverlaps(ann[[1]], interval)
120 |
121 | return(rownames(rnb.annotation2data.frame(ann))[queryHits(olap)])
122 |
123 | }
124 | ###############################################################################
125 | combin<-function(V,k){
126 | #
127 | # if(k>1){
128 | # vv<-rep(V,k)
129 | # Posss<-apply(combn(length(vv),k),2,function(idx) vv[idx])
130 | # Posss<-Posss[,!duplicated(t(Posss)), drop=FALSE]
131 | # }else{
132 | # return(matrix(V,ncol=length(V)))
133 | # }
134 | #
135 | # print("##################### DIMENSIONS ####################")
136 | # print(dim(Posss))
137 | # return(Posss)
138 |
139 | t(expand.grid(list(V)[rep(1,k)]))
140 |
141 | }
142 |
143 | ###############################################################################
144 |
145 | randsplxmat<-function(m,n){
146 |
147 | U = -log(matrix(runif(m*n), nrow=m))
148 | S = colSums(U) # probably only 1 row, so specify 1 explicitly
149 | dump<-sapply(1:n, function(j) U[,j] <<- U[,j]/S[j])
150 | return(U)
151 |
152 | }
153 |
154 | ###############################################################################
155 | projectV<-function(TP,V){
156 |
157 | TP_proj<-apply(TP, 2, function(col){
158 | dist<-abs(repmat(matrix(V, ncol=length(V)), n=length(col), m=1)-col)
159 | proj<-V[apply(dist,1,which.min)]
160 | })
161 | TP_proj
162 |
163 | }
164 | ###############################################################################
165 |
166 | opt.factorize.exact<-function(affine=TRUE,
167 | nonnegative=TRUE,
168 | aggr="no",
169 | replace=1L,
170 | nsamples=0L,
171 | naggsets=5L,
172 | chunksize=10L,
173 | verbose=FALSE,
174 | varargin=NULL){
175 |
176 | options<-list(
177 | affine=affine,
178 | nonnegative=nonnegative,
179 | aggr=aggr,
180 | replace=replace,
181 | nsamples=nsamples,
182 | naggsets=naggsets,
183 | chunksize=chunksize,
184 | verbose=verbose)
185 |
186 | if(is.null(varargin)){
187 |
188 | return(options)
189 |
190 | }else{
191 | print("not supported yet")
192 |
193 | return(options)
194 | }
195 |
196 | }
197 | ###############################################################################
198 |
199 | licols<-function(X,tol=1e-10){
200 |
201 | if(all(X==0)){ #X has no non-zeros and hence no independent columns
202 |
203 | return(list(r<-integer(), Xsubs=matrix(0), idx<-integer()))
204 | }
205 |
206 | colnames(X)<-as.character(1:ncol(X))
207 |
208 | qr.res<-qr(X, tol = tol, LAPACK=T)
209 | Q<-qr.Q(qr.res)
210 | R<-qr.R(qr.res)
211 | E<-match(colnames(R), colnames(X))
212 |
213 | diagr <- abs(diag(R));
214 |
215 | r <-length(which(diagr >=tol*diagr[1])) #Rank estimation
216 |
217 | idx<-sort(E[1:r])
218 |
219 | Xsub<-X[,idx]
220 |
221 | return(list(r=r, idx=idx, Xsub=Xsub))
222 | }
223 |
224 | adjust.nmf.coefs<-function(coefs){
225 | t(t(coefs)/colSums(coefs))
226 | }
227 |
228 | ###############################################################################
229 | #
230 | # Down-rank SNP-affected probes using a one-dimentional k-means
231 | # with k=3
232 | #
233 |
234 | rank.snp<-function(D){
235 |
236 | require(Ckmeans.1d.dp)
237 |
238 | R3 <- numeric(nrow(D))
239 | for(i in 1:nrow(D)){
240 | res <- Ckmeans.1d.dp(D[i,], 3)
241 | R3[i] <- sum(res$withinss)
242 | }
243 |
244 | R3
245 | }
246 |
247 | ###############################################################################
248 | get.cpg.intervals<-function(cpg.coords, ids=NULL, offset=50){
249 |
250 | #if(annot.full<-rnb.get.annotation("probes450")
251 | intervals<-GRanges(seq=cpg.coords$Chromosome,
252 | IRanges(start=cpg.coords$Start, width=1),
253 | strand=rep("*", nrow(cpg.coords)))
254 |
255 |
256 | intervals<-flank(intervals, width=offset, both=TRUE)
257 | intervals<-reduce(intervals)
258 | as.data.frame(intervals)
259 |
260 | }
261 | ###############################################################################
262 | #
263 | # generateExample
264 | #
265 | # Examples for testing factorization methods
266 | #
267 | # m number of genomic features
268 | # n number of profiles
269 | # k hidden dimension
270 | # t.method method used to generate matrix T: "integer", "uniform" or "beta"
271 | # a.method method used to generate matrix A: "uniform" or "dirichlet"
272 | # V a vector of possible values for \cs{method} integer or the upper and lower
273 | # bounds for the \cs{method} uniform
274 | # beta1 first beta-distribution parameter for \cs{t.method} beta
275 | # beta2 second beta-distribution parameter for \cs{t.method} beta
276 | # proportion.prior numeric vector of length \code{r}
277 | # noise.sd a standard deviation for additive Gaussian noize
278 | # digits desired precision
279 | #
280 | # return \cs{list} with elements \cs{D}, \cs{T} and \cs{A}
281 | #
282 | #
283 | generateExample<-function(
284 | m,
285 | n,
286 | k,
287 | t.method="beta",
288 | a.method="dirichlet",
289 | e.method="gaussian",
290 | V=c(0,1),
291 | beta1=0.5,
292 | beta2=0.5,
293 | proportion.prior=NULL,
294 | proportion.var.facror=1,
295 | Alower=rep(0, k),
296 | Aupper=rep(1, k),
297 | noise.sd=0,
298 | digits=12
299 | ){
300 |
301 | if(t.method=="integer"){
302 |
303 | Tt<-matrix(Inf,ncol=k, nrow=m)
304 | ri<-1
305 | it<-0
306 | while(any(Tt==Inf) && it<100){
307 | it<-it+1
308 | vertex<-V[sample.int(length(V), m, replace=T)]
309 | if(all(colSums(abs(Tt-vertex))!=0)){
310 | Tt[1:m,ri]<-vertex
311 | ri<-ri+1
312 | }
313 | }
314 |
315 | }else if(t.method=="uniform"){
316 |
317 | Tt<-matrix(runif(m*k, min=min(V), max=max(V)), ncol=k)
318 |
319 | }else if(t.method=="beta"){
320 |
321 | Tt<-matrix(rbeta(m*k, shape1=beta1, shape2=beta2), ncol=k)
322 |
323 | }else{
324 | stop("this method for generating T is not implemented")
325 | }
326 |
327 | if(a.method=="uniform"){
328 |
329 | if(max(Alower)==0 && min(Aupper)==1){
330 |
331 | A<-matrix(-log(runif(k*n)), ncol=n, nrow=k)
332 | A<-t(t(A)/colSums(A))
333 | A[1,]<-A[1,]+(rep(1,n)-colSums(A))
334 |
335 | }else{
336 | A <- t(sapply(1:k, function(pri){
337 | -log(runif(n, min=Alower[pri], max=Aupper[pri]))
338 | }))
339 | A<-t(t(A)/colSums(A))
340 | A[1,]<-A[1,]+(rep(1,n)-colSums(A))
341 | # for(kk in 1:n){
342 | # A[,kk] <- RProjSplxBox(A[,kk,drop=FALSE], Alower, Aupper);
343 | #
344 | }
345 |
346 | }else if(a.method=="dirichlet"){
347 |
348 | if(is.null(proportion.prior)){
349 | proportion.prior<-rep(1/k,k)
350 | }
351 | A<-t(rdirichlet(n, proportion.prior*proportion.var.facror))
352 |
353 | }else{
354 | stop("this method for generating A is not implemented")
355 | }
356 |
357 | if(e.method=="gaussian"){
358 | if(noise.sd>0){
359 | E=matrix(rnorm(m*n, sd=noise.sd), ncol=n)
360 | }else{
361 | E=matrix(0, nrow=m, ncol=n)
362 | }
363 | }else{
364 | stop("this method for generating additive noise is not implemented")
365 | }
366 |
367 | # get the data matrix
368 | Tt<-round(Tt, digits=digits)
369 | A<-round(A, digits=digits)
370 | E<-round(E, digits=digits)
371 |
372 | D<-Tt%*%A+E
373 |
374 | D[Dmax(V)]<-max(V)
376 |
377 | return(list("T"=Tt,"A"=A,"D"=D, "E"=E))
378 | }
379 |
380 | ###############################################################################
381 |
382 | RMSE_T<-function(That, Tstar, perm){
383 |
384 | if(length(unique(perm))==length(perm)){
385 | rmseT<-sqrt(sum((That-Tstar[,perm])^2)/ncol(Tstar)/nrow(Tstar))
386 | }else{
387 | rmseT<-sqrt(mean((
388 | sapply(unique(perm), function(comp){
389 | abs(Tstar[,comp]-rowMeans(That[,perm==comp,drop=FALSE]))
390 | }))^2))
391 | }
392 |
393 | rmseT
394 | }
395 | ###############################################################################
396 | MAE_A<-function(Ahat, Astar, perm){
397 |
398 | if(length(unique(perm))==length(perm)){
399 | maeA<-sum(abs(Ahat-Astar[perm,]))/ncol(Astar)/nrow(Astar)
400 | }else{
401 | # maeA<-sum(abs(
402 | # sapply(unique(perm), function(comp){
403 | # abs(Astar[comp,,drop=FALSE]/colSums(Astar[unique(perm),,drop=FALSE])-colSums(Ahat[perm==comp,,drop=FALSE]))
404 | # })))/ncol(Astar)/nrow(Astar)
405 |
406 | aggrAhat<-t(sapply(unique(perm), function(comp){
407 | colSums(Ahat[perm==comp,,drop=FALSE])
408 | }))
409 |
410 | aggrAstar<-sweep(Astar[unique(perm),,drop=FALSE], 2, colSums(Astar[unique(perm),,drop=FALSE]), "/")
411 |
412 | maeA<-sum(abs(aggrAhat-aggrAstar))/nrow(aggrAstar)/ncol(aggrAstar)
413 | }
414 | maeA
415 | }
416 | ###############################################################################
417 | estimate.accuracy<-function(fr, trueT, trueA, check=FALSE){
418 |
419 | perm<-MeDeCom:::match.components(fr$T, trueT, check=check)
420 |
421 | if(!is.null(perm)){
422 | if(length(unique(perm))==length(perm)){
423 | rmseT<-sqrt(sum((trueT-fr$T[,perm])^2)/ncol(trueT)/nrow(trueT))
424 | maeA<-sum(abs(trueA-fr$A[perm,]))/ncol(trueA)/nrow(trueA)
425 | }else{
426 | rmseT<-sqrt(sum((
427 | sapply(unique(perm), function(comp){
428 | abs(trueT[,comp]-rowMeans(fr$T[,perm==comp,drop=FALSE]))
429 | }))^2)/ncol(trueT)/nrow(trueT))
430 |
431 | maeA<-sum(
432 | sapply(unique(perm), function(comp){
433 | abs(trueA[comp,]-colMeans(fr$A[perm==comp,,drop=FALSE]))
434 | }))/ncol(trueA)/nrow(trueA)
435 | }
436 | }else{
437 | rmseT<-NA
438 | maeA<-NA
439 | }
440 |
441 | return(list(rmseT=rmseT, maeA=maeA))
442 | }
443 | ###############################################################################
444 | get.distance.matrix<-function(mdd, measure, centered=FALSE){
445 |
446 | if(centered){
447 | mdd<-lapply(mdd, function(mm) sweep(mm, 1, rowMeans(mm)))
448 | }
449 | mdd<-do.call("cbind", mdd)
450 |
451 | if(measure=="euclidean"){
452 | d <- dist(t(mdd))
453 | }else if(measure=="angular"){
454 | dm<-matrix(NA, ncol=ncol(mdd), nrow=ncol(mdd))
455 | colnames(dm)<-colnames(mdd)
456 | rownames(dm)<-colnames(mdd)
457 | for(ri in 1:ncol(mdd)){
458 | for(ci in 1:ncol(mdd)){
459 | dm[ri,ci]<-sum(mdd[,ci]*mdd[,ri])/sqrt(sum(mdd[,ci]^2))/sqrt(sum(mdd[,ri]^2))
460 | }
461 | }
462 | d <- as.dist(1-dm)
463 | }else if(measure=="correlation"){
464 | d <- as.dist(1-cor(mdd, method="pearson"))
465 | }
466 | }
467 | #####
468 | RQuadHC_dummy<-function(G, W, Tk, tol, lower, upper){
469 | outp<-Tk+matrix(runif(ncol(Tk)*nrow(Tk))*tol*10, nrow(Tk),ncol(Tk))
470 | outp[outp>1]<-1;
471 | dummy_loss<-sqrt(mean((Tk-outp)^2))
472 | return(list(outp, dummy_loss))
473 | }
474 | ### END
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | MeDeCom
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MeDeCom
2 |
3 | MeDeCom is an R package for reference-free decomposition of heterogeneous DNA methylation profiles.
4 | It uses constrained matrix factorization enhanced by biologically-relevant constraints and a specially tailored regularization.
5 | See MeDeCom [vignette](vignettes/MeDeCom.md) for a more detailed description and an example analysis.
6 |
7 | # Publications
8 |
9 | MeDeCom has originially been published in [Genome Biology](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1182-6). Together with its accompanying packages [DecompPipeline](https://github.com/CompEpigen/DecompPipeline) and [FactorViz](https://github.com/CompEpigen/FactorViz), MeDeCom is part of a published [protocol](https://www.nature.com/articles/s41596-020-0369-6) for reference-free deconvolution of complex DNA methylation data (see also [http://epigenomics.dkfz.de/DecompProtocol/](http://epigenomics.dkfz.de/DecompProtocol/)).
10 |
--------------------------------------------------------------------------------
/data/datalist:
--------------------------------------------------------------------------------
1 | example.MeDeComSet: medecom.result
2 | example.cg.annotation: cg.ann
3 | example.dataset: Aref D Tref
4 |
--------------------------------------------------------------------------------
/data/example.MeDeComSet.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/example.MeDeComSet.RData
--------------------------------------------------------------------------------
/data/example.cg.annotation.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/example.cg.annotation.RData
--------------------------------------------------------------------------------
/data/example.dataset.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/example.dataset.RData
--------------------------------------------------------------------------------
/data/small.example.RnBSet.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/small.example.RnBSet.RData
--------------------------------------------------------------------------------
/exec/cluster.script.sge.R:
--------------------------------------------------------------------------------
1 |
2 | if(any(grepl(".*cluster.script.gse.R", commandArgs()))){
3 | require(MeDeCom)
4 | }
5 |
6 | param.file<-commandArgs()[6]
7 | params_list<-readRDS(param.file)
8 |
9 | for(params in params_list){
10 |
11 | #print(params)
12 |
13 | load(file.path(params$DD,"data.set.RData"))
14 | D<-meth.data
15 |
16 | if(file.exists(file.path(params$DD, "trueT.RData"))){
17 | load(file.path(params$DD,"trueT.RData"))
18 | params$trueT<-trueT
19 | Tstar_present<-TRUE
20 | }else{
21 | Tstar_present<-FALSE
22 | }
23 | if(file.exists(file.path(params$DD, "trueA.RData"))){
24 | load(file.path(params$DD, "trueA.RData"))
25 | params$trueA<-trueA[,params$sample_subset]
26 | Astar_present<-TRUE
27 | }else{
28 | Astar_present<-FALSE
29 | }
30 |
31 | if(file.exists(file.path(params$DD, "start.RData"))){
32 | load(file.path(params$WD,"start.RData"))
33 | }
34 |
35 | load(file.path(params$DD, sprintf("cg_subset_%d.RDdata",params$cg_subset)))
36 | params$cg_subset<-cg_subset
37 |
38 | if(params$mode %in% c("initial_fine", "cv_fine")){
39 | if(file.exists(file.path(params$WD, params$lnr_result))){
40 | load(file.path(params$WD, params$lnr_result))
41 | old_result<-result
42 | rm(result)
43 | }
44 |
45 | if(file.exists(file.path(params$WD, params$clnr_result))){
46 | load(file.path(params$WD, params$clnr_result))
47 | params$startT<-result$That
48 | params$startA<-result$Ahat
49 |
50 | }
51 | }
52 |
53 |
54 | if(file.exists(file.path(params$DD, "cv_partitions.RDdata"))){
55 | load(file.path(params$DD, sprintf("cv_partitions.RDdata")))
56 | }
57 | #params$cg_subset<-cg_subsets[[params$cg_subset_id]]
58 | #params$sample_subset<-sample_subset
59 | #params$meth_matrix<-D
60 |
61 | if(params$mode %in% c("full","initial", "initial_fine")){
62 | incl_samples<-1:length(params$sample_subset)
63 | }else{
64 | fold_subset<-cv.partitions[params$FOLD,]
65 | incl_samples<-params$sample_subset[-fold_subset]
66 | }
67 |
68 | params$meth_matrix<-D[params$cg_subset,incl_samples,drop=FALSE]
69 |
70 | if(params$mode %in% c("full")){
71 | ## average the CV results and use as the init
72 | cv_files<-params$cv_init_results
73 | print(cv_files)
74 | cv_result_list<-vector("list", length(cv_files))
75 | for(cv_file_idx in 1:length(cv_files)){
76 | cv.file<-file.path(params$WD, cv_files[[cv_file_idx]])
77 | if(file.exists(cv.file)){
78 | load.env<-new.env(parent=emptyenv())
79 | load(cv.file, envir=load.env)
80 | cv_result_list[[cv_file_idx]]<-get("result", envir=load.env)
81 | }
82 | }
83 | print(str(cv_result_list))
84 | cv_result_list<-cv_result_list[!sapply(cv_result_list, is.null)]
85 | inits<-MeDeCom:::summarizeCVinits(cv_result_list)
86 | params$startT<-inits$T
87 | if(!is.null(inits$A)){
88 | params$startA<-inits$A
89 | }else{
90 | params$startA<-MeDeCom:::factorize.regr(params$meth_matrix, params$startT)[["A"]]
91 | }
92 | }
93 |
94 |
95 | if(Tstar_present){
96 | if(!is.null(params$fixed_T_cols)){
97 | free_cols<-setdiff(1:ncol(trueT), params$fixed_T_cols)
98 | params$fixedT<-trueT[,params$fixed_T_cols, drop=FALSE]
99 | #trueT<-trueT_ff[,-fixed_T_cols, drop=FALSE]]
100 | }else{
101 | fixedT<-NULL
102 | free_cols<-1:ncol(trueT)
103 | }
104 | }
105 |
106 | single_run_params<-intersect(names(as.list(args(MeDeCom:::singleRun))), names(params))
107 | result<-do.call("singleRun", params[single_run_params], envir=asNamespace("MeDeCom"))
108 |
109 | if(params$mode %in% c("full", "initial", "cv") || (result$Fval < old_result$Fval)){
110 | if(params$mode %in% c("full", "initial", "initial_fine")){
111 | trueT_prep<-trueA_prep<-NULL
112 | if(Tstar_present){
113 | trueT_prep<-trueT[params$cg_subset,free_cols,drop=FALSE]
114 | }
115 | if(Astar_present){
116 | trueA_prep<-trueA[,incl_samples,drop=FALSE]
117 | }
118 | perf_result<-MeDeCom:::estimatePerformance(result,
119 | params$meth_matrix,
120 | trueT_prep,
121 | trueA_prep)
122 | }else{
123 | perf_result<-MeDeCom:::estimateFoldError(
124 | result$That,
125 | D[params$cg_subset,params$sample_subset[fold_subset],drop=FALSE],
126 | params$NFOLDS)
127 | }
128 | for(elt in names(perf_result)){
129 | result[[elt]]<-perf_result[[elt]]
130 | }
131 |
132 | if(params$mode %in% c("initial_fine", "cv_fine")){
133 | print("found a better solution")
134 | }
135 | save(result, file=file.path(params$WD, params$lnr_result))
136 | }
137 | }
--------------------------------------------------------------------------------
/inst/unitTests/test_general.R:
--------------------------------------------------------------------------------
1 | test.reinius.reference <- function(){
2 | data("small.example.RnBSet")
3 | Ks <- 2
4 | lambdas <- c(0.01)
5 | res <- run.refbased(rnb.set = rnb.set.example,Ks = Ks,lambdas = lambdas)
6 | plot <- cluster.refbased(res,plot.type="dendrogram",K=2,lambda=0.01)
7 | passes <- is.null(plot)
8 | checkTrue(passes)
9 | }
10 |
11 | test.general <- function(){
12 | data("example.dataset")
13 | input.data <- D[sample(1:nrow(D),1000),sample(1:ncol(D),5)]
14 | cg_subsets <- list("var"=sample(1:nrow(D),250),"random"=sample(1:nrow(D),500))
15 | Ks <- 2
16 | lambdas <- c(0.01)
17 | res <- runMeDeCom(data = D,Ks = Ks,lambdas = lambdas,cg_subsets = cg_subsets)
18 | passes <- inherits(res,"MeDeComSet")
19 | checkTrue(passes)
20 | }
21 |
22 | test.contribution.interpretation <- function(){
23 | data("example.MeDeComSet")
24 | anno.frame <- data.frame(Sex=sample(c("M","F"),100,replace=T),Age=sample(1:100,100,replace=T),Ethnicity=sample(c("A","B","C"),100,replace = T))
25 | res <- run.trait.association.single(medecom.result,pheno.data=anno.frame)
26 | passes <- all(names(res) %in% c("linear model","qualitative","quantitative"))
27 | checkTrue(passes)
28 | }
29 |
30 | # test.enrichment <- function(){
31 | # require("RnBeads")
32 | # data("example.MeDeComSet")
33 | # anno.frame <- rnb.annotation2data.frame(rnb.get.annotation("probes450"))[sample(1:460000,10000),]
34 | # res <- lmc.lola.plots.tables(medecom.result,anno.data=anno.frame)
35 | # passes <- all(names(res) %in% c("Plots","Tables"))
36 | # res <- lmc.go.enrichment(medecom.result,anno.data=anno.frame)
37 | # passes <- passes && (class(res) == "list")
38 | # checkTrue(passes)
39 | # }
40 |
41 | test.routine <- function(){
42 | require("RUnit")
43 | require("MeDeCom")
44 | cat("STARTED testing general function \n")
45 | test.general()
46 | cat("COMPLETED testing general function \n")
47 | cat("STARTED testing contribution interpretation \n")
48 | test.contribution.interpretation()
49 | cat("COMPLETED testing contribution interpretation \n")
50 | # cat("STARTED testing enrichment functions \n")
51 | # test.enrichment()
52 | # cat("COMPLETED testing enrichment functions \n")
53 | }
54 |
55 | test.routine()
--------------------------------------------------------------------------------
/man/AVAIL.REFS.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reference.R
3 | \docType{data}
4 | \name{AVAIL.REFS}
5 | \alias{AVAIL.REFS}
6 | \title{reference.R
7 | -------------------------------------------------------------------------------------------------------------------------------------
8 | This scripts contains functions for assocating MeDeCom with known reference cell type profiles
9 | GLOBALS}
10 | \format{
11 | An object of class \code{character} of length 2.
12 | }
13 | \usage{
14 | AVAIL.REFS
15 | }
16 | \description{
17 | reference.R
18 | -------------------------------------------------------------------------------------------------------------------------------------
19 | This scripts contains functions for assocating MeDeCom with known reference cell type profiles
20 | GLOBALS
21 | }
22 | \keyword{datasets}
23 |
--------------------------------------------------------------------------------
/man/MeDeCom-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeCom-package.R
3 | \docType{package}
4 | \name{MeDeCom-package}
5 | \alias{MeDeCom-package}
6 | \title{MeDeCom: Methylome DeComposition using regularized constrained matrix factorization}
7 | \description{
8 | MeDeCom is an R-package that discovers and quantifies latent components
9 | in the DNA methylomes of heterogeneous samples
10 | }
11 | \references{
12 | TBA
13 | }
14 |
--------------------------------------------------------------------------------
/man/MeDeComSet-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeComSet-class.R
3 | \docType{class}
4 | \name{MeDeComSet-class}
5 | \alias{MeDeComSet-class}
6 | \alias{MeDeComSet}
7 | \alias{initialize,MeDeComSet-method}
8 | \title{MeDeComSet Class}
9 | \usage{
10 | MeDeComSet(parameters, outputs, dataset_info = list())
11 | }
12 | \arguments{
13 | \item{parameters}{\code{list} of MeDeCom parameters with elements \code{K}, integer vector of k values, \code{lambdas}, numeric vector of lambda values.}
14 |
15 | \item{outputs}{\code{list} of MeDeCom resutls with one element per each used CpG subset.}
16 |
17 | \item{dataset_info}{\code{list} with information about the input data set.}
18 | }
19 | \value{
20 | an object of class MeDeComSet
21 | }
22 | \description{
23 | Stores the result of a methylation deconvolution experiment
24 |
25 | Wrapper function MeDeComSet
26 | }
27 | \section{Slots}{
28 |
29 | \describe{
30 | \item{\code{dataset_info}}{\code{list} with information about the input data set.}
31 | \item{\code{parameters}}{\code{list} containing parameters of the deconvolution experiment.}
32 | \item{\code{outputs}}{\code{list} of deconvolution products for each combination of MeDeCom parameters.}
33 | }
34 | }
35 |
36 | \section{Methods and functions}{
37 |
38 | \describe{
39 | \item{\code{\link[=getStatistics,MeDeComSet-method]{getStatistics}}}{Returns the value of one goodness-of-fit statistics for a given parameter combination.}
40 | \item{\code{\link[=getLMCs,MeDeComSet-method]{getLMCs}}}{Returns a matrix of LMCs for a given parameter combination.}
41 | \item{\code{\link[=getProportions,MeDeComSet-method]{getProportions}}}{Returns a matrix of mixing proportions for a given parameter combination.}
42 | \item{\code{\link{plotParameters}}}{Create a parameter selection plot.}
43 | \item{\code{\link{plotLMCs}}}{Visualize the LMCs.}
44 | \item{\code{\link{plotProportions}}}{Visualize the mixing proportions.}
45 | }
46 | }
47 |
48 | \author{
49 | Pavlo Lutsik
50 | }
51 |
--------------------------------------------------------------------------------
/man/as.MeDeComSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeComSet-class.R
3 | \name{as.MeDeComSet}
4 | \alias{as.MeDeComSet}
5 | \title{as.MeDeComSet}
6 | \usage{
7 | as.MeDeComSet(
8 | object,
9 | cg_subsets = 1,
10 | Ks = NULL,
11 | deviances = NULL,
12 | rss = NULL,
13 | m.orig = NULL,
14 | n.orig = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{object}{An object of class \code{RefFreeCellMix} containing cell type deconvolution information, or a list of such objects.}
19 |
20 | \item{cg_subsets}{The indeces of the CpG subsets used in the analysis.}
21 |
22 | \item{Ks}{The values of K used in the analysis. If NULL, K is determined by the size of the matrices.}
23 |
24 | \item{deviances}{Optional argument specifying the deviances as computed with \code{RefFreeCellMixArrayDevianceBoots}.}
25 |
26 | \item{rss}{Optional argument specifying the residual sum of sqaures}
27 |
28 | \item{m.orig}{The original number of rows (CpGs) in the methylation matrix.}
29 |
30 | \item{n.orig}{The original number of columns (samples) in the methylation matrix.}
31 | }
32 | \value{
33 | An object of type \code{MeDeComSet}
34 | }
35 | \description{
36 | Function to convert object of type RefFreeCellMix to MeDeComSet
37 | }
38 | \details{
39 | Since \code{RefFreeCellMix} only contains information on a single value for K, and does not contain any regularization
40 | (lambda), the corresponding parameters in the MeDeComSet are set to single numeric values. Furthermore, no information
41 | on goodness of fit (CVE, Fval) can be stored. If \code{cg_subsets} is not of length 1, an object containing multiple
42 | subsets is creared.
43 | }
44 |
--------------------------------------------------------------------------------
/man/cluster.refbased.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reference.R
3 | \name{cluster.refbased}
4 | \alias{cluster.refbased}
5 | \title{cluster.refbased}
6 | \usage{
7 | cluster.refbased(
8 | ref.run,
9 | K,
10 | lambda,
11 | cg_subset = 1,
12 | plot.type = "dendrogram",
13 | ...
14 | )
15 | }
16 | \arguments{
17 | \item{ref.run}{A result of \code{\link{run.refbased}} with the results of \code{\link{runMeDeCom}} and the reference data matrix}
18 |
19 | \item{K}{Selected number of components}
20 |
21 | \item{lambda}{Selected regularization parameter}
22 |
23 | \item{cg_subset}{Numeric vector representing the CpG sites selected for analysis from the orignial MeDeComSet}
24 |
25 | \item{plot.type}{Type of plot to be created, is passed to \code{\link{plotLMCs}}}
26 |
27 | \item{...}{Further arguments passed to plotLMCs}
28 | }
29 | \value{
30 | A plot object displaying the clustering
31 | }
32 | \description{
33 | This routine performs general hierachical clustering as in the \code{\link{plotLMCs}} and adds reference methylome profiles to this
34 | clustering.
35 | }
36 | \author{
37 | Michael Scherer
38 | }
39 |
--------------------------------------------------------------------------------
/man/example.MeDeComSet.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeCom-package.R
3 | \docType{data}
4 | \name{example.MeDeComSet}
5 | \alias{example.MeDeComSet}
6 | \title{Example result object (MeDeComSet)}
7 | \format{
8 | \code{MeDeComSet}, the result of a deconvolution experiment and input to any follow-up analysis or to retrieve latent
9 | methylation components (LMCs)
10 | }
11 | \description{
12 | An example result of applying \pkg{MeDeCom} for a deconvolution experiment.
13 | }
14 | \author{
15 | Michael Scherer
16 | }
17 | \keyword{datasets}
18 |
--------------------------------------------------------------------------------
/man/example.cg.annotation.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeCom-package.R
3 | \docType{data}
4 | \name{example.cg.annotation}
5 | \alias{example.cg.annotation}
6 | \title{Example CG annotation}
7 | \format{
8 | \code{cg.ann} is a \code{data.frame} containing identifiers and postitions of CpG sites to be analyzed
9 | }
10 | \description{
11 | An data frame containing CpGs with their corresponding annotation in the genome to be used for the analysis. You can provide a similar
12 | \code{data.frame} for your own analysis.
13 | }
14 | \author{
15 | Michael Scherer
16 | }
17 | \keyword{datasets}
18 |
--------------------------------------------------------------------------------
/man/example.dataset.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeCom-package.R
3 | \docType{data}
4 | \name{example.dataset}
5 | \alias{example.dataset}
6 | \title{Example dataset}
7 | \usage{
8 | data(example.dataset)
9 | }
10 | \description{
11 | Contains a data set of methylation values as a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples), a reference of
12 | contributions of 5 cell types in the samples, and the reference methylomes of the samples as a matrix with 10,000 rows and 5
13 | columns.
14 |
15 | \itemize{
16 | \item D, a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples) representing a potential input methylation
17 | matrix
18 | \item Aref, a reference of contributions of 5 cell types in the samples
19 | \item Tref, reference methylomes of the samples as a matrix with 10,000 rows and 5 columns
20 | }
21 | }
22 | \author{
23 | Michael Scherer
24 | }
25 | \keyword{datasets}
26 |
--------------------------------------------------------------------------------
/man/factorize.alternate.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/factorizations.R
3 | \name{factorize.alternate}
4 | \alias{factorize.alternate}
5 | \title{factorize.alternate}
6 | \usage{
7 | factorize.alternate(
8 | D,
9 | k,
10 | method = "MeDeCom.quadPen",
11 | t.method = "quadPen",
12 | Tfix = NULL,
13 | Tpartial = NULL,
14 | Tpartial.rows = NULL,
15 | Apartial = NULL,
16 | Apartial.cols = NULL,
17 | V = NULL,
18 | lambda = 0,
19 | init = "random",
20 | opt = 5,
21 | emp.dim = 500,
22 | emp.resample = TRUE,
23 | emp.vsf = 1,
24 | emp.borders = c(0, 1),
25 | qp.rangeT = c(0, 1),
26 | qp.Alower = NULL,
27 | qp.Aupper = NULL,
28 | itermax = 100,
29 | trace = FALSE,
30 | eps = 1e-08,
31 | ncores = 1,
32 | pheno = NULL,
33 | na.values = FALSE,
34 | seed = NULL,
35 | verbosity = 0L
36 | )
37 | }
38 | \arguments{
39 | \item{D}{m by n input matrix with mixture data}
40 |
41 | \item{k}{number of latent components, \code{integer}}
42 |
43 | \item{method}{optimization method used. Currently supported values are
44 | \code{"MeDeCom.quadPen"} and \code{"MeDeCom.cppTAfact"}.}
45 |
46 | \item{t.method}{method for updating the latent component matrix, one of
47 | \code{"integer", "empirical", "Hlasso"} or \code{"quadPen"}}
48 |
49 | \item{Tfix}{an optional matrix of a priori known fixed components}
50 |
51 | \item{V}{for \code{t.method} "integer" a small vector of possible values;
52 | for \code{t.method} "empirical" a vector giving empirical distribution
53 | of T values}
54 |
55 | \item{init}{type of initialization, either "random" (default) or "fixed".}
56 |
57 | \item{opt}{if \code{init} is "random"
58 | number of runs with independent initialization,
59 | if \code{init} is "fixed"
60 | starting values for T and A (see details)}
61 |
62 | \item{emp.dim}{for \code{t.method} "empirical",
63 | number of randomly drawn samples for T row selection}
64 |
65 | \item{emp.resample}{for \code{t.method} "empirical",
66 | a flag indicating whether resampling should
67 | be done at each iteration}
68 |
69 | \item{itermax}{maximal number of iterations}
70 |
71 | \item{trace}{a flag indicating whether to return the
72 | factorization results for each iteration}
73 |
74 | \item{eps}{threshold for objective value change}
75 |
76 | \item{ncores}{number of CPU cores used for parallelization}
77 |
78 | \item{pheno}{a list with phenotypic information}
79 |
80 | \item{verbosity}{flag specifying whether to show diagnostic
81 | statements during the execution}
82 | }
83 | \value{
84 | a \code{list} with the following elements:
85 | \describe{
86 | \item{\code{T}}{matrix of latent components}
87 | \item{\code{A}}{matrix of mixture proportions}
88 | \item{\code{Fval}}{the final value of the objective function}
89 | \item{\code{Conv}}{sequence of objective function values
90 | attained after each iteration}
91 | \item{\code{rmse}}{RMSE of the factorization}
92 | }
93 | }
94 | \description{
95 | Matrix factorization algorithms based on the alternating optimization scheme
96 | }
97 | \details{
98 | In case \code{init} is "fixed" the starting values
99 | for the m by k matrix of latent components
100 | and for the k by n matrix of mixing proportions
101 | should be specified as T and A elements of a list
102 | supplied as \code{opt}
103 | }
104 | \author{
105 | Martin Slawski
106 |
107 | R port by Pavlo Lutsik
108 | }
109 |
--------------------------------------------------------------------------------
/man/factorize.regr.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/factorizations.R
3 | \name{factorize.regr}
4 | \alias{factorize.regr}
5 | \title{factorize.regr}
6 | \usage{
7 | factorize.regr(D, Tt, A0 = NULL, precision = 1e-08)
8 | }
9 | \arguments{
10 | \item{D}{m by n matrix with mixture data}
11 |
12 | \item{Tt}{either a m by k matrix of k true latent components
13 | or a list of n such matrices, one per each column of D}
14 |
15 | \item{A0}{initialization for the mixture proportions matrix}
16 |
17 | \item{precision}{numerical tolerance of the optimization algorithm}
18 | }
19 | \value{
20 | a \code{list} with elements:
21 | \describe{
22 | \item{\code{A}}{matrix of mixing proportions}
23 | \item{\code{T}}{Tt used}
24 | \item{\code{rmse}}{RMSE of the regression model}
25 | }
26 | }
27 | \description{
28 | Get mixing proportions from the target data matrix and a matrix of latent factors
29 | }
30 |
--------------------------------------------------------------------------------
/man/getLMCs-methods.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeComSet-class.R
3 | \docType{methods}
4 | \name{getLMCs,MeDeComSet-method}
5 | \alias{getLMCs,MeDeComSet-method}
6 | \alias{getLMCs}
7 | \title{getLMCs-methods}
8 | \usage{
9 | \S4method{getLMCs}{MeDeComSet}(
10 | object,
11 | K = object@parameters$Ks[1],
12 | lambda = object@parameters$lambdas[1],
13 | cg_subset = 1,
14 | statistic = "cve"
15 | )
16 | }
17 | \arguments{
18 | \item{object}{object returned by \link{runMeDeCom}}
19 |
20 | \item{K}{number of LMCs}
21 |
22 | \item{lambda}{regularlization parameter}
23 |
24 | \item{cg_subset}{used CpG subset, defaults to the full data set}
25 |
26 | \item{statistic}{statistic to be used in returning}
27 | }
28 | \description{
29 | Return a matrix of LMCs
30 | }
31 | \examples{
32 | \donttest{
33 | data(example.data)
34 | getLMCs(example_MeDeComSet, K=2, lambda=0.001)
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/man/getProportions-methods.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeComSet-class.R
3 | \docType{methods}
4 | \name{getProportions,MeDeComSet-method}
5 | \alias{getProportions,MeDeComSet-method}
6 | \alias{getProportions}
7 | \title{getProportions-methods}
8 | \usage{
9 | \S4method{getProportions}{MeDeComSet}(
10 | object,
11 | K = object@parameters$Ks[1],
12 | lambda = object@parameters$lambdas[1],
13 | cg_subset = 1,
14 | statistic = "cve"
15 | )
16 | }
17 | \arguments{
18 | \item{object}{object returned by \link{runMeDeCom}}
19 |
20 | \item{K}{number of LMCs}
21 |
22 | \item{lambda}{regularlization parameter}
23 |
24 | \item{cg_subset}{used CpG subset, defaults to the full data set}
25 |
26 | \item{statistic}{statistic to be used in returning}
27 | }
28 | \description{
29 | Return a matrix of LMCs
30 | }
31 | \examples{
32 | \donttest{
33 | data(example.data)
34 | getProportions(example_MeDeComSet, K=2, lambda=0.001)
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/man/getStatistics-methods.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MeDeComSet-class.R
3 | \docType{methods}
4 | \name{getStatistics,MeDeComSet-method}
5 | \alias{getStatistics,MeDeComSet-method}
6 | \alias{getStatistics}
7 | \title{getStatistics-methods}
8 | \usage{
9 | \S4method{getStatistics}{MeDeComSet}(
10 | object,
11 | Ks = object@parameters$Ks,
12 | lambdas = object@parameters$lambdas,
13 | cg_subset = 1,
14 | statistic = "cve"
15 | )
16 | }
17 | \arguments{
18 | \item{object}{object returned by \link{runMeDeCom}}
19 |
20 | \item{Ks}{numbers of LMCs}
21 |
22 | \item{lambdas}{regularlization parameters}
23 |
24 | \item{cg_subset}{used CpG subset, defaults to the full data set}
25 |
26 | \item{statistic}{\code{character} of length 1 specifying goodness of fit statistics}
27 | }
28 | \value{
29 | A numeric \code{matrix} or \code{vector} with the requested statistics
30 | }
31 | \description{
32 | Methylation sites object information for which is present in the \code{RnBSet} object.
33 | }
34 | \details{
35 | Currently the following values for \code{statistics} can be supplied: \code{objective}, \code{RMSE}, \code{CVE}.
36 | }
37 | \examples{
38 | \donttest{
39 | data(example.data)
40 | getStatistics(example_MeDeComSet, K=2, lambda=0.001)
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/man/greedymatch.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/matching.R
3 | \name{greedymatch}
4 | \alias{greedymatch}
5 | \title{greedymatch}
6 | \usage{
7 | greedymatch(Tstar, That, Ahat)
8 | }
9 | \description{
10 | Matching of latent components
11 | }
12 | \details{
13 | suppose Tstar contains K* topics (columns)
14 | 1. select the K* most popular topics of That
15 | 2. use a greedy method to match them to those of Tstar (w.r.t L2 norm)
16 | and correspondingly permute rows of Ahat
17 | }
18 | \author{
19 | Martin Slawski
20 |
21 | R port by Pavlo Lutsik
22 | }
23 |
--------------------------------------------------------------------------------
/man/lmc.annotation.enrichment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{lmc.annotation.enrichment}
4 | \alias{lmc.annotation.enrichment}
5 | \title{lmc.annotation.enrichment}
6 | \usage{
7 | lmc.annotation.enrichment(
8 | medecom.result,
9 | annotation.filter = NULL,
10 | anno.data,
11 | K = NULL,
12 | lambda = NULL,
13 | cg_subset = NULL,
14 | diff.threshold = 0.5,
15 | reference.computation = "median",
16 | comp.lmcs = NULL,
17 | type = "hypo",
18 | assembly = "hg19"
19 | )
20 | }
21 | \arguments{
22 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
23 | such an object is stored.}
24 |
25 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
26 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
27 |
28 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
29 | information or a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
30 | CpG annotations (ann_C)}
31 |
32 | \item{K}{The number of LMCs specified for the MeDeCom run.}
33 |
34 | \item{lambda}{The lambda parameter selected.}
35 |
36 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
37 |
38 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
39 | used to call a CpG differentially methylated. The higher this value, the more conservative the
40 | selection.}
41 |
42 | \item{reference.computation}{Metric used to set the reference on the remaining LMCs to determine hyper- and hypomethylated sites.
43 | Can be either \code{"median"} (default), \code{"mean"}, or \code{"lmcs"} (\code{comp.lmcs} argument needs to be provided).}
44 |
45 | \item{comp.lmcs}{Numeric vector containing two numbers representing the LMCs that should be compared to one another.}
46 |
47 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
48 |
49 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
50 | \code{\link{RnBSet-class}}}
51 | }
52 | \value{
53 | A data frame with four columns: \describe{
54 | \item{LMC}{The LMC analyzed}
55 | \item{annotation}{The annotation used. Can either be \code{chrXY} for enrichments on different chromosmes or different functional categories in the Ensembl regulatory build}
56 | \item{p.value}{The p-value computing using Fisher's exact test for enrichment}
57 | \item{OR}{The odds ratio for enrichment}
58 | }
59 | }
60 | \description{
61 | This function performs enrichment analysis for various genomic locations including chromosomes,
62 | and different function categories defined by the Ensembl regulatory build
63 | }
64 | \author{
65 | Michael Scherer
66 | }
67 |
--------------------------------------------------------------------------------
/man/lmc.annotation.plots.tables.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{lmc.annotation.plots.tables}
4 | \alias{lmc.annotation.plots.tables}
5 | \title{lmc.annotation.plots.tables}
6 | \usage{
7 | lmc.annotation.plots.tables(medecom.result, anno.data, ...)
8 | }
9 | \arguments{
10 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
11 | such an object is stored.}
12 |
13 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
14 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
15 | CpG annotations (ann_C)}
16 |
17 | \item{...}{Further arguments passed to \code{lmc.annotation.enrichment}}
18 | }
19 | \value{
20 | A list with two elements, one of them containing the plots for each LMC and the other for the corresponding annotation
21 | enrichment tables
22 | }
23 | \description{
24 | This functions calls \link{lmc.annotation.enrichment} and returns plots representing those results, as well as the tables with annotation
25 | enrichment results.
26 | }
27 | \seealso{
28 | lmc.annotation.enrichment
29 | }
30 | \author{
31 | Michael Scherer
32 | }
33 |
--------------------------------------------------------------------------------
/man/lmc.go.enrichment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{lmc.go.enrichment}
4 | \alias{lmc.go.enrichment}
5 | \title{lmc.go.enrichment}
6 | \usage{
7 | lmc.go.enrichment(
8 | medecom.result,
9 | annotation.filter = NULL,
10 | anno.data,
11 | K = NULL,
12 | lambda = NULL,
13 | cg_subset = NULL,
14 | diff.threshold = 0.5,
15 | reference.computation = "median",
16 | comp.lmcs = NULL,
17 | region.type = "genes",
18 | temp.dir = tempdir(),
19 | type = "hypo",
20 | assembly = "hg19"
21 | )
22 | }
23 | \arguments{
24 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
25 | such an object is stored.}
26 |
27 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
28 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
29 |
30 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
31 | information or a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
32 | CpG annotations (ann_C)}
33 |
34 | \item{K}{The number of LMCs specified for the MeDeCom run.}
35 |
36 | \item{lambda}{The lambda parameter selected.}
37 |
38 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
39 |
40 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
41 | used to call a CpG differentially methylated. The higher this value, the more conservative the
42 | selection.}
43 |
44 | \item{reference.computation}{Metric used to set the reference on the remaining LMCs to determine hyper- and hypomethylated sites.
45 | Can be either \code{"median"} (default), \code{"mean"}, or \code{"lmcs"} (\code{comp.lmcs} argument needs to be provided).}
46 |
47 | \item{comp.lmcs}{Numeric vector containing two numbers representing the LMCs that should be compared to one another.}
48 |
49 | \item{region.type}{Region type used to annotate CpGs to potentially regulatory regions (see \url{https://rnbeads.org/regions.html})
50 | for a list of available region types. Here, only "genes" "promoters" and their gencode versions
51 | are available.}
52 |
53 | \item{temp.dir}{Path to a directory used to store temporary files.}
54 |
55 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
56 |
57 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
58 | \code{\link{RnBSet-class}}}
59 | }
60 | \value{
61 | A list with K elements. One element is the enrichment result of the corresponding LMC-specific hypomethylated CpG sites.
62 | }
63 | \description{
64 | This routine computes GO enrichment results for LMC-specifically hypo- or hypermethylated sites.
65 | }
66 | \details{
67 | This function employs GO enrichment analysis with the GOstats package on the CpG sites that are LMC-specifically
68 | hypomethylated, after annotating the sites to the closest promotor/gene defined by \code{region.type}.
69 | The sites are selected by computing the median methylation value of the other LMCs and then selecting
70 | those sites that are more than \code{diff.threshold} away from the median in the LMC of interest.
71 | This is done for all LMCs from 1 to K.
72 | }
73 | \author{
74 | Michael Scherer
75 | }
76 |
--------------------------------------------------------------------------------
/man/lmc.go.plots.tables.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{lmc.go.plots.tables}
4 | \alias{lmc.go.plots.tables}
5 | \title{lmc.go.plots.tables}
6 | \usage{
7 | lmc.go.plots.tables(medecom.result, anno.data, ...)
8 | }
9 | \arguments{
10 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
11 | such an object is stored.}
12 |
13 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
14 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
15 | CpG annotations (ann_C)}
16 |
17 | \item{...}{Further arguments passed to \code{lmc.go.enrichment}}
18 | }
19 | \value{
20 | A list with two elements, one of them containing the plots for each LMC and the other for the corresponding GO
21 | enrichment tables
22 | }
23 | \description{
24 | This functions calls \link{lmc.go.enrichment} and returns plots representing those results, as well as the tables with GO
25 | enrichment results.
26 | }
27 | \seealso{
28 | lmc.go.enrichment
29 | }
30 | \author{
31 | Michael Scherer
32 | }
33 |
--------------------------------------------------------------------------------
/man/lmc.lola.enrichment.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{lmc.lola.enrichment}
4 | \alias{lmc.lola.enrichment}
5 | \title{lmc.lola.enrichment}
6 | \usage{
7 | lmc.lola.enrichment(
8 | medecom.result,
9 | annotation.filter = NULL,
10 | anno.data,
11 | K = NULL,
12 | lambda = NULL,
13 | cg_subset = NULL,
14 | diff.threshold = 0.5,
15 | reference.computation = "median",
16 | comp.lmcs = NULL,
17 | region.type = "ensembleRegBuildBPall",
18 | temp.dir = tempdir(),
19 | type = "hypo",
20 | assembly = "hg19",
21 | lola.db = NULL
22 | )
23 | }
24 | \arguments{
25 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
26 | such an object is stored.}
27 |
28 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
29 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
30 |
31 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
32 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
33 | CpG annotations (ann_C)}
34 |
35 | \item{K}{The number of LMCs specified for the MeDeCom run.}
36 |
37 | \item{lambda}{The lambda parameter selected.}
38 |
39 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
40 |
41 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
42 | used to call a CpG differentially methylated. The higher this value, the more conservative the
43 | selection.}
44 |
45 | \item{reference.computation}{Metric used to set the reference on the remaining LMCs to determine hyper- and hypomethylated sites.
46 | Can be either \code{"median"} (default), \code{"mean"}, or \code{"lmcs"} (\code{comp.lmcs} argument needs to be provided).}
47 |
48 | \item{comp.lmcs}{Numeric vector containing two numbers representing the LMCs that should be compared to one another.}
49 |
50 | \item{region.type}{Region type used to annotate CpGs to potentially regulatory regions (see \url{https://rnbeads.org/regions.html})
51 | for a list of available region types.}
52 |
53 | \item{temp.dir}{Path to a directory used to store temporary files.}
54 |
55 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
56 |
57 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
58 | \code{\link{RnBSet-class}}}
59 |
60 | \item{lola.db}{A loaded LOLA database as loaded with LOLA::loadRegionDB. If this value is NULL, the database is loaded
61 | automatically and stored in the temporary directory.}
62 | }
63 | \value{
64 | A list with K elements. One element is the enrichment result of the corresponding LMC-specific hypomethylated CpG sites.
65 | }
66 | \description{
67 | This routine computes LOLA enrichment results for LMC-specifically hypo- or hypermethylated sites.
68 | }
69 | \details{
70 | This function employs LOLA on the CpG sites that are LMC-specifically hypomethylated, after annotating
71 | the sites to the closest region defined by \code{region.type}. The sites are selected by computing
72 | the median methylation value of the other LMCs and then selecting those sites that are more than
73 | \code{diff.threshold} away from the median in the LMC of interest. This is done for all LMCs from
74 | 1 to K.
75 | }
76 | \seealso{
77 | lmc.lola.plot.tables
78 | }
79 | \author{
80 | Michael Scherer
81 | }
82 |
--------------------------------------------------------------------------------
/man/lmc.lola.plots.tables.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{lmc.lola.plots.tables}
4 | \alias{lmc.lola.plots.tables}
5 | \title{lmc.lola.plots.tables}
6 | \usage{
7 | lmc.lola.plots.tables(
8 | medecom.result,
9 | annotation.filter = NULL,
10 | anno.data,
11 | K = NULL,
12 | lambda = NULL,
13 | cg_subset = NULL,
14 | diff.threshold = 0.5,
15 | region.type = "ensembleRegBuildBPall",
16 | temp.dir = tempdir(),
17 | type = "hypo",
18 | assembly = "hg19",
19 | lola.db = NULL,
20 | ...
21 | )
22 | }
23 | \arguments{
24 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
25 | such an object is stored.}
26 |
27 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
28 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
29 |
30 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
31 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
32 | CpG annotations (ann_C)}
33 |
34 | \item{K}{The number of LMCs specified for the MeDeCom run.}
35 |
36 | \item{lambda}{The lambda parameter selected.}
37 |
38 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
39 |
40 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
41 | used to call a CpG differentially methylated. The higher this value, the more conservative the
42 | selection.}
43 |
44 | \item{region.type}{Region type used to annotate CpGs to potentially regulatory regions (see \url{https://rnbeads.org/regions.html})
45 | for a list of available region types.}
46 |
47 | \item{temp.dir}{Path to a directory used to store temporary files.}
48 |
49 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
50 |
51 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
52 | \code{\link{RnBSet-class}}}
53 |
54 | \item{lola.db}{A loaded LOLA database as loaded with LOLA::loadRegionDB. If this value is NULL, the database is loaded
55 | automatically and stored in the temporary directory.}
56 |
57 | \item{...}{Further arguments passed to \code{lmc.lola.enrichment}}
58 | }
59 | \value{
60 | A list with two elements, one of them containing the plots for each LMC and the other for the corresponding LOLA
61 | enrichment tables
62 | }
63 | \description{
64 | This functions calls \link{lmc.lola.enrichment} and returns plots representing those results, as well as the tables with LOLA
65 | enrichment results.
66 | }
67 | \seealso{
68 | lmc.lola.enrichment
69 | }
70 | \author{
71 | Michael Scherer
72 | }
73 |
--------------------------------------------------------------------------------
/man/load.lola.for.medecom.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/LMC_interpretation.R
3 | \name{load.lola.for.medecom}
4 | \alias{load.lola.for.medecom}
5 | \title{load.lola.for.medecom}
6 | \usage{
7 | load.lola.for.medecom(dir.path = tempdir(), assembly = "hg19")
8 | }
9 | \arguments{
10 | \item{dir.path}{A path to a directory, where the LOLA database is to be downloaded. Defaults to the temporary directory.}
11 |
12 | \item{assembly}{The assembly to be used.}
13 | }
14 | \value{
15 | The loaded LOLA database
16 | }
17 | \description{
18 | This functions downloads and loads the LOLA database in the specified directory. Should only be called once per session to save time.
19 | }
20 | \author{
21 | Michael Scherer
22 | }
23 |
--------------------------------------------------------------------------------
/man/locus_plot.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plotting.R
3 | \name{locus_plot}
4 | \alias{locus_plot}
5 | \title{locus_plot}
6 | \usage{
7 | locus_plot(
8 | That,
9 | ann,
10 | cgs,
11 | locus.chr,
12 | locus.start,
13 | locus.end,
14 | locus.name = sprintf("\%s:\%d-\%d", locus.chr, locus.start, locus.end),
15 | locus.forward = FALSE,
16 | flank.start = 1000,
17 | flank.end = 1000,
18 | comp.cols = rainbow(ncol(That)),
19 | legend.pos = "topleft",
20 | Tstar = NULL,
21 | D = NULL,
22 | plot.genes = FALSE,
23 | ann.genes = NULL
24 | )
25 | }
26 | \arguments{
27 | \item{That}{a matrix of LMCs}
28 |
29 | \item{ann}{CpG annotation}
30 |
31 | \item{cgs}{indices of CpGs data for which are present in That with respect to ann}
32 |
33 | \item{locus.chr}{chromosome}
34 |
35 | \item{locus.start}{start coordinate}
36 |
37 | \item{locus.end}{end coordinate}
38 |
39 | \item{flank.start}{number of basepairs to extend the locus upstream}
40 |
41 | \item{flank.end}{number of basepairs to extend the locus downstream}
42 |
43 | \item{comp.cols}{color code for LMCs}
44 |
45 | \item{legend.pos}{location of the legend, in accordance with \link{legend}}
46 |
47 | \item{Tstar}{matrix of reference profiles}
48 |
49 | \item{D}{matrix of input methylation data used to produce That}
50 |
51 | \item{plot.genes}{if \code{TRUE} a track with gene locations will be plotted}
52 |
53 | \item{ann.genes}{gene annotation necessary for the gene plotting
54 |
55 | @author Pavlo Lutsik, with modifications by Michael Scherer
56 | @export}
57 | }
58 | \description{
59 | Plot results for a selected locus
60 | }
61 |
--------------------------------------------------------------------------------
/man/matchLMCs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/matching.R
3 | \name{matchLMCs}
4 | \alias{matchLMCs}
5 | \title{match.components}
6 | \usage{
7 | matchLMCs(That, Tref, method = "corrmatch", check = TRUE, Ahat = NULL)
8 | }
9 | \arguments{
10 | \item{That}{recovered components from factorization}
11 |
12 | \item{Tref}{reference components}
13 |
14 | \item{method}{one of "corrmatch" and "greedymatch"}
15 |
16 | \item{check}{for method "corrmatch": a flag specifying whether to check uniquenes of the match}
17 |
18 | \item{Ahat}{for method "greedymatch": recovered mixing proportions}
19 | }
20 | \value{
21 | a vector of indices. The length and the order of the vector corresponds to the columns of \code{TT}
22 | and the indices specify the columns of Tref
23 | }
24 | \description{
25 | Matching the recovered components to the reference
26 | }
27 | \details{
28 | Wrapper function for component matching methods
29 | }
30 |
--------------------------------------------------------------------------------
/man/plotLMC.reference.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reference.R
3 | \name{plotLMC.reference}
4 | \alias{plotLMC.reference}
5 | \title{plotLMC.reference}
6 | \usage{
7 | plotLMC.reference(
8 | medecom.set,
9 | ref.meth,
10 | ann.md,
11 | cg_subset = medecom.set@parameters$cg_subsets[1],
12 | K = medecom.set@parameters$Ks[1],
13 | lambda = medecom.set@parameters$lambdas[1],
14 | ann.ref = NULL,
15 | type = "dendrogram",
16 | chrom.col = "Chromosome",
17 | start.col = "Start",
18 | end.col = "End",
19 | ct.color.column = "cell_type"
20 | )
21 | }
22 | \arguments{
23 | \item{medecom.set}{An object of type MeDeComSet}
24 |
25 | \item{ref.meth}{Reference methylome in the form of a matrix of a RnBSet}
26 |
27 | \item{ann.md}{Genomic annotation for the CpG sites present in medecom.set}
28 |
29 | \item{cg_subset}{The cg_subset of interest}
30 |
31 | \item{K}{K value}
32 |
33 | \item{lambda}{lambda value}
34 |
35 | \item{ann.ref}{Genomic annotation for the CpG sites in the referenc methylome. Can be omitted, if ref.meth is an RnBSet object}
36 |
37 | \item{type}{Plot type, see \code{\link{plotLMCs}}}
38 |
39 | \item{chrom.col}{The chromosome column name in ann.md and ann.ref}
40 |
41 | \item{start.col}{The start column name in ann.md and ann.ref}
42 |
43 | \item{end.col}{The end column name in ann.md and ann.ref}
44 |
45 | \item{ct.color.column}{A column name in the phenotypic information of ref.meth to be shown in the clustering}
46 | }
47 | \description{
48 | This routine uses a reference methylome, together with an annotation, and calls plotLMCs for the matching postions
49 | }
50 | \author{
51 | Michael Scherer
52 | }
53 |
--------------------------------------------------------------------------------
/man/plotLMCs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plotting.R
3 | \name{plotLMCs}
4 | \alias{plotLMCs}
5 | \title{plotLMCs}
6 | \usage{
7 | plotLMCs(
8 | MeDeComSet,
9 | type,
10 | K = NA,
11 | lambda = NA,
12 | cg_subset = 1,
13 | lmc = NA,
14 | Tref = NULL,
15 | distance = "correlation",
16 | center = FALSE,
17 | n.most.var = NA,
18 | D = NULL,
19 | sample.characteristic = NULL,
20 | scatter.matching = FALSE,
21 | scatter.smooth = TRUE,
22 | scatter.cg.feature = NULL,
23 | min.similarity = 0
24 | )
25 | }
26 | \arguments{
27 | \item{MeDeComSet}{an object with MeDeCom results}
28 |
29 | \item{type}{plot type, a \code{character} of length 1 (see Details)}
30 |
31 | \item{K}{value of parameter k to use}
32 |
33 | \item{lambda}{value of parameter lambda to use}
34 |
35 | \item{cg_subset}{which CpG subset to use}
36 |
37 | \item{lmc}{which LMC to use for visualization}
38 |
39 | \item{Tref}{a matrix with reference methylomes}
40 |
41 | \item{distance}{distance measure to use}
42 |
43 | \item{center}{if \code{TRUE} the LMC and reference methylome matrices will be row-centered}
44 |
45 | \item{n.most.var}{is not \code{NA} a respective number of CpGs with the highest standard deviation will be plotted}
46 |
47 | \item{D}{input data matrix used to derive the LMCs}
48 |
49 | \item{min.similarity}{minimal similarity between LMCs and (if available) reference profiles, used to select edges in \code{"similarity graph"}.
50 | Has only an influence if \code{type}=\code{"similarity graph"}.}
51 | }
52 | \description{
53 | A wrapper for various plotting methods for the visualization of LMCs
54 | }
55 | \details{
56 | Available plot types include:
57 | \describe{
58 | \item{\bold{\code{boxplot}}}{
59 | Boxplot describing the distributions of each of the LMCs and, if available, the reference methylomss.}
60 | \item{\bold{\code{dendrogram}}}{
61 | Dendrogram visualizing a joint hierarchical clustering of LMCs and, if available, the reference methylomes.}
62 | \item{\bold{\code{heatmap}}}{
63 | Heatmap visualizing a distance between LMCs and the reference methylomes.}
64 | \item{\bold{\code{mds}}}{
65 | Joint multidimensional scaling of LMCs and the reference methylomes.}
66 | \item{\bold{\code{scatterplot}}}{
67 | Multi-panel scatterplot of LMCs and reference methylomes.}
68 | \item{\bold{\code{extremality}}}{
69 | Barplot visualizing the value of the regularizer term for each LMC.}
70 | \item{\bold{\code{distance to center}}}{
71 | Barplot visualizing a distance to the data center for each LMC. Input data matrix used to derive the LMCs should be
72 | supplied as argument \code{D}.}
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/man/plotParameters.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plotting.R
3 | \name{plotParameters}
4 | \alias{plotParameters}
5 | \title{plotParameters}
6 | \usage{
7 | plotParameters(
8 | MeDeComSet,
9 | cg_subset = 1,
10 | Ks = integer(),
11 | lambdas = integer(),
12 | statistic = "cve",
13 | minLambda = 0,
14 | maxLambda = Inf,
15 | lambdaScale = "native",
16 | ...
17 | )
18 | }
19 | \arguments{
20 | \item{MeDeComSet}{MeDeCom object}
21 |
22 | \item{cg_subset}{integer index of the CpG subset (if no subsets were used)}
23 |
24 | \item{Ks}{values of parameter k to use (by default all k values available in the \code{MeDeComSet} are used)}
25 |
26 | \item{lambdas}{values of parameter lambda to use (by default all lambda values available in the \code{MeDeComSet} are used)}
27 |
28 | \item{statistic}{if multiple k values are supplied, the statistic which is plotted (defaults to cross-validation error)}
29 |
30 | \item{minLambda}{minimal lambda value}
31 |
32 | \item{maxLambda}{maximal lambda value}
33 |
34 | \item{lambdaScale}{character indicating if native scale or logarithmic scale should be employed for plotting lambda}
35 |
36 | \item{...}{further paramters passed to \code{plot.lambda.selection} or \code{plot.K.selection}}
37 | }
38 | \description{
39 | Parameter selection plots
40 | }
41 |
--------------------------------------------------------------------------------
/man/plotProportions.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/plotting.R
3 | \name{plotProportions}
4 | \alias{plotProportions}
5 | \title{plotProportions}
6 | \usage{
7 | plotProportions(
8 | MeDeComSet,
9 | type,
10 | K,
11 | lambda,
12 | cg_subset = 1,
13 | lmc = NA,
14 | Aref = NULL,
15 | ref.profile = NA,
16 | sample.characteristic = NULL,
17 | heatmap.clusterCols = FALSE,
18 | heatmap.clusterRows = FALSE,
19 | ...
20 | )
21 | }
22 | \arguments{
23 | \item{MeDeComSet}{an object with MeDeCom results}
24 |
25 | \item{type}{plot type, a \code{character} of length 1 (see Details)}
26 |
27 | \item{K}{value of parameter k to use}
28 |
29 | \item{lambda}{value of parameter lambda to use}
30 |
31 | \item{cg_subset}{which CpG subset to use}
32 |
33 | \item{lmc}{which LMC to use for visualization}
34 |
35 | \item{Aref}{a matrix with reference methylomes}
36 |
37 | \item{...}{Extra variables based to proportion.lineplot}
38 | }
39 | \description{
40 | A wrapper for various plotting methods for the visualization of mixing proportions
41 | }
42 | \details{
43 | Available plot types include:
44 | \describe{
45 | \item{\bold{\code{heatmap}}}{
46 | Lineplot of proportions recovered by MeDeCom and reference proportions.}
47 | \item{\bold{\code{barplot}}}{
48 | Stacked barplot of proportions recovered by MeDeCom.}
49 | \item{\bold{\code{lineplot}}}{
50 | Lineplot of proportions recovered by MeDeCom and, if available, reference proportions.}
51 | \item{\bold{\code{scatterplot}}}{
52 | Lineplot of proportions recovered by MeDeCom and reference proportions.}
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/man/run.refbased.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reference.R
3 | \name{run.refbased}
4 | \alias{run.refbased}
5 | \title{FUNCTIONS
6 | run.refbased}
7 | \usage{
8 | run.refbased(
9 | rnb.set,
10 | Ks,
11 | lambdas,
12 | cg_subsets = NULL,
13 | opt.method = "MeDeCom.cppTAfact",
14 | temp.dir = NULL,
15 | ref.base = "reinius",
16 | most.var = NULL,
17 | NCORES = 1,
18 | cluster.settings = NULL,
19 | ref.set = NULL,
20 | id.col = NULL,
21 | save.restricted.sites = FALSE
22 | )
23 | }
24 | \arguments{
25 | \item{rnb.set}{An Object of type \code{RnBSet} from the \pkg{RnBeads} package containing methylation information on which
26 | the deconvolution is to be performed}
27 |
28 | \item{Ks}{Numeric vector containing the number of components to be computed by \pkg{MeDeCom}}
29 |
30 | \item{lambdas}{Numeric vector specifying the regularzation parameters to be explored}
31 |
32 | \item{cg_subsets}{List of numeric vectors specifying the rows that are to be used in the analysis.}
33 |
34 | \item{opt.method}{Optimization method to be employed. For further information see \code{\link{runMeDeCom}}}
35 |
36 | \item{temp.dir}{Optional temporary directory to store intermediate results}
37 |
38 | \item{ref.base}{Reference profile data base to be used. Supported are
39 | \itemize{
40 | \item \code{"reinius"} A blood cell type reference methylome data set from Reinius et.al. (Reference to be added)
41 | }
42 | \itemize{
43 | \item \code{"local"} The reference data set is provided by the user. If this option is selected, \code{ref.set}
44 | must not be empty.
45 | }}
46 |
47 | \item{most.var}{Number specifying the number of most variable to be selected from \code{rnb.set}}
48 |
49 | \item{NCORES}{Number of cores to be used for analysis.}
50 |
51 | \item{cluster.settings}{Setting for the environment of a high performance compute cluster. Passed to \code{\link{runMeDeCom}}}
52 |
53 | \item{ref.set}{A \code{RnBSet} object containing a reference data set to be used besides \code{rnb.set}. Is only compatible with
54 | \code{ref.base="local"}.}
55 |
56 | \item{id.col}{The name of the column in the sample annotation sheet of \code{ref.set} containing the reference cell type. Is only
57 | compatible with \code{ref.base="local"}.}
58 |
59 | \item{save.restricted.sites}{Flag indicating if \code{rnb.set} restricted to the \code{most.var} sites is to be saved in \code{temp.dir}
60 | for potential downstream analysis.}
61 | }
62 | \value{
63 | A list object containing two elements \itemize{
64 | \item \code{"MeDeComSet"} Results of applying MeDeCom with the setting above
65 | \item \code{"RefMeth"} A matrix containing reference profiles from the specified data set. The number of rows in
66 | this matrix has been reduced according to the most variable sites in \code{rnb.set}.
67 | }
68 | }
69 | \description{
70 | Function to link MeDeCom's output to reference methylation profiles. The function returns both the MeDeCom result and a methyalation
71 | matrix with reference profiles from the source specified.
72 | }
73 | \details{
74 | This function applied MeDeCom to the specified data set and only support \code{RnBSet} objects as inputs. The function
75 | internally manipulated the object by selecting the most variable sites according to \code{most.var}. This leads to a decrease in
76 | the number of rows in the reference profiles to this number.
77 |
78 | Please note that an active internet connection is required, since this routine downloads data through the world wide web.
79 | }
80 | \author{
81 | Michael Scherer
82 | }
83 |
--------------------------------------------------------------------------------
/man/run.trait.association.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/contribution_interpretation.R
3 | \name{run.trait.association}
4 | \alias{run.trait.association}
5 | \title{run.trait.association}
6 | \usage{
7 | run.trait.association(
8 | medecom.set,
9 | pheno.data,
10 | test.fun = t.test,
11 | plot.path = getwd(),
12 | figure.format = "pdf"
13 | )
14 | }
15 | \arguments{
16 | \item{medecom.set}{An object of type \code{\link{MeDeComSet}} as the result of \code{\link{runMeDeCom}} containing LMCs and their
17 | proportions in the samples. The Set can contain multiple runs for different values of K and lambda.}
18 |
19 | \item{pheno.data}{An object of type \code{\link[RnBeads]{RnBSet-class}} containing methylation data and metadata for the same samples for which
20 | \code{medecom.set} was computed or a data.frame of sample annotations (ann_S)}
21 |
22 | \item{test.fun}{Test statistic used to compute p-values of differences between LMC contributions in pairwise sample comparisons.
23 | Defaults to \code{t.test}.}
24 |
25 | \item{plot.path}{Path to store the p-value heatmaps.}
26 |
27 | \item{figure.format}{Character describing the format in which plots should be stored on disk. Either \code{"pdf"} or \code{"png"}.}
28 | }
29 | \description{
30 | Computes test statistics for all possible group assignments of samples defined in \code{medecom.set} and \code{rnb.set} and stores
31 | heatmaps of p-values on the given location for all CG Subsets, Ks and lambdas present in \code{medecom.set}
32 | }
33 | \details{
34 | This function creates a new folder names \code{pdfs} at the location given by \code{plot.path} and stores a heatmap for
35 | all possible Ks and lambdas defined in \code{medecom.set}. The p-values are produced by comparing the LMC contributions
36 | in all sample comparisons defined by \code{\link[RnBeads]{rnb.sample.groups}} on \code{rnb.set}. The employed test statistic for
37 | pariwise comparison can be specified by \code{test.fun}, for groups defining more than one group \code{\link{kruskal.test}}
38 | is employed. P-values lower than 0.01 are added to the heatmap.
39 | }
40 | \author{
41 | Michael Scherer
42 | }
43 |
--------------------------------------------------------------------------------
/man/run.trait.association.single.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/contribution_interpretation.R
3 | \name{run.trait.association.single}
4 | \alias{run.trait.association.single}
5 | \title{run.trait.association.single}
6 | \usage{
7 | run.trait.association.single(
8 | medecom.set,
9 | pheno.data,
10 | cg_subset = NULL,
11 | K = NULL,
12 | lambda = NULL,
13 | test.fun = t.test
14 | )
15 | }
16 | \arguments{
17 | \item{medecom.set}{An object of type \code{\link{MeDeComSet}} as the result of \code{\link{runMeDeCom}} containing LMCs and their
18 | proportions in the samples. The Set can contain multiple runs for different values of K and lambda.}
19 |
20 | \item{pheno.data}{An object of type \code{\link[RnBeads]{RnBSet-class}} containing methylation data and metadata for the same samples for which
21 | \code{medecom.set} was computed or a data.frame of sample annotations (ann_S)}
22 |
23 | \item{cg_subset}{The cg_subset of interest}
24 |
25 | \item{K}{The selected value for number of LMCs (K)}
26 |
27 | \item{lambda}{The selected value of the regularizer (lambda)}
28 |
29 | \item{test.fun}{Test statistic used to compute p-values of differences between LMC contributions in pairwise sample comparisons.
30 | Defaults to \code{t.test}.}
31 | }
32 | \description{
33 | Computes test statistics for all possible group assignments of samples defined in \code{medecom.set} and \code{rnb.set} and stores
34 | heatmaps of p-values on the given location only for a given CG Subset, K and lambda.
35 | }
36 | \details{
37 | Returns a list with two elements, each a heatmap as a ggplot object for the given \code{medecom.set}, \code{cg_subset} \code{K} and \code{lambda}.
38 | The elements correpond to p-values of correlation ("quantivative") and t-tests ("qualitative") traits.
39 | The p-values are produced by comparing the LMC contributions in all sample comparisons defined by \code{\link[RnBeads]{rnb.sample.groups}}
40 | on \code{rnb.set}. The employed test statistic for pariwise comparison can be specified by \code{test.fun}, for groups defining more than one group \code{\link{kruskal.test}}
41 | is employed. P-values lower than 0.01 are added to the heatmap.
42 | }
43 | \author{
44 | Michael Scherer
45 | }
46 |
--------------------------------------------------------------------------------
/man/runMeDeCom.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/frontend.R
3 | \name{runMeDeCom}
4 | \alias{runMeDeCom}
5 | \title{runMeDeCom}
6 | \usage{
7 | runMeDeCom(
8 | data,
9 | Ks,
10 | lambdas,
11 | opt.method = "MeDeCom.cppTAfact",
12 | cg_subsets = NULL,
13 | sample_subset = NULL,
14 | startT = NULL,
15 | startA = NULL,
16 | trueT = NULL,
17 | trueA = NULL,
18 | fixed_T_cols = NULL,
19 | NINIT = 100,
20 | ITERMAX = 1000,
21 | NFOLDS = 10,
22 | N_COMP_LAMBDA = 4,
23 | NCORES = 1,
24 | random.seed = NULL,
25 | num.tol = 1e-08,
26 | analysis.name = NULL,
27 | use.ff = FALSE,
28 | cluster.settings = NULL,
29 | temp.dir = NULL,
30 | cleanup = TRUE,
31 | verbosity = 1L,
32 | time.stamps = FALSE
33 | )
34 | }
35 | \arguments{
36 | \item{data}{DNA methylation dataset as a \code{numeric} matrix (methylation sites vs samples) or an ojbect of class \code{RnBeadSet}}
37 |
38 | \item{Ks}{values of parameter \deqn{k} to be tested, vector of type \code{integer}}
39 |
40 | \item{lambdas}{values of parameter \deqn{\lambda} to be tested, vector of type \code{numeric}}
41 |
42 | \item{opt.method}{optimization method used. Currently supported values are \code{"MeDeCom.quadPen"} and \code{"MeDeCom.cppTAfact"}.}
43 |
44 | \item{cg_subsets}{a \code{list} of \code{integer} vectors specifying row indices to include into the analysis}
45 |
46 | \item{sample_subset}{samples to include into the analysis}
47 |
48 | \item{startT}{a \code{list} of length equal to \code{length(Ks)} or a \code{matrix} with \code{max(Ks)} columns}
49 |
50 | \item{startA}{a \code{list} of length equal to \code{length(Ks)} or a \code{matrix} with \code{max(Ks)} rows}
51 |
52 | \item{trueT}{a numeric matrix with as many rows as there are methylation sites in \code{data}}
53 |
54 | \item{trueA}{a numeric matrix with as many columns as there are methylation sites in \code{data}}
55 |
56 | \item{fixed_T_cols}{columsn of T which are known (to be implemented)}
57 |
58 | \item{NINIT}{number of random initializations}
59 |
60 | \item{ITERMAX}{maximal number of iterations of the alternating optimization scheme}
61 |
62 | \item{NFOLDS}{number of cross-validation folds}
63 |
64 | \item{N_COMP_LAMBDA}{the number of solutions to compare in the "smoothing" step}
65 |
66 | \item{NCORES}{number of cores to be used in the parallelized steps (at best a divisor of NINIT)}
67 |
68 | \item{random.seed}{seed for random number generation}
69 |
70 | \item{num.tol}{some small parameter}
71 |
72 | \item{analysis.name}{a deliberate name of the analysis as a \code{character} singleton}
73 |
74 | \item{use.ff}{use \code{ff} package functionality for memory optimization}
75 |
76 | \item{cluster.settings}{a list with parameters for an HPC cluster}
77 |
78 | \item{temp.dir}{a temporary directory for the cluster-based analysis available on all nodes}
79 |
80 | \item{cleanup}{if \code{TRUE} the temporary directory will be removed on completion}
81 |
82 | \item{verbosity}{verbosity level, \code{0} for quiet execution}
83 |
84 | \item{time.stamps}{add timestamps to the diagnostic output}
85 | }
86 | \value{
87 | MeDeComSet object
88 | }
89 | \description{
90 | Perform a MeDeCom experiment
91 | }
92 | \author{
93 | Pavlo Lutsik
94 | }
95 |
--------------------------------------------------------------------------------
/src/HCLasso.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * [Anew, Loss_new] = mexHCLasso(G,W,A,lambda)
3 | *
4 | * ---Input---
5 | *
6 | * G (k,k) full double matrix
7 | * W (k,d) full double matrix
8 | * A (k,d) full double matrix
9 | *
10 | * ---Output---
11 | *
12 | * Anew full (k,d) matrix
13 | * Loss_new double number
14 | *
15 | * ---Algorithm---
16 | *
17 | * For each clm of W, denoted by w, solve
18 | *
19 | * min a' * G * a - 2 * w' * a + lambda * norm(a,1),
20 | * sb.to. 1>= a_i >= 0
21 | *
22 | * i.e. a is in the hypercube of R^k.
23 | *
24 | * where the starting value(the estimate of the minimizer)
25 | * is stored in the corresponding clms of A(Anew).
26 | *
27 | * The implementation is based on SPG.
28 | *
29 | * Parallel Computing is supported by OpenMP.
30 | *
31 | * BLAS routines are embedded in for operations on matrices & vectors.
32 | *
33 | * ---Default parameters---
34 | *
35 | * convergence accuracy: 1e-10
36 | * suffcient descent criterion in line search: 1e-3
37 | * memory size in non-mono. descent checking: 10
38 | * max #threads : 1
39 | * dynamic allocating threads: yes
40 | *
41 | */
42 |
43 |
44 | #include
45 | #include
46 | #include "dynblas.h"
47 | #include
48 | #include
49 | #include
50 | using namespace std;
51 | using namespace Rcpp;
52 |
53 | #define MEM_OLD_VALUES 10
54 | #define MAX_NUM_THREADS 1
55 | #define OPT_TOL 1e-10
56 | #define SUFF_DESC 1e-3
57 | #define DYNAMIC_THREAD 1
58 |
59 | /*
60 | * compute the absolute value of x
61 | * write this function explicitly to avoid compiler issue
62 | */
63 | inline double dabs(double x){
64 | if ( x < 0 )
65 | x = -x;
66 | return x;
67 | }
68 |
69 | /* compute the constant Hess and Beta */
70 | inline void SetInput(double* G, double* w, double lambda, double* Hess, double* beta, ptrdiff_t k){
71 |
72 | // beta = 2 * w - lambda:
73 | // don't know BLAS function for subtracting a scalar for every
74 | // component of a vecotr
75 | for( int i = 0; i < k ; i++){
76 | beta[i] = 2 * w[i] - lambda;
77 | }
78 |
79 | // Hess = 2 * G :
80 | // Hess = G;
81 | // Hess = 1 * G + Hess;
82 | ptrdiff_t ione = 1;
83 | double one = 1.0;
84 | ptrdiff_t ks = (ptrdiff_t) (k * k);
85 | dcopy(&ks, G, &ione, Hess, &ione);
86 | daxpy(&ks, &one, G, &ione, Hess, &ione);
87 |
88 | //int ik = (int) k;
89 | //for(int i = 0; i < ik; i++ ){
90 | // mexPrintf("%f\n",beta[i]);
91 | //}
92 |
93 | //for(int i = 0; i < ik; i++ ){
94 | // for (int j = 0; j < ik; j++ ){
95 | // mexPrintf("%f\t", Hess[i + j * k]);
96 | // }
97 | // mexPrintf("%\n");
98 | //}
99 | }
100 |
101 | /* compute the gradient at x */
102 | inline void GetGrad(double* grad, double* Hess, double* beta, double* x, ptrdiff_t k){
103 | // grad = 2 * G * x - beta = Hess * x - beta;
104 |
105 | ptrdiff_t ione = 1;
106 | char* chn = (char*)"N";
107 | double one = 1.0;
108 | double zero = 0.0;
109 | double mone = -1.0;
110 |
111 | // step 1: grad = Hess * x ;
112 | dgemv(chn, &k, &k, &one, Hess, &k, x, &ione, &zero, grad, &ione);
113 |
114 | // step 2: grad = -1 * beta + grad;
115 | daxpy(&k, &mone, beta, &ione, grad, &ione);
116 |
117 | //int ik = (int) k;
118 | //for(int i = 0; i < ik; i++ ){
119 | // mexPrintf("%f\n",grad[i]);
120 | //}
121 |
122 | }
123 |
124 | /* compute the objective value at x */
125 | inline double ObjValue(double* G, double* beta, double* x, double* tmp, ptrdiff_t k){
126 | // x' * G * x - beta' * x = x' * (G * x - beta)
127 | ptrdiff_t ione = 1;
128 | char* chn = (char*)"N";
129 | double one = 1.0;
130 | double zero = 0.0;
131 | double mone = -1.0;
132 |
133 | // step 1: tmp = G * x ;
134 | dgemv(chn, &k, &k, &one, G, &k, x, &ione, &zero, tmp, &ione);
135 |
136 | // step 2: tmp = -1 * beta + tmp;
137 | daxpy(&k, &mone, beta, &ione, tmp, &ione);
138 |
139 | // step 3: f = tmp' * x;
140 | double f = (double)ddot(&k, x, &ione, tmp, &ione);
141 |
142 | return f;
143 | }
144 |
145 | /* compute the BB parameter */
146 | inline double GetAlpha(double* x, double* x_old, double* g, double* g_old, double* tmp, double* tmp1, ptrdiff_t k){
147 |
148 | // alpha = (x - x_old)' * (x - x_old) / [ (x - x_old)' * (g - g_old)];
149 |
150 | ptrdiff_t ione = 1;
151 | //double one = 1.0;
152 | //double zero = 0.0;
153 | double mone = -1.0;
154 |
155 | // tmp = x - x_old
156 | // step 1: copy x to tmp
157 | dcopy(&k, x, &ione, tmp, &ione);
158 | // step 2: tmp = -1 * x_old + tmp;
159 | daxpy(&k, &mone, x_old, &ione, tmp, &ione);
160 |
161 | // tmp1 = g - g_old
162 | // step 1: copy g to tmp1
163 | dcopy(&k, g, &ione, tmp1, &ione);
164 | // step 2: tmp1 = -1 * g_old + tmp1;
165 | daxpy(&k, &mone, g_old, &ione, tmp1, &ione);
166 |
167 | // numerator = tmp' * tmp
168 | double numerator = (double)ddot(&k, tmp, &ione, tmp, &ione);
169 |
170 | // denominator = tmp' * tmp1;
171 | double denominator = (double)ddot(&k, tmp, &ione, tmp1, &ione);
172 |
173 | double alpha = numerator / denominator;
174 |
175 | return alpha;
176 | }
177 |
178 | /* project x to the hypercube -> output f */
179 | inline void ProjHyperCube(double* f, double* x, int m) {
180 | for(int i = 0; i< m; i++){
181 | if (x[i] < 0)
182 | f[i] = 0;
183 | else if (x[i] > 1)
184 | f[i] = 1;
185 | else
186 | f[i] = x[i];
187 | }
188 | }
189 |
190 | /* compute the projected step */
191 | inline void GetProjStep(double* d, double* x, double* g, double alpha, ptrdiff_t k) {
192 | // d = Proj(x - alpha * g) - x;
193 |
194 | ptrdiff_t ione = 1;
195 | double mone = -1.0;
196 |
197 | //Step 1: d = x;
198 | dcopy(&k, x, &ione, d, &ione);
199 |
200 | //Step 2: d = -alpha*g + d;
201 | double malpha = -alpha;
202 | daxpy(&k, &malpha, g, &ione, d, &ione);
203 |
204 | //Step 3: d = ProjHyperCube(d)
205 | ProjHyperCube(d, d, (int)k);
206 |
207 | //step 4: d = d - x;
208 | daxpy(&k, &mone, x, &ione, d, &ione);
209 |
210 | //int ik = (int) k;
211 | //for(int i = 0; i < ik; i++ ){
212 | // mexPrintf("%f\n",d[i]);
213 | //}
214 | }
215 |
216 | /* compute the directional derivative */
217 | inline double GetDirectDerivative(double* g, double* d, ptrdiff_t k){
218 | ptrdiff_t ione = 1;
219 | double sum = (double)ddot(&k, g, &ione, d, &ione);
220 | return sum;
221 | }
222 |
223 | /* compute norm(x,1) for a vector x */
224 | inline double NormOne(double* x, ptrdiff_t k){
225 | ptrdiff_t ione = 1;
226 | double sum = dasum(&k, x, &ione);
227 | return sum;
228 | }
229 |
230 | /*** Solve Lasso Problem on Hypercube by SPG ***/
231 | void HCLasso(double* G, double* w, double* a0, double lambda, ptrdiff_t k,
232 | double* Hess, double* beta,
233 | double* x, double* x_old, double* g, double* g_old, double* d,
234 | double* old_fvals, double* tmp, double* tmp1,
235 | double* ahat, double* fhat){
236 | /********
237 | *
238 | * solve: min_a a'* G * a - 2 * w'* a + lambda * norm(a,1);
239 | * sb.to. a >= 0
240 | *
241 | * ---Input---
242 | *
243 | * G,w - quandratic form
244 | * a0 - starting value
245 | * lambda - regression parameter
246 | * k - length of a0
247 | *
248 | * ---Temporary Variables---
249 | *
250 | * Hess - Hessian = 2 * G, constant
251 | * beta - (2 * w - lambda), constant
252 | * x - current solution
253 | * x_old - previous solution
254 | * g - current gradient = Hess * x - beta
255 | * g_old - previous gradient = Hess * x - beta
256 | * d - descent direction
257 | *
258 | * old_fvals - for non-monotonically descent
259 | * tmp,tmp1 - for BLAS
260 | *
261 | * ---Output---
262 | *
263 | * ahat - minimizer
264 | * fhat - objective value at ahat
265 | *
266 | *******/
267 |
268 | ptrdiff_t ione = 1;
269 | double one = 1.0;
270 | double mone = -1.0;
271 | double zero = 0.0;
272 | char* chn = (char*)"N";
273 |
274 | /*** Initialiation ***/
275 |
276 | // set parameter
277 | double optTol = OPT_TOL;
278 | double suffDec = SUFF_DESC;
279 | double f; // objective value at current solution
280 | double fmin; // minimum. objective value in the sequence generated by SPG
281 | // memory for non-monotone line search
282 | for(int i = 0; i < MEM_OLD_VALUES; i++) {
283 | old_fvals[i] = -std::numeric_limits::max();
284 | }
285 |
286 | // set Hessian and beta
287 | SetInput(G, w, lambda, Hess, beta, k);
288 |
289 | // get starting point a0, gradient & fval
290 | dcopy(&k, a0, &ione, x, &ione);
291 | GetGrad(g, Hess, beta, x, k);
292 | f = ObjValue(G, beta, x, tmp, k);
293 | fmin = f;
294 |
295 | // copy to estimate
296 | dcopy(&k, x, &ione, ahat, &ione);
297 | fhat[0] = fmin;
298 |
299 | /*** SPG Loop ***/
300 |
301 | double alpha; // BB parameter
302 | double gtd; // Directional Derivative
303 | double t; //stepsize;
304 | double f_ref; // reference function value in non-monotone linear search
305 | double Linear, Quad; // ingredient to compute new function value;
306 | double factor; // for linear search, factor to reduce stepsize
307 | double Norm1_dx; // ||dx||_1, for linear search and as stopping criterion
308 | double linear, quad, red_f, f_tmp, norm1_dx; //temporary variable in linear search
309 |
310 | int iter = 0;
311 | int itermax = 500;
312 | while (1){
313 |
314 | //** Compute Step Direction
315 | if (iter == 0)
316 | alpha = 1;
317 | else{
318 | alpha = GetAlpha(x, x_old, g, g_old, tmp, tmp1, k);
319 | if (alpha <= 1e-10 || alpha > 1e10) {
320 | alpha = 1;
321 | }
322 | }
323 |
324 | //** Compute the projected step
325 | GetProjStep(d, x, g, alpha, k);
326 |
327 |
328 | //** Check that Progress can be made along the direction
329 | gtd = GetDirectDerivative(g, d, k);
330 |
331 | if (gtd > -optTol){
332 | //mexPrintf("Directional Derivative below optTol\n%f", gtd);
333 | break;
334 | }
335 |
336 | //** Backtracking Line Search
337 | // Select Initial Guess to step length
338 | if (iter == 0){
339 | t = 1/NormOne(g, k);
340 | t = (t > 1) ? 1 : t;
341 | }
342 | else{
343 | t = 1;
344 | }
345 |
346 | // Get the reference function value for non-monotone condition:
347 | // __update the old_values memorized
348 | if (iter < MEM_OLD_VALUES)
349 | old_fvals[iter] = f;
350 | else{
351 | for(int i = 0; i < MEM_OLD_VALUES-1; i++){
352 | old_fvals[i] = old_fvals[i+1];
353 | }
354 | old_fvals[MEM_OLD_VALUES-1] = f;
355 | }
356 |
357 | // __find f_ref = max(old_fvals);
358 | f_ref = old_fvals[0];
359 | for(int i = 1; i < MEM_OLD_VALUES; i++){
360 | if (f_ref < old_fvals[i])
361 | f_ref = old_fvals[i];
362 | }
363 |
364 | // ingredients for computing (f_new - f) based on stepsize t:
365 | // __dx = t * d; Linear = g' * dx; Quad = dx' * Hess * dx;
366 | // __equivalently, Linear = t * g' * d = t * gtd; Quad = t^2 * d' * Hess * d;
367 | Linear = t * gtd;
368 | dgemv(chn, &k, &k, &one, Hess, &k, d, &ione, &zero, tmp, &ione);
369 | Quad = (double)ddot(&k, d, &ione, tmp, &ione);
370 | Quad = Quad * t * t;
371 |
372 | // __|dx||_1
373 | Norm1_dx = t * NormOne(d, k);
374 | //mexPrintf("%f\n\n", Norm1_dx);
375 |
376 | // stepsize selection
377 | factor = 1;
378 | norm1_dx = Norm1_dx * factor;
379 | while (1) {
380 |
381 | //__compute (f_new - f)
382 | linear = Linear * factor;
383 | quad = Quad * factor * factor;
384 | red_f = 0.5 * quad + linear;
385 | f_tmp = f + red_f;
386 |
387 | if (f_tmp < f_ref + suffDec * linear) {
388 | //__get sufficient descent
389 | t = t * factor;
390 | norm1_dx = Norm1_dx * factor;
391 | break;
392 | }
393 | else {
394 | //__Evaluate New Stepsize
395 | //__t = t * 0.5; -> t0 fixed; factor = factor * 0.5; t = t0 * factor;
396 | factor = factor * 0.5;
397 | }
398 |
399 | //__Check whether step has become too small
400 | if (Norm1_dx * factor < optTol || t == 0) {
401 | // mexPrintf("Line Search failed\n");
402 | t = 0;
403 | norm1_dx = 0;
404 | red_f = 0;
405 | break;
406 | }
407 |
408 | }
409 |
410 | //** Take Step
411 |
412 | /*
413 | * x_old = x;
414 | * x = x + t * d;
415 | *
416 | * first copy x to x_old
417 | * then x = t * d + x;
418 | *
419 | */
420 | dcopy(&k, x, &ione, x_old, &ione);
421 | daxpy(&k, &t, d, &ione, x, &ione);
422 |
423 | /*
424 | * g_old = g;
425 | * g = compute grad(x);
426 | *
427 | * first copy g to g_old
428 | * then compute new gradient
429 | *
430 | */
431 | dcopy(&k, g, &ione, g_old, &ione);
432 | GetGrad(g, Hess, beta, x, k);
433 |
434 | // new objective value and iteration index
435 | f = f + red_f;
436 | iter = iter + 1;
437 |
438 | //** keep track of the minimum value attained
439 | if ( f < fmin ){
440 | fmin = f; // update
441 | // copy to the estimate
442 | dcopy(&k, x, &ione, ahat, &ione);
443 | fhat[0] = fmin;
444 | }
445 |
446 | //** Check 1st order optimality condition
447 | // tmp = ProjHyperCube(x-g)-x;
448 | dcopy(&k, x, &ione, tmp, &ione);
449 | daxpy(&k, &mone, g, &ione, tmp, &ione);
450 | ProjHyperCube(tmp, tmp, (int)k);
451 | daxpy(&k, &mone, x, &ione, tmp, &ione);
452 |
453 | if ( NormOne(tmp, k) < optTol ){
454 | // mexPrintf("First-Order Optimality Conditions Below optTol\n");
455 | break;
456 | }
457 |
458 | if (norm1_dx < optTol ) {
459 | // mexPrintf("***********************norm_1: \t %f",norm1_dx);
460 | break;
461 | }
462 |
463 | if ( dabs(red_f) < optTol ) {
464 | // mexPrintf("***************red_f: \t %f \t optTol: \t %.10f ",dabs(red_f),optTol);
465 | break;
466 | }
467 |
468 | if( iter > itermax ) {
469 | // mexPrintf("***********update T SPG: Reach iteration limits.");
470 | break;
471 | }
472 | }
473 | }
474 |
475 |
476 | /*** Parallel Computing ***/
477 | void spawn_threads(double* G, double* W, double* A, double lambda, ptrdiff_t k, int d, double* Anew, double* Loss_new) {
478 |
479 | /* temporary variables */
480 | double* Hess = (double*)malloc(MAX_NUM_THREADS * k * k * sizeof(double));
481 | double* beta = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
482 | double* x = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
483 | double* x_old = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
484 | double* g = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
485 | double* g_old = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
486 | double* dsct = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
487 | double* old_fvals = (double*)malloc(MAX_NUM_THREADS * MEM_OLD_VALUES * sizeof(double));
488 | double* tmp = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
489 | double* tmp1 = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
490 | double* fhat = (double*) malloc(MAX_NUM_THREADS * sizeof(double));
491 |
492 | if ( Hess == NULL || beta == NULL || x == NULL || x_old == NULL || g == NULL || g_old == NULL ||
493 | dsct == NULL || old_fvals == NULL || tmp == NULL || tmp1 == NULL || fhat == NULL ) {
494 | //mexErrMsgTxt("Out of memory.");
495 |
496 | }
497 | // construct independet subproblems
498 | omp_set_num_threads(MAX_NUM_THREADS);
499 | omp_set_dynamic(DYNAMIC_THREAD);
500 | int j = 0;
501 | double loss = 0.0;
502 | //#pragma omp parallel for private(j), reduction(+: loss)
503 | for(j = 0; j < d; j++){
504 | int id = omp_get_thread_num();
505 |
506 | //int id = 0;
507 | HCLasso(G, W + j * k, A + j * k, lambda, k,
508 | Hess + id * (k * k), beta + id * k,
509 | x + id * k, x_old + id * k, g + id * k, g_old + id * k, dsct + id * k,
510 | old_fvals + id * k, tmp + id * k, tmp1 + id * k,
511 | Anew + j * k, fhat + id);
512 | loss = loss + *(fhat+id);
513 | }
514 |
515 | Loss_new[0] = loss;
516 |
517 | free(fhat);
518 | free(tmp1);
519 | free(tmp);
520 | free(old_fvals);
521 | free(dsct);
522 | free(g_old);
523 | free(g);
524 | free(x_old);
525 | free(x);
526 | free(beta);
527 | free(Hess);
528 |
529 | }
530 |
531 |
532 |
533 | //[[Rcpp::export]]
534 | List RHLasso(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector l)
535 | {
536 |
537 | Rcpp::NumericMatrix Gi(clone(Ginp));
538 | Rcpp::NumericMatrix Wi(clone(Winp));
539 | Rcpp::NumericMatrix Ai(clone(Ainp));
540 |
541 | double* Gptr = Gi.begin();
542 | double* Wptr = Wi.begin();
543 | double* Aptr = Ai.begin();
544 |
545 | double lambda = Rcpp::as(l);
546 |
547 | ptrdiff_t k = (ptrdiff_t) Wi.nrow();
548 | int d = Wi.ncol();
549 |
550 | /* create the output data */
551 | double* Anew = NULL;
552 | double* Loss_new = NULL;
553 | //double* iters = NULL;
554 |
555 | Rcpp::NumericMatrix newA((int)k,d);
556 | Anew = newA.begin();
557 |
558 |
559 | Rcpp::NumericMatrix newLoss(1,1);
560 | Loss_new = newLoss.begin();
561 |
562 | //printf("Starting threads\n");
563 | /* parallel computing */
564 | spawn_threads(Gptr, Wptr, Aptr, lambda, k, d, Anew, Loss_new);
565 |
566 | //printf("%f\n", newLoss[0]);
567 | //printf("%f\n", NumIters[0]);
568 | //printf("%f\n", newA[0]);
569 |
570 | List result = List::create(
571 | Named("A") = wrap(newA),
572 | Named("Loss") = wrap(newLoss)
573 | );
574 |
575 | return(result);
576 | }
577 |
--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | # standard setup
2 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"`
3 | PKG_CXXFLAGS =`$(R_HOME)/bin/Rscript -e "Rcpp:::CxxFlags()"` `$(R_HOME)/bin/Rscript -e "RcppEigen:::CxxFlags()"` -I. -std=c++11
4 |
5 | # OMP setup
6 | OMP_NUM_THREADS=1
7 | OMP_SHEDULE="dynamic,16"
--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()") $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) -fopenmp
2 | PKG_CXXFLAGS=$(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::CxxFlags()") -I. -fopenmp
--------------------------------------------------------------------------------
/src/QuadHC.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * [Anew, Loss_new] = mexQuadHC(G,W,A,optTol)
3 | *
4 | * ---Input---
5 | *
6 | * G (k,k) full double matrix
7 | * W (k,d) full double matrix
8 | * A (k,d) full double matrix
9 | * optTol double number
10 | *
11 | * ---Output---
12 | *
13 | * Anew full (k,d) matrix
14 | * Loss_new double number
15 | * iters full (d,1) vector - #iterations used to solve each subproblem
16 | * ---Algorithm---
17 | *
18 | * For each clm of W, denoted by w, solve
19 | *
20 | * min a' * G * a - 2 * w' * a,
21 | * sb.to. 1>= a_i >= 0
22 | *
23 | * i.e. a is in the hypercube of R^k.
24 | *
25 | * where the starting value(the estimate of the minimizer)
26 | * is stored in the corresponding clms of A(Anew).
27 | *
28 | * The implementation is based on SPG.
29 | *
30 | * Parallel Computing is supported by OpenMP.
31 | *
32 | * BLAS routines are embedded in for operations on matrices & vectors.
33 | *
34 | * ---Default parameters---
35 | *
36 | * convergence accuracy: 1e-10
37 | * suffcient descent criterion in line search: 1e-3
38 | * memory size in non-mono. descent checking: 10
39 | * max #threads : 1
40 | * dynamic allocating threads: yes
41 | *
42 | */
43 |
44 |
45 | #include
46 | #include
47 | #include "dynblas.h"
48 | #include
49 | #include
50 | #include
51 | #include
52 |
53 | using namespace std;
54 | using namespace Rcpp;
55 |
56 | #define MEM_OLD_VALUES 10
57 | //#define MAX_NUM_THREADS 1
58 | #define MAX_NUM_THREADS 1
59 | //#define OPT_TOL 1e-10
60 | #define SUFF_DESC 1e-3
61 | #define DYNAMIC_THREAD 1
62 |
63 | /*
64 | * compute the absolute value of x
65 | * write this function explicitly to avoid compiler issue
66 | */
67 | inline double dabs(double x){
68 | if ( x < 0 )
69 | x = -x;
70 | return x;
71 | }
72 |
73 | /* compute the constant Hess and Beta */
74 | inline void SetInput(double* G, double* w, double* Hess, double* beta, ptrdiff_t k){
75 |
76 | // beta = 2 * w:
77 | // beta = w
78 | // beta = 1 * w + beta;
79 | //for( int i = 0; i < k ; i++){
80 | // beta[i] = 2 * w[i] - lambda;
81 | //}
82 | //
83 | ptrdiff_t ione = 1;
84 | double one = 1.0;
85 | dcopy(&k, w, &ione, beta, &ione);
86 | daxpy(&k, &one, w, &ione, beta, &ione);
87 |
88 | // Hess = 2 * G :
89 | // Hess = G;
90 | // Hess = 1 * G + Hess;
91 | //ptrdiff_t ione = 1;
92 | //double one = 1.0;
93 | ptrdiff_t ks = (ptrdiff_t) (k * k);
94 | dcopy(&ks, G, &ione, Hess, &ione);
95 | daxpy(&ks, &one, G, &ione, Hess, &ione);
96 |
97 | //printf("Setinput1\n");
98 | int ik = (int) k;
99 | for(int i = 0; i < ik; i++ ){
100 | //printf("%1.22f\n",beta[i]);
101 | }
102 | //printf("Setinput2\n");
103 | for(int i = 0; i < ik; i++ ){
104 | for (int j = 0; j < ik; j++ ){
105 | //printf("%f\t", Hess[i + j * k]);
106 | }
107 | //printf("%\n");
108 | }
109 | }
110 |
111 | /* compute the gradient at x */
112 | inline void GetGrad(double* grad, double* Hess, double* beta, double* x, ptrdiff_t k){
113 | // grad = 2 * G * x - beta = Hess * x - beta;
114 |
115 | ptrdiff_t ione = 1;
116 | char* chn = (char*)"N";
117 | double one = 1.0;
118 | double zero = 0.0;
119 | double mone = -1.0;
120 |
121 | // step 1: grad = Hess * x ;
122 | dgemv(chn, &k, &k, &one, Hess, &k, x, &ione, &zero, grad, &ione);
123 |
124 | // step 2: grad = -1 * beta + grad;
125 | daxpy(&k, &mone, beta, &ione, grad, &ione);
126 |
127 | //printf("GetGrad\n");
128 | int ik = (int) k;
129 | for(int i = 0; i < ik; i++ ){
130 | //printf("%1.22f\n",grad[i]);
131 | }
132 |
133 | }
134 |
135 | /* compute the objective value at x */
136 | inline double ObjValue(double* G, double* beta, double* x, double* tmp, ptrdiff_t k){
137 | // x' * G * x - beta' * x = x' * (G * x - beta)
138 | ptrdiff_t ione = 1;
139 | char* chn = (char*)"N";
140 | double one = 1.0;
141 | double zero = 0.0;
142 | double mone = -1.0;
143 |
144 | // step 1: tmp = G * x ;
145 | dgemv(chn, &k, &k, &one, G, &k, x, &ione, &zero, tmp, &ione);
146 |
147 | // step 2: tmp = -1 * beta + tmp;
148 | daxpy(&k, &mone, beta, &ione, tmp, &ione);
149 |
150 | // step 3: f = tmp' * x;
151 | double f = (double)ddot(&k, x, &ione, tmp, &ione);
152 |
153 | return f;
154 | }
155 |
156 | /* compute the BB parameter */
157 | inline double GetAlpha(double* x, double* x_old, double* g, double* g_old, double* tmp, double* tmp1, ptrdiff_t k){
158 |
159 | // alpha = (x - x_old)' * (x - x_old) / [ (x - x_old)' * (g - g_old)];
160 |
161 | ptrdiff_t ione = 1;
162 | //double one = 1.0;
163 | //double zero = 0.0;
164 | double mone = -1.0;
165 |
166 | // tmp = x - x_old
167 | // step 1: copy x to tmp
168 | dcopy(&k, x, &ione, tmp, &ione);
169 | // step 2: tmp = -1 * x_old + tmp;
170 | daxpy(&k, &mone, x_old, &ione, tmp, &ione);
171 |
172 | // tmp1 = g - g_old
173 | // step 1: copy g to tmp1
174 | dcopy(&k, g, &ione, tmp1, &ione);
175 | // step 2: tmp1 = -1 * g_old + tmp1;
176 | daxpy(&k, &mone, g_old, &ione, tmp1, &ione);
177 |
178 | // numerator = tmp' * tmp
179 | double numerator = (double)ddot(&k, tmp, &ione, tmp, &ione);
180 |
181 | // denominator = tmp' * tmp1;
182 | double denominator = (double)ddot(&k, tmp, &ione, tmp1, &ione);
183 |
184 | double alpha = numerator / denominator;
185 |
186 | return alpha;
187 | }
188 |
189 | /* project x to the hypercube -> output f */
190 | inline void ProjHyperCube(double* f, double* x, int m, double L, double U) {
191 | for(int i = 0; i< m; i++){
192 | if (x[i] < L)
193 | f[i] = L;
194 | else if (x[i] > U)
195 | f[i] = U;
196 | else
197 | f[i] = x[i];
198 | }
199 | }
200 |
201 | /* compute the projected step */
202 | inline void GetProjStep(double* d, double* x, double* g, double alpha, ptrdiff_t k, double low, double upp) {
203 | // d = Proj(x - alpha * g) - x;
204 |
205 | ptrdiff_t ione = 1;
206 | double mone = -1.0;
207 |
208 | //Step 1: d = x;
209 | dcopy(&k, x, &ione, d, &ione);
210 |
211 | //Step 2: d = -alpha*g + d;
212 | double malpha = -alpha;
213 | daxpy(&k, &malpha, g, &ione, d, &ione);
214 |
215 | //Step 3: d = ProjHyperCube(d)
216 | ProjHyperCube(d, d, (int)k, low, upp);
217 |
218 | //step 4: d = d - x;
219 | daxpy(&k, &mone, x, &ione, d, &ione);
220 |
221 | //printf("GetProjStep\n");
222 | int ik = (int) k;
223 | for(int i = 0; i < ik; i++ ){
224 | //printf("%1.22f\n",d[i]);
225 | }
226 | }
227 |
228 | /* compute the directional derivative */
229 | inline double GetDirectDerivative(double* g, double* d, ptrdiff_t k){
230 | ptrdiff_t ione = 1;
231 | double sum = (double)ddot(&k, g, &ione, d, &ione);
232 | return sum;
233 | }
234 |
235 | /* compute norm(x,1) for a vector x */
236 | inline double NormOne(double* x, ptrdiff_t k){
237 | ptrdiff_t ione = 1;
238 | double sum = dasum(&k, x, &ione);
239 | return sum;
240 | }
241 |
242 | /*** Solve Quadratic Problem on Non-negative Orthant by SPG ***/
243 | int QuadHC(double* G, double* w, double* a0, ptrdiff_t k,
244 | double* Hess, double* beta,
245 | double* x, double* x_old, double* g, double* g_old, double* d,
246 | double* old_fvals, double* tmp, double* tmp1,
247 | double* ahat, double* fhat, double optTol, double lower, double upper){
248 | /********
249 | *
250 | * solve: min_a a'* G * a - 2 * w'* a;
251 | * sb.to. 1 >= a >= 0
252 | *
253 | * ---Input---
254 | *
255 | * G,w - quandratic form
256 | * a0 - starting value
257 | * k - length of a0
258 | * optTol - tolerance for stopping criterion
259 | *
260 | * ---Temporary Variables---
261 | *
262 | * Hess - Hessian = 2 * G, constant
263 | * beta - 2 * w, constant
264 | * x - current solution
265 | * x_old - previous solution
266 | * g - current gradient = Hess * x - beta
267 | * g_old - previous gradient = Hess * x - beta
268 | * d - descent direction
269 | *
270 | * old_fvals - for non-monotonically descent
271 | * tmp,tmp1 - for BLAS
272 | *
273 | * ---Output---
274 | *
275 | * ahat - minimizer
276 | * fhat - objective value at ahat
277 | *
278 | *******/
279 |
280 | ptrdiff_t ione = 1;
281 | double one = 1.0;
282 | double mone = -1.0;
283 | double zero = 0.0;
284 | char* chn = (char*)"N";
285 |
286 | /*** Initialiation ***/
287 |
288 | // set parameter
289 | //double optTol = OPT_TOL;
290 | double suffDec = SUFF_DESC;
291 | double f; // objective value at current solution
292 | double fmin; // minimum. objective value in the sequence generated by SPG
293 | // memory for non-monotone line search
294 | for(int i = 0; i < MEM_OLD_VALUES; i++) {
295 | old_fvals[i] = -std::numeric_limits::max();
296 | }
297 |
298 | // set Hessian and beta
299 | SetInput(G, w, Hess, beta, k);
300 |
301 | // get starting point a0, gradient & fval
302 | dcopy(&k, a0, &ione, x, &ione);
303 | GetGrad(g, Hess, beta, x, k);
304 | f = ObjValue(G, beta, x, tmp, k);
305 | fmin = f;
306 |
307 | // copy to estimate
308 | dcopy(&k, x, &ione, ahat, &ione);
309 | fhat[0] = fmin;
310 |
311 | /*** SPG Loop ***/
312 |
313 | double alpha; // BB parameter
314 | double gtd; // Directional Derivative
315 | double t; //stepsize;
316 | double f_ref; // reference function value in non-monotone linear search
317 | double Linear, Quad; // ingredient to compute new function value;
318 | double factor; // for linear search, factor to reduce stepsize
319 | double Norm1_dx; // ||dx||_1, for linear search and as stopping criterion
320 | double linear, quad, red_f, f_tmp, norm1_dx; //temporary variable in linear search
321 |
322 | int iter = 0;
323 | int itermax = 500;
324 | while (1){
325 |
326 | //** Compute Step Direction
327 | if (iter == 0)
328 | alpha = 1;
329 | else{
330 | alpha = GetAlpha(x, x_old, g, g_old, tmp, tmp1, k);
331 | if (alpha <= 1e-10 || alpha > 1e10) {
332 | alpha = 1;
333 | }
334 | }
335 |
336 | //** Compute the projected step
337 | GetProjStep(d, x, g, alpha, k, lower, upper);
338 |
339 |
340 | //** Check that Progress can be made along the direction
341 | gtd = GetDirectDerivative(g, d, k);
342 | //printf("Directional Derivative %1.22f\n", gtd);
343 | if (gtd > -optTol){
344 | //printf("Directional Derivative below optTol %1.22f\n", gtd);
345 | break;
346 | }
347 |
348 | //** Backtracking Line Search
349 | // Select Initial Guess to step length
350 | if (iter == 0){
351 | t = 1/NormOne(g, k);
352 | t = (t > 1) ? 1 : t;
353 | }
354 | else{
355 | t = 1;
356 | }
357 |
358 | // Get the reference function value for non-monotone condition:
359 | // __update the old_values memorized
360 | if (iter < MEM_OLD_VALUES)
361 | old_fvals[iter] = f;
362 | else{
363 | for(int i = 0; i < MEM_OLD_VALUES-1; i++){
364 | old_fvals[i] = old_fvals[i+1];
365 | }
366 | old_fvals[MEM_OLD_VALUES-1] = f;
367 | }
368 |
369 | // __find f_ref = max(old_fvals);
370 | f_ref = old_fvals[0];
371 | for(int i = 1; i < MEM_OLD_VALUES; i++){
372 | if (f_ref < old_fvals[i])
373 | f_ref = old_fvals[i];
374 | }
375 |
376 | // ingredients for computing (f_new - f) based on stepsize t:
377 | // __dx = t * d; Linear = g' * dx; Quad = dx' * Hess * dx;
378 | // __equivalently, Linear = t * g' * d = t * gtd; Quad = t^2 * d' * Hess * d;
379 | Linear = t * gtd;
380 | dgemv(chn, &k, &k, &one, Hess, &k, d, &ione, &zero, tmp, &ione);
381 | Quad = (double)ddot(&k, d, &ione, tmp, &ione);
382 | Quad = Quad * t * t;
383 |
384 | // __|dx||_1
385 | Norm1_dx = t * NormOne(d, k);
386 | //printf("Norm\n");
387 | //printf("%1.22f\n\n", Norm1_dx);
388 |
389 | // stepsize selection
390 | factor = 1;
391 | norm1_dx = Norm1_dx * factor;
392 | while (1) {
393 |
394 | //__compute (f_new - f)
395 | linear = Linear * factor;
396 | quad = Quad * factor * factor;
397 | red_f = 0.5 * quad + linear;
398 | f_tmp = f + red_f;
399 |
400 | if (f_tmp < f_ref + suffDec * linear) {
401 | //__get sufficient descent
402 | t = t * factor;
403 | norm1_dx = Norm1_dx * factor;
404 | break;
405 | }
406 | else {
407 | //__Evaluate New Stepsize
408 | //__t = t * 0.5; -> t0 fixed; factor = factor * 0.5; t = t0 * factor;
409 | factor = factor * 0.5;
410 | }
411 |
412 | //__Check whether step has become too small
413 | if (Norm1_dx * factor < optTol || t == 0) {
414 | //printf("Line Search failed\n");
415 | t = 0;
416 | norm1_dx = 0;
417 | red_f = 0;
418 | break;
419 | }
420 |
421 | }
422 |
423 | //** Take Step
424 |
425 | /*
426 | * x_old = x;
427 | * x = x + t * d;
428 | *
429 | * first copy x to x_old
430 | * then x = t * d + x;
431 | *
432 | */
433 | dcopy(&k, x, &ione, x_old, &ione);
434 | daxpy(&k, &t, d, &ione, x, &ione);
435 |
436 | /*
437 | * g_old = g;
438 | * g = compute grad(x);
439 | *
440 | * first copy g to g_old
441 | * then compute new gradient
442 | *
443 | */
444 | dcopy(&k, g, &ione, g_old, &ione);
445 | GetGrad(g, Hess, beta, x, k);
446 |
447 | // new objective value and iteration index
448 | f = f + red_f;
449 | iter = iter + 1;
450 |
451 | //** keep track of the minimum value attained
452 | if ( f < fmin ){
453 | fmin = f; // update
454 | // copy to the estimate
455 | dcopy(&k, x, &ione, ahat, &ione);
456 | fhat[0] = fmin;
457 | }
458 |
459 | //** Check 1st order optimality condition
460 | // tmp = ProjHyperCube(x-g)-x;
461 | dcopy(&k, x, &ione, tmp, &ione);
462 | daxpy(&k, &mone, g, &ione, tmp, &ione);
463 | ProjHyperCube(tmp, tmp, (int)k, lower, upper);
464 | daxpy(&k, &mone, x, &ione, tmp, &ione);
465 |
466 | if ( NormOne(tmp, k) < optTol ){
467 | //printf("First-Order Optimality Conditions Below optTol\n");
468 | break;
469 | }
470 |
471 | if (norm1_dx < optTol ) {
472 | //printf("***********************norm_1: \t %f",norm1_dx);
473 | break;
474 | }
475 |
476 | if ( dabs(red_f) < optTol ) {
477 | //printf("***************red_f: \t %f \t optTol: \t %.10f ",dabs(red_f),optTol);
478 | break;
479 | }
480 |
481 | if( iter == itermax ) {
482 | //printf("***********update T SPG: Reach iteration limits.");
483 | break;
484 | }
485 | }
486 | return iter;
487 | }
488 |
489 | /*** Parallel Computing ***/
490 | //void spawn_threads(double* G, double* W, double* A, double lambda, ptrdiff_t k, int d, double* Anew, double* Loss_new) {
491 | void spawn_threads(double* G, double* W, double* A, ptrdiff_t k, int d, double* Anew, double* Loss_new, double* iters, double optTol, double lower, double upper) {
492 |
493 | /* temporary variables */
494 | double* Hess = (double*)malloc(MAX_NUM_THREADS * k * k * sizeof(double));
495 | double* beta = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
496 | double* x = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
497 | double* x_old = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
498 | double* g = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
499 | double* g_old = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
500 | double* dsct = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
501 | double* old_fvals = (double*)malloc(MAX_NUM_THREADS * MEM_OLD_VALUES * sizeof(double));
502 | double* tmp = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
503 | double* tmp1 = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
504 | double* fhat = (double*) malloc(MAX_NUM_THREADS * sizeof(double));
505 |
506 | if ( Hess == NULL || beta == NULL || x == NULL || x_old == NULL || g == NULL || g_old == NULL ||
507 | dsct == NULL || old_fvals == NULL || tmp == NULL || tmp1 == NULL || fhat == NULL ) {
508 | //mexErrMsgTxt("Out of memory.");
509 |
510 | }
511 | // construct independet subproblems
512 | //omp_set_num_threads(MAX_NUM_THREADS);
513 | //omp_set_dynamic(DYNAMIC_THREAD);
514 | int j = 0;
515 | double loss = 0.0;
516 | //#pragma omp parallel for private(j), reduction(+: loss)
517 | for(j = 0; j < d; j++){
518 | int id = omp_get_thread_num();
519 |
520 | //int id = 0;
521 | int res=QuadHC(G, W + j * k, A + j * k, k,
522 | Hess + id * (k * k), beta + id * k,
523 | x + id * k, x_old + id * k, g + id * k, g_old + id * k, dsct + id * k,
524 | old_fvals + id * k, tmp + id * k, tmp1 + id * k,
525 | Anew + j * k, fhat + id, optTol, lower, upper);
526 | iters[j] = res;
527 | loss = loss + *(fhat+id);
528 |
529 | //printf("Iterations: %d\n", res);
530 | //printf("Loss: %1.22f\n", loss);
531 |
532 | }
533 |
534 | Loss_new[0] = loss;
535 |
536 | free(fhat);
537 | free(tmp1);
538 | free(tmp);
539 | free(old_fvals);
540 | free(dsct);
541 | free(g_old);
542 | free(g);
543 | free(x_old);
544 | free(x);
545 | free(beta);
546 | free(Hess);
547 |
548 | }
549 |
550 | //[[Rcpp::export]]
551 | List RQuadHC(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector otol, NumericVector lconstr, NumericVector uconstr)
552 | {
553 |
554 | // Rcpp::NumericMatrix Gi(clone(Ginp));
555 | // Rcpp::NumericMatrix Wi(clone(Winp));
556 | // Rcpp::NumericMatrix Ai(clone(Ainp));
557 |
558 | Rcpp::NumericMatrix Gi(Ginp);
559 | Rcpp::NumericMatrix Wi(Winp);
560 | Rcpp::NumericMatrix Ai(Ainp);
561 |
562 | double* Gptr = Gi.begin();
563 | double* Wptr = Wi.begin();
564 | double* Aptr = Ai.begin();
565 |
566 | //double* Gend = Gi.end();
567 |
568 | // double lambda = Rcpp::as(l);
569 | double optTol = Rcpp::as(otol);
570 | double lower = Rcpp::as(lconstr);
571 | double upper = Rcpp::as(uconstr);
572 |
573 | ptrdiff_t k = (ptrdiff_t) Wi.nrow();
574 | int d = Wi.ncol();
575 |
576 | double* Anew = NULL;
577 | double* Loss_new = NULL;
578 | double* iters = NULL;
579 |
580 | //ptrdiff_t nrg = (ptrdiff_t) Gi.nrow();
581 | //printf("Pointer value: %p\n", Gptr);
582 | //printf("First value of G: %1.22f\n", *Gptr);
583 | //printf("Pointer 2 value: %p\n", Gptr + nrg * 1);
584 | //printf("Last value of G: %1.22f\n", *( (double*) Gptr + nrg * 1 ));
585 |
586 | Rcpp::NumericMatrix newA((int)k,d);
587 | Anew = newA.begin();
588 |
589 | Rcpp::NumericMatrix newLoss(1,1);
590 | Loss_new = newLoss.begin();
591 |
592 | //double* iters = NULL;
593 | //Rcpp::NumericVector iterations(d);
594 | Rcpp::NumericMatrix iterations(d,1);
595 | iters = iterations.begin();
596 |
597 |
598 | ////printf("Starting threads\n");
599 | // parallel computing //
600 | spawn_threads(Gptr, Wptr, Aptr, k, d, Anew, Loss_new, iters, optTol, lower, upper);
601 |
602 | ////printf("%1.22f\n", newLoss[0]);
603 | ////printf("%1.22f\n", NumIters[0]);
604 | ////printf("%1.22f\n", newA[0]);
605 |
606 | List result = List::create(
607 | Named("A") = wrap(newA),
608 | Named("Loss") = wrap(newLoss),
609 | Named("iters") = wrap(iterations)
610 | );
611 | //List result = List::create(
612 | // Named("A") = wrap(Ai),
613 | // Named("Loss") = wrap(Wi),
614 | // Named("iters") = wrap(Gi)
615 | // );
616 |
617 | return(result);
618 | }
619 |
--------------------------------------------------------------------------------
/src/RProjSplxBox.cpp:
--------------------------------------------------------------------------------
1 | /******************************************************
2 | * f = mexProjSplxBox(x, l, u)
3 | *
4 | * Input: x - a vector in Rn.
5 | * Output: f - Projection of x onto {y: sum(y) == 1, l <= y <= u}
6 | *
7 | *********************************************************/
8 |
9 | #include
10 | #include
11 | #include "dynblas.h"
12 | #include
13 | #include
14 | #include
15 | #include
16 | using namespace Rcpp;
17 |
18 | inline double dabs(double x){
19 | if ( x < 0 )
20 | x = -x;
21 | return x;
22 | }
23 |
24 | void Proj(double* f, double* x, double* l, double* u, double* y, double* z, double* p, double* q, int k) {
25 | /*
26 | * projection uses Dykstra's algorithm
27 | *
28 | */
29 |
30 | double tol1 = 0;
31 | double tol2 = 0;
32 | double tolcur = 0;
33 | double tolterm = 1E-12;
34 | double mean = 0;
35 | double sum = 0;
36 | double delta = 0;
37 | double overk = 1/(double)k;
38 | /* int maxiters = 10000; */
39 | /* int itercur = 0; */
40 |
41 | for(int i = 0; i < k; i++){
42 | p[i] = 0;
43 | }
44 |
45 |
46 | for(int i = 0; i < k; i++){
47 | q[i] = 0;
48 | }
49 |
50 |
51 | for(int i = 0; i < k; i ++){
52 | f[i] = x[i];
53 | }
54 |
55 |
56 |
57 | do{/* break; */
58 | /* itercur = itercur + 1; */
59 | /* reset before each new round */
60 | mean = 0;
61 | tol1 = 0;
62 | tol2 = 0;
63 |
64 | /* copy old iterate */
65 | for(int i = 0; i < k; i++){
66 | y[i] = f[i];
67 | }
68 |
69 |
70 | /* projection 1: box constraints, primal */
71 |
72 | for(int i = 0; i < k; i++) {
73 |
74 | z[i] = f[i] + p[i];
75 |
76 | if(z[i] < l[i]){
77 | z[i] = l[i];
78 | }
79 |
80 | if(z[i] > u[i]){
81 | z[i] = u[i];
82 | }
83 | }
84 |
85 | /* projection 1: box constraints, dual */
86 |
87 | for(int i = 0; i < k; i++) {
88 | p[i] = f[i] + p[i] - z[i];
89 | }
90 |
91 | /* projection 2: sum constraint, primal */
92 |
93 | for(int i = 0; i < k; i++) {
94 | f[i] = z[i] + q[i];
95 | mean = mean + f[i] * overk;
96 | }
97 |
98 |
99 |
100 | for(int i = 0; i < k; i++) {
101 | f[i] = f[i] - mean + overk;
102 | }
103 |
104 | /* projection 2: sum constraint, primal */
105 |
106 | for(int i = 0; i < k; i++) {
107 | q[i] = z[i] + q[i] - f[i];
108 | }
109 |
110 | /* tolerance checking */
111 |
112 | for(int i = 0; i < k; i++) {
113 | delta = dabs(f[i] - y[i]);
114 | if(delta > tol1){
115 | tol1 = delta;
116 | }
117 | }
118 |
119 | for(int i = 0; i < k; i++) {
120 | delta = l[i] - f[i];
121 | if(delta > tol2){
122 | tol2 = delta;
123 | }
124 |
125 | delta = f[i] - u[i];
126 | if(delta > tol2){
127 | tol2 = delta;
128 | }
129 | }
130 |
131 | sum = 0;
132 | for(int i=0; i < k; i++){
133 | sum = sum + f[i];
134 | }
135 |
136 | if(dabs(sum - 1) > tol2){
137 | tol2 = dabs(sum - 1);
138 | }
139 |
140 | if(tol1 < tol2){
141 | tolcur = tol2;
142 | }
143 | else{
144 | tolcur = tol1;
145 | }
146 |
147 | /* printf ("tols: %1.6f \n", tol2); */
148 |
149 | } while(tolcur > tolterm);/* && itercur < maxiters); */
150 | }
151 |
152 |
153 | //[[Rcpp::export]]
154 | NumericMatrix RProjSplxBox(NumericMatrix Xinp, NumericVector linp, NumericVector uinp) {
155 |
156 | Rcpp::NumericMatrix Xi(clone(Xinp));
157 | Rcpp::NumericVector li(clone(linp));
158 | Rcpp::NumericVector ui(clone(uinp));
159 |
160 | /*Get pointer to input data*/
161 |
162 | double* x = Xi.begin();
163 | double* l = li.begin();
164 | double* u = ui.begin();
165 |
166 | //int cols = Xi.ncol();
167 | int rows = Xi.nrow();
168 |
169 | double* f = NULL; /* output */
170 |
171 | /* create the output vector */
172 | Rcpp::NumericMatrix newX((int)rows,1);
173 | f = newX.begin();
174 |
175 | /* int* ix = (int*) malloc(rows * sizeof(int)); */
176 | double* y = (double*) malloc(rows * sizeof(double));
177 | double* z = (double*) malloc(rows * sizeof(double));
178 | double* p = (double*) malloc(rows * sizeof(double));
179 | double* q = (double*) malloc(rows * sizeof(double));
180 |
181 | Proj(f, x, l, u, y, z, p, q, rows);
182 |
183 | /* free(ix); */
184 | free(z);
185 | free(y);
186 | free(p);
187 | free(q);
188 |
189 | return(newX);
190 |
191 | }
192 |
--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 |
4 | #include
5 | #include
6 |
7 | using namespace Rcpp;
8 |
9 | // RHLasso
10 | List RHLasso(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector l);
11 | RcppExport SEXP _MeDeCom_RHLasso(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP lSEXP) {
12 | BEGIN_RCPP
13 | Rcpp::RObject rcpp_result_gen;
14 | Rcpp::RNGScope rcpp_rngScope_gen;
15 | Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
16 | Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
17 | Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
18 | Rcpp::traits::input_parameter< NumericVector >::type l(lSEXP);
19 | rcpp_result_gen = Rcpp::wrap(RHLasso(Ginp, Winp, Ainp, l));
20 | return rcpp_result_gen;
21 | END_RCPP
22 | }
23 | // RQuadHC
24 | List RQuadHC(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector otol, NumericVector lconstr, NumericVector uconstr);
25 | RcppExport SEXP _MeDeCom_RQuadHC(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP otolSEXP, SEXP lconstrSEXP, SEXP uconstrSEXP) {
26 | BEGIN_RCPP
27 | Rcpp::RObject rcpp_result_gen;
28 | Rcpp::RNGScope rcpp_rngScope_gen;
29 | Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
30 | Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
31 | Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
32 | Rcpp::traits::input_parameter< NumericVector >::type otol(otolSEXP);
33 | Rcpp::traits::input_parameter< NumericVector >::type lconstr(lconstrSEXP);
34 | Rcpp::traits::input_parameter< NumericVector >::type uconstr(uconstrSEXP);
35 | rcpp_result_gen = Rcpp::wrap(RQuadHC(Ginp, Winp, Ainp, otol, lconstr, uconstr));
36 | return rcpp_result_gen;
37 | END_RCPP
38 | }
39 | // RProjSplxBox
40 | NumericMatrix RProjSplxBox(NumericMatrix Xinp, NumericVector linp, NumericVector uinp);
41 | RcppExport SEXP _MeDeCom_RProjSplxBox(SEXP XinpSEXP, SEXP linpSEXP, SEXP uinpSEXP) {
42 | BEGIN_RCPP
43 | Rcpp::RObject rcpp_result_gen;
44 | Rcpp::RNGScope rcpp_rngScope_gen;
45 | Rcpp::traits::input_parameter< NumericMatrix >::type Xinp(XinpSEXP);
46 | Rcpp::traits::input_parameter< NumericVector >::type linp(linpSEXP);
47 | Rcpp::traits::input_parameter< NumericVector >::type uinp(uinpSEXP);
48 | rcpp_result_gen = Rcpp::wrap(RProjSplxBox(Xinp, linp, uinp));
49 | return rcpp_result_gen;
50 | END_RCPP
51 | }
52 | // RQuadSimplex
53 | List RQuadSimplex(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector ot);
54 | RcppExport SEXP _MeDeCom_RQuadSimplex(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP otSEXP) {
55 | BEGIN_RCPP
56 | Rcpp::RObject rcpp_result_gen;
57 | Rcpp::RNGScope rcpp_rngScope_gen;
58 | Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
59 | Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
60 | Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
61 | Rcpp::traits::input_parameter< NumericVector >::type ot(otSEXP);
62 | rcpp_result_gen = Rcpp::wrap(RQuadSimplex(Ginp, Winp, Ainp, ot));
63 | return rcpp_result_gen;
64 | END_RCPP
65 | }
66 | // RQuadSimplexBox
67 | List RQuadSimplexBox(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector linp, NumericVector uinp, NumericVector ot);
68 | RcppExport SEXP _MeDeCom_RQuadSimplexBox(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP linpSEXP, SEXP uinpSEXP, SEXP otSEXP) {
69 | BEGIN_RCPP
70 | Rcpp::RObject rcpp_result_gen;
71 | Rcpp::RNGScope rcpp_rngScope_gen;
72 | Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
73 | Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
74 | Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
75 | Rcpp::traits::input_parameter< NumericVector >::type linp(linpSEXP);
76 | Rcpp::traits::input_parameter< NumericVector >::type uinp(uinpSEXP);
77 | Rcpp::traits::input_parameter< NumericVector >::type ot(otSEXP);
78 | rcpp_result_gen = Rcpp::wrap(RQuadSimplexBox(Ginp, Winp, Ainp, linp, uinp, ot));
79 | return rcpp_result_gen;
80 | END_RCPP
81 | }
82 | // cppTAfact
83 | RcppExport SEXP cppTAfact(SEXP mDtSEXP, SEXP mTtinitSEXP, SEXP mAinitSEXP, double lambda, int itersMax, double tol, double tolA, double tolT);
84 | RcppExport SEXP _MeDeCom_cppTAfact(SEXP mDtSEXPSEXP, SEXP mTtinitSEXPSEXP, SEXP mAinitSEXPSEXP, SEXP lambdaSEXP, SEXP itersMaxSEXP, SEXP tolSEXP, SEXP tolASEXP, SEXP tolTSEXP) {
85 | BEGIN_RCPP
86 | Rcpp::RObject rcpp_result_gen;
87 | Rcpp::RNGScope rcpp_rngScope_gen;
88 | Rcpp::traits::input_parameter< SEXP >::type mDtSEXP(mDtSEXPSEXP);
89 | Rcpp::traits::input_parameter< SEXP >::type mTtinitSEXP(mTtinitSEXPSEXP);
90 | Rcpp::traits::input_parameter< SEXP >::type mAinitSEXP(mAinitSEXPSEXP);
91 | Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP);
92 | Rcpp::traits::input_parameter< int >::type itersMax(itersMaxSEXP);
93 | Rcpp::traits::input_parameter< double >::type tol(tolSEXP);
94 | Rcpp::traits::input_parameter< double >::type tolA(tolASEXP);
95 | Rcpp::traits::input_parameter< double >::type tolT(tolTSEXP);
96 | rcpp_result_gen = Rcpp::wrap(cppTAfact(mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda, itersMax, tol, tolA, tolT));
97 | return rcpp_result_gen;
98 | END_RCPP
99 | }
100 |
101 | static const R_CallMethodDef CallEntries[] = {
102 | {"_MeDeCom_RHLasso", (DL_FUNC) &_MeDeCom_RHLasso, 4},
103 | {"_MeDeCom_RQuadHC", (DL_FUNC) &_MeDeCom_RQuadHC, 6},
104 | {"_MeDeCom_RProjSplxBox", (DL_FUNC) &_MeDeCom_RProjSplxBox, 3},
105 | {"_MeDeCom_RQuadSimplex", (DL_FUNC) &_MeDeCom_RQuadSimplex, 4},
106 | {"_MeDeCom_RQuadSimplexBox", (DL_FUNC) &_MeDeCom_RQuadSimplexBox, 6},
107 | {"_MeDeCom_cppTAfact", (DL_FUNC) &_MeDeCom_cppTAfact, 8},
108 | {NULL, NULL, 0}
109 | };
110 |
111 | RcppExport void R_init_MeDeCom(DllInfo *dll) {
112 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
113 | R_useDynamicSymbols(dll, FALSE);
114 | }
115 |
--------------------------------------------------------------------------------
/src/dynblas.h:
--------------------------------------------------------------------------------
1 | /*
2 | * Defines the prototypes for BLAS Fortran functions.
3 | *
4 | * Also defines a typedef blas_int to be used for all integers passed to BLAS
5 | * functions.
6 | *
7 | * When used in the context of a MATLAB MEX file, you must define MATLAB_MEX_FILE
8 | * and MATLAB_VERSION (for version 7.4, define it to 0x0704).
9 | *
10 | *
11 | * Copyright (C) 2009-2011 Dynare Team
12 | *
13 | * This file is part of Dynare.
14 | *
15 | * Dynare is free software: you can redistribute it and/or modify
16 | * it under the terms of the GNU General Public License as published by
17 | * the Free Software Foundation, either version 3 of the License, or
18 | * (at your option) any later version.
19 | *
20 | * Dynare is distributed in the hope that it will be useful,
21 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
22 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 | * GNU General Public License for more details.
24 | *
25 | * You should have received a copy of the GNU General Public License
26 | * along with Dynare. If not, see .
27 | */
28 |
29 | #ifndef _DYNBLAS_H
30 | #define _DYNBLAS_H
31 |
32 | /* Starting from version 7.8, MATLAB BLAS expects ptrdiff_t arguments for integers */
33 | /*#if defined(MATLAB_MEX_FILE) && MATLAB_VERSION >= 0x0708*/
34 | # ifdef __cplusplus
35 | # include
36 | # else
37 | # include
38 | # endif
39 |
40 | typedef ptrdiff_t blas_int;
41 |
42 | #if defined(MATLAB_MEX_FILE) && defined(_WIN32) && !defined(_MSC_VER)
43 | # define FORTRAN_WRAPPER(x) x
44 | #else
45 | # define FORTRAN_WRAPPER(x) x ## _
46 | #endif
47 |
48 | #ifdef __cplusplus
49 | extern "C" {
50 | #endif
51 |
52 | typedef const char *BLCHAR;
53 | typedef const blas_int *CONST_BLINT;
54 | typedef const double *CONST_BLDOU;
55 | typedef const float *CONST_BLFLT;
56 | typedef double *BLDOU;
57 | typedef float *BLFLT;
58 |
59 | #define dgemm FORTRAN_WRAPPER(dgemm)
60 | void dgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
61 | CONST_BLINT k, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
62 | CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
63 | BLDOU c, CONST_BLINT ldc);
64 |
65 | #define sgemm FORTRAN_WRAPPER(sgemm)
66 | void sgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
67 | CONST_BLINT k, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda,
68 | CONST_BLFLT b, CONST_BLINT ldb, CONST_BLFLT beta,
69 | BLFLT c, CONST_BLINT ldc);
70 |
71 | #define dsymm FORTRAN_WRAPPER(dsymm)
72 | void dsymm(BLCHAR side, BLCHAR uplo, CONST_BLINT m, CONST_BLINT n,
73 | CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
74 | CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
75 | BLDOU c, CONST_BLINT ldc);
76 |
77 | #define dgemv FORTRAN_WRAPPER(dgemv)
78 | void dgemv(BLCHAR trans, CONST_BLINT m, CONST_BLINT n, CONST_BLDOU alpha,
79 | CONST_BLDOU a, CONST_BLINT lda, CONST_BLDOU x, CONST_BLINT incx,
80 | CONST_BLDOU beta, BLDOU y, CONST_BLINT incy);
81 |
82 | #define dsymv FORTRAN_WRAPPER(dsymv)
83 | void dsymv(BLCHAR uplo, CONST_BLINT m, CONST_BLDOU alpha, CONST_BLDOU a,
84 | CONST_BLINT lda, CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
85 | BLDOU c, CONST_BLINT ldc);
86 |
87 | #define dtrsv FORTRAN_WRAPPER(dtrsv)
88 | void dtrsv(BLCHAR uplo, BLCHAR trans, BLCHAR diag, CONST_BLINT n,
89 | CONST_BLDOU a, CONST_BLINT lda, BLDOU x, CONST_BLINT incx);
90 |
91 | #define dtrmv FORTRAN_WRAPPER(dtrmv)
92 | void dtrmv(BLCHAR uplo, BLCHAR trans, BLCHAR diag, CONST_BLINT n,
93 | CONST_BLDOU a, CONST_BLINT lda, BLDOU x, CONST_BLINT incx);
94 |
95 | #define daxpy FORTRAN_WRAPPER(daxpy)
96 | void daxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
97 | BLDOU y, CONST_BLINT incy);
98 |
99 | #define saxpy FORTRAN_WRAPPER(saxpy)
100 | void saxpy(CONST_BLINT n, CONST_BLFLT a, CONST_BLFLT x, CONST_BLINT incx,
101 | BLFLT y, CONST_BLINT incy);
102 |
103 | #define dcopy FORTRAN_WRAPPER(dcopy)
104 | void dcopy(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx,
105 | BLDOU y, CONST_BLINT incy);
106 |
107 | #define zaxpy FORTRAN_WRAPPER(zaxpy)
108 | void zaxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
109 | BLDOU y, CONST_BLINT incy);
110 |
111 | #define dscal FORTRAN_WRAPPER(dscal)
112 | void dscal(CONST_BLINT n, CONST_BLDOU a, BLDOU x, CONST_BLINT incx);
113 |
114 | #define sscal FORTRAN_WRAPPER(sscal)
115 | void sscal(CONST_BLINT n, CONST_BLDOU a, BLFLT x, CONST_BLINT incx);
116 |
117 | #define dtrsm FORTRAN_WRAPPER(dtrsm)
118 | void dtrsm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
119 | CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
120 | BLDOU b, CONST_BLINT ldb);
121 |
122 | #define ddot FORTRAN_WRAPPER(ddot)
123 | double ddot(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx, CONST_BLDOU y,
124 | CONST_BLINT incy);
125 |
126 | #define dsyr FORTRAN_WRAPPER(dsyr)
127 | void dsyr(BLCHAR uplo, CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU x,
128 | CONST_BLINT incx, BLDOU a, CONST_BLINT lda);
129 |
130 | #define dtrmm FORTRAN_WRAPPER(dtrmm)
131 | void dtrmm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
132 | CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
133 | BLDOU b, CONST_BLINT ldb);
134 |
135 | #define strmm FORTRAN_WRAPPER(strmm)
136 | void strmm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
137 | CONST_BLINT n, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda,
138 | BLFLT b, CONST_BLINT ldb);
139 |
140 | #define dasum FORTRAN_WRAPPER(dasum)
141 | double dasum(CONST_BLINT n, BLDOU dx, CONST_BLINT incx);
142 |
143 |
144 |
145 | #ifdef __cplusplus
146 | } /* extern "C" */
147 | #endif
148 |
149 | #endif /* _DYNBLAS_H */
150 |
--------------------------------------------------------------------------------
/tests/runTests.R:
--------------------------------------------------------------------------------
1 | BiocGenerics:::testPackage("MeDeCom")
--------------------------------------------------------------------------------
/vignettes/MeDeCom.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "MeDeCom: Methylome Decomposition via Constrained Matrix Factorization"
3 | author: "Pavlo Lutsik, Martin Slawski, Gilles Gasparoni, Nikita Vedeneev, Matthias Hein and Joern Walter"
4 | date: "`r Sys.Date()`"
5 | output:
6 | rmarkdown::html_document:
7 | mathjax: default
8 | toc: true
9 | number_sections: false
10 | fig_width: 5
11 | fig_height: 5
12 | vignette: >
13 | %\VignetteIndexEntry{MeDeCom}
14 | %\VignetteEngine{knitr::rmarkdown}
15 | \usepackage[utf8]{inputenc}
16 | ---
17 |
18 | # Introduction
19 |
20 | *MeDeCom* is an R-package for reference-free decomposition of heterogeneous DNA methylation profiles.
21 | It uses matrix factorization enhanced by constraints and a specially tailored regularization.
22 | *MeDeCom* represents an input $m\times n$ data matrix ($m$ CpGs measured in $n$ samples) as a product of two other matrices.
23 | The first matrix has $m$ rows, just as the input data, but the number of columns is equal to $k$.
24 | The columns of this matrix can be interpreted as methylomes of the $k$ unknown
25 | cell populations underlying the samples and will be referred to as **latent methylation components** or **LMCs**.
26 | The second matrix has $k$ rows and $n$ columns, and can be interpreted as a matrix of relative contributions (mixing proportions)
27 | of each LMC to each sample.
28 |
29 | *MeDeCom* starts with a set of related DNA methylation profiles, e.g. a series of Infinium microarray measurements
30 | from a population cohort, or several bisulfite sequencing-based methylomes. The key requirement is that
31 | the input data represents absolute DNA methylation measurements in a population of cells
32 | and contains values between 0 and 1. *MeDeCom* implements an alternating scheme which iteratively
33 | updates randomly initialized factor matrices until convergence or until the maximum number of iterations
34 | has been reached. This is repeated for multiple random initializations and the best solution is returned.
35 |
36 | MeDeCom features two tunable parameters. The first one is the number of LMCs $k$, an approximate choice for which
37 | should be known from prior information. To enforce the distribution properties of a methylation profile
38 | upon LMCs *MeDeCom* uses a special for of regularization controlled by the parameter $\lambda$.
39 | A typical *MeDeCom* experiment includes testing a grid of values for $k$ and $\lambda$. For each combination
40 | of parameter values *MeDeCom* estimates a cross-validation error. The latter helps select the optimal number
41 | of LMCs and the strength of regularization.
42 |
43 | # Installation
44 |
45 | *MeDeCom* can be installed directly from github using the package `devtools`:
46 |
47 | ```{r, eval=FALSE}
48 | devtools::install_github("lutsik/MeDeCom")
49 | ```
50 |
51 | The master branch of the GitHub repository only compiles on *nix-like* platforms with a C++11-compatible compiler are supported.
52 | Users on Mac OS might run into compilation errors as OpenMP, which is required for compilation, is disabled by default. To enable OpenMP, the following tutorial can be followed [https://mac.r-project.org/openmp/](https://mac.r-project.org/openmp/)
53 | We created a separate branch for installation of *MeDeCom* on Windows machines. However, parallel processing options are currently
54 | not supported for Windows due to some incompatabilites of the R/Windows connection. Thus, executing MeDeCom on a Windows machine
55 | takes sustantially longer. Additionally, we creared a Docker image with MeDeCom, which can be used in case of further installation
56 | issues [https://hub.docker.com/r/mscherer/medecom](https://hub.docker.com/r/mscherer/medecom).
57 |
58 | ```{r, eval=FALSE}
59 | devtools::install_github("lutsik/MeDeCom",ref="windows")
60 | ```
61 |
62 | MeDeCom uses stack model for memory to accelerate factorization for smaller ranks.
63 | This requires certain preparation during compilation, therefore, please, note the extended
64 | compilation time (15 to 20 minutes).
65 |
66 | # Data preparation
67 |
68 | *MeDeCom* accepts DNA methylation data in several forms. Preferably the user may load and preprocess the data
69 | using a general-purpose DNA methylation analysis package [RnBeads](http://rnbeads.mpi-inf.mpg.de). A resulting RnBSet object
70 | can be directly supplied to *MeDeCom*. Alternatively, *MeDeCom* runs on any matrix of type `numeric` with valid methylation values.
71 |
72 | *MeDeCom* comes with a small example data set obtained by mixing reference profiles of blood cell methylomes *in silico*.
73 | The example data set can be loaded in a usual way:
74 |
75 | ```{r}
76 | ## load the package
77 | suppressPackageStartupMessages(library(MeDeCom))
78 | ## load the example data sets
79 | data(example.dataset, package="MeDeCom")
80 | ## you should get objects D, Tref and Aref
81 | ## in your global R environment
82 | ls()
83 | ```
84 |
85 | Loaded numeric matrix `D` contains 100 *in silico* mixtures and serves as an example input. Columns of matrix `Tref` contains the methylomes
86 | of 5 blood cell types used to generate the mixtures, while matrix `Aref` provides the mixing proportions.
87 | ```{r}
88 | ## matrix D has dimension 10000x100
89 | str(D)
90 | ## matrix Tref has dimension 10000x5
91 | str(Tref)
92 | ## matrix Aref has dimension 5x100
93 | str(Aref)
94 | ```
95 | # Performing a methylome decomposition experiment
96 |
97 | *MeDeCom* can be run directly on matrix `D`.
98 |
99 | It is crucial to select the values of parameters $k$ and $\lambda$ to test.
100 | A choice of $k$ is often dictated by prior knowledge about the methylomes.
101 | Precise value of lambda has to be selected for each data set independently. A good start
102 | is a logarithmic grid of lambda values. It is important to include $\lambda=0$ into the
103 | grid, as this particular case the regularization is effectively absent making *MeDeCom* similar
104 | to other NMF-based deconvolution algorithms.
105 |
106 | ```
107 | medecom.result<-runMeDeCom(D, Ks=2:10, lambdas=c(0,10^(-5:-1)))
108 | ```
109 |
110 | *MeDeCom* is based upon an alternating optimization heuristic and requires a lot
111 | of computation. The processing of the data matrix can take several hours.
112 | One can speed up the run by decreasing the number of cross-validation folds and random initializations, and
113 | increasing the number of computational cores.
114 |
115 | ```{r, eval=FALSE}
116 | medecom.result<-runMeDeCom(D, 2:10, c(0,10^(-5:-1)), NINIT=10, NFOLDS=10, ITERMAX=300, NCORES=9)
117 | ```
118 |
119 | ```{r, echo=FALSE}
120 | cat("
121 | [Main:] checking inputs
122 | [Main:] preparing data
123 | [Main:] preparing jobs
124 | [Main:] 3114 factorization runs in total
125 | [Main:] runs 2755 to 2788 complete
126 | [Main:] runs 2789 to 2822 complete
127 | [Main:] runs 2823 to 2856 complete
128 | [Main:] runs 2857 to 2890 complete
129 | ......
130 | [Main:] finished all jobs. Creating the object
131 | ")
132 | data(example.MeDeComSet)
133 | ```
134 |
135 | This can, however, lead to decomposition slightly different from the one presented given below.
136 |
137 | The results of a decomposition experiment are saved to an object of class `MeDeComSet`.
138 | The contents of an object can be conveniently displayed using the `print` functionality.
139 |
140 | ```{r}
141 | medecom.result
142 | ```
143 |
144 | # Exploring the decomposition results
145 |
146 | ## Parameter selection
147 |
148 | The first key step is parameter selection. It is important to carefully explore the obtained results and make a decision about
149 | the most feasible parameter values, or about extending the parameter value grids to be tested in refinement experiments.
150 |
151 | *MeDeCom* provides a **cross-validation error** (CVE) for each tested parameter combination.
152 |
153 | ```{r, fig.width=7}
154 | plotParameters(medecom.result)
155 | ```
156 |
157 | A lineplot helping to select parameter $\lambda$ can be produced by specifying a fixed value for $k$:
158 |
159 | ```{r, fig.width=5.5, fig.height=6}
160 | plotParameters(medecom.result, K=5, lambdaScale="log")
161 | ```
162 |
163 | Cross-validation error has a minimum at $\lambda=10^{-2}$ so this value is preferred.
164 |
165 | ## Latent methylation components (LMCs)
166 |
167 | A matrix of LMCs can be extracted using `getLMCs`:
168 |
169 | ```{r}
170 | lmcs<-getLMCs(medecom.result, K=5, lambda=0.01)
171 | str(lmcs)
172 | ```
173 |
174 | LMCs can be seen as measured methylation profiles of purified cell populations.
175 | *MeDeCom* provides for several visualization methods for LMCs using the function `plotLMCs`
176 | which operates directly on `MeDeComSet` objects.
177 |
178 | ### Clustering
179 |
180 | For instance, standard hierarchical clustering can be visualized using:
181 | ```{r}
182 | plotLMCs(medecom.result, K=5, lambda=0.01, type="dendrogram")
183 | ```
184 |
185 | A two-dimensional embedding with MDS is also obtainable:
186 |
187 | ```{r}
188 | plotLMCs(medecom.result, K=5, lambda=0.01, type="MDS")
189 | ```
190 |
191 | Input data can be included into the MDS plot to enhance the interpretation.
192 |
193 | ```{r}
194 | plotLMCs(medecom.result, K=5, lambda=0.01, type="MDS", D=D)
195 | ```
196 |
197 | ### Matching LMCs to reference profiles
198 |
199 | In many cases reference methylomes exists, which are relevant for the data set in question.
200 | For our example analysis matrix `Tref` contains the reference type
201 | profiles which were *in silico* mixed. *MeDeCom* offers several ways to visualize the
202 | resulting LMCs together with the reference methylation profiles. The reference methylomes
203 | can be included into a joint clustering analysis:
204 |
205 | ```{r}
206 | plotLMCs(medecom.result, K=5, lambda=0.01, type="dendrogram", Tref=Tref, center=TRUE)
207 | ```
208 |
209 | Furthermore, a similarity matrix of LMCs vs reference profiles can be visualized as a heatmap.
210 |
211 | ```{r}
212 | plotLMCs(medecom.result, K=5, lambda=0.01, type="heatmap", Tref=Tref)
213 | ```
214 |
215 | Correlation coefficient values and asterisks aid the interpretation.
216 | The values are displayed in the cells which contain maximal values column-wise.
217 | The asterisks mark cells which have the highest correlation value in the respective rows.
218 | Thus, a value with asterisk corresponds to a mutual match, i.e. LMC unambiguously
219 | matching a reference profile.
220 |
221 | In this example analysis each LMC uniquely matches one of the reference
222 | profiles. The matching of
223 |
224 | Function `matchLMCs` offers several methods for
225 | matching LMCs to reference profiles.
226 |
227 | ```{r}
228 | perm<-matchLMCs(lmcs, Tref)
229 | ```
230 |
231 | ### LMC enrichment analysis
232 |
233 | MeDeCom provides functions to perform enrichment analysis on the sites that are particularly hypo-/hypermethylated in an LMC. These sites can then be used for GO and LOLA enrichment analysis. Importantly, genomic annotations of the LMC sites is required to be specified. We thus recommend to use the [DecompPipeline](https://github.com/CompEpigen/DecompPipeline) package for processing, but the annotation can also be specified manually using a ```data.frame``` that looks as follows:
234 |
235 | ```{r,CpG_annotation,eval=F}
236 | Chromosome Start End Strand CpG GC CGI Relation SNPs
237 | 30365 chr1 1036375 1036376 + 2 59 Shelf
238 | 42681 chr1 1184537 1184538 + 2 58 Open Sea
239 | 45091 chr1 1218625 1218626 + 5 66 Island
240 | 51615 chr1 1292773 1292774 + 3 64 Island
241 | 52001 chr1 1295504 1295505 + 9 65 Shore
242 | 52003 chr1 1295507 1295508 + 9 65 Shore
243 | ```
244 |
245 | The required columns are `Chromosome`, `Start`, `End`, and `Strand`. Using this ```data.frame``` (called `df` in the following), enrichment analysis can be performed using:
246 |
247 | ```{r enrichment, eval=F}
248 | lmc.lola.enrichment(medecom.result,anno.data=df,K=5,lambda=0.001,diff.threshold = 0.5, region.type = "tiling")
249 | ```
250 |
251 | Please note that `df` needs to have the same number of rows than the methylation matrix used as input to MeDeCom. CpGs are first aggregated over the `region.type` specified, then the regions are selected that have a difference larger than `diff.threshold`. The list of available region types is published here [https://rnbeads.org/regions.html](https://rnbeads.org/regions.htm).
252 |
253 | ## Mixing proportions
254 |
255 | A matrix of mixing proportions is obtained using `getProportions`:
256 |
257 | ```{r}
258 | prop<-getProportions(medecom.result, K=5, lambda=0.001)
259 | str(prop)
260 | ```
261 |
262 | ### Visualization of the complete proportion matrix
263 |
264 | A complete matrix of propotions can be visualized as a stacked barplot:
265 | ```{r}
266 | plotProportions(medecom.result, K=5, lambda=0.01, type="barplot")
267 | ```
268 |
269 | or a heatmap:
270 |
271 | ```{r, fig.width=8, fig.height=6}
272 | plotProportions(medecom.result, K=5, lambda=0.01, type="heatmap")
273 | ```
274 |
275 | The heatmap can be enhanced by clustering the columns:
276 |
277 | ```{r, fig.width=8, fig.height=6}
278 | plotProportions(medecom.result, K=5, lambda=0.01, type="heatmap", heatmap.clusterCols=TRUE)
279 | ```
280 |
281 | or adding color code for the samples:
282 |
283 | ```{r, fig.width=8, fig.height=6}
284 | sample.group<-c("Case", "Control")[1+sample.int(ncol(D))%%2]
285 | plotProportions(medecom.result, K=5, lambda=0.01, type="heatmap", sample.characteristic=sample.group)
286 | ```
287 |
288 | ### Visualization of selected LMC proportions
289 |
290 | ```{r, echo=FALSE}
291 | rownames(Aref)<-colnames(Tref)
292 | ```
293 |
294 | ```{r}
295 | plotProportions(medecom.result, K=5, lambda=0.01, type="lineplot", lmc=2, Aref=Aref, ref.profile=2)
296 | ```
297 |
298 | # Advanced usage
299 |
300 | ## Running *MeDeCom* on a compute cluster
301 |
302 | *MeDeCom* experiments require a lot of computational time. On the other hand most of the factorization runs are
303 | independent and, therefore, can be run in parallel. Thus, a significant speedup can be achieved when running *MeDeCom*
304 | in an HPC environment. *MeDeCom* can be easily adapted to most of the popular schedulers. There are, however, several prerequisites:
305 |
306 | * the scheduler provides the standard utilities `qsub` for the submission of the cluster jobs and `qstat` for obtaining the job statistics;
307 | * the cluster does not have a low limit on the number of submitted jobs;
308 | * the R installation (location of the R binary and the package library) is consistent across the execution nodes.
309 |
310 | The example below
311 | is for the cluster operated by *Son of Grid Engine* (SoGE). To be able to run on a SoGE cluster *MeDeCom* needs to know:
312 |
313 | * location of the R executable (directory);
314 | * an operating memory limit per each factorization job;
315 | * a pattern for the names of cluster nodes to run the jobs on.
316 |
317 | These settings should be stored in a `list` object:
318 | ```{r}
319 | sge.setup<-list(
320 | R_bin_dir="/usr/bin",
321 | host_pattern="*",
322 | mem_limit="5G"
323 | )
324 | ```
325 | This object should be supplied to *MeDeCom* as the argument `cluster.settings`. It is also important to specify a valid temporary
326 | directory, which is available to all execution nodes.
327 | ```{r, eval=FALSE}
328 | medecom.result<-runMeDeCom(D, Ks=2:10, lambdas=c(0,10^(-5:-1)), N_COMP_LAMBDA=1, NFOLDS=5, NINIT=10,
329 | temp.dir="/cluster_fs/medecom_temp",
330 | cluster.settings=sge.setup)
331 | ```
332 | *MeDeCom* will start the jobs and will periodically monitor the number of remaining ones.
333 | ```{r, echo=FALSE}
334 | cat("
335 | [Main:] checking inputs
336 | [Main:] preparing data
337 | [Main:] preparing jobs
338 | [Main:] 3114 factorization runs in total
339 | [Main:] 3114 jobs remaining
340 | ....
341 | [Main:] finished all jobs. Creating the object
342 | ")
343 | ```
344 |
345 | # R session
346 | Here is the output of `sessionInfo()` on the system on which this document was compiled:
347 | ```{r, echo=FALSE}
348 | sessionInfo()
349 | ```
350 |
351 |
352 |
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-10-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-11-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-12-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-13-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-14-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-17-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-18-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-19-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-20-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-22-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-7-1.png
--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-8-1.png
--------------------------------------------------------------------------------