├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── R
    ├── LMC_interpretation.R
    ├── MeDeCom-package.R
    ├── MeDeComSet-class.R
    ├── RcppExports.R
    ├── allGlobals.R
    ├── contribution_interpretation.R
    ├── factorizations.R
    ├── frontend.R
    ├── gapStatisitics.R
    ├── markerSelection.R
    ├── matching.R
    ├── plotting.R
    ├── reference.R
    ├── simulation.R
    └── utilities.R
├── README
├── README.md
├── data
    ├── datalist
    ├── example.MeDeComSet.RData
    ├── example.cg.annotation.RData
    ├── example.dataset.RData
    └── small.example.RnBSet.RData
├── exec
    └── cluster.script.sge.R
├── inst
    └── unitTests
    │   └── test_general.R
├── man
    ├── AVAIL.REFS.Rd
    ├── MeDeCom-package.Rd
    ├── MeDeComSet-class.Rd
    ├── as.MeDeComSet.Rd
    ├── cluster.refbased.Rd
    ├── example.MeDeComSet.Rd
    ├── example.cg.annotation.Rd
    ├── example.dataset.Rd
    ├── factorize.alternate.Rd
    ├── factorize.regr.Rd
    ├── getLMCs-methods.Rd
    ├── getProportions-methods.Rd
    ├── getStatistics-methods.Rd
    ├── greedymatch.Rd
    ├── lmc.annotation.enrichment.Rd
    ├── lmc.annotation.plots.tables.Rd
    ├── lmc.go.enrichment.Rd
    ├── lmc.go.plots.tables.Rd
    ├── lmc.lola.enrichment.Rd
    ├── lmc.lola.plots.tables.Rd
    ├── load.lola.for.medecom.Rd
    ├── locus_plot.Rd
    ├── matchLMCs.Rd
    ├── plotLMC.reference.Rd
    ├── plotLMCs.Rd
    ├── plotParameters.Rd
    ├── plotProportions.Rd
    ├── run.refbased.Rd
    ├── run.trait.association.Rd
    ├── run.trait.association.single.Rd
    └── runMeDeCom.Rd
├── src
    ├── HCLasso.cpp
    ├── Makevars
    ├── Makevars.win
    ├── QuadHC.cpp
    ├── RProjSplxBox.cpp
    ├── RQuadSimplex.cpp
    ├── RQuadSimplexBox.cpp
    ├── RcppExports.cpp
    ├── cppTAfact.cpp
    └── dynblas.h
├── tests
    └── runTests.R
└── vignettes
    ├── MeDeCom.Rmd
    ├── MeDeCom.html
    ├── MeDeCom.md
    ├── MeDeCom_files
        └── MathJax.js
    └── figure
        ├── unnamed-chunk-10-1.png
        ├── unnamed-chunk-11-1.png
        ├── unnamed-chunk-12-1.png
        ├── unnamed-chunk-13-1.png
        ├── unnamed-chunk-14-1.png
        ├── unnamed-chunk-17-1.png
        ├── unnamed-chunk-18-1.png
        ├── unnamed-chunk-19-1.png
        ├── unnamed-chunk-20-1.png
        ├── unnamed-chunk-22-1.png
        ├── unnamed-chunk-7-1.png
        └── unnamed-chunk-8-1.png


/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | src/*.o
 6 | src/*.so
 7 | src/*.dll
 8 | *.Rproj
 9 | .Rbuildignore
10 | .gitignore
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: R
2 | sudo: false
3 | cache: packages
4 | r: bioc-release
5 | warnings_are_errors: false
6 | #before_install: 
7 | #  - R -e "install.packages('knitr', 'rmarkdown'); remotes::install_deps(dependencies = TRUE)"
8 | #install: travis_wait 30 R CMD build
9 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: MeDeCom
 2 | Title: Decomposition of heterogeneous methylomes
 3 | Description: Decomposition of heterogeneous methylomes with non-negative matrix factorization.
 4 | Maintainer: Pavlo Lutsik <p.lutsik@mx.uni-saarland.de>
 5 | Authors@R: c(
 6 |    person ( "Pavlo", "Lutsik", email="p.lutsik@dkfz.de", role = c("cre")),
 7 |    person ( "Martin", "Slawski", email="ms@cs.uni-saarland.de", role = c("aut")),
 8 |    person ( "Nik", "Vedeneev", email="nikitavedeneev@gmail.com", role = c("aut")),
 9 |    person ( "Gilles", "Gasparoni", email="gillesgasparoni@gmail.com", role = c("aut")),
10 |    person ( "Michael", "Scherer", email="mscherer@mpi-inf.mpg.de", role = c("aut")),
11 |    person ( "Matthias", "Hein", email="hein@cs.uni-saarland.de", role = c("aut")),
12 |    person ( "Joern", "Walter", email="j.walter@mx.uni-saarland.de", role = c("aut"))
13 |  )
14 | Date: 2020-05-13
15 | Version: 1.0.1
16 | Depends:
17 |     R (>= 3.2.0),
18 |     Rcpp,
19 |     pracma,
20 |     gtools,
21 |     gplots,
22 |     parallel,
23 |     RUnit,
24 |     RnBeads
25 | Suggests:
26 |     BiocStyle,
27 |     knitr,
28 |     rmarkdown,
29 |     LOLA,
30 |     GOstats,
31 |     qvalue,
32 |     simpleCache,
33 |     igraph
34 | Imports:
35 |     RcppEigen (>= 0.3)
36 | LinkingTo:
37 |     Rcpp, 
38 |     RcppEigen
39 | VignetteBuilder: knitr
40 | License: GPL-3
41 | RoxygenNote: 7.1.0
42 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(MeDeComSet)
 4 | export(as.MeDeComSet)
 5 | export(cluster.refbased)
 6 | export(factorize.alternate)
 7 | export(factorize.regr)
 8 | export(greedymatch)
 9 | export(lmc.annotation.enrichment)
10 | export(lmc.annotation.plots.tables)
11 | export(lmc.go.enrichment)
12 | export(lmc.go.plots.tables)
13 | export(lmc.lola.enrichment)
14 | export(lmc.lola.plots.tables)
15 | export(load.lola.for.medecom)
16 | export(matchLMCs)
17 | export(plotLMC.reference)
18 | export(plotLMCs)
19 | export(plotParameters)
20 | export(plotProportions)
21 | export(run.refbased)
22 | export(run.trait.association)
23 | export(run.trait.association.single)
24 | export(runMeDeCom)
25 | exportClasses(MeDeComSet)
26 | exportMethods(getLMCs)
27 | exportMethods(getProportions)
28 | exportMethods(getStatistics)
29 | useDynLib(MeDeCom)
30 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
 1 | MeDeCom 1.0.1
 2 | =============
 3 | 
 4 |  * Implemented functionality for genomic annotation enrichment
 5 | 
 6 | MeDeCom 1.0.0
 7 | =============
 8 | 
 9 |  * Bumped version 1.0.0
10 | 
11 | MeDeCom 0.3.3
12 | =============
13 | 
14 |  * Fixed bug in case that NFOLDS=1
15 |  * Further smaller bugfixes and documentation updates
16 |  
17 | MeDeCom 0.3
18 | =============
19 | 
20 |  * New functionality for gene-centric and interval-based functional annotation of LMCs as well 
21 |  as inference on LMC proportions (LOLA, GO-stats, association of proportions with phenotypic traits)
22 |  * MeDeCom now fully integrates with RnBeads, DecompPipeline and FactorViz
23 |  * New functions for exploring LMC distributions
24 |  * Multiple bugfixes
25 | 
26 | MeDeCom 0.2.3
27 | =============
28 | 
29 |  * Added conversion of RefFreeCellMix (https://cran.r-project.org/web/packages/RefFreeEWAS/index.html) to MeDeComSet
30 |  
31 | MeDeCom 0.2
32 | =============
33 | 
34 |  * Added improved optimization routines (cppTAfact by Nik Vedeneev) which are now used by default
35 |  * Improved R-side parallelization in the single machine mode
36 |  * Multiple minor fixes and improvements
37 | 
38 | MeDeCom 0.1
39 | =============
40 | 
41 |  * Initial release
42 | 


--------------------------------------------------------------------------------
/R/MeDeCom-package.R:
--------------------------------------------------------------------------------
 1 | #' MeDeCom: Methylome DeComposition using regularized constrained matrix factorization
 2 | #'
 3 | #' MeDeCom is an R-package that discovers and quantifies latent components
 4 | #' in the DNA methylomes of heterogeneous samples
 5 | #' 
 6 | #' @references TBA
 7 | #' @docType package  
 8 | #' @name MeDeCom-package
 9 | #' @useDynLib MeDeCom
10 | NULL
11 | 
12 | #' Example CG annotation
13 | #' 
14 | #' An data frame containing CpGs with their corresponding annotation in the genome to be used for the analysis. You can provide a similar
15 | #' \code{data.frame} for your own analysis.
16 | #' 
17 | #' @docType data
18 | #' @keywords datasets
19 | #' @name example.cg.annotation
20 | #' @format \code{cg.ann} is a \code{data.frame} containing identifiers and postitions of CpG sites to be analyzed
21 | #' @author Michael Scherer
22 | NULL
23 | 
24 | #' Example dataset
25 | #' 
26 | #' Contains a data set of methylation values as a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples), a reference of
27 | #' contributions of 5 cell types in the samples, and the reference methylomes of the samples as a matrix with 10,000 rows and 5
28 | #' columns.
29 | #'  
30 | #' \itemize{
31 | #'           \item D, a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples) representing a potential input methylation
32 | #'           matrix
33 | #'           \item Aref, a reference of contributions of 5 cell types in the samples
34 | #'           \item Tref, reference methylomes of the samples as a matrix with 10,000 rows and 5 columns
35 | #' }
36 | #' 
37 | #' @docType data
38 | #' @keywords datasets
39 | #' @name example.dataset
40 | #' @usage data(example.dataset)
41 | #' @author Michael Scherer
42 | NULL
43 | 
44 | #' Example result object (MeDeComSet)
45 | #' 
46 | #' An example result of applying \pkg{MeDeCom} for a deconvolution experiment.
47 | #'  
48 | #' @docType data
49 | #' @keywords datasets
50 | #' @name example.MeDeComSet
51 | #' @format \code{MeDeComSet}, the result of a deconvolution experiment and input to any follow-up analysis or to retrieve latent 
52 | #' methylation components (LMCs)
53 | #' @author Michael Scherer
54 | NULL


--------------------------------------------------------------------------------
/R/MeDeComSet-class.R:
--------------------------------------------------------------------------------
  1 | #' MeDeComSet Class
  2 | #'
  3 | #' Stores the result of a methylation deconvolution experiment
  4 | #'
  5 | #' @section Slots:
  6 | #' \describe{
  7 | #'   \item{\code{dataset_info}}{\code{list} with information about the input data set.}
  8 | #'   \item{\code{parameters}}{\code{list} containing parameters of the deconvolution experiment.}
  9 | #'   \item{\code{outputs}}{\code{list} of deconvolution products for each combination of MeDeCom parameters.}
 10 | #' }
 11 | #' @section Methods and functions:
 12 | #' \describe{
 13 | #'   \item{\code{\link[=getStatistics,MeDeComSet-method]{getStatistics}}}{Returns the value of one goodness-of-fit statistics for a given parameter combination.}
 14 | #'   \item{\code{\link[=getLMCs,MeDeComSet-method]{getLMCs}}}{Returns a matrix of LMCs for a given parameter combination.}
 15 | #'   \item{\code{\link[=getProportions,MeDeComSet-method]{getProportions}}}{Returns a matrix of mixing proportions for a given parameter combination.}
 16 | #'   \item{\code{\link{plotParameters}}}{Create a parameter selection plot.}
 17 | #'   \item{\code{\link{plotLMCs}}}{Visualize the LMCs.}
 18 | #'   \item{\code{\link{plotProportions}}}{Visualize the mixing proportions.}
 19 | #' }
 20 | #' 
 21 | #'
 22 | #' @name MeDeComSet-class
 23 | #' @rdname MeDeComSet-class
 24 | #' @author Pavlo Lutsik
 25 | #' @exportClass MeDeComSet
 26 | setClass("MeDeComSet",
 27 | 		representation(
 28 | 				dataset_info="list",
 29 | 				parameters="list",
 30 | 				outputs="list"
 31 | 		),
 32 | 		prototype(
 33 | 				dataset_info=list(),
 34 | 				parameters=list(),
 35 | 				outputs=list()				
 36 | 		),
 37 | 		package = "MeDeCom")
 38 | ########################################################################################################################
 39 | #setMethod("initialize", "MeDeComSet",
 40 | #		function(.Object,
 41 | #				outputs=list(),
 42 | #				parameters=list()
 43 | #		) {
 44 | #			
 45 | #			.Object@outputs<-outputs
 46 | #			.Object@parameters<-parameters
 47 | #			.Object
 48 | #})
 49 | ########################################################################################################################
 50 | #' MeDeComSet
 51 | #' 
 52 | #' Wrapper function MeDeComSet
 53 | #'
 54 | #'
 55 | #' @param parameters       	\code{list} of MeDeCom parameters with elements \code{K}, integer vector of k values, \code{lambdas}, numeric vector of lambda values.
 56 | #' @param outputs			\code{list} of MeDeCom resutls with one element per each used CpG subset.
 57 | #' @param dataset_info   \code{list} with information about the input data set.
 58 | #' 
 59 | #' @return an object of class MeDeComSet
 60 | #' 
 61 | #' @name MeDeComSet
 62 | #' @rdname MeDeComSet-class
 63 | #' @aliases initialize,MeDeComSet-method
 64 | #' @export
 65 | MeDeComSet<-function(
 66 | 		parameters,
 67 | 		outputs,
 68 | 		dataset_info=list()){
 69 | 	
 70 | 	object<-new("MeDeComSet",
 71 | 			dataset_info = dataset_info,
 72 | 			parameters = parameters,
 73 | 			outputs = outputs)
 74 | 	object
 75 | }
 76 | ########################################################################################################################
 77 | if(!isGeneric("getStatistics")) setGeneric("getStatistics",
 78 | 			function(object, ...) standardGeneric("getStatistics"))
 79 | 
 80 | #' getStatistics-methods
 81 | #'
 82 | #' Methylation sites object information for which is present in the \code{RnBSet} object.
 83 | #'
 84 | #' @param object   object returned by \link{runMeDeCom}
 85 | #' @param Ks			   numbers of LMCs
 86 | #' @param lambdas	   regularlization parameters
 87 | #' @param cg_subset	   used CpG subset, defaults to the full data set
 88 | #' @param statistic   \code{character} of length 1 specifying goodness of fit statistics
 89 | #' 
 90 | #' @details 
 91 | #' Currently the following values for \code{statistics} can be supplied: \code{objective}, \code{RMSE}, \code{CVE}.
 92 | #'
 93 | #' @return A numeric \code{matrix} or \code{vector} with the requested statistics
 94 | #'
 95 | #' @rdname getStatistics-methods
 96 | #' @docType methods
 97 | #' @aliases getStatistics
 98 | #' @aliases getStatistics,MeDeComSet-method
 99 | #' @export
100 | #' @examples
101 | #' \donttest{
102 | #' data(example.data)
103 | #' getStatistics(example_MeDeComSet, K=2, lambda=0.001)
104 | #' }
105 | setMethod("getStatistics", signature(object="MeDeComSet"),
106 | 		function(object, Ks=object@parameters$Ks, lambdas=object@parameters$lambdas, cg_subset=1, statistic="cve"){
107 | 		check_inputs(object, cg_subset, Ks, lambda=lambdas)
108 | 		elt<-c(
109 | 				"objective"="Fval",	"Fval"="Fval", 
110 | 				"rmse"="rmse", "RMSE"="rmse",
111 | 				"CVE"="cve", "cve"="cve",
112 | 				"MAEA"="maeA", "maeA"="maeA",
113 | 				"RMSET"="rmseT","rmseT"="rmseT",
114 | 				"deviance"="Deviance", "Deviance"="Deviance",
115 | 				"rss"="RSS","RSS"="RSS"
116 | 		)[statistic]
117 | 		return(as.numeric(object@outputs[[cg_subset]][[elt]][match(Ks, object@parameters$Ks), match(lambdas, object@parameters$lambdas)]))
118 | })
119 | ########################################################################################################################
120 | if(!isGeneric("getLMCs")) setGeneric("getLMCs",
121 | 			function(object, ...) standardGeneric("getLMCs"))
122 | #'
123 | #' getLMCs-methods
124 | #' 
125 | #' Return a matrix of LMCs
126 | #' 
127 | #' @param object   object returned by \link{runMeDeCom}
128 | #' @param K			   number of LMCs
129 | #' @param lambda	   regularlization parameter
130 | #' @param cg_subset	   used CpG subset, defaults to the full data set
131 | #' @param statistic    statistic to be used in returning
132 | #' 
133 | #' @rdname getLMCs-methods
134 | #' @docType methods
135 | #' @aliases getLMCs
136 | #' @aliases getLMCs,MeDeComSet-method
137 | #' @export
138 | #' @examples
139 | #' \donttest{
140 | #' data(example.data)
141 | #' getLMCs(example_MeDeComSet, K=2, lambda=0.001)
142 | #' }
143 | setMethod("getLMCs", signature(object="MeDeComSet"),
144 | 		function(object, K=object@parameters$Ks[1], lambda=object@parameters$lambdas[1], cg_subset=1, statistic="cve"){
145 | 	check_inputs(object, cg_subset, K, lambda)
146 | 	return(object@outputs[[cg_subset]]$T[[match(K, object@parameters$Ks), match(lambda, object@parameters$lambdas)]])
147 | })
148 | ########################################################################################################################
149 | if(!isGeneric("getProportions")) setGeneric("getProportions",
150 | 			function(object, ...) standardGeneric("getProportions"))
151 | #'
152 | #' getProportions-methods
153 | #' 
154 | #' Return a matrix of LMCs
155 | #' 
156 | #' @param object   object returned by \link{runMeDeCom}
157 | #' @param K			   number of LMCs
158 | #' @param lambda	   regularlization parameter
159 | #' @param cg_subset	   used CpG subset, defaults to the full data set
160 | #' @param statistic    statistic to be used in returning
161 | #' 
162 | #' @rdname getProportions-methods
163 | #' @docType methods
164 | #' @aliases getProportions
165 | #' @aliases getProportions,MeDeComSet-method
166 | #' @export
167 | #' @examples
168 | #' \donttest{
169 | #' data(example.data)
170 | #' getProportions(example_MeDeComSet, K=2, lambda=0.001)
171 | #' }
172 | setMethod("getProportions", signature(object="MeDeComSet"),
173 | 		function(object, K=object@parameters$Ks[1], lambda=object@parameters$lambdas[1], cg_subset=1, statistic="cve"){
174 | 	check_inputs(object, cg_subset, K, lambda)
175 | 	Ahat<-object@outputs[[cg_subset]]$A[[match(K, object@parameters$Ks), match(lambda, object@parameters$lambdas)]]
176 | 	if(!is.null(dim(Ahat))){
177 | 	  rownames(Ahat)<-sprintf("LMC%d", 1:nrow(Ahat))
178 |   	if(!is.null(object@dataset_info$sample_names)){
179 | 	  	colnames(Ahat)<-object@dataset_info$sample_names
180 |   	}
181 | 	}
182 | 	return(Ahat)
183 | })
184 | ########################################################################################################################
185 | #
186 | # Check inputs for the get* methods
187 | #
188 | check_inputs<-function(MeDeComSet, cg_subset, K, lambda){
189 | 	if(!all(cg_subset %in% MeDeComSet@parameters$cg_subsets)){
190 | 		stop("wrong cg subset supplied")
191 | 	}
192 | 	
193 | 	if(!all(K %in% MeDeComSet@parameters$Ks)){
194 | 		stop("wrong K value supplied")
195 | 	}
196 | 	
197 | 	if(!all(lambda %in% MeDeComSet@parameters$lambdas)){
198 | 		stop("wrong lambda value supplied")
199 | 	}
200 | }
201 | #' as.MeDeComSet
202 | #' 
203 | #' Function to convert object of type RefFreeCellMix to MeDeComSet
204 | #' 
205 | #' @param object An object of class \code{RefFreeCellMix} containing cell type deconvolution information, or a list of such objects.
206 | #' @param cg_subsets The indeces of the CpG subsets used in the analysis.
207 | #' @param Ks The values of K used in the analysis. If NULL, K is determined by the size of the matrices.
208 | #' @param deviances Optional argument specifying the deviances as computed with \code{RefFreeCellMixArrayDevianceBoots}.
209 | #' @param rss Optional argument specifying the residual sum of sqaures
210 | #' @param m.orig The original number of rows (CpGs) in the methylation matrix.
211 | #' @param n.orig The original number of columns (samples) in the methylation matrix.
212 | #' @return An object of type \code{MeDeComSet}
213 | #' @details Since \code{RefFreeCellMix} only contains information on a single value for K, and does not contain any regularization
214 | #'           (lambda), the corresponding parameters in the MeDeComSet are set to single numeric values. Furthermore, no information
215 | #'           on goodness of fit (CVE, Fval) can be stored. If \code{cg_subsets} is not of length 1, an object containing multiple
216 | #'           subsets is creared. 
217 | #' @export
218 | as.MeDeComSet <- function(object,cg_subsets=1,Ks=NULL,deviances=NULL,rss=NULL,m.orig=NULL,n.orig=NULL){
219 |   c.obj <- class(object)
220 |   if(c.obj=="list"){
221 |     c.obj <- class(object[[1]])
222 |     if(c.obj == "list"){
223 |       c.obj <- class(object[[1]][[1]])
224 |       #object <- object[[1]]
225 |     }
226 |   }  
227 |   if(!(c.obj=="RefFreeCellMix" | c.obj=="list")){
228 |       stop(paste("Cannot convert object of type",c.obj,"to MeDeComSet"))
229 |   }
230 |   if(c.obj == "RefFreeCellMix"){
231 |     output <- list()
232 |     if(is.null(Ks)){
233 |       Ks <- "1"
234 |       all.Ks <- ncol(object$Omega)
235 |       object <- list("1"=object)
236 |       if(!is.null(deviances)){
237 |         deviances <- list("1"=deviances)
238 |       }
239 |     }else{
240 |       all.Ks <- Ks
241 |       Ks <- as.character(Ks)
242 |     }
243 |     # if(length(cg_subsets)==1){
244 |     #   object <- list(object)
245 |     #   if(!is.null(deviances)){
246 |     #     deviances <- list(deviances)
247 |     #   }
248 |     # }
249 |     for(ssets in cg_subsets){
250 |       sel.sset <- object[[ssets]]
251 |       lambda <- 0
252 |       T.all <- list()
253 |       A.all <- list()
254 |       for(i in 1:length(Ks)){
255 |         K <- Ks[i]
256 |         sel.object <- sel.sset[[K]]
257 |         A <- sel.object$Omega
258 |         T <- sel.object$Mu
259 |         K <- ncol(A)
260 |         T.all[[i]] <- T
261 |         if(!is.null(A)){
262 |           A.all[[i]] <- t(A)
263 |         }else{
264 |           A.all[[i]] <- NULL
265 |         }  
266 |       }
267 |       T.all <- matrix(T.all,nrow=length(Ks))
268 |       row.names(T.all) <- paste("K",Ks,sep="_")
269 |       colnames(T.all) <- paste("lambda",lambda,sep = "_")
270 |       A.all <- matrix(A.all,nrow = length(Ks))
271 |       row.names(A.all) <- paste("K",Ks,sep="_")
272 |       colnames(A.all) <- paste("lambda",lambda,sep = "_")
273 |       if(is.null(deviances)){
274 |         output[[ssets]] <- list(T=T.all,A=A.all)
275 |       }else{
276 |         deviances.all <- matrix(deviances[[ssets]],nrow = length(Ks))
277 |         row.names(deviances.all) <- paste("K",Ks,sep="_")
278 |         colnames(deviances.all) <- paste("lambda",lambda,sep = "_")
279 |         output[[ssets]] <- list(T=T.all,A=A.all,Deviance=deviances.all)
280 |       }
281 |     }
282 |     parameters <- list(cg_subsets=cg_subsets,
283 |                        Ks=all.Ks,
284 |                        lambdas=0)
285 |     if(is.null(m.orig)){
286 |       m <- nrow(T)
287 |     }else{
288 |       m <- m.orig
289 |     }
290 |     if(is.null(n.orig)){
291 |       n <- ncol(A)
292 |     }else{
293 |       n <- n.orig
294 |     }
295 |     d.info <- list(m=m,n=n,TYPE="RefFreeCellMix")
296 |     new.obj <- MeDeComSet(parameters = parameters,
297 |                           outputs = output,
298 |                           dataset_info = d.info)
299 |   }else if(c.obj=="list"){
300 |     output <- list()
301 |     if(is.null(Ks)){
302 |       Ks <- "1"
303 |       all.Ks <- ncol(object$T[[1]][[1]])
304 |       object <- list("1"=object)
305 |       if(!is.null(rss)){
306 |         rss <- list("1"=rss)
307 |       }
308 |     }else{
309 |       all.Ks <- Ks
310 |       Ks <- as.character(Ks)
311 |     }
312 |    for(ssets in cg_subsets){
313 |       sel.sset <- object[[ssets]]
314 |       lambda <- 0
315 |       T.all <- sel.sset$T
316 |       A.all <- sel.sset$A
317 |       T.all <- matrix(T.all,nrow=length(Ks))
318 |       row.names(T.all) <- paste("K",Ks,sep="_")
319 |       colnames(T.all) <- paste("lambda",lambda,sep = "_")
320 |       A.all <- matrix(A.all,nrow = length(Ks))
321 |       row.names(A.all) <- paste("K",Ks,sep="_")
322 |       colnames(A.all) <- paste("lambda",lambda,sep = "_")
323 |       if(is.null(rss)){
324 |         output[[ssets]] <- list(T=T.all,A=A.all)
325 |       }else{
326 |         rss.all <- matrix(rss[[ssets]],nrow = length(Ks))
327 |         row.names(rss.all) <- paste("K",Ks,sep="_")
328 |         colnames(rss.all) <- paste("lambda",lambda,sep = "_")
329 |         output[[ssets]] <- list(T=T.all,A=A.all,RSS=rss.all)
330 |       }
331 |     }
332 |     parameters <- list(cg_subsets=cg_subsets,
333 |                        Ks=all.Ks,
334 |                        lambdas=0)
335 |     if(is.null(m.orig)){
336 |       m <- nrow(T)
337 |     }else{
338 |       m <- m.orig
339 |     }
340 |     if(is.null(n.orig)){
341 |       n <- ncol(A)
342 |     }else{
343 |       n <- n.orig
344 |     }
345 |     d.info <- list(m=m,n=n,TYPE="EDec")
346 |     new.obj <- MeDeComSet(parameters = parameters,
347 |                           outputs = output,
348 |                           dataset_info = d.info)
349 |     
350 |   }
351 |   return(new.obj)
352 | }
353 | ########################################################################################################################
354 | setMethod("show", "MeDeComSet", function(object){
355 | 			cat("An object of class MeDeComSet\n")
356 | 			cat("Input data set:\n")
357 | 			cat(sprintf("\t%d CpGs\n", object@dataset_info$m))
358 | 			cat(sprintf("\t%d methylomes\n", object@dataset_info$n))
359 | 			cat("Experimental parameters:\n")
360 | 			#cat(sprintf("\tCpG subsets: %s\n", paste(object@parameters$lambdas, collapse=", ")))
361 | 			cat(sprintf("\tk values: %s\n", paste(object@parameters$Ks, collapse=", ")))
362 | 			cat(sprintf("\tlambda values: %s\n", paste(object@parameters$lambdas, collapse=", ")))
363 | 			
364 | 		})
365 | ########################################################################################################################


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | RHLasso <- function(Ginp, Winp, Ainp, l) {
 5 |     .Call('_MeDeCom_RHLasso', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, l)
 6 | }
 7 | 
 8 | RQuadHC <- function(Ginp, Winp, Ainp, otol, lconstr, uconstr) {
 9 |     .Call('_MeDeCom_RQuadHC', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, otol, lconstr, uconstr)
10 | }
11 | 
12 | RProjSplxBox <- function(Xinp, linp, uinp) {
13 |     .Call('_MeDeCom_RProjSplxBox', PACKAGE = 'MeDeCom', Xinp, linp, uinp)
14 | }
15 | 
16 | RQuadSimplex <- function(Ginp, Winp, Ainp, ot) {
17 |     .Call('_MeDeCom_RQuadSimplex', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, ot)
18 | }
19 | 
20 | RQuadSimplexBox <- function(Ginp, Winp, Ainp, linp, uinp, ot) {
21 |     .Call('_MeDeCom_RQuadSimplexBox', PACKAGE = 'MeDeCom', Ginp, Winp, Ainp, linp, uinp, ot)
22 | }
23 | 
24 | cppTAfact <- function(mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda = 0.0, itersMax = 1000L, tol = 1e-8, tolA = 1e-7, tolT = 1e-7) {
25 |     .Call('_MeDeCom_cppTAfact', PACKAGE = 'MeDeCom', mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda, itersMax, tol, tolA, tolT)
26 | }
27 | 
28 | 


--------------------------------------------------------------------------------
/R/allGlobals.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #######################################################################################################################
 3 | # GLOBALS
 4 | #######################################################################################################################
 5 | 
 6 | ALGORITHMS<-c(
 7 | 		"truth",
 8 | 		"regression",
 9 | 		"houseman2012",
10 | 		"houseman2016",
11 | 		"MeDeCom",
12 | 		"MeDeCom.quadPen",
13 | 		"MeDeCom.cppTAfact", 
14 | 		"HLasso", 
15 | 		"IntFac", 
16 | 		"IntEmpirical",
17 | 		"Resample",
18 | 		"VertexSearch")
19 | 
20 | T_METHODS<-c(
21 | 		NA,
22 | 		NA,
23 | 		NA,
24 | 		NA,
25 | 		"quadPen",
26 | 		"quadPen",
27 | 		"cppTAfact",
28 | 		"Hlasso", 
29 | 		"integer", 
30 | 		"empirical",
31 | 		"resample",
32 | 		NA)
33 | 
34 | names(T_METHODS)=ALGORITHMS
35 | 
36 | ALGORITHM.COLS=c(
37 | 		"truth"="black",
38 | 		"regression"="blue",
39 | 		"houseman2012"="deepskyblue",
40 | 		"houseman2016"="skyblue",
41 | 		"MeDeCom"="red",
42 | 		"MeDeCom.quadPen"="red",
43 | 		"MeDeCom.cppTAfact"="tomato", 
44 | 		"HLasso"="orange", 
45 | 		"IntFac"="green", 
46 | 		"IntEmpirical"="plum",
47 | 		"Resample"="magenta",
48 | 		"VertexSearch"="brown")
49 | 
50 | ALGORITHM.PCH=c(15,0,0,0,2,2,2,3,4,5,6,1)
51 | 
52 | names(ALGORITHM.PCH)<-ALGORITHMS
53 | 
54 | # a basis for the lambda parameter grid
55 | RELGRID = c(
56 | 		1E-10, 1E-5, 1E-4, 5E-4, 1E-3, 2E-3, 5E-3, 
57 | 		1E-2, 2E-2, 5E-2, 1E-1, 2E-1, 5E-1, 1)
58 | 
59 | PERFORMANCE_MEASURES<-c("Objective"="Fval", "RMSE"="rmse", "CV error"="cve", "RMSE, T"="rmseT", "MDC, T"="dist2C", "MAE, A"="maeA")
60 | 
61 | 
62 | #######################################################################################################################
63 | 


--------------------------------------------------------------------------------
/R/gapStatisitics.R:
--------------------------------------------------------------------------------
 1 | #######################################################################################################################
 2 | # 
 3 | #  Gap statistics for selection of parameter r (number of underlying components)
 4 | #  
 5 | #  author: Pavlo Lutsik
 6 | # 
 7 | #######################################################################################################################
 8 | 
 9 | #
10 | # select.r.gap
11 | #
12 | # Procedure for selecting the best r using the modified gap statistics 
13 | #
14 | # @details  The gap statistics for a given value of parameter r 
15 | #		    is calculated as a difference between the "null" RMSE of 
16 | #		    and the RMSE of the actual data factorization.
17 | #			The "null" RMSE is obtained by averaging the factorization RMSE of the 
18 | #			the randomly permuted input matrix
19 | #			
20 | #
21 | # @author Pavlo Lutsik
22 | #
23 | select.r.gap<-function(D, method, minr=1, maxr=5, nperm=100, return.all=TRUE, plot=TRUE, ...){
24 | 		
25 | 	rmse.orig<-rep(NA, length(minr: maxr))
26 | 	rmse.perm<-rep(NA, length(minr: maxr))
27 | 	sd.rmse.perm<-rep(NA, length(minr: maxr))
28 | 	
29 | 	perms<-lapply(1:nperm, function(np){
30 | 		
31 | 		sample.int(ncol(D)*nrow(D), ncol(D)*nrow(D))
32 | 		
33 | 	})
34 | 	
35 | 	for(ix in 1:length(minr:maxr)){
36 | 		
37 | 		kk<-(minr:maxr)[ix]
38 | 		##	Original factorization
39 | 		if(method %in% ALGORITHMS[2:6]){
40 | 						
41 | 			fr.res<-factorize.alternate(D, k=kk, t.method=T_METHODS[method],...)
42 | 			rmse.orig[ix]<-fr.res$rmse
43 | 			
44 | 		}
45 | 		
46 | 		curr.rmses<-rep(NA, length(nperm))
47 | 		for(pp in 1:nperm){
48 | 			
49 | 			fr.res<-factorize.alternate(matrix(D[perms[[pp]]], ncol=ncol(D)), k=kk, t.method=T_METHODS[method],...)
50 | 			curr.rmses[pp]<-fr.res$rmse
51 | 	
52 | 		}	
53 | 		
54 | 		rmse.perm[ix]<-mean(curr.rmses)
55 | 		sd.rmse.perm[ix]<-sd(curr.rmses)
56 | 	
57 | 	}
58 | 
59 | 	
60 | 	if(plot){
61 | 		
62 | 		layout(matrix(1:2, ncol=2))
63 | 		x<-minr:maxr
64 | 		plot(x,rmse.perm, col="red", pch=0, type="l", ylim=c(0, max(c(rmse.orig, rmse.perm))), main="RMSE")
65 | 		segments(x, rmse.perm-sd.rmse.perm,x, rmse.perm+sd.rmse.perm, col="red")
66 | 		epsilon = 0.02
67 | 		segments(x-epsilon,rmse.perm+sd.rmse.perm,x+epsilon,rmse.perm+sd.rmse.perm, col="red")
68 | 		segments(x-epsilon,rmse.perm-sd.rmse.perm,x+epsilon,rmse.perm-sd.rmse.perm, col="red")
69 | 		
70 | 		lines(rmse.orig, col="green", pch=1, type="o")
71 | 		
72 | 		
73 | 		plot(rmse.perm-rmse.orig, col="black", pch=1, type="o", main="Gap")
74 | 		
75 | 		
76 | 	}
77 | 	
78 | 	if(return.all){
79 | 	
80 | 		return(list(Rs=minr:maxr, RMSE=rmse.orig, RMSE.perm=rmse.perm, sd.RMSE.perm=sd.rmse.perm))
81 | 		
82 | 	}else{
83 | 		
84 | 		return((minr:maxr)[which.max(rmse.perm-rmse.orig)])		
85 | 	}
86 | }
87 | 
88 | #######################################################################################################################
89 | 


--------------------------------------------------------------------------------
/R/markerSelection.R:
--------------------------------------------------------------------------------
  1 | # Routines for selecting cell type-specific quantitative markers
  2 | # 
  3 | # Author: Pavlo Lutsik
  4 | ###############################################################################
  5 | 
  6 | 
  7 | #
  8 | #
  9 | filter.by.mean.diff<-function(ref.data, min.diff=0.25, cell.types=colnames(ref.data)){
 10 | 	
 11 | 	if(length(cell.types)!=ncol(ref.data)){
 12 | 		stop("invalid value for cell.types")
 13 | 	}
 14 | 	
 15 | 	meandiff<-sapply(unique(cell.types), function(t){
 16 | 				
 17 | 				meandiff<-rowMeans(as.matrix(ref.data[,cell.types==t]))-rowMeans(ref.data[,cell.types!=t])
 18 | 				meandiff
 19 | 				
 20 | 			})
 21 | 	
 22 | 	
 23 | 	candidates<-lapply(unique(cell.types), function(t){
 24 | 				
 25 | 				mdiff<-meandiff[,t][abs(meandiff[,t])>min.diff]  
 26 | 				names(mdiff[order(abs(mdiff), decreasing=T)])
 27 | 				
 28 | 			})
 29 | 	
 30 | 	names(candidates)<-unique(cell.types)
 31 | 	
 32 | 	candidates
 33 | 		
 34 | }
 35 | 
 36 | #
 37 | #
 38 | filter.by.ttest<-function(ref.data, n.cand=10, preselected=NULL, cell.types=colnames(ref.data)){
 39 | 	
 40 | 	if(length(cell.types)!=ncol(ref.data)){
 41 | 		stop("invalid value for cell.types")
 42 | 	}
 43 | 	
 44 | 	
 45 | 	if(!is.null(preselected)){
 46 | 		if(!(is.list(preselected) && all(sapply(preselected, is.character)) && length(preselected)==length(unique(cell.types)) )){
 47 | 			stop("invalid argument for preselected: expected list ")
 48 | 		}
 49 | 		
 50 | 	}	
 51 | 		
 52 | 	ttest.stat<-lapply(unique(cell.types), function(t){ 
 53 | 				apply(ref.data[preselected[[t]],],1,function(r) {
 54 | 							if(length(r[cell.types==t])>1) {
 55 | 								t.test(r[cell.types==t], r[cell.types!=t])$statistic 
 56 | 							}else{ 
 57 | 								t.test(mu=r[cell.types==t], r[cell.types!=t])$statistic
 58 | 							}
 59 | 						})
 60 | 			})
 61 | 	
 62 | 	names(ttest.stat)<-unique(cell.types)
 63 | 	
 64 | 	candidates<-sapply(unique(cell.types),function(t){
 65 | 				
 66 | 				ord<-order(ttest.stat[[t]], decreasing=T)
 67 | 				
 68 | 				names(ttest.stat[[t]])[ord][1:n.cand]
 69 | 				
 70 | 			})
 71 | 	
 72 | 	colnames(candidates)<-unique(cell.types)
 73 | 	
 74 | 	candidates
 75 | }
 76 | 
 77 | #
 78 | #
 79 | filter.by.linearity<-function(candidates, data.set){
 80 | 	
 81 | 	cell.types<-colnames(candidates)
 82 | 	
 83 | 	#minAICcounts<-list()
 84 | 	
 85 | 	markers<-lapply(cell.types, function(ct){
 86 | 				
 87 | 				cand<-intersect(rownames(data.set),candidates[,ct])
 88 | 				models<-paste("resp", cand, sep="~")
 89 | 				models<-c(models, "resp~1")
 90 | 				
 91 | 				predictors<-t(data.frame(data.set)[cand,])
 92 | 				
 93 | 				design<-list()
 94 | 				lapply(1:dim(predictors)[2], function(i){
 95 | 							el(design, where=colnames(predictors)[i])<<-predictors[,i]; return(NULL)
 96 | 						})
 97 | 				
 98 | 				design$resp<-t(data.set)
 99 | 				logliks<-sapply(models, function(f) apply(residuals(lm(f, design)),2,logLik2))
100 | 				
101 | 				model.size<-sapply(strsplit(models,"+", fixed=T),  length)
102 | 				model.size[grep("~1", models)]<-0
103 | 				aics<-sapply(1:length(models), function(i) -2*logliks[,i]+2*model.size[i])
104 | 				
105 | 				bestModel<-models[apply(aics,1,which.min)]
106 | 				names(bestModel)<-rownames(aics)
107 | 				
108 | 				counts<-table(bestModel)
109 | 				#el(minAICcounts, where=ct)<<-counts
110 | 			
111 | 				counts["resp~1"]<-0
112 | 				counts<-counts[order(-counts)]
113 | 				print("Best models for:")
114 | 				print(ct)
115 | 				print(counts)
116 | 					
117 | 				return(strsplit(names(counts)[1], "~")[[1]][2])
118 | 				
119 | 			})
120 | 			
121 | 	markers
122 | 	
123 | }
124 | 
125 | 
126 | 


--------------------------------------------------------------------------------
/R/matching.R:
--------------------------------------------------------------------------------
  1 | #######################################################################################################################
  2 | ##
  3 | ##  Matching the recovered latent components to the true (experimentally obtained) ones 
  4 | ##
  5 | #######################################################################################################################
  6 | 
  7 | match.matrix<-function(TT, Tref, method="pearson"){
  8 | 	
  9 | 	if(method=="pearson"){
 10 | 		
 11 | 		matchmat<-cor(TT,Tref, use="pairwise.complete.obs")
 12 | 		
 13 | 	}else if(method=="spearman"){
 14 | 		
 15 | 		matchmat<-cor(TT,Tref, method="spearman", use="pairwise.complete.obs")
 16 | 		
 17 | 	}else if(method=="anova"){
 18 | 		
 19 | 		fitsmat<-matrix(0L, nrow=ncol(TT), ncol=ncol(Tref))
 20 | 		signsmat<-matrix(1L, nrow=ncol(TT), ncol=ncol(Tref))
 21 | 		
 22 | 		for(tt in 1:ncol(TT)){
 23 | 			fits<-list()
 24 | 			signs<-integer()
 25 | 			for(i in 1:ncol(Tref)){
 26 | 				signsmat[tt,i]<-sign(mean(Tref[TT[,tt]==range(TT)[2],i])-mean(Tref[TT[,tt]==range(TT)[1],i]))
 27 | 				fitsmat[tt,i]<-summary(aov(ct~comp1, data=data.frame(ct=Tref[,i], comp1=TT[,tt])))[[1]]$`F value`[1]	
 28 | 			}
 29 | 		}
 30 | 		
 31 | 		matchmat<-signsmat*fitsmat
 32 | 	}
 33 | }
 34 | 
 35 | #corrmatch<-function(TT, Tref, method="pearson",return.vals=F){
 36 | #	
 37 | #	if(method=="pearson"){
 38 | #		
 39 | #		values<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,max)
 40 | #		indices<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,which.max)
 41 | #		
 42 | #	}else if(method=="spearman"){
 43 | #		
 44 | #		values<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,max)
 45 | #		indices<-apply(cor(TT,Tref, method="spearman", use="pairwise.complete.obs"),1,which.max)
 46 | #		
 47 | #	}else if(method=="anova"){
 48 | #		
 49 | #		indices<-integer()
 50 | #		values<-numeric()
 51 | #		for(tt in 1:ncol(TT)){
 52 | #			fits<-list()
 53 | #			signs<-integer()
 54 | #			for(i in 1:ncol(Tref)){
 55 | #				signs<-c(signs, sign(mean(Tref[TT[,tt]==range(TT)[2],i])-mean(Tref[TT[,tt]==range(TT)[1],i])))
 56 | #				fits[[length(fits)+1]]<-aov(ct~comp1, data=data.frame(ct=Tref[,i], comp1=TT[,tt]))	
 57 | #			}
 58 | #			fvals<-sapply(fits, function(fit) summary(fit)[[1]]$`F value`[1])
 59 | #			fvals<-fvals*signs
 60 | #			indices<-c(indices,(which.max(fvals)))
 61 | #			values<-c(values,(max(fvals)))
 62 | #		}
 63 | #		
 64 | #	}
 65 | #	if(return.vals){
 66 | #		return(values)
 67 | #	}else{
 68 | #		return(indices)
 69 | #	}
 70 | #	
 71 | #}
 72 | 
 73 | 
 74 | corrmatch<-function(TT, Tref, method="pearson",return.vals=F){
 75 | 	
 76 | 	mmat<-match.matrix(TT,Tref,method)
 77 | 	## a very bad solution for the degenerate cases
 78 | 	mmat[is.na(mmat)]<-0 
 79 | 	if(return.vals){
 80 | 		return(apply(mmat, 1, max))
 81 | 	}else{
 82 | 		return(apply(mmat, 1, which.max))
 83 | 	}
 84 | }
 85 | 
 86 | #######################################################################################################################
 87 | #'
 88 | #' greedymatch
 89 | #'
 90 | #' Matching of latent components 
 91 | #'
 92 | #'
 93 | #' @details suppose Tstar contains K* topics (columns)
 94 | #' 1. select the K* most popular topics of That
 95 | #' 2. use a greedy method to match them to those of Tstar (w.r.t L2 norm)
 96 | #'    and correspondingly permute rows of Ahat
 97 | #' 
 98 | #' @author Martin Slawski
 99 | #' @author R port by Pavlo Lutsik
100 | #' 
101 | #' @export
102 | #' 
103 | greedymatch<-function(Tstar,That,Ahat){
104 | 	
105 | 	### 1. select K* most popular topics of That, get submatrix T, A
106 | 	k <- size(That,2) ;
107 | 	kstar <- size(Tstar,2);
108 | 	
109 | 	if (k < kstar){
110 | 		# pad zeros rows and columns to T, A when k < K*
111 | 		Delta <- kstar - k;
112 | 		That <- cbind(That, matrix(0, nrow=nrow(That), ncol=Delta));
113 | 		Ahat <- rbind(Ahat, matrix(0, nrow=Delta, ncol=ncol(Ahat)));
114 | 	}
115 | 	
116 | 	pop <- rowSums(Ahat) / sum(Ahat)
117 | 	poporder <- order(pop, decreasing=TRUE)
118 | 	TT <- That[,poporder[1:kstar],drop=FALSE]
119 | 	A <- Ahat[poporder[1:kstar],,drop=FALSE]
120 | 	
121 | 	### matching submatrix
122 | 	idx <- 1:kstar
123 | 	Tm <- zeros(nrow(TT), ncol(TT))
124 | 	Am <- zeros(nrow(A), ncol(A))
125 | 	for(i in 1:kstar){
126 | 		
127 | 		topic <- Tstar[,i,drop=FALSE]
128 | 		dist <- zeros(length(idx),1)
129 | 		for(j in 1:length(idx)){
130 | 			dist[j,1] = norm(topic-TT[,idx[j],drop=FALSE],"2")
131 | 		}
132 | 		ii = which.min(dist)
133 | 		Tm[,i] = TT[,idx[ii]]
134 | 		Am[i,] = A[idx[ii],]
135 | 		idx<-idx[-ii]
136 | 		
137 | 	}
138 | 	
139 | 	return(list(Tm=Tm, Am=Am)) 
140 | }
141 | 
142 | #######################################################################################################################
143 | #'
144 | #' match.components
145 | #'
146 | #' Matching the recovered components to the reference
147 | #' 
148 | #' @param That 		recovered components from factorization 
149 | #' @param Tref		reference components 
150 | #' @param method	one of "corrmatch" and "greedymatch"
151 | #' @param Ahat 		for method "greedymatch": recovered mixing proportions
152 | #' @param check		for method "corrmatch": a flag specifying whether to check uniquenes of the match
153 | #' 
154 | #' 
155 | #' @details Wrapper function for component matching methods 
156 | #' 
157 | #' @return 			a vector of indices. The length and the order of the vector corresponds to the columns of \code{TT}
158 | #' 					and the indices specify the columns of Tref
159 | #' 
160 | #' @export 
161 | #' 
162 | matchLMCs<-function(
163 | 		That, 
164 | 		Tref,
165 | 		method="corrmatch",
166 | 		check=TRUE,
167 | 		Ahat=NULL
168 | 		){
169 | 	
170 | 	if(method=="corrmatch"){
171 | 	
172 | 		mr<-corrmatch(That, Tref, method="pearson", return.vals=F)
173 | 		
174 | 		if(check && length(unique(mr))<ncol(Tref)){
175 | 			return(NULL)
176 | 		}else{
177 | 			return(mr)
178 | 		}
179 | 		
180 | 	}else if(method=="greedymatch"){
181 | 		
182 | 		if(is.null(Ahat)){
183 | 			stop("need estimated mixture components matrix for method greedymatch")
184 | 		}
185 | 		
186 | 		greedymatch(Tref, That, Ahat)
187 | 		
188 | 	}else{
189 | 		stop("match.components: this method has not been implemented yet")
190 | 	}
191 | }
192 | 
193 | #######################################################################################################################
194 | 
195 | #
196 | #  Legacy matching routine
197 | #
198 | 
199 | assign.components<-function(TT, Tref, method="pearson",return.vals=F){
200 | 	
201 | 	if(method=="pearson"){
202 | 		values<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,max)
203 | 		indices<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,which.max)
204 | 	}else if(method=="spearman"){
205 | 		values<-apply(cor(TT,Tref, use="pairwise.complete.obs"),1,max)
206 | 		indices<-apply(cor(TT,Tref, method="spearman", use="pairwise.complete.obs"),1,which.max)
207 | 	}else if(method=="anova"){
208 | 		indices<-integer()
209 | 		values<-numeric()
210 | 		for(tt in 1:ncol(TT)){
211 | 			fits<-list()
212 | 			signs<-integer()
213 | 			for(i in 1:ncol(Tref)){
214 | 				signs<-c(signs, sign(mean(Tref[TT[,tt]==range(TT)[2],i])-mean(Tref[TT[,tt]==range(TT)[1],i])))
215 | 				fits[[length(fits)+1]]<-aov(ct~comp1, data=data.frame(ct=Tref[,i], comp1=TT[,tt]))	
216 | 			}
217 | 			fvals<-sapply(fits, function(fit) summary(fit)[[1]]$`F value`[1])
218 | 			fvals<-fvals*signs
219 | 			indices<-c(indices,(which.max(fvals)))
220 | 			values<-c(values,(max(fvals)))
221 | 		}
222 | 		
223 | 	}
224 | 	if(return.vals){
225 | 		return(values)
226 | 	}else{
227 | 		return(indices)
228 | 	}
229 | 	
230 | }


--------------------------------------------------------------------------------
/R/reference.R:
--------------------------------------------------------------------------------
  1 | ########################################################################################################################################
  2 | #' reference.R
  3 | #' -------------------------------------------------------------------------------------------------------------------------------------
  4 | #' This scripts contains functions for assocating MeDeCom with known reference cell type profiles
  5 | ######################################################################################################################################## 
  6 | 
  7 | ######################################################################################################################################## 
  8 | #' GLOBALS
  9 | AVAIL.REFS <- c("reinius","local")
 10 | 
 11 | ######################################################################################################################################## 
 12 | #' FUNCTIONS
 13 | 
 14 | #' run.refbased
 15 | #' 
 16 | #' Function to link MeDeCom's output to reference methylation profiles. The function returns both the MeDeCom result and a methyalation
 17 | #' matrix with reference profiles from the source specified.
 18 | #' 
 19 | #' @param rnb.set An Object of type \code{RnBSet} from the \pkg{RnBeads} package containing methylation information on which
 20 | #'                 the deconvolution is to be performed
 21 | #' @param Ks Numeric vector containing the number of components to be computed by \pkg{MeDeCom}
 22 | #' @param lambdas Numeric vector specifying the regularzation parameters to be explored
 23 | #' @param cg_subsets List of numeric vectors specifying the rows that are to be used in the analysis.
 24 | #' @param opt.method Optimization method to be employed. For further information see \code{\link{runMeDeCom}}
 25 | #' @param temp.dir Optional temporary directory to store intermediate results
 26 | #' @param ref.base Reference profile data base to be used. Supported are 
 27 | #'                 \itemize{
 28 | #'                   \item \code{"reinius"} A blood cell type reference methylome data set from Reinius et.al. (Reference to be added)
 29 | #'                 }
 30 | #'                 \itemize{
 31 | #'                   \item \code{"local"} The reference data set is provided by the user. If this option is selected, \code{ref.set} 
 32 | #'                                      must not be empty.
 33 | #'                 }
 34 | #' @param most.var Number specifying the number of most variable to be selected from \code{rnb.set}
 35 | #' @param NCORES Number of cores to be used for analysis.
 36 | #' @param cluster.settings Setting for the environment of a high performance compute cluster. Passed to \code{\link{runMeDeCom}}
 37 | #' @param ref.set A \code{RnBSet} object containing a reference data set to be used besides \code{rnb.set}. Is only compatible with
 38 | #'                 \code{ref.base="local"}.
 39 | #' @param id.col The name of the column in the sample annotation sheet of \code{ref.set} containing the reference cell type. Is only
 40 | #'                 compatible with \code{ref.base="local"}.
 41 | #' @param save.restricted.sites Flag indicating if \code{rnb.set} restricted to the \code{most.var} sites is to be saved in \code{temp.dir}
 42 | #'                             for potential downstream analysis.                 
 43 | #' 
 44 | #' @return A list object containing two elements \itemize{
 45 | #'            \item \code{"MeDeComSet"} Results of applying MeDeCom with the setting above
 46 | #'            \item \code{"RefMeth"} A matrix containing reference profiles from the specified data set. The number of rows in 
 47 | #'                        this matrix has been reduced according to the most variable sites in \code{rnb.set}.
 48 | #'         }
 49 | #' 
 50 | #' @details This function applied MeDeCom to the specified data set and only support \code{RnBSet} objects as inputs. The function
 51 | #'  internally manipulated the object by selecting the most variable sites according to \code{most.var}. This leads to a decrease in
 52 | #'  the number of rows in the reference profiles to this number. 
 53 | #'  
 54 | #'  Please note that an active internet connection is required, since this routine downloads data through the world wide web.
 55 | #'  
 56 | #' @author Michael Scherer
 57 | #' 
 58 | #' @export
 59 | run.refbased <- function(rnb.set, 
 60 |                          Ks, 
 61 |                          lambdas,
 62 |                          cg_subsets=NULL,
 63 |                          opt.method="MeDeCom.cppTAfact",
 64 |                          temp.dir=NULL,
 65 |                          ref.base="reinius",
 66 |                          most.var=NULL,
 67 |                          NCORES=1,
 68 |                          cluster.settings=NULL,
 69 |                          ref.set=NULL,
 70 |                          id.col=NULL,
 71 |                          save.restricted.sites=FALSE){
 72 |   #require("RnBeads")
 73 |   res <- load.ref.set(ref.base,temp.dir)
 74 |   if(!is.null(res)){
 75 |     if(!is.null(ref.set)){
 76 |       warning("ref.set specified although ref.base is local. It will be ignored.")
 77 |     }
 78 |     if(!is.null(id.col)){
 79 |       warning("id.col specified although ref.base is local. It will be ignored.")
 80 |     }
 81 |     id.col <- res$ID
 82 |     ref.set <- res$rnb.set
 83 |   }else{
 84 |     if(is.null(ref.set)||is.null(id.col)){
 85 |       stop("Please specify ref.set and ref.id.col if.")
 86 |     }
 87 |   }
 88 | 
 89 |   rnb.set <- rnb.execute.imputation(rnb.set)
 90 |   anno.set <- annotation(rnb.set)
 91 |   anno.ref <- annotation(ref.set)
 92 |   anno.set <- GRanges(Rle(anno.set$Chromosome),IRanges(start=anno.set$Start,end=anno.set$End))
 93 |   anno.ref <- GRanges(Rle(anno.ref$Chromosome),IRanges(start=anno.ref$Start,end=anno.ref$End))
 94 |   op <- findOverlaps(anno.set,anno.ref)
 95 |   #anno.set <- anno.set[queryHits(op)]
 96 |   rem.sites <- !(1:nsites(rnb.set) %in% queryHits(op))
 97 |   rnb.set <- remove.sites(rnb.set,rem.sites)
 98 |   meth.data <- meth(rnb.set)
 99 |   if(!is.null(cg_subsets)){
100 |     new.anno <- anno.set[!rem.sites]
101 |     op2 <- findOverlaps(new.anno,anno.set)
102 |     cg_subsets <- lapply(cg_subsets,function(x){
103 |       matchi <- match(x,subjectHits(op2))
104 |       queryHits(op2)[matchi[!is.na(matchi)]]
105 |     })
106 |   }
107 |   if(!is.null(most.var)){
108 |     vars <- apply(meth.data,1,function(x)var(x,na.rm=T))
109 |     ordered <- order(vars,decreasing=T)
110 |     rem.sites <- rep(TRUE,nsites(rnb.set))
111 |     if(most.var>nsites(rnb.set)){
112 |       warning("most.var bigger than number of sites, reduced to maximum")
113 |       most.var <- nsites(rnb.set)
114 |     }
115 |     rem.sites[queryHits(op)][ordered[1:most.var]] <- FALSE
116 |     #rem.sites[sample(1:nsites(rnb.set),most.var)] <- FALSE
117 |     rnb.set <- remove.sites(rnb.set,rem.sites)
118 |     if(save.restricted.sites){
119 |       save.rnb.set(rnb.set,file.path(temp.dir,"restrictedRnBSet"))
120 |     }
121 |     anno.set <- anno.set[!rem.sites]
122 |     op <- findOverlaps(anno.set,anno.ref)
123 |   }
124 |   meth.ref <- meth(ref.set)[subjectHits(op),]
125 |   meth.ref <- rnb.execute.imputation(meth.ref)
126 |   colnames(meth.ref) <- pheno(ref.set)[,id.col]
127 |   medecom.result <- runMeDeCom(rnb.set,
128 |                                Ks=Ks,
129 |                                lambdas=lambdas,
130 |                                cg_subsets=cg_subsets,
131 |                                opt.method=opt.method,
132 |                                temp.dir=temp.dir,
133 |                                NCORES=NCORES,
134 |                                cluster.settings=cluster.settings)
135 |   if(!is.null(cg_subsets)){
136 |     medecom.result@parameters$GROUP_LISTS <- cg_subsets
137 |   }
138 |   return(list("MeDeComSet"=medecom.result,"RefMeth"=meth.ref))
139 | }
140 | 
141 | #' cluster.refbased
142 | #' 
143 | #' This routine performs general hierachical clustering as in the \code{\link{plotLMCs}} and adds reference methylome profiles to this
144 | #' clustering. 
145 | #' 
146 | #' @param ref.run A result of \code{\link{run.refbased}} with the results of \code{\link{runMeDeCom}} and the reference data matrix
147 | #' @param K Selected number of components
148 | #' @param lambda Selected regularization parameter
149 | #' @param cg_subset Numeric vector representing the CpG sites selected for analysis from the orignial MeDeComSet
150 | #' @param plot.type Type of plot to be created, is passed to \code{\link{plotLMCs}}
151 | #' @param ... Further arguments passed to plotLMCs
152 | #' 
153 | #' @return A plot object displaying the clustering
154 | #' 
155 | #' @author Michael Scherer
156 | #' 
157 | #' @export
158 | cluster.refbased <- function(ref.run,
159 |                              K,
160 |                              lambda,
161 |                              cg_subset=1,
162 |                              plot.type="dendrogram",
163 |                              ...){
164 |   if(!is.list(ref.run) || length(ref.run)<2){
165 |     stop("Argument needs to be the results obtained from 'run.refbased'")
166 |   }
167 |   medecom.result <- ref.run$MeDeComSet
168 |   meth.ref <- ref.run$RefMeth
169 |   if(!is.null(medecom.result@parameters$GROUP_LISTS)){
170 |     sset <- medecom.result@parameters$GROUP_LISTS[[cg_subset]]
171 |     meth.ref <- meth.ref[sset,]
172 |   }
173 |   plot <- plotLMCs(medecom.result,K=K,lambda=lambda,cg_subset=cg_subset,type=plot.type,Tref=meth.ref,...)
174 |   return(plot)
175 | }
176 | 
177 | #' plotLMC.reference
178 | #' 
179 | #' This routine uses a reference methylome, together with an annotation, and calls plotLMCs for the matching postions
180 | #' 
181 | #' @param medecom.set An object of type MeDeComSet
182 | #' @param ref.meth Reference methylome in the form of a matrix of a RnBSet
183 | #' @param ann.md Genomic annotation for the CpG sites present in medecom.set
184 | #' @param ann.ref Genomic annotation for the CpG sites in the referenc methylome. Can be omitted, if ref.meth is an RnBSet object
185 | #' @param cg_subset The cg_subset of interest
186 | #' @param K K value
187 | #' @param lambda lambda value
188 | #' @param type Plot type, see \code{\link{plotLMCs}}
189 | #' @param chrom.col The chromosome column name in ann.md and ann.ref
190 | #' @param start.col The start column name in ann.md and ann.ref
191 | #' @param end.col The end column name in ann.md and ann.ref
192 | #' @param ct.color.column A column name in the phenotypic information of ref.meth to be shown in the clustering
193 | #' @return NULL
194 | #' @author Michael Scherer
195 | #' @export
196 | 
197 | plotLMC.reference <- function(medecom.set,
198 |                               ref.meth,
199 |                               ann.md,
200 |                               cg_subset=medecom.set@parameters$cg_subsets[1],
201 |                               K=medecom.set@parameters$Ks[1],
202 |                               lambda=medecom.set@parameters$lambdas[1],
203 |                               ann.ref=NULL,
204 |                               type="dendrogram",
205 |                               chrom.col="Chromosome",
206 |                               start.col="Start",
207 |                               end.col="End",
208 |                               ct.color.column="cell_type"){
209 |   #require("RnBeads")
210 |   if(!inherits(medecom.set,"MeDeComSet")){
211 |     stop("Invalid value for medecom.set")
212 |   }
213 |   if(!inherits(ref.meth,"RnBSet") && !is.matrix(ref.meth)){
214 |     stop("Invalid value for ref.meth; needs to be RnBSet or matrix")
215 |   }
216 |   if(is.matrix(ref.meth) && is.null(ann.ref)){
217 |     stop("Genomic annotation needs to be provided for the reference methylome")
218 |   }
219 |   if(inherits(ref.meth,"RnBSet")){
220 |     ann.ref <- annotation(ref.meth)
221 |     meth.ref <- meth(ref.meth)
222 |     colnames(meth.ref) <- pheno(ref.meth)[,ct.color.column]
223 |     ref.meth <- meth.ref
224 |     rm(meth.ref)
225 |   }
226 |   ann.md <- GRanges(Rle(ann.md[,chrom.col]),IRanges(start=ann.md[,start.col],end=ann.md[,end.col]))
227 |   ann.ref <- GRanges(Rle(ann.ref[,chrom.col]),IRanges(start=ann.ref[,start.col],end=ann.ref[,end.col]))
228 |   op <- findOverlaps(ann.md,ann.ref)
229 |   if(any(!(1:length(ann.md) %in% queryHits(op)))){
230 |     stop("Insuffiecient information present in reference methylome")
231 |   }
232 |   ref.meth <- ref.meth[subjectHits(op),]
233 |   ref.meth <- ref.meth[medecom.set@parameters$GROUP_LISTS[[cg_subset]],]
234 |   ref.meth <- rnb.execute.imputation(ref.meth,method="knn")
235 |   plotLMCs(medecom.set,Tref=ref.meth,K=K,lambda=lambda,cg_subset=cg_subset,type = type)
236 | }
237 | #' load.ref.set
238 | #' 
239 | #' This functions loads a reference data base and return the corresponding \code{RnBSet}.
240 | #' 
241 | #' @param ref.base Reference base to be used. See \code{\link{run.refbased}} for further information.
242 | #' @param temp.dir Temporary directory to store the object.
243 | #' 
244 | #' @return List of two elements \itemize{
245 | #'           \item ID sample identifier column of the refernce data set used for plotting
246 | #'           \item rnb.set \code{RnBSet} object containing the reference methylation profiles
247 | #' }
248 | #' 
249 | #' @author Michael Scherer
250 | #' 
251 | #' @noRd
252 | 
253 | load.ref.set <- function(ref.base,temp.dir=NULL){
254 |   #require("RnBeads")
255 |   if(!ref.base %in% AVAIL.REFS){
256 |     stop(paste("Unsupported reference data base, must be one of",AVAIL.REFS))
257 |   }
258 |   if(is.null(temp.dir)){
259 |     temp.dir <- tempdir()
260 |   }
261 |   if(ref.base=="local") return(NULL)
262 |   id.col <- "sample_id"
263 |   if(ref.base=="reinius"){
264 |     location <- file.path(temp.dir,"Reinius_Blood_Reference.zip")
265 |     if(!file.exists(location)){
266 |       cat("Downloading Reinius reference set \n")
267 |       downloaded <- tryCatch(download.file("http://rnbeads.mpi-inf.mpg.de/publication/Reinius_Blood_Reference.zip",destfile = location),error=function(e){
268 |         if(inherits(e,"Error")){
269 |           stop("Failed to download reference data set. Check internet connection.")
270 |         }
271 |       })
272 |     }
273 |     ref.set <- load.rnb.set(location)
274 |     id.col <- "Cell/Tissue"
275 |   }
276 |   return(list(ID=id.col,rnb.set=ref.set))
277 | }
278 | 


--------------------------------------------------------------------------------
/R/simulation.R:
--------------------------------------------------------------------------------
  1 | #######################################################################################################################
  2 | #
  3 | # Routines for simulation of the DNA methylation profiles
  4 | #
  5 | # @author Pavlo Lutsik
  6 | #######################################################################################################################
  7 | 
  8 | #
  9 | # simulate.source.profile
 10 | #
 11 | # Generate the source DNA methylation profile
 12 | #
 13 | simulate.source.profile<-function(m, vals=c(0,1), probs=c(0.5,0.5)){
 14 | 
 15 | 	sample(vals, m, replace=TRUE)
 16 | 	
 17 | }
 18 | #
 19 | # simulate.ct.profile
 20 | #
 21 | # Generate the characteristic DNA methylation profiles of cell types
 22 | #
 23 | simulate.ct.profiles<-function(source, similarities){
 24 | 	 
 25 | 	CT.AVG<-sapply(similarities,function(sim){
 26 | 				
 27 | 				idx<-sample.int(length(source),floor((1-sim)*length(source)))
 28 | 				CT.M<-source
 29 | 				CT.M[idx]<-sample(c(0,1), floor((1-sim)*length(source)), replace=TRUE)
 30 | 				CT.M
 31 | 				
 32 | 			})
 33 | 	
 34 | 	CT.AVG
 35 |   	
 36 | }
 37 | 
 38 | # simulate.ind.profiles
 39 | #
 40 | # Generate the characteristic DNA methylation profiles of cell types
 41 | #
 42 | 
 43 | simulate.ind.profiles<-function(
 44 | 		ctm, 
 45 | 		n,
 46 | 		method="binom",
 47 | 		vars=NULL, 
 48 | 		chi.sq.df=0.11,
 49 | 		success.prob=NULL,
 50 | 		alpha=0.1,
 51 | 		beta=10,
 52 | 		vals=c(0,1)){
 53 | 
 54 | #	if(is.null(CHI.SQ.DF)){
 55 | #		CHI.SQ.DF=(20/n)
 56 | #	}
 57 | 	
 58 | 	
 59 | 	if(method=="normal"){
 60 | 		
 61 | 		if(is.null(vars)){
 62 | 			sds<-sqrt(rchisq(nrow(ctm), df=chi.sq.df)/n)
 63 | 		}
 64 | 		
 65 | 		profiles<-lapply(1:ncol(ctm), function(iid){
 66 | 					
 67 | 					cell.sample<-sapply(1:m, function(cgi){
 68 | 								
 69 | 								cg.vector<-rnorm(n, mean=ctm[cgi,iid], sd=sds[cgi]*scale)
 70 | 								
 71 | 							})
 72 | 					cell.sample<-MeDeCom:::projectV(cell.sample, vals)
 73 | 					t(cell.sample)
 74 | 					
 75 | 				})
 76 | 	}else if(method=="binom"){
 77 | 		
 78 | 		#success.prob<-scale*rchisq(nrow(ctm), df=chi.sq.df)/n
 79 | 		
 80 | 		#success.prob[success.prob>0.5]<-0.5
 81 | 		
 82 | 		profiles<-lapply(1:ncol(ctm), function(iid){
 83 | 					
 84 | 				if(is.null(success.prob))
 85 | 					success.prob<-rbeta(nrow(ctm), shape1=alpha, shape2=beta)	
 86 | 					
 87 | 				ind.prof<-repmat(ctm[,iid,drop=F],m=n,n=1)
 88 | 					
 89 | 				change.v<-t(sapply(success.prob, function(sp) rbinom(n,1,prob=sp)))	
 90 | 				
 91 | 				ind.prof[change.v==1]<-sign(1-ind.prof[change.v==1])
 92 | 								
 93 | 				ind.prof
 94 | 				})
 95 | 		
 96 | 	}
 97 | 	
 98 | 	
 99 | 	profiles<-lapply(1:n, function(i) sapply(1:ncol(ctm), function(j) profiles[[j]][,i] ))
100 | 	
101 | 	
102 | 	profiles
103 | 	
104 | }
105 | 
106 | # prepare.average.profile
107 | #
108 | # Prepare individual profiles from existing data 
109 | #
110 | 
111 | prepare.average.profile<-function(pure.ct, 
112 | 		samp,
113 | 		o=0.45, 
114 | 		mean=F){
115 | 	
116 | 	n<-nrow(pure.ct)
117 | 	
118 | 	sds.samp<-apply(pure.ct[samp,,drop=F],1,sd)
119 | 	
120 | 	proj.profile.samp<-pure.ct[samp,,drop=F]
121 | 	if(mean){
122 | 		proj.profile.samp<-matrix(rowMeans(proj.profile.samp))
123 | 	}
124 | 	proj.profile.samp[proj.profile.samp<o]<-0
125 | 	proj.profile.samp[proj.profile.samp>1-o]<-1
126 | 	proj.profile.samp[proj.profile.samp>=o & proj.profile.samp<=1-o]<-0.5
127 | 	
128 | 	
129 | 	
130 | 	return(list(proj.profile.samp, sds.samp))
131 | 	
132 | }
133 | 
134 | # simulate.populations
135 | #
136 | # Generate the cell populations
137 | #
138 | simulate.populations<-function(ind.profiles,
139 | 		sds.samp=NULL,
140 | 		vals=c(0,1),
141 | 		method="binom",
142 | 		success.prob=NULL,
143 | 		m=nrow(ind.profiles),
144 | 		nc=1000,
145 | 		alpha=0.1,
146 | 		beta=10){
147 | 		
148 | 		if(method=="normal")
149 | 		{
150 | 			profiles<-lapply(1:ncol(ind.profiles), function(iid){
151 | 				
152 | 				cell.sample<-sapply(1:m, function(cgi){
153 | 							
154 | 							cg.vector<-rnorm(nc, mean=ind.profiles[cgi,iid], sd=sds.samp[cgi]*scale)
155 | 							
156 | 						})
157 | 				cell.sample<-MeDeCom:::projectV(cell.sample, vals)
158 | 				t(cell.sample)
159 | 				
160 | 			})
161 | 	
162 | 		}else if(method=="binom"){
163 | 			
164 | 			#success.prob<-scale*rchisq(nrow(ind.profiles), df=chi.sq.df)/length(ind.profiles)
165 | 			
166 | 			if(is.null(success.prob))
167 | 				success.prob<-rbeta(nrow(ind.profiles), shape1=alpha, shape2=beta)
168 | 			
169 | 			#success.prob[success.prob>0.5]<-0.5
170 | 			
171 | 			profiles<-lapply(1:ncol(ind.profiles), function(iid){
172 | 						
173 | 						pop.prof<-repmat(ind.profiles[,iid,drop=F],m=nc,n=1)
174 | 						
175 | 						change.v<-t(sapply(success.prob, function(sp) rbinom(nc,1,prob=sp)))	
176 | 						
177 | 						pop.prof[change.v==1]<-sign(1-pop.prof[change.v==1])
178 | 						
179 | 						pop.prof
180 | 						
181 | 					})
182 | 			
183 | 		}
184 | 		profiles
185 | }
186 | 
187 | 
188 | # mix.populations
189 | #
190 | # Mix the cell populations according in proportions,
191 | # given by the mixing matrix
192 | #
193 | mix.populations<-function(
194 | 		populations
195 | 		,mixing.matrix
196 | 		,cell.subset.size=NULL
197 | 		,noize=0){
198 | 	
199 | 	if(!is.list(populations))
200 | 		stop("Invalid value for populations")
201 | 	
202 | 	
203 | 	if(ncol(mixing.matrix)!=length(populations))
204 | 		stop("The second dimension of the mixing matrix has to the number of cell types")
205 | 		
206 | 	nt<-length(populations[[1]])
207 | 	
208 | 	results<-vector("list", ncol(mixing.matrix))
209 | 	
210 | 	if(length(populations[[1]])==nrow(mixing.matrix)){
211 | 		
212 | 		for(j in 1:ncol(mixing.matrix)){
213 | 			nc<-ncol(populations[[j]][[1]])
214 | 			
215 | 			if(is.null(cell.subset.size))
216 | 				css<-nc
217 | 			else
218 | 				css<-cell.subset.size
219 | 			
220 | 			subs.sizes<-sapply(mixing.matrix[,j], function(fr) floor(css*fr))
221 | 			cell.subsets<-lapply(1:nt,function(x) sample.int(ncol(populations[[j]][[nt]]), subs.sizes[x])) 
222 | 			
223 | 			
224 | 			mix<-list()
225 | 			for(i in 1:nt){
226 | 				mix[[i]]<-populations[[j]][[i]][,cell.subsets[[i]]]				
227 | 			}
228 | 			
229 | 			results[[j]]<-do.call("cbind", mix)
230 | 			rm(mix)
231 | 		}
232 | 			
233 | 		
234 | 	}else{
235 | 			warning("Not implemented yet")
236 | 			results<-NULL
237 | 		
238 | 	}
239 | 	
240 | 	return(results)
241 | 		
242 | }
243 | 
244 | # introduce.imprinting
245 | #
246 | # Simulate genomic imprinting
247 | #
248 | 
249 | introduce.imprinting<-function(populations,
250 | 		fraction=0.02,
251 | 		ixx=NULL){
252 | 	
253 | 	ixx<-sample.int(nrow(populations[[1]]),floor(fraction*nrow(populations[[1]])))
254 | 	
255 | 
256 | 	for(i in 1:length(populations))
257 | 	{
258 | 		populations[[i]][ixx,]<-0.5
259 | 	}
260 | 	
261 | 	populations
262 | 	
263 | }
264 | 
265 | # introduce.asm
266 | #
267 | # Simulate allele-specific methylation
268 | #
269 | introduce.asm<-function(populations,
270 | 		fraction=0.1,
271 | 		sites=NULL){
272 | 	
273 | 	
274 | 	for(i in 1:length(populations))
275 | 	{
276 | 		ixx<-sample.int(nrow(populations[[i]]),floor(fraction*nrow(populations[[i]])))
277 | 		snp.vals<-sample(c(0,0.5,1), length(ixx), replace=TRUE)
278 | 	
279 | 		populations[[i]][ixx,]<-snp.vals
280 | 		
281 | 	}
282 | 
283 | 	populations
284 | }
285 | 
286 | # introduce.effects
287 | #
288 | # Simulate true biological effects
289 | #
290 | introduce.effects<-function(populations,
291 | 		types,
292 | 		sites,
293 | 		signs,
294 | 		means, 
295 | 		sd.mean.frac=0.1){
296 | 	
297 | 	
298 | 	for(i in 1:length(populations)){
299 | 	
300 | 		for(j in types){
301 | 		
302 | 		effects<-rnorm(length(sites[[j]]), mean=means[j], sd=sd.mean.frac*means[j])		
303 | 		if(is.null(signs[[j]]))
304 | 			signs[[j]]<-sign(rnorm(length(sites[[j]])))
305 | 		
306 | 		new.values<-sapply(1:length(sites[[j]]), function(si){
307 | 					
308 | 				if(signs[[j]][si]==1){
309 | 					
310 | 					new.value<-sapply(populations[[i]][[j]][sites[[j]][si],], function(value){
311 | 						if(value %in% c(0,0.5)){
312 | 							if(runif(1)<effects[si]/(1-value)){
313 | 							    return(1)	
314 | 							}else{
315 | 								return(value)
316 | 							}
317 | 						}else{
318 | 							return(value)
319 | 						}
320 | 					})
321 | 										
322 | 				}else{
323 | 					new.value<-sapply(populations[[i]][[j]][sites[[j]][si],], function(value){
324 | 								if(value %in% c(0.5,1)){
325 | 									if(runif(1)<effects[si]/(value)){
326 | 										return(0)	
327 | 									}else{
328 | 										return(value)
329 | 									}
330 | 								}else{
331 | 									return(value)
332 | 								}
333 | 							})
334 | 					
335 | 				}		
336 | 					
337 | 				})
338 | 		
339 | 		populations[[i]][[j]][sites[[j]],]<-t(new.values)
340 | 		
341 | 		}
342 | 	}
343 | 	
344 | 	return(populations)	
345 | }
346 | 
347 | summarize.patterns<-function(cell.pops){
348 | 	
349 | 	up_pops<-lapply(cell.pops, unique, MARGIN=2)
350 | 	
351 | 	ups<-do.call("cbind", up_pops)
352 | 	
353 | 	up<-unique(ups, MARGIN=2)
354 | 	
355 | 	#large.matrix<-do.call("cbind", cell.pops)
356 | 	
357 | 	fmat<-matrix(0, nrow=ncol(up), ncol=length(cell.pops))
358 | 	
359 | 	for(upi in 1:ncol(up)){
360 | 		
361 | 		for(popi in 1:length(cell.pops)){
362 | 			fmat[upi,popi]<-sum(colSums(abs(cell.pops[[popi]]-up[,upi]))==0)/ncol(cell.pops[[popi]])
363 | 		}
364 | 	}
365 | 	
366 | 	return(list(C=up, F=fmat))
367 | }
368 | 
369 | # simulate.450k
370 | #
371 | # Simulate 450k microarray measurement 
372 | #
373 | 
374 | simulate.450k<-function(
375 | 		cell.pops
376 | 		,ncs=ncol(cell.pops[[1]])
377 | 		,technical.effects=TRUE
378 | 		,totalInt=20000
379 | 		,totalInt.sd.stable=2000
380 | 		,totalInt.sd.ind=0
381 | 		,meth.channel.sd=300
382 | 		,umeth.channel.sd=300
383 | 		,meth.channel.bg.mean=1000
384 | 		,umeth.channel.bg.mean=1000
385 | 		,meth.channel.bg.sd=300
386 | 		,umeth.channel.bg.sd=300
387 | ){
388 | 	
389 | 	ti<-rnorm(nrow(cell.pops[[1]]), mean=totalInt, sd=totalInt.sd.stable)
390 | 	
391 | 	profiles<-sapply(cell.pops, function(ip){
392 | 				
393 | 				nc<-ncol(ip)
394 | 				
395 | 				cell.sample<-sample.int(nc, ncs)
396 | 				
397 | 				cell.sample.profile<-apply(ip,1,"[", cell.sample)
398 | 				
399 | 				profile<-rowMeans(t(cell.sample.profile))
400 | 				
401 | 				
402 | 				#			profile.sds<-beta.sd.model(profile)
403 | 				#			
404 | 				#			profile<-sapply(1:length(profile),function(j){
405 | 				#						rnorm(1,mean=profile[j],sd=beta.sd.model(profile[j]))
406 | 				#					})
407 | 				
408 | 				
409 | 				if(technical.effects){
410 | 					
411 | 					tii<-sapply(ti, rnorm, n=1, sd=totalInt.sd.ind)
412 | 					
413 | 					meth<-tii*profile
414 | 					umeth<-tii*(1-profile)
415 | 					
416 | 					meth<-sapply(meth, function(mv) rnorm(1,mv,meth.channel.sd)+rnorm(1,meth.channel.bg.mean,meth.channel.bg.sd))
417 | 					umeth<-sapply(umeth, function(uv) rnorm(1,uv,umeth.channel.sd)+rnorm(1,umeth.channel.bg.mean,umeth.channel.bg.sd))
418 | 					
419 | 					meth[meth<0]<-1
420 | 					umeth[umeth<0]<-1
421 | 					
422 | 					measured.profile<-meth/(meth+umeth)
423 | 					
424 | 					return(measured.profile)	
425 | 				}
426 | 				
427 | 				return(profile)
428 | 				
429 | 			})
430 | 	
431 | 	profiles
432 | 	
433 | }
434 | 
435 | #
436 | # get.confusion.table
437 | #
438 | # Get the confusion table from CpG site classification results 
439 | #
440 | get.confusion.table<-function(pvals, true.sites, sl=0.05, adjust=TRUE, adjust.method="hochberg"){
441 | 
442 | 	if(adjust){
443 | 		pvals<-p.adjust(pvals, adjust.method)
444 | 	}
445 | 	
446 | 	## confusion table
447 | 	predict<-pvals<sl
448 | 	true<-rep(FALSE, length(pvals))
449 | 	true[true.sites]<-TRUE
450 | 	
451 | 	conf.table<-table(Outcome = predict, Truth = true)[2:1,2:1]
452 | 	
453 | 	sens<-conf.table[1,1]/(conf.table[1,1]+conf.table[2,1])
454 | 	spec<-conf.table[2,2]/(conf.table[1,2]+conf.table[2,2])
455 | 	
456 | 	return(list(conf.table=conf.table, sensitivity=sens, specificity=spec))
457 | 	
458 | }


--------------------------------------------------------------------------------
/R/utilities.R:
--------------------------------------------------------------------------------
  1 | ###############################################################################
  2 | #  
  3 | # Utility routines 
  4 | #  
  5 | # created 2012-10-11
  6 | # 
  7 | # Author: Pavlo Lutsik
  8 | ###############################################################################
  9 | 
 10 | #
 11 | # rmse
 12 | #
 13 | # Root mean square error
 14 | #
 15 | # D data matrix
 16 | # T recovered profiles 
 17 | # A recovered proportions
 18 | #
 19 | #
 20 | rmse<-function(D,T,A){
 21 | 	
 22 | 	sqrt(sum((D-T%*%A)^2)/nrow(D)/ncol(D))
 23 | 	
 24 | }
 25 | 
 26 | ###############################################################################
 27 | 
 28 | # logLik2
 29 | #
 30 | # Calculate log-likelihood of a fitted linear regression model
 31 | # 
 32 | # @param vector of the model residuals
 33 | # @return log-likelihood value
 34 | # 
 35 | # @author Pavlo Lutsik
 36 | logLik2<-function(res){
 37 | 	N<-length(res)
 38 | 	val <- 0.5 * (- N * (log(2 * pi) + 1 - log(N) + log(sum(res^2))))
 39 | 	return(val)
 40 | }
 41 | 
 42 | ###############################################################################
 43 | #wilcoxTestTwins<-function(betas, twin1, twin2){
 44 | #	
 45 | #	deltas<-betas[,twin1]-betas[,twin2]
 46 | #	wilcox<-apply(deltas, 1, wilcox.test)
 47 | #	results<-data.frame(meanDeltas=rowMeans(deltas), pval=sapply(wilcox, simplify=T, function(t) return(t$p.value)))
 48 | #	
 49 | #}
 50 | #
 51 | ###############################################################################
 52 | #tTestTwins<-function(betas, twin1, twin2){
 53 | #	
 54 | #	deltas<-betas[,twin1]-betas[,twin2]
 55 | #	tt<-apply(deltas, 1, t.test)
 56 | #	results<-data.frame(meanDeltas=rowMeans(deltas), pval=sapply(tt, simplify=T, function(t) return(t$p.value)))
 57 | #	
 58 | #}
 59 | ###############################################################################
 60 | # fetch.annotations
 61 | #
 62 | # collects annotations for all probe ids in rownames(dataset) from IlluminaHumanMethylation450k.db.
 63 | # @author Pavlo Lutsik
 64 | # @param dataset a matrix/data.frame object all row names of which are valid Infinium probe ids (cgXXXXXXXX).
 65 | # @param tables a character vector of IlluminaHumanMethylation450k.db tables. If NULL all existing annotations are fetched
 66 | # @return a data.frame object with annotations
 67 | # @seealso \code{\link{IlluminaHumanMethylation450k.db}}
 68 | # @export
 69 | 
 70 | fetch.annotations<-function(dataset, ids, tables=NULL, verbose=F)
 71 | {
 72 | 	require(IlluminaHumanMethylation450k.db)
 73 | 	if(is.null(tables)){
 74 | 		all.tables<-ls("package:IlluminaHumanMethylation450k.db")
 75 | 		meta.tables<-all.tables[c(14:65)]
 76 | 	}else{
 77 | 		meta.tables<-tables
 78 | 	}
 79 | 	
 80 | 	#metaTables<-allTables[grep("IlluminaHumanMethylation450k[A-Z]", allTables, perl=T)]
 81 | 	if(!missing(dataset)) ids<-rownames(dataset)
 82 | 	all.results<-data.frame(ID=ids)
 83 | 	cn<-colnames(all.results)
 84 | 	
 85 | 	
 86 | 	for (table.name in meta.tables) {
 87 | #		table2 <- get(table.name)
 88 | 
 89 | 		do.call("<-", list(as.name("table2"), as.name(table.name)))
 90 | 		if(!class(table2) %in% c("function","integer") ) {
 91 | 			if(length(table2)>1){
 92 | 				if(verbose) print(paste("Extracting data from:", table.name))
 93 | 				present<-intersect(ids,keys(table2))
 94 | 				result<-data.frame(ID=present, V=as.character(as.list(table2[present])))
 95 | 				cn<-c(cn,strsplit(table.name,"IlluminaHumanMethylation450k")[[1]][2])
 96 | 				all.results<-merge(all.results, result, by="ID", all.x=T, sort=F)
 97 | 				colnames(all.results)<-cn
 98 | 			}
 99 | 		}
100 | 		
101 | 	}
102 | 	
103 | 	
104 | 	return(all.results[match(ids,all.results$ID),])
105 | 	
106 | }
107 | ###############################################################################
108 | fetch.by.interval<-function(chromosome, start, end, strand="*", type="probes450"){
109 | 	
110 | 	if(!type %in% c("probes450", "hg19", "mm9")){
111 | 		stop("unsupported annotation type")
112 | 	}
113 | 	
114 | 	ann<-rnb.get.annotation(type)
115 | 	ann<-ann[match(chromosome,names(ann))]
116 | 	
117 | 	interval<-GRanges(chromosome,IRanges(start=start, end=end), strand=strand)
118 | 	
119 | 	olap<-findOverlaps(ann[[1]], interval)
120 | 	
121 | 	return(rownames(rnb.annotation2data.frame(ann))[queryHits(olap)])
122 | 		
123 | }
124 | ###############################################################################
125 | combin<-function(V,k){
126 | #	
127 | #	if(k>1){
128 | #		vv<-rep(V,k)
129 | #		Posss<-apply(combn(length(vv),k),2,function(idx) vv[idx])
130 | #		Posss<-Posss[,!duplicated(t(Posss)), drop=FALSE]
131 | #	}else{
132 | #		return(matrix(V,ncol=length(V)))
133 | #	}
134 | #	
135 | #	print("##################### DIMENSIONS ####################")
136 | #	print(dim(Posss))
137 | #	return(Posss)
138 | 	
139 | 	t(expand.grid(list(V)[rep(1,k)]))
140 | 		
141 | }
142 | 
143 | ###############################################################################
144 | 
145 | randsplxmat<-function(m,n){
146 | 	
147 | 	U = -log(matrix(runif(m*n), nrow=m))
148 | 	S = colSums(U) # probably only 1 row, so specify 1 explicitly 
149 | 	dump<-sapply(1:n, function(j) U[,j] <<- U[,j]/S[j])   
150 | 	return(U)
151 | 	
152 | }
153 | 
154 | ###############################################################################
155 | projectV<-function(TP,V){
156 | 	
157 | 	TP_proj<-apply(TP, 2, function(col){
158 | 				dist<-abs(repmat(matrix(V, ncol=length(V)), n=length(col), m=1)-col)								
159 | 				proj<-V[apply(dist,1,which.min)]
160 | 			})
161 | 	TP_proj
162 | 	
163 | }
164 | ###############################################################################
165 | 
166 | opt.factorize.exact<-function(affine=TRUE,
167 | 		nonnegative=TRUE,
168 | 		aggr="no",
169 | 		replace=1L,
170 | 		nsamples=0L,
171 | 		naggsets=5L,
172 | 		chunksize=10L,
173 | 		verbose=FALSE,
174 | 		varargin=NULL){
175 | 	
176 | 	options<-list(
177 | 			affine=affine,
178 | 			nonnegative=nonnegative,
179 | 			aggr=aggr,
180 | 			replace=replace,
181 | 			nsamples=nsamples,
182 | 			naggsets=naggsets,
183 | 			chunksize=chunksize,
184 | 			verbose=verbose)
185 | 	
186 | 	if(is.null(varargin)){
187 | 		
188 | 		return(options)
189 | 		
190 | 	}else{
191 | 		print("not supported yet")
192 | 		
193 | 		return(options)
194 | 	}
195 | 	
196 | }
197 | ###############################################################################
198 | 
199 | licols<-function(X,tol=1e-10){
200 | 	
201 | 	if(all(X==0)){ #X has no non-zeros and hence no independent columns
202 | 		
203 | 		return(list(r<-integer(), Xsubs=matrix(0), idx<-integer()))
204 | 	}
205 | 	
206 | 	colnames(X)<-as.character(1:ncol(X))
207 | 	
208 | 	qr.res<-qr(X, tol = tol, LAPACK=T)
209 | 	Q<-qr.Q(qr.res)
210 | 	R<-qr.R(qr.res)
211 | 	E<-match(colnames(R), colnames(X))
212 | 	
213 | 	diagr <- abs(diag(R));
214 | 	
215 | 	r <-length(which(diagr >=tol*diagr[1])) #Rank estimation
216 | 	
217 | 	idx<-sort(E[1:r])
218 | 	
219 | 	Xsub<-X[,idx]
220 | 	
221 | 	return(list(r=r, idx=idx, Xsub=Xsub))
222 | }
223 | 
224 | adjust.nmf.coefs<-function(coefs){
225 | 	t(t(coefs)/colSums(coefs))
226 | }
227 | 
228 | ###############################################################################
229 | #
230 | # Down-rank SNP-affected probes using a one-dimentional k-means
231 | # with k=3
232 | #
233 | 
234 | rank.snp<-function(D){
235 | 	
236 | 	require(Ckmeans.1d.dp)
237 | 	
238 | 	R3 <- numeric(nrow(D))
239 | 	for(i in 1:nrow(D)){
240 | 		res <- Ckmeans.1d.dp(D[i,], 3)
241 | 		R3[i] <- sum(res$withinss)
242 | 	}
243 | 	
244 | 	R3
245 | }
246 | 
247 | ###############################################################################
248 | get.cpg.intervals<-function(cpg.coords, ids=NULL, offset=50){
249 | 	
250 | 	#if(annot.full<-rnb.get.annotation("probes450")
251 | 	intervals<-GRanges(seq=cpg.coords$Chromosome, 
252 | 			IRanges(start=cpg.coords$Start, width=1), 
253 | 			strand=rep("*", nrow(cpg.coords)))
254 | 	
255 | 	
256 | 	intervals<-flank(intervals, width=offset, both=TRUE)
257 | 	intervals<-reduce(intervals)
258 | 	as.data.frame(intervals)
259 | 	
260 | }
261 | ###############################################################################
262 | #
263 | # generateExample
264 | #
265 | # Examples for testing factorization methods
266 | # 
267 | # m	number of genomic features
268 | # n number of profiles
269 | # k hidden dimension
270 | # t.method method used to generate matrix T: "integer", "uniform" or "beta" 
271 | # a.method method used to generate matrix A: "uniform" or "dirichlet" 
272 | # V a vector of possible values for \cs{method} integer or the upper and lower 
273 | # 			bounds for the \cs{method} uniform 
274 | # beta1 first beta-distribution parameter for \cs{t.method} beta
275 | # beta2 second beta-distribution parameter for \cs{t.method} beta
276 | # proportion.prior numeric vector of length \code{r}
277 | # noise.sd a standard deviation for additive Gaussian noize
278 | # digits desired precision
279 | #
280 | # return \cs{list} with elements \cs{D}, \cs{T} and \cs{A}
281 | #
282 | #
283 | generateExample<-function(
284 | 		m, 
285 | 		n, 
286 | 		k, 
287 | 		t.method="beta",
288 | 		a.method="dirichlet",
289 | 		e.method="gaussian",
290 | 		V=c(0,1), 
291 | 		beta1=0.5, 
292 | 		beta2=0.5,
293 | 		proportion.prior=NULL,
294 | 		proportion.var.facror=1,
295 | 		Alower=rep(0, k),
296 | 		Aupper=rep(1, k),
297 | 		noise.sd=0,
298 | 		digits=12
299 | ){
300 | 	
301 | 	if(t.method=="integer"){
302 | 		
303 | 		Tt<-matrix(Inf,ncol=k, nrow=m)
304 | 		ri<-1
305 | 		it<-0
306 | 		while(any(Tt==Inf) && it<100){
307 | 			it<-it+1
308 | 			vertex<-V[sample.int(length(V), m, replace=T)]
309 | 			if(all(colSums(abs(Tt-vertex))!=0)){
310 | 				Tt[1:m,ri]<-vertex
311 | 				ri<-ri+1
312 | 			}
313 | 		}
314 | 		
315 | 	}else if(t.method=="uniform"){
316 | 		
317 | 		Tt<-matrix(runif(m*k, min=min(V), max=max(V)), ncol=k)
318 | 		
319 | 	}else if(t.method=="beta"){
320 | 		
321 | 		Tt<-matrix(rbeta(m*k, shape1=beta1, shape2=beta2), ncol=k)
322 | 		
323 | 	}else{
324 | 		stop("this method for generating T is not implemented")
325 | 	}
326 | 	
327 | 	if(a.method=="uniform"){
328 | 		
329 | 		if(max(Alower)==0 && min(Aupper)==1){
330 | 			
331 | 			A<-matrix(-log(runif(k*n)), ncol=n, nrow=k)
332 | 			A<-t(t(A)/colSums(A))
333 | 			A[1,]<-A[1,]+(rep(1,n)-colSums(A))
334 | 			
335 | 		}else{
336 | 			A <- t(sapply(1:k, function(pri){
337 | 								-log(runif(n, min=Alower[pri], max=Aupper[pri]))
338 | 							}))
339 | 			A<-t(t(A)/colSums(A))
340 | 			A[1,]<-A[1,]+(rep(1,n)-colSums(A))
341 | 	#		for(kk in 1:n){
342 | 	#			A[,kk] <- RProjSplxBox(A[,kk,drop=FALSE], Alower, Aupper);
343 | 	#		
344 | 		}
345 | 		
346 | 	}else if(a.method=="dirichlet"){
347 | 		
348 | 		if(is.null(proportion.prior)){
349 | 			proportion.prior<-rep(1/k,k)
350 | 		}
351 | 		A<-t(rdirichlet(n, proportion.prior*proportion.var.facror))
352 | 		
353 | 	}else{
354 | 		stop("this method for generating A is not implemented")
355 | 	}
356 | 	
357 | 	if(e.method=="gaussian"){
358 | 		if(noise.sd>0){
359 | 			E=matrix(rnorm(m*n, sd=noise.sd), ncol=n)
360 | 		}else{
361 | 			E=matrix(0, nrow=m, ncol=n)
362 | 		}
363 | 	}else{
364 | 		stop("this method for generating additive noise is not implemented")
365 | 	}
366 | 	
367 | 	# get the data matrix
368 | 	Tt<-round(Tt, digits=digits)
369 | 	A<-round(A, digits=digits)
370 | 	E<-round(E, digits=digits)
371 | 	
372 | 	D<-Tt%*%A+E
373 | 	
374 | 	D[D<min(V)]<-min(V)
375 | 	D[D>max(V)]<-max(V)
376 | 	
377 | 	return(list("T"=Tt,"A"=A,"D"=D, "E"=E))
378 | }
379 | 
380 | ###############################################################################
381 | 
382 | RMSE_T<-function(That, Tstar, perm){
383 | 	
384 | 	if(length(unique(perm))==length(perm)){
385 | 		rmseT<-sqrt(sum((That-Tstar[,perm])^2)/ncol(Tstar)/nrow(Tstar))
386 | 	}else{
387 | 		rmseT<-sqrt(mean((
388 | 					sapply(unique(perm), function(comp){
389 | 								abs(Tstar[,comp]-rowMeans(That[,perm==comp,drop=FALSE]))
390 | 					}))^2))
391 | 	}
392 | 	
393 | 	rmseT
394 | }
395 | ###############################################################################
396 | MAE_A<-function(Ahat, Astar, perm){
397 | 	
398 | 	if(length(unique(perm))==length(perm)){
399 | 		maeA<-sum(abs(Ahat-Astar[perm,]))/ncol(Astar)/nrow(Astar)
400 | 	}else{
401 | #		maeA<-sum(abs(
402 | #						sapply(unique(perm), function(comp){
403 | #								abs(Astar[comp,,drop=FALSE]/colSums(Astar[unique(perm),,drop=FALSE])-colSums(Ahat[perm==comp,,drop=FALSE]))
404 | #						})))/ncol(Astar)/nrow(Astar)
405 | 
406 | 		aggrAhat<-t(sapply(unique(perm), function(comp){
407 | 					colSums(Ahat[perm==comp,,drop=FALSE])
408 | 				}))
409 | 		
410 | 		aggrAstar<-sweep(Astar[unique(perm),,drop=FALSE], 2, colSums(Astar[unique(perm),,drop=FALSE]), "/")
411 | 		
412 | 		maeA<-sum(abs(aggrAhat-aggrAstar))/nrow(aggrAstar)/ncol(aggrAstar)
413 | 	}
414 | 	maeA
415 | }
416 | ###############################################################################
417 | estimate.accuracy<-function(fr, trueT, trueA, check=FALSE){
418 | 	
419 | 	perm<-MeDeCom:::match.components(fr$T, trueT, check=check)
420 | 	
421 | 	if(!is.null(perm)){
422 | 		if(length(unique(perm))==length(perm)){
423 | 			rmseT<-sqrt(sum((trueT-fr$T[,perm])^2)/ncol(trueT)/nrow(trueT))
424 | 			maeA<-sum(abs(trueA-fr$A[perm,]))/ncol(trueA)/nrow(trueA)
425 | 		}else{
426 | 			rmseT<-sqrt(sum((
427 | 										sapply(unique(perm), function(comp){
428 | 													abs(trueT[,comp]-rowMeans(fr$T[,perm==comp,drop=FALSE]))
429 | 												}))^2)/ncol(trueT)/nrow(trueT))
430 | 			
431 | 			maeA<-sum(
432 | 					sapply(unique(perm), function(comp){
433 | 								abs(trueA[comp,]-colMeans(fr$A[perm==comp,,drop=FALSE]))
434 | 							}))/ncol(trueA)/nrow(trueA)
435 | 		}
436 | 	}else{
437 | 		rmseT<-NA
438 | 		maeA<-NA
439 | 	}
440 | 	
441 | 	return(list(rmseT=rmseT, maeA=maeA))
442 | }
443 | ###############################################################################
444 | get.distance.matrix<-function(mdd, measure, centered=FALSE){
445 | 	
446 | 	if(centered){
447 | 		mdd<-lapply(mdd, function(mm) sweep(mm, 1, rowMeans(mm)))
448 | 	}
449 | 	mdd<-do.call("cbind", mdd)
450 | 	
451 | 	if(measure=="euclidean"){
452 | 		d <- dist(t(mdd))
453 | 	}else if(measure=="angular"){
454 | 		dm<-matrix(NA, ncol=ncol(mdd), nrow=ncol(mdd))
455 | 		colnames(dm)<-colnames(mdd)
456 | 		rownames(dm)<-colnames(mdd)
457 | 		for(ri in 1:ncol(mdd)){
458 | 			for(ci in 1:ncol(mdd)){
459 | 				dm[ri,ci]<-sum(mdd[,ci]*mdd[,ri])/sqrt(sum(mdd[,ci]^2))/sqrt(sum(mdd[,ri]^2))
460 | 			}
461 | 		}
462 | 		d <- as.dist(1-dm)
463 | 	}else if(measure=="correlation"){
464 | 		d <- as.dist(1-cor(mdd, method="pearson"))
465 | 	}
466 | }
467 | #####
468 | RQuadHC_dummy<-function(G, W, Tk, tol, lower, upper){
469 | 	outp<-Tk+matrix(runif(ncol(Tk)*nrow(Tk))*tol*10, nrow(Tk),ncol(Tk))
470 | 	outp[outp>1]<-1;
471 | 	dummy_loss<-sqrt(mean((Tk-outp)^2))
472 | 	return(list(outp, dummy_loss))
473 | }
474 | ### END


--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | MeDeCom


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MeDeCom
 2 | 
 3 | MeDeCom is an R package for reference-free decomposition of heterogeneous DNA methylation profiles. 
 4 | It uses constrained matrix factorization enhanced by biologically-relevant constraints and a specially tailored regularization.
 5 | See MeDeCom [vignette](vignettes/MeDeCom.md) for a more detailed description and an example analysis.
 6 | 
 7 | # Publications
 8 | 
 9 | MeDeCom has originially been published in [Genome Biology](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1182-6). Together with its accompanying packages [DecompPipeline](https://github.com/CompEpigen/DecompPipeline) and [FactorViz](https://github.com/CompEpigen/FactorViz), MeDeCom is part of a published [protocol](https://www.nature.com/articles/s41596-020-0369-6) for reference-free deconvolution of complex DNA methylation data (see also [http://epigenomics.dkfz.de/DecompProtocol/](http://epigenomics.dkfz.de/DecompProtocol/)).
10 | 


--------------------------------------------------------------------------------
/data/datalist:
--------------------------------------------------------------------------------
1 | example.MeDeComSet: medecom.result
2 | example.cg.annotation: cg.ann
3 | example.dataset: Aref D Tref
4 | 


--------------------------------------------------------------------------------
/data/example.MeDeComSet.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/example.MeDeComSet.RData


--------------------------------------------------------------------------------
/data/example.cg.annotation.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/example.cg.annotation.RData


--------------------------------------------------------------------------------
/data/example.dataset.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/example.dataset.RData


--------------------------------------------------------------------------------
/data/small.example.RnBSet.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/data/small.example.RnBSet.RData


--------------------------------------------------------------------------------
/exec/cluster.script.sge.R:
--------------------------------------------------------------------------------
  1 | 
  2 | if(any(grepl(".*cluster.script.gse.R", commandArgs()))){
  3 | 	require(MeDeCom)
  4 | }
  5 | 
  6 | param.file<-commandArgs()[6]
  7 | params_list<-readRDS(param.file)
  8 | 
  9 | for(params in params_list){
 10 | 	
 11 | 	#print(params)
 12 | 	
 13 | 	load(file.path(params$DD,"data.set.RData"))
 14 | 	D<-meth.data
 15 | 
 16 | 	if(file.exists(file.path(params$DD, "trueT.RData"))){
 17 | 		load(file.path(params$DD,"trueT.RData"))
 18 | 		params$trueT<-trueT
 19 | 		Tstar_present<-TRUE
 20 | 	}else{
 21 | 		Tstar_present<-FALSE
 22 | 	}
 23 | 	if(file.exists(file.path(params$DD, "trueA.RData"))){
 24 | 		load(file.path(params$DD, "trueA.RData"))
 25 | 		params$trueA<-trueA[,params$sample_subset]
 26 | 		Astar_present<-TRUE
 27 | 	}else{
 28 | 		Astar_present<-FALSE
 29 | 	}
 30 | 	
 31 | 	if(file.exists(file.path(params$DD, "start.RData"))){
 32 | 		load(file.path(params$WD,"start.RData"))
 33 | 	}
 34 | 	
 35 | 	load(file.path(params$DD, sprintf("cg_subset_%d.RDdata",params$cg_subset)))
 36 | 	params$cg_subset<-cg_subset
 37 | 	
 38 | 	if(params$mode %in% c("initial_fine", "cv_fine")){
 39 | 		if(file.exists(file.path(params$WD, params$lnr_result))){
 40 | 			load(file.path(params$WD, params$lnr_result))
 41 | 			old_result<-result
 42 | 			rm(result)
 43 | 		}
 44 | 		
 45 | 		if(file.exists(file.path(params$WD, params$clnr_result))){
 46 | 			load(file.path(params$WD, params$clnr_result))
 47 | 			params$startT<-result$That
 48 | 			params$startA<-result$Ahat
 49 | 			
 50 | 		}
 51 | 	}
 52 | 	
 53 | 	
 54 | 	if(file.exists(file.path(params$DD, "cv_partitions.RDdata"))){
 55 | 		load(file.path(params$DD, sprintf("cv_partitions.RDdata")))
 56 | 	}
 57 | 	#params$cg_subset<-cg_subsets[[params$cg_subset_id]]
 58 | 	#params$sample_subset<-sample_subset
 59 | 	#params$meth_matrix<-D
 60 | 	
 61 | 	if(params$mode %in% c("full","initial", "initial_fine")){
 62 | 		incl_samples<-1:length(params$sample_subset)
 63 | 	}else{
 64 | 		fold_subset<-cv.partitions[params$FOLD,]
 65 | 		incl_samples<-params$sample_subset[-fold_subset]
 66 | 	}
 67 | 	
 68 | 	params$meth_matrix<-D[params$cg_subset,incl_samples,drop=FALSE]
 69 | 	
 70 | 	if(params$mode %in% c("full")){
 71 | 		## average the CV results and use as the init
 72 | 		cv_files<-params$cv_init_results
 73 | 		print(cv_files)
 74 | 		cv_result_list<-vector("list", length(cv_files))
 75 | 		for(cv_file_idx in 1:length(cv_files)){
 76 | 			cv.file<-file.path(params$WD, cv_files[[cv_file_idx]])
 77 | 			if(file.exists(cv.file)){
 78 | 				load.env<-new.env(parent=emptyenv())
 79 | 				load(cv.file, envir=load.env)
 80 | 				cv_result_list[[cv_file_idx]]<-get("result", envir=load.env)
 81 | 			}
 82 | 		}
 83 | 		print(str(cv_result_list))
 84 | 		cv_result_list<-cv_result_list[!sapply(cv_result_list, is.null)]
 85 | 		inits<-MeDeCom:::summarizeCVinits(cv_result_list)
 86 | 		params$startT<-inits$T
 87 | 		if(!is.null(inits$A)){
 88 | 			params$startA<-inits$A
 89 | 		}else{
 90 | 			params$startA<-MeDeCom:::factorize.regr(params$meth_matrix, params$startT)[["A"]]
 91 | 		}
 92 | 	}
 93 | 	
 94 | 	
 95 | 	if(Tstar_present){
 96 | 		if(!is.null(params$fixed_T_cols)){
 97 | 			free_cols<-setdiff(1:ncol(trueT), params$fixed_T_cols)
 98 | 			params$fixedT<-trueT[,params$fixed_T_cols, drop=FALSE]
 99 | 			#trueT<-trueT_ff[,-fixed_T_cols, drop=FALSE]]
100 | 		}else{
101 | 			fixedT<-NULL
102 | 			free_cols<-1:ncol(trueT)
103 | 		}
104 | 	}
105 | 	
106 | 	single_run_params<-intersect(names(as.list(args(MeDeCom:::singleRun))), names(params))
107 | 	result<-do.call("singleRun", params[single_run_params], envir=asNamespace("MeDeCom"))
108 | 	
109 | 	if(params$mode %in% c("full", "initial", "cv") || (result$Fval < old_result$Fval)){
110 | 		if(params$mode %in% c("full", "initial", "initial_fine")){
111 | 			trueT_prep<-trueA_prep<-NULL
112 | 			if(Tstar_present){
113 | 				trueT_prep<-trueT[params$cg_subset,free_cols,drop=FALSE]
114 | 			}
115 | 			if(Astar_present){
116 | 				trueA_prep<-trueA[,incl_samples,drop=FALSE]
117 | 			}
118 | 			perf_result<-MeDeCom:::estimatePerformance(result,
119 | 					params$meth_matrix,
120 | 					trueT_prep, 
121 | 					trueA_prep)
122 | 		}else{
123 | 			perf_result<-MeDeCom:::estimateFoldError(
124 | 					result$That, 
125 | 					D[params$cg_subset,params$sample_subset[fold_subset],drop=FALSE],
126 | 					params$NFOLDS)			
127 | 		}
128 | 		for(elt in names(perf_result)){
129 | 			result[[elt]]<-perf_result[[elt]]
130 | 		}
131 | 		
132 | 		if(params$mode %in% c("initial_fine", "cv_fine")){
133 | 			print("found a better solution")
134 | 		}
135 | 		save(result, file=file.path(params$WD, params$lnr_result))
136 | 	}
137 | }


--------------------------------------------------------------------------------
/inst/unitTests/test_general.R:
--------------------------------------------------------------------------------
 1 | test.reinius.reference <- function(){
 2 |   data("small.example.RnBSet")
 3 |   Ks <- 2
 4 |   lambdas <- c(0.01)
 5 |   res <- run.refbased(rnb.set = rnb.set.example,Ks = Ks,lambdas = lambdas)
 6 |   plot <- cluster.refbased(res,plot.type="dendrogram",K=2,lambda=0.01)
 7 |   passes <- is.null(plot)
 8 |   checkTrue(passes)
 9 | }
10 | 
11 | test.general <- function(){
12 |   data("example.dataset")
13 |   input.data <- D[sample(1:nrow(D),1000),sample(1:ncol(D),5)]
14 |   cg_subsets <- list("var"=sample(1:nrow(D),250),"random"=sample(1:nrow(D),500))
15 |   Ks <- 2
16 |   lambdas <- c(0.01)
17 |   res <- runMeDeCom(data = D,Ks = Ks,lambdas = lambdas,cg_subsets = cg_subsets)
18 |   passes <- inherits(res,"MeDeComSet")
19 |   checkTrue(passes)
20 | }
21 | 
22 | test.contribution.interpretation <- function(){
23 |   data("example.MeDeComSet")
24 |   anno.frame <- data.frame(Sex=sample(c("M","F"),100,replace=T),Age=sample(1:100,100,replace=T),Ethnicity=sample(c("A","B","C"),100,replace = T))
25 |   res <- run.trait.association.single(medecom.result,pheno.data=anno.frame)
26 |   passes <- all(names(res) %in% c("linear model","qualitative","quantitative"))
27 |   checkTrue(passes)
28 | }
29 | 
30 | # test.enrichment <- function(){
31 | #   require("RnBeads")
32 | #   data("example.MeDeComSet")
33 | #   anno.frame <- rnb.annotation2data.frame(rnb.get.annotation("probes450"))[sample(1:460000,10000),]
34 | #   res <- lmc.lola.plots.tables(medecom.result,anno.data=anno.frame)
35 | #   passes <- all(names(res) %in% c("Plots","Tables"))
36 | #   res <- lmc.go.enrichment(medecom.result,anno.data=anno.frame)
37 | #   passes <- passes && (class(res) == "list")
38 | #   checkTrue(passes)
39 | # }
40 | 
41 | test.routine <- function(){
42 |   require("RUnit")
43 |   require("MeDeCom")
44 |   cat("STARTED testing general function \n")
45 |   test.general()
46 |   cat("COMPLETED testing general function \n")
47 |   cat("STARTED testing contribution interpretation \n")
48 |   test.contribution.interpretation()
49 |   cat("COMPLETED testing contribution interpretation \n")
50 |   # cat("STARTED testing enrichment functions \n")
51 |   # test.enrichment()
52 |   # cat("COMPLETED testing enrichment functions \n")
53 | }
54 | 
55 | test.routine()


--------------------------------------------------------------------------------
/man/AVAIL.REFS.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reference.R
 3 | \docType{data}
 4 | \name{AVAIL.REFS}
 5 | \alias{AVAIL.REFS}
 6 | \title{reference.R
 7 | -------------------------------------------------------------------------------------------------------------------------------------
 8 | This scripts contains functions for assocating MeDeCom with known reference cell type profiles
 9 | GLOBALS}
10 | \format{
11 | An object of class \code{character} of length 2.
12 | }
13 | \usage{
14 | AVAIL.REFS
15 | }
16 | \description{
17 | reference.R
18 | -------------------------------------------------------------------------------------------------------------------------------------
19 | This scripts contains functions for assocating MeDeCom with known reference cell type profiles
20 | GLOBALS
21 | }
22 | \keyword{datasets}
23 | 


--------------------------------------------------------------------------------
/man/MeDeCom-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeCom-package.R
 3 | \docType{package}
 4 | \name{MeDeCom-package}
 5 | \alias{MeDeCom-package}
 6 | \title{MeDeCom: Methylome DeComposition using regularized constrained matrix factorization}
 7 | \description{
 8 | MeDeCom is an R-package that discovers and quantifies latent components
 9 | in the DNA methylomes of heterogeneous samples
10 | }
11 | \references{
12 | TBA
13 | }
14 | 


--------------------------------------------------------------------------------
/man/MeDeComSet-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeComSet-class.R
 3 | \docType{class}
 4 | \name{MeDeComSet-class}
 5 | \alias{MeDeComSet-class}
 6 | \alias{MeDeComSet}
 7 | \alias{initialize,MeDeComSet-method}
 8 | \title{MeDeComSet Class}
 9 | \usage{
10 | MeDeComSet(parameters, outputs, dataset_info = list())
11 | }
12 | \arguments{
13 | \item{parameters}{\code{list} of MeDeCom parameters with elements \code{K}, integer vector of k values, \code{lambdas}, numeric vector of lambda values.}
14 | 
15 | \item{outputs}{\code{list} of MeDeCom resutls with one element per each used CpG subset.}
16 | 
17 | \item{dataset_info}{\code{list} with information about the input data set.}
18 | }
19 | \value{
20 | an object of class MeDeComSet
21 | }
22 | \description{
23 | Stores the result of a methylation deconvolution experiment
24 | 
25 | Wrapper function MeDeComSet
26 | }
27 | \section{Slots}{
28 | 
29 | \describe{
30 |   \item{\code{dataset_info}}{\code{list} with information about the input data set.}
31 |   \item{\code{parameters}}{\code{list} containing parameters of the deconvolution experiment.}
32 |   \item{\code{outputs}}{\code{list} of deconvolution products for each combination of MeDeCom parameters.}
33 | }
34 | }
35 | 
36 | \section{Methods and functions}{
37 | 
38 | \describe{
39 |   \item{\code{\link[=getStatistics,MeDeComSet-method]{getStatistics}}}{Returns the value of one goodness-of-fit statistics for a given parameter combination.}
40 |   \item{\code{\link[=getLMCs,MeDeComSet-method]{getLMCs}}}{Returns a matrix of LMCs for a given parameter combination.}
41 |   \item{\code{\link[=getProportions,MeDeComSet-method]{getProportions}}}{Returns a matrix of mixing proportions for a given parameter combination.}
42 |   \item{\code{\link{plotParameters}}}{Create a parameter selection plot.}
43 |   \item{\code{\link{plotLMCs}}}{Visualize the LMCs.}
44 |   \item{\code{\link{plotProportions}}}{Visualize the mixing proportions.}
45 | }
46 | }
47 | 
48 | \author{
49 | Pavlo Lutsik
50 | }
51 | 


--------------------------------------------------------------------------------
/man/as.MeDeComSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeComSet-class.R
 3 | \name{as.MeDeComSet}
 4 | \alias{as.MeDeComSet}
 5 | \title{as.MeDeComSet}
 6 | \usage{
 7 | as.MeDeComSet(
 8 |   object,
 9 |   cg_subsets = 1,
10 |   Ks = NULL,
11 |   deviances = NULL,
12 |   rss = NULL,
13 |   m.orig = NULL,
14 |   n.orig = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{object}{An object of class \code{RefFreeCellMix} containing cell type deconvolution information, or a list of such objects.}
19 | 
20 | \item{cg_subsets}{The indeces of the CpG subsets used in the analysis.}
21 | 
22 | \item{Ks}{The values of K used in the analysis. If NULL, K is determined by the size of the matrices.}
23 | 
24 | \item{deviances}{Optional argument specifying the deviances as computed with \code{RefFreeCellMixArrayDevianceBoots}.}
25 | 
26 | \item{rss}{Optional argument specifying the residual sum of sqaures}
27 | 
28 | \item{m.orig}{The original number of rows (CpGs) in the methylation matrix.}
29 | 
30 | \item{n.orig}{The original number of columns (samples) in the methylation matrix.}
31 | }
32 | \value{
33 | An object of type \code{MeDeComSet}
34 | }
35 | \description{
36 | Function to convert object of type RefFreeCellMix to MeDeComSet
37 | }
38 | \details{
39 | Since \code{RefFreeCellMix} only contains information on a single value for K, and does not contain any regularization
40 |           (lambda), the corresponding parameters in the MeDeComSet are set to single numeric values. Furthermore, no information
41 |           on goodness of fit (CVE, Fval) can be stored. If \code{cg_subsets} is not of length 1, an object containing multiple
42 |           subsets is creared.
43 | }
44 | 


--------------------------------------------------------------------------------
/man/cluster.refbased.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reference.R
 3 | \name{cluster.refbased}
 4 | \alias{cluster.refbased}
 5 | \title{cluster.refbased}
 6 | \usage{
 7 | cluster.refbased(
 8 |   ref.run,
 9 |   K,
10 |   lambda,
11 |   cg_subset = 1,
12 |   plot.type = "dendrogram",
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{ref.run}{A result of \code{\link{run.refbased}} with the results of \code{\link{runMeDeCom}} and the reference data matrix}
18 | 
19 | \item{K}{Selected number of components}
20 | 
21 | \item{lambda}{Selected regularization parameter}
22 | 
23 | \item{cg_subset}{Numeric vector representing the CpG sites selected for analysis from the orignial MeDeComSet}
24 | 
25 | \item{plot.type}{Type of plot to be created, is passed to \code{\link{plotLMCs}}}
26 | 
27 | \item{...}{Further arguments passed to plotLMCs}
28 | }
29 | \value{
30 | A plot object displaying the clustering
31 | }
32 | \description{
33 | This routine performs general hierachical clustering as in the \code{\link{plotLMCs}} and adds reference methylome profiles to this
34 | clustering.
35 | }
36 | \author{
37 | Michael Scherer
38 | }
39 | 


--------------------------------------------------------------------------------
/man/example.MeDeComSet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeCom-package.R
 3 | \docType{data}
 4 | \name{example.MeDeComSet}
 5 | \alias{example.MeDeComSet}
 6 | \title{Example result object (MeDeComSet)}
 7 | \format{
 8 | \code{MeDeComSet}, the result of a deconvolution experiment and input to any follow-up analysis or to retrieve latent 
 9 | methylation components (LMCs)
10 | }
11 | \description{
12 | An example result of applying \pkg{MeDeCom} for a deconvolution experiment.
13 | }
14 | \author{
15 | Michael Scherer
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/example.cg.annotation.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeCom-package.R
 3 | \docType{data}
 4 | \name{example.cg.annotation}
 5 | \alias{example.cg.annotation}
 6 | \title{Example CG annotation}
 7 | \format{
 8 | \code{cg.ann} is a \code{data.frame} containing identifiers and postitions of CpG sites to be analyzed
 9 | }
10 | \description{
11 | An data frame containing CpGs with their corresponding annotation in the genome to be used for the analysis. You can provide a similar
12 | \code{data.frame} for your own analysis.
13 | }
14 | \author{
15 | Michael Scherer
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/example.dataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeCom-package.R
 3 | \docType{data}
 4 | \name{example.dataset}
 5 | \alias{example.dataset}
 6 | \title{Example dataset}
 7 | \usage{
 8 | data(example.dataset)
 9 | }
10 | \description{
11 | Contains a data set of methylation values as a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples), a reference of
12 | contributions of 5 cell types in the samples, and the reference methylomes of the samples as a matrix with 10,000 rows and 5
13 | columns.
14 |  
15 | \itemize{
16 |           \item D, a \code{matrix} with 10,000 rows (CpGs) and 100 columns (samples) representing a potential input methylation
17 |           matrix
18 |           \item Aref, a reference of contributions of 5 cell types in the samples
19 |           \item Tref, reference methylomes of the samples as a matrix with 10,000 rows and 5 columns
20 | }
21 | }
22 | \author{
23 | Michael Scherer
24 | }
25 | \keyword{datasets}
26 | 


--------------------------------------------------------------------------------
/man/factorize.alternate.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/factorizations.R
  3 | \name{factorize.alternate}
  4 | \alias{factorize.alternate}
  5 | \title{factorize.alternate}
  6 | \usage{
  7 | factorize.alternate(
  8 |   D,
  9 |   k,
 10 |   method = "MeDeCom.quadPen",
 11 |   t.method = "quadPen",
 12 |   Tfix = NULL,
 13 |   Tpartial = NULL,
 14 |   Tpartial.rows = NULL,
 15 |   Apartial = NULL,
 16 |   Apartial.cols = NULL,
 17 |   V = NULL,
 18 |   lambda = 0,
 19 |   init = "random",
 20 |   opt = 5,
 21 |   emp.dim = 500,
 22 |   emp.resample = TRUE,
 23 |   emp.vsf = 1,
 24 |   emp.borders = c(0, 1),
 25 |   qp.rangeT = c(0, 1),
 26 |   qp.Alower = NULL,
 27 |   qp.Aupper = NULL,
 28 |   itermax = 100,
 29 |   trace = FALSE,
 30 |   eps = 1e-08,
 31 |   ncores = 1,
 32 |   pheno = NULL,
 33 |   na.values = FALSE,
 34 |   seed = NULL,
 35 |   verbosity = 0L
 36 | )
 37 | }
 38 | \arguments{
 39 | \item{D}{m by n input matrix with mixture data}
 40 | 
 41 | \item{k}{number of latent components, \code{integer}}
 42 | 
 43 | \item{method}{optimization method used. Currently supported values are
 44 | \code{"MeDeCom.quadPen"} and \code{"MeDeCom.cppTAfact"}.}
 45 | 
 46 | \item{t.method}{method for updating the latent component matrix, one of 
 47 | \code{"integer", "empirical", "Hlasso"} or \code{"quadPen"}}
 48 | 
 49 | \item{Tfix}{an optional matrix of a priori known fixed components}
 50 | 
 51 | \item{V}{for \code{t.method} "integer" a small vector of possible values;
 52 | for \code{t.method} "empirical" a vector giving empirical distribution
 53 | of T values}
 54 | 
 55 | \item{init}{type of initialization, either "random" (default) or "fixed".}
 56 | 
 57 | \item{opt}{if \code{init} is "random" 
 58 | number of runs with independent initialization,
 59 | if \code{init} is "fixed"
 60 | starting values for T and A (see details)}
 61 | 
 62 | \item{emp.dim}{for \code{t.method} "empirical", 
 63 | number of randomly drawn samples for T row selection}
 64 | 
 65 | \item{emp.resample}{for \code{t.method} "empirical",
 66 | a flag indicating whether resampling should
 67 | be done at each iteration}
 68 | 
 69 | \item{itermax}{maximal number of iterations}
 70 | 
 71 | \item{trace}{a flag indicating whether to return the 
 72 | factorization results for each iteration}
 73 | 
 74 | \item{eps}{threshold for objective value change}
 75 | 
 76 | \item{ncores}{number of CPU cores used for parallelization}
 77 | 
 78 | \item{pheno}{a list with phenotypic information}
 79 | 
 80 | \item{verbosity}{flag specifying whether to show diagnostic
 81 | statements during the execution}
 82 | }
 83 | \value{
 84 | a \code{list} with the following elements:
 85 | 			\describe{
 86 | 	 				\item{\code{T}}{matrix of latent components}
 87 |   					\item{\code{A}}{matrix of mixture proportions}
 88 | 						\item{\code{Fval}}{the final value of the objective function}
 89 | 						\item{\code{Conv}}{sequence of objective function values
 90 | 								attained after each iteration} 
 91 | 						\item{\code{rmse}}{RMSE of the factorization}
 92 | 			}
 93 | }
 94 | \description{
 95 | Matrix factorization algorithms based on the alternating optimization scheme
 96 | }
 97 | \details{
 98 | In case \code{init} is "fixed" the starting values
 99 | 						for the m by k matrix of latent components 
100 | 						and for the k by n matrix of mixing proportions 
101 | 						should be specified as T and A elements of a list
102 | 						supplied as \code{opt}
103 | }
104 | \author{
105 | Martin Slawski
106 | 
107 | R port by Pavlo Lutsik
108 | }
109 | 


--------------------------------------------------------------------------------
/man/factorize.regr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/factorizations.R
 3 | \name{factorize.regr}
 4 | \alias{factorize.regr}
 5 | \title{factorize.regr}
 6 | \usage{
 7 | factorize.regr(D, Tt, A0 = NULL, precision = 1e-08)
 8 | }
 9 | \arguments{
10 | \item{D}{m by n matrix with mixture data}
11 | 
12 | \item{Tt}{either a m by k matrix of k true latent components
13 | or a list of n such matrices, one per each column of D}
14 | 
15 | \item{A0}{initialization for the mixture proportions matrix}
16 | 
17 | \item{precision}{numerical tolerance of the optimization algorithm}
18 | }
19 | \value{
20 | a \code{list} with elements:
21 | 					\describe{
22 | 						\item{\code{A}}{matrix of mixing proportions}
23 | 						\item{\code{T}}{Tt used}
24 | 						\item{\code{rmse}}{RMSE of the regression model}
25 | 					}
26 | }
27 | \description{
28 | Get mixing proportions from the target data matrix and a matrix of latent factors
29 | }
30 | 


--------------------------------------------------------------------------------
/man/getLMCs-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeComSet-class.R
 3 | \docType{methods}
 4 | \name{getLMCs,MeDeComSet-method}
 5 | \alias{getLMCs,MeDeComSet-method}
 6 | \alias{getLMCs}
 7 | \title{getLMCs-methods}
 8 | \usage{
 9 | \S4method{getLMCs}{MeDeComSet}(
10 |   object,
11 |   K = object@parameters$Ks[1],
12 |   lambda = object@parameters$lambdas[1],
13 |   cg_subset = 1,
14 |   statistic = "cve"
15 | )
16 | }
17 | \arguments{
18 | \item{object}{object returned by \link{runMeDeCom}}
19 | 
20 | \item{K}{number of LMCs}
21 | 
22 | \item{lambda}{regularlization parameter}
23 | 
24 | \item{cg_subset}{used CpG subset, defaults to the full data set}
25 | 
26 | \item{statistic}{statistic to be used in returning}
27 | }
28 | \description{
29 | Return a matrix of LMCs
30 | }
31 | \examples{
32 | \donttest{
33 | data(example.data)
34 | getLMCs(example_MeDeComSet, K=2, lambda=0.001)
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/man/getProportions-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeComSet-class.R
 3 | \docType{methods}
 4 | \name{getProportions,MeDeComSet-method}
 5 | \alias{getProportions,MeDeComSet-method}
 6 | \alias{getProportions}
 7 | \title{getProportions-methods}
 8 | \usage{
 9 | \S4method{getProportions}{MeDeComSet}(
10 |   object,
11 |   K = object@parameters$Ks[1],
12 |   lambda = object@parameters$lambdas[1],
13 |   cg_subset = 1,
14 |   statistic = "cve"
15 | )
16 | }
17 | \arguments{
18 | \item{object}{object returned by \link{runMeDeCom}}
19 | 
20 | \item{K}{number of LMCs}
21 | 
22 | \item{lambda}{regularlization parameter}
23 | 
24 | \item{cg_subset}{used CpG subset, defaults to the full data set}
25 | 
26 | \item{statistic}{statistic to be used in returning}
27 | }
28 | \description{
29 | Return a matrix of LMCs
30 | }
31 | \examples{
32 | \donttest{
33 | data(example.data)
34 | getProportions(example_MeDeComSet, K=2, lambda=0.001)
35 | }
36 | }
37 | 


--------------------------------------------------------------------------------
/man/getStatistics-methods.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/MeDeComSet-class.R
 3 | \docType{methods}
 4 | \name{getStatistics,MeDeComSet-method}
 5 | \alias{getStatistics,MeDeComSet-method}
 6 | \alias{getStatistics}
 7 | \title{getStatistics-methods}
 8 | \usage{
 9 | \S4method{getStatistics}{MeDeComSet}(
10 |   object,
11 |   Ks = object@parameters$Ks,
12 |   lambdas = object@parameters$lambdas,
13 |   cg_subset = 1,
14 |   statistic = "cve"
15 | )
16 | }
17 | \arguments{
18 | \item{object}{object returned by \link{runMeDeCom}}
19 | 
20 | \item{Ks}{numbers of LMCs}
21 | 
22 | \item{lambdas}{regularlization parameters}
23 | 
24 | \item{cg_subset}{used CpG subset, defaults to the full data set}
25 | 
26 | \item{statistic}{\code{character} of length 1 specifying goodness of fit statistics}
27 | }
28 | \value{
29 | A numeric \code{matrix} or \code{vector} with the requested statistics
30 | }
31 | \description{
32 | Methylation sites object information for which is present in the \code{RnBSet} object.
33 | }
34 | \details{
35 | Currently the following values for \code{statistics} can be supplied: \code{objective}, \code{RMSE}, \code{CVE}.
36 | }
37 | \examples{
38 | \donttest{
39 | data(example.data)
40 | getStatistics(example_MeDeComSet, K=2, lambda=0.001)
41 | }
42 | }
43 | 


--------------------------------------------------------------------------------
/man/greedymatch.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/matching.R
 3 | \name{greedymatch}
 4 | \alias{greedymatch}
 5 | \title{greedymatch}
 6 | \usage{
 7 | greedymatch(Tstar, That, Ahat)
 8 | }
 9 | \description{
10 | Matching of latent components
11 | }
12 | \details{
13 | suppose Tstar contains K* topics (columns)
14 | 1. select the K* most popular topics of That
15 | 2. use a greedy method to match them to those of Tstar (w.r.t L2 norm)
16 |    and correspondingly permute rows of Ahat
17 | }
18 | \author{
19 | Martin Slawski
20 | 
21 | R port by Pavlo Lutsik
22 | }
23 | 


--------------------------------------------------------------------------------
/man/lmc.annotation.enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{lmc.annotation.enrichment}
 4 | \alias{lmc.annotation.enrichment}
 5 | \title{lmc.annotation.enrichment}
 6 | \usage{
 7 | lmc.annotation.enrichment(
 8 |   medecom.result,
 9 |   annotation.filter = NULL,
10 |   anno.data,
11 |   K = NULL,
12 |   lambda = NULL,
13 |   cg_subset = NULL,
14 |   diff.threshold = 0.5,
15 |   reference.computation = "median",
16 |   comp.lmcs = NULL,
17 |   type = "hypo",
18 |   assembly = "hg19"
19 | )
20 | }
21 | \arguments{
22 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
23 | such an object is stored.}
24 | 
25 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
26 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
27 | 
28 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
29 | information or a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
30 | CpG annotations (ann_C)}
31 | 
32 | \item{K}{The number of LMCs specified for the MeDeCom run.}
33 | 
34 | \item{lambda}{The lambda parameter selected.}
35 | 
36 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
37 | 
38 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
39 | used to call a CpG differentially methylated. The higher this value, the more conservative the
40 | selection.}
41 | 
42 | \item{reference.computation}{Metric used to set the reference on the remaining LMCs to determine hyper- and hypomethylated sites.
43 | Can be either \code{"median"} (default), \code{"mean"}, or \code{"lmcs"} (\code{comp.lmcs} argument needs to be provided).}
44 | 
45 | \item{comp.lmcs}{Numeric vector containing two numbers representing the LMCs that should be compared to one another.}
46 | 
47 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
48 | 
49 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
50 | \code{\link{RnBSet-class}}}
51 | }
52 | \value{
53 | A data frame with four columns: \describe{
54 |  \item{LMC}{The LMC analyzed}
55 |  \item{annotation}{The annotation used. Can either be \code{chrXY} for enrichments on different chromosmes or different functional categories in the Ensembl regulatory build}
56 |  \item{p.value}{The p-value computing using Fisher's exact test for enrichment}
57 |  \item{OR}{The odds ratio for enrichment}
58 | }
59 | }
60 | \description{
61 | This function performs enrichment analysis for various genomic locations including chromosomes,
62 | and different function categories defined by the Ensembl regulatory build
63 | }
64 | \author{
65 | Michael Scherer
66 | }
67 | 


--------------------------------------------------------------------------------
/man/lmc.annotation.plots.tables.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{lmc.annotation.plots.tables}
 4 | \alias{lmc.annotation.plots.tables}
 5 | \title{lmc.annotation.plots.tables}
 6 | \usage{
 7 | lmc.annotation.plots.tables(medecom.result, anno.data, ...)
 8 | }
 9 | \arguments{
10 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
11 | such an object is stored.}
12 | 
13 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
14 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
15 | CpG annotations (ann_C)}
16 | 
17 | \item{...}{Further arguments passed to \code{lmc.annotation.enrichment}}
18 | }
19 | \value{
20 | A list with two elements, one of them containing the plots for each LMC and the other for the corresponding annotation
21 |         enrichment tables
22 | }
23 | \description{
24 | This functions calls \link{lmc.annotation.enrichment} and returns plots representing those results, as well as the tables with annotation
25 | enrichment results.
26 | }
27 | \seealso{
28 | lmc.annotation.enrichment
29 | }
30 | \author{
31 | Michael Scherer
32 | }
33 | 


--------------------------------------------------------------------------------
/man/lmc.go.enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{lmc.go.enrichment}
 4 | \alias{lmc.go.enrichment}
 5 | \title{lmc.go.enrichment}
 6 | \usage{
 7 | lmc.go.enrichment(
 8 |   medecom.result,
 9 |   annotation.filter = NULL,
10 |   anno.data,
11 |   K = NULL,
12 |   lambda = NULL,
13 |   cg_subset = NULL,
14 |   diff.threshold = 0.5,
15 |   reference.computation = "median",
16 |   comp.lmcs = NULL,
17 |   region.type = "genes",
18 |   temp.dir = tempdir(),
19 |   type = "hypo",
20 |   assembly = "hg19"
21 | )
22 | }
23 | \arguments{
24 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
25 | such an object is stored.}
26 | 
27 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
28 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
29 | 
30 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
31 | information or a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
32 | CpG annotations (ann_C)}
33 | 
34 | \item{K}{The number of LMCs specified for the MeDeCom run.}
35 | 
36 | \item{lambda}{The lambda parameter selected.}
37 | 
38 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
39 | 
40 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
41 | used to call a CpG differentially methylated. The higher this value, the more conservative the
42 | selection.}
43 | 
44 | \item{reference.computation}{Metric used to set the reference on the remaining LMCs to determine hyper- and hypomethylated sites.
45 | Can be either \code{"median"} (default), \code{"mean"}, or \code{"lmcs"} (\code{comp.lmcs} argument needs to be provided).}
46 | 
47 | \item{comp.lmcs}{Numeric vector containing two numbers representing the LMCs that should be compared to one another.}
48 | 
49 | \item{region.type}{Region type used to annotate CpGs to potentially regulatory regions (see \url{https://rnbeads.org/regions.html})
50 | for a list of available region types. Here, only "genes" "promoters" and their gencode versions
51 | are available.}
52 | 
53 | \item{temp.dir}{Path to a directory used to store temporary files.}
54 | 
55 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
56 | 
57 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
58 | \code{\link{RnBSet-class}}}
59 | }
60 | \value{
61 | A list with K elements. One element is the enrichment result of the corresponding LMC-specific hypomethylated CpG sites.
62 | }
63 | \description{
64 | This routine computes GO enrichment results for LMC-specifically hypo- or hypermethylated sites.
65 | }
66 | \details{
67 | This function employs GO enrichment analysis with the GOstats package on the CpG sites that are LMC-specifically
68 |                  hypomethylated, after annotating the sites to the closest promotor/gene defined by \code{region.type}.
69 |                  The sites are selected by computing the median methylation value of the other LMCs and then selecting
70 |                  those sites that are more than \code{diff.threshold} away from the median in the LMC of interest.
71 |                  This is done for all LMCs from 1 to K.
72 | }
73 | \author{
74 | Michael Scherer
75 | }
76 | 


--------------------------------------------------------------------------------
/man/lmc.go.plots.tables.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{lmc.go.plots.tables}
 4 | \alias{lmc.go.plots.tables}
 5 | \title{lmc.go.plots.tables}
 6 | \usage{
 7 | lmc.go.plots.tables(medecom.result, anno.data, ...)
 8 | }
 9 | \arguments{
10 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
11 | such an object is stored.}
12 | 
13 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
14 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
15 | CpG annotations (ann_C)}
16 | 
17 | \item{...}{Further arguments passed to \code{lmc.go.enrichment}}
18 | }
19 | \value{
20 | A list with two elements, one of them containing the plots for each LMC and the other for the corresponding GO
21 |         enrichment tables
22 | }
23 | \description{
24 | This functions calls \link{lmc.go.enrichment} and returns plots representing those results, as well as the tables with GO
25 | enrichment results.
26 | }
27 | \seealso{
28 | lmc.go.enrichment
29 | }
30 | \author{
31 | Michael Scherer
32 | }
33 | 


--------------------------------------------------------------------------------
/man/lmc.lola.enrichment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{lmc.lola.enrichment}
 4 | \alias{lmc.lola.enrichment}
 5 | \title{lmc.lola.enrichment}
 6 | \usage{
 7 | lmc.lola.enrichment(
 8 |   medecom.result,
 9 |   annotation.filter = NULL,
10 |   anno.data,
11 |   K = NULL,
12 |   lambda = NULL,
13 |   cg_subset = NULL,
14 |   diff.threshold = 0.5,
15 |   reference.computation = "median",
16 |   comp.lmcs = NULL,
17 |   region.type = "ensembleRegBuildBPall",
18 |   temp.dir = tempdir(),
19 |   type = "hypo",
20 |   assembly = "hg19",
21 |   lola.db = NULL
22 | )
23 | }
24 | \arguments{
25 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
26 | such an object is stored.}
27 | 
28 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
29 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
30 | 
31 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
32 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
33 | CpG annotations (ann_C)}
34 | 
35 | \item{K}{The number of LMCs specified for the MeDeCom run.}
36 | 
37 | \item{lambda}{The lambda parameter selected.}
38 | 
39 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
40 | 
41 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
42 | used to call a CpG differentially methylated. The higher this value, the more conservative the
43 | selection.}
44 | 
45 | \item{reference.computation}{Metric used to set the reference on the remaining LMCs to determine hyper- and hypomethylated sites.
46 | Can be either \code{"median"} (default), \code{"mean"}, or \code{"lmcs"} (\code{comp.lmcs} argument needs to be provided).}
47 | 
48 | \item{comp.lmcs}{Numeric vector containing two numbers representing the LMCs that should be compared to one another.}
49 | 
50 | \item{region.type}{Region type used to annotate CpGs to potentially regulatory regions (see \url{https://rnbeads.org/regions.html})
51 | for a list of available region types.}
52 | 
53 | \item{temp.dir}{Path to a directory used to store temporary files.}
54 | 
55 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
56 | 
57 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
58 | \code{\link{RnBSet-class}}}
59 | 
60 | \item{lola.db}{A loaded LOLA database as loaded with LOLA::loadRegionDB. If this value is NULL, the database is loaded
61 | automatically and stored in the temporary directory.}
62 | }
63 | \value{
64 | A list with K elements. One element is the enrichment result of the corresponding LMC-specific hypomethylated CpG sites.
65 | }
66 | \description{
67 | This routine computes LOLA enrichment results for LMC-specifically hypo- or hypermethylated sites.
68 | }
69 | \details{
70 | This function employs LOLA on the CpG sites that are LMC-specifically hypomethylated, after annotating 
71 |                  the sites to the closest region defined by \code{region.type}. The sites are selected by computing
72 |                  the median methylation value of the other LMCs and then selecting those sites that are more than 
73 |                  \code{diff.threshold} away from the median in the LMC of interest. This is done for all LMCs from
74 |                  1 to K.
75 | }
76 | \seealso{
77 | lmc.lola.plot.tables
78 | }
79 | \author{
80 | Michael Scherer
81 | }
82 | 


--------------------------------------------------------------------------------
/man/lmc.lola.plots.tables.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{lmc.lola.plots.tables}
 4 | \alias{lmc.lola.plots.tables}
 5 | \title{lmc.lola.plots.tables}
 6 | \usage{
 7 | lmc.lola.plots.tables(
 8 |   medecom.result,
 9 |   annotation.filter = NULL,
10 |   anno.data,
11 |   K = NULL,
12 |   lambda = NULL,
13 |   cg_subset = NULL,
14 |   diff.threshold = 0.5,
15 |   region.type = "ensembleRegBuildBPall",
16 |   temp.dir = tempdir(),
17 |   type = "hypo",
18 |   assembly = "hg19",
19 |   lola.db = NULL,
20 |   ...
21 | )
22 | }
23 | \arguments{
24 | \item{medecom.result}{An object of type \code{\link{MeDeComSet-class}} or the location of an .RData file, where
25 | such an object is stored.}
26 | 
27 | \item{annotation.filter}{A numeric vector specifying the sites that have been removed from \code{rnb.set} in a
28 | preprocessing step (e.g. coverage filtering) or a path to an .RData file.}
29 | 
30 | \item{anno.data}{The original \code{\link[RnBeads]{RnBSet-class}} object containing methylation, sample meta and annotation
31 | information, a path to a directory stored by \code{\link[RnBeads]{save.rnb.set}} or a data.frame containing
32 | CpG annotations (ann_C)}
33 | 
34 | \item{K}{The number of LMCs specified for the MeDeCom run.}
35 | 
36 | \item{lambda}{The lambda parameter selected.}
37 | 
38 | \item{cg_subset}{The index of the selection strategy employed (e.g. most variable CpGs).}
39 | 
40 | \item{diff.threshold}{The difference cutoff between median methylation in the remaining LMCs and the LMC of interest
41 | used to call a CpG differentially methylated. The higher this value, the more conservative the
42 | selection.}
43 | 
44 | \item{region.type}{Region type used to annotate CpGs to potentially regulatory regions (see \url{https://rnbeads.org/regions.html})
45 | for a list of available region types.}
46 | 
47 | \item{temp.dir}{Path to a directory used to store temporary files.}
48 | 
49 | \item{type}{Which direction is to be tested for enrichment. Can be one of "hypo", "hyper", or "differential"}
50 | 
51 | \item{assembly}{The assembly used. Needs to be one of "hg19", "hg38" or "mm10". Does not need to be specified, if rnb.set is a
52 | \code{\link{RnBSet-class}}}
53 | 
54 | \item{lola.db}{A loaded LOLA database as loaded with LOLA::loadRegionDB. If this value is NULL, the database is loaded
55 | automatically and stored in the temporary directory.}
56 | 
57 | \item{...}{Further arguments passed to \code{lmc.lola.enrichment}}
58 | }
59 | \value{
60 | A list with two elements, one of them containing the plots for each LMC and the other for the corresponding LOLA
61 |         enrichment tables
62 | }
63 | \description{
64 | This functions calls \link{lmc.lola.enrichment} and returns plots representing those results, as well as the tables with LOLA
65 | enrichment results.
66 | }
67 | \seealso{
68 | lmc.lola.enrichment
69 | }
70 | \author{
71 | Michael Scherer
72 | }
73 | 


--------------------------------------------------------------------------------
/man/load.lola.for.medecom.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/LMC_interpretation.R
 3 | \name{load.lola.for.medecom}
 4 | \alias{load.lola.for.medecom}
 5 | \title{load.lola.for.medecom}
 6 | \usage{
 7 | load.lola.for.medecom(dir.path = tempdir(), assembly = "hg19")
 8 | }
 9 | \arguments{
10 | \item{dir.path}{A path to a directory, where the LOLA database is to be downloaded. Defaults to the temporary directory.}
11 | 
12 | \item{assembly}{The assembly to be used.}
13 | }
14 | \value{
15 | The loaded LOLA database
16 | }
17 | \description{
18 | This functions downloads and loads the LOLA database in the specified directory. Should only be called once per session to save time.
19 | }
20 | \author{
21 | Michael Scherer
22 | }
23 | 


--------------------------------------------------------------------------------
/man/locus_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting.R
 3 | \name{locus_plot}
 4 | \alias{locus_plot}
 5 | \title{locus_plot}
 6 | \usage{
 7 | locus_plot(
 8 |   That,
 9 |   ann,
10 |   cgs,
11 |   locus.chr,
12 |   locus.start,
13 |   locus.end,
14 |   locus.name = sprintf("\%s:\%d-\%d", locus.chr, locus.start, locus.end),
15 |   locus.forward = FALSE,
16 |   flank.start = 1000,
17 |   flank.end = 1000,
18 |   comp.cols = rainbow(ncol(That)),
19 |   legend.pos = "topleft",
20 |   Tstar = NULL,
21 |   D = NULL,
22 |   plot.genes = FALSE,
23 |   ann.genes = NULL
24 | )
25 | }
26 | \arguments{
27 | \item{That}{a matrix of LMCs}
28 | 
29 | \item{ann}{CpG annotation}
30 | 
31 | \item{cgs}{indices of CpGs data for which are present in That with respect to ann}
32 | 
33 | \item{locus.chr}{chromosome}
34 | 
35 | \item{locus.start}{start coordinate}
36 | 
37 | \item{locus.end}{end coordinate}
38 | 
39 | \item{flank.start}{number of basepairs to extend the locus upstream}
40 | 
41 | \item{flank.end}{number of basepairs to extend the locus downstream}
42 | 
43 | \item{comp.cols}{color code for LMCs}
44 | 
45 | \item{legend.pos}{location of the legend, in accordance with \link{legend}}
46 | 
47 | \item{Tstar}{matrix of reference profiles}
48 | 
49 | \item{D}{matrix of input methylation data used to produce That}
50 | 
51 | \item{plot.genes}{if \code{TRUE} a track with gene locations will be plotted}
52 | 
53 | \item{ann.genes}{gene annotation necessary for the gene plotting
54 | 
55 |  @author Pavlo Lutsik, with modifications by Michael Scherer
56 |  @export}
57 | }
58 | \description{
59 | Plot results for a selected locus
60 | }
61 | 


--------------------------------------------------------------------------------
/man/matchLMCs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/matching.R
 3 | \name{matchLMCs}
 4 | \alias{matchLMCs}
 5 | \title{match.components}
 6 | \usage{
 7 | matchLMCs(That, Tref, method = "corrmatch", check = TRUE, Ahat = NULL)
 8 | }
 9 | \arguments{
10 | \item{That}{recovered components from factorization}
11 | 
12 | \item{Tref}{reference components}
13 | 
14 | \item{method}{one of "corrmatch" and "greedymatch"}
15 | 
16 | \item{check}{for method "corrmatch": a flag specifying whether to check uniquenes of the match}
17 | 
18 | \item{Ahat}{for method "greedymatch": recovered mixing proportions}
19 | }
20 | \value{
21 | a vector of indices. The length and the order of the vector corresponds to the columns of \code{TT}
22 | 					and the indices specify the columns of Tref
23 | }
24 | \description{
25 | Matching the recovered components to the reference
26 | }
27 | \details{
28 | Wrapper function for component matching methods
29 | }
30 | 


--------------------------------------------------------------------------------
/man/plotLMC.reference.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reference.R
 3 | \name{plotLMC.reference}
 4 | \alias{plotLMC.reference}
 5 | \title{plotLMC.reference}
 6 | \usage{
 7 | plotLMC.reference(
 8 |   medecom.set,
 9 |   ref.meth,
10 |   ann.md,
11 |   cg_subset = medecom.set@parameters$cg_subsets[1],
12 |   K = medecom.set@parameters$Ks[1],
13 |   lambda = medecom.set@parameters$lambdas[1],
14 |   ann.ref = NULL,
15 |   type = "dendrogram",
16 |   chrom.col = "Chromosome",
17 |   start.col = "Start",
18 |   end.col = "End",
19 |   ct.color.column = "cell_type"
20 | )
21 | }
22 | \arguments{
23 | \item{medecom.set}{An object of type MeDeComSet}
24 | 
25 | \item{ref.meth}{Reference methylome in the form of a matrix of a RnBSet}
26 | 
27 | \item{ann.md}{Genomic annotation for the CpG sites present in medecom.set}
28 | 
29 | \item{cg_subset}{The cg_subset of interest}
30 | 
31 | \item{K}{K value}
32 | 
33 | \item{lambda}{lambda value}
34 | 
35 | \item{ann.ref}{Genomic annotation for the CpG sites in the referenc methylome. Can be omitted, if ref.meth is an RnBSet object}
36 | 
37 | \item{type}{Plot type, see \code{\link{plotLMCs}}}
38 | 
39 | \item{chrom.col}{The chromosome column name in ann.md and ann.ref}
40 | 
41 | \item{start.col}{The start column name in ann.md and ann.ref}
42 | 
43 | \item{end.col}{The end column name in ann.md and ann.ref}
44 | 
45 | \item{ct.color.column}{A column name in the phenotypic information of ref.meth to be shown in the clustering}
46 | }
47 | \description{
48 | This routine uses a reference methylome, together with an annotation, and calls plotLMCs for the matching postions
49 | }
50 | \author{
51 | Michael Scherer
52 | }
53 | 


--------------------------------------------------------------------------------
/man/plotLMCs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting.R
 3 | \name{plotLMCs}
 4 | \alias{plotLMCs}
 5 | \title{plotLMCs}
 6 | \usage{
 7 | plotLMCs(
 8 |   MeDeComSet,
 9 |   type,
10 |   K = NA,
11 |   lambda = NA,
12 |   cg_subset = 1,
13 |   lmc = NA,
14 |   Tref = NULL,
15 |   distance = "correlation",
16 |   center = FALSE,
17 |   n.most.var = NA,
18 |   D = NULL,
19 |   sample.characteristic = NULL,
20 |   scatter.matching = FALSE,
21 |   scatter.smooth = TRUE,
22 |   scatter.cg.feature = NULL,
23 |   min.similarity = 0
24 | )
25 | }
26 | \arguments{
27 | \item{MeDeComSet}{an object with MeDeCom results}
28 | 
29 | \item{type}{plot type, a \code{character} of length 1 (see Details)}
30 | 
31 | \item{K}{value of parameter k to use}
32 | 
33 | \item{lambda}{value of parameter lambda to use}
34 | 
35 | \item{cg_subset}{which CpG subset to use}
36 | 
37 | \item{lmc}{which LMC to use for visualization}
38 | 
39 | \item{Tref}{a matrix with reference methylomes}
40 | 
41 | \item{distance}{distance measure to use}
42 | 
43 | \item{center}{if \code{TRUE} the LMC and reference methylome matrices will be row-centered}
44 | 
45 | \item{n.most.var}{is not \code{NA} a respective number of CpGs with the highest standard deviation will be plotted}
46 | 
47 | \item{D}{input data matrix used to derive the LMCs}
48 | 
49 | \item{min.similarity}{minimal similarity between LMCs and (if available) reference profiles, used to select edges in \code{"similarity graph"}.
50 | Has only an influence if \code{type}=\code{"similarity graph"}.}
51 | }
52 | \description{
53 | A wrapper for various plotting methods for the visualization of LMCs
54 | }
55 | \details{
56 | Available plot types include:
57 | \describe{
58 | \item{\bold{\code{boxplot}}}{
59 |        Boxplot describing the distributions of each of the LMCs and, if available, the reference methylomss.}
60 | \item{\bold{\code{dendrogram}}}{
61 |        Dendrogram visualizing a joint hierarchical clustering of LMCs and, if available, the reference methylomes.}
62 | \item{\bold{\code{heatmap}}}{
63 |        Heatmap visualizing a distance between LMCs and the reference methylomes.}
64 | \item{\bold{\code{mds}}}{
65 |        Joint multidimensional scaling of LMCs and the reference methylomes.}
66 | \item{\bold{\code{scatterplot}}}{
67 |        Multi-panel scatterplot of LMCs and reference methylomes.}
68 | \item{\bold{\code{extremality}}}{
69 |        Barplot visualizing the value of the regularizer term for each LMC.}
70 | \item{\bold{\code{distance to center}}}{
71 |        Barplot visualizing a distance to the data center for each LMC. Input data matrix used to derive the LMCs should be 
72 | 		  supplied as argument \code{D}.}
73 | }
74 | }
75 | 


--------------------------------------------------------------------------------
/man/plotParameters.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting.R
 3 | \name{plotParameters}
 4 | \alias{plotParameters}
 5 | \title{plotParameters}
 6 | \usage{
 7 | plotParameters(
 8 |   MeDeComSet,
 9 |   cg_subset = 1,
10 |   Ks = integer(),
11 |   lambdas = integer(),
12 |   statistic = "cve",
13 |   minLambda = 0,
14 |   maxLambda = Inf,
15 |   lambdaScale = "native",
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{MeDeComSet}{MeDeCom object}
21 | 
22 | \item{cg_subset}{integer index of the CpG subset (if no subsets were used)}
23 | 
24 | \item{Ks}{values of parameter k to use (by default all k values available in the \code{MeDeComSet} are used)}
25 | 
26 | \item{lambdas}{values of parameter lambda to use (by default all lambda values available in the \code{MeDeComSet} are used)}
27 | 
28 | \item{statistic}{if multiple k values are supplied, the statistic which is plotted (defaults to cross-validation error)}
29 | 
30 | \item{minLambda}{minimal lambda value}
31 | 
32 | \item{maxLambda}{maximal lambda value}
33 | 
34 | \item{lambdaScale}{character indicating if native scale or logarithmic scale should be employed for plotting lambda}
35 | 
36 | \item{...}{further paramters passed to \code{plot.lambda.selection} or \code{plot.K.selection}}
37 | }
38 | \description{
39 | Parameter selection plots
40 | }
41 | 


--------------------------------------------------------------------------------
/man/plotProportions.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plotting.R
 3 | \name{plotProportions}
 4 | \alias{plotProportions}
 5 | \title{plotProportions}
 6 | \usage{
 7 | plotProportions(
 8 |   MeDeComSet,
 9 |   type,
10 |   K,
11 |   lambda,
12 |   cg_subset = 1,
13 |   lmc = NA,
14 |   Aref = NULL,
15 |   ref.profile = NA,
16 |   sample.characteristic = NULL,
17 |   heatmap.clusterCols = FALSE,
18 |   heatmap.clusterRows = FALSE,
19 |   ...
20 | )
21 | }
22 | \arguments{
23 | \item{MeDeComSet}{an object with MeDeCom results}
24 | 
25 | \item{type}{plot type, a \code{character} of length 1 (see Details)}
26 | 
27 | \item{K}{value of parameter k to use}
28 | 
29 | \item{lambda}{value of parameter lambda to use}
30 | 
31 | \item{cg_subset}{which CpG subset to use}
32 | 
33 | \item{lmc}{which LMC to use for visualization}
34 | 
35 | \item{Aref}{a matrix with reference methylomes}
36 | 
37 | \item{...}{Extra variables based to proportion.lineplot}
38 | }
39 | \description{
40 | A wrapper for various plotting methods for the visualization of mixing proportions
41 | }
42 | \details{
43 | Available plot types include:
44 | \describe{
45 | \item{\bold{\code{heatmap}}}{
46 |        Lineplot of proportions recovered by MeDeCom and reference proportions.}
47 | \item{\bold{\code{barplot}}}{
48 |        Stacked barplot of proportions recovered by MeDeCom.}
49 | \item{\bold{\code{lineplot}}}{
50 |        Lineplot of proportions recovered by MeDeCom and, if available, reference proportions.}
51 | \item{\bold{\code{scatterplot}}}{
52 |        Lineplot of proportions recovered by MeDeCom and reference proportions.}
53 | }
54 | }
55 | 


--------------------------------------------------------------------------------
/man/run.refbased.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reference.R
 3 | \name{run.refbased}
 4 | \alias{run.refbased}
 5 | \title{FUNCTIONS
 6 | run.refbased}
 7 | \usage{
 8 | run.refbased(
 9 |   rnb.set,
10 |   Ks,
11 |   lambdas,
12 |   cg_subsets = NULL,
13 |   opt.method = "MeDeCom.cppTAfact",
14 |   temp.dir = NULL,
15 |   ref.base = "reinius",
16 |   most.var = NULL,
17 |   NCORES = 1,
18 |   cluster.settings = NULL,
19 |   ref.set = NULL,
20 |   id.col = NULL,
21 |   save.restricted.sites = FALSE
22 | )
23 | }
24 | \arguments{
25 | \item{rnb.set}{An Object of type \code{RnBSet} from the \pkg{RnBeads} package containing methylation information on which
26 | the deconvolution is to be performed}
27 | 
28 | \item{Ks}{Numeric vector containing the number of components to be computed by \pkg{MeDeCom}}
29 | 
30 | \item{lambdas}{Numeric vector specifying the regularzation parameters to be explored}
31 | 
32 | \item{cg_subsets}{List of numeric vectors specifying the rows that are to be used in the analysis.}
33 | 
34 | \item{opt.method}{Optimization method to be employed. For further information see \code{\link{runMeDeCom}}}
35 | 
36 | \item{temp.dir}{Optional temporary directory to store intermediate results}
37 | 
38 | \item{ref.base}{Reference profile data base to be used. Supported are 
39 | \itemize{
40 |   \item \code{"reinius"} A blood cell type reference methylome data set from Reinius et.al. (Reference to be added)
41 | }
42 | \itemize{
43 |   \item \code{"local"} The reference data set is provided by the user. If this option is selected, \code{ref.set} 
44 |                      must not be empty.
45 | }}
46 | 
47 | \item{most.var}{Number specifying the number of most variable to be selected from \code{rnb.set}}
48 | 
49 | \item{NCORES}{Number of cores to be used for analysis.}
50 | 
51 | \item{cluster.settings}{Setting for the environment of a high performance compute cluster. Passed to \code{\link{runMeDeCom}}}
52 | 
53 | \item{ref.set}{A \code{RnBSet} object containing a reference data set to be used besides \code{rnb.set}. Is only compatible with
54 | \code{ref.base="local"}.}
55 | 
56 | \item{id.col}{The name of the column in the sample annotation sheet of \code{ref.set} containing the reference cell type. Is only
57 | compatible with \code{ref.base="local"}.}
58 | 
59 | \item{save.restricted.sites}{Flag indicating if \code{rnb.set} restricted to the \code{most.var} sites is to be saved in \code{temp.dir}
60 | for potential downstream analysis.}
61 | }
62 | \value{
63 | A list object containing two elements \itemize{
64 |            \item \code{"MeDeComSet"} Results of applying MeDeCom with the setting above
65 |            \item \code{"RefMeth"} A matrix containing reference profiles from the specified data set. The number of rows in 
66 |                        this matrix has been reduced according to the most variable sites in \code{rnb.set}.
67 |         }
68 | }
69 | \description{
70 | Function to link MeDeCom's output to reference methylation profiles. The function returns both the MeDeCom result and a methyalation
71 | matrix with reference profiles from the source specified.
72 | }
73 | \details{
74 | This function applied MeDeCom to the specified data set and only support \code{RnBSet} objects as inputs. The function
75 |  internally manipulated the object by selecting the most variable sites according to \code{most.var}. This leads to a decrease in
76 |  the number of rows in the reference profiles to this number. 
77 |  
78 |  Please note that an active internet connection is required, since this routine downloads data through the world wide web.
79 | }
80 | \author{
81 | Michael Scherer
82 | }
83 | 


--------------------------------------------------------------------------------
/man/run.trait.association.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contribution_interpretation.R
 3 | \name{run.trait.association}
 4 | \alias{run.trait.association}
 5 | \title{run.trait.association}
 6 | \usage{
 7 | run.trait.association(
 8 |   medecom.set,
 9 |   pheno.data,
10 |   test.fun = t.test,
11 |   plot.path = getwd(),
12 |   figure.format = "pdf"
13 | )
14 | }
15 | \arguments{
16 | \item{medecom.set}{An object of type \code{\link{MeDeComSet}} as the result of \code{\link{runMeDeCom}} containing LMCs and their
17 | proportions in the samples. The Set can contain multiple runs for different values of K and lambda.}
18 | 
19 | \item{pheno.data}{An object of type \code{\link[RnBeads]{RnBSet-class}} containing methylation data and metadata for the same samples for which 
20 | \code{medecom.set} was computed or a data.frame of sample annotations (ann_S)}
21 | 
22 | \item{test.fun}{Test statistic used to compute p-values of differences between LMC contributions in pairwise sample comparisons.
23 | Defaults to \code{t.test}.}
24 | 
25 | \item{plot.path}{Path to store the p-value heatmaps.}
26 | 
27 | \item{figure.format}{Character describing the format in which plots should be stored on disk. Either \code{"pdf"} or \code{"png"}.}
28 | }
29 | \description{
30 | Computes test statistics for all possible group assignments of samples defined in \code{medecom.set} and \code{rnb.set} and stores
31 | heatmaps of p-values on the given location for all CG Subsets, Ks and lambdas present in \code{medecom.set}
32 | }
33 | \details{
34 | This function creates a new folder names \code{pdfs} at the location given by \code{plot.path} and stores a heatmap for
35 |           all possible Ks and lambdas defined in \code{medecom.set}. The p-values are produced by comparing the LMC contributions
36 |           in all sample comparisons defined by \code{\link[RnBeads]{rnb.sample.groups}} on \code{rnb.set}. The employed test statistic for
37 |           pariwise comparison can be specified by \code{test.fun}, for groups defining more than one group \code{\link{kruskal.test}}
38 |           is employed. P-values lower than 0.01 are added to the heatmap.
39 | }
40 | \author{
41 | Michael Scherer
42 | }
43 | 


--------------------------------------------------------------------------------
/man/run.trait.association.single.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/contribution_interpretation.R
 3 | \name{run.trait.association.single}
 4 | \alias{run.trait.association.single}
 5 | \title{run.trait.association.single}
 6 | \usage{
 7 | run.trait.association.single(
 8 |   medecom.set,
 9 |   pheno.data,
10 |   cg_subset = NULL,
11 |   K = NULL,
12 |   lambda = NULL,
13 |   test.fun = t.test
14 | )
15 | }
16 | \arguments{
17 | \item{medecom.set}{An object of type \code{\link{MeDeComSet}} as the result of \code{\link{runMeDeCom}} containing LMCs and their
18 | proportions in the samples. The Set can contain multiple runs for different values of K and lambda.}
19 | 
20 | \item{pheno.data}{An object of type \code{\link[RnBeads]{RnBSet-class}} containing methylation data and metadata for the same samples for which 
21 | \code{medecom.set} was computed or a data.frame of sample annotations (ann_S)}
22 | 
23 | \item{cg_subset}{The cg_subset of interest}
24 | 
25 | \item{K}{The selected value for number of LMCs (K)}
26 | 
27 | \item{lambda}{The selected value of the regularizer (lambda)}
28 | 
29 | \item{test.fun}{Test statistic used to compute p-values of differences between LMC contributions in pairwise sample comparisons.
30 | Defaults to \code{t.test}.}
31 | }
32 | \description{
33 | Computes test statistics for all possible group assignments of samples defined in \code{medecom.set} and \code{rnb.set} and stores
34 | heatmaps of p-values on the given location only for a given CG Subset, K and lambda.
35 | }
36 | \details{
37 | Returns a list with two elements, each a heatmap as a ggplot object for the given \code{medecom.set}, \code{cg_subset} \code{K} and \code{lambda}. 
38 |           The elements correpond to p-values of correlation ("quantivative") and t-tests ("qualitative") traits.
39 |           The p-values are produced by comparing the LMC contributions in all sample comparisons defined by \code{\link[RnBeads]{rnb.sample.groups}} 
40 |           on \code{rnb.set}. The employed test statistic for pariwise comparison can be specified by \code{test.fun}, for groups defining more than one group \code{\link{kruskal.test}}
41 |           is employed. P-values lower than 0.01 are added to the heatmap.
42 | }
43 | \author{
44 | Michael Scherer
45 | }
46 | 


--------------------------------------------------------------------------------
/man/runMeDeCom.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/frontend.R
 3 | \name{runMeDeCom}
 4 | \alias{runMeDeCom}
 5 | \title{runMeDeCom}
 6 | \usage{
 7 | runMeDeCom(
 8 |   data,
 9 |   Ks,
10 |   lambdas,
11 |   opt.method = "MeDeCom.cppTAfact",
12 |   cg_subsets = NULL,
13 |   sample_subset = NULL,
14 |   startT = NULL,
15 |   startA = NULL,
16 |   trueT = NULL,
17 |   trueA = NULL,
18 |   fixed_T_cols = NULL,
19 |   NINIT = 100,
20 |   ITERMAX = 1000,
21 |   NFOLDS = 10,
22 |   N_COMP_LAMBDA = 4,
23 |   NCORES = 1,
24 |   random.seed = NULL,
25 |   num.tol = 1e-08,
26 |   analysis.name = NULL,
27 |   use.ff = FALSE,
28 |   cluster.settings = NULL,
29 |   temp.dir = NULL,
30 |   cleanup = TRUE,
31 |   verbosity = 1L,
32 |   time.stamps = FALSE
33 | )
34 | }
35 | \arguments{
36 | \item{data}{DNA methylation dataset as a \code{numeric} matrix (methylation sites vs samples) or an ojbect of class \code{RnBeadSet}}
37 | 
38 | \item{Ks}{values of parameter \deqn{k} to be tested, vector of type \code{integer}}
39 | 
40 | \item{lambdas}{values of parameter \deqn{\lambda} to be tested, vector of type \code{numeric}}
41 | 
42 | \item{opt.method}{optimization method used. Currently supported values are \code{"MeDeCom.quadPen"} and \code{"MeDeCom.cppTAfact"}.}
43 | 
44 | \item{cg_subsets}{a \code{list} of \code{integer} vectors specifying row indices to include into the analysis}
45 | 
46 | \item{sample_subset}{samples to include into the analysis}
47 | 
48 | \item{startT}{a \code{list} of length equal to \code{length(Ks)} or a \code{matrix} with \code{max(Ks)} columns}
49 | 
50 | \item{startA}{a \code{list} of length equal to \code{length(Ks)} or a \code{matrix} with \code{max(Ks)} rows}
51 | 
52 | \item{trueT}{a numeric matrix with as many rows as there are methylation sites in \code{data}}
53 | 
54 | \item{trueA}{a numeric matrix with as many columns as there are methylation sites in \code{data}}
55 | 
56 | \item{fixed_T_cols}{columsn of T which are known (to be implemented)}
57 | 
58 | \item{NINIT}{number of random initializations}
59 | 
60 | \item{ITERMAX}{maximal number of iterations of the alternating optimization scheme}
61 | 
62 | \item{NFOLDS}{number of cross-validation folds}
63 | 
64 | \item{N_COMP_LAMBDA}{the number of solutions to compare in the "smoothing" step}
65 | 
66 | \item{NCORES}{number of cores to be used in the parallelized steps (at best a divisor of NINIT)}
67 | 
68 | \item{random.seed}{seed for random number generation}
69 | 
70 | \item{num.tol}{some small parameter}
71 | 
72 | \item{analysis.name}{a deliberate name of the analysis as a \code{character} singleton}
73 | 
74 | \item{use.ff}{use \code{ff} package functionality for memory optimization}
75 | 
76 | \item{cluster.settings}{a list with parameters for an HPC cluster}
77 | 
78 | \item{temp.dir}{a temporary directory for the cluster-based analysis available on all nodes}
79 | 
80 | \item{cleanup}{if \code{TRUE} the temporary directory will be removed on completion}
81 | 
82 | \item{verbosity}{verbosity level, \code{0} for quiet execution}
83 | 
84 | \item{time.stamps}{add timestamps to the diagnostic output}
85 | }
86 | \value{
87 | MeDeComSet object
88 | }
89 | \description{
90 | Perform a MeDeCom experiment
91 | }
92 | \author{
93 | Pavlo Lutsik
94 | }
95 | 


--------------------------------------------------------------------------------
/src/HCLasso.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * [Anew, Loss_new] = mexHCLasso(G,W,A,lambda)
  3 |  *
  4 |  * ---Input---
  5 |  *
  6 |  * G (k,k) full double matrix
  7 |  * W (k,d) full double matrix
  8 |  * A (k,d) full double matrix
  9 |  *
 10 |  * ---Output---
 11 |  *
 12 |  * Anew       full (k,d) matrix
 13 |  * Loss_new   double number
 14 |  *
 15 |  * ---Algorithm---
 16 |  *
 17 |  * For each clm of W, denoted by w, solve
 18 |  *
 19 |  * min     a' * G * a - 2 * w' * a + lambda * norm(a,1),
 20 |  * sb.to.  1>= a_i >= 0
 21 |  *
 22 |  * i.e. a is in the hypercube of R^k.
 23 |  *
 24 |  * where the starting value(the estimate of the minimizer)
 25 |  * is stored in the corresponding clms of A(Anew).
 26 |  *
 27 |  * The implementation is based on SPG.
 28 |  *
 29 |  * Parallel Computing is supported by OpenMP.
 30 |  *
 31 |  * BLAS routines are embedded in for operations on matrices & vectors.
 32 |  *
 33 |  * ---Default parameters---
 34 |  *
 35 |  * convergence accuracy: 1e-10
 36 |  * suffcient descent criterion in line search: 1e-3
 37 |  * memory size in non-mono. descent checking: 10
 38 |  * max #threads : 1
 39 |  * dynamic allocating threads: yes
 40 |  *
 41 |  */
 42 | 
 43 | 
 44 | #include <stdio.h>
 45 | #include <cstddef>
 46 | #include "dynblas.h"
 47 | #include <omp.h>
 48 | #include <limits>
 49 | #include <Rcpp.h>
 50 | using namespace std;
 51 | using namespace Rcpp;
 52 | 
 53 | #define MEM_OLD_VALUES 10
 54 | #define MAX_NUM_THREADS 1
 55 | #define OPT_TOL   1e-10
 56 | #define SUFF_DESC 1e-3
 57 | #define DYNAMIC_THREAD 1
 58 | 
 59 | /*
 60 |  * compute the absolute value of x
 61 |  * write this function explicitly to avoid compiler issue
 62 |  */
 63 | inline double dabs(double x){
 64 |     if ( x < 0 )
 65 |         x = -x;
 66 |     return x;
 67 | }
 68 | 
 69 | /* compute the constant Hess and Beta */
 70 | inline void SetInput(double* G, double* w, double lambda, double* Hess, double* beta, ptrdiff_t k){
 71 |     
 72 |     // beta = 2 * w - lambda:
 73 |     // don't know BLAS function for subtracting a scalar for every
 74 |     // component of a vecotr
 75 |     for( int i = 0; i < k ; i++){
 76 |         beta[i] = 2 * w[i] - lambda;
 77 |     }
 78 |     
 79 |     // Hess = 2 * G :
 80 |     // Hess = G;
 81 |     // Hess = 1 * G + Hess;
 82 |     ptrdiff_t ione = 1;
 83 |     double one = 1.0;
 84 |     ptrdiff_t ks = (ptrdiff_t) (k * k);
 85 |     dcopy(&ks, G, &ione, Hess, &ione);
 86 |     daxpy(&ks, &one, G, &ione, Hess, &ione);
 87 |     
 88 |     //int ik = (int) k;
 89 |     //for(int i = 0; i < ik; i++ ){
 90 |     //	mexPrintf("%f\n",beta[i]);
 91 |     //}
 92 |     
 93 |     //for(int i = 0; i < ik; i++ ){
 94 |     //	for (int j = 0; j < ik; j++ ){
 95 |     //		mexPrintf("%f\t", Hess[i + j * k]);
 96 |     //	}
 97 |     //	mexPrintf("%\n");
 98 |     //}
 99 | }
100 | 
101 | /* compute the gradient at x */
102 | inline void GetGrad(double* grad, double* Hess, double* beta, double* x, ptrdiff_t k){
103 |     // grad = 2 * G * x - beta = Hess * x - beta;
104 |     
105 |     ptrdiff_t ione = 1;
106 |     char* chn = (char*)"N";
107 |     double one  = 1.0;
108 |     double zero = 0.0;
109 |     double mone = -1.0;
110 |     
111 |     // step 1: grad =  Hess * x ;
112 |     dgemv(chn, &k, &k, &one, Hess, &k, x, &ione, &zero, grad, &ione);
113 |     
114 |     // step 2: grad =  -1 * beta + grad;
115 |     daxpy(&k, &mone, beta, &ione, grad, &ione);
116 |     
117 |     //int ik = (int) k;
118 |     //for(int i = 0; i < ik; i++ ){
119 |     //	mexPrintf("%f\n",grad[i]);
120 |     //}
121 |     
122 | }
123 | 
124 | /* compute the objective value at x */
125 | inline double ObjValue(double* G, double* beta, double* x, double* tmp, ptrdiff_t k){
126 |     // x' * G * x - beta' * x = x' * (G * x - beta)
127 |     ptrdiff_t ione = 1;
128 |     char* chn = (char*)"N";
129 |     double one  = 1.0;
130 |     double zero = 0.0;
131 |     double mone = -1.0;
132 |     
133 |     // step 1: tmp =  G * x ;
134 |     dgemv(chn, &k, &k, &one, G, &k, x, &ione, &zero, tmp, &ione);
135 |     
136 |     // step 2: tmp =  -1 * beta + tmp;
137 |     daxpy(&k, &mone, beta, &ione, tmp, &ione);
138 |     
139 |     // step 3: f = tmp' * x;
140 |     double f = (double)ddot(&k, x, &ione, tmp, &ione);
141 |     
142 |     return f;
143 | }
144 | 
145 | /* compute the BB parameter */
146 | inline double GetAlpha(double* x, double* x_old, double* g, double* g_old, double* tmp, double* tmp1, ptrdiff_t k){
147 |     
148 |     // alpha = (x - x_old)' * (x - x_old)  /  [ (x - x_old)' * (g - g_old)];
149 |     
150 |     ptrdiff_t ione = 1;
151 |     //double one  = 1.0;
152 |     //double zero = 0.0;
153 |     double mone = -1.0;
154 |     
155 |     // tmp = x - x_old
156 |     // step 1: copy x to tmp
157 |     dcopy(&k, x, &ione, tmp, &ione);
158 |     // step 2: tmp =  -1 * x_old + tmp;
159 |     daxpy(&k, &mone, x_old, &ione, tmp, &ione);
160 |     
161 |     // tmp1 = g - g_old
162 |     // step 1: copy g to tmp1
163 |     dcopy(&k, g, &ione, tmp1, &ione);
164 |     // step 2: tmp1 =  -1 * g_old + tmp1;
165 |     daxpy(&k, &mone, g_old, &ione, tmp1, &ione);
166 |     
167 |     // numerator =  tmp' * tmp
168 |     double numerator = (double)ddot(&k, tmp, &ione, tmp, &ione);
169 |     
170 |     // denominator = tmp' * tmp1;
171 |     double denominator = (double)ddot(&k, tmp, &ione, tmp1, &ione);
172 |     
173 |     double alpha = numerator / denominator;
174 |     
175 |     return alpha;
176 | }
177 | 
178 | /* project x to the hypercube -> output f */
179 | inline void ProjHyperCube(double* f, double* x, int m) {
180 |     for(int i = 0; i< m; i++){
181 |         if (x[i] < 0)
182 |             f[i] = 0;
183 |         else if (x[i] > 1)
184 |             f[i] = 1;
185 |         else
186 |             f[i] = x[i];
187 |     }
188 | }
189 | 
190 | /* compute the projected step */
191 | inline void GetProjStep(double* d, double* x, double* g, double alpha, ptrdiff_t k) {
192 |     // d = Proj(x - alpha * g) - x;
193 |     
194 |     ptrdiff_t ione = 1;
195 |     double mone = -1.0;
196 |     
197 |     //Step 1:  d = x;
198 |     dcopy(&k, x, &ione, d, &ione);
199 |     
200 |     //Step 2:  d = -alpha*g + d;
201 |     double malpha = -alpha;
202 |     daxpy(&k, &malpha, g, &ione, d, &ione);
203 |     
204 |     //Step 3: d = ProjHyperCube(d)
205 |     ProjHyperCube(d, d, (int)k);
206 |     
207 |     //step 4: d = d - x;
208 |     daxpy(&k, &mone, x, &ione, d, &ione);
209 |     
210 |     //int ik = (int) k;
211 |     //for(int i = 0; i < ik; i++ ){
212 |     //	mexPrintf("%f\n",d[i]);
213 |     //}
214 | }
215 | 
216 | /* compute the directional derivative */
217 | inline double GetDirectDerivative(double* g, double* d, ptrdiff_t k){
218 |     ptrdiff_t ione = 1;
219 |     double sum = (double)ddot(&k, g, &ione, d, &ione);
220 |     return sum;
221 | }
222 | 
223 | /* compute norm(x,1) for a vector x */
224 | inline double NormOne(double* x, ptrdiff_t k){
225 |     ptrdiff_t ione = 1;
226 |     double sum = dasum(&k, x, &ione);
227 |     return sum;
228 | }
229 | 
230 | /***  Solve Lasso Problem on Hypercube by SPG ***/
231 | void HCLasso(double* G, double* w, double* a0, double lambda, ptrdiff_t k,
232 |         double* Hess, double* beta,
233 |         double* x, double* x_old, double* g, double* g_old, double* d,
234 |         double* old_fvals, double* tmp, double* tmp1,
235 |         double* ahat, double* fhat){
236 |     /********
237 |      *
238 |      * solve:  min_a   a'* G * a - 2 * w'* a + lambda * norm(a,1);
239 |      *         sb.to.  a >= 0
240 |      *
241 |      * ---Input---
242 |      *
243 |      * G,w    - quandratic form
244 |      * a0     - starting value
245 |      * lambda - regression parameter
246 |      * k      - length of a0
247 |      *
248 |      * ---Temporary Variables---
249 |      *
250 |      * Hess  - Hessian = 2 * G, constant
251 |      * beta  - (2 * w - lambda), constant
252 |      * x     - current solution
253 |      * x_old - previous solution
254 |      * g     - current gradient  = Hess * x - beta
255 |      * g_old - previous gradient  = Hess * x - beta
256 |      * d     - descent direction
257 |      *
258 |      * old_fvals - for non-monotonically descent
259 |      * tmp,tmp1  - for BLAS
260 |      *
261 |      * ---Output---
262 |      *
263 |      * ahat - minimizer
264 |      * fhat - objective value at ahat
265 |      *
266 |      *******/
267 |     
268 |     ptrdiff_t ione = 1;
269 |     double one = 1.0;
270 |     double mone = -1.0;
271 |     double zero = 0.0;
272 |     char* chn = (char*)"N";
273 |     
274 |     /*** Initialiation ***/
275 |     
276 |     // set parameter
277 |     double optTol = OPT_TOL;
278 |     double suffDec = SUFF_DESC;
279 |     double f;    // objective value at current solution
280 |     double fmin; // minimum. objective value in the sequence generated by SPG
281 |     // memory for non-monotone line search
282 |     for(int i = 0; i < MEM_OLD_VALUES; i++) {
283 |         old_fvals[i] = -std::numeric_limits<double>::max();
284 |     }
285 |     
286 |     // set Hessian and beta
287 |     SetInput(G, w, lambda, Hess, beta, k);
288 |     
289 |     // get starting point a0, gradient & fval
290 |     dcopy(&k, a0, &ione, x, &ione);
291 |     GetGrad(g, Hess, beta, x, k);
292 |     f = ObjValue(G, beta, x, tmp, k);
293 |     fmin = f;
294 |     
295 |     // copy to estimate
296 |     dcopy(&k, x, &ione, ahat, &ione);
297 |     fhat[0] = fmin;
298 |     
299 |     /*** SPG Loop ***/
300 |     
301 |     double alpha; // BB parameter
302 |     double gtd; // Directional Derivative
303 |     double t; //stepsize;
304 |     double f_ref; // reference function value in non-monotone linear search
305 |     double Linear, Quad; // ingredient to compute new function value;
306 |     double factor; // for linear search, factor to reduce stepsize
307 |     double Norm1_dx; // ||dx||_1, for linear search and as stopping criterion
308 |     double linear, quad, red_f, f_tmp, norm1_dx; //temporary variable in linear search
309 |     
310 |     int iter = 0;
311 |     int itermax = 500;
312 |     while (1){
313 |         
314 |         //** Compute Step Direction
315 |         if (iter == 0)
316 |             alpha = 1;
317 |         else{
318 |             alpha = GetAlpha(x, x_old, g, g_old, tmp, tmp1, k);
319 |             if (alpha <= 1e-10 || alpha > 1e10) {
320 |                 alpha = 1;
321 |             }
322 |         }
323 |         
324 |         //** Compute the projected step
325 |         GetProjStep(d, x, g, alpha, k);
326 |         
327 |         
328 |         //** Check that Progress can be made along the direction
329 |         gtd = GetDirectDerivative(g, d, k);
330 |         
331 |         if (gtd > -optTol){
332 |             //mexPrintf("Directional Derivative below optTol\n%f", gtd);
333 |             break;
334 |         }
335 |         
336 |         //** Backtracking Line Search
337 |         // Select Initial Guess to step length
338 |         if (iter == 0){
339 |             t = 1/NormOne(g, k);
340 |             t =  (t > 1) ? 1 : t;
341 |         }
342 |         else{
343 |             t = 1;
344 |         }
345 |         
346 |         // Get the reference function value for non-monotone condition:
347 |         // __update the old_values memorized
348 |         if (iter < MEM_OLD_VALUES)
349 |             old_fvals[iter] = f;
350 |         else{
351 |             for(int i = 0; i < MEM_OLD_VALUES-1; i++){
352 |                 old_fvals[i] = old_fvals[i+1];
353 |             }
354 |             old_fvals[MEM_OLD_VALUES-1] = f;
355 |         }
356 |         
357 |         // __find f_ref = max(old_fvals);
358 |         f_ref = old_fvals[0];
359 |         for(int i = 1; i < MEM_OLD_VALUES; i++){
360 |             if (f_ref < old_fvals[i])
361 |                 f_ref = old_fvals[i];
362 |         }
363 |         
364 |         // ingredients for computing (f_new - f) based on stepsize t:
365 |         // __dx = t * d; Linear = g' * dx; Quad = dx' * Hess * dx;
366 |         // __equivalently, Linear = t * g' * d = t * gtd; Quad = t^2 * d' * Hess * d;
367 |         Linear = t * gtd;
368 |         dgemv(chn, &k, &k, &one, Hess, &k, d, &ione, &zero, tmp, &ione);
369 |         Quad = (double)ddot(&k, d, &ione, tmp, &ione);
370 |         Quad = Quad * t * t;
371 |         
372 |         // __|dx||_1
373 |         Norm1_dx = t * NormOne(d, k);
374 |         //mexPrintf("%f\n\n", Norm1_dx);
375 |         
376 |         // stepsize selection
377 |         factor = 1;
378 |         norm1_dx = Norm1_dx * factor;
379 |         while (1) {
380 |             
381 |             //__compute (f_new - f)
382 |             linear = Linear * factor;
383 |             quad = Quad * factor * factor;
384 |             red_f = 0.5 * quad + linear;
385 |             f_tmp = f + red_f;
386 |             
387 |             if (f_tmp < f_ref + suffDec * linear) {
388 |                 //__get sufficient descent
389 |                 t = t * factor;
390 |                 norm1_dx = Norm1_dx * factor;
391 |                 break;
392 |             }
393 |             else {
394 |                 //__Evaluate New Stepsize
395 |                 //__t = t * 0.5; -> t0 fixed; factor = factor * 0.5; t = t0 * factor;
396 |                 factor = factor * 0.5;
397 |             }
398 |             
399 |             //__Check whether step has become too small
400 |             if (Norm1_dx * factor < optTol || t == 0) {
401 |                 //    mexPrintf("Line Search failed\n");
402 |                 t = 0;
403 |                 norm1_dx = 0;
404 |                 red_f = 0;
405 |                 break;
406 |             }
407 |             
408 |         }
409 |         
410 |         //** Take Step
411 |         
412 |         /*
413 |          * x_old = x;
414 |          * x = x + t * d;
415 |          *
416 |          * first copy x to x_old
417 |          * then x = t * d + x;
418 |          *
419 |          */
420 |         dcopy(&k, x, &ione, x_old, &ione);
421 |         daxpy(&k, &t, d, &ione, x, &ione);
422 |         
423 |         /*
424 |          * g_old = g;
425 |          * g = compute grad(x);
426 |          *
427 |          * first copy g to g_old
428 |          * then compute new gradient
429 |          *
430 |          */
431 |         dcopy(&k, g, &ione, g_old, &ione);
432 |         GetGrad(g, Hess, beta, x, k);
433 |         
434 |         // new objective value and iteration index
435 |         f = f + red_f;
436 |         iter = iter + 1;
437 |         
438 |         //** keep track of the minimum value attained
439 |         if ( f < fmin ){
440 |             fmin = f; // update
441 |             // copy to the estimate
442 |             dcopy(&k, x, &ione, ahat, &ione);
443 |             fhat[0] = fmin;
444 |         }
445 |         
446 |         //** Check 1st order optimality condition
447 |         // tmp = ProjHyperCube(x-g)-x;
448 |         dcopy(&k, x, &ione, tmp, &ione);
449 |         daxpy(&k, &mone, g, &ione, tmp, &ione);
450 |         ProjHyperCube(tmp, tmp, (int)k);
451 |         daxpy(&k, &mone, x, &ione, tmp, &ione);
452 |         
453 |         if ( NormOne(tmp, k) < optTol ){
454 |             // mexPrintf("First-Order Optimality Conditions Below optTol\n");
455 |             break;
456 |         }
457 |         
458 |         if (norm1_dx < optTol ) {
459 |             //  mexPrintf("***********************norm_1: \t %f",norm1_dx);
460 |             break;
461 |         }
462 |         
463 |         if ( dabs(red_f) < optTol ) {
464 |             //   mexPrintf("***************red_f: \t %f \t optTol: \t %.10f ",dabs(red_f),optTol);
465 |             break;
466 |         }
467 |         
468 |         if( iter > itermax ) {
469 |             //   mexPrintf("***********update T SPG: Reach iteration limits.");
470 |             break;
471 |         }
472 |     }
473 | }
474 | 
475 | 
476 | /*** Parallel Computing ***/
477 | void spawn_threads(double* G, double* W, double* A, double lambda, ptrdiff_t k, int d, double* Anew, double* Loss_new) {
478 |     
479 |     /* temporary variables */
480 |     double* Hess       = (double*)malloc(MAX_NUM_THREADS * k * k * sizeof(double));
481 |     double* beta       = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
482 |     double* x          = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
483 |     double* x_old      = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
484 |     double* g          = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
485 |     double* g_old      = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
486 |     double* dsct       = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
487 |     double* old_fvals  = (double*)malloc(MAX_NUM_THREADS * MEM_OLD_VALUES * sizeof(double));
488 |     double* tmp        = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
489 |     double* tmp1       = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
490 |     double* fhat       = (double*) malloc(MAX_NUM_THREADS * sizeof(double));
491 |     
492 |     if ( Hess == NULL || beta == NULL || x == NULL || x_old == NULL || g == NULL || g_old == NULL ||
493 |             dsct == NULL || old_fvals == NULL || tmp == NULL || tmp1 == NULL || fhat == NULL ) {
494 |         //mexErrMsgTxt("Out of memory.");
495 | 
496 |     }
497 |     // construct independet subproblems
498 |     omp_set_num_threads(MAX_NUM_THREADS);
499 |     omp_set_dynamic(DYNAMIC_THREAD);
500 |     int j = 0;
501 |     double loss = 0.0;
502 |     //#pragma omp parallel for private(j), reduction(+: loss)
503 |     for(j = 0; j < d; j++){
504 |         int id = omp_get_thread_num();
505 |         
506 |         //int id = 0;
507 |         HCLasso(G, W + j * k, A + j * k, lambda, k,
508 |                 Hess + id * (k * k), beta + id * k,
509 |                 x + id * k, x_old + id * k, g + id * k, g_old + id * k, dsct + id * k,
510 |                 old_fvals + id * k, tmp + id * k, tmp1 + id * k,
511 |                 Anew + j * k, fhat + id);
512 |         loss = loss + *(fhat+id);
513 |     }
514 |     
515 |     Loss_new[0] = loss;
516 |     
517 |     free(fhat);
518 |     free(tmp1);
519 |     free(tmp);
520 |     free(old_fvals);
521 |     free(dsct);
522 |     free(g_old);
523 |     free(g);
524 |     free(x_old);
525 |     free(x);
526 |     free(beta);
527 |     free(Hess);
528 |     
529 | }
530 | 
531 | 
532 | 
533 | //[[Rcpp::export]]
534 | List RHLasso(NumericMatrix  Ginp, NumericMatrix  Winp, NumericMatrix  Ainp, NumericVector l)
535 | {
536 | 
537 | 	Rcpp::NumericMatrix Gi(clone(Ginp));
538 | 	Rcpp::NumericMatrix Wi(clone(Winp));
539 | 	Rcpp::NumericMatrix Ai(clone(Ainp));
540 | 
541 | 	double* Gptr =  Gi.begin();
542 | 	double* Wptr =  Wi.begin();
543 | 	double* Aptr =  Ai.begin();
544 | 
545 | 	double lambda = Rcpp::as<double>(l);
546 | 
547 | 	ptrdiff_t k = (ptrdiff_t) Wi.nrow();
548 | 	int d = Wi.ncol();
549 | 
550 |     /* create the output data */
551 |     double* Anew = NULL;
552 |     double* Loss_new = NULL;
553 |     //double* iters = NULL;
554 | 
555 |     Rcpp::NumericMatrix newA((int)k,d);
556 |     Anew = newA.begin();
557 | 
558 | 
559 |     Rcpp::NumericMatrix newLoss(1,1);
560 |     Loss_new = newLoss.begin();
561 | 
562 |     //printf("Starting threads\n");
563 |     /* parallel computing */
564 |     spawn_threads(Gptr, Wptr, Aptr, lambda, k, d, Anew, Loss_new);
565 | 
566 |     //printf("%f\n", newLoss[0]);
567 |     //printf("%f\n", NumIters[0]);
568 |     //printf("%f\n", newA[0]);
569 | 
570 | 	List result = List::create(
571 | 			Named("A") = wrap(newA),
572 | 			Named("Loss") = wrap(newLoss)
573 | 			);
574 | 
575 | 	return(result);
576 | }
577 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | # standard setup
2 | PKG_LIBS = `$(R_HOME)/bin/Rscript -e "Rcpp:::LdFlags()"`
3 | PKG_CXXFLAGS =`$(R_HOME)/bin/Rscript -e "Rcpp:::CxxFlags()"` `$(R_HOME)/bin/Rscript -e "RcppEigen:::CxxFlags()"` -I. -std=c++11
4 | 
5 | # OMP setup
6 | OMP_NUM_THREADS=1	
7 | OMP_SHEDULE="dynamic,16"


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | PKG_LIBS = $(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::LdFlags()") $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) -fopenmp
2 | PKG_CXXFLAGS=$(shell "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "Rcpp:::CxxFlags()") -I. -fopenmp


--------------------------------------------------------------------------------
/src/QuadHC.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * [Anew, Loss_new] = mexQuadHC(G,W,A,optTol)
  3 |  *
  4 |  * ---Input---
  5 |  *
  6 |  * G (k,k) full double matrix
  7 |  * W (k,d) full double matrix
  8 |  * A (k,d) full double matrix
  9 |  * optTol	double number
 10 |  *
 11 |  * ---Output---
 12 |  *
 13 |  * Anew       full (k,d) matrix
 14 |  * Loss_new   double number
 15 |  * iters      full (d,1) vector - #iterations used to solve each subproblem
 16 |  * ---Algorithm---
 17 |  *
 18 |  * For each clm of W, denoted by w, solve
 19 |  *
 20 |  * min     a' * G * a - 2 * w' * a, 
 21 |  * sb.to.  1>= a_i >= 0
 22 |  *
 23 |  * i.e. a is in the hypercube of R^k.
 24 |  *
 25 |  * where the starting value(the estimate of the minimizer)
 26 |  * is stored in the corresponding clms of A(Anew).
 27 |  *
 28 |  * The implementation is based on SPG.
 29 |  *
 30 |  * Parallel Computing is supported by OpenMP.
 31 |  *
 32 |  * BLAS routines are embedded in for operations on matrices & vectors.
 33 |  *
 34 |  * ---Default parameters---
 35 |  *
 36 |  * convergence accuracy: 1e-10
 37 |  * suffcient descent criterion in line search: 1e-3
 38 |  * memory size in non-mono. descent checking: 10
 39 |  * max #threads : 1
 40 |  * dynamic allocating threads: yes
 41 |  *
 42 |  */
 43 | 
 44 | 
 45 | #include <stdio.h>
 46 | #include <cstddef>
 47 | #include "dynblas.h"
 48 | #include <omp.h>
 49 | #include <limits>
 50 | #include <Rcpp.h>
 51 | #include <iterator>
 52 | 
 53 | using namespace std;
 54 | using namespace Rcpp;
 55 | 
 56 | #define MEM_OLD_VALUES 10
 57 | //#define MAX_NUM_THREADS 1
 58 | #define MAX_NUM_THREADS 1
 59 | //#define OPT_TOL   1e-10
 60 | #define SUFF_DESC 1e-3
 61 | #define DYNAMIC_THREAD 1
 62 | 
 63 | /*
 64 |  * compute the absolute value of x
 65 |  * write this function explicitly to avoid compiler issue
 66 |  */
 67 | inline double dabs(double x){
 68 |     if ( x < 0 )
 69 |         x = -x;
 70 |     return x;
 71 | }
 72 | 
 73 | /* compute the constant Hess and Beta */
 74 | inline void SetInput(double* G, double* w, double* Hess, double* beta, ptrdiff_t k){
 75 |     
 76 |     // beta = 2 * w:
 77 |     // beta = w
 78 |     // beta = 1 * w + beta; 
 79 |     //for( int i = 0; i < k ; i++){
 80 |     //    beta[i] = 2 * w[i] - lambda;
 81 |     //}
 82 |     //
 83 |     ptrdiff_t ione = 1;
 84 |     double one = 1.0;
 85 |     dcopy(&k, w, &ione, beta, &ione);
 86 |     daxpy(&k, &one, w, &ione, beta, &ione);
 87 |     
 88 |     // Hess = 2 * G :
 89 |     // Hess = G;
 90 |     // Hess = 1 * G + Hess;
 91 |     //ptrdiff_t ione = 1;
 92 |     //double one = 1.0;
 93 |     ptrdiff_t ks = (ptrdiff_t) (k * k);
 94 |     dcopy(&ks, G, &ione, Hess, &ione);
 95 |     daxpy(&ks, &one, G, &ione, Hess, &ione);
 96 |     
 97 |     //printf("Setinput1\n");
 98 |     int ik = (int) k;
 99 |     for(int i = 0; i < ik; i++ ){
100 |     	//printf("%1.22f\n",beta[i]);
101 |     }
102 |     //printf("Setinput2\n");
103 |     for(int i = 0; i < ik; i++ ){
104 |     	for (int j = 0; j < ik; j++ ){
105 |     		//printf("%f\t", Hess[i + j * k]);
106 |     	}
107 |     	//printf("%\n");
108 |     }
109 | }
110 | 
111 | /* compute the gradient at x */
112 | inline void GetGrad(double* grad, double* Hess, double* beta, double* x, ptrdiff_t k){
113 |     // grad = 2 * G * x - beta = Hess * x - beta;
114 |     
115 |     ptrdiff_t ione = 1;
116 |     char* chn = (char*)"N";
117 |     double one  = 1.0;
118 |     double zero = 0.0;
119 |     double mone = -1.0;
120 |     
121 |     // step 1: grad =  Hess * x ;
122 |     dgemv(chn, &k, &k, &one, Hess, &k, x, &ione, &zero, grad, &ione);
123 |     
124 |     // step 2: grad =  -1 * beta + grad;
125 |     daxpy(&k, &mone, beta, &ione, grad, &ione);
126 |     
127 |     //printf("GetGrad\n");
128 |     int ik = (int) k;
129 |     for(int i = 0; i < ik; i++ ){
130 |     	//printf("%1.22f\n",grad[i]);
131 |     }
132 |     
133 | }
134 | 
135 | /* compute the objective value at x */
136 | inline double ObjValue(double* G, double* beta, double* x, double* tmp, ptrdiff_t k){
137 |     // x' * G * x - beta' * x = x' * (G * x - beta)
138 |     ptrdiff_t ione = 1;
139 |     char* chn = (char*)"N";
140 |     double one  = 1.0;
141 |     double zero = 0.0;
142 |     double mone = -1.0;
143 |     
144 |     // step 1: tmp =  G * x ;
145 |     dgemv(chn, &k, &k, &one, G, &k, x, &ione, &zero, tmp, &ione);
146 |     
147 |     // step 2: tmp =  -1 * beta + tmp;
148 |     daxpy(&k, &mone, beta, &ione, tmp, &ione);
149 |     
150 |     // step 3: f = tmp' * x;
151 |     double f = (double)ddot(&k, x, &ione, tmp, &ione);
152 |     
153 |     return f;
154 | }
155 | 
156 | /* compute the BB parameter */
157 | inline double GetAlpha(double* x, double* x_old, double* g, double* g_old, double* tmp, double* tmp1, ptrdiff_t k){
158 |     
159 |     // alpha = (x - x_old)' * (x - x_old)  /  [ (x - x_old)' * (g - g_old)];
160 |     
161 |     ptrdiff_t ione = 1;
162 |     //double one  = 1.0;
163 |     //double zero = 0.0;
164 |     double mone = -1.0;
165 |     
166 |     // tmp = x - x_old
167 |     // step 1: copy x to tmp
168 |     dcopy(&k, x, &ione, tmp, &ione);
169 |     // step 2: tmp =  -1 * x_old + tmp;
170 |     daxpy(&k, &mone, x_old, &ione, tmp, &ione);
171 |     
172 |     // tmp1 = g - g_old
173 |     // step 1: copy g to tmp1
174 |     dcopy(&k, g, &ione, tmp1, &ione);
175 |     // step 2: tmp1 =  -1 * g_old + tmp1;
176 |     daxpy(&k, &mone, g_old, &ione, tmp1, &ione);
177 |     
178 |     // numerator =  tmp' * tmp
179 |     double numerator = (double)ddot(&k, tmp, &ione, tmp, &ione);
180 |     
181 |     // denominator = tmp' * tmp1;
182 |     double denominator = (double)ddot(&k, tmp, &ione, tmp1, &ione);
183 |     
184 |     double alpha = numerator / denominator;
185 |     
186 |     return alpha;
187 | }
188 | 
189 | /* project x to the hypercube -> output f */
190 | inline void ProjHyperCube(double* f, double* x, int m, double L, double U) {
191 |     for(int i = 0; i< m; i++){
192 |         if (x[i] < L)
193 |             f[i] = L;
194 |         else if (x[i] > U)
195 |             f[i] = U;
196 |         else
197 |             f[i] = x[i];
198 |     }
199 | }
200 | 
201 | /* compute the projected step */
202 | inline void GetProjStep(double* d, double* x, double* g, double alpha, ptrdiff_t k, double low, double upp) {
203 |     // d = Proj(x - alpha * g) - x;
204 |     
205 |     ptrdiff_t ione = 1;
206 |     double mone = -1.0;
207 |     
208 |     //Step 1:  d = x;
209 |     dcopy(&k, x, &ione, d, &ione);
210 |     
211 |     //Step 2:  d = -alpha*g + d;
212 |     double malpha = -alpha;
213 |     daxpy(&k, &malpha, g, &ione, d, &ione);
214 |     
215 |     //Step 3: d = ProjHyperCube(d)
216 |     ProjHyperCube(d, d, (int)k, low, upp);
217 |     
218 |     //step 4: d = d - x;
219 |     daxpy(&k, &mone, x, &ione, d, &ione);
220 |     
221 |     //printf("GetProjStep\n");
222 |     int ik = (int) k;
223 |     for(int i = 0; i < ik; i++ ){
224 |     	//printf("%1.22f\n",d[i]);
225 |     }
226 | }
227 | 
228 | /* compute the directional derivative */
229 | inline double GetDirectDerivative(double* g, double* d, ptrdiff_t k){
230 |     ptrdiff_t ione = 1;
231 |     double sum = (double)ddot(&k, g, &ione, d, &ione);
232 |     return sum;
233 | }
234 | 
235 | /* compute norm(x,1) for a vector x */
236 | inline double NormOne(double* x, ptrdiff_t k){
237 |     ptrdiff_t ione = 1;
238 |     double sum = dasum(&k, x, &ione);
239 |     return sum;
240 | }
241 | 
242 | /***  Solve Quadratic Problem on Non-negative Orthant by SPG ***/
243 | int QuadHC(double* G, double* w, double* a0, ptrdiff_t k,
244 |         double* Hess, double* beta,
245 |         double* x, double* x_old, double* g, double* g_old, double* d,
246 |         double* old_fvals, double* tmp, double* tmp1,
247 |         double* ahat, double* fhat, double optTol, double lower, double upper){
248 |     /********
249 |      *
250 |      * solve:  min_a   a'* G * a - 2 * w'* a;
251 |      *         sb.to.  1 >= a >= 0
252 |      *
253 |      * ---Input---
254 |      *
255 |      * G,w    - quandratic form
256 |      * a0     - starting value
257 |      * k      - length of a0
258 |      * optTol - tolerance for stopping criterion 
259 |      *
260 |      * ---Temporary Variables---
261 |      *
262 |      * Hess  - Hessian = 2 * G, constant
263 |      * beta  - 2 * w, constant
264 |      * x     - current solution
265 |      * x_old - previous solution
266 |      * g     - current gradient  = Hess * x - beta
267 |      * g_old - previous gradient  = Hess * x - beta
268 |      * d     - descent direction
269 |      *
270 |      * old_fvals - for non-monotonically descent
271 |      * tmp,tmp1  - for BLAS
272 |      *
273 |      * ---Output---
274 |      *
275 |      * ahat - minimizer
276 |      * fhat - objective value at ahat
277 |      *
278 |      *******/
279 |     
280 |     ptrdiff_t ione = 1;
281 |     double one = 1.0;
282 |     double mone = -1.0;
283 |     double zero = 0.0;
284 |     char* chn = (char*)"N";
285 | 
286 |     /*** Initialiation ***/
287 |     
288 |     // set parameter
289 |     //double optTol = OPT_TOL;
290 |     double suffDec = SUFF_DESC;
291 |     double f;    // objective value at current solution
292 |     double fmin; // minimum. objective value in the sequence generated by SPG
293 |     // memory for non-monotone line search
294 |     for(int i = 0; i < MEM_OLD_VALUES; i++) {
295 |         old_fvals[i] = -std::numeric_limits<double>::max();
296 |     }
297 |     
298 |     // set Hessian and beta
299 |     SetInput(G, w, Hess, beta, k);
300 |     
301 |     // get starting point a0, gradient & fval
302 |     dcopy(&k, a0, &ione, x, &ione);
303 |     GetGrad(g, Hess, beta, x, k);
304 |     f = ObjValue(G, beta, x, tmp, k);
305 |     fmin = f;
306 |     
307 |     // copy to estimate
308 |     dcopy(&k, x, &ione, ahat, &ione);
309 |     fhat[0] = fmin;
310 |     
311 |     /*** SPG Loop ***/
312 |     
313 |     double alpha; // BB parameter
314 |     double gtd; // Directional Derivative
315 |     double t; //stepsize;
316 |     double f_ref; // reference function value in non-monotone linear search
317 |     double Linear, Quad; // ingredient to compute new function value;
318 |     double factor; // for linear search, factor to reduce stepsize
319 |     double Norm1_dx; // ||dx||_1, for linear search and as stopping criterion
320 |     double linear, quad, red_f, f_tmp, norm1_dx; //temporary variable in linear search
321 |     
322 |     int iter = 0;
323 |     int itermax = 500;
324 |     while (1){
325 |         
326 |         //** Compute Step Direction
327 |         if (iter == 0)
328 |             alpha = 1;
329 |         else{
330 |             alpha = GetAlpha(x, x_old, g, g_old, tmp, tmp1, k);
331 |             if (alpha <= 1e-10 || alpha > 1e10) {
332 |                 alpha = 1;
333 |             }
334 |         }
335 |         
336 |         //** Compute the projected step
337 |         GetProjStep(d, x, g, alpha, k, lower, upper);
338 |         
339 |         
340 |         //** Check that Progress can be made along the direction
341 |         gtd = GetDirectDerivative(g, d, k);
342 |         //printf("Directional Derivative %1.22f\n", gtd);
343 |         if (gtd > -optTol){
344 |             //printf("Directional Derivative below optTol %1.22f\n", gtd);
345 |             break;
346 |         }
347 |         
348 |         //** Backtracking Line Search
349 |         // Select Initial Guess to step length
350 |         if (iter == 0){
351 |             t = 1/NormOne(g, k);
352 |             t =  (t > 1) ? 1 : t;
353 |         }
354 |         else{
355 |             t = 1;
356 |         }
357 |         
358 |         // Get the reference function value for non-monotone condition:
359 |         // __update the old_values memorized
360 |         if (iter < MEM_OLD_VALUES)
361 |             old_fvals[iter] = f;
362 |         else{
363 |             for(int i = 0; i < MEM_OLD_VALUES-1; i++){
364 |                 old_fvals[i] = old_fvals[i+1];
365 |             }
366 |             old_fvals[MEM_OLD_VALUES-1] = f;
367 |         }
368 |         
369 |         // __find f_ref = max(old_fvals);
370 |         f_ref = old_fvals[0];
371 |         for(int i = 1; i < MEM_OLD_VALUES; i++){
372 |             if (f_ref < old_fvals[i])
373 |                 f_ref = old_fvals[i];
374 |         }
375 |         
376 |         // ingredients for computing (f_new - f) based on stepsize t:
377 |         // __dx = t * d; Linear = g' * dx; Quad = dx' * Hess * dx;
378 |         // __equivalently, Linear = t * g' * d = t * gtd; Quad = t^2 * d' * Hess * d;
379 |         Linear = t * gtd;
380 |         dgemv(chn, &k, &k, &one, Hess, &k, d, &ione, &zero, tmp, &ione);
381 |         Quad = (double)ddot(&k, d, &ione, tmp, &ione);
382 |         Quad = Quad * t * t;
383 |         
384 |         // __|dx||_1
385 |         Norm1_dx = t * NormOne(d, k);
386 |         //printf("Norm\n");
387 |         //printf("%1.22f\n\n", Norm1_dx);
388 |         
389 |         // stepsize selection
390 |         factor = 1;
391 |         norm1_dx = Norm1_dx * factor;
392 |         while (1) {
393 |             
394 |             //__compute (f_new - f)
395 |             linear = Linear * factor;
396 |             quad = Quad * factor * factor;
397 |             red_f = 0.5 * quad + linear;
398 |             f_tmp = f + red_f;
399 |             
400 |             if (f_tmp < f_ref + suffDec * linear) {
401 |                 //__get sufficient descent
402 |                 t = t * factor;
403 |                 norm1_dx = Norm1_dx * factor;
404 |                 break;
405 |             }
406 |             else {
407 |                 //__Evaluate New Stepsize
408 |                 //__t = t * 0.5; -> t0 fixed; factor = factor * 0.5; t = t0 * factor;
409 |                 factor = factor * 0.5;
410 |             }
411 |             
412 |             //__Check whether step has become too small
413 |             if (Norm1_dx * factor < optTol || t == 0) {
414 |                 //printf("Line Search failed\n");
415 |                 t = 0;
416 |                 norm1_dx = 0;
417 |                 red_f = 0;
418 |                 break;
419 |             }
420 |             
421 |         }
422 |         
423 |         //** Take Step
424 |         
425 |         /*
426 |          * x_old = x;
427 |          * x = x + t * d;
428 |          *
429 |          * first copy x to x_old
430 |          * then x = t * d + x;
431 |          *
432 |          */
433 |         dcopy(&k, x, &ione, x_old, &ione);
434 |         daxpy(&k, &t, d, &ione, x, &ione);
435 |         
436 |         /*
437 |          * g_old = g;
438 |          * g = compute grad(x);
439 |          *
440 |          * first copy g to g_old
441 |          * then compute new gradient
442 |          *
443 |          */
444 |         dcopy(&k, g, &ione, g_old, &ione);
445 |         GetGrad(g, Hess, beta, x, k);
446 |         
447 |         // new objective value and iteration index
448 |         f = f + red_f;
449 |         iter = iter + 1;
450 |         
451 |         //** keep track of the minimum value attained
452 |         if ( f < fmin ){
453 |             fmin = f; // update
454 |             // copy to the estimate
455 |             dcopy(&k, x, &ione, ahat, &ione);
456 |             fhat[0] = fmin;
457 |         }
458 |         
459 |         //** Check 1st order optimality condition
460 |         // tmp = ProjHyperCube(x-g)-x;
461 |         dcopy(&k, x, &ione, tmp, &ione);
462 |         daxpy(&k, &mone, g, &ione, tmp, &ione);
463 |         ProjHyperCube(tmp, tmp, (int)k, lower, upper);
464 |         daxpy(&k, &mone, x, &ione, tmp, &ione);
465 |         
466 |         if ( NormOne(tmp, k) < optTol ){
467 |             //printf("First-Order Optimality Conditions Below optTol\n");
468 |             break;
469 |         }
470 |         
471 |         if (norm1_dx < optTol ) {
472 |             //printf("***********************norm_1: \t %f",norm1_dx);
473 |             break;
474 |         }
475 |         
476 |         if ( dabs(red_f) < optTol ) {
477 |             //printf("***************red_f: \t %f \t optTol: \t %.10f ",dabs(red_f),optTol);
478 |             break;
479 |         }
480 |         
481 |         if( iter == itermax ) {
482 |             //printf("***********update T SPG: Reach iteration limits.");
483 |             break;
484 |         }
485 |     }
486 |     return iter;
487 | }
488 | 
489 | /*** Parallel Computing ***/
490 | //void spawn_threads(double* G, double* W, double* A, double lambda, ptrdiff_t k, int d, double* Anew, double* Loss_new) {
491 | void spawn_threads(double* G, double* W, double* A, ptrdiff_t k, int d, double* Anew, double* Loss_new, double* iters, double optTol, double lower, double upper) {
492 |     
493 |     /* temporary variables */
494 |     double* Hess       = (double*)malloc(MAX_NUM_THREADS * k * k * sizeof(double));
495 |     double* beta       = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
496 |     double* x          = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
497 |     double* x_old      = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
498 |     double* g          = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
499 |     double* g_old      = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
500 |     double* dsct       = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
501 |     double* old_fvals  = (double*)malloc(MAX_NUM_THREADS * MEM_OLD_VALUES * sizeof(double));
502 |     double* tmp        = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
503 |     double* tmp1       = (double*)malloc(MAX_NUM_THREADS * k * sizeof(double));
504 |     double* fhat       = (double*) malloc(MAX_NUM_THREADS * sizeof(double));
505 |     
506 |     if ( Hess == NULL || beta == NULL || x == NULL || x_old == NULL || g == NULL || g_old == NULL ||
507 |             dsct == NULL || old_fvals == NULL || tmp == NULL || tmp1 == NULL || fhat == NULL ) {
508 |         //mexErrMsgTxt("Out of memory.");
509 | 
510 |     }
511 |     // construct independet subproblems
512 |     //omp_set_num_threads(MAX_NUM_THREADS);
513 |     //omp_set_dynamic(DYNAMIC_THREAD);
514 |     int j = 0;
515 |     double loss = 0.0;
516 |     //#pragma omp parallel for private(j), reduction(+: loss)
517 |     for(j = 0; j < d; j++){
518 |         int id = omp_get_thread_num();
519 |         
520 |         //int id = 0;
521 |         int res=QuadHC(G, W + j * k, A + j * k, k,
522 |                 Hess + id * (k * k), beta + id * k,
523 |                 x + id * k, x_old + id * k, g + id * k, g_old + id * k, dsct + id * k,
524 |                 old_fvals + id * k, tmp + id * k, tmp1 + id * k,
525 |                 Anew + j * k, fhat + id, optTol, lower, upper);
526 |         iters[j] = res;
527 |         loss = loss + *(fhat+id);
528 | 
529 |         //printf("Iterations: %d\n", res);
530 |         //printf("Loss: %1.22f\n", loss);
531 | 
532 |     }
533 |     
534 |     Loss_new[0] = loss;
535 |     
536 |     free(fhat);
537 |     free(tmp1);
538 |     free(tmp);
539 |     free(old_fvals);
540 |     free(dsct);
541 |     free(g_old);
542 |     free(g);
543 |     free(x_old);
544 |     free(x);
545 |     free(beta);
546 |     free(Hess);
547 |     
548 | }
549 | 
550 | //[[Rcpp::export]]
551 | List RQuadHC(NumericMatrix  Ginp, NumericMatrix  Winp, NumericMatrix  Ainp, NumericVector otol, NumericVector lconstr, NumericVector uconstr)
552 | {
553 | 
554 | //	Rcpp::NumericMatrix Gi(clone(Ginp));
555 | //	Rcpp::NumericMatrix Wi(clone(Winp));
556 | //	Rcpp::NumericMatrix Ai(clone(Ainp));
557 | 
558 | 	Rcpp::NumericMatrix Gi(Ginp);
559 | 	Rcpp::NumericMatrix Wi(Winp);
560 | 	Rcpp::NumericMatrix Ai(Ainp);
561 | 
562 | 	double* Gptr =  Gi.begin();
563 | 	double* Wptr =  Wi.begin();
564 | 	double* Aptr =  Ai.begin();
565 | 
566 | 	//double* Gend = Gi.end();
567 | 
568 | //	double lambda = Rcpp::as<double>(l);
569 |     double optTol = Rcpp::as<double>(otol);
570 |     double lower = Rcpp::as<double>(lconstr);
571 |     double upper = Rcpp::as<double>(uconstr);
572 | 
573 | 	ptrdiff_t k = (ptrdiff_t) Wi.nrow();
574 | 	int d = Wi.ncol();
575 | 
576 |     double* Anew = NULL;
577 |     double* Loss_new = NULL;
578 |     double* iters = NULL;
579 | 
580 |     //ptrdiff_t nrg = (ptrdiff_t) Gi.nrow();
581 |     //printf("Pointer value: %p\n",  Gptr);
582 |     //printf("First value of G: %1.22f\n", *Gptr);
583 |     //printf("Pointer 2 value: %p\n", Gptr + nrg * 1);
584 |     //printf("Last value of G: %1.22f\n", *( (double*) Gptr + nrg * 1 ));
585 |  
586 |     Rcpp::NumericMatrix newA((int)k,d);
587 |     Anew = newA.begin();
588 | 
589 |     Rcpp::NumericMatrix newLoss(1,1);
590 |     Loss_new = newLoss.begin();
591 |     
592 |     //double* iters = NULL;
593 |     //Rcpp::NumericVector iterations(d);
594 |     Rcpp::NumericMatrix iterations(d,1);
595 |     iters = iterations.begin();
596 | 
597 | 
598 |     ////printf("Starting threads\n");
599 |     // parallel computing //
600 |     spawn_threads(Gptr, Wptr, Aptr, k, d, Anew, Loss_new, iters, optTol, lower, upper);
601 | 
602 |     ////printf("%1.22f\n", newLoss[0]);
603 |     ////printf("%1.22f\n", NumIters[0]);
604 |     ////printf("%1.22f\n", newA[0]);
605 | 
606 |     List result = List::create(
607 | 			Named("A") = wrap(newA),
608 | 			Named("Loss") = wrap(newLoss),
609 | 			Named("iters") = wrap(iterations)
610 | 			);
611 |     //List result = List::create(
612 |     //			Named("A") = wrap(Ai),
613 |     //			Named("Loss") = wrap(Wi),
614 |     //			Named("iters") = wrap(Gi)
615 |     //			);
616 | 
617 | 	return(result);
618 | }
619 | 


--------------------------------------------------------------------------------
/src/RProjSplxBox.cpp:
--------------------------------------------------------------------------------
  1 | /******************************************************
  2 |  * f = mexProjSplxBox(x, l, u)
  3 |  *
  4 |  * Input: x - a vector in Rn.
  5 |  * Output: f - Projection of x onto {y: sum(y) == 1, l <= y <= u}
  6 |  *
  7 |  *********************************************************/
  8 | 
  9 | #include <stdio.h>
 10 | #include <cstddef>
 11 | #include "dynblas.h"
 12 | #include <omp.h>
 13 | #include <limits>
 14 | #include <Rcpp.h>
 15 | #include <float.h>
 16 | using namespace Rcpp;
 17 | 
 18 | inline double dabs(double x){
 19 |     if ( x < 0 )
 20 |         x = -x;
 21 |     return x;
 22 | }
 23 | 
 24 | void Proj(double* f, double* x, double* l, double* u, double* y, double* z, double* p, double* q, int k) {
 25 |     /* 
 26 |      * projection uses Dykstra's algorithm
 27 |      * 
 28 |      */
 29 |    
 30 |      double tol1 = 0;
 31 |      double tol2 = 0;	
 32 |      double tolcur = 0;	
 33 |      double tolterm = 1E-12; 
 34 |      double mean = 0;
 35 |      double sum = 0;	 
 36 |      double delta = 0;
 37 |      double overk = 1/(double)k;
 38 |      /* int maxiters = 10000; */
 39 |      /* int itercur = 0; */	
 40 | 
 41 | 	for(int i = 0; i < k; i++){
 42 | 	 	p[i] = 0;
 43 |        	}
 44 |     
 45 | 
 46 | 	for(int i = 0; i < k; i++){
 47 | 	 	q[i] = 0;
 48 |        	}
 49 | 
 50 |       
 51 |      for(int i = 0; i < k; i ++){
 52 |        f[i] = x[i];		
 53 |      }
 54 | 
 55 | 		
 56 | 
 57 |      do{/* break; */
 58 |        /* itercur = itercur + 1; */
 59 |        /* reset before each new round */
 60 |        mean = 0;
 61 |        tol1 = 0;
 62 |        tol2 = 0;
 63 | 
 64 |        /* copy old iterate */
 65 |        for(int i = 0; i < k; i++){
 66 | 	 y[i] = f[i];
 67 |        }
 68 | 	
 69 | 
 70 |        /* projection 1: box constraints, primal */
 71 | 
 72 |        for(int i = 0; i < k; i++) {
 73 | 	 
 74 | 	 z[i] = f[i] + p[i];
 75 | 
 76 | 	 if(z[i] < l[i]){
 77 | 	   z[i] = l[i];
 78 | 	 }
 79 | 
 80 | 	 if(z[i] > u[i]){
 81 | 	   z[i] = u[i];
 82 | 	 }	
 83 |        }
 84 | 
 85 |        /* projection 1: box constraints, dual */ 
 86 | 
 87 |        for(int i = 0; i < k; i++) {
 88 | 	 p[i] = f[i] + p[i] - z[i];
 89 |        }
 90 | 
 91 |        /* projection 2: sum constraint, primal */
 92 | 
 93 |        for(int i = 0; i < k; i++) {
 94 | 	 f[i] = z[i] + q[i]; 
 95 | 	 mean = mean + f[i] * overk; 
 96 |        }
 97 | 
 98 | 
 99 | 
100 |        for(int i = 0; i < k; i++) {
101 | 	 f[i] = f[i] - mean + overk;
102 |        }
103 | 
104 |        /* projection 2: sum constraint, primal */
105 | 
106 |        for(int i = 0; i < k; i++) {
107 | 	 q[i] = z[i] + q[i] - f[i];
108 |        }
109 | 
110 |        /* tolerance checking */
111 | 
112 |        for(int i = 0; i < k; i++) {
113 | 	 delta = dabs(f[i] - y[i]);
114 | 	 if(delta > tol1){
115 | 	   tol1 = delta;
116 | 	 }
117 |        }
118 | 
119 |        for(int i = 0; i < k; i++) {
120 | 	 delta = l[i] - f[i];
121 | 	 if(delta > tol2){
122 | 	   tol2 = delta;
123 | 	 }
124 | 
125 | 	 delta  = f[i] - u[i];
126 | 	 if(delta > tol2){
127 | 	   tol2 = delta;
128 | 	 }
129 |        }
130 | 
131 |        sum = 0;	
132 |        for(int i=0; i < k; i++){		
133 | 	sum = sum + f[i];
134 |        }
135 | 
136 |        if(dabs(sum - 1) > tol2){
137 | 	tol2 = dabs(sum - 1);	 	
138 |        }	
139 | 
140 |        if(tol1 < tol2){
141 | 	 tolcur = tol2;
142 |        }
143 |        else{
144 | 	   tolcur = tol1;	
145 |        }
146 | 
147 |      /* printf ("tols: %1.6f \n", tol2); */
148 | 
149 |      } while(tolcur > tolterm);/* && itercur < maxiters); */     
150 | }
151 | 
152 | 
153 | //[[Rcpp::export]]
154 |  NumericMatrix RProjSplxBox(NumericMatrix Xinp, NumericVector linp, NumericVector uinp) {
155 |     
156 | 	Rcpp::NumericMatrix Xi(clone(Xinp));
157 | 	Rcpp::NumericVector li(clone(linp));
158 | 	Rcpp::NumericVector ui(clone(uinp));
159 | 
160 |     /*Get pointer to input data*/
161 | 
162 | 	double* x =  Xi.begin();
163 | 	double* l =  li.begin();
164 | 	double* u =  ui.begin();
165 | 
166 |     //int cols = Xi.ncol();
167 |     int rows = Xi.nrow();
168 | 	
169 |     double* f = NULL;      /* output  */
170 |     
171 |     /* create the output vector */
172 |     Rcpp::NumericMatrix newX((int)rows,1);
173 |     f = newX.begin();
174 | 
175 |     /* int* ix = (int*) malloc(rows * sizeof(int)); */
176 |     double* y = (double*) malloc(rows * sizeof(double));
177 |     double* z = (double*) malloc(rows * sizeof(double)); 
178 |     double* p = (double*) malloc(rows * sizeof(double));
179 |     double* q = (double*) malloc(rows * sizeof(double));
180 |       
181 |     Proj(f, x, l, u, y, z, p, q, rows); 
182 |     
183 |     /* free(ix); */
184 |     free(z);
185 |     free(y);
186 |     free(p);
187 |     free(q);
188 |     
189 |     return(newX);
190 |     
191 | }
192 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
  1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | #include <RcppEigen.h>
  5 | #include <Rcpp.h>
  6 | 
  7 | using namespace Rcpp;
  8 | 
  9 | // RHLasso
 10 | List RHLasso(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector l);
 11 | RcppExport SEXP _MeDeCom_RHLasso(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP lSEXP) {
 12 | BEGIN_RCPP
 13 |     Rcpp::RObject rcpp_result_gen;
 14 |     Rcpp::RNGScope rcpp_rngScope_gen;
 15 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
 16 |     Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
 17 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
 18 |     Rcpp::traits::input_parameter< NumericVector >::type l(lSEXP);
 19 |     rcpp_result_gen = Rcpp::wrap(RHLasso(Ginp, Winp, Ainp, l));
 20 |     return rcpp_result_gen;
 21 | END_RCPP
 22 | }
 23 | // RQuadHC
 24 | List RQuadHC(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector otol, NumericVector lconstr, NumericVector uconstr);
 25 | RcppExport SEXP _MeDeCom_RQuadHC(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP otolSEXP, SEXP lconstrSEXP, SEXP uconstrSEXP) {
 26 | BEGIN_RCPP
 27 |     Rcpp::RObject rcpp_result_gen;
 28 |     Rcpp::RNGScope rcpp_rngScope_gen;
 29 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
 30 |     Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
 31 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
 32 |     Rcpp::traits::input_parameter< NumericVector >::type otol(otolSEXP);
 33 |     Rcpp::traits::input_parameter< NumericVector >::type lconstr(lconstrSEXP);
 34 |     Rcpp::traits::input_parameter< NumericVector >::type uconstr(uconstrSEXP);
 35 |     rcpp_result_gen = Rcpp::wrap(RQuadHC(Ginp, Winp, Ainp, otol, lconstr, uconstr));
 36 |     return rcpp_result_gen;
 37 | END_RCPP
 38 | }
 39 | // RProjSplxBox
 40 | NumericMatrix RProjSplxBox(NumericMatrix Xinp, NumericVector linp, NumericVector uinp);
 41 | RcppExport SEXP _MeDeCom_RProjSplxBox(SEXP XinpSEXP, SEXP linpSEXP, SEXP uinpSEXP) {
 42 | BEGIN_RCPP
 43 |     Rcpp::RObject rcpp_result_gen;
 44 |     Rcpp::RNGScope rcpp_rngScope_gen;
 45 |     Rcpp::traits::input_parameter< NumericMatrix >::type Xinp(XinpSEXP);
 46 |     Rcpp::traits::input_parameter< NumericVector >::type linp(linpSEXP);
 47 |     Rcpp::traits::input_parameter< NumericVector >::type uinp(uinpSEXP);
 48 |     rcpp_result_gen = Rcpp::wrap(RProjSplxBox(Xinp, linp, uinp));
 49 |     return rcpp_result_gen;
 50 | END_RCPP
 51 | }
 52 | // RQuadSimplex
 53 | List RQuadSimplex(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector ot);
 54 | RcppExport SEXP _MeDeCom_RQuadSimplex(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP otSEXP) {
 55 | BEGIN_RCPP
 56 |     Rcpp::RObject rcpp_result_gen;
 57 |     Rcpp::RNGScope rcpp_rngScope_gen;
 58 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
 59 |     Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
 60 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
 61 |     Rcpp::traits::input_parameter< NumericVector >::type ot(otSEXP);
 62 |     rcpp_result_gen = Rcpp::wrap(RQuadSimplex(Ginp, Winp, Ainp, ot));
 63 |     return rcpp_result_gen;
 64 | END_RCPP
 65 | }
 66 | // RQuadSimplexBox
 67 | List RQuadSimplexBox(NumericMatrix Ginp, NumericMatrix Winp, NumericMatrix Ainp, NumericVector linp, NumericVector uinp, NumericVector ot);
 68 | RcppExport SEXP _MeDeCom_RQuadSimplexBox(SEXP GinpSEXP, SEXP WinpSEXP, SEXP AinpSEXP, SEXP linpSEXP, SEXP uinpSEXP, SEXP otSEXP) {
 69 | BEGIN_RCPP
 70 |     Rcpp::RObject rcpp_result_gen;
 71 |     Rcpp::RNGScope rcpp_rngScope_gen;
 72 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ginp(GinpSEXP);
 73 |     Rcpp::traits::input_parameter< NumericMatrix >::type Winp(WinpSEXP);
 74 |     Rcpp::traits::input_parameter< NumericMatrix >::type Ainp(AinpSEXP);
 75 |     Rcpp::traits::input_parameter< NumericVector >::type linp(linpSEXP);
 76 |     Rcpp::traits::input_parameter< NumericVector >::type uinp(uinpSEXP);
 77 |     Rcpp::traits::input_parameter< NumericVector >::type ot(otSEXP);
 78 |     rcpp_result_gen = Rcpp::wrap(RQuadSimplexBox(Ginp, Winp, Ainp, linp, uinp, ot));
 79 |     return rcpp_result_gen;
 80 | END_RCPP
 81 | }
 82 | // cppTAfact
 83 | RcppExport SEXP cppTAfact(SEXP mDtSEXP, SEXP mTtinitSEXP, SEXP mAinitSEXP, double lambda, int itersMax, double tol, double tolA, double tolT);
 84 | RcppExport SEXP _MeDeCom_cppTAfact(SEXP mDtSEXPSEXP, SEXP mTtinitSEXPSEXP, SEXP mAinitSEXPSEXP, SEXP lambdaSEXP, SEXP itersMaxSEXP, SEXP tolSEXP, SEXP tolASEXP, SEXP tolTSEXP) {
 85 | BEGIN_RCPP
 86 |     Rcpp::RObject rcpp_result_gen;
 87 |     Rcpp::RNGScope rcpp_rngScope_gen;
 88 |     Rcpp::traits::input_parameter< SEXP >::type mDtSEXP(mDtSEXPSEXP);
 89 |     Rcpp::traits::input_parameter< SEXP >::type mTtinitSEXP(mTtinitSEXPSEXP);
 90 |     Rcpp::traits::input_parameter< SEXP >::type mAinitSEXP(mAinitSEXPSEXP);
 91 |     Rcpp::traits::input_parameter< double >::type lambda(lambdaSEXP);
 92 |     Rcpp::traits::input_parameter< int >::type itersMax(itersMaxSEXP);
 93 |     Rcpp::traits::input_parameter< double >::type tol(tolSEXP);
 94 |     Rcpp::traits::input_parameter< double >::type tolA(tolASEXP);
 95 |     Rcpp::traits::input_parameter< double >::type tolT(tolTSEXP);
 96 |     rcpp_result_gen = Rcpp::wrap(cppTAfact(mDtSEXP, mTtinitSEXP, mAinitSEXP, lambda, itersMax, tol, tolA, tolT));
 97 |     return rcpp_result_gen;
 98 | END_RCPP
 99 | }
100 | 
101 | static const R_CallMethodDef CallEntries[] = {
102 |     {"_MeDeCom_RHLasso", (DL_FUNC) &_MeDeCom_RHLasso, 4},
103 |     {"_MeDeCom_RQuadHC", (DL_FUNC) &_MeDeCom_RQuadHC, 6},
104 |     {"_MeDeCom_RProjSplxBox", (DL_FUNC) &_MeDeCom_RProjSplxBox, 3},
105 |     {"_MeDeCom_RQuadSimplex", (DL_FUNC) &_MeDeCom_RQuadSimplex, 4},
106 |     {"_MeDeCom_RQuadSimplexBox", (DL_FUNC) &_MeDeCom_RQuadSimplexBox, 6},
107 |     {"_MeDeCom_cppTAfact", (DL_FUNC) &_MeDeCom_cppTAfact, 8},
108 |     {NULL, NULL, 0}
109 | };
110 | 
111 | RcppExport void R_init_MeDeCom(DllInfo *dll) {
112 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
113 |     R_useDynamicSymbols(dll, FALSE);
114 | }
115 | 


--------------------------------------------------------------------------------
/src/dynblas.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Defines the prototypes for BLAS Fortran functions.
  3 |  *
  4 |  * Also defines a typedef blas_int to be used for all integers passed to BLAS
  5 |  * functions.
  6 |  *
  7 |  * When used in the context of a MATLAB MEX file, you must define MATLAB_MEX_FILE
  8 |  * and MATLAB_VERSION (for version 7.4, define it to 0x0704).
  9 |  *
 10 |  *
 11 |  * Copyright (C) 2009-2011 Dynare Team
 12 |  *
 13 |  * This file is part of Dynare.
 14 |  *
 15 |  * Dynare is free software: you can redistribute it and/or modify
 16 |  * it under the terms of the GNU General Public License as published by
 17 |  * the Free Software Foundation, either version 3 of the License, or
 18 |  * (at your option) any later version.
 19 |  *
 20 |  * Dynare is distributed in the hope that it will be useful,
 21 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 22 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 23 |  * GNU General Public License for more details.
 24 |  *
 25 |  * You should have received a copy of the GNU General Public License
 26 |  * along with Dynare.  If not, see <http://www.gnu.org/licenses/>.
 27 |  */
 28 | 
 29 | #ifndef _DYNBLAS_H
 30 | #define _DYNBLAS_H
 31 | 
 32 | /* Starting from version 7.8, MATLAB BLAS expects ptrdiff_t arguments for integers */
 33 | /*#if defined(MATLAB_MEX_FILE) && MATLAB_VERSION >= 0x0708*/
 34 | # ifdef __cplusplus
 35 | #  include <cstddef>
 36 | # else
 37 | #  include <stddef.h>
 38 | # endif
 39 | 
 40 | typedef ptrdiff_t blas_int;
 41 | 
 42 | #if defined(MATLAB_MEX_FILE) && defined(_WIN32) && !defined(_MSC_VER)
 43 | # define FORTRAN_WRAPPER(x) x
 44 | #else
 45 | # define FORTRAN_WRAPPER(x) x ## _
 46 | #endif
 47 | 
 48 | #ifdef __cplusplus
 49 | extern "C" {
 50 | #endif
 51 | 
 52 |   typedef const char *BLCHAR;
 53 |   typedef const blas_int *CONST_BLINT;
 54 |   typedef const double *CONST_BLDOU;
 55 |   typedef const float *CONST_BLFLT;
 56 |   typedef double *BLDOU;
 57 |   typedef float *BLFLT;
 58 | 
 59 | #define dgemm FORTRAN_WRAPPER(dgemm)
 60 |   void dgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
 61 |              CONST_BLINT k, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
 62 |              CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
 63 |              BLDOU c, CONST_BLINT ldc);
 64 | 
 65 | #define sgemm FORTRAN_WRAPPER(sgemm)
 66 |   void sgemm(BLCHAR transa, BLCHAR transb, CONST_BLINT m, CONST_BLINT n,
 67 |              CONST_BLINT k, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda,
 68 |              CONST_BLFLT b, CONST_BLINT ldb, CONST_BLFLT beta,
 69 |              BLFLT c, CONST_BLINT ldc);
 70 | 
 71 | #define dsymm FORTRAN_WRAPPER(dsymm)
 72 |   void dsymm(BLCHAR side, BLCHAR uplo, CONST_BLINT m, CONST_BLINT n,
 73 |              CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
 74 |              CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
 75 |              BLDOU c, CONST_BLINT ldc);
 76 | 
 77 | #define dgemv FORTRAN_WRAPPER(dgemv)
 78 |   void dgemv(BLCHAR trans, CONST_BLINT m, CONST_BLINT n, CONST_BLDOU alpha,
 79 |              CONST_BLDOU a, CONST_BLINT lda, CONST_BLDOU x, CONST_BLINT incx,
 80 |              CONST_BLDOU beta, BLDOU y, CONST_BLINT incy);
 81 | 
 82 | #define dsymv FORTRAN_WRAPPER(dsymv)
 83 |   void dsymv(BLCHAR uplo, CONST_BLINT m, CONST_BLDOU alpha, CONST_BLDOU a,
 84 |              CONST_BLINT lda, CONST_BLDOU b, CONST_BLINT ldb, CONST_BLDOU beta,
 85 |              BLDOU c, CONST_BLINT ldc);
 86 | 
 87 | #define dtrsv FORTRAN_WRAPPER(dtrsv)
 88 |   void dtrsv(BLCHAR uplo, BLCHAR trans, BLCHAR diag, CONST_BLINT n,
 89 |              CONST_BLDOU a, CONST_BLINT lda, BLDOU x, CONST_BLINT incx);
 90 | 
 91 | #define dtrmv FORTRAN_WRAPPER(dtrmv)
 92 |   void dtrmv(BLCHAR uplo, BLCHAR trans, BLCHAR diag, CONST_BLINT n,
 93 |              CONST_BLDOU a, CONST_BLINT lda, BLDOU x, CONST_BLINT incx);
 94 | 
 95 | #define daxpy FORTRAN_WRAPPER(daxpy)
 96 |   void daxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
 97 |              BLDOU y, CONST_BLINT incy);
 98 | 
 99 | #define saxpy FORTRAN_WRAPPER(saxpy)
100 |   void saxpy(CONST_BLINT n, CONST_BLFLT a, CONST_BLFLT x, CONST_BLINT incx,
101 |              BLFLT y, CONST_BLINT incy);
102 | 
103 | #define dcopy FORTRAN_WRAPPER(dcopy)
104 |   void dcopy(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx,
105 |              BLDOU y, CONST_BLINT incy);
106 | 
107 | #define zaxpy FORTRAN_WRAPPER(zaxpy)
108 |   void zaxpy(CONST_BLINT n, CONST_BLDOU a, CONST_BLDOU x, CONST_BLINT incx,
109 |              BLDOU y, CONST_BLINT incy);
110 | 
111 | #define dscal FORTRAN_WRAPPER(dscal)
112 |   void dscal(CONST_BLINT n, CONST_BLDOU a, BLDOU x, CONST_BLINT incx);
113 | 
114 | #define sscal FORTRAN_WRAPPER(sscal)
115 |   void sscal(CONST_BLINT n, CONST_BLDOU a, BLFLT x, CONST_BLINT incx);
116 | 
117 | #define dtrsm FORTRAN_WRAPPER(dtrsm)
118 |   void dtrsm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
119 |              CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
120 |              BLDOU b, CONST_BLINT ldb);
121 | 
122 | #define ddot FORTRAN_WRAPPER(ddot)
123 |   double ddot(CONST_BLINT n, CONST_BLDOU x, CONST_BLINT incx, CONST_BLDOU y,
124 |               CONST_BLINT incy);
125 | 
126 | #define dsyr FORTRAN_WRAPPER(dsyr)
127 |   void dsyr(BLCHAR uplo, CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU x,
128 |             CONST_BLINT incx, BLDOU a, CONST_BLINT lda);
129 | 
130 | #define dtrmm FORTRAN_WRAPPER(dtrmm)
131 |   void dtrmm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
132 |              CONST_BLINT n, CONST_BLDOU alpha, CONST_BLDOU a, CONST_BLINT lda,
133 |              BLDOU b, CONST_BLINT ldb);
134 | 
135 | #define strmm FORTRAN_WRAPPER(strmm)
136 |   void strmm(BLCHAR side, BLCHAR uplo, BLCHAR transa, BLCHAR diag, CONST_BLINT m,
137 |              CONST_BLINT n, CONST_BLFLT alpha, CONST_BLFLT a, CONST_BLINT lda,
138 |              BLFLT b, CONST_BLINT ldb);
139 | 
140 | #define dasum FORTRAN_WRAPPER(dasum)
141 |   double dasum(CONST_BLINT n, BLDOU dx, CONST_BLINT incx);
142 | 
143 | 
144 | 
145 | #ifdef __cplusplus
146 | } /* extern "C" */
147 | #endif
148 | 
149 | #endif /* _DYNBLAS_H */
150 | 


--------------------------------------------------------------------------------
/tests/runTests.R:
--------------------------------------------------------------------------------
1 | BiocGenerics:::testPackage("MeDeCom")


--------------------------------------------------------------------------------
/vignettes/MeDeCom.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "MeDeCom: Methylome Decomposition via Constrained Matrix Factorization"
  3 | author: "Pavlo Lutsik, Martin Slawski, Gilles Gasparoni, Nikita Vedeneev, Matthias Hein and Joern Walter"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   rmarkdown::html_document:
  7 |     mathjax: default
  8 |     toc: true
  9 |     number_sections: false
 10 |     fig_width: 5
 11 |     fig_height: 5
 12 | vignette: >
 13 |   %\VignetteIndexEntry{MeDeCom}
 14 |   %\VignetteEngine{knitr::rmarkdown}
 15 |   \usepackage[utf8]{inputenc}
 16 | ---
 17 | 
 18 | # Introduction
 19 | 
 20 | *MeDeCom* is an R-package for reference-free decomposition of heterogeneous DNA methylation profiles. 
 21 | It uses matrix factorization enhanced by constraints and a specially tailored regularization. 
 22 | *MeDeCom* represents an input $m\times n$ data matrix ($m$ CpGs measured in $n$ samples) as a product of two other matrices. 
 23 | The first matrix has $m$ rows, just as the input data, but the number of columns is equal to $k$. 
 24 | The columns of this matrix can be interpreted as methylomes of the $k$ unknown 
 25 | cell populations underlying the samples and will be referred to as **latent methylation components** or **LMCs**. 
 26 | The second matrix has $k$ rows and $n$ columns, and can be interpreted as a matrix of relative contributions (mixing proportions) 
 27 | of each LMC to each sample.
 28 | 
 29 | *MeDeCom* starts with a set of related DNA methylation profiles, e.g. a series of Infinium microarray measurements
 30 | from a population cohort, or several bisulfite sequencing-based methylomes. The key requirement is that
 31 |  the input data represents absolute DNA methylation measurements in a population of cells 
 32 | and contains values between 0 and 1. *MeDeCom* implements an alternating scheme which iteratively 
 33 | updates randomly initialized factor matrices until convergence or until the maximum number of iterations 
 34 | has been reached. This is repeated for multiple random initializations and the best solution is returned.
 35 | 
 36 | MeDeCom features two tunable parameters. The first one is the number of LMCs $k$, an approximate choice for which 
 37 | should be known from prior information. To enforce the distribution properties of a methylation profile 
 38 | upon LMCs *MeDeCom* uses a special for of regularization controlled by the parameter $\lambda$. 
 39 | A typical *MeDeCom* experiment includes testing a grid of values for $k$ and $\lambda$. For each combination 
 40 | of parameter values *MeDeCom* estimates a cross-validation error. The latter helps select the optimal number 
 41 | of LMCs and the strength of regularization.
 42 | 
 43 | # Installation
 44 | 
 45 | *MeDeCom* can be installed directly from github using the package `devtools`:
 46 | 
 47 | ```{r, eval=FALSE}
 48 | devtools::install_github("lutsik/MeDeCom")
 49 | ```
 50 | 
 51 | The master branch of the GitHub repository only compiles on *nix-like* platforms with a C++11-compatible compiler are supported.
 52 | Users on Mac OS might run into compilation errors as OpenMP, which is required for compilation, is disabled by default. To enable OpenMP, the following tutorial can be followed [https://mac.r-project.org/openmp/](https://mac.r-project.org/openmp/)
 53 | We created a separate branch for installation of *MeDeCom* on Windows machines. However, parallel processing options are currently
 54 | not supported for Windows due to some incompatabilites of the R/Windows connection. Thus, executing MeDeCom on a Windows machine
 55 | takes sustantially longer. Additionally, we creared a Docker image with MeDeCom, which can be used in case of further installation
 56 | issues [https://hub.docker.com/r/mscherer/medecom](https://hub.docker.com/r/mscherer/medecom).
 57 | 
 58 | ```{r, eval=FALSE}
 59 | devtools::install_github("lutsik/MeDeCom",ref="windows")
 60 | ```
 61 | 
 62 | MeDeCom uses stack model for memory to accelerate factorization for smaller ranks. 
 63 | This requires certain preparation during compilation, therefore, please, note the extended
 64 | compilation time (15 to 20 minutes).
 65 | 
 66 | # Data preparation
 67 | 
 68 | *MeDeCom* accepts DNA methylation data in several forms. Preferably the user may load and preprocess the data
 69 | using a general-purpose DNA methylation analysis package [RnBeads](http://rnbeads.mpi-inf.mpg.de). A resulting RnBSet object
 70 | can be directly supplied to *MeDeCom*. Alternatively, *MeDeCom* runs on any matrix of type `numeric` with valid methylation values.
 71 | 
 72 | *MeDeCom* comes with a small example data set obtained by mixing reference profiles of blood cell methylomes *in silico*.
 73 | The example data set can be loaded in a usual way:
 74 | 
 75 | ```{r}
 76 | ## load the package
 77 | suppressPackageStartupMessages(library(MeDeCom))
 78 | ##  load the example data sets
 79 | data(example.dataset, package="MeDeCom")
 80 | ## you should get objects D, Tref and Aref
 81 | ## in your global R environment
 82 | ls()
 83 | ```
 84 | 
 85 | Loaded numeric matrix `D` contains 100 *in silico* mixtures and serves as an example input. Columns of matrix `Tref` contains the methylomes 
 86 | of 5 blood cell types used to generate the mixtures, while matrix `Aref` provides the mixing proportions.
 87 | ```{r}
 88 | ## matrix D has dimension 10000x100
 89 | str(D)
 90 | ## matrix Tref has dimension 10000x5
 91 | str(Tref)
 92 | ## matrix Aref has dimension 5x100
 93 | str(Aref)
 94 | ```
 95 | # Performing a methylome decomposition experiment
 96 | 
 97 | *MeDeCom* can be run directly on matrix `D`. 
 98 | 
 99 | It is crucial to select the values of parameters $k$ and $\lambda$ to test. 
100 | A choice of $k$ is often dictated by prior knowledge about the methylomes.
101 | Precise value of lambda has to be selected for each data set independently. A good start 
102 | is a logarithmic grid of lambda values. It is important to include $\lambda=0$ into the 
103 | grid, as this particular case the regularization is effectively absent making *MeDeCom* similar 
104 | to other NMF-based deconvolution algorithms.
105 | 
106 | ```
107 | medecom.result<-runMeDeCom(D, Ks=2:10, lambdas=c(0,10^(-5:-1)))
108 | ```
109 | 
110 | *MeDeCom* is based upon an alternating optimization heuristic and requires a lot 
111 | of computation. The processing of the data matrix can take several hours.
112 | One can speed up the run by decreasing the number of cross-validation folds and random initializations, and 
113 | increasing the number of computational cores.
114 | 
115 | ```{r, eval=FALSE}
116 | medecom.result<-runMeDeCom(D, 2:10, c(0,10^(-5:-1)), NINIT=10, NFOLDS=10, ITERMAX=300, NCORES=9)
117 | ```
118 | 
119 | ```{r, echo=FALSE}
120 | cat("
121 | [Main:] checking inputs
122 | [Main:] preparing data
123 | [Main:] preparing jobs
124 | [Main:] 3114 factorization runs in total
125 | [Main:] runs 2755 to 2788 complete
126 | [Main:] runs 2789 to 2822 complete
127 | [Main:] runs 2823 to 2856 complete
128 | [Main:] runs 2857 to 2890 complete
129 | ......
130 | [Main:] finished all jobs. Creating the object
131 | ")
132 | data(example.MeDeComSet)
133 | ```
134 | 
135 | This can, however, lead to decomposition slightly different from the one presented given below.
136 | 
137 | The results of a decomposition experiment are saved to an object of class `MeDeComSet`.
138 | The contents of an object can be conveniently displayed using the `print` functionality.
139 | 
140 | ```{r}
141 | medecom.result
142 | ```
143 | 
144 | # Exploring the decomposition results
145 | 
146 | ## Parameter selection 
147 | 
148 | The first key step is parameter selection. It is important to carefully explore the obtained results and make a decision about 
149 | the most feasible parameter values, or about extending the parameter value grids to be tested in refinement experiments.
150 | 
151 | *MeDeCom* provides a **cross-validation error** (CVE) for each tested parameter combination.
152 | 
153 | ```{r, fig.width=7}
154 | plotParameters(medecom.result)
155 | ```
156 | 
157 | A lineplot helping to select parameter $\lambda$ can be produced by specifying a fixed value for $k$:
158 | 
159 | ```{r, fig.width=5.5, fig.height=6}
160 | plotParameters(medecom.result, K=5, lambdaScale="log")
161 | ```
162 | 
163 | Cross-validation error has a minimum at $\lambda=10^{-2}$ so this value is preferred.
164 | 
165 | ## Latent methylation components (LMCs)
166 | 
167 | A matrix of LMCs can be extracted using `getLMCs`:
168 | 
169 | ```{r}
170 | lmcs<-getLMCs(medecom.result, K=5, lambda=0.01)
171 | str(lmcs)
172 | ```
173 | 
174 | LMCs can be seen as measured methylation profiles of purified cell populations. 
175 | *MeDeCom* provides for several visualization methods for LMCs using the function `plotLMCs` 
176 | which operates directly on `MeDeComSet` objects.
177 | 
178 | ### Clustering
179 | 
180 | For instance, standard hierarchical clustering can be visualized using:
181 | ```{r}
182 | plotLMCs(medecom.result, K=5, lambda=0.01, type="dendrogram")
183 | ```
184 | 
185 | A two-dimensional embedding with MDS is also obtainable:
186 | 
187 | ```{r}
188 | plotLMCs(medecom.result, K=5, lambda=0.01, type="MDS")
189 | ```
190 | 
191 | Input data can be included into the MDS plot to enhance the interpretation.
192 | 
193 | ```{r}
194 | plotLMCs(medecom.result, K=5, lambda=0.01, type="MDS", D=D)
195 | ```
196 | 
197 | ### Matching LMCs to reference profiles
198 | 
199 | In many cases reference methylomes exists, which are relevant for the data set in question.
200 | For our example analysis matrix `Tref` contains the reference type 
201 | profiles which were *in silico* mixed. *MeDeCom* offers several ways to visualize the 
202 | resulting LMCs together with the reference methylation profiles. The reference methylomes 
203 | can be included into a joint clustering analysis: 
204 | 
205 | ```{r}
206 | plotLMCs(medecom.result, K=5, lambda=0.01, type="dendrogram", Tref=Tref, center=TRUE)
207 | ```
208 | 
209 | Furthermore, a similarity matrix of LMCs vs reference profiles can be visualized as a heatmap.
210 | 
211 | ```{r}
212 | plotLMCs(medecom.result, K=5, lambda=0.01, type="heatmap", Tref=Tref)
213 | ```
214 | 
215 | Correlation coefficient values and asterisks aid the interpretation.
216 | The values are displayed in the cells which contain maximal values column-wise.
217 | The asterisks mark cells which have the highest correlation value in the respective rows.
218 | Thus, a value with asterisk corresponds to a mutual match, i.e. LMC unambiguously 
219 | matching a reference profile.
220 | 
221 | In this example analysis each LMC uniquely matches one of the reference 
222 | profiles. The matching of 
223 | 
224 | Function `matchLMCs` offers several methods for 
225 | matching LMCs to reference profiles.
226 | 
227 | ```{r}
228 | perm<-matchLMCs(lmcs, Tref)
229 | ```
230 | 
231 | ### LMC enrichment analysis
232 | 
233 | MeDeCom provides functions to perform enrichment analysis on the sites that are particularly hypo-/hypermethylated in an LMC. These sites can then be used for GO and LOLA enrichment analysis. Importantly, genomic annotations of the LMC sites is required to be specified. We thus recommend to use the [DecompPipeline](https://github.com/CompEpigen/DecompPipeline) package for processing, but the annotation can also be specified manually using a ```data.frame``` that looks as follows:
234 | 
235 | ```{r,CpG_annotation,eval=F}
236 |       Chromosome   Start     End Strand CpG GC CGI Relation SNPs
237 | 30365       chr1 1036375 1036376      +   2 59        Shelf <NA>
238 | 42681       chr1 1184537 1184538      +   2 58     Open Sea <NA>
239 | 45091       chr1 1218625 1218626      +   5 66       Island <NA>
240 | 51615       chr1 1292773 1292774      +   3 64       Island <NA>
241 | 52001       chr1 1295504 1295505      +   9 65        Shore <NA>
242 | 52003       chr1 1295507 1295508      +   9 65        Shore <NA>
243 | ```
244 | 
245 | The required columns are `Chromosome`, `Start`, `End`, and `Strand`. Using this ```data.frame``` (called `df` in the following), enrichment analysis can be performed using:
246 | 
247 | ```{r enrichment, eval=F}
248 | lmc.lola.enrichment(medecom.result,anno.data=df,K=5,lambda=0.001,diff.threshold = 0.5, region.type = "tiling")
249 | ```
250 | 
251 | Please note that `df` needs to have the same number of rows than the methylation matrix used as input to MeDeCom. CpGs are first aggregated over the `region.type` specified, then the regions are selected that have a difference larger than `diff.threshold`. The list of available region types is published here [https://rnbeads.org/regions.html](https://rnbeads.org/regions.htm).
252 | 
253 | ## Mixing proportions
254 | 
255 | A matrix of mixing proportions is obtained using `getProportions`:
256 | 
257 | ```{r}
258 | prop<-getProportions(medecom.result, K=5, lambda=0.001)
259 | str(prop)
260 | ```
261 | 
262 | ### Visualization of the complete proportion matrix
263 | 
264 | A complete matrix of propotions can be visualized as a stacked barplot:
265 | ```{r}
266 | plotProportions(medecom.result, K=5, lambda=0.01, type="barplot")
267 | ```
268 | 
269 | or a heatmap:
270 | 
271 | ```{r, fig.width=8, fig.height=6}
272 | plotProportions(medecom.result, K=5, lambda=0.01, type="heatmap")
273 | ```
274 | 
275 | The heatmap can be enhanced by clustering the columns:
276 | 
277 | ```{r, fig.width=8, fig.height=6}
278 | plotProportions(medecom.result, K=5, lambda=0.01, type="heatmap", heatmap.clusterCols=TRUE)
279 | ```
280 | 
281 | or adding color code for the samples:
282 | 
283 | ```{r, fig.width=8, fig.height=6}
284 | sample.group<-c("Case", "Control")[1+sample.int(ncol(D))%%2]
285 | plotProportions(medecom.result, K=5, lambda=0.01, type="heatmap", sample.characteristic=sample.group)
286 | ```
287 | 
288 | ### Visualization of selected LMC proportions
289 | 
290 | ```{r, echo=FALSE}
291 | rownames(Aref)<-colnames(Tref)
292 | ```
293 | 
294 | ```{r}
295 | plotProportions(medecom.result,  K=5, lambda=0.01, type="lineplot", lmc=2, Aref=Aref, ref.profile=2)
296 | ```
297 | 
298 | # Advanced usage
299 | 
300 | ## Running *MeDeCom* on a compute cluster
301 | 
302 | *MeDeCom* experiments require a lot of computational time. On the other hand most of the factorization runs are 
303 | independent and, therefore, can be run in parallel. Thus, a significant speedup can be achieved when running *MeDeCom* 
304 | in an HPC environment. *MeDeCom* can be easily adapted to most of the popular schedulers. There are, however, several prerequisites:
305 | 
306 |  * the scheduler provides the standard utilities `qsub` for the submission of the cluster jobs and `qstat` for obtaining the job statistics;
307 |  * the cluster does not have a low limit on the number of submitted jobs;
308 |  * the R installation (location of the R binary and the package library) is consistent across the execution nodes.
309 | 
310 | The example below 
311 | is for the cluster operated by *Son of Grid Engine* (SoGE). To be able to run on a SoGE cluster *MeDeCom* needs to know:
312 | 
313 |  * location of the R executable (directory);
314 |  * an operating memory limit per each factorization job;
315 |  * a pattern for the names of cluster nodes to run the jobs on.
316 | 
317 | These settings should be stored in a `list` object:
318 | ```{r}
319 | sge.setup<-list(
320 | R_bin_dir="/usr/bin",
321 | host_pattern="*",
322 | mem_limit="5G"
323 | )
324 | ```
325 | This object should be supplied to *MeDeCom* as the argument `cluster.settings`. It is also important to specify a valid temporary 
326 | directory, which is available to all execution nodes.
327 | ```{r, eval=FALSE}
328 | medecom.result<-runMeDeCom(D, Ks=2:10, lambdas=c(0,10^(-5:-1)), N_COMP_LAMBDA=1, NFOLDS=5, NINIT=10, 
329 | temp.dir="/cluster_fs/medecom_temp",
330 | cluster.settings=sge.setup)
331 | ```
332 | *MeDeCom* will start the jobs and will periodically monitor the number of remaining ones.
333 | ```{r, echo=FALSE}
334 | cat("
335 | [Main:] checking inputs
336 | [Main:] preparing data
337 | [Main:] preparing jobs
338 | [Main:] 3114 factorization runs in total
339 | [Main:] 3114 jobs remaining
340 | ....
341 | [Main:] finished all jobs. Creating the object
342 | ")
343 | ```
344 | 
345 | # R session
346 | Here is the output of `sessionInfo()` on the system on which this document was compiled:
347 | ```{r, echo=FALSE}
348 | sessionInfo()
349 | ```
350 | 
351 |  
352 | 


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/vignettes/figure/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CompEpigen/MeDeCom/180a64d22323208800835789fccd120bc3e3b528/vignettes/figure/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------