├── .Rbuildignore ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── DrawFG.R ├── FactorGraph.R ├── FlagSymmetric.R ├── ModelMatrix.R ├── ModelMatrix_standard.R ├── Reg2Graph.R ├── applyTauAND.R ├── beepday2consec.R ├── bwSelPredict.R ├── bwSelect.R ├── calcLL.R ├── calcNeighbors.R ├── condition.R ├── condition_core.R ├── defunct_msg.R ├── getSign.R ├── glmnetRequirements.R ├── lagData.R ├── mgm.R ├── mgmsampler.R ├── mvar.R ├── mvarsampler.R ├── nodeEst.R ├── plotRes.R ├── predict.mgm.R ├── predictCore_stat.R ├── print.int.R ├── print.mgm.R ├── print.resample.R ├── resample.R ├── showInteraction.R ├── startup_msg.R ├── tvmgm.R ├── tvmgmsampler.R ├── tvmvar.R └── tvmvarsampler.R ├── README.md ├── data ├── B5MS.RData ├── Fried2015.RData ├── PTSD_data.RData ├── autism_data.RData ├── autism_data_large.RData ├── dataGD.RData ├── fruitfly_data.RData ├── mgm_data.RData ├── modnw.RData ├── msq_p3.RData ├── msq_p5.RData ├── mvar_data.RData ├── restingstate_data.RData └── symptom_data.RData ├── inst └── CITATION ├── man ├── FactorGraph.Rd ├── bwSelect.Rd ├── condition.Rd ├── datasets.Rd ├── mgm-internal.Rd ├── mgm-package.Rd ├── mgm.Rd ├── mgmsampler.Rd ├── mvar.Rd ├── mvarsampler.Rd ├── plotRes.Rd ├── predict.mgm.Rd ├── print.int.Rd ├── print.mgm.Rd ├── resample.Rd ├── showInteraction.Rd ├── tvmgm.Rd ├── tvmgmsampler.Rd ├── tvmvar.Rd └── tvmvarsampler.Rd ├── mgm.Rproj └── tests ├── testthat.R └── testthat ├── Rplots.pdf ├── test_MNM.R └── test_mgm.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | src/*.o 5 | src/*.so 6 | src/*.dll 7 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mgm 2 | Type: Package 3 | Title: Estimating Time-Varying k-Order Mixed Graphical Models 4 | Version: 1.2-16 5 | Date: 2023-09-04 6 | Author: Jonas Haslbeck 7 | Maintainer: Jonas Haslbeck 8 | Depends: R (>= 3.5.0) 9 | Description: Estimation of k-Order time-varying Mixed Graphical Models and mixed VAR(p) models via elastic-net regularized neighborhood regression. For details see Haslbeck & Waldorp (2020) . 10 | URL: https://www.jstatsoft.org/article/view/v093i08 11 | BugReports: https://github.com/jmbh/mgm/issues 12 | License: GPL (>= 2) 13 | Imports: glmnet, stringr, Hmisc, qgraph, gtools 14 | LazyData: true 15 | Packaged: 2025-11-03 00:00:01 CET 16 | Suggests: 17 | testthat (>= 2.0.0) 18 | Config/testthat/edition: 2 19 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # exportPattern("^[[:alpha:]]+") 2 | 3 | 4 | # --- Packages mgm depends on --- 5 | # import(matrixcalc) # archived on CRAN 6 | import(stringr) 7 | import(Hmisc) 8 | import(qgraph) # for FactorGraph() 9 | import(gtools) 10 | #import(glmnet) # now specificly below 11 | 12 | #importFrom("gtools", "permutations") 13 | importFrom("glmnet", "coef.glmnet", "glmnet", "cv.glmnet") 14 | 15 | 16 | # Recommended by build check 17 | importFrom("stats", "as.formula", "coef", "dnorm", "dpois", 18 | "model.matrix", "var", "quantile") 19 | importFrom("utils", "combn", "setTxtProgressBar", "txtProgressBar") 20 | importFrom("stats", "rnorm", "rpois") 21 | importFrom("graphics", "abline", "axis", "layout", "par", "plot.new", 22 | "plot.window", "points", "segments", "text") 23 | importFrom("graphics", "rect") 24 | importFrom("stats", "sd") 25 | 26 | 27 | # --- Exported functions --- 28 | export(mgm) 29 | export(mvar) 30 | 31 | export(tvmgm) 32 | export(tvmvar) 33 | export(bwSelect) 34 | 35 | 36 | export(mgmsampler) 37 | export(mvarsampler) 38 | export(tvmgmsampler) 39 | export(tvmvarsampler) 40 | 41 | export(resample) 42 | export(showInteraction) 43 | export(plotRes) 44 | 45 | export(FactorGraph) 46 | export(condition) 47 | 48 | 49 | # Exported S3 Methods 50 | S3method(predict, mgm) 51 | S3method(print, mgm) 52 | S3method(print, resample) 53 | S3method(print, int) 54 | 55 | 56 | # Export old functions with defunct msg 57 | export(mgmfit) 58 | export(var.mgm) 59 | export(tv.mgmfit) 60 | export(tv_var.mgm) 61 | 62 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | Changes in Version 1.2-16 2 | o 3 | 4 | Changes in Version 1.2-14 5 | o When setting the argument binarySign=TRUE, now also the signs of higher-order (i.e., 3-way and higher) interactions involving binary variables and continuous variables are reported in the output 6 | o Fixed new issue creating errors on Unix/Windows machines on CRAN 7 | 8 | Changes in Version 1.2-13 9 | o Fixed bug: in EBIC calculations, the standard deviation in the likelihood function was set equal to 1. Now it is set to the standard deviation of the residual as it should be. 10 | o Use inherits(X, "??") instead of class(X)="??" throughout, since this seems to be required by CRAN now 11 | 12 | Changes in Version 1.2-12 13 | o Removed dependency on matrixcalc package which as been archived 14 | o Improved mvar() input checks 15 | 16 | Changes in Version 1.2-11 17 | o Fixed bug: switch off thresholding when using using internal function Reg2Graph() in condition() 18 | o Extend condition() to Poisson variables 19 | o Fixed bug: The processing of certain custom moderation models created an error 20 | o Fixed bug: In plotRes() negative moderation effects of binary variables were positive even when binarySign=TRUE 21 | 22 | Changes in Version 1.2-10 23 | o Added a simulated example data set to illustrate estimating group differences using moderation 24 | o Fixed problems in computing binary signs created by R 4.0.0 25 | o Added colorblind color scheme to pairwise output 26 | 27 | Changes in Version 1.2-9 28 | o Fixed bug in bwSelect() + tvmgm() when specifying k manually 29 | 30 | Changes in Version 1.2-8 31 | o Fixed bug in computing predictions for Poisson variables 32 | o Fixed bug that only printed 10 pairwise interactions in plotRes() for MNMs 33 | o resample(): removed seed as an argument; now seeds are randomly drawn; for MGM objects, seeds are resampled until the MGM can be estimated on each of the bootstrap samples; the final seeds are provided in the output 34 | o Fixed bug in Reg2Graph()/mgm(): moderators were not shown in the output when specified as a matrix 35 | o Fixed bug in tvmvarsampler() 36 | o Fixed bug: Adapted condition() to custom moderator input in mgm() 37 | 38 | Changes in Version 1.2-7 39 | o Fixed bug in bootstrap scheme for time-varying VAR models 40 | o Provide optional output of chains of Gibbs sampler from mgmsampler() 41 | o New function condition() allows to condition on values of a set of variables. The main motivation for this function is to make 3-way interactions / moderation more accessible. 42 | o mgm() now allows one to specify custom sets of moderation effects via the "moderators" argument 43 | o Fixed bug in bwSelect in combination with mgm() opjects 44 | o Extended plotRes() to moderated MGMs with only a single moderator variable 45 | 46 | Changes in Version 1.2-6 47 | o Fix aliasing of internal functions 48 | 49 | Changes in Version 1.2-5 50 | o Bug fix in resample() 51 | o Improved input checks in mgmsampler() 52 | o Bug fix in prediction with mvar() objects 53 | o Allow for prediction with time-varying models in new data sets 54 | 55 | Changes in Version 1.2-4 56 | o Updated help files 57 | o Added two example data sets for higher-order (moderated) MGMs 58 | 59 | Changes in Version 1.2-3 60 | o Bug fix in bwSelect() 61 | o Added verbatim option in resample() to detect zero variance bootstrap samples 62 | o Fixed warning due to R 3.4.0: "Recycling array of length 1 in vector-array arithmetic is deprecatd.Use c() or as.vector() instead." 63 | o Bug fix: resampling did not work together with new mvar() arguments dayvar/beepvar since Version 1.2-2 64 | o Added argument "moderators" to the mgm() function that allows to ... 65 | o Change default: in earlier versions if overparameterize=TRUE, all thresholds (intercepts) of categorical variables were set to zero. From this version on this threshold is estimated. The thresholds of categoricals can still be set to zero with the new argument "thresholdCat" 66 | o If scale=TRUE, now interaction terms (k>2 or moderation) are scaled after computing the design matrix to ensure that all predictors have the same mean and standard deviation, which avoids different penalization depending on the scaling of a predictor 67 | o Add CES-D depression dataset taken from Fried et al. (2015) to example datasets 68 | o Added function FactorGraph() that computes and plots a factor graph from stationary or time-varying MGMs. The factorgraph output has been removed from the mgm() and tvmgm() functions. 69 | o Added support of showInteraction() to continuous 3-way interactions 70 | o For all time-varying models the estimation points are now specified on the unit interval [0,1]; for tvmvar() models, 0 is associated with the first data point that is not trivially excluded due to the maximum lag 71 | 72 | 73 | Changes in Version 1.2-2 74 | o Add running time to output of resample() function 75 | o Added a note and references on the normalizability of MGMs 76 | o Added function plotRes() to plot summaries of sampling distributions resampled with the resample() function 77 | o The consecutiveness of measurements in mvar() and tvmvar() models now can be specified alternatively to the consec argument with the arguments beepvar and dayvar. This is tailored to the typical time stamps of ecological momentary assessment (EMA) studies, where the consecutiveness is defined by the number of notification on a given day (beepvar) and the number of that given day (dayvar). 78 | o New function showInteraction() retrieves details of interactions from mgm model objects 79 | o The print method now also shows number of rows in the VAR design matrix compared to the number of rows of the original data matrix. 80 | o All estimation functions require a matrix object as data input to avoid problems with properties of data.frame objects 81 | o Corrected row-order in dataset "symptom_data" 82 | o In mVAR models we use an adjusted sample size that is equal to the number of rows in the design matrix. The adjusted sample size can be smaller than nrow(data) - max(lags) if measurements are not consecutive, or if non-uniform weighting is used. In earlier versions the sample size was only adjusted for non-uniform weighting. The adjusted sample size is used in the additional thresholding of parameter estimates (see ?mgm or ?mvar). 83 | o Added function resample() that fits any model class to a specified number of bootstrap samples and provides the bootstrapped sampling distribution in an array. For time series data, the block bootstrap is used, where the number of blocks can be specified. 84 | 85 | Changes in Version 1.2-1 86 | o Added consec argument to predict.mgm(), which allows prediction for VAR models in time series with unequal time intervals by excluding time points that are not preceded by enough measurements (see ?mvar) 87 | 88 | Changes in Version 1.2-0 89 | o The whole package has been rewritten from scratch to reduce susceptibility to bugs and to allow new features 90 | o Every function now has many input checks and associated informative error messages 91 | o Major syntax changes: mgmfit() -> mgm(), var.mgm() -> mvar(), tv.mgmfit() -> tvmgm() and tv_var.mgm() -> tvmvar() 92 | o The names of function arguments are now consistent across the package and therefore had to be changed considerably 93 | o There is no more missing argument in the estimation functions 94 | o All estimation functions allow to search a sequence of the (alpha) elastic net paramter, using the EBIC or cross-validation; so far alpha was fixed to 1 95 | o mgm() and tvmgm() now presents all paramters involved in higher (than pairwise) order interactions in the output; this includes a factor-graph representation that is easy to visualize 96 | o The d parameter for the largest order of interaction in the neighborhood of a given node has been replaced with the k parameter, the largest order of interaction in the whole graph. Note that d = k - 1 97 | o All estimation functions allow the standard parameterization for categorical variables, but also an overparameterization. This is necessary to correctly identify higher order interactions between categorical variables. 98 | o The sampling functions mgmsampler() and tvmgmsampler() were extended to k-order MGMs (before only pairwise / k = 2) 99 | o mvar() and tvmvar() now allow the specification of any number of lags 100 | o The new functions mvarsampler() and tvmvarsampler() now allow to sample from mVAR models any number of lags 101 | o There is no more function provided for resampling. Instead, we provide an interface with the bootnet package. 102 | o The function bwSelect() allows the selection of an optimal bandwidth parameter for timer-varying MGM or mVAR models using cross-validation 103 | o predict.mgm() now allows two different ways to predict from time-varying models, see ?predict.mgm 104 | o Fixed bug in mgmsampler() that was present in binary-Gaussian graphs 105 | o Fixed bug in predict.mgm() which caused the prediction of incorrect category labels in some situations 106 | o Fixed bug in mgm() which did not use the weight-argument in case of lambdaSel = 'CV' 107 | o Default for argument lambdaSel changed from 'EBIC' to 'CV' 108 | 109 | Changes in Version 1.1-7 110 | o The predict() function now returns the predicted probabilities in addition to the predicted category for categorical variables 111 | o Added a message for all estimation functions indicating where edge weights (if defined) can be found: fitobject$signs 112 | o Added a startup message with a link to report bugs 113 | o The predict() function now computes predicted values and a prediction error for each variable in the graph 114 | o The print() function now returns a small summary of the model type when printing a mgm object 115 | o Added subsampling scheme to evaluate edge-stability for non-time-varying models (MGM and mixed VAR) 116 | o Added summary() & plot() for the bootstrap object to summarize edge-stability 117 | o Added argument 'binary.sign': If binary.sign=TRUE, the sign of the interactions of all binary variables coded (0,1) with other binary variables and continuous variables will be returned in the sign matrix fit$signs 118 | 119 | -------------------------------------------------------------------------------- /R/FactorGraph.R: -------------------------------------------------------------------------------- 1 | # jonashaslbeck@gmail.com; March 2016 2 | 3 | FactorGraph <- function(object, 4 | labels, 5 | PairwiseAsEdge = FALSE, 6 | Nodewise = FALSE, 7 | DoNotPlot = FALSE, 8 | FactorLabels = TRUE, 9 | colors, 10 | shapes, 11 | shapeSizes = c(8, 4), 12 | estpoint = NULL, 13 | negDashed = FALSE, 14 | ...) 15 | 16 | { 17 | 18 | # --------- Compute Aux Variables --------- 19 | 20 | if(Nodewise) PairwiseAsEdge <- FALSE 21 | 22 | p <- length(object$call$level) 23 | n_estpoints <- length(object$call$estpoints) 24 | 25 | # --------- Input Checks --------- 26 | 27 | if(!missing(labels)) if(length(labels) != p) stop("Number of provided labels has to match the number of variables.") 28 | 29 | # Checks for time-varying FactorGraph 30 | if("tvmgm" %in% class(object)) { 31 | if(missing(estpoint)) stop("Specify the estimation point for which the factor graph should be visualized.") 32 | if(estpoint > n_estpoints) stop(paste0("The provided fit object has only ", n_estpoints, " estimation points.")) 33 | } 34 | 35 | if(object$call$k > 4) stop("Please specify additional colors/shapes for interactions with order > 4.") 36 | 37 | 38 | # --------- Create FractorGraph object --------- 39 | 40 | call <- list("object" = object) 41 | 42 | FG_object <- list("call" = call, 43 | "graph" = NULL, 44 | "nodetype" = NULL, 45 | "order" = NULL, 46 | "signs" = NULL, 47 | "edgecolor" = NULL, 48 | "nonzero" = NULL, 49 | "qgraph" = NULL) 50 | 51 | 52 | # --------- Fill in defaults --------- 53 | 54 | if(missing(labels)) labels <- 1:p 55 | if(missing(colors)) colors <- c("white", "tomato", "lightblue", "orange") 56 | if(missing(shapes)) shapes <- c("circle", "square", "triangle", "diamond") 57 | layout <- "circle" 58 | cut <- 0 59 | 60 | 61 | # --------- Compute Factor Graph ---------- 62 | 63 | # Call different DrawFG() version for stationary/time-varying 64 | if("tvmgm" %in% class(object)) { 65 | 66 | # Time-varying 67 | FG <- DrawFGtv(object = object, 68 | PairwiseAsEdge = PairwiseAsEdge, 69 | Nodewise = Nodewise, 70 | estpoint = estpoint) 71 | 72 | } else { 73 | 74 | # Stationary 75 | FG <- DrawFG(object = object, 76 | PairwiseAsEdge = PairwiseAsEdge, 77 | Nodewise = Nodewise) 78 | 79 | } 80 | 81 | # Save into FG_object 82 | FG_object$graph <- FG$weightedgraph 83 | FG_object$nodetype <- FG$nodetype 84 | FG_object$order <- FG$order 85 | FG_object$signs <- FG$signs 86 | FG_object$edgecolor <- edge.color <- FG$signcolor 87 | FG_object$nonzero <- FG$nonzero 88 | 89 | 90 | 91 | # Allow overwriting ... 92 | args <- list(...) 93 | if(!is.null(args$cut)) cut <- args$cut 94 | if(!is.null(args$layout)) layout <- args$layout 95 | if(!is.null(args$edge.color)) edge.color <- args$edge.color 96 | 97 | 98 | # browser() 99 | 100 | # Adapt edge labels for zero edges in Nodewise=TRUE 101 | if(!is.null(args$edge.labels)) { # if specified, otherwise set to FALSE 102 | if(is.logical(args$edge.labels)) { # if specified and logical, then adapt for nonzero or FALSE 103 | if(args$edge.labels) { 104 | edge.labels <- FG_object$graph 105 | edge.labels[FG_object$nonzero == 2] <- 0 106 | edge.labels <- round(edge.labels, 2) 107 | } else { 108 | edge.labels = FALSE 109 | } 110 | } else { 111 | # if not logical, take the input 112 | edge.labels <- args$edge.labels 113 | } 114 | } else { 115 | edge.labels = FALSE 116 | } 117 | 118 | 119 | # Edge lty: allow negative edges to be dashed for greyscale images 120 | edge_lty <- FG_object$nonzero 121 | if(negDashed) edge_lty[edge.color == "red"] <- 2 122 | 123 | # --------- Plot & Return --------- 124 | 125 | if(!DoNotPlot){ 126 | 127 | # ----- Compute stuff necessary for plotting ----- 128 | 129 | # Create labels for factors (label = order of factor/interaction) 130 | ifelse(PairwiseAsEdge, ek <- 1, ek <- 0) 131 | if(FactorLabels) { 132 | tb <- table(FG_object$order)[-1] 133 | 134 | if(length(tb)==0) { # For the case PairwiseAsEdge=FALSE and no 3-way interactions 135 | FL <- NULL 136 | } else { 137 | l_lf <- list() 138 | for(k in 1:length(tb)) l_lf[[k]] <- rep(k+1+ek, tb[k]) 139 | FL <- unlist(l_lf) 140 | } 141 | 142 | labels_ex <- c(labels, FL) 143 | } else { 144 | labels_ex <- c(labels, rep('', sum(FG_object$nodetype))) 145 | } 146 | 147 | # ----- Call qgraph ----- 148 | 149 | qgraph_object <- qgraph(FG_object$graph, 150 | color = colors[FG_object$order + 1], 151 | edge.color = edge.color, 152 | lty = edge_lty, 153 | layout = layout, 154 | labels = labels_ex, 155 | shape = shapes[FG_object$order + 1], 156 | vsize = shapeSizes[FG_object$nodetype + 1], 157 | edge.labels = edge.labels, 158 | cut = cut, 159 | ...) 160 | 161 | FG_object$qgraph <- qgraph_object 162 | 163 | 164 | invisible(FG_object) # return output object invisible 165 | 166 | } else { 167 | return(FG_object) 168 | } 169 | 170 | 171 | 172 | } # eoF -------------------------------------------------------------------------------- /R/FlagSymmetric.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | # Gives identical interactions (e.g. 1-2-3 and 3-1-2) the same flag 4 | 5 | # Input: Matrix with row = number of interactions, col = involved variables 6 | # Output: numeric indicator vector 7 | 8 | 9 | 10 | FlagSymmetric <- function(x) { 11 | 12 | vec_sim <- rep(NA, nrow(x)) 13 | ind_ord <- ncol(x) 14 | 15 | counter <- 1 16 | 17 | for(i in 1:nrow(x)) { 18 | 19 | if(is.na(vec_sim[i])) { 20 | 21 | vec_sim[i] <- counter 22 | 23 | for(j in (i+1):nrow(x)) { 24 | 25 | if( (i+1) > nrow(x) ) next # in case of very few interactions 26 | 27 | ind <- x[j, ] %in% x[i, ] 28 | if(sum(ind)==ind_ord) vec_sim[j] <- counter 29 | 30 | } 31 | 32 | counter <- counter + 1 33 | 34 | } 35 | } 36 | return(vec_sim) 37 | } 38 | 39 | 40 | 41 | FlagSymmetricFast <- function(x) { 42 | 43 | x <- data.frame(x) 44 | flag <- as.numeric(factor(apply(x, 1, function(x) paste0(sort(x), collapse = "-")))) 45 | flag_consec <- as.numeric(factor(flag, levels = unique(flag))) # I think unnecessary 46 | 47 | return(flag_consec) 48 | } 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /R/ModelMatrix.R: -------------------------------------------------------------------------------- 1 | # jonashaslbeck@gmail.com; May 2018 2 | 3 | 4 | ModelMatrix <- function(data, # matrix 5 | type, # type vector (I think not needed, level should be sufficient) 6 | level, # level vector 7 | labels, 8 | d, # largest neighborhood size 9 | moderators = NULL, 10 | v = NULL, 11 | allCats = FALSE # if true, the model matrix does not use all unique categories, but the categories specified in level, this exists because I use this function within the sampling function 12 | ) 13 | 14 | 15 | { 16 | 17 | 18 | # Delete variable v: only for MGMs! 19 | if(!is.null(v)) { 20 | data <- data[, -v] 21 | type <- type[-v] 22 | level <- level[-v] 23 | labels <- labels[-v] 24 | } 25 | 26 | 27 | # ---------- Calculate Auxilliary variables ---------- 28 | 29 | p <- ncol(data) # note that we have only the predictors here!! 30 | n <- nrow(data) 31 | 32 | # mSpec <- ifelse(class(moderators) %in% c("integer", "numeric"), "vector", "matrix") 33 | mSpec <- ifelse(any(class(moderators) %in% c("integer","numeric")), "vector", "matrix") 34 | 35 | # ---------- Input Checks ---------- 36 | 37 | if(p != length(type)) stop('Length of type has to match the number of columns in data.') 38 | if(p != length(level)) stop('Length of level has to match the number of columns in data.') 39 | if(!(inherits(level, "numeric") | inherits(level, "integer"))) stop('level has to be an integer vector.') 40 | 41 | # ---------- Calculate Indicator Functions for all Variables ---------- 42 | 43 | l_ind_datasets <- list() 44 | for(j in 1:p) { 45 | 46 | if(type[j] != 'c') { 47 | 48 | l_ind_datasets[[j]] <- as.matrix(data[, j]) 49 | colnames(l_ind_datasets[[j]]) <- paste0(labels[j]) 50 | 51 | } else { 52 | 53 | if(allCats==FALSE) { 54 | 55 | unique_labels <- unique(data[, j]) 56 | unique_labels_sorted <- sort(unique_labels) 57 | n_labels <- length(unique_labels_sorted) 58 | ind_matrix <- matrix(NA, nrow = n, ncol=n_labels) 59 | for(s in 1:n_labels) ind_matrix[, s] <- data[, j] == unique_labels_sorted[s] 60 | 61 | } else { 62 | 63 | # This is used in mvarsampler() and avoids that the design matrix is too small cases early in the time series, where not all categories are seen yet 64 | unique_labels <- 1:level[j] 65 | n_labels <- level[j] 66 | ind_matrix <- matrix(NA, nrow = n, ncol=n_labels) 67 | for(s in 1:n_labels) ind_matrix[, s] <- data[, j] == unique_labels[s] 68 | 69 | } 70 | 71 | # Add Var names 72 | cn <- paste0(labels[j], 1:n_labels) 73 | colnames(ind_matrix) <- cn 74 | l_ind_datasets[[j]] <- ind_matrix 75 | 76 | } 77 | 78 | } 79 | 80 | # ---------- Collect d = 1 interaction Terms ---------- 81 | 82 | # In this case we just use the indicator functions computed above 83 | 84 | l_ind_datasets_nV <- l_ind_datasets 85 | Xd1 <- do.call(cbind, l_ind_datasets_nV) 86 | 87 | 88 | # ---------- Compute all d>1 Interaction Terms ---------- 89 | 90 | # Here we loop over all orders, in each order over all interactions, and in each interaction over all combinations of the levels of all variables 91 | 92 | 93 | if(!is.null(moderators)) d <- 2 # If first-order moderation (three way interactions) is specified, we fix k = 3, d = k - 1 = 2 94 | 95 | if(d > 1) { 96 | 97 | ## List all possible Interactions 98 | l_interactions <- vector('list', length = d) 99 | 100 | # Case I: No Moderation 101 | if(is.null(moderators)) { 102 | for(ord in 1:d) l_interactions[[ord]] <- combn((1:p), ord, simplify = FALSE) # note that the numbers here refer to the all variables minus variable v 103 | } else { 104 | 105 | l_interactions[[1]] <- list() 106 | for(i in 1:p) l_interactions[[1]][[i]] <- i 107 | 108 | # Case II: Moderation 109 | 110 | if(mSpec == "vector") { 111 | 112 | if(v %in% moderators) { 113 | 114 | l_interactions[[2]] <- combn(1:p, 2, simplify = FALSE) # all combinations of the remaining (here denoted by 1:p) variables 115 | 116 | } else { 117 | 118 | ind_mod_MM <- (1:(p+1) %in% moderators)[-v] # indicator(moderator?) for 1:p predictors 119 | n_mods <- sum(ind_mod_MM) 120 | which_mod <- which(ind_mod_MM) 121 | 122 | l_mods <- list() 123 | for(i in 1:n_mods) l_mods[[i]] <- expand.grid((1:p)[-which_mod[i]], which_mod[i]) # loop over moderators 124 | m_mods <- do.call(rbind, l_mods) 125 | m_mods <- as.matrix(m_mods) 126 | 127 | l_mods_combn <- list() 128 | for(i in 1:nrow(m_mods)) l_mods_combn[[i]] <- m_mods[i, ] # turn each row into list entry 129 | 130 | l_interactions[[2]] <- l_mods_combn 131 | 132 | } # end: v = moderator? 133 | 134 | } # end if: vector specification? 135 | 136 | 137 | if(mSpec == "matrix") { 138 | 139 | nrow_mods <- nrow(moderators) 140 | ind_v_inMod <- as.logical(apply(moderators, 1, function(x) v %in% x )) 141 | 142 | if(sum(ind_v_inMod)>0) { # if variable v is involved in at least one interaction 143 | # get interaction terms for node v 144 | int_terms <- t(apply(matrix(moderators[ind_v_inMod], ncol=3), 1, function(x) x[x!=v])) 145 | 146 | # To get the predictors on that weird "other variable" vector I use in this script 147 | for(i in 1:nrow(int_terms)) for(j in 1:2) if(int_terms[i, j] > v) int_terms[i, j] <- int_terms[i, j] - 1 148 | 149 | # fill in existing structure: 150 | for(i in 1:nrow(int_terms)) l_interactions[[2]][[i]] <- int_terms[i, ] 151 | } else { 152 | d <- 1 # for the present node v, to skip the below which takes l_interactions[[2]] as input 153 | } 154 | 155 | } # end if: matrix specification? 156 | 157 | } # end if: moderators? 158 | 159 | } # end if: d>1 160 | 161 | 162 | 163 | 164 | # if statement again here, because it can happen that there is no interaction term in regression on variable v 165 | if(d > 1) { 166 | 167 | # storage for all interactions of all orders 168 | l_collect_terms <- list() 169 | 170 | # Loop over order of interactions; 171 | for(ord in 2:d) { 172 | 173 | n_terms <- length(l_interactions[[ord]]) 174 | 175 | # storage for all interactions of fixed order 176 | l_ord_terms <- list() 177 | 178 | # Loop over the interactions of a given order 179 | for(it in 1:n_terms) { 180 | 181 | # Storage: collect here all interaction terms of one interaction 182 | l_it_terms <- list() 183 | 184 | # For fixed interaction: which variables are involved? 185 | inter_it <- l_interactions[[ord]][[it]] # select interactions one by one 186 | 187 | # Compute amount of levels of each variable (continuous=1) 188 | l_indicator_it <- list() 189 | for(i in 1:ord) l_indicator_it[[i]] <- 1:(level[inter_it[i]]) 190 | 191 | # List all combination of levels of variables in the interaction it 192 | all_combs <- expand.grid(l_indicator_it) 193 | n_combs <- nrow(all_combs) 194 | 195 | # Loop over all combinations of levels of variables in interaction and collect in matrix 196 | for(comb in 1:n_combs) { 197 | tarmat <- matrix(NA, nrow = n, ncol = ord) 198 | for(i in 1:ord) tarmat[, i] <- as.matrix(l_ind_datasets[[inter_it[i]]])[,all_combs[comb, i]] 199 | l_it_terms[[comb]] <- apply(tarmat, 1, prod) 200 | } 201 | 202 | # combine all level combinations for interaction it 203 | it_data <- do.call(cbind, l_it_terms) 204 | 205 | # Create Names for all (combinations of) interactions 206 | all_combs_char <- apply(all_combs, 2, function(x) { 207 | if(length(unique(x))==1) { 208 | out <- rep("", length(x)) 209 | } else { 210 | out <- x 211 | } 212 | }) 213 | cn <- rep(NA, n_combs) 214 | 215 | # DEV: check whether this still works: 216 | 217 | for(comb in 1:n_combs) cn[comb] <- paste0(labels[inter_it], matrix(all_combs_char, ncol=ord)[comb,], collapse=':') 218 | # the point above added to deliminate category flag 219 | 220 | colnames(it_data) <- cn 221 | l_ord_terms[[it]] <- it_data 222 | 223 | } 224 | 225 | # Collapse over interactions of fixed order 226 | l_collect_terms[[ord]] <- do.call(cbind, l_ord_terms) 227 | 228 | } 229 | 230 | # Collapse across order of interactions 231 | # l_collect_terms[[1]] <- NULL 232 | all_HOI_terms <- do.call(cbind, l_collect_terms) 233 | 234 | } # end if: d>1 235 | 236 | # Combine with d=1 size neighborhoods (singletons) 237 | 238 | if(d > 1) { 239 | X <- cbind(Xd1, all_HOI_terms) 240 | } else { 241 | X <- Xd1 242 | } 243 | 244 | 245 | # ---------- Output---------- 246 | 247 | return(X) 248 | 249 | 250 | } # end of function 251 | 252 | 253 | -------------------------------------------------------------------------------- /R/ModelMatrix_standard.R: -------------------------------------------------------------------------------- 1 | # jonashaslbeck@gmail.com; May 2018 2 | 3 | ModelMatrix_standard <- function(data, 4 | type, 5 | d, # maximum neighborhood size in model = k-1 6 | v, # node on which current nodewise regression is performed 7 | moderators # moderators, if specified 8 | 9 | ) 10 | 11 | 12 | { 13 | 14 | # -------- Input checks -------- 15 | 16 | # ... 17 | 18 | # -------- Case I: No Moderation -------- 19 | 20 | p <- ncol(data) 21 | for(i in 1:p) if(type[i] == "c") data[, i] <- as.factor(data[, i]) 22 | 23 | if(is.null(moderators)) { 24 | 25 | if(d > (p - 1)) { 26 | stop("Order of interactions cannot be larger than the number of predictors.") 27 | } else if (d == 1){ form <- as.formula(paste(colnames(data)[v],"~ (.)")) 28 | } else { form <- as.formula(paste(colnames(data)[v],"~ (.)^", d)) } 29 | 30 | # Construct standard design matrix (to get number of parameters for tau threshold) 31 | X_standard <- model.matrix(form, data = data)[, -1] # delete intercept (added by glmnet later) 32 | 33 | } else { 34 | 35 | 36 | # -------- Case II: Moderation -------- 37 | 38 | # mSpec <- ifelse(class(moderators) %in% c("integer","numeric"), "vector", "matrix") 39 | mSpec <- ifelse(any(class(moderators) %in% c("integer","numeric")), "vector", "matrix") 40 | # browser() 41 | 42 | 43 | # Terms for interactions/moderation 44 | n_mods <- ifelse(mSpec == "vector", length(moderators), nrow(moderators)) 45 | 46 | # IIa) Moderator specification: Vector 47 | if(mSpec == "vector") { 48 | 49 | n_l_mods <- 0 50 | l_mods <- list() 51 | 52 | if(v %in% moderators) { 53 | other_comb_pairw <- t(combn((1:p)[-v], 2)) 54 | l_mods[[1]] <- paste0("V", other_comb_pairw[, 1], ".", " * ", "V", other_comb_pairw[, 2], ".", collapse = " + ") 55 | } else { 56 | for(i in 1:n_mods) { # loop over moderators 57 | l_mods[[i]] <- paste0(colnames(data)[-c(v, moderators[i])], "*", "V", moderators[i], ".", collapse = " + ") 58 | n_l_mods <- n_l_mods + 1 59 | } 60 | } # end if: v = moderator? 61 | 62 | if(n_l_mods > 1) for(i in 1:(n_l_mods - 1)) l_mods[[i]] <- paste0(l_mods[[i]], " + ") # add plus between terms but not in the end 63 | v_mods <- do.call(paste0, l_mods) 64 | 65 | } # end if: vector specification 66 | 67 | 68 | # IIb) Moderator specification: Matrix 69 | if(mSpec == "matrix") { 70 | 71 | nrow_mods <- nrow(moderators) 72 | ind_v_inMod <- as.logical(apply(moderators, 1, function(x) v %in% x )) 73 | 74 | if(sum(ind_v_inMod)>0) { # if variable v is involved in at least one interaction 75 | # get interaction terms for node v 76 | int_terms <- t(apply(matrix(moderators[ind_v_inMod], ncol=3), 1, function(x) x[x!=v])) 77 | n_terms <- length(int_terms) 78 | v_mods <- paste0("V", int_terms[, 1], ".", " * ", "V", int_terms[, 2], ".", collapse = " + ") 79 | } else { 80 | v_mods <- NULL 81 | } 82 | 83 | 84 | } # end if: matrix specification 85 | 86 | 87 | 88 | # -------- Final Model formula: main effects + interactions -------- 89 | 90 | # Terms for main effects 91 | pred_simple <- paste(colnames(data)[-v], collapse = " + ") 92 | 93 | # Put together 94 | if(length(v_mods) == 0) { # if there is no moderation for that variable (the case if v is the only moderator) 95 | form <- paste(colnames(data)[v], "~", 96 | pred_simple) 97 | 98 | } else { 99 | form <- paste(colnames(data)[v], "~", 100 | pred_simple, 101 | " + ", 102 | v_mods) 103 | } 104 | 105 | form <- as.formula(form) 106 | 107 | # Create design matrix 108 | X_standard <- model.matrix(form, 109 | data = data)[, -1] # delete intercept (added by glmnet later) 110 | 111 | } # end if: moderation? 112 | 113 | 114 | # -------- Output -------- 115 | 116 | 117 | return(X_standard) 118 | 119 | 120 | } # end of function 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /R/applyTauAND.R: -------------------------------------------------------------------------------- 1 | # jonashaslbeck@gmail.com; July 2019 2 | 3 | # Input: 4 | # - fitted mgm model object (mgm() output) 5 | # - node i at hand 6 | # Output: 7 | # - Nodemodels with tau-threshold and AND rule applied 8 | 9 | # This is used in condition() (for now) and in predict() (planned later) 10 | 11 | 12 | applyTauAND <- function(i, 13 | object, 14 | model_i) { 15 | 16 | 17 | # ----- Apply: tau-threshold ----- 18 | 19 | model_i[abs(model_i) < object$nodemodels[[i]]$tau] <- 0 20 | 21 | 22 | # ----- Apply AND-rule ------ 23 | 24 | # if(i==12) browser() 25 | 26 | if(object$call$ruleReg == "AND") { 27 | 28 | # print(i) 29 | 30 | ## This creates an object with the same dimensionality as the glmnet output matrix 31 | ## Below we fill into this matrix whether the corresponding parameter has been set to zero by the AND rule in the fitted model 32 | ## We take that information from the indicator matrix 33 | 34 | # Aux matrix 35 | aux_m_AND <- matrix(NA, nrow(model_i)-1, 5) # columns: parameter value, variable 1, variable 2, order of interaction, parameter value after AND rule 36 | aux_m_AND[, 1] <- model_i[-1, 1] # copy parameters, without intercept 37 | 38 | # get variables out: 39 | names_i <- rownames(model_i)[-1] 40 | names_aux1 <- strsplit(names_i, ":") 41 | names_aux2 <- lapply(names_aux1, function(x) gsub("V", "", x)) 42 | names_aux3 <- lapply(names_aux2, function(x) { 43 | out <- strsplit(x, "\\.") 44 | if(length(out) == 1) c(out[[1]][1], NA) else c(out[[1]][1], out[[2]][1]) 45 | }) 46 | names_aux4 <- do.call(rbind, names_aux3) 47 | names_aux4 <- apply(names_aux4, 2, as.numeric) 48 | 49 | aux_m_AND[, 2:3] <- names_aux4 50 | aux_m_AND[, 4] <- apply(aux_m_AND[, 2:3], 1, function(x) sum(!is.na(x))) # order of interaction 51 | n_2way <- sum(aux_m_AND[, 4] == 1) 52 | n_3way <- sum(aux_m_AND[, 4] == 2) 53 | 54 | 55 | # -- Loop over indicator matrix -- 56 | 57 | # 2-way interactions 58 | ind_2way <- object$interactions$indicator[[1]] 59 | ind_2way <- matrix(ind_2way, ncol=2) # for the case of: only 1 e-way interaction 60 | out <- apply(ind_2way, 1, function(x) { 61 | if(i %in% x) { 62 | TRUE 63 | } else { 64 | FALSE 65 | } 66 | }) 67 | 68 | if(sum(out)>0) { # all of this needed only if at least one pairwise interaction is estimated to be nonzero 69 | 70 | ind_2way_i <- matrix(ind_2way[out, ], ncol=2) # subset nonzero estimated 2-way interactions that contain variable i in indicator matrix 71 | preds_2way <- aux_m_AND[aux_m_AND[, 4] == 1, 2] # subset 2-way interactions in model object 72 | 73 | for(v in 1:n_2way) { 74 | out_v <- apply(ind_2way_i, 1, function(x) preds_2way[v] %in% x) 75 | if(sum(out_v) > 0) aux_m_AND[v, 5] <- 1 else aux_m_AND[v, 5] <- 0 76 | } 77 | 78 | } else { 79 | 80 | aux_m_AND[1:n_2way, 5] <- 0 81 | 82 | } 83 | 84 | # 3-way interactions 85 | 86 | # only execute if there are any 3-way interactions _specified_ involving variable i 87 | # (it can happen that no 3-way interaction is specified if the moderators are specified by matrix input) 88 | if(n_3way > 0) { 89 | 90 | ind_3way <- object$interactions$indicator[[2]] 91 | ind_3way <- matrix(ind_3way, ncol=3) # for the case of: only one 3-way interaction 92 | out <- apply(ind_3way, 1, function(x) { 93 | if(i %in% x) { 94 | TRUE 95 | } else { 96 | FALSE 97 | } 98 | }) 99 | 100 | if(sum(out)>0) { # all of this needed only if at least one 3-way interaction is estimated to be nonzero 101 | 102 | ind_3way_i <- matrix(ind_3way[out, ], ncol=3) 103 | preds_3way <- matrix(aux_m_AND[aux_m_AND[, 4] == 2, 2:3], ncol=2) 104 | 105 | for(v in 1:n_3way) { 106 | out_v <- apply(ind_3way_i, 1, function(x) preds_3way[v, 1] %in% x & preds_3way[v, 2] %in% x) 107 | if(sum(out_v) > 0) aux_m_AND[v+n_2way, 5] <- 1 else aux_m_AND[v+n_2way, 5] <- 0 108 | } 109 | 110 | } else { 111 | 112 | aux_m_AND[(n_2way+1):(n_2way+n_3way), 5] <- 0 113 | 114 | } 115 | 116 | } 117 | 118 | 119 | # Finally, threshold 120 | model_i[-1, 1][aux_m_AND[, 5] == 0] <- 0 121 | 122 | 123 | } # end if: ruleReg = "AND" ? 124 | 125 | return(model_i) 126 | 127 | } # eoF 128 | -------------------------------------------------------------------------------- /R/beepday2consec.R: -------------------------------------------------------------------------------- 1 | 2 | beepday2consec <- function(beepvar, # Beep number in EMA study 3 | dayvar) # Day Number 4 | 5 | { 6 | 7 | # Input Checks 8 | if(!all(dayvar == round(dayvar))) stop("beepvar has to be a vector of non-negative integers") 9 | if(!all(beepvar == round(beepvar))) stop("beepvar has to be a vector of non-negative integers") 10 | if(length(beepvar) != length(dayvar)) stop("beepvar has to have the same length as dayvar") 11 | 12 | # Compute Aux variables 13 | fillin_beep <- max(beepvar) + 2 # fill in an integer for day/night shifts that ensures that lagData() treats this data point as non-consecutive 14 | n <- length(dayvar) 15 | 16 | # Get day breaks 17 | ind_sameday <- dayvar[-1] == dayvar[-n] 18 | ind_sameday[1] <- TRUE # dont change beep of first measurement 19 | 20 | consec <- rep(NA, n) 21 | consec[1] <- 1 22 | 23 | # check consecutiveness for each (consecutive) row-pair 24 | counter <- 1 25 | for(i in 2:n) { 26 | beep_diff <- beepvar[i] - beepvar[i-1] 27 | day_diff <- dayvar[i] - dayvar[i-1] 28 | if(beep_diff == 1 & day_diff == 0) counter <- counter + 1 else counter <- counter + 2 29 | consec[i] <- counter 30 | } 31 | 32 | return(consec) 33 | 34 | } 35 | 36 | -------------------------------------------------------------------------------- /R/bwSelPredict.R: -------------------------------------------------------------------------------- 1 | 2 | bwSelPredict <- function(data, 3 | type, 4 | level, 5 | obj, 6 | test, 7 | modeltype, 8 | # k = NULL, 9 | # consec = NULL, 10 | ... ) # is lags passed on? 11 | 12 | 13 | 14 | { 15 | 16 | # -------------------- Input checks & calc Aux Vars ------------------- 17 | 18 | p <- ncol(data) 19 | 20 | # Fetch arguments 21 | args <- list(...) 22 | 23 | 24 | # if(is.null(args$k)) args$k <- 2 25 | args$d <- args$k - 1 26 | n_test <- length(test) 27 | consec <- args$consec 28 | 29 | # Scale Gaussians 30 | ind_Gauss <- which(type == 'g') 31 | for(i in ind_Gauss) data[, i] <- scale(data[, i]) 32 | 33 | data_df <- as.data.frame(data) 34 | 35 | # Categoricals into factors (Needed to use formula to construct design matrix) 36 | for(i in which(type=='c')) data_df[, i] <- as.factor(data_df[, i]) 37 | 38 | 39 | # -------------------- Get VAR data structure ------------------- 40 | 41 | if(modeltype == 'mvar') { 42 | 43 | # Glue all lagged variables together 44 | 45 | n_lags <- length(args$lags) 46 | 47 | # Prepare Data (already cuts max(lags) first observations to compute design matrix) 48 | data_lagged <- lagData(data = data, 49 | lags = args$lags, 50 | consec = consec) 51 | 52 | # out of list 53 | data_response <- data_lagged$data_response 54 | l_data_lags <- data_lagged$l_data_lags 55 | data_response <- apply(data_response, 2, as.numeric) # to avoid confusion with labels of categories if there are factors 56 | 57 | # Detele cases 58 | data_response <- data_response[data_lagged$included, ] 59 | l_data_lags <- lapply(l_data_lags, function(z) z <- z[data_lagged$included, ]) 60 | 61 | data_response <- apply(data_response, 2, as.numeric) # to avoid confusion with labels of categories if there are factors 62 | 63 | } 64 | 65 | # -------------------- Compute Predictions ------------------- 66 | 67 | m_pred <- matrix(NA, nrow = n_test, ncol = p) 68 | 69 | for(v in 1:p) { 70 | 71 | # ----- Create Design Matrices ----- 72 | 73 | if(modeltype == 'mvar') { 74 | 75 | # append response with predictors 76 | y <- data_response[,v] # response variable v 77 | data_v <- cbind(do.call(cbind, l_data_lags)) # combine 78 | data_v <- as.data.frame(data_v) # because model.matrix() below requries a data.frame 79 | 80 | # Dummy coding 81 | form <- as.formula('y ~ (.)') 82 | 83 | # Construct standard design matrix (to get number of parameters for tau threshold) 84 | X_standard <- model.matrix(form, data = data_v)[, -1] # delete intercept (added by glmnet later) 85 | 86 | if(args$overparameterize) { 87 | 88 | # # Compute augmented type and level vectors 89 | type_aug <- rep(type, n_lags) 90 | level_aug <- rep(level, n_lags) 91 | 92 | # Construct over-parameterized design matrix 93 | X_over <- ModelMatrix(data = data_v, 94 | type = type_aug, 95 | level = level_aug, 96 | labels = colnames(data_v), 97 | d = 1, 98 | v = NULL) 99 | X <- X_over 100 | 101 | } else { 102 | 103 | X <- X_standard 104 | 105 | } 106 | 107 | } 108 | 109 | if(modeltype == 'mgm') { 110 | 111 | # Create HOI design matrix (copied from mgm) 112 | 113 | if(args$d > (p - 1)) { 114 | stop("Order of interactions cannot be larger than the number of predictors.") 115 | } else if (args$d == 1){ form <- as.formula(paste(colnames(data_df)[v],"~ (.)")) 116 | } else { form <- as.formula(paste(colnames(data_df)[v],"~ (.)^", args$d)) } 117 | 118 | # Construct standard design matrix (to get number of parameters for tau threshold) 119 | X_standard <- model.matrix(form, data = data_df)[, -1] # delete intercept (added by glmnet later) 120 | 121 | if(args$overparameterize) { 122 | 123 | # Construct over-parameterized design matrix 124 | X_over <- ModelMatrix(data = data, 125 | type = type, 126 | level = level, 127 | labels = colnames(data), 128 | d = args$d, 129 | v = v) # defined above from k 130 | X <- X_over 131 | } else { 132 | X <- X_standard 133 | } 134 | 135 | y <- data[, v] 136 | 137 | } # end if: modeltype mgm 138 | 139 | 140 | 141 | 142 | 143 | # ----- Make Predictions for test points ----- 144 | 145 | for(i in 1:n_test) { 146 | 147 | coefs <- obj$tvmodels[[i]]$nodemodels[[v]]$model 148 | row <- test[i] 149 | 150 | if(type[v] == 'c') { 151 | 152 | n_cat <- level[v] 153 | 154 | # Compute exp(potentials) 155 | Potentials <- rep(NA, n_cat) 156 | for(k in 1:n_cat) Potentials[k] <- exp(coefs[[k]][1] + X[row,] %*% coefs[[k]][-1]) 157 | 158 | # compute category-probabilities 159 | Probabilities <- Potentials / sum(Potentials) 160 | # Classify 161 | m_pred[i, v] <- sort(unique(y))[which.max(Probabilities)] 162 | 163 | 164 | } else { 165 | 166 | ## Prediction Continuous 167 | coefs <- as.numeric(coefs) # get coefficients 168 | m_pred[i, v] <- coefs[1] + X[row,] %*% coefs[-1] # predict 169 | 170 | } 171 | 172 | } # end for: i (over test locations) 173 | 174 | } # end for: v 175 | 176 | 177 | # -------------------- Compute Errors ------------------- 178 | 179 | if(modeltype == 'mvar') { 180 | if(n_test==1) { 181 | m_true <- matrix(data_response[test, ], nrow = n_test) # for the case n_test=1 182 | } else { 183 | m_true <- data_response[test, ] 184 | } 185 | } 186 | 187 | if(modeltype == 'mgm') { 188 | if(n_test==1) { 189 | m_true <- matrix(data[test, ], nrow = n_test) # for the case n_test=1 190 | } else { 191 | m_true <- data[test, ] 192 | } 193 | } 194 | 195 | m_error <- matrix(NA, nrow = n_test, ncol = p) 196 | 197 | # 0/1 loss for categorical, RMSE for continuous (which here is just absolute error, because 1 element) 198 | for(i in 1:p) { 199 | if(type[v] == 'c') { 200 | m_error[, i] <- abs(m_true[, i] - m_pred[, i]) 201 | } else { 202 | m_error[, i] <- abs(m_true[, i] - m_pred[, i]) # RMSE for 1 case = abs error 203 | } 204 | } 205 | 206 | 207 | 208 | # -------------------- Output ------------------- 209 | 210 | predOutlist <- list() 211 | predOutlist$errors <- m_error 212 | predOutlist$error_time <- rowMeans(m_error) 213 | predOutlist$error_mean <- mean(m_error) 214 | 215 | 216 | return(predOutlist) 217 | 218 | 219 | } 220 | -------------------------------------------------------------------------------- /R/calcLL.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | calcLL <- function(X, 4 | y, 5 | fit, #glmnet fit object 6 | type, 7 | level, 8 | v, 9 | weights, 10 | lambda, 11 | LLtype = 'model') 12 | 13 | 14 | { 15 | 16 | if(missing(level)) stop('No levels passed to calcLL !') 17 | 18 | # This function calculates three different LL: 19 | # 1) LLtype = 'model': The LL of a given model via fit 20 | # 2) LLtype = 'nullmodel': The LL of the Null (Intercept) Model 21 | # 3) LLtype = 'saturated': The LL of the saturated model 22 | 23 | n <- nrow(X) 24 | 25 | if(LLtype == 'model') { 26 | 27 | if(type[v] == 'g') { 28 | beta_vector <- matrix(coef(fit, s = lambda), ncol = 1) 29 | predicted_mean <- cbind(rep(1, n), X) %*% as.vector(beta_vector) 30 | sd_residual <- sd(y-predicted_mean) 31 | LL_model <- dnorm(y, mean = predicted_mean, sd = sd_residual, log = TRUE) 32 | mean_LL_model <- sum(LL_model * weights) 33 | } 34 | 35 | if(type[v] == 'p') { 36 | beta_vector <- matrix(coef(fit, s = lambda), ncol = 1) 37 | predicted_mean <- cbind(rep(1, n), X) %*% as.vector(beta_vector) 38 | LL_model <- dpois(y, exp(predicted_mean), log = TRUE) 39 | mean_LL_model <- sum(LL_model * weights) 40 | } 41 | 42 | if(type[v] == 'c') { 43 | 44 | n_cats <- level[v] # number of levels 45 | 46 | ## Compute LL (see http://www.stanford.edu/~hastie/Papers/glmnet.pdf, equation 22) 47 | m_respdum <- matrix(NA, n, n_cats) # dummy for data 48 | m_coefs <- matrix(NA, n, n_cats) # dummy for coefficients 49 | cats <- unique(y) 50 | 51 | LL_n <- rep(NA, n) # Storage 52 | m_LL_parts <- matrix(NA, nrow = n, ncol=n_cats+1) 53 | 54 | for(catIter in 1:n_cats) { 55 | m_respdum[,catIter] <- (y==cats[catIter])*1 # dummy matrix for categories 56 | m_coefs[,catIter] <- cbind(rep(1, n), X) %*% matrix(coef(fit, s = lambda)[[catIter]], ncol = 1) 57 | m_LL_parts[,catIter] <- m_respdum[, catIter] * m_coefs[, catIter] 58 | } 59 | 60 | m_LL_parts[, n_cats+1] <- - log(rowSums(exp(m_coefs))) # the log part, see eq (22) 61 | LL_n <- rowSums(m_LL_parts) # sum up n_cat + 1 parts 62 | mean_LL_model <- sum(LL_n * weights) # apply weighting 63 | 64 | } 65 | 66 | } 67 | 68 | if(LLtype == 'nullmodel') { 69 | 70 | if(type[v] == 'g') { 71 | beta_vector <- matrix(coef(fit, s = 1)[1], ncol = 1) # only intercept here 72 | predicted_mean <- rep(1, n) * as.vector(beta_vector) 73 | sd_residual <- sd(y-predicted_mean) 74 | LL_model <- dnorm(y, mean = predicted_mean, sd = sd_residual, log = TRUE) 75 | mean_LL_model <- sum(LL_model * weights) 76 | } 77 | 78 | if(type[v] == 'p') { 79 | beta_vector <- matrix(coef(fit, s = 1)[1], ncol = 1) 80 | predicted_mean <- rep(1, n) * as.vector(beta_vector) # log mean actually 81 | LL_model <- dpois(y, exp(predicted_mean), log = TRUE) 82 | mean_LL_model <- sum(LL_model * weights) 83 | } 84 | 85 | if(type[v] == 'c') { 86 | 87 | n_cats <- level[v] # number of levels 88 | 89 | ## Compute LL (see http://www.stanford.edu/~hastie/Papers/glmnet.pdf, equation 22) 90 | m_respdum <- matrix(NA, n, n_cats) # dummy for data 91 | m_coefs <- matrix(NA, n, n_cats) # dummy for coefficients 92 | cats <- unique(y) 93 | 94 | LL_n <- rep(NA, n) # Storage 95 | m_LL_parts <- matrix(NA, nrow = n, ncol=n_cats+1) 96 | 97 | for(catIter in 1:n_cats) { 98 | m_respdum[,catIter] <- (y==cats[catIter])*1 # dummy matrix for categories 99 | m_coefs[,catIter] <- cbind(rep(1, n), X) %*% matrix(coef(fit, s = 1)[[catIter]], ncol = 1) 100 | m_LL_parts[,catIter] <- m_respdum[, catIter] * m_coefs[, catIter] 101 | } 102 | 103 | m_LL_parts[, n_cats+1] <- - log(rowSums(exp(m_coefs))) # the log part, see eq (22) 104 | LL_n <- rowSums(m_LL_parts) # sum up n_cat + 1 parts 105 | mean_LL_model <- sum(LL_n * weights) # apply weighting 106 | 107 | } 108 | 109 | } 110 | 111 | 112 | # if(LLtype == 'saturated') { 113 | # 114 | # if(type[v] == 'g') { 115 | # predicted_mean <- y 116 | # LL_model <- dnorm(y, mean = predicted_mean, sd = 1, log = TRUE) 117 | # mean_LL_model <- sum(LL_model * weights) 118 | # } 119 | # 120 | # if(type[v] == 'p') { 121 | # predicted_mean <- y 122 | # LL_model <- dpois(y, exp(predicted_mean), log = TRUE) 123 | # mean_LL_model <- sum(LL_model * weights) 124 | # } 125 | # 126 | # if(type[v] == 'c') { 127 | # 128 | # mean_LL_model <- 0 129 | # 130 | # # For discrete RVs,the saturated model has Likelihood = 1 and LL = log(1) = 0 131 | # # e.g. http://stats.stackexchange.com/questions/114073/logistic-regression-how-to-obtain-a-saturated-model 132 | # 133 | # } 134 | # 135 | # } 136 | 137 | 138 | 139 | return(mean_LL_model) 140 | 141 | } 142 | -------------------------------------------------------------------------------- /R/calcNeighbors.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | calcNeighbors <- function(fit, lambda, type, level, v) { 4 | 5 | n_cats <- level[v] 6 | 7 | if(type[v]!="c") { #continuous case 8 | coefs_bin <- as.matrix(coef(fit, s = lambda)[-1, ]) != 0 #nonzero? 9 | n_neighbors <- colSums(coefs_bin) 10 | } 11 | if(type[v]=="c"){ #categorical case 12 | m_neighbors <- matrix(0, ncol = length(fit$lambda), nrow = n_cats) 13 | coefs_bin <- vector("list", length=n_cats) 14 | for(ca in 1:n_cats){ 15 | coefs_bin[[ca]] <- as.matrix(coef(fit, s = lambda)[[ca]][-1,]) != 0 #nonzero? 16 | } 17 | n_neighbors <- colSums(Reduce('+', coefs_bin)!=0) # rule: a predictor has a nonzero parameter with 1 category of the y, then we have a neighborhood relation 18 | } 19 | 20 | return(n_neighbors) 21 | 22 | } 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /R/condition.R: -------------------------------------------------------------------------------- 1 | # jonashaslbeck@gmail.com; July 2019 2 | 3 | # input: 4 | # - any mgm object with k <= 3 5 | # - a list of variables and values at which to fix those variables 6 | 7 | # output: 8 | # - the conditional model (possibly still including 3-way interactions) 9 | 10 | condition <- function(object, 11 | values) { 12 | 13 | # ---------- Basic Info ----------- 14 | 15 | type <- object$call$type 16 | level <- object$call$level 17 | p <- length(type) # Number of Variables 18 | nCond <- length(values) 19 | 20 | # values: turn list into matrix 21 | m_fixed_values <- matrix(NA, nrow=nCond, ncol=2) 22 | cond_names <- as.numeric(names(values)) 23 | m_fixed_values[, 1] <- cond_names 24 | m_fixed_values[, 2] <- unlist(values) 25 | 26 | 27 | # ---------- Create Output Object----------- 28 | 29 | object_new <- object 30 | object_new$call$condition <- values 31 | 32 | 33 | # ---------- Input Checks ----------- 34 | 35 | # Check whether variables are specified via column name 36 | if(any(is.na(cond_names))) stop("Variables to condition on have to be specified by column number (not, for example column name). See also ?condition.") 37 | 38 | if(object$call$k>3) stop("This function is only implemented for 'first-order' moderation (i.e., 3-way interactions).") 39 | if(! ("core" %in% class(object)) ) stop("condition() is currently only implemented for mgm() objects.") 40 | 41 | # Categorical variables: only condition on categories that exist 42 | for(cat in 1:nCond) { 43 | if(type[m_fixed_values[cat, 1]] == "c") { 44 | if(!(m_fixed_values[cat, 2] %in% object$call$unique_cats[[m_fixed_values[cat, 1]]])) stop("Fixed category does not exist in the data.") 45 | } 46 | } 47 | 48 | # TODO: Continuous variables: give warning if one conditions outside 99% quantiles 49 | 50 | 51 | 52 | # ---------- Loop over response variables ----------- 53 | 54 | 55 | for(i in 1:p) { 56 | 57 | # ----- Case I) Gaussian response ----- 58 | 59 | if(type[i] == "g") { 60 | 61 | # Access node model 62 | model_i <- object$nodemodels[[i]]$model 63 | 64 | 65 | # Apply tau-thresholding & AND rule 66 | model_i <- applyTauAND(i = i, 67 | object = object, 68 | model_i = model_i) 69 | 70 | 71 | # Condition / fix values 72 | model_i_new <- condition_core(i = i, 73 | model_i = model_i, 74 | m_fixed_values = m_fixed_values) 75 | 76 | # Overwrite model object 77 | object_new$nodemodels[[i]]$model <- model_i_new 78 | 79 | 80 | } # end if: response gaussian? 81 | 82 | 83 | 84 | # ----- Case III: Poisson responses ----- 85 | # (actually exactly the same handling as "g" above) 86 | 87 | if(type[i] == "p") { 88 | 89 | # Retrieve nodemodel i 90 | model_i <- object$nodemodels[[i]]$model 91 | n_resp <- length(model_i) 92 | 93 | # Apply tau-thresholding & AND rule 94 | model_i <- applyTauAND(i = i, 95 | object = object, 96 | model_i = model_i) 97 | 98 | # Condition / fix values 99 | model_i_new <- condition_core(i = i, 100 | model_i = model_i, 101 | m_fixed_values = m_fixed_values) 102 | 103 | # Overwrite model object 104 | object_new$nodemodels[[i]]$model <- model_i_new 105 | 106 | } # end if: response Poisson? 107 | 108 | 109 | 110 | # ----- Case II: Categorical response ----- 111 | 112 | if(type[i] == "c") { 113 | 114 | # Retrieve nodemodel i 115 | model_i <- object$nodemodels[[i]]$model 116 | n_resp <- length(model_i) 117 | 118 | # Loop over response categories 119 | for(cat in 1:n_resp) { 120 | 121 | model_i_cat <- model_i[[cat]] 122 | 123 | # Apply tau-thresholding & AND rule 124 | model_i_cat <- applyTauAND(i = i, 125 | object = object, 126 | model_i = model_i_cat) 127 | 128 | # Condition / fix values 129 | model_i_new <- condition_core(i = i, 130 | model_i = model_i_cat, 131 | m_fixed_values = m_fixed_values) 132 | 133 | # Overwrite model object 134 | object_new$nodemodels[[i]]$model[[cat]] <- model_i_new 135 | 136 | } # end for: response cats 137 | 138 | } # end if: response categorical? 139 | 140 | } # end for: response variables 141 | 142 | 143 | # ---------- Aggregation across regressions ----------- 144 | 145 | 146 | 147 | object_new2 <- Reg2Graph(object_new, thresholding=FALSE) 148 | 149 | 150 | # ---------- Prepare output & return ----------- 151 | 152 | return(object_new2) 153 | 154 | 155 | } # eoF 156 | 157 | 158 | -------------------------------------------------------------------------------- /R/condition_core.R: -------------------------------------------------------------------------------- 1 | # jonashaslbeck@gmail.com; August 2020 2 | 3 | # Input: 4 | # - the nodemodel of node i 5 | # - matrix of fixed values 6 | # Output: 7 | # - conditioned nodemodel object 8 | 9 | 10 | condition_core <- function(i = i, 11 | model_i, 12 | m_fixed_values) { 13 | 14 | 15 | # ----- Distinguish between two cases: variable i is involved in 3-way interaction or not -- 16 | 17 | names_i <- rownames(model_i) 18 | n_terms <- nrow(model_i) 19 | nCond <- nrow(m_fixed_values) 20 | 21 | # --- Get a nicer object for main effects / interactions --- 22 | 23 | # This lists all non-zero terms and matches them with the fixed values 24 | effects <- matrix(NA, nrow = n_terms-1, ncol=7) 25 | colnames(effects) <- c("Variable1", "Variable2", "Fixed1", "Fixed2", "Parameter", "Type1", "Type2") 26 | 27 | names_aux1 <- strsplit(names_i[-1], ":") 28 | names_aux2 <- lapply(names_aux1, function(x) gsub("V", "", x)) 29 | 30 | names_aux3 <- lapply(names_aux2, function(x) { 31 | x_out <- rep(NA, length(x)) 32 | for(v in 1:length(x)) if(substr(x[v], start = nchar(x[v]), nchar(x[v])) == ".") x_out[v] <- gsub("\\.", "", x[v]) else x_out[v] <- x[v] 33 | return(x_out) 34 | }) 35 | 36 | n_var_i <- unlist(lapply(names_aux3, length)) 37 | for(q in 1:length(n_var_i)) effects[q, 1:n_var_i[q]] <- as.numeric(unlist(names_aux3[[q]])) 38 | 39 | 40 | # --- Fill in fixed values --- 41 | 42 | # type of predictor (cat (coded=0) vs con (coded=1))? 43 | for(q in 1:(n_terms-1)) effects[q, 6] <- ifelse(effects[q, 1] == round(effects[q, 1]), 1, 0) # 1 = continuous, 0 = categorical 44 | for(q in which(n_var_i==2)) effects[q, 7] <- ifelse(effects[q, 2] == round(effects[q, 2]), 1, 0) # 1 = continuous, 0 = categorical 45 | 46 | # Fill in continuous fixed values 47 | for(q in 1:(n_terms-1)) for(f in 1:nCond) if(effects[q, 1] == m_fixed_values[f, 1]) effects[q, 3] <- m_fixed_values[f, 2] 48 | for(q in which(n_var_i==2)) for(f in 1:nCond) if(effects[q, 2] == m_fixed_values[f, 1]) effects[q, 4] <- m_fixed_values[f, 2] 49 | 50 | # Fill in categorical fixed values 51 | for(q in 1:(n_terms-1)) { 52 | if(effects[q, 6] == 0) { 53 | var_cat <- strsplit(as.character(effects[q, 1]), "\\.")[[1]] 54 | for(f in 1:nCond) if(as.numeric(var_cat[1]) == m_fixed_values[f, 1]) if(as.numeric(var_cat[2]) == m_fixed_values[f, 2]) effects[q, 3] <- 1 else effects[q, 3] <- 0 55 | } 56 | } 57 | 58 | for(q in which(n_var_i==2)) { 59 | if(effects[q, 7] == 0) { 60 | var_cat <- strsplit(as.character(effects[q, 2]), "\\.")[[1]] 61 | for(f in 1:nCond) if(as.numeric(var_cat[1]) == m_fixed_values[f, 1]) if(as.numeric(var_cat[2]) == m_fixed_values[f, 2]) effects[q, 4] <- 1 else effects[q, 4] <- 0 62 | } 63 | } 64 | 65 | 66 | # Fill in parameter values 67 | effects[, 5] <- model_i[-1, 1] 68 | 69 | 70 | 71 | # --- Fill (new) conditioned model i --- 72 | 73 | l_cPars <- vector("list", length = n_terms) 74 | l_cPars <- lapply(l_cPars, function(x) list() ) # list structure, since we don't "know" in advance how many terms we'll have 75 | 76 | # Only calculate new parameters for regressions on variables that are not fixed 77 | if(!i %in% m_fixed_values[, 1]) { 78 | 79 | # Copy intercept 80 | l_cPars[[1]][[1]] <- model_i[1, 1] 81 | 82 | for(q in 1:(n_terms-1)) { 83 | 84 | # main effects 85 | if(n_var_i[q] == 1) { 86 | 87 | if(is.na(effects[q, 3])) { 88 | # I) no fixed value: just copy main effect 89 | l_cPars[[q+1]][[length(l_cPars[[q+1]])+1]] <- effects[q, 5] 90 | } else { 91 | # II) fixed value: multiply times fixed valye & copy to intercept 92 | l_cPars[[1]][[length(l_cPars[[1]])+1]] <- effects[q, 5] * effects[q, 3] 93 | } 94 | 95 | 96 | } # end if: main effects 97 | 98 | # interaction effects 99 | if(n_var_i[q] == 2) { 100 | 101 | ind_spec <- sum(c(is.na(effects[q, 3]), is.na(effects[q, 4]))) 102 | 103 | # I) nothing: just copy interaction effect 104 | if(ind_spec == 2) 105 | l_cPars[[q+1]][[length(l_cPars[[q+1]])+1]] <- effects[q, 5] 106 | 107 | # II) one of the two: add to respective main effect 108 | if(ind_spec==1) { 109 | ind_specified <- !is.na(c(effects[q, 3], effects[q, 4])) 110 | 111 | ind_leftover_mainE <- which(effects[n_var_i==1, 1]==effects[q, 1:2][!ind_specified]) # indicates the row of the main effect to which we add the present moderation effect 112 | 113 | l_cPars[[ind_leftover_mainE+1]][[length(l_cPars[[ind_leftover_mainE+1]])+1]] <- effects[q, 5] * effects[q, 3:4][ind_specified] 114 | } 115 | 116 | # III) both: add to intercept 117 | if(ind_spec == 0) l_cPars[[1]][[length(l_cPars[[1]])+1]] <- effects[q, 5] * effects[q, 3] * effects[q, 4] 118 | 119 | } # end if: interaction effects 120 | 121 | } # end for: loop parameters 122 | 123 | } # end if: variable still random (not fixed)? 124 | 125 | 126 | # --- Collapse lists into new model object --- 127 | 128 | model_i_new <- matrix(NA, nrow=n_terms, ncol=1) 129 | 130 | rownames(model_i_new) <- names_i 131 | for(q in 1:n_terms) model_i_new[q, 1] <- sum(unlist(l_cPars[[q]])) 132 | 133 | return(model_i_new) 134 | 135 | } # eoF 136 | 137 | 138 | 139 | 140 | -------------------------------------------------------------------------------- /R/defunct_msg.R: -------------------------------------------------------------------------------- 1 | 2 | mgmfit <- function(...) { 3 | .Defunct('mgm') 4 | } 5 | 6 | var.mgm <- function(...) { 7 | .Defunct('mvar') 8 | } 9 | 10 | tv.mgmfit <- function(...) { 11 | .Defunct('tvmgm') 12 | } 13 | 14 | tv_var.mgm <- function(...) { 15 | .Defunct('tvmvar') 16 | } 17 | 18 | -------------------------------------------------------------------------------- /R/getSign.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | getSign <- function(l_w_ind, 4 | l_w_par, 5 | type, 6 | set_signdefined, 7 | overparameterize, 8 | ord) 9 | 10 | 11 | 12 | { 13 | 14 | 15 | # ---------- Compute Aux Variables ---------- 16 | 17 | pair <- l_w_ind[[1]] 18 | 19 | outlist <- list("voteSign" = NULL, 20 | "Signs" = NULL) 21 | 22 | # ---------- A) For overparameterize = TRUE ---------- 23 | 24 | if(overparameterize) { 25 | 26 | # We have to take care of 3 cases: con-con, cat-cat, con-cat 27 | 28 | # I) ----- continuous-continuous ----- 29 | 30 | if(all(type[pair]!='c')) { 31 | v_sign <- rep(NA, ord+1) 32 | for(u in 1:(ord+1)) v_sign[u] <- sign(l_w_par[[u]]) 33 | int_sign <- sign(mean(v_sign)) # Majority vote; if equal, we get 0 = undefined 34 | 35 | outlist$voteSign <- int_sign 36 | outlist$Signs <- v_sign 37 | 38 | } # end if: I) 39 | 40 | 41 | # II) ----- binary-binary ----- 42 | 43 | if(all(type[pair]=='c')) { 44 | 45 | # Loop through terms (there are "ord" terms) 46 | v_sign_ord_j <- rep(NA, ord+1) 47 | for(ord_j in 1:(ord+1)) { 48 | 49 | sign_j <- 0 # default in case regression1 led to a zero estimate 50 | if(l_w_par[[ord_j]][[1]][1] != 0) sign_j <- sign(l_w_par[[ord_j]][[1]][1]) 51 | if(l_w_par[[ord_j]][[1]][2] != 0) sign_j <- - sign(l_w_par[[ord_j]][[1]][2]) 52 | 53 | v_sign_ord_j[ord_j] <- sign_j 54 | 55 | } # end loop: ord_j 56 | 57 | # Majority vote 58 | int_sign <- sign(mean(v_sign_ord_j)) 59 | 60 | outlist$Signs <- NA 61 | outlist$voteSign <- int_sign 62 | 63 | # if(ord == 1) { 64 | # ## only need to check parameters for one category, because of symmetry 65 | # # regression1 66 | # sign1 <- 0 # default in case regression1 led to a zero estimate 67 | # if(l_w_par[[1]][[1]][1] != 0) sign1 <- sign(l_w_par[[1]][[1]][1]) 68 | # if(l_w_par[[1]][[1]][2] != 0) sign1 <- - sign(l_w_par[[1]][[1]][2]) 69 | # # regression2 70 | # sign2 <- 0 # default in case regression1 led to a zero estimate 71 | # if(l_w_par[[2]][[1]][1] != 0) sign2 <- sign(l_w_par[[2]][[1]][1]) 72 | # if(l_w_par[[2]][[1]][2] != 0) sign2 <- - sign(l_w_par[[2]][[1]][2]) 73 | # int_sign <- sign(mean(c(sign1, sign2))) # Majority vote 74 | # 75 | # outlist$voteSign <- int_sign 76 | # outlist$Signs <- c(sign1, sign2) 77 | # 78 | # } else { 79 | # int_sign <- 0 # no sign defined for interactions of order k>2 80 | # 81 | # outlist$Signs <- NA 82 | # outlist$voteSign <- int_sign 83 | # 84 | # } 85 | 86 | 87 | } # end if: II) 88 | 89 | 90 | # III) ----- continuous-binary ----- 91 | 92 | if(any(type[pair] %in% 'c') & any(type[pair] %in% c('p', 'g')) ) { 93 | 94 | 95 | # Loop through terms (there are "ord" terms) 96 | v_sign_ord_j <- rep(NA, ord+1) 97 | for(ord_j in 1:(ord+1)) { 98 | 99 | if(!any(class(l_w_par[[ord_j]])=="list")) { #is.null -> continuous, else binary 100 | sign_j <- sign(as.numeric(l_w_par[[ord_j]][[2]])) 101 | } else { 102 | sign_j <- 0 # default in case regression1 led to a zero estimate 103 | if(l_w_par[[ord_j]][[1]][1] != 0) sign2 <- - sign(l_w_par[[ord_j]][[1]][1]) # positive value for state 1 means negative 'pairwise relationship' 104 | if(l_w_par[[ord_j]][[2]][1] != 0) sign2 <- sign(l_w_par[[ord_j]][[2]][1]) 105 | } 106 | 107 | v_sign_ord_j[ord_j] <- sign_j 108 | 109 | } # end loop: ord_j 110 | 111 | # Majority vote 112 | int_sign <- sign(mean(v_sign_ord_j)) 113 | 114 | outlist$Signs <- NA 115 | outlist$voteSign <- int_sign 116 | 117 | # if(ord == 1) { 118 | # sign1 <- sign2 <- 0 # set default in case one direction has zero estimates 119 | # # need to know which list entry in l_w_par corresponds to which regression: cont <- binary or cont -> binary; I do that by the fixed dimensionality of the parameter vector/matrix 120 | # if(is.null(dim(l_w_par[[1]]))) { #is.null -> continuous, else binary 121 | # sign1 <- sign(as.numeric(l_w_par[[1]][[2]])) 122 | # } else { 123 | # if(l_w_par[[1]][1, 1] != 0) sign1 <- - sign(l_w_par[[1]][1,1]) # positive value for state 1 means negative 'pairwise relationship' 124 | # if(l_w_par[[1]][2, 1] != 0) sign1 <- sign(l_w_par[[1]][2,1]) 125 | # } 126 | # if(is.null(dim(l_w_par[[2]]))) { 127 | # sign2 <- sign(l_w_par[[2]][2]) 128 | # } else { 129 | # if(l_w_par[[2]][1,1] != 0) sign2 <- - sign(l_w_par[[2]][1,1]) # positive value for state 1 means negative 'pairwise relationship' 130 | # if(l_w_par[[2]][2,1] != 0) sign2 <- sign(l_w_par[[2]][2,1]) 131 | # } 132 | # int_sign <- sign(mean(c(sign1,sign2))) # Majority vote 133 | # 134 | # outlist$voteSign <- int_sign 135 | # outlist$Signs <- c(sign1, sign2) 136 | # 137 | # } else { 138 | # int_sign <- 0 # no sign defined for interactions of order k>2 139 | # 140 | # outlist$Signs <- NA 141 | # outlist$voteSign <- int_sign 142 | # } 143 | 144 | } # end if: III) 145 | 146 | 147 | } # end if: overparameterize? 148 | 149 | 150 | 151 | 152 | # ---------- B) For overparameterize = FALSE ---------- 153 | 154 | if(!overparameterize) { 155 | 156 | 157 | # I) ----- continuous-continuous ----- 158 | 159 | if(all(type[pair]!='c')) { 160 | v_sign <- rep(NA, ord+1) 161 | for(u in 1:(ord+1)) v_sign[u] <- sign(l_w_par[[u]]) 162 | int_sign <- sign(mean(v_sign)) # Majority vote; if equal, we get 0 = undefined 163 | 164 | outlist$voteSign <- int_sign 165 | outlist$Signs <- v_sign 166 | 167 | } # end if: I) 168 | 169 | 170 | # II) ----- binary-binary ----- 171 | 172 | if(all(type[pair]=='c')) { 173 | 174 | # Loop through terms (there are "ord" terms) 175 | v_sign_ord_j <- rep(NA, ord+1) 176 | for(ord_j in 1:(ord+1)) { 177 | 178 | sign_j <- sign(as.numeric(l_w_par[[ord_j]][[2]])) # if interaction A is binary <- cont 179 | v_sign_ord_j[ord_j] <- sign_j 180 | 181 | } # end loop: ord_j 182 | 183 | # Majority vote 184 | int_sign <- sign(mean(v_sign_ord_j)) 185 | 186 | outlist$Signs <- NA 187 | outlist$voteSign <- int_sign 188 | 189 | # if(ord == 1) { 190 | # sign1 <- sign(as.numeric(l_w_par[[1]][[2]])) 191 | # sign2 <- sign(as.numeric(l_w_par[[2]][[2]])) 192 | # int_sign <- sign(mean(c(sign1, sign2))) 193 | # 194 | # outlist$voteSign <- int_sign 195 | # outlist$Signs <- c(sign1, sign2) 196 | # 197 | # } else { 198 | # int_sign <- 0 # no sign defined for interactions of order k>2 199 | # 200 | # outlist$Signs <- NA 201 | # outlist$voteSign <- int_sign 202 | # } 203 | 204 | } # end if: II) 205 | 206 | 207 | # III) ----- continuous-binary ----- 208 | 209 | if(any(type[pair] %in% 'c') & any(type[pair] %in% c('p', 'g')) ) { 210 | 211 | # if(ord == 1) { 212 | 213 | # if(length(l_w_par[[1]]) == 1) { 214 | # sign1 <- sign(as.numeric(l_w_par[[1]])) # if interaction A is cont <- binary 215 | # } else { 216 | # sign1 <- sign(as.numeric(l_w_par[[1]][[2]])) # if interaction A is binary <- cont 217 | # } 218 | # 219 | # # same for second interaction 220 | # if(length(l_w_par[[2]]) == 1) { 221 | # sign2 <- sign(as.numeric(l_w_par[[2]])) 222 | # } else { 223 | # sign2 <- sign(as.numeric(l_w_par[[2]][[2]])) 224 | # } 225 | # 226 | # int_sign <- sign(mean(c(sign1, sign2))) 227 | # 228 | # outlist$voteSign <- int_sign 229 | # outlist$Signs <- c(sign1, sign2) 230 | 231 | # } else { 232 | 233 | # Loop through terms (there are "ord" terms) 234 | v_sign_ord_j <- rep(NA, ord+1) 235 | for(ord_j in 1:(ord+1)) { 236 | 237 | if(length(l_w_par[[ord_j]]) == 1) { 238 | sign_j <- sign(as.numeric(l_w_par[[ord_j]])) # if interaction A is cont <- binary 239 | } else { 240 | sign_j <- sign(as.numeric(l_w_par[[ord_j]][[2]])) # if interaction A is binary <- cont 241 | } 242 | 243 | v_sign_ord_j[ord_j] <- sign_j 244 | 245 | } # end loop: ord_j 246 | 247 | # Majority vote 248 | int_sign <- sign(mean(v_sign_ord_j)) 249 | 250 | outlist$Signs <- NA 251 | outlist$voteSign <- int_sign 252 | 253 | # } 254 | 255 | } # end if: III) 256 | 257 | 258 | } # end if: overparameterize? 259 | 260 | # ---------- Return Sign ---------- 261 | 262 | return(outlist) 263 | 264 | } # eoF 265 | -------------------------------------------------------------------------------- /R/glmnetRequirements.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | glmnetRequirements <- function(data, 4 | type, 5 | weights, 6 | bootstrap = FALSE, 7 | b = NULL, 8 | seed_b = NULL, 9 | silent = FALSE) { 10 | 11 | # if silent = FALSE; the function returns error messages 12 | # if silent = TRUE: the function returns a logical with TRUE=at least one error; 13 | # this is used in resampling schemes to discard bootstrap samples that do not allow to fit the model 14 | 15 | var_names <- colnames(data) 16 | n <- nrow(data) 17 | 18 | # 1) Nonzero variance 19 | var_check <- apply(data, 2, var) 20 | ind_zero_var <- which(var_check == 0) 21 | 22 | check_var_1 <- length(ind_zero_var) > 0 23 | 24 | if(!silent) if(check_var_1) { 25 | if(bootstrap) cat(paste0("In boostrap sample ", b, " with seed ", seed_b, " the following error occured:\n")) 26 | stop(paste0('Please only provide variables with nonzero variance. Variable(s) with zero variance: ', paste(var_names[ind_zero_var], collapse = ', '))) 27 | } 28 | 29 | 30 | # 2) > 1 events per category 31 | 32 | check_var_2 <- FALSE # for the case of: (1) no categorical variables and (2) silent = TRUE (when called in resample() ) 33 | 34 | if('c' %in% type) { 35 | ind_cat <- which(type == 'c') 36 | l_frqu <- list() 37 | for(i in 1:length(ind_cat)) l_frqu[[i]] <- table(data[,ind_cat[i]]) # this does not catch the case where one category is not present at all; but this is catched by comparing specified levels and real levels 38 | v_check <- unlist(lapply(l_frqu, function(x) { 39 | frq_norm <- x / sum(x) 40 | ind_min <- which.min(frq_norm) 41 | check1 <- !(x[ind_min] > 1) 42 | })) 43 | # Error Msg Check 1: 44 | 45 | check_var_2 <- sum(v_check) > 0 46 | 47 | if(!silent) if(check_var_2) { 48 | ind_check1 <- ind_cat[v_check == TRUE] 49 | stop(paste0('At least 2 events required for each category. Requirement not met for variable(s): ',paste(var_names[ind_cat[ind_check1]], collapse = ', '))) 50 | } 51 | } 52 | 53 | 54 | # 3) For each category: p(K=l) > 10^-5 55 | 56 | check_var_3 <- FALSE # for the case of: (1) no categorical variables and (2) silent = TRUE (when called in resample() ) 57 | 58 | if('c' %in% type) { 59 | 60 | # Function to compute weighted table: 61 | wtable <- function(x, weights) { 62 | n_level <- length(unique(x)) 63 | v_level <- unique(x) 64 | n_obs <- length(x) 65 | v_wfrq <- rep(NA, n_level) 66 | for(i in 1:n_level) v_wfrq[i] <- sum(rep(1, sum(x == v_level[i])) * weights[x == v_level[i]]) / n 67 | return(v_wfrq) 68 | } 69 | 70 | 71 | ind_cat <- which(type == 'c') 72 | check2 <- rep(NA, length(ind_cat)) 73 | for(i in 1:length(ind_cat)) check2[i] <- min(wtable(data[,ind_cat[i]], weights)) < 10^-5 # smaller = error 74 | 75 | check_var_3 <- sum(check2)>0 76 | 77 | if(!silent) if(check_var_3) stop(paste0('Each category has to have probability > 10^-5. Requirement not met for variable(s): ',paste(var_names[ind_cat[check2]], collapse = ', '))) 78 | 79 | } 80 | 81 | 82 | # If any of the three checks fails, return TRUE 83 | if(silent) return(any(check_var_1, check_var_2, check_var_3)) 84 | 85 | 86 | } # eoF 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /R/lagData.R: -------------------------------------------------------------------------------- 1 | 2 | lagData <- function(data, 3 | lags, 4 | consec = NULL) { 5 | 6 | 7 | # ---------- Compute Aux Variables ---------- 8 | 9 | data <- as.matrix(data) # turn into matrix 10 | 11 | max_lag <- max(lags) # maximum lag 12 | lags_ext <- 1:max(lags) # makes it easier to delete right columns 13 | 14 | n <- nrow(data) 15 | p <- ncol(data) 16 | n_var <- nrow(data) - max(lags) 17 | n_lags <- length(lags_ext) 18 | 19 | data_response <- data #[-c(1:max_lag), ] 20 | 21 | if(!is.null(consec)) m_consec <- matrix(NA, 22 | nrow = n, 23 | ncol = n_lags) 24 | 25 | # ---------- Lag Variables ---------- 26 | 27 | # Storage 28 | l_data_lags <- list() 29 | 30 | # Loop through lags 31 | lag_pos <- 1 # to make sure that the list is filled successively, if not a full sequence (e.g. lags = c(1,5)); otherwise this leads to problems later in the code 32 | for(lag in lags) { 33 | 34 | lagged_data <- matrix(NA, nrow = n, ncol=p) 35 | lagged_data[(lag+1):n, ] <- data[-((n-lag+1) : n), ] 36 | lagged_data <- matrix(lagged_data, 37 | ncol = p, 38 | nrow = n) 39 | colnames(lagged_data) <- paste("V", 1:p, '.lag', lag, '.', sep = "") 40 | 41 | l_data_lags[[lag_pos]] <- lagged_data 42 | 43 | lag_pos <- lag_pos + 1 44 | } 45 | 46 | # ---------- Knock Out if not consecutive ---------- 47 | 48 | 49 | # browser() 50 | 51 | if(!is.null(consec)) { 52 | 53 | for(lag in lags_ext) m_consec[(lag+1):n, lag] <- consec[-((n-lag+1) : n)] 54 | 55 | # Calculate which cases to knock out 56 | m_consec_check <- cbind(consec, m_consec) 57 | 58 | v_check <- apply(m_consec_check, 1, function(x) { 59 | 60 | if(any(is.na(x))) { 61 | FALSE 62 | } else { 63 | check_row <- x[1] - x[-1] == 1:length(x[-1]) # check for extended lags 1:max(lags) 64 | check_row_relevant <- check_row[lags_ext %in% lags] # but then compute check only over the lags that are actually specified 65 | if(any(check_row_relevant == FALSE)) FALSE else TRUE # and return test result: any required previous measurement missing? => FALSE 66 | } 67 | 68 | }) 69 | 70 | } else { 71 | 72 | v_check <- rep(TRUE, n) 73 | v_check[1:n_lags] <- FALSE 74 | 75 | } 76 | 77 | 78 | # ---------- Output ---------- 79 | 80 | outlist <- list() 81 | outlist$data_response <- data_response 82 | outlist$l_data_lags <- l_data_lags 83 | outlist$included <- v_check 84 | 85 | 86 | return(outlist) 87 | 88 | } 89 | -------------------------------------------------------------------------------- /R/mvarsampler.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | mvarsampler <- function(coefarray, # v x v2 x cat(v) x cat(v2) x lag array specifying lagged parameters 4 | lags, # vector specifiying the lags 5 | thresholds, # p list containint vector with a threshold for each category 6 | sds, # p vector of sds 7 | type, 8 | level, 9 | N, # number of cases that should be sampled from the model 10 | pbar) 11 | 12 | { 13 | 14 | # ---------- Input Checks ---------- 15 | 16 | # Does coefarray have the right dimensions? 17 | if(length(dim(coefarray))!=5) stop('coefarray has to have 5 dimensions: p x p x max(level) x max(level) x n_lags.') 18 | if(dim(coefarray)[5] != length(lags)) stop('Dimensions for lags in coefarray have to match the number of lags specified in lags.') 19 | if(dim(coefarray)[1] != length(thresholds)) stop('Dimensions for variables in coefarray have to match the number of specified thresholds.') 20 | if(any(type=='g')) if(dim(coefarray)[1] != length(sds)) stop('Dimensions for variables in coefarray have to match the number of specified standard deviations.') 21 | if(dim(coefarray)[1] != length(type)) stop('Dimensions for variables in coefarray have to match the number of specified types.') 22 | if(dim(coefarray)[1] != length(level)) stop('Dimensions for variables in coefarray have to match the number of specified levels.') 23 | if(dim(coefarray)[1] != dim(coefarray)[2]) stop('The first two dimensions specifying the cross-lagged effects in coefarray have to have the same dimensionality.') 24 | 25 | 26 | # ---------- Compute Auxilliary Variables ---------- 27 | 28 | p <- dim(coefarray)[1] 29 | n_lags <- length(lags) 30 | labels <- paste0('V', 1:p, '.') 31 | max_lag <- max(lags) 32 | 33 | 34 | # ---------- Create Output Object & copy the call ---------- 35 | 36 | # Create Output Object 37 | mvarsamp_obj <- list('call' = NULL, 38 | 'data' = NULL) 39 | 40 | # Copy the call 41 | mvarsamp_obj$call <- list('coefarray' = coefarray, 42 | 'lags' = lags, 43 | 'thresholds' = thresholds, 44 | 'sds' = sds, 45 | 'type' = type, 46 | 'level' = level, 47 | 'N' = N, 48 | 'pbar' = pbar) 49 | 50 | 51 | # ---------- Sample Data ---------- 52 | 53 | data <- matrix(NA, ncol = p, nrow = N + max_lag) 54 | 55 | # Create random starting values 56 | for(r in 1:max_lag) { 57 | for(v in 1:p) { 58 | if(type[v] == 'c') data[r, v] <- sample(1:level[v], size = 1) 59 | if(type[v] == 'g') data[r, v] <- rnorm(1) 60 | if(type[v] == 'p') data[r, v] <- rpois(1, 1) 61 | } 62 | } 63 | 64 | 65 | if(pbar==TRUE) pb <- txtProgressBar(min = 0, max=N, initial=0, char = "-", style = 3) 66 | 67 | 68 | for(r in (max_lag+1):(N+max_lag)) { 69 | 70 | for(v in 1:p) { 71 | 72 | if(type[v] != 'c') { 73 | 74 | # Get design matrix (always use overparamterized version) 75 | design_mat <- ModelMatrix(data, 76 | type, 77 | level, 78 | labels, 79 | d = 1, 80 | allCats = TRUE) 81 | 82 | potential_lag <- list() 83 | for(lag in 1:n_lags) { 84 | 85 | # Get parameters in order 86 | l_parms <- list() 87 | for(par in (1:p)[-v]) l_parms[[par]] <- coefarray[v, par, 1:level[v], 1:level[par], lag] 88 | v_parms <- unlist(l_parms) 89 | 90 | # browser() 91 | 92 | # Compute part of potential for given lag 93 | potential_lag[[lag]] <- design_mat[r - lags[lag], ] * v_parms 94 | 95 | } # end for: lag 96 | 97 | mu <- thresholds[[v]] + sum(unlist(potential_lag)) 98 | 99 | # if(v==3) browser() 100 | 101 | if(type[v] == 'g') data[r, v] <- rnorm(1, mean = mu, sd = sds[v]) 102 | if(type[v] == 'p') data[r, v] <- rpois(1, lambda = mu) 103 | 104 | } # end if: !='c' 105 | 106 | 107 | 108 | if(type[v] == 'c') { 109 | 110 | # Get design matrix 111 | design_mat <- ModelMatrix(data, 112 | type, 113 | level, 114 | labels, 115 | d = 1, 116 | v = v, 117 | allCats = TRUE) 118 | 119 | # Loop over categories 120 | l_potentials <- list() 121 | for(cat in 1:level[v]) { 122 | 123 | potential_lag <- list() 124 | for(lag in 1:n_lags) { 125 | 126 | # Get parameters in order 127 | l_parms <- list() 128 | for(par in (1:p)[-v]) l_parms[[par]] <- coefarray[v, par, (1:level[v])[cat], 1:level[par], lag] 129 | v_parms <- unlist(l_parms) 130 | 131 | # Compute part of potential for given lag 132 | potential_lag[[lag]] <- design_mat[r - lags[lag], ] * v_parms 133 | 134 | } # end for: lag 135 | 136 | # put potential-parts from different lags together 137 | l_potentials[[cat]] <- thresholds[[v]][cat] + sum(unlist(potential_lag)) 138 | 139 | } # end for: cat 140 | 141 | 142 | # compute probabilities 143 | v_potentials <- exp(unlist(l_potentials)) 144 | probabilities <- v_potentials / sum(v_potentials) 145 | 146 | data[r, v] <- sample(1:level[v], prob = probabilities, size = 1) 147 | 148 | } # end if: =='c'? 149 | 150 | 151 | } # end for: v variables 152 | 153 | if(pbar==TRUE) setTxtProgressBar(pb, r) 154 | 155 | } # end for: r rows 156 | 157 | 158 | 159 | # ---------- Output ---------- 160 | 161 | # browser() 162 | 163 | mvarsamp_obj$data <- data[(max_lag+1):(N+max_lag), ] 164 | 165 | return(mvarsamp_obj) 166 | 167 | } # eoF 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /R/nodeEst.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | nodeEst <- function(y, 4 | X, 5 | fam, 6 | lambdaSeq, 7 | lambdaSel, 8 | lambdaFolds, 9 | lambdaGam, 10 | alpha, 11 | weights, 12 | n, 13 | nadj, 14 | v, 15 | type, 16 | level, 17 | emp_lev, 18 | overparameterize, 19 | thresholdCat) 20 | 21 | 22 | { 23 | 24 | # ---------- Calc Aux Variables ---------- 25 | 26 | # Define Exponential Family of Node at Hand 27 | if(type[v] == 'c') fam = 'multinomial' 28 | if(type[v] == 'g') fam = 'gaussian' 29 | if(type[v] == 'p') fam = 'poisson' 30 | 31 | 32 | # Set threshold (intercept) parameter to zero? 33 | if(type[v] == 'c') { 34 | intercept <- thresholdCat 35 | } else { 36 | intercept <- TRUE # for continuous variables always estimated 37 | } 38 | 39 | 40 | # ---------- Lambda selection via EBIC ---------- 41 | 42 | if(lambdaSel == 'EBIC') { 43 | 44 | 45 | # if(v==3) browser()FA 46 | 47 | # ----- Fit Model ----- 48 | 49 | fit <- glmnet(x = X, 50 | y = y, 51 | family = fam, 52 | alpha = alpha, 53 | weights = weights, 54 | lambda = lambdaSeq, 55 | intercept = intercept) 56 | 57 | 58 | 59 | n_lambdas <- length(fit$lambda) # length of fitted lambda sequence 60 | 61 | # ----- Calc EBIC of model: Fast Alternative by using ----- 62 | 63 | # Calculate LL of Null Model 64 | LL_null <- calcLL(X = X, 65 | y = y, 66 | fit = fit, 67 | type = type, 68 | level = level, 69 | v = v, 70 | weights = weights, 71 | lambda = fit$lambda[1], # any is fine, lambda has no influence on null (intercept) model 72 | LLtype = 'nullmodel') 73 | 74 | LL_sat <- 1/2 * fit$nulldev + LL_null # calculate LL of saturated model 75 | deviance <- (1 - fit$dev.ratio) * fit$nulldev # note: dangerous in glmnet: fit$dev = fit$dev.ratio 76 | LL_lambda_models <- - 1/2 * deviance + LL_sat # length = length of lambda sequence 77 | 78 | 79 | n_neighbors <- rep(NA, n_lambdas) 80 | for(i in 1:n_lambdas) n_neighbors[i] <- calcNeighbors(fit = fit, 81 | lambda = fit$lambda[i], 82 | type = type, 83 | level = level, 84 | v = v) 85 | # Note: n_neighbors = fit$df 86 | EBIC_lambda <- - 2 * LL_lambda_models + n_neighbors * log(nadj) + 2 * lambdaGam * n_neighbors * log(ncol(X)) 87 | 88 | EBIC_min <- min(EBIC_lambda) 89 | 90 | ind_lambda_min <- which.min(EBIC_lambda) 91 | lambda_min <- fit$lambda[ind_lambda_min] 92 | lambad_min_model <- coef(fit, s = lambda_min) 93 | 94 | # ----- Output ----- 95 | 96 | outlist <- list('EBIC' = EBIC_min, 97 | 'deviance' = deviance[which.min(EBIC_lambda)], 98 | 'lambda' = lambda_min, 99 | 'alpha' = alpha, 100 | 'model' = lambad_min_model, 101 | 'fitobj' = fit) 102 | 103 | } # end if: EBIC 104 | 105 | 106 | # ---------- Lambda selection via CV ---------- 107 | 108 | if(lambdaSel == 'CV') { 109 | 110 | # ----- Fit Model ----- 111 | 112 | fit <- cv.glmnet(x = X, 113 | y = y, 114 | family = fam, 115 | alpha = alpha, 116 | weights = weights, 117 | nfolds = lambdaFolds, 118 | type.measure = "deviance", 119 | lambda = lambdaSeq, 120 | intercept = intercept) 121 | 122 | lambda_min <- fit$lambda.min 123 | lambad_min_model <- coef(fit, s = lambda_min) 124 | 125 | # ----- Calc Deviance of model ----- 126 | # (used in alpha selection via EBIC) 127 | 128 | LL_model <- calcLL(X = X, 129 | y = y, 130 | fit = fit, 131 | type = type, 132 | level = level, 133 | v = v, 134 | weights = weights, 135 | lambda = lambda_min, 136 | LLtype = 'model') 137 | 138 | n_neighbors <- calcNeighbors(fit = fit, 139 | lambda = lambda_min, 140 | type = type, 141 | level = level, 142 | v = v) 143 | 144 | EBIC <- - 2 * LL_model + n_neighbors * log(nadj) + 2 * lambdaGam * log(ncol(X)) 145 | 146 | 147 | # ----- Output ----- 148 | 149 | outlist <- list('EBIC' = EBIC, 150 | 'deviance' = NULL, 151 | 'lambda' = lambda_min, 152 | 'alpha' = alpha, 153 | 'model' = lambad_min_model, 154 | 'fitobj' = fit) 155 | 156 | } # end if: CV 157 | 158 | 159 | # ----- Return ----- 160 | 161 | return(outlist) 162 | 163 | 164 | 165 | } # eoF 166 | -------------------------------------------------------------------------------- /R/predictCore_stat.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | predictCore_stat <- function(object, 4 | data, 5 | consec = NULL) 6 | 7 | 8 | { 9 | 10 | 11 | # ----- Compute Aux Variables ----- 12 | 13 | 14 | cobj <- class(object)[2] 15 | nNodes <- ncol(data) 16 | nCases <- nrow(data) 17 | call <- object$call 18 | type <- call$type 19 | level <- call$level 20 | k <- call$k 21 | n_lags <- length(call$lags) 22 | if(cobj == "mvar") max_lags <- max(call$lags) 23 | 24 | 25 | # Create outlist and storage 26 | predCoreObj <- list('pred' = vector('list', length = nNodes), 27 | 'prob' = vector('list', length = nNodes), 28 | 'true' = NULL, 29 | 'included' = NULL) 30 | 31 | 32 | # Some Generic Data preparation 33 | data <- data.frame(data) 34 | colnames(data)[1:nNodes] <- paste("V", 1:nNodes, '.', sep = "") 35 | for(sc in which(type=='g')) data[,sc] <- scale(data[,sc]) # scale gaussians 36 | data_df <- data 37 | for(sc2 in which(type=='c')) data_df[,sc2] <- as.factor(data_df[,sc2]) # categoricals as factors 38 | 39 | nodeModels <- object$nodemodels 40 | d <- object$call$k - 1 41 | 42 | # ----- A.1) mgm ----- 43 | 44 | if(cobj == 'core') { 45 | 46 | for(v in 1:nNodes) { 47 | 48 | # -- Define model matrix -- 49 | 50 | # ----- Construct Design Matrix ----- 51 | 52 | X_standard <- X <- ModelMatrix_standard(data = data, 53 | type = type, 54 | d = d, 55 | v = v, 56 | moderators = object$call$moderators) 57 | 58 | if(object$call$overparameterize) { 59 | 60 | X_over <- ModelMatrix(data = data, # fix that input, that's stupid 61 | type = type, 62 | level = level, 63 | labels = colnames(data), 64 | d = d, 65 | moderators = object$call$moderators, 66 | v = v) 67 | 68 | X <- X_over 69 | 70 | } # end if: overparameterize? 71 | 72 | data_df <- as.data.frame(data_df) 73 | y <- as.numeric(data[, v]) 74 | 75 | 76 | if(type[v]=='c') { 77 | 78 | ## Prediction Categorical 79 | coefs <- nodeModels[[v]]$model 80 | n_cat <- length(coefs) 81 | Potentials <- matrix(NA, nCases, n_cat) 82 | 83 | # loop over categories & compute potentials 84 | for(cat in 1:n_cat) Potentials[,cat] <- exp(coefs[[cat]][1] + X %*% matrix(coefs[[cat]][-1], nrow=length(coefs[[cat]][-1]))) 85 | 86 | # compute category-probabilities 87 | Probabilities <- Potentials[,1:n_cat] / rowSums(Potentials[,1:n_cat]) 88 | pred_class_id <- apply(Probabilities, 1, which.max) # classify 89 | predCoreObj$pred[[v]] <- sort(unique(data[,v]))[pred_class_id] # matches the ordering of predicted categories in glmnet(), which also orders with increasing integers 90 | predCoreObj$prob[[v]] <- Probabilities 91 | 92 | 93 | } else { 94 | 95 | ## Prediction Continuous (same for Gauss and Pois, because in both cases the natural parameter is equal to the expectation) 96 | # predicitions 97 | coefs <- as.numeric(nodeModels[[v]]$model) # get coefficients 98 | predCoreObj$pred[[v]] <- coefs[1] + X %*% matrix(coefs[-1], nrow=length(coefs[-1])) # predict 99 | 100 | if(type[v]=="p") predCoreObj$pred[[v]] <- exp(predCoreObj$pred[[v]]) # coefficients of poisson regresison are on log scale 101 | 102 | } 103 | 104 | } # end of variable loop 'mgm' 105 | 106 | # Save ground truth 107 | predCoreObj$true <- data 108 | 109 | } 110 | 111 | 112 | 113 | # ----- A.2) mvar ----- 114 | 115 | if(cobj == 'mvar') { 116 | 117 | # Prepare Data (already cuts max(lags) first observations to compute design matrix) 118 | data_lagged <- lagData(data = data, 119 | lags = object$call$lags, 120 | consec = consec) 121 | 122 | predCoreObj$included <- data_lagged$included # this specifies additionally, whether measurements are successive after the max(lags) measurement 123 | 124 | data_response <- data_lagged$data_response 125 | l_data_lags <- data_lagged$l_data_lags 126 | data_response <- apply(data_response, 2, as.numeric) # to avoid confusion with labels of categories if there are factors 127 | 128 | data_response <- data_response[predCoreObj$included, ] 129 | l_data_lags <- lapply(l_data_lags, function(z) z <- z[predCoreObj$included, ]) 130 | 131 | nCases <- nrow(data_response) 132 | 133 | for(v in 1:nNodes) { 134 | 135 | # ----- Create VAR Design Matrix ----- 136 | 137 | # append response with predictors 138 | y <- data_response[, v] # response variable v 139 | data_input_MM <- do.call(cbind, l_data_lags) 140 | data_input_MM <- as.data.frame(data_input_MM) 141 | 142 | # turn categoricals into factors for model.matrix() 143 | for(i in which(type=='c')) data_input_MM[, i] <- as.factor(data_input_MM[, i]) 144 | 145 | # need to have response and predictors in one dataframe for model.matrix() 146 | data_v <- cbind(y, data_input_MM) 147 | 148 | # Dummy coding 149 | form <- as.formula('y ~ (.)') 150 | 151 | # Construct standard design matrix (to get number of parameters for tau threshold) 152 | X_standard <- model.matrix(form, data = data_v)[, -1] # delete intercept (added by glmnet later) 153 | 154 | if(object$call$overparameterize) { 155 | 156 | # Compute augmented type and level vectors 157 | type_aug <- rep(type, n_lags) 158 | level_aug <- rep(level, n_lags) 159 | 160 | # Construct over-parameterized design matrix 161 | X_over <- ModelMatrix(data = data_input_MM, 162 | type = type_aug, 163 | level = level_aug, 164 | labels = colnames(data_input_MM), 165 | d = 1, 166 | v = NULL) 167 | X <- X_over 168 | 169 | } else { 170 | 171 | X <- X_standard 172 | 173 | } 174 | 175 | # ---- Prediction ----- 176 | 177 | if(type[v]=='c') { 178 | 179 | ## Prediction Categorical 180 | coefs <- nodeModels[[v]]$model 181 | n_cat <- length(coefs) 182 | Potentials <- matrix(NA, nCases, n_cat) 183 | 184 | # loop over categories & compute potentials 185 | for(cat in 1:n_cat) Potentials[,cat] <- exp(coefs[[cat]][1] + X %*% matrix(coefs[[cat]][-1], nrow=length(coefs[[cat]][-1]))) 186 | 187 | # compute category-probabilities 188 | Probabilities <- Potentials[, 1:n_cat] / rowSums(Potentials[, 1:n_cat]) 189 | pred_class_id <- apply(Probabilities, 1, which.max) # classify 190 | predCoreObj$pred[[v]] <- sort(unique(data[,v]))[pred_class_id] # matches the ordering of predicted categories in glmnet(), which also orders with increasing integers 191 | predCoreObj$prob[[v]] <- Probabilities 192 | 193 | } else { 194 | 195 | ## Prediction Continuous (same for Gauss and Pois, because in both cases the natural parameter is equal to the expectation) 196 | # predicitions 197 | coefs <- as.numeric(nodeModels[[v]]$model) # get coefficients 198 | predCoreObj$pred[[v]] <- coefs[1] + X %*% matrix(coefs[-1], nrow=length(coefs[-1])) # predict 199 | 200 | if(type[v]=="p") predCoreObj$pred[[v]] <- exp(predCoreObj$pred[[v]]) 201 | 202 | } 203 | 204 | } # end loop: v 205 | 206 | 207 | # Save true (reduced) data 208 | predCoreObj$true <- data_response 209 | 210 | } # end if: mvar? 211 | 212 | 213 | # ----- Return ----- 214 | 215 | return(predCoreObj) 216 | 217 | 218 | } # eoF 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /R/print.int.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | print.int <- function(x, 5 | ...) 6 | 7 | 8 | 9 | { 10 | 11 | # Sign Defined? 12 | if(x$sign == 0) sign <- "NA" else sign <- x$sign 13 | 14 | # Signtext 15 | if(x$sign == 0) signtext <- " (Not defined)" 16 | if(x$sign == 1) signtext <- " (Positive)" 17 | if(x$sign == -1) signtext <- " (Negative)" 18 | 19 | # Create Console Output 20 | cat('Interaction:', paste(x$variables, collapse = "-"), 21 | '\nWeight: ', x$edgeweight, 22 | '\nSign: ' , sign, signtext) 23 | 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/print.mgm.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | print.mgm <- function(x, 4 | ...) 5 | 6 | 7 | 8 | { 9 | 10 | model_classes <- c('Mixed Graphical Model (MGM)', 11 | 'mixed Vector Autoregressive (mVAR) model', 12 | 'Time-varying Mixed Graphical Model (tv-MGM)', 13 | 'Time-varying mixed Vector Autoregressive (tv-mVAR) model') 14 | 15 | # ---------- print for fit objects ---------- 16 | 17 | if(!('predicted' %in% class(x)) & !('bwSelect' %in% class(x))) { 18 | 19 | if('core' %in% class(x)) { 20 | 21 | msg_basic <- paste0('mgm fit-object', 22 | '\n\nModel class: ', model_classes[1], 23 | '\nOrder: ' , x$call$k, 24 | '\nNodes: ' , length(x$call$type)) 25 | 26 | 27 | if(!is.null(x$call$moderators)) { 28 | 29 | x$call$k <- 3 30 | 31 | msg_basic <- paste0('mgm fit-object', 32 | '\n\nModel class: ', model_classes[1], 33 | '\nOrder: ' , x$call$k, 34 | '\nNodes: ' , length(x$call$type)) # call again with updated x$call$k 35 | 36 | 37 | if(is.matrix(x$call$moderators)) mod_text <- "Custom specification" else mod_text <- paste(x$call$moderators, collapse = ", ") 38 | 39 | msg_basic <- paste0(msg_basic, 40 | paste0('\nModerators: ' , mod_text)) 41 | 42 | 43 | } 44 | 45 | 46 | if(!is.null(x$call$condition)) { 47 | 48 | nCond <- length(x$call$condition) 49 | names <- names(x$call$condition) 50 | msgCond <- paste0(names, "=", unlist(x$call$condition)) 51 | msg_basic <- paste0(msg_basic, "\nFixed: ", paste(msgCond, collapse = ", ")) 52 | 53 | } 54 | 55 | cat(msg_basic) 56 | 57 | 58 | } # end if: basic mgm object? 59 | 60 | 61 | 62 | 63 | 64 | 65 | if('mvar' %in% class(x)) { 66 | 67 | n_incl <- sum(x$call$data_lagged$included == TRUE) 68 | n_exp <- sum(x$call$data_lagged$included == FALSE) 69 | n <- n_incl + n_exp 70 | perc <- n_incl / n 71 | perc <- round(perc, 4) * 100 72 | 73 | cat('mgm fit-object', 74 | '\n\nModel class: ', model_classes[2], 75 | '\nLags: ' , x$call$lags, 76 | '\nRows included in VAR design matrix: ' , n_incl ,'/', n, '(', perc, '%)', 77 | '\nNodes: ' , length(x$call$type)) 78 | } 79 | 80 | 81 | 82 | if('tvmgm' %in% class(x)) { 83 | 84 | if(!is.null(x$call$moderators)) { 85 | 86 | x$call$k <- 3 87 | 88 | cat('mgm fit-object', 89 | '\n\nModel class: ', model_classes[3], 90 | '\nOrder: ' , x$call$k, 91 | '\nNodes: ' , length(x$call$type), 92 | '\nModerators: Variable ' , x$call$moderators, 93 | '\nEstimation points: ' , length(x$call$estpoints)) 94 | 95 | } else { 96 | 97 | cat('mgm fit-object', 98 | '\n\nModel class: ', model_classes[3], 99 | '\nOrder: ' , x$call$k, 100 | '\nNodes: ' , length(x$call$type), 101 | '\nEstimation points: ' , length(x$call$estpoints)) 102 | 103 | } 104 | 105 | 106 | 107 | } 108 | 109 | 110 | if('tvmvar' %in% class(x)) { 111 | 112 | 113 | n_incl <- sum(x$call$data_lagged$included == TRUE) 114 | n_exp <- sum(x$call$data_lagged$included == FALSE) 115 | n <- n_incl + n_exp 116 | perc <- n_incl / n 117 | perc <- round(perc, 4) * 100 118 | 119 | 120 | cat('mgm fit-object', 121 | '\n\nModel class: ', model_classes[4], 122 | '\nLags: ' , x$call$lags, 123 | '\nRows included in VAR design matrix: ' , n_incl ,'/', n, '(', perc, '%)', 124 | '\nNodes: ' , length(x$call$type), 125 | '\nEstimation points: ' , length(x$call$estpoints)) 126 | } 127 | 128 | } 129 | 130 | 131 | # ---------- print for prediction object ---------- 132 | 133 | if('predicted' %in% class(x)) { 134 | 135 | if('mgm' %in% class(x)) mc <- model_classes[1] 136 | if('mvar' %in% class(x)) mc <- model_classes[2] 137 | if('tvmgm' %in% class(x)) mc <- model_classes[3] 138 | if('tvmvar' %in% class(x)) mc <- model_classes[4] 139 | 140 | 141 | cat('mgm prediction-object', 142 | '\n\nModel class: ', mc, 143 | '\nError Types:', paste(names(x$call$errorCon), names(x$call$errorCat))) 144 | 145 | } 146 | 147 | 148 | # ---------- print for bwSelect object ---------- 149 | 150 | 151 | if('bwSelect' %in% class(x)) { 152 | 153 | if('mgm' %in% class(x)) mc <- model_classes[3] 154 | if('mvar' %in% class(x)) mc <- model_classes[4] 155 | 156 | cat('mgm bandwidth selection-object', 157 | '\n\nModel class: ', mc, 158 | '\nBandwith path: ', paste(x$call$bwSeq), 159 | '\nNumber of Folds: ', paste(x$call$bwFolds), 160 | '\nFoldsize: ', paste(x$call$bwFoldsize), 161 | '\nOptimal Bandwidth: ', x$call$bwSeq[which.min(x$meanError)]) 162 | 163 | } 164 | 165 | 166 | 167 | 168 | } # eoF 169 | -------------------------------------------------------------------------------- /R/print.resample.R: -------------------------------------------------------------------------------- 1 | 2 | # Gives summary of resampling object 3 | 4 | print.resample <- function(x, # output from resample() 5 | ...) 6 | 7 | 8 | { 9 | 10 | obj_class <- class(x$call$object) 11 | 12 | model_classes <- c('Mixed Graphical Model (MGM)', 13 | 'mixed Vector Autoregressive (mVAR) model', 14 | 'Time-varying Mixed Graphical Model (tv-MGM)', 15 | 'Time-varying mixed Vector Autoregressive (tv-mVAR) model') 16 | 17 | if('core' %in% obj_class) obj_class_display <- model_classes[1] 18 | if('mvar' %in% obj_class) obj_class_display <- model_classes[2] 19 | if('tvmgm' %in% obj_class) obj_class_display <- model_classes[3] 20 | if('tvmvar' %in% obj_class) obj_class_display <- model_classes[4] 21 | 22 | 23 | if('tvmvar' %in% obj_class | 'tvmgm' %in% obj_class) { 24 | 25 | cat('resample-object', 26 | '\n\nModel class: ', obj_class_display, 27 | '\nBootstrap samples: ' , x$call$nB, 28 | '\nBlocks: ' , x$call$blocks) 29 | 30 | } else { 31 | 32 | cat('resample-object', 33 | '\n\nModel class: ', obj_class_display, 34 | '\nBootstrap samples: ' , x$call$nB) 35 | 36 | } 37 | 38 | 39 | 40 | } # eoF 41 | 42 | 43 | -------------------------------------------------------------------------------- /R/showInteraction.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | showInteraction <- function(object, 4 | int) { 5 | 6 | 7 | # ----- Input Checks ----- 8 | 9 | class_obj <- class(object)[2] 10 | 11 | # Check whether object is mgm model object 12 | if(!(class_obj %in% c("core", "tvmgm", "mvar", "tvmvar") )) stop("Please provide an mgm model object (mgm, tvmgm, mvar, tvmvar) as input.") 13 | 14 | # Check whether interactions are specified as integers 15 | if(!all(int == round(int))) stop("Interactions have to be specified as sets of integers in 1:p") 16 | if(any(int < 1)) stop("Interactions have to be specified as sets of integers in 1:p") 17 | 18 | # Current limitation of function: 19 | if(class_obj %in% c("core", "tvmgm")) if(object$call$k > 2) stop("showInteraction() currently only supports pairwise interactions.") 20 | if(class_obj %in% c("tvmgm", "mvar", "tvmvar")) stop("showInteraction() currently only supports mgm() objects.") 21 | 22 | if(length(unique(int)) != length(int)) stop("A k-order interaction have to be specified by k distinct variables.") 23 | p <- length(object$call$level) 24 | if(any(int > p)) stop("Please specify variables in {1, 2, ..., p}.") 25 | 26 | 27 | # ----- Compute Aux variables ----- 28 | 29 | int <- sort(int) # sort, because interactions are saved sorted 1:p in "object$interactions$weights[[n_order-1]][[int_row]]" 30 | levelNames <- list() 31 | levelNames[[1]] <- object$call$levelNames[[int[1]]] 32 | levelNames[[2]] <- object$call$levelNames[[int[2]]] 33 | 34 | 35 | # ------------------------------------------------------------------------------------------------ 36 | # ---------- a) mgm-objects ---------------------------------------------------------------------- 37 | # ------------------------------------------------------------------------------------------------ 38 | 39 | if(class_obj == "core") { 40 | 41 | n_order <- length(int) 42 | type_int <- object$call$type[int] 43 | level_int <- object$call$level[int] 44 | 45 | 46 | # Input checks 47 | # if(n_order > object$call$k) stop("The order of requested interactions has to match the order of interactions in the model.") 48 | 49 | # Create outlist 50 | outlist <- list("variables" = int, 51 | "order" = n_order, 52 | "type" = type_int, 53 | "level" = level_int, 54 | "edgeweight" = NULL, 55 | "sign" = NULL, 56 | "parameters" = list()) 57 | 58 | # Get row of specified interaction 59 | int_row <- which(apply(matrix(object$interactions$indicator[[n_order-1]], ncol=n_order), 1, function(x) all(x %in% int))) # get row of interaction in "int" in interaction list 60 | 61 | # Is the interaction nonzero? 62 | if(length(int_row) == 0) { 63 | 64 | outlist$edgeweight <- 0 65 | outlist$sign <- 0 66 | 67 | } else { 68 | 69 | outlist$edgeweight <- object$interactions$weightsAgg[[n_order-1]][[int_row]] 70 | outlist$sign <- object$interactions$sign[[n_order-1]][int_row] 71 | 72 | if(n_order > 2) { 73 | 74 | outlist$parameters = NULL 75 | 76 | } else { 77 | 78 | # ------- Collect & label parameter estimates ------- 79 | 80 | for(i in 1:n_order) { 81 | 82 | # Get parameters of regression on i 83 | int_i <- object$interactions$weights[[n_order-1]][[int_row]][[i]] 84 | 85 | # Create empty array with correct dimensions 86 | par_mat <- matrix(NA, level_int[i], level_int[-i]) 87 | 88 | # Fill array 89 | if(type_int[i] == "c") { # if response = categorical 90 | 91 | if(type_int[-i] == "c") { # if predictor = categorical (only works for k=2 order MGMs ..) 92 | for(i_resp in 1:length(int_i)) { 93 | if(object$call$overparameterize) par_mat[i_resp, ] <- int_i[[i_resp]] else par_mat[i_resp, -1] <- int_i[[i_resp]] 94 | } 95 | } else { 96 | int_i_ul <- unlist(int_i) 97 | par_mat[, 1] <- int_i_ul 98 | } 99 | 100 | } else { # if response = continuous 101 | 102 | if(type_int[-i] == "c") { # if predictor = categorical 103 | par_mat[1, -1] <- int_i # first column = dummy coded 104 | } else { 105 | par_mat[1, 1] <- int_i 106 | } 107 | } 108 | 109 | # browser() 110 | # Set dimension names in array 111 | if(type_int[i] == "c") row.names(par_mat) <- paste0(int[i], ".", levelNames[[i]]) else row.names(par_mat) <- int[i] 112 | if(type_int[-i] == "c") colnames(par_mat) <- paste0(int[-i], ".", levelNames[[abs(i-3)]]) else colnames(par_mat) <- int[-i] 113 | 114 | # Save array in output list 115 | outlist$parameters[[paste0("Predict_", int[i])]] <- par_mat 116 | 117 | } # end for: n_order 118 | 119 | } # if: order == 2 120 | 121 | } # end if: estimated to zero? 122 | 123 | } # end if: core? 124 | 125 | 126 | # ------------------------------------------------------------------------------------------------ 127 | # ---------- b) tvmgm-objects ---------------------------------------------------------------------- 128 | # ------------------------------------------------------------------------------------------------ 129 | 130 | 131 | # ------------------------------------------------------------------------------------------------ 132 | # ---------- c) mvar-objects ---------------------------------------------------------------------- 133 | # ------------------------------------------------------------------------------------------------ 134 | 135 | 136 | # ------------------------------------------------------------------------------------------------ 137 | # ---------- d) tvmvar-objects ---------------------------------------------------------------------- 138 | # ------------------------------------------------------------------------------------------------ 139 | 140 | 141 | # ----- Return ----- 142 | 143 | class(outlist) <- "int" 144 | 145 | return(outlist) 146 | 147 | } # eoF -------------------------------------------------------------------------------- /R/startup_msg.R: -------------------------------------------------------------------------------- 1 | # I copied this piece of code from the bootnet package where it was copied from Lavaan mainly: 2 | 3 | .onAttach <- function(libname, pkgname) { 4 | version <- read.dcf(file=system.file("DESCRIPTION", package=pkgname), 5 | fields="Version") 6 | packageStartupMessage("This is ",paste(pkgname, version)) 7 | packageStartupMessage("Please report issues on Github: https://github.com/jmbh/mgm/issues") 8 | } 9 | -------------------------------------------------------------------------------- /R/tvmgmsampler.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | tvmgmsampler <- function(factors, 4 | interactions, 5 | thresholds, 6 | sds, 7 | type, 8 | level, 9 | nIter = 250, 10 | pbar = TRUE, 11 | ...) 12 | 13 | 14 | { 15 | 16 | # ---------- Input Checks ---------- 17 | 18 | 19 | # ----- Calc Aux Variables ----- 20 | 21 | n_timepoints <- nrow(thresholds[[1]]) 22 | n_order <- length(factors) 23 | p <- length(level) 24 | 25 | data <- matrix(NA, 26 | nrow = n_timepoints, 27 | ncol = p) 28 | 29 | # ----- Input Checks ----- 30 | 31 | # - same nrow for all thresholds 32 | # - match time dimension for factors, interaction, thresholds and sds 33 | # - check time vector: is it a integer sequence 1:N 34 | # - only checks on sd if at least one gaussian present! 35 | 36 | if(missing(sds)) sds <- NULL 37 | 38 | 39 | # ----- Create Outout object ----- 40 | 41 | tvmgmsamp_obj <- list('call' = NULL, 42 | 'data' = NULL) 43 | 44 | # Copy the call 45 | tvmgmsamp_obj$call <- list('factors' = factors, 46 | 'interactions' = interactions, 47 | 'thresholds' = thresholds, 48 | 'sds' = sds, 49 | 'type' = type, 50 | 'level' = level, 51 | 'nIter' = nIter, 52 | 'pbar' = pbar) 53 | 54 | 55 | if(pbar==TRUE) pb <- txtProgressBar(min = 0, max = n_timepoints, initial=0, char="-", style = 3) 56 | 57 | for(ts in 1:n_timepoints) { 58 | 59 | # ---------- Select Time dimension ---------- 60 | 61 | 62 | # a) Take out time point from: interactions 63 | 64 | factors_ts <- list() 65 | interactions_ts <- list() 66 | 67 | for(ord in 1:n_order) { 68 | 69 | n_row <- dim(factors[[ord]])[1] 70 | 71 | if(is.null(n_row)) { 72 | 73 | interactions_ts[[ord]] <- NULL 74 | 75 | } else { 76 | 77 | # factors_ts[[ord]] <- matrix(NA, nrow = n_row, ncol = ord+1) 78 | interactions_ts[[ord]] <- list() 79 | 80 | for(row in 1:n_row) { 81 | 82 | # factors_ts[[ord]][row, ] <- fr <- factors[[ord]][row, , ts] 83 | fr <- factors[[ord]][row, ] 84 | 85 | # create list we use to access dynamic array: interactions 86 | l_query <- list() 87 | for(i2 in 1:(ord+1)) l_query[[i2]] <- 1:level[fr[i2]] 88 | l_query[[(ord+2)]] <- ts 89 | 90 | A <- interactions[[ord]][[row]] 91 | query_out <- do.call(function(...)A[...], l_query) 92 | dim_ts_fix_array <- unlist(lapply(l_query, length))[-(ord+2)] 93 | interactions_ts[[ord]][[row]] <- array(query_out, dim = dim_ts_fix_array) 94 | 95 | } 96 | 97 | } # end if else: at least 1 row? 98 | 99 | } # end for: ord 100 | 101 | 102 | # b) Take out time point of: thresholds and sds 103 | 104 | thresholds_ts <- list() 105 | for(v in 1:p) thresholds_ts[[v]] <- as.numeric(thresholds[[v]][ts, ]) 106 | sds_ts <- sds[ts, ] 107 | 108 | # c) factors stays the same 109 | factors_ts <- factors 110 | 111 | 112 | # ---------- Call mgmsampler() ---------- 113 | 114 | one_case <- mgmsampler(factors = factors_ts, 115 | interactions = interactions_ts, 116 | thresholds = thresholds_ts, 117 | sds = sds_ts, 118 | type = type, 119 | level = level, 120 | N = 1, 121 | nIter = nIter, 122 | pbar = FALSE) 123 | 124 | data[ts, ] <- one_case$data 125 | 126 | 127 | 128 | if(pbar==TRUE) setTxtProgressBar(pb, ts) 129 | 130 | } # end for: time steps 131 | 132 | # ---------- Output ---------- 133 | 134 | tvmgmsamp_obj$data <- data 135 | 136 | class(tvmgmsamp_obj) <- 'tvmgm' 137 | 138 | return(tvmgmsamp_obj) 139 | 140 | 141 | } # eoF 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | -------------------------------------------------------------------------------- /R/tvmvarsampler.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | tvmvarsampler <- function(coefarray, # v x v2 x cat(v) x cat(v2) x lag array x N specifying lagged parameters 5 | lags, # vector specifiying the lags 6 | thresholds, # N x p matrix containint vector with a threshold for each category 7 | sds, # N x p matrix of sds 8 | type, 9 | level, 10 | pbar) 11 | 12 | 13 | 14 | 15 | { 16 | 17 | # ---------- Input Checks ---------- 18 | 19 | # Does coefarray have the right dimensions? 20 | if(length(dim(coefarray))!=6) stop('coefarray has to have 5 dimensions: p x p x max(level) x max(level) x n_lags x N.') 21 | if(dim(coefarray)[5] != length(lags)) stop('Dimensions for lags in coefarray have to match the number of lags specified in lags.') 22 | if(dim(coefarray)[1] != length(thresholds)) stop('Dimensions for variables in coefarray have to match the number of specified thresholds.') 23 | if(dim(coefarray)[1] != ncol(sds)) stop('Dimensions for variables in coefarray have to match the number of specified standard deviations.') 24 | if(dim(coefarray)[1] != length(type)) stop('Dimensions for variables in coefarray have to match the number of specified types.') 25 | if(dim(coefarray)[1] != length(level)) stop('Dimensions for variables in coefarray have to match the number of specified levels.') 26 | if(dim(coefarray)[1] != dim(coefarray)[2]) stop('The first two dimensions specifying the cross-lagged effects in coefarray have to have the same dimensionality.') 27 | 28 | 29 | if(!inherits(thresholds, "list")) stop('The thresholds have to be provided via a list with p entries. See ?tvmvarsampler') 30 | if(!inherits(sds, "matrix")) stop('The standard deviations of Gaussian variables have to be provided via a p x n matrix.') 31 | 32 | 33 | # ---------- Fill in Defaults ---------- 34 | 35 | if(missing(pbar)) pbar <- TRUE 36 | 37 | 38 | # ---------- Create Output Object & copy the call ---------- 39 | 40 | # Create Output Object 41 | mvarsamp_obj <- list('call' = NULL, 42 | 'data' = NULL) 43 | 44 | # Copy the call 45 | mvarsamp_obj$call <- list('coefarray' = coefarray, 46 | 'lags' = lags, 47 | 'thresholds' = thresholds, 48 | 'sds' = sds, 49 | 'type' = type, 50 | 'level' = level, 51 | 'pbar' = pbar) 52 | 53 | 54 | 55 | # ---------- Compute Auxilliary Variables ---------- 56 | 57 | p <- dim(coefarray)[1] 58 | N <- dim(coefarray)[6] 59 | n_lags <- length(lags) 60 | labels <- paste0('V', 1:p, '.') 61 | max_lag <- max(lags) 62 | 63 | 64 | 65 | # ---------- Sample Data ---------- 66 | 67 | data <- matrix(NA, ncol = p, nrow = N + max_lag) 68 | 69 | # Create random starting values 70 | for(r in 1:max_lag) { 71 | for(v in 1:p) { 72 | if(type[v] == 'c') data[r, v] <- sample(1:level[v], size = 1) 73 | if(type[v] == 'g') data[r, v] <- rnorm(1) 74 | if(type[v] == 'p') data[r, v] <- rpois(1, 1) 75 | } 76 | } 77 | 78 | 79 | if(pbar==TRUE) pb <- txtProgressBar(min = 0, max=N, initial=0, char="-", style = 3) 80 | 81 | 82 | # Loop over time and variables 83 | 84 | for(r in 1:N) { 85 | r2 <- ((max_lag+1):(N+max_lag))[r] 86 | 87 | for(v in 1:p) { 88 | 89 | # Get design matrix 90 | design_mat <- ModelMatrix(data = data, 91 | type = type, 92 | level = level, 93 | labels = labels, 94 | d = 1, 95 | allCats=TRUE) 96 | 97 | if(type[v] != 'c') { 98 | 99 | potential_lag <- list() 100 | for(lag in 1:n_lags) { 101 | 102 | # Get parameters in order 103 | l_parms <- list() 104 | for(par in 1:p) l_parms[[par]] <- coefarray[v, par, 1:level[v], 1:level[par], lag, r] 105 | v_parms <- unlist(l_parms) 106 | 107 | # Compute part of potential for given lag 108 | potential_lag[[lag]] <- design_mat[r2 - lags[lag], ] * v_parms 109 | 110 | } # end for: lag 111 | 112 | mu <- thresholds[[v]][r, 1] + sum(unlist(potential_lag)) 113 | 114 | # if(r==50) browser() 115 | 116 | if(type[v] == 'g') data[r2, v] <- rnorm(1, mean = mu, sd = sds[r, v]) 117 | if(type[v] == 'p') data[r2, v] <- rpois(1, lambda = mu) 118 | 119 | } # end if: !='c' 120 | 121 | if(type[v] == 'c') { 122 | 123 | # Loop over categories 124 | l_potentials <- list() 125 | for(cat in 1:level[v]) { 126 | 127 | potential_lag <- list() 128 | for(lag in 1:n_lags) { 129 | 130 | # Get parameters in order 131 | l_parms <- list() 132 | for(par in 1:p) l_parms[[par]] <- coefarray[v, par, (1:level[v])[cat], 1:level[par], lag, r] 133 | v_parms <- unlist(l_parms) 134 | 135 | # Compute part of potential for given lag 136 | potential_lag[[lag]] <- design_mat[r2 - lags[lag], ] * v_parms 137 | 138 | } # end for: lag 139 | 140 | # put potential-parts from different lags together 141 | l_potentials[[cat]] <- thresholds[[v]][r, cat] + sum(unlist(potential_lag)) 142 | 143 | } # end for: cat 144 | 145 | # compute probabilities 146 | v_potentials <- exp(unlist(l_potentials)) 147 | probabilities <- v_potentials / sum(v_potentials) 148 | 149 | data[r2, v] <- sample(1:level[v], prob = probabilities, size = 1) 150 | 151 | } # end if: =='c'? 152 | 153 | 154 | } # end for: v variables 155 | 156 | if(pbar==TRUE) setTxtProgressBar(pb, r) 157 | 158 | } # end for: r rows 159 | 160 | 161 | # ---------- Output ---------- 162 | 163 | 164 | mvarsamp_obj$data <- data[(max_lag+1):(N+max_lag), ] 165 | 166 | class(mvarsamp_obj) <- 'tvmvar' 167 | 168 | 169 | return(mvarsamp_obj) 170 | 171 | 172 | 173 | 174 | } # eoF 175 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mgm 2 | 3 | [![CRAN 4 | Version](http://www.r-pkg.org/badges/version/mgm)](https://cran.r-project.org/package=mgm) 5 | [![Downloads](https://cranlogs.r-pkg.org/badges/mgm)](https://cran.r-project.org/package=mgm) 6 | 7 | The package includes functions to estimate, visualize and resample time-varying k-order Mixed Graphical Models (MGMs) and mixed Vector Autoregressive (mVAR) models. 8 | 9 | 10 | Here is a paper describing the package: https://arxiv.org/abs/1510.06871 11 | 12 | And here are a couple of blog posts about some functions: https://jmbh.github.io/ 13 | 14 | 15 | The developmental version can be installed from within R using the devtools-package: 16 | 17 | library(devtools) 18 | install_github("jmbh/mgm") 19 | -------------------------------------------------------------------------------- /data/B5MS.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/B5MS.RData -------------------------------------------------------------------------------- /data/Fried2015.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/Fried2015.RData -------------------------------------------------------------------------------- /data/PTSD_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/PTSD_data.RData -------------------------------------------------------------------------------- /data/autism_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/autism_data.RData -------------------------------------------------------------------------------- /data/autism_data_large.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/autism_data_large.RData -------------------------------------------------------------------------------- /data/dataGD.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/dataGD.RData -------------------------------------------------------------------------------- /data/fruitfly_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/fruitfly_data.RData -------------------------------------------------------------------------------- /data/mgm_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/mgm_data.RData -------------------------------------------------------------------------------- /data/modnw.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/modnw.RData -------------------------------------------------------------------------------- /data/msq_p3.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/msq_p3.RData -------------------------------------------------------------------------------- /data/msq_p5.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/msq_p5.RData -------------------------------------------------------------------------------- /data/mvar_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/mvar_data.RData -------------------------------------------------------------------------------- /data/restingstate_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/restingstate_data.RData -------------------------------------------------------------------------------- /data/symptom_data.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/symptom_data.RData -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | title = "{mgm}: Estimating Time-Varying Mixed Graphical Models in High-Dimensional Data", 3 | author = c(person(given = c("Jonas", "M.", "B."), 4 | family = "Haslbeck", 5 | email = "jonashaslbeck@gmail.com"), 6 | person(given = c("Lourens", "J."), 7 | family = "Waldorp")), 8 | journal = "Journal of Statistical Software", 9 | year = "2020", 10 | volume = "93", 11 | number = "8", 12 | pages = "1--46", 13 | doi = "10.18637/jss.v093.i08", 14 | 15 | header = "To cite mgm in publications use:" 16 | ) 17 | 18 | -------------------------------------------------------------------------------- /man/FactorGraph.Rd: -------------------------------------------------------------------------------- 1 | \name{FactorGraph} 2 | \alias{FactorGraph} 3 | 4 | \title{ 5 | Draws a factor graph of a (time-varying) MGM 6 | } 7 | \description{ 8 | Wrapper function around qgraph() that draws factor graphs for (time-varying) MGMs 9 | } 10 | \usage{ 11 | FactorGraph(object, labels, PairwiseAsEdge = FALSE, 12 | Nodewise = FALSE, DoNotPlot = FALSE, 13 | FactorLabels = TRUE, colors, shapes, 14 | shapeSizes = c(8, 4), estpoint = NULL, 15 | negDashed = FALSE, ...) 16 | } 17 | 18 | 19 | \arguments{ 20 | \item{object}{ 21 | The output object of \code{mgm()} or \code{tvmgm()}. 22 | } 23 | \item{labels}{ 24 | A character vector of (variable) node labels. 25 | } 26 | \item{PairwiseAsEdge}{ 27 | If \code{TRUE}, pairwise interactions are not displayed as factors but as simple edges between nodes. Defaults to \code{PairwiseAsEdge = FALSE}. 28 | } 29 | \item{Nodewise}{ 30 | If \code{TRUE}, the estimates from the individual nodewise regressions are displayed as a directed edge towards the node on which the respective nodewise regression was performed. This is useful to identify model misspecification (e.g. moderation effects / interaction parameters with largely different values across nodewise regressions). Defaults to \code{Nodewise = FALSE}. 31 | } 32 | \item{DoNotPlot}{ 33 | If \code{DoNotPlot = TRUE} no factorgraph is plotted. This way the computed factor graph can be obtained without plotting. Defaults to \code{DoNotPlot = FALSE}. 34 | } 35 | \item{FactorLabels}{ 36 | If \code{FactorLabels = TRUE} the factors are labeled by their order. If \code{FactorLabels = FALSE} no label is shown. Defaults to \code{FactorLabels = TRUE}. 37 | } 38 | \item{colors}{ 39 | A character vector of colors for nodes and factors. The first color is for variable-nodes, the second for 2-way interactions, the third for 3-way interactions, etc. Defaults to \code{colors = c("white", "tomato", "lightblue", "orange")}. 40 | } 41 | \item{shapes}{ 42 | A character vector of shapes for for nodes and factors. The first shape is for variable-nodes, the second for 2-way interactions, the third for 3-way interactions, etc. Defaults to \code{shapes = c("circle", "square", "triangle", "diamond")}. 43 | } 44 | \item{shapeSizes}{ 45 | A numeric vector of length two indicating the size of shapes for nodes and factors. Defaults to \code{shapeSizes = c(8, 4)}. 46 | } 47 | \item{estpoint}{ 48 | An integer indicating the estimation point to display if the output object of a time-varying MGM is provided. 49 | } 50 | \item{negDashed}{ 51 | If \code{negDashed = TRUE}, edges with negative sign are dashed. 52 | } 53 | \item{\dots}{ 54 | Arguments passed to qgraph. 55 | } 56 | } 57 | \details{ 58 | \code{FactorGraph()} is a wrapper around \code{qgraph()} from the qgraph package. Therefore all arguments of \code{qgraph()} are available and can be provided as additional arguments. 59 | 60 | To make time-varying factor graphs comparable across estimation points, the factor graph of each estimation point includes all factors that are estimated nonzero at least at one estimation point. 61 | 62 | } 63 | \value{ 64 | Plots the factor graph and returns a list including the arguments used to plot the factor graph using qgraph(). 65 | 66 | Specifically, a list is returned including: \code{graph} contains a weighted adjacency matrix of a (bipartide) factor graph. If p is the number of variables and E the number of interactions (factors) in the model, this matrix has dimensions (p+E) x (p+E). The factor graph is furter specified by the following objects: \code{signs} is a matrix of the same dimensions as \code{graph} that indicates the sign of each interaction, if defined (see \code{pairwise} above). \code{edgecolor} is a matrix with the same dimension as \code{graph} that provides edge colors depending on the sign as above. \code{order} is a (p+E) vector indicating the order of interaction. The first p entries are set to zero. \code{qgraph} contains the qgraph object created while plotting. 67 | 68 | } 69 | %%\references{ 70 | %% ~put references to the literature/web site here ~ 71 | %%} 72 | 73 | \author{ 74 | Jonas Haslbeck 75 | } 76 | 77 | 78 | \seealso{ 79 | \code{mgm()}, \code{tvmgm()}, \code{qgraph()} 80 | } 81 | 82 | 83 | \examples{ 84 | 85 | 86 | \dontrun{ 87 | 88 | # Fit MGM with pairwise & threeway interactions to Autism Dataset 89 | fit_k3 <- mgm(data = autism_data$data, 90 | type = autism_data$type, 91 | level = autism_data$lev, 92 | k = 3, 93 | overparameterize = TRUE, 94 | lambdaSel = "EBIC", 95 | lambdaGam = .5) 96 | 97 | # List of estimated interactions 98 | fit_k3$interactions$indicator 99 | 100 | FactorGraph(object = fit_k3, 101 | PairwiseAsEdge = FALSE, 102 | DoNotPlot = FALSE, 103 | labels = 1:7, 104 | layout="circle") 105 | 106 | # For more examples see https://github.com/jmbh/mgmDocumentation 107 | } 108 | 109 | } 110 | 111 | 112 | -------------------------------------------------------------------------------- /man/bwSelect.Rd: -------------------------------------------------------------------------------- 1 | 2 | \name{bwSelect} 3 | \alias{bwSelect} 4 | 5 | \title{ 6 | Select optimal bandwidth for time-varying MGMs and mVAR Models 7 | } 8 | 9 | \description{ 10 | Selects the bandwidth parameter with lowest out of sample prediction error for MGMs and mVAR Models. 11 | } 12 | 13 | \usage{ 14 | bwSelect(data, type, level, bwSeq, bwFolds, 15 | bwFoldsize, modeltype, pbar, ...) 16 | } 17 | 18 | \arguments{ 19 | \item{data}{ 20 | A n x p data matrix. 21 | } 22 | \item{type}{ 23 | p vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" for categorical. 24 | } 25 | \item{level}{ 26 | p vector indicating the number of categories of each variable. For continuous variables set to 1. 27 | } 28 | \item{bwSeq}{ 29 | A sequence with candidate bandwidth values (0, s] with s < Inf. Note that the bandwidth is applied relative to the unit time interval [0,1] and hence a banwidth of > 2 corresponds roughly to equal weights for all time points and hence gives similar estimates as the stationary model estimated via \code{mvar()}. 30 | } 31 | \item{bwFolds}{ 32 | The number of folds (see details below). 33 | } 34 | \item{bwFoldsize}{ 35 | The size of each fold (see details below). 36 | } 37 | \item{modeltype}{ 38 | If \code{modeltype = "mvar"} model, the optimal bandwidth parameter for a \code{tvmvar()} model is selected. If \code{modeltype = "mgm"} model, the optimal bandwidth parameter for a \code{tvmgm()} model is selected. Additional arguments to \code{tvmvar()} or \code{tvmgm()} can be passed via the \code{\dots} argument. 39 | } 40 | 41 | \item{pbar}{ 42 | If TRUE a progress bar is shown. Defaults to \code{pbar = "TRUE"}. 43 | } 44 | 45 | \item{\dots}{ 46 | Arguments passed to \code{tvmgm} or \code{tvmvar}. 47 | } 48 | } 49 | 50 | \details{ 51 | 52 | Performs a cross-validation scheme that is specified by \code{bwFolds} and \code{bwFoldsize}. In the first fold, the test set is defined by an equally spaced sequence between [1, n - \code{bwFolds}] of length \code{bwFoldsize}. In the second fold, the test set is defined by an equally spaced sequence between [2, n - \code{bwFolds} + 1] of length \code{bwFoldsize}, etc. . Note that if \code{bwFoldsize} = n / \code{bwFolds}, this procedure is equal to \code{bwFolds}-fold cross valildation. However, full cross validation is computationally very expensive and a single split in test/training set by setting \code{bwFolds = 1} is sufficient in many situations. The procedure selects the bandwidth with the lowest prediction error, averaged over variables and time points in the test set. 53 | 54 | \code{bwSelect} computes the absolute error (continuous) or 0/1-loss (categorical) for each time point in the test set defined by \code{bwFoldsize} as described in the previous paragraph for every fold specified in \code{bwFolds}, separately for each variable. The computed errors are returned in different levels of aggregation in the output list (see below). Note that continuous variables are scaled (centered and divided by their standard deviation), hence the absolute error and 0/1-loss are roughly on the scale scale. 55 | 56 | Note that selecting the bandwidth with the EBIC is no alternative. This is because the EBIC always selects the intercept model with the lowest bandwidth. The reason is that the unregularized intercept closely models the noise in the data and hence the penalty sets all other parameters to zero. This problem is solved by using out of sample prediction error in the cross validation scheme. 57 | 58 | 59 | } 60 | \value{ 61 | 62 | The function returns a list with the following entries: 63 | 64 | \item{call}{ 65 | Contains all provided input arguments. If \code{saveData = TRUE}, it also contains the data. 66 | } 67 | 68 | \item{bwModels}{ 69 | Contains the models estimated at the time points in the tests set. For details see \code{tvmvar} or \code{tvmgm}. 70 | } 71 | 72 | \item{fullErrorFolds}{ 73 | List with number of entries equal to the length of \code{bwSeq} entries. Each entry contains a list with \code{bwFolds} entries. Each of those entries contains a contains a \code{bwFoldsize} times p matrix of out of sample prediction errors. 74 | } 75 | 76 | \item{fullError}{ 77 | The same as \code{fullErrorFolds} but pooled over folds. 78 | } 79 | 80 | \item{meanError}{ 81 | List with number of entries equal to the length of \code{bwSeq} entries. Each entry contains the average prediction error over variables and time points in the test set. 82 | } 83 | 84 | \item{testsets}{ 85 | List with \code{bwFolds} entries, which contain the rows of the test sample for each fold. 86 | } 87 | 88 | \item{zeroweights}{ 89 | List with \code{bwFolds} entries, which contains the observation weights used to fit the model at the \code{bwFoldsize} time points. 90 | } 91 | 92 | 93 | } 94 | \references{ 95 | 96 | Barber, R. F., & Drton, M. (2015). High-dimensional Ising model selection with Bayesian information criteria. Electronic Journal of Statistics, 9(1), 567-607. 97 | 98 | Foygel, R., & Drton, M. (2010). Extended Bayesian information criteria for Gaussian graphical models. In Advances in neural information processing systems (pp. 604-612). 99 | 100 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 101 | 102 | 103 | 104 | } 105 | \author{ 106 | Jonas Haslbeck 107 | } 108 | 109 | 110 | 111 | 112 | \examples{ 113 | 114 | \dontrun{ 115 | 116 | 117 | ## A) bwSelect for tvmgm() 118 | 119 | # A.1) Generate noise data set 120 | p <- 5 121 | n <- 100 122 | data_n <- matrix(rnorm(p*n), nrow=100) 123 | head(data_n) 124 | 125 | type <- c("c", "c", rep("g", 3)) 126 | level <- c(2, 2, 1, 1, 1) 127 | x1 <- data_n[,1] 128 | x2 <- data_n[,2] 129 | data_n[x1>0,1] <- 1 130 | data_n[x1<0,1] <- 0 131 | data_n[x2>0,2] <- 1 132 | data_n[x2<0,2] <- 0 133 | 134 | head(data_n) 135 | 136 | # A.2) Estimate optimal bandwidth parameter 137 | 138 | bwobj_mgm <- bwSelect(data = data_n, 139 | type = type, 140 | level = level, 141 | bwSeq = seq(0.05, 1, length=3), 142 | bwFolds = 1, 143 | bwFoldsize = 3, 144 | modeltype = "mgm", 145 | k = 3, 146 | pbar = TRUE, 147 | overparameterize = TRUE) 148 | 149 | 150 | print.mgm(bwobj_mgm) 151 | 152 | 153 | 154 | ## B) bwSelect for tvmVar() 155 | 156 | # B.1) Generate noise data set 157 | 158 | p <- 5 159 | n <- 100 160 | data_n <- matrix(rnorm(p*n), nrow=100) 161 | head(data_n) 162 | 163 | type <- c("c", "c", rep("g", 3)) 164 | level <- c(2, 2, 1, 1, 1) 165 | x1 <- data_n[,1] 166 | x2 <- data_n[,2] 167 | data_n[x1>0,1] <- 1 168 | data_n[x1<0,1] <- 0 169 | data_n[x2>0,2] <- 1 170 | data_n[x2<0,2] <- 0 171 | 172 | head(data_n) 173 | 174 | # B.2) Estimate optimal bandwidth parameter 175 | 176 | bwobj_mvar <- bwSelect(data = data_n, 177 | type = type, 178 | level = level, 179 | bwSeq = seq(0.05, 1, length=3), 180 | bwFolds = 1, 181 | bwFoldsize = 3, 182 | modeltype = "mvar", 183 | lags = 1:3, 184 | pbar = TRUE, 185 | overparameterize = TRUE) 186 | 187 | 188 | print.mgm(bwobj_mvar) 189 | 190 | # For more examples see https://github.com/jmbh/mgmDocumentation 191 | 192 | 193 | } 194 | 195 | } 196 | 197 | 198 | -------------------------------------------------------------------------------- /man/condition.Rd: -------------------------------------------------------------------------------- 1 | \name{condition} 2 | \alias{condition} 3 | 4 | \title{ 5 | Computes mgm object conditional on a set of variables 6 | } 7 | \description{ 8 | The function takes an mgm object and a set of variables fixed to given values as input and returns the conditional mgm object. 9 | } 10 | \usage{ 11 | condition(object, values) 12 | } 13 | 14 | \arguments{ 15 | \item{object}{ 16 | An mgm object, the output of the \code{mgm()} function. 17 | } 18 | \item{values}{ 19 | A list, where the entry name indicates the column number of the variable that should be fixed, and the entry value indicates the value to which the corresponding variable should be fixed. See below for an example. 20 | } 21 | } 22 | \value{ 23 | The function returns an mgm object that is conditional on the provided values. The new mgm object can again be used as input in \code{predict()}, \code{print()}, \code{showInteraction()}, etc.. Note that code{mgm()} by default standardizes variables to mean=0, SD=1. Therefore, also the values one conditions on should be chosen on the scaled version of the variable to avoid extrapolating out of the dataset. 24 | } 25 | 26 | \details{ 27 | The new conditional object still contains the variables that were fixed, however, they are not related to any of the random variables anymore. We kept the variables in the object to avoid confusion with variable labels and plotting. Also note that \code{mgm()} by default scales all Gaussian variables to mean=0, sd=1. Thus, fixed values should be selected based on the scaled version of variables. 28 | } 29 | 30 | \references{ 31 | Haslbeck, J., & Waldorp, L. J. (2019). mgm: Estimating time-varying mixed graphical models in high-dimensional data. arXiv preprint arXiv:1510.06871. 32 | } 33 | 34 | \author{ 35 | Jonas Haslbeck 36 | } 37 | 38 | 39 | 40 | \seealso{ 41 | \code{mgm} 42 | } 43 | 44 | 45 | \examples{ 46 | 47 | \dontrun{ 48 | 49 | # --- Create Mixture of two Gaussians --- 50 | 51 | set.seed(1) 52 | n <- 500 53 | library(MASS) 54 | 55 | # Component A 56 | Sigma_a <- diag(2) 57 | Sigma_a[1, 2] <- Sigma_a[2, 1] <- .5 58 | Xa <- mvrnorm(n = n, mu = rep(0, 2), Sigma = Sigma_a) 59 | 60 | # Component B 61 | Sigma_b <- diag(2) 62 | Sigma_b[1, 2] <- Sigma_b[2, 1] <- 0 63 | Xb <- mvrnorm(n = n, mu = rep(0, 2), Sigma = Sigma_b) 64 | 65 | data <- cbind(rbind(Xa, Xb), c(rep(0, n), rep(1, n))) 66 | colnames(data) <- c("x1", "x2", "x3") 67 | 68 | 69 | # --- Fit MGM --- 70 | 71 | # with mgm 72 | mgm_obj <- mgm(data = data, 73 | type = c("g","g","c"), 74 | level = c(1, 1, 2), 75 | moderator = c(3), 76 | lambdaSel = "EBIC") 77 | 78 | # --- Condition on / fix values of variable 3 --- 79 | 80 | # Fix x3=0 81 | mgm_obj_x3.0 <- condition(object = mgm_obj, 82 | values = list("3"=0)) 83 | mgm_obj_x3.0$pairwise$wadj 84 | 85 | # Fix x3=1 86 | mgm_obj_x3.1 <- condition(object = mgm_obj, 87 | values = list("3"=1)) 88 | mgm_obj_x3.1$pairwise$wadj 89 | 90 | } 91 | 92 | } 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /man/datasets.Rd: -------------------------------------------------------------------------------- 1 | \name{datasets} 2 | \docType{data} 3 | 4 | \alias{datasets} 5 | 6 | \alias{autism_data} 7 | \alias{autism_data_large} 8 | \alias{restingstate_data} 9 | \alias{fruitfly_data} 10 | \alias{symptom_data} 11 | \alias{Fried2015} 12 | \alias{PTSD_data} 13 | \alias{mgm_data} 14 | \alias{mvar_data} 15 | \alias{modnw} 16 | \alias{B5MS} 17 | \alias{msq_p5} 18 | \alias{msq_p3} 19 | \alias{dataGD} 20 | 21 | 22 | \title{Example Datasets in the mgm Package} 23 | \description{ 24 | The autism dataset (and its short version) are taken from Deserno et al. (2016). 25 | 26 | The restingstate fMRI data are taken from Schmittmann et al. (2015). 27 | 28 | The gene expression data across the life span of the fruit fly are taken from Gibberd & Nelson (2017), who took a subset of the data first presented by Arbeitman et al. (2002). 29 | 30 | The symptom data of the single individual diagnosed with major depression is described in Kossakowski et al. (2017). 31 | 32 | The PTSD data is taken from McNally et al. (2015). 33 | 34 | The dataset \code{mgm_data} is generated by example code shown in ?mgmsampler, and \code{mvar_data} is generated by example code shown in ?mvarsampler. 35 | 36 | The dataset \code{Fried2015} contains 515 cases of the 11 depression symptoms measured by the CES-D and is taken from Fried et al. 2015. 37 | 38 | The dataset \code{B5MS} contains the mean scores across subscales (48 items each) for the Big Five personality traits. The dataset is taken from the \code{qgraph} package (Epskamp, et al., 2012) and was first used in Dolan et al. (2009). 39 | 40 | The dataset \code{dataGD} contains 4 continuous variables and 3 categorical variables that are generated from a mixed DAG. This dataset is useful to illustrate estimating group differences in MGMs using moderation. 41 | 42 | All datasets are loaded automatically. All real data sets come as a list including the data and additional information (names of variables, types of variables, time stamps for time series data, etc.) 43 | 44 | } 45 | 46 | 47 | \references{ 48 | 49 | Deserno, M. K., Borsboom, D., Begeer, S., & Geurts, H. M. (2016). Multicausal systems ask for multicausal approaches: A network perspective on subjective well-being in individuals with autism spectrum disorder. Autism. 50 | 51 | Dolan, C. V., Oort, F. J., Stoel, R. D., & Wicherts, J. M. (2009). Testing measurement invariance in the target rotated multigroup exploratory factor model. Structural Equation Modeling, 16(2), 295-314. 52 | 53 | Epskamp, S., Cramer, A. O., Waldorp, L. J., Schmittmann, V. D., & Borsboom, D. (2012). qgraph: Network visualizations of relationships in psychometric data. Journal of Statistical Software, 48(4), 1-18. 54 | 55 | Schmittmann, V. D., Jahfari, S., Borsboom, D., Savi, A. O., & Waldorp, L. J. (2015). Making large-scale networks from fMRI data. PloS one, 10(9), e0129074. 56 | 57 | Gibberd, A. J., & Nelson, J. D. (2017). Regularized Estimation of Piecewise Constant Gaussian Graphical Models: The Group-Fused Graphical Lasso. Journal of Computational and Graphical Statistics, (just-accepted). 58 | 59 | Arbeitman, M. N., Furlong, E. E., Imam, F., Johnson, E., Null, B. H., Baker, B. S., ... & White, K. P. (2002). Gene expression during the life cycle of Drosophila melanogaster. Science, 297(5590), 2270-2275. 60 | 61 | Kossakowski, J., Groot, P., Haslbeck, J., Borsboom, D., & Whichers, M. (2017). Data from "Critical Slowing Down as a Personalized Early Warning Signal for Depression". Journal of Open Psychology Data, 5(1). 62 | 63 | McNally, R. J., Robinaugh, D. J., Wu, G. W., Wang, L., Deserno, M. K., & Borsboom, D. (2015). Mental disorders as causal systems a network approach to posttraumatic stress disorder. Clinical Psychological Science, 3(6), 836-849. 64 | 65 | Fried, E. I., Bockting, C., Arjadi, R., Borsboom, D., Amshoff, M., Cramer, A. O., ... & Stroebe, M. (2015). From loss to loneliness: The relationship between bereavement and depressive symptoms. Journal of abnormal psychology, 124(2), 256. 66 | 67 | } 68 | 69 | \keyword{datasets} 70 | 71 | -------------------------------------------------------------------------------- /man/mgm-internal.Rd: -------------------------------------------------------------------------------- 1 | \name{mgm-internal} 2 | \title{Internal mgm functions} 3 | 4 | \alias{DrawFG} 5 | \alias{DrawFGtv} 6 | \alias{FlagSymmetric} 7 | \alias{bwSelPredict} 8 | \alias{calcLL} 9 | \alias{calcNeighbors} 10 | \alias{glmnetRequirements} 11 | \alias{lagData} 12 | \alias{nodeEst} 13 | \alias{ModelMatrix} 14 | \alias{ModelMatrix_standard} 15 | 16 | \description{Internal mgm functions.} 17 | 18 | \author{Jonas Haslbeck} 19 | 20 | \details{ 21 | 22 | These are internal functions. 23 | 24 | } 25 | 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/mgm-package.Rd: -------------------------------------------------------------------------------- 1 | \name{mgm-package} 2 | \alias{mgm-package} 3 | \alias{mgm-package} 4 | \docType{package} 5 | \title{ 6 | Estimating Time-Varying k-order Mixed Graphical Models 7 | } 8 | \description{ 9 | Estimation of time-varying Mixed Graphical models and mixed VAR models via elastic-net regularized neighborhood regression. 10 | } 11 | 12 | \author{ 13 | Jonas Haslbeck 14 | 15 | Maintainer: 16 | } 17 | 18 | 19 | \references{ 20 | 21 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 22 | 23 | Loh, P. L., & Wainwright, M. J. (2013). Structure estimation for discrete graphical models: Generalized covariance matrices and their inverses. The Annals of Statistics, 41(6), 3022-3049. 24 | 25 | Yang, E., Baker, Y., Ravikumar, P., Allen, G., & Liu, Z. (2014). Mixed graphical models via exponential families. In Proceedings of the Seventeenth International Conference on Artificial Intelligence and Statistics (pp. 1042-1050). 26 | } 27 | 28 | \keyword{package} 29 | -------------------------------------------------------------------------------- /man/mgmsampler.Rd: -------------------------------------------------------------------------------- 1 | \name{mgmsampler} 2 | \alias{mgmsampler} 3 | 4 | \title{ 5 | Sample from k-order Mixed Graphical Model 6 | } 7 | \description{ 8 | Generates samples from a k-order Mixed Graphical Model 9 | } 10 | \usage{ 11 | mgmsampler(factors, interactions, thresholds, sds, type, 12 | level, N, nIter = 250, pbar = TRUE, 13 | divWarning = 10^3, returnChains = FALSE) 14 | } 15 | 16 | 17 | \arguments{ 18 | \item{factors}{ 19 | This object indicates which interactions are present in the model. It is a list in which the first entry corresponds to 2-way interactions, the second entry corresponds to 3-way interactions, etc. and the kth entry to the k+1-way interaction. Each entry contains a matrix with dimensions order x number of interaction of given order. Each row in the matrix indicates an interaction, e.g. (1, 3, 7, 9) in the matrix in list entry three indicates a 4-way interaction between the variables 1, 3, 7 and 9. 20 | } 21 | \item{interactions}{ 22 | This object specifies the parameters associated to the interactions specified in \code{factors}. Corresponding to the structure in \code{factors}, this object is a list, where the kth entry corresponds to k+1-way interactions. Each list entry contains another list, with entries equal to the number of rows in the corresponding matrix in \code{factors}. Each of these list entries (for a fixed k) contains a k-dimensional array that specifies the parameters of the given k-order interaction. For instance, if we have a 3-way interaction (1, 2, 3) and all variables are binary, we have a 2 x 2 x 2 array specifiying the parameters for each of the 2^3 = 8 possible configurations. If all variables are continuous, we have a 1 x 1 x 1 array, so the interaction is specified by a single parameter. See the examples below for an illustration. 23 | } 24 | \item{thresholds}{ 25 | A list with p entries corresponding to p variables in the model. Each entry contains a vector indicating the threshold for each category (for categorical variables) or a numeric value indicating the threshold/intercept (for continuous variables). 26 | } 27 | \item{sds}{ 28 | A numeric vector with p entries, specifying the variances of Gaussian variables. If variables 6 and 13 are Gaussians, then the corresponding entries of \code{sds} have to contain the corresponding variances. Other entries are ignored. 29 | } 30 | \item{type}{ 31 | p character vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" of each variable. 32 | } 33 | \item{level}{ 34 | p integer vector indicating the number of categories of each variable. For continuous variables set to 1. 35 | } 36 | \item{N}{ 37 | Number of samples that should be drawn from the distribution. 38 | } 39 | \item{nIter}{ 40 | Number of iterations in the Gibbs sampler until a sample is drawn. 41 | } 42 | \item{pbar}{ 43 | If \code{pbar = TRUE} a progress bar is shown. Defaults to \code{pbar = TRUE}. 44 | } 45 | \item{divWarning}{ 46 | \code{mgmsampler()} returns a warning message if the absolute value of a continuous variable the chain of the gibbs sampler is larger than \code{divWarning}. To our best knowledge there is no theory yet defining a parameter space that ensures a proper probability density and hence a converging chain. Defaults to \code{divWarning = 10^3}. 47 | } 48 | 49 | \item{returnChains}{ 50 | If \code{returnChains = TRUE}, the sampler provides the entire chain of the Gibbs sampler, for each sampled case. Can be used to check convergence of the Gibbs sampler. Defaults to \code{returnChains = FALSE}.} 51 | } 52 | 53 | \details{ 54 | We use a Gibbs sampler to sample from the join distribution introduced by Yang and colleageus (2014). Note that the contraints on the parameter space necessary to ensure that the joint distribution is normalizable are to our best knowledge unknown. Yang and colleagues (2014) give these constraints for a number of simple pairwise models. In practice, an "improper joint density" will lead to a sampling process that approaches infinity, and hence \code{mgmsampler()} will return \code{Inf} / \code{-Inf} values. 55 | } 56 | 57 | 58 | \value{ 59 | A list containing: 60 | 61 | \item{call}{ 62 | Contains all provided input arguments. 63 | } 64 | 65 | \item{data}{ 66 | The N x p data matrix of sampled values 67 | } 68 | 69 | } 70 | \references{ 71 | Haslbeck, J., & Waldorp, L. J. (2018). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. arXiv preprint arXiv:1510.06871. 72 | 73 | Yang, E., Baker, Y., Ravikumar, P., Allen, G. I., & Liu, Z. (2014, April). Mixed Graphical Models via Exponential Families. In AISTATS (Vol. 2012, pp. 1042-1050). 74 | 75 | } 76 | \author{ 77 | Jonas Haslbeck 78 | } 79 | 80 | 81 | \examples{ 82 | 83 | \dontrun{ 84 | 85 | # --------- Example 1: p = 10 dimensional Gaussian --------- 86 | 87 | # ----- 1) Specify Model ----- 88 | 89 | # a) General Graph Info 90 | p <- 10 # number of variables 91 | type = rep("g", p) # type of variables 92 | level = rep(1, 10) # number of categories for each variable (1 = convention for continuous) 93 | 94 | # b) Define interactions 95 | factors <- list() 96 | factors[[1]] <- matrix(c(1,2, 97 | 1,3, 98 | 4,5, 99 | 7,8), ncol=2, byrow = T) # 4 pairwise interactions 100 | interactions <- list() 101 | interactions[[1]] <- vector("list", length = 4) 102 | 103 | # all pairwise interactions have value .5 104 | for(i in 1:4) interactions[[1]][[i]] <- array(.5, dim=c(1, 1)) 105 | 106 | # c) Define Thresholds 107 | thresholds <- vector("list", length = p) 108 | thresholds <- lapply(thresholds, function(x) 0 ) # all means are zero 109 | 110 | # d) Define Variances 111 | sds <- rep(1, p) # All variances equal to 1 112 | 113 | 114 | # ----- 2) Sample cases ----- 115 | 116 | data <- mgmsampler(factors = factors, 117 | interactions = interactions, 118 | thresholds = thresholds, 119 | sds = sds, 120 | type = type, 121 | level = level, 122 | N = 500, 123 | nIter = 100, 124 | pbar = TRUE) 125 | 126 | 127 | # ----- 3) Recover model from sampled cases ----- 128 | 129 | set.seed(1) 130 | mgm_obj <- mgm(data = data$data, 131 | type = type, 132 | level = level, 133 | k = 2, 134 | lambdaSel = "EBIC", 135 | lambdaGam = 0.25) 136 | 137 | mgm_obj$interactions$indicator # worked! 138 | 139 | 140 | 141 | # --------- Example 2: p = 3 Binary model with one 3-way interaction --------- 142 | 143 | # ----- 1) Specify Model ----- 144 | 145 | # a) General Graph Info 146 | type = c("c", "c", "c") 147 | level = c(2, 2, 2) 148 | 149 | # b) Define Interaction 150 | factors <- list() 151 | factors[[1]] <- NULL # no pairwise interactions 152 | factors[[2]] <- matrix(c(1,2,3), ncol=3, byrow = T) # one 3-way interaction 153 | 154 | interactions <- list() 155 | interactions[[1]] <- NULL 156 | interactions[[2]] <- vector("list", length = 1) 157 | # threeway interaction no1 158 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3])) 159 | theta <- 2 160 | interactions[[2]][[1]][1, 1, 1] <- theta # fill in nonzero entries 161 | # thus: high probability for the case that x1 = x2 = x3 = 1 162 | 163 | # c) Define Thresholds 164 | thresholds <- list() 165 | thresholds[[1]] <- rep(0, level[1]) 166 | thresholds[[2]] <- rep(0, level[2]) 167 | thresholds[[3]] <- rep(0, level[3]) 168 | 169 | 170 | # ----- 2) Sample cases ----- 171 | 172 | set.seed(1) 173 | dlist <- mgmsampler(factors = factors, 174 | interactions = interactions, 175 | thresholds = thresholds, 176 | type = type, 177 | level = level, 178 | N = 500, 179 | nIter = 100, 180 | pbar = TRUE) 181 | 182 | 183 | # ----- 3) Check: Contingency Table ----- 184 | 185 | dat <- dlist$data 186 | table(dat[,1], dat[,2], dat[,3]) # this is what we expected 187 | 188 | 189 | # ----- 4) Recover model from sampled cases ----- 190 | 191 | mgm_obj <- mgm(data = dlist$data, 192 | type = type, 193 | level = level, 194 | k = 3, 195 | lambdaSel = "EBIC", 196 | lambdaGam = 0.25, 197 | overparameterize = TRUE) 198 | 199 | mgm_obj$interactions$indicator # recovered, plus small spurious pairwise 1-2 200 | 201 | 202 | # --------- Example 3: p = 5 Mixed Graphical Model with two 3-way interaction --------- 203 | 204 | # ----- 1) Specify Model ----- 205 | 206 | # a) General Graph Info 207 | type = c("g", "c", "c", "g") 208 | level = c(1, 3, 5, 1) 209 | # b) Define Interaction 210 | factors <- list() 211 | factors[[1]] <- NULL # no pairwise interactions 212 | factors[[2]] <- matrix(c(1,2,3, 213 | 2,3,4), ncol=3, byrow = T) # no pairwise interactions 214 | interactions <- list() 215 | interactions[[1]] <- NULL 216 | interactions[[2]] <- vector("list", length = 2) 217 | # 3-way interaction no1 218 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3])) 219 | interactions[[2]][[1]][,,1:3] <- rep(.8, 3) # fill in nonzero entries 220 | # 3-way interaction no2 221 | interactions[[2]][[2]] <- array(0, dim = c(level[2], level[3], level[4])) 222 | interactions[[2]][[2]][1,1,] <- .3 223 | interactions[[2]][[2]][2,2,] <- .3 224 | interactions[[2]][[2]][3,3,] <- .3 225 | # c) Define Thresholds 226 | thresholds <- list() 227 | thresholds[[1]] <- 0 228 | thresholds[[2]] <- rep(0, level[2]) 229 | thresholds[[3]] <- rep(0, level[3]) 230 | thresholds[[4]] <- 0 231 | # d) Define Variances 232 | sds <- rep(.1, length(type)) 233 | 234 | 235 | # ----- 2) Sample cases ----- 236 | 237 | set.seed(1) 238 | data <- mgmsampler(factors = factors, 239 | interactions = interactions, 240 | thresholds = thresholds, 241 | sds = sds, 242 | type = type, 243 | level = level, 244 | N = 500, 245 | nIter = 100, 246 | pbar = TRUE) 247 | 248 | 249 | # ----- 3) Check: Conditional Means ----- 250 | 251 | # We condition on the categorical variables and check whether 252 | # the conditional means match what we expect from the model: 253 | 254 | dat <- data$data 255 | 256 | # Check interaction 1 257 | mean(dat[dat[,2] == 1 & dat[,3] == 1, 1]) # (compare with interactions[[2]][[1]]) 258 | mean(dat[dat[,2] == 1 & dat[,3] == 5, 1]) 259 | # first mean higher, ok! 260 | 261 | # Check interaction 2 262 | mean(dat[dat[,2] == 1 & dat[,3] == 1, 4]) # (compare with interactions[[2]][[2]]) 263 | mean(dat[dat[,2] == 1 & dat[,3] == 2, 4]) 264 | # first mean higher, ok! 265 | 266 | 267 | } 268 | 269 | } 270 | 271 | -------------------------------------------------------------------------------- /man/mvarsampler.Rd: -------------------------------------------------------------------------------- 1 | \name{mvarsampler} 2 | \alias{mvarsampler} 3 | 4 | \title{ 5 | Sampling from a mixed VAR model 6 | } 7 | \description{ 8 | Function to sample from a mixed VAR (mVAR) model 9 | } 10 | \usage{ 11 | mvarsampler(coefarray, lags, thresholds, 12 | sds, type, level, N, pbar) 13 | } 14 | %- maybe also "usage" for other objects documented here. 15 | \arguments{ 16 | \item{coefarray}{ 17 | A p x p x max(level) x max(level) x n_lags array, where p are the number of variables, level is the input argument \code{level} and n_lags is the number of specified lags in \code{lags}, so n_lags = length(n_lags). The first four dimensions specify the parameters involved in the cross-lagged effects of the lag specified in the 5th dimension. I.e. coefarray[5, 6, 1, 1, 3] indicates the cross-lagged effect of variable 6 on variable 5 (if both are continuous), for the third lag specified in \code{lags}. If variable 1 and 3 are categorical with m = 2 and = 4 categories, respectively, then coefarray[1, 3, 1:2, 1:4, 1] indicates the m*s=8 parameters specifying this interaction for the first lag specified in \code{lags}. See the examples below for an illustration. 18 | } 19 | \item{lags}{ 20 | A vector indicating the lags in the mVAR model. E.g. \code{lags = c(1, 4, 9)} specifies lags of order 1, 3, 9. The number of specified lags has to match the 5th dimension in \code{coefarray}. 21 | } 22 | \item{thresholds}{ 23 | A list with p entries, each consisting of a vector indicating a threshold for each category of the given variable. For continuous variable, the vector has length 1. 24 | } 25 | \item{sds}{ 26 | A vector of length p indicating the standard deviations of the included Gaussian nodes. If non-Gaussian variables are included in the mVAR model, the corresponding entries are ignored. 27 | } 28 | \item{type}{ 29 | p vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" for categorical. 30 | } 31 | \item{level}{ 32 | p vector indicating the number of categories of each variable. For continuous variables set to 1. 33 | } 34 | \item{N}{ 35 | The number of samples to be drawn from the specified mVAR model. 36 | } 37 | \item{pbar}{ 38 | If \code{pbar = TRUE}, a progress bar is shown. 39 | } 40 | } 41 | \details{ 42 | We sample from the mVAR model by separately sampling from its corresponding p conditional distributions. 43 | } 44 | \value{ 45 | A list with two entries: 46 | 47 | \item{call}{The function call} 48 | 49 | \item{data}{The sampled n x p data matrix} 50 | 51 | } 52 | \references{ 53 | 54 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 55 | 56 | } 57 | \author{ 58 | Jonas Haslbeck 59 | } 60 | 61 | 62 | \examples{ 63 | 64 | \dontrun{ 65 | 66 | 67 | ## Generate data from mixed VAR model using mvarsampler() and recover model using mvar() 68 | 69 | # 1) Define mVAR model 70 | 71 | p <- 6 # Six variables 72 | type <- c("c", "c", "c", "c", "g", "g") # 4 categorical, 2 gaussians 73 | level <- c(2, 2, 4, 4, 1, 1) # 2 categoricals with m=2, 2 categoricals with m=4, two continuous 74 | max_level <- max(level) 75 | 76 | lags <- c(1, 3, 9) # include lagged effects of order 1, 3, 9 77 | n_lags <- length(lags) 78 | 79 | # Specify thresholds 80 | thresholds <- list() 81 | thresholds[[1]] <- rep(0, level[1]) 82 | thresholds[[2]] <- rep(0, level[2]) 83 | thresholds[[3]] <- rep(0, level[3]) 84 | thresholds[[4]] <- rep(0, level[4]) 85 | thresholds[[5]] <- rep(0, level[5]) 86 | thresholds[[6]] <- rep(0, level[6]) 87 | 88 | # Specify standard deviations for the Gaussians 89 | sds <- rep(NULL, p) 90 | sds[5:6] <- 1 91 | 92 | # Create coefficient array 93 | coefarray <- array(0, dim=c(p, p, max_level, max_level, n_lags)) 94 | 95 | # a.1) interaction between continuous 5<-6, lag=3 96 | coefarray[5, 6, 1, 1, 2] <- .4 97 | # a.2) interaction between 1<-3, lag=1 98 | m1 <- matrix(0, nrow=level[2], ncol=level[4]) 99 | m1[1,1:2] <- 1 100 | m1[2,3:4] <- 1 101 | coefarray[1, 3, 1:level[2], 1:level[4], 1] <- m1 102 | # a.3) interaction between 1<-5, lag=9 103 | coefarray[1, 5, 1:level[1], 1:level[5], 3] <- c(0, 1) 104 | 105 | 106 | # 2) Sample 107 | set.seed(1) 108 | dlist <- mvarsampler(coefarray = coefarray, 109 | lags = lags, 110 | thresholds = thresholds, 111 | sds = sds, 112 | type = type, 113 | level = level, 114 | N = 200, 115 | pbar = TRUE) 116 | 117 | # 3) Recover 118 | set.seed(1) 119 | mvar_obj <- mvar(data = dlist$data, 120 | type = type, 121 | level = level, 122 | lambdaSel = "CV", 123 | lags = c(1, 3, 9), 124 | signInfo = FALSE, 125 | overparameterize = F) 126 | 127 | # Did we recover the true parameters? 128 | mvar_obj$wadj[5, 6, 2] # cross-lagged effect of 6 on 5 over lag lags[2] 129 | mvar_obj$wadj[1, 3, 1] # cross-lagged effect of 3 on 1 over lag lags[1] 130 | mvar_obj$wadj[1, 5, 3] # cross-lagged effect of 1 on 5 over lag lags[3] 131 | 132 | 133 | # For more examples see https://github.com/jmbh/mgmDocumentation 134 | 135 | 136 | } 137 | 138 | } 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /man/plotRes.Rd: -------------------------------------------------------------------------------- 1 | \name{plotRes} 2 | \alias{plotRes} 3 | 4 | \title{ 5 | Plot summary of resampled sampling distributions 6 | } 7 | \description{ 8 | Plots a summary of sampling distributions resampled with the resample() function 9 | } 10 | \usage{ 11 | plotRes(object, quantiles = c(.05, .95), labels = NULL, 12 | decreasing = TRUE, cut = NULL, cex.label = 0.75, 13 | lwd.qtl = 2, cex.mean = 0.55, cex.bg = 2.7, 14 | axis.ticks = c(-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1), 15 | axis.ticks.mod = NULL, layout.width.labels = .2, 16 | layout.gap.pw.mod = .15, table = FALSE) 17 | } 18 | 19 | \arguments{ 20 | \item{object}{ 21 | An output object from the \code{resample()} function. 22 | } 23 | \item{quantiles}{ 24 | A numerical vector of length two, specifying the desired lower/upper quantiles. Defaults to \code{quantiles = c(.05, .95)}. 25 | } 26 | \item{labels}{ 27 | A character vector of length p, containing the label of each variable, where p is the number of variables. 28 | } 29 | \item{decreasing}{ 30 | If \code{TRUE} (default), the edges are ordered by the arithmetic mean of the sampling distribution in decreasing order. If \code{FALSE} they are ordered in increasing order. 31 | } 32 | \item{cut}{ 33 | A sequence of integers, specifying which edges are represented. For instance, if \code{decreasing = TRUE} and \code{cut = 1:10}, summaries for the 10 edges with the largest parameter estimate are displayed. The cut argument can also be used to present the boostrapped CIs in several figures. 34 | } 35 | \item{cex.label}{ 36 | Text size of the labels. 37 | } 38 | \item{lwd.qtl}{ 39 | Line width of line indicating the upper/lower quantiles. 40 | } 41 | \item{cex.mean}{ 42 | Text size of the number indicating the proportion of the estimates whose absolute value is larger than zero. 43 | } 44 | \item{cex.bg}{ 45 | Size of the white background of the number indicating the proportion of the estimates whose absolute value is larger than zero. 46 | } 47 | \item{axis.ticks}{ 48 | A numeric vector indicating the axis ticks and labels for the x-axis. 49 | } 50 | \item{axis.ticks.mod}{ 51 | A numeric vector indicating the axis ticks and labels for the x-axis for moderation effects. If \code{axis.ticks.mod=NULL}, the values from \code{axis.ticks} for pairwise interactions are used. 52 | } 53 | \item{layout.width.labels}{ 54 | A positive numeric value which specifies the width of the left-hand-side legend relative to the width of the data panel (or data panels, in case of a moderator model), which have width = 1. Defaults to \code{layout.width.labels = 0.2}.} 55 | \item{layout.gap.pw.mod}{ 56 | A positive numeric value which specifies the width of the gap between the stability of pairwise effects and moderation effects. Defaults to \code{layout.gap.pw.mod = 0.15}. 57 | } 58 | \item{table}{ 59 | If \code{table = TRUE}, the output is presented as a table instead of a figure. Defaults to \code{table = FALSE}. 60 | } 61 | } 62 | 63 | \details{ 64 | Currently only supports summaries for resampled \code{mgm()} objects, and moderated MGMs with a single moderator. 65 | } 66 | 67 | \value{ 68 | Plots a figure that shows summaries of the resampled sampling distribution for (a set of) all edge parameters. These include the mean, a specified upper and lower quantile and the proportion of parameter estimates whose absolute value is larger than zero. 69 | } 70 | 71 | \author{ 72 | Jonas Haslbeck 73 | } 74 | 75 | 76 | \seealso{ 77 | \code{resample()}, \code{mgm()}, \code{mvar()}, \code{tvmgm()}, \code{tvmar()} 78 | } 79 | 80 | 81 | \examples{ 82 | 83 | \dontrun{ 84 | 85 | # Fit initial model 86 | fit_aut <- mgm(data = as.matrix(autism_data$data), 87 | type = autism_data$type, 88 | level = autism_data$lev, 89 | k = 2) 90 | 91 | 92 | # Fit bootstrapped models 93 | res_aut <- resample(object = fit_aut, 94 | data = as.matrix(autism_data$data), 95 | nB = 10) # should be more in real applications 96 | 97 | # Plot Summary 98 | plotRes(object = res_aut, 99 | quantiles = c(.05, .95), 100 | labels = NULL, 101 | axis.ticks = c(-.25, 0, .25, .5, .75)) 102 | 103 | } 104 | } 105 | 106 | 107 | -------------------------------------------------------------------------------- /man/predict.mgm.Rd: -------------------------------------------------------------------------------- 1 | \name{predict.mgm} 2 | \alias{predict.mgm} 3 | 4 | 5 | 6 | \title{ 7 | Compute predictions from mgm model objects 8 | } 9 | \description{ 10 | Computes predictions and prediction errors from a mgm model-object (\code{mgm}, \code{mvar}, \code{tvmgm} or \code{tvmvar}). 11 | 12 | } 13 | \usage{ 14 | \method{predict}{mgm}(object, data, errorCon, errorCat, 15 | tvMethod, consec, beepvar, dayvar, errordecimals=3, 16 | \dots) 17 | } 18 | 19 | \arguments{ 20 | \item{object}{ 21 | An mgm model object (the output of one of the functions \code{mgm()}, \code{mvar()}, \code{tvmgm()} or \code{tvmvar()}) 22 | } 23 | \item{data}{ 24 | A n x p data matrix with the same structure (number of variables p and types of variables) as the data used to fit the model. 25 | } 26 | \item{errorCon}{ 27 | Either a character vector specifying the types of nodewise errors that should be computed, where the two provided error functions for continuous varaibles are \code{errorCon = "RMSE"}, the Root Mean Squared Error, and \code{errorCon = "R2"}, the proportion of explained variance. The default is \code{errorCon = c("RMSE" "R2")}. 28 | 29 | Alternatively, \code{errorCon} can be a list, where each list entry is a custom error function of the form \code{foo(true, pred)}, where \code{true} and \code{pred} are the arguments for the vectors of true and predicted values, respectively. If predictions are made for a time-varying model and \code{tvMethod = "weighted"}, the weighted R2 or RMSE are computed. If a custom function is used, an additional argument for the weights has to be provided: \code{foo(true, pred, weights)}. Note that custom error functios can also be combined with the buildt-in functions, i.e. \code{errorCon = list("RMSE", "CustomError"=foo)}.} 30 | 31 | 32 | \item{errorCat}{ 33 | Either a character vector specifying the types of nodewise errors that should be computed, where the two provided error functions for categorical variables are \code{errorCat = "CC"}, the proportion of correct classification (accuracy) and \code{errorCat = "nCC"}, the proportion of correct classification normalized by the marginal distribution of the variable at hand. Specifically, nCC = (CC - norm_constant) / (1 - norm_constant), where norm_constant is the highest relative frequency across categories. Another provided error is "CCmarg" which returns the accuracy of the intercept/marginal model. The default is to return all types of errors \code{errorCon = c("CC" "nCC", "CCmarg")}. 34 | 35 | Alternatively, \code{errorCat} can be a list, where each list entry is a custom error function of the form \code{foo(true, pred)}, where \code{true} and \code{pred} are the arguments for the vectors of true and predicted values, respectively. If predictions are made for a time-varying model and \code{tvMethod = "weighted"}, the weighted R2 or RMSE are computed. If a custom function is used, an additional argument for the weights has to be provided: \code{foo(true, pred, weights)}. Note that custom error functios can also be combined with the buildt-in functions, i.e. \code{errorCon = list("nCC", "CustomError"=foo)}.} 36 | 37 | \item{tvMethod}{ 38 | Specifies how predictions and errors are computed for time-varying models: \code{tvMethod = "weighted"} computes errors by computing a weighted error over all cases in the time series at each estimation point specified in \code{estpoints} in \code{tvmgm()} or \code{tvmvar()}. The weighting corresponds to the weighting used for estimation (see \code{?tvmgm} or \code{?tvmvar}). \code{tvMethod = "closestModel"} determines for each time point the closest model and uses that model for prediction. See Details below for a more detailed explanation. 39 | } 40 | 41 | \item{consec}{Only relevant for (time-varying) mVAR models. An integer vector of length \code{nrow(data)}, indicating the sequence of measurement points in a time series. This is only relevant for mVAR models and time series with unequal time intervals. Defaults to \code{consec = NULL}, which assumes equal time intervals. \code{consec} is ignored if a \code{mgm} or \code{tvmgm} object is provided to \code{predict.mgm()}. For details see \code{?mvar}.} 42 | 43 | \item{beepvar}{Together with the argument \code{dayvar}, this argument is an alternative to the \code{consec} argument (see above) to specify the consecutiveness of measurements. This is tailored to ecological momentary assessment (EMA) studies, where the consectutiveness is defined by the number of notification on a given day (\code{beepvar}) and the given day (\code{dayvar}). 44 | } 45 | 46 | \item{dayvar}{See \code{beepvar}.} 47 | 48 | \item{errordecimals}{Number of decimals to which predictability / prediction error values are rounded. Defaults to \code{errordecimals = 3}.} 49 | 50 | \item{...}{Additional arguments.} 51 | 52 | 53 | } 54 | 55 | 56 | 57 | \details{ 58 | Nodewise errors in time-varying models can be computed in two different ways: first, one computes the predicted value for each of the N cases in the time series for all models (estimated at different estimation points, see \code{?tvmgm} or \code{?tvmvar}). Then the error of each of the N cases for each of the models is weighted by the weight that has been used to estimate a given model at its estimation point. This means that the error of a data point close to the end of a time-series gets a high weight for models estimated in the end of the time-series and a low weight for models estimated in the beginning of the time series. 59 | 60 | Second, we determine for each case in the time-series the closest estmation point, and use the model estimated at that estimation point to make predictions for that case. 61 | 62 | Note that the error function normalized accuracy (nCC) is negative if the full model performs worse than the intercept model. This can happen if the model overfits the data. 63 | 64 | } 65 | 66 | 67 | \value{ 68 | A list with the following entries: 69 | 70 | \item{call}{ 71 | Contains all provided input arguments. 72 | } 73 | 74 | \item{predicted}{ 75 | A n x p matrix with predicted values, matching the dimension of the true values in \code{true}. 76 | } 77 | 78 | \item{probabilities}{ 79 | A list with p entries corresponding to p nodes in the data. If a variable is categorical, the corresponding entry contains a n x k matrix with predicted probabilities, where k is the number of categories of the categorical variable. If a variable is continuous, the corresponding entry is empty. 80 | } 81 | 82 | \item{true}{ 83 | Contains the true values. For \code{mgm} and \code{tvmgm} objects these are equal to the data provided via \code{data}. For \code{mvar} and \code{tvmvar} objects, these are equal to the rows that can be predicted in a VAR model, depending on the largest specified lag and (if specified) the \code{consec} argument. 84 | } 85 | 86 | \item{errors}{ 87 | A matrix containing the all types of errors specified via \code{errorCon} and \code{errorCat}, for each variable. If \code{tvMethod = "weighted"}, the matrix becomes an array, with an additional dimension for the estimation point. 88 | } 89 | 90 | \item{tverrors}{ 91 | If \code{tvMethod = "weighted"}, this list entry contains a list with errors of the format of \code{errors}, separately for each estimation point. The errors are computed from predictions of the model at the given estimation points and weighted by the weight-vector at that estimation point. If \code{tvMethod = "closestModel"}, this entry is empty. 92 | } 93 | 94 | 95 | } 96 | \references{ 97 | 98 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 99 | 100 | } 101 | \author{ 102 | Jonas Haslbeck 103 | } 104 | 105 | \examples{ 106 | 107 | \dontrun{ 108 | # See examples in ?mgm, ?tvmgm, ?mvar and ?tvmvar. 109 | } 110 | 111 | } 112 | 113 | -------------------------------------------------------------------------------- /man/print.int.Rd: -------------------------------------------------------------------------------- 1 | \name{print.int} 2 | \alias{print.int} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Print method for int objects 6 | } 7 | \description{ 8 | Returns basic information about objects created with showInteraction() 9 | } 10 | \usage{ 11 | \method{print}{int}(x, ...) 12 | 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{ 17 | The output object of showInteraction(). 18 | } 19 | \item{\dots}{ 20 | Additional arguments. 21 | } 22 | } 23 | \value{ 24 | Writes basic information about the object in the console. 25 | } 26 | 27 | \author{ 28 | Jonas Haslbeck 29 | } 30 | -------------------------------------------------------------------------------- /man/print.mgm.Rd: -------------------------------------------------------------------------------- 1 | \name{print.mgm} 2 | \alias{print.mgm} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Print method for mgm objects 6 | } 7 | \description{ 8 | Returns basic information about fit objects, prediction objects and bandwidth-selection objects. 9 | } 10 | \usage{ 11 | \method{print}{mgm}(x, ...) 12 | 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{ 17 | The output object of \code{mgm()}, \code{mvar()}, \code{tvmgm()}, \code{tvmvar()}, \code{predict.mgm()} or \code{bwSelect()}. 18 | } 19 | \item{\dots}{ 20 | Additional arguments. 21 | } 22 | } 23 | \value{ 24 | Writes basic information about the object in the console. 25 | } 26 | 27 | 28 | \author{ 29 | Jonas Haslbeck 30 | } 31 | -------------------------------------------------------------------------------- /man/resample.Rd: -------------------------------------------------------------------------------- 1 | \name{resample} 2 | \alias{resample} 3 | 4 | \title{ 5 | Resampling scheme for mgm objects 6 | } 7 | \description{ 8 | Fits mgm model types (mgm, mvar, tvmgm, tvmvar) to a specified number of bootstrap samples. 9 | } 10 | \usage{ 11 | resample(object, data, nB, blocks, quantiles, 12 | pbar, verbatim, ...) 13 | } 14 | 15 | \arguments{ 16 | \item{object}{ 17 | An mgm model object, the output of mgm(), tvmgm(), mvar(), tvmvar(). The model specifications for all fitted models are taken from this model object. 18 | } 19 | \item{data}{ 20 | The n x p data matrix. 21 | } 22 | \item{nB}{ 23 | The number of bootstrap samples. 24 | } 25 | \item{blocks}{ 26 | The number of blocks for the block bootstrap used for time-varying models. 27 | } 28 | 29 | \item{quantiles}{ 30 | A vector with two values in [0, 1], specifying quantiles computed on the bootstrapped sampling distributions. Defaults to \code{quantiles = c(.05, .95)} 31 | } 32 | 33 | \item{pbar}{ 34 | If \code{TRUE}, a progress bar is shown. Defaults to \code{pbar = TRUE}. 35 | } 36 | 37 | \item{verbatim}{ 38 | If \code{TRUE}, the seed of the current bootstrap sample is printed in the console. Useful to exclude zero-variance bootstrap samples in datasets with low variance. 39 | } 40 | \item{\dots}{ 41 | Additional arguments. 42 | } 43 | } 44 | 45 | \details{ 46 | 47 | \code{resample()} fits a model specified via the \code{object} argument to \code{nB} bootstrap samples obtained from the orginial dataset. For stationary models (mgm() and mvar()) objects, we use the standard bootstrap. For time-varying models (tvmgm() and tvmvar()) we use the block bootstrap. 48 | 49 | For mvar models, \code{bootParameters} is a p x p x nlags x nB array, where p is the number of variables, nlags is the number of specified lags, and nB is the number of bootstrap samples. Thus \code{bootParameters[7, 3, 2, ]} returns the bootstrapped sampling distribution of the lagged effect from variable 3 on 7 for the 2nd specified lag. See also \code{?mvar}. 50 | 51 | For tvmar models, \code{bootParameters} is a p x p x nlags x nestpoints x nB array, analogously to mvar models. nestpoints is the number of specified estpoints. See also \code{?tvmvar}. 52 | 53 | Resampling is currently only supported for pairwise MGMs (\code{k = 2}). For mgms, \code{bootParameters} is a p x p x nB array. For tvmgms, \code{bootParameters} is a p x p x nestpoint x nB array. 54 | 55 | The seeds for the bootstrap samples are randomly sampled. For MGMs, the seeds are resampled until there are nB bootstrap samples on which an MGM can be estimated. This resampling has been implemented, because especially for small data sets, one can obtain bootstrap samples in which one or several variables have zero variance. For the other model classes, an informative error is returned in case the respective model cannot be estimated on one or more of the bootstrap samples. 56 | 57 | } 58 | \value{ 59 | The output consists of a list with the entries 60 | 61 | \item{call}{Contains the function call.} 62 | 63 | \item{models}{A list with \code{nB} entries, containing the models fit to the bootstrapped sampels.} 64 | 65 | \item{bootParameters}{Contains all the bootstrapped sampling distribution of all parameters. The dimension of this object depends on the type of model. Specifically, this object has the same dimension as the main parameter output of the corresponding estimation function, with one dimension added for the bootstrap iterations. See "Details" above. 66 | } 67 | 68 | \item{bootQuantiles}{Contains the specified quantiles of the bootstrapped sampling distribution for each parameter. Has the same structure as \code{bootParameters}. See "Details" above. } 69 | 70 | \item{Times}{Returns the running time for each bootstrap model in seconds.} 71 | 72 | \item{totalTime}{Returns the running time for all bootstrap models together in seconds.} 73 | 74 | \item{seeds}{\code{nB} integers indicating the seeds used to sample the \code{nB} bootstrap samples.} 75 | 76 | } 77 | 78 | 79 | \references{ 80 | 81 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 82 | 83 | 84 | } 85 | \author{ 86 | Jonas Haslbeck 87 | } 88 | 89 | 90 | \examples{ 91 | 92 | \dontrun{ 93 | 94 | 95 | # 1) Fit mgm to example dataset (true edges: 1-4, 2-3, 1-2) 96 | mgm_obj <- mgm(data = mgm_data$data, 97 | type = c('g', 'c', 'c', 'g'), 98 | level = c(1, 2, 4, 1), 99 | k = 2, 100 | lambdaSel = 'CV', 101 | threshold = 'none') 102 | 103 | # 2) Take 50 bootstrap samples using resample() 104 | res_obj <- resample(object = mgm_obj, 105 | data = mgm_data$data, 106 | nB = 50) 107 | 108 | # 3) Plot histogram of bootstrapped sampling distribution of edge 1-4 109 | hist(res_obj$bootParameters[1, 4, ], 110 | main = "", 111 | xlab = "Parameter Estimate") 112 | 113 | # 4) Plot summary of all sampling distributions 114 | plotRes(object = res_obj, 115 | quantiles = c(0.05, .95)) 116 | 117 | 118 | # For more examples see https://github.com/jmbh/mgmDocumentation 119 | 120 | 121 | 122 | } 123 | 124 | 125 | } 126 | -------------------------------------------------------------------------------- /man/showInteraction.Rd: -------------------------------------------------------------------------------- 1 | \name{showInteraction} 2 | \alias{showInteraction} 3 | 4 | \title{ 5 | Retrieving details of interactions 6 | } 7 | \description{ 8 | Retrieves details of a specified interaction from mgm model objects. 9 | } 10 | \usage{ 11 | showInteraction(object, int) 12 | } 13 | 14 | 15 | \arguments{ 16 | \item{object}{ 17 | The output of one of the estimation functions \code{mgm()}, \code{tvmgm()}, \code{mvar()}, \code{tvmvar()}. 18 | } 19 | \item{int}{ 20 | An integer vector specifying the interaction. For mVAR models, this vector has length 2. For MGMs the vector can be larger to request details of interaction of order > 2. 21 | } 22 | } 23 | 24 | \details{ 25 | Currently the function only returns details of pairwise interactions from output objects of \code{mgm()}. 26 | } 27 | \value{ 28 | \item{variables}{Integer vector returning the variables specified via the argument \code{int}} 29 | \item{type}{Character vector returning the type of the specified variables variables} 30 | \item{level}{Integer vector returning the number of levels of the specified variables variables} 31 | \item{parameters}{A list of length equal to the order k of the specified interaction. The entries contain the set of parameters obtained from the nodewise regressions on the k variables. Depending on the type of the variables in the interaction, these sets can obtain one or several parameters. For details see \code{?mgm} or Haslbeck & Waldorp (2017).} 32 | 33 | 34 | 35 | } 36 | \references{ 37 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 38 | } 39 | \author{ 40 | Jonas Haslbeck 41 | } 42 | 43 | \seealso{ 44 | \code{mgm}, \code{tvmgm}, \code{mvar}, \code{tvmvar} 45 | } 46 | 47 | 48 | \examples{ 49 | 50 | \dontrun{ 51 | 52 | 53 | ## We fit a pairwise and 3-order MGM to the mixed Autism dataset (?autism_data) 54 | 55 | # 1) Fit Pairwise MGM 56 | 57 | # Call mgm() 58 | fit_d2 <- mgm(data = autism_data$data, 59 | type = autism_data$type, 60 | level = autism_data$lev, 61 | k = 2) # ad most pairwise interacitons 62 | 63 | # Weighted adjacency matrix 64 | fit_d2$pairwise$wadj # for instance, we see there is an interaction 1-2 65 | 66 | # 2) Look at details of interaction 1-2 67 | showInteraction(object = fit_d2, 68 | int = c(1, 2)) 69 | 70 | # For more examples see https://github.com/jmbh/mgmDocumentation 71 | 72 | } 73 | 74 | 75 | } 76 | 77 | -------------------------------------------------------------------------------- /man/tvmgm.Rd: -------------------------------------------------------------------------------- 1 | \name{tvmgm} 2 | \alias{tvmgm} 3 | \alias{tv.mgmfit} 4 | 5 | \title{ 6 | Estimating time-varying Mixed Graphical Models 7 | } 8 | 9 | \description{ 10 | Estimates time-varying k-order Mixed Graphical Models (MGMs) via elastic-net regularized kernel smoothed Generalized Linear Models 11 | } 12 | 13 | \usage{ 14 | tvmgm(data, type, level, timepoints, estpoints, bandwidth, ...) 15 | } 16 | 17 | 18 | \arguments{ 19 | \item{data}{ 20 | n x p data matrix. 21 | } 22 | \item{type}{ 23 | p vector indicating the type of variable for each column in \code{data}: "g" for Gaussian, "p" for Poisson, "c" for categorical. 24 | } 25 | \item{level}{ 26 | p vector indicating the number of categories of each variable. For continuous variables set to 1. 27 | } 28 | \item{timepoints}{ 29 | A strictly increasing numeric vector of length \code{nrow(data)} indicating the time points of the measurements in \code{data}. If \code{timepoints} is not specified, it is assumed that the time points are equally spaced. For details, see Haslbeck and Waldorp (2018). 30 | } 31 | \item{estpoints}{ 32 | Vector indicating estimation points on the unit interval [0, 1] (the provided time scale is normalized interally to [0,1]). 33 | } 34 | \item{bandwidth}{ 35 | We use a gaussian density on the unit time-interval [0,1] to determine the weights for each observation at each estimated time point. The bandwidth specifies the standard deviation the Gaussian density. To get some intuition, which bandwidth results in the combination of how many data close in time one can plot Gaussians on [0,1] for different bandwidths. The bandwidth can also be selected in a data driven way using the function (see \code{bwSelect}). 36 | 37 | } 38 | \item{\dots}{ 39 | Arguments passed to \code{mgm}, specifying the MGM. See \code{?mgm}. 40 | } 41 | } 42 | 43 | \details{ 44 | Estimates a sequence of MGMs at the time points specified at the locations specified via \code{estpoints}. \code{tvmgm()} is a wrapper around \code{mgm()} and estimates a series of MGM with different weightings which are defined by the estimation locations in \code{estpoints} and the bandwidth parameter specified in \code{bandwidth}. For details see Haslbeck and Waldorp (2018). 45 | 46 | Note that MGMs are not normalizable for all parameter values. See Chen, Witten & Shojaie (2015) for an overview of when pairwise MGMs are normalizable. To our best knowledge, for MGMs with interactions of order > 2 that include non-categorical variables, the conditions for normalizablity are unknown. 47 | } 48 | \value{ 49 | A list with the following entries: 50 | 51 | \item{call}{ 52 | Contains all provided input arguments. If \code{saveData = TRUE}, it also contains the data. 53 | } 54 | 55 | \item{pairwise}{ 56 | Contains a list with all information about estimated pairwise interactions. \code{wadj} contains a p x p x estpoints array containing the weighted adjacency matrix for each estimation point specified in \code{estpoints}, if p is the number of variables in the network. \code{signs} has the same dimensions as \code{wadj} and contains the signs for the entries of \code{wadj}: 1 indicates a positive sign, -1 a negative sign and 0 an undefined sign. A sign is undefined if an edge is a function of more than one parameter. This is the case for interactions involving a categorical variable with more than 2 categories. \code{edgecolor} also has the same dimensions as \code{wadj} contains a color for each edge, depending on \code{signs}. It is provided for more convenient plotting. If only pairwise interactions are modeled (\code{k = 2}), \code{wadj} contains all conditional independence relations. 57 | } 58 | 59 | \item{interactions}{ 60 | Contains a list with one entry for each estimation point specified in \code{estpoints}; each entry is a list with three entries that relate each interaction in the model to all its parameters. \code{indicator} contains a list with k-1 entries, one for each order of modeled interaction, which contain the estimated (nonzero) interactions. \code{weights} contains a list with k-1 entries, which in turn contain R lists, where R is the number of interactions (and rows in the corresponding list entry in\code{indicator}) that were estimated (nonzero) in the given entry. \code{signs} has the same structure as \code{weights} and provides the sign of the interaction, if defined. 61 | } 62 | 63 | \item{intercepts}{ 64 | Contains a list with one entry for each estimation point specified in \code{estpoints}; each entry is a list with p entries, which contain the intercept/thresholds for each node in the network. In case a given node is categorical with m categories, there are m thresholds for this variable (one for each category). 65 | } 66 | 67 | 68 | \item{tvmodels}{ 69 | Contains the MGM model estimated by \code{mgm()} at each time point specified via \code{estpoints}. See \code{?mgm} for a detailed description of this output. 70 | } 71 | 72 | } 73 | 74 | 75 | \references{ 76 | Chen S, Witten DM & Shojaie (2015). Selection and estimation for mixed graphical models. Biometrika, 102(1), 47. 77 | 78 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 79 | 80 | Yang, E., Baker, Y., Ravikumar, P., Allen, G. I., & Liu, Z. (2014, April). Mixed Graphical Models via Exponential Families. In AISTATS (Vol. 2012, pp. 1042-1050). 81 | 82 | } 83 | 84 | \author{ 85 | 86 | Jonas Haslbeck 87 | 88 | } 89 | 90 | 91 | 92 | \examples{ 93 | 94 | \dontrun{ 95 | 96 | 97 | ## We specify a time-varying MGM and recover it using tvmgm() 98 | 99 | # 1) Specify Model 100 | 101 | # a) Define Graph 102 | p <- 6 103 | type = c("c", "c", "g", "g", "p", "p") 104 | level = c(2, 3, 1, 1, 1, 1) 105 | n_timepoints <- 1000 106 | 107 | # b) Define Interaction 108 | factors <- list() 109 | factors[[1]] <- matrix(c(1,2, 110 | 2,3, 111 | 3,4), ncol=2, byrow = T) # no pairwise interactions 112 | factors[[2]] <- matrix(c(1,2,3, 113 | 2,3,4), ncol=3, byrow = T) # one 3-way interaction 114 | 115 | interactions <- list() 116 | interactions[[1]] <- vector("list", length = 3) 117 | interactions[[2]] <- vector("list", length = 2) 118 | # 3 2-way interactions 119 | interactions[[1]][[1]] <- array(0, dim = c(level[1], level[2], n_timepoints)) 120 | interactions[[1]][[2]] <- array(0, dim = c(level[2], level[3], n_timepoints)) 121 | interactions[[1]][[3]] <- array(0, dim = c(level[3], level[4], n_timepoints)) 122 | # 2 3-way interactions 123 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3], n_timepoints)) 124 | interactions[[2]][[2]] <- array(0, dim = c(level[2], level[3], level[4], n_timepoints)) 125 | theta <- .3 126 | interactions[[1]][[1]][1, 1, ] <- theta 127 | interactions[[1]][[2]][1, 1, ] <- theta 128 | interactions[[1]][[3]][1, 1, ] <- seq(0, theta, length = n_timepoints) 129 | interactions[[2]][[1]][1, 1, 1, ] <- theta 130 | interactions[[2]][[2]][1, 1, 1, ] <- theta 131 | # c) Define Thresholds 132 | thresholds <- list() 133 | thresholds[[1]] <- matrix(0, nrow = n_timepoints, ncol= level[1]) 134 | thresholds[[2]] <- matrix(0, nrow = n_timepoints, ncol= level[2]) 135 | thresholds[[3]] <- matrix(0, nrow = n_timepoints, ncol= level[3]) 136 | thresholds[[4]] <- matrix(0, nrow = n_timepoints, ncol= level[4]) 137 | thresholds[[5]] <- matrix(.1, nrow = n_timepoints, ncol= level[5]) 138 | thresholds[[6]] <- matrix(.1, nrow = n_timepoints, ncol= level[6]) 139 | # d) define sds 140 | sds <- matrix(.2, ncol=p, nrow=n_timepoints) 141 | 142 | # 2) Sample Data 143 | set.seed(1) 144 | d_iter <- tvmgmsampler(factors = factors, 145 | interactions = interactions, 146 | thresholds = thresholds, 147 | sds = sds, 148 | type = type, 149 | level = level, 150 | nIter = 100, 151 | pbar = TRUE) 152 | 153 | data <- d_iter$data 154 | head(data) 155 | # delete inf rows: 156 | ind_finite <- apply(data, 1, function(x) if(all(is.finite(x))) TRUE else FALSE) 157 | table(ind_finite) # all fine for this setup & seed 158 | # in case of inf values (no theory on how to keep k-order MGM well-defined) 159 | data <- data[ind_finite, ] 160 | 161 | 162 | # 3) Recover 163 | mgm_c_cv <- tvmgm(data = data, 164 | type = type, 165 | level = level, 166 | k = 3, 167 | estpoints = seq(0, 1, length=10), 168 | bandwidth = .1, 169 | lambdaSel = "CV", 170 | ruleReg = "AND", 171 | pbar = TRUE, 172 | overparameterize = T, 173 | signInfo = FALSE) 174 | 175 | # Look at time-varying pairwise parameter 3-4 176 | mgm_c_cv$pairwise$wadj[3,4,] # recovers increase 177 | 178 | # 4) Predict values / compute nodewise Errors 179 | pred_mgm_cv_w <- predict.mgm(mgm_c_cv, 180 | data = data, 181 | tvMethod = "weighted") 182 | pred_mgm_cv_cM <- predict.mgm(mgm_c_cv, 183 | data = data, 184 | tvMethod = "closestModel") 185 | 186 | pred_mgm_cv_w$errors 187 | pred_mgm_cv_cM$errors # Pretty similar! 188 | 189 | 190 | # For more examples see https://github.com/jmbh/mgmDocumentation 191 | 192 | } 193 | 194 | } 195 | -------------------------------------------------------------------------------- /man/tvmgmsampler.Rd: -------------------------------------------------------------------------------- 1 | \name{tvmgmsampler} 2 | \alias{tvmgmsampler} 3 | 4 | \title{ 5 | Sample from time-varying k-order Mixed Graphical Model 6 | } 7 | \description{ 8 | Generates samples from a time-varying k-order Mixed Graphical Model 9 | } 10 | \usage{ 11 | tvmgmsampler(factors, interactions, thresholds, sds, type, 12 | level, nIter = 250, pbar = TRUE, ...) 13 | } 14 | 15 | \arguments{ 16 | \item{factors}{ 17 | The same object as \code{factors} in \code{mgmsampler()}. An interaction is specified in \code{factors} if it should be nonzero at least at one time point in the time series. The values of each parameter at each time point is specified via \code{interactions}. 18 | } 19 | \item{interactions}{ 20 | The same object as \code{factors} in \code{mgmsampler()}, except that each array indicating the parameters of an interaction has an additional (the last) dimension, indicating time. Corresponding to the time vector in \code{factors}, the time vector has to be a sequence of integers \{1, 2, ..., N\}. For an illustration see the examples below. 21 | } 22 | \item{thresholds}{ 23 | A list with p entries for p variables, each of which contains a N x m matrix. The columns contain the m thresholds for m categories (for continuous variables m = 1 and the entry contains the threshold/intercept). The rows indicate how the thresholds change over time. 24 | } 25 | \item{sds}{ 26 | N x p matrix indicating the standard deviations of Gaussians specified in \code{type} for \{1, ..., N\} time points. Entries not referring to Gaussians are ignored. 27 | } 28 | \item{type}{ 29 | p character vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" of each variable. 30 | } 31 | \item{level}{ 32 | p integer vector indicating the number of categories of each variable. For continuous variables set to 1. 33 | } 34 | \item{nIter}{ 35 | Number of iterations in the Gibbs sampler until a sample is drawn. 36 | } 37 | \item{pbar}{ 38 | If \code{pbar = TRUE} a progress bar is shown. Defaults to \code{pbar = TRUE}. 39 | } 40 | 41 | \item{...}{ 42 | Additional arguments. 43 | } 44 | 45 | } 46 | \details{ 47 | 48 | \code{tvmgmsampler} is a wrapper function around \code{mgmsampler}. Its input is the same as for \code{mgmsampler}, except that each object has an additional dimension for time. The number of time points is specified via entries in the additional time dimension. 49 | 50 | } 51 | \value{ 52 | 53 | A list containing: 54 | 55 | \item{call}{ 56 | Contains all provided input arguments. 57 | } 58 | 59 | \item{data}{ 60 | The N x p data matrix of sampled values 61 | } 62 | 63 | 64 | } 65 | 66 | \references{ 67 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 68 | 69 | Yang, E., Baker, Y., Ravikumar, P., Allen, G. I., & Liu, Z. (2014, April). Mixed Graphical Models via Exponential Families. In AISTATS (Vol. 2012, pp. 1042-1050). 70 | 71 | } 72 | \author{ 73 | Jonas Haslbeck 74 | } 75 | 76 | 77 | \examples{ 78 | 79 | 80 | \dontrun{ 81 | 82 | # --------- Example 1: p = 4 dimensional Gaussian --------- 83 | 84 | # ----- 1) Specify Model ----- 85 | 86 | # a) General Graph Info 87 | type = c("g", "g", "g", "g") # Four Gaussians 88 | level = c(1, 1, 1, 1) 89 | n_timepoints = 500 # Number of time points 90 | 91 | # b) Define Interaction 92 | factors <- list() 93 | factors[[1]] <- array(NA, dim=c(2, 2)) # two pairwise interactions 94 | factors[[1]][1, 1:2] <- c(3,4) 95 | factors[[1]][2, 1:2] <- c(1,2) 96 | 97 | # Two parameters, one linearly increasing from 0 to 0.8, another one lin decreasing from 0.8 to 0 98 | interactions <- list() 99 | interactions[[1]] <- vector("list", length = 2) 100 | interactions[[1]][[1]] <- array(0, dim = c(level[1], level[2], n_timepoints)) 101 | interactions[[1]][[1]][1,1, ] <- seq(.8, 0, length = n_timepoints) 102 | interactions[[1]][[2]] <- array(0, dim = c(level[1], level[2], n_timepoints)) 103 | interactions[[1]][[2]][1,1, ] <- seq(0, .8, length = n_timepoints) 104 | 105 | # c) Define Thresholds 106 | thresholds <- vector("list", length = 4) 107 | thresholds <- lapply(thresholds, function(x) matrix(0, ncol = level[1], nrow = n_timepoints)) 108 | 109 | # d) Define Standard deviations 110 | sds <- matrix(1, ncol = length(type), nrow = n_timepoints) # constant across variables and time 111 | 112 | 113 | # ----- 2) Sample cases ----- 114 | 115 | set.seed(1) 116 | dlist <- tvmgmsampler(factors = factors, 117 | interactions = interactions, 118 | thresholds = thresholds, 119 | sds = sds, 120 | type = type, 121 | level = level, 122 | nIter = 75, 123 | pbar = TRUE) 124 | 125 | 126 | # ----- 3) Recover model from sampled cases ----- 127 | 128 | set.seed(1) 129 | tvmgm_obj <- tvmgm(data = dlist$data, 130 | type = type, 131 | level = level, 132 | estpoints = seq(0, 1, length = 15), 133 | bandwidth = .2, 134 | k = 2, 135 | lambdaSel = "CV", 136 | ruleReg = "AND") 137 | 138 | # How well did we recover those two time-varying parameters? 139 | plot(tvmgm_obj$pairwise$wadj[3,4,], type="l", ylim=c(0,.8)) 140 | lines(tvmgm_obj$pairwise$wadj[1,2,], type="l", col="red") 141 | # Looks quite good 142 | 143 | 144 | # --------- Example 2: p = 5 binary; one 3-way interaction --------- 145 | 146 | # ----- 1) Specify Model ----- 147 | 148 | # a) General Graph Info 149 | p <- 5 # number of variables 150 | type = rep("c", p) # all categorical 151 | level = rep(2, p) # all binary 152 | n_timepoints <- 1000 153 | 154 | # b) Define Interaction 155 | factors <- list() 156 | factors[[1]] <- NULL # no pairwise interactions 157 | factors[[2]] <- array(NA, dim = c(1,3)) # one 3-way interaction 158 | factors[[2]][1, 1:3] <- c(1, 2, 3) 159 | 160 | interactions <- list() 161 | interactions[[1]] <- NULL # no pairwise interactions 162 | interactions[[2]] <- vector("list", length = 1) # one 3-way interaction 163 | # 3-way interaction no1 164 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3], n_timepoints)) 165 | theta <- 2 166 | interactions[[2]][[1]][1, 1, 1, ] <- seq(0, 2, length = n_timepoints) # fill in nonzero entries 167 | 168 | # c) Define Thresholds 169 | thresholds <- list() 170 | for(i in 1:p) thresholds[[i]] <- matrix(0, nrow = n_timepoints, ncol = level[i]) 171 | 172 | 173 | # ----- 2) Sample cases ----- 174 | 175 | set.seed(1) 176 | dlist <- tvmgmsampler(factors = factors, 177 | interactions = interactions, 178 | thresholds = thresholds, 179 | type = type, 180 | level = level, 181 | nIter = 150, 182 | pbar = TRUE) 183 | 184 | 185 | # ----- 3) Check Marginals ----- 186 | 187 | dat <- dlist$data[1:round(n_timepoints/2),] 188 | table(dat[,1], dat[,2], dat[,3]) 189 | 190 | dat <- dlist$data[round(n_timepoints/2):n_timepoints,] 191 | table(dat[,1], dat[,2], dat[,3]) 192 | 193 | # Observation: much stronger effect in second hald of the time-series, 194 | # which is what we expect 195 | 196 | 197 | # ----- 4) Recover model from sampled cases ----- 198 | 199 | set.seed(1) 200 | tvmgm_obj <- tvmgm(data = dlist$data, 201 | type = type, 202 | level = level, 203 | estpoints = seq(0, 1, length = 15), 204 | bandwidth = .2, 205 | k = 3, 206 | lambdaSel = "CV", 207 | ruleReg = "AND") 208 | 209 | tvmgm_obj$interactions$indicator 210 | # Seems very difficult to recover this time-varying 3-way binary interaction 211 | # See also the corresponding problems in the examples of ?mgmsampler 212 | 213 | 214 | # For more examples see https://github.com/jmbh/mgmDocumentation 215 | 216 | 217 | } 218 | 219 | 220 | } 221 | 222 | 223 | -------------------------------------------------------------------------------- /man/tvmvar.Rd: -------------------------------------------------------------------------------- 1 | \name{tvmvar} 2 | \alias{tvmvar} 3 | \alias{tv_var.mgm} 4 | 5 | \title{ 6 | Estimating time-varying Mixed Vector Autoregressive Model (mVAR) 7 | } 8 | 9 | \description{ 10 | Estimates time-varying Mixed Vector Autoregressive Model (mVAR) via elastic-net regularized kernel smoothed Generalized Linear Models 11 | } 12 | 13 | \usage{ 14 | tvmvar(data, type, level, timepoints, estpoints, bandwidth, ...) 15 | } 16 | 17 | 18 | \arguments{ 19 | \item{data}{ 20 | n x p data matrix. 21 | } 22 | \item{type}{ 23 | p vector indicating the type of variable for each column in \code{data}: "g" for Gaussian, "p" for Poisson, "c" for categorical. 24 | } 25 | \item{level}{ 26 | p vector indicating the number of categories of each variable. For continuous variables set to 1. 27 | } 28 | \item{timepoints}{ 29 | A strictly increasing numeric vector of length \code{nrow(data)} indicating time points for the measurements in \code{data}. If \code{timepoints} is not specified, it is assumed that the time points are equally spaced. For details, see Haslbeck and Waldorp (2018). 30 | } 31 | \item{estpoints}{ 32 | Vector indicating estimation points on interval [0, 1]. Note that we define this unit interval on the entire time series. This also includes measurements that are excluded because not enough previous measurements are available to fit the model. This ensures that the a model estimated at, for example, estimation point 0.15 is actually estimated on data close to data points around this time point. See Haslbeck and Waldorp (2018) Section 2.5 and 3.4 for a detailed description. 33 | } 34 | \item{bandwidth}{ 35 | We use a gaussian density on the unit time-interval [0,1] to determine the weights for each observation at each estimated time point. The bandwidth specifies the standard deviation the Gaussian density. To get some intuition, which bandwidth results in the combination of how many data close in time one can plot Gaussians on [0,1] for different bandwidths. The bandwidth can also be selected in a data driven way using the function (see \code{bwSelect}). 36 | 37 | } 38 | \item{\dots}{ 39 | Arguments passed to \code{mvar}, specifying how each single model should be estimated. See \code{?mvar}. 40 | } 41 | } 42 | 43 | 44 | \details{ 45 | Estimates a sequence of mVAR models at the time points specified at the locations specified via \code{estpoints}. \code{tvmvar()} is a wrapper around \code{mvar()} and estimates a series of MGM with different weightings which are defined by the estimation locations in \code{estpoints} and the banwdith parameter specified in \code{bandwidth}. For details see Haslbeck and Waldorp (2018) 46 | 47 | 48 | } 49 | \value{ 50 | A list with the following entries: 51 | 52 | 53 | \item{call}{ 54 | Contains all provided input arguments. If \code{saveData = TRUE}, it also contains the data. 55 | } 56 | 57 | 58 | \item{wadj}{ 59 | A p x p x n_lags x S array, where n_lags is the number of specified lags in \code{lags} (see \code{?mvar}) and S is the number of estimation points specified in \code{estpoints}. For instance, wadj[1, 2, 1, 10] is the cross-lagged predicting variable 1 at time point t by variable 2 at time point t - z, where z is specified by the first lag specified in \code{lags} (see \code{?mvar}), in the model estimated at estimation point 10.} 60 | 61 | \item{signs}{ Has the same structure as \code{wadj} and specifies the signs corresponding to the parameters in \code{wadj}, if defined. 1/-1 indicate positive and negative relationships, respectively. 0 indicates that no sign is defined, which is the case for interactions that involve a categorical variable where an interaction can have more than one parameter. If \code{binarySign = TRUE}, a sign is calculated for interactions between binary variables and binary and continuous variables, where the interaction is still defined by one parameter and hence a sign can be specified. \code{NA} indicates that the corresponding parameter in \code{wadj} is zero. See also \code{?mvar}. 62 | } 63 | 64 | \item{intercepts}{ 65 | A list with S entries, where S is the number of estimated time points. Each entry of that list contains a list p entries with the intercept/thresholds for each node in the network. In case a given node is categorical with m categories, there are m thresholds for this variable. 66 | } 67 | 68 | \item{tvmodels}{ 69 | Contains the mVAR model estimated by \code{mvar()} at each time point specified via \code{estpoints}. See \code{?mvar} for a detailed description of this output. 70 | } 71 | 72 | } 73 | 74 | 75 | \references{ 76 | 77 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 78 | 79 | } 80 | 81 | \author{ 82 | Jonas Haslbeck 83 | } 84 | 85 | 86 | 87 | \examples{ 88 | 89 | \dontrun{ 90 | 91 | 92 | ## We set up the same model as in the example of mvar(), but 93 | ## specify one time-varying parameter, and try to recover it with 94 | ## tvmvar() 95 | 96 | # a) Specify time-varying VAR model 97 | 98 | p <- 6 # Six variables 99 | type <- c("c", "c", "c", "c", "g", "g") # 4 categorical, 2 gaussians 100 | level <- c(2, 2, 4, 4, 1, 1) # 2 categoricals with 2 categories, 2 with 5 101 | max_level <- max(level) 102 | n_timepoints <- 4000 103 | 104 | lags <- c(1, 3, 9) # include lagged effects of order 1, 3, 9 105 | n_lags <- length(lags) 106 | 107 | # Specify thresholds 108 | thresholds <- list() 109 | thresholds[[1]] <- matrix(0, ncol=level[1], nrow=n_timepoints) 110 | thresholds[[2]] <- matrix(0, ncol=level[2], nrow=n_timepoints) 111 | thresholds[[3]] <- matrix(0, ncol=level[3], nrow=n_timepoints) 112 | thresholds[[4]] <- matrix(0, ncol=level[4], nrow=n_timepoints) 113 | thresholds[[5]] <- matrix(0, ncol=level[5], nrow=n_timepoints) 114 | thresholds[[6]] <- matrix(0, ncol=level[6], nrow=n_timepoints) 115 | 116 | # Specify standard deviations for the Gaussians 117 | sds <- matrix(NA, ncol=p, nrow=n_timepoints) 118 | sds[, 5:6] <- 1 119 | 120 | # Create coefficient array 121 | coefarray <- array(0, dim=c(p, p, max_level, max_level, n_lags, n_timepoints)) 122 | 123 | # a.1) interaction between continuous 5<-6, lag=3 124 | coefarray[5, 6, 1, 1, 2, ] <- seq(0, .4, length = n_timepoints) # only time-varying parameter 125 | # a.2) interaction between 1<-3, lag=1 126 | m1 <- matrix(0, nrow=level[2], ncol=level[4]) 127 | m1[1,1:2] <- 1 128 | m1[2,3:4] <- 1 129 | coefarray[1, 3, 1:level[2], 1:level[4], 1, ] <- m1 # constant across time 130 | # a.3) interaction between 1<-5, lag=9 131 | coefarray[1, 5, 1:level[1], 1:level[5], 3, ] <- c(0, 1) # constant across time 132 | 133 | 134 | # b) Sample 135 | set.seed(1) 136 | dlist <- tvmvarsampler(coefarray = coefarray, 137 | lags = lags, 138 | thresholds = thresholds, 139 | sds = sds, 140 | type = type, 141 | level = level, 142 | pbar = TRUE) 143 | 144 | # c.1) Recover: stationary 145 | set.seed(1) 146 | mvar_obj <- mvar(data = dlist$data, 147 | type = type, 148 | level = level, 149 | lambdaSel = "CV", 150 | lags = c(1, 3, 9), 151 | signInfo = FALSE) 152 | 153 | # Did we recover the true parameters? 154 | mvar_obj$wadj[5, 6, 2] # cross-lagged effect of 6 on 5 over lag lags[2] (lag 3) 155 | mvar_obj$wadj[1, 3, 1] # cross-lagged effect of 3 on 1 over lag lags[1] (lag 1) 156 | mvar_obj$wadj[1, 5, 3] # cross-lagged effect of 1 on 5 over lag lags[3] (lag 9) 157 | 158 | 159 | # c.2) Recover: time-varying 160 | set.seed(1) 161 | mvar_obj <- tvmvar(data = dlist$data, 162 | type = type, 163 | level = level, 164 | estpoints = seq(0, 1, length=10), 165 | bandwidth = .15, 166 | lambdaSel = "CV", 167 | lags = c(1, 3, 9), 168 | signInfo = FALSE) 169 | 170 | # Did we recover the true parameters? 171 | mvar_obj$wadj[5, 6, 2, ] # true sort of recovered 172 | mvar_obj$wadj[1, 3, 1, ] # yes 173 | mvar_obj$wadj[1, 5, 3, ] # yes 174 | 175 | # Plotting parameter estimates over time 176 | plot(mvar_obj$wadj[5, 6, 2, ], 177 | type="l", ylim=c(-.2,.7), 178 | lwd=2, ylab="Parameter value", xlab="Estimation points") 179 | lines(mvar_obj$wadj[1, 3, 1, ], col="red", lwd=2) 180 | lines(mvar_obj$wadj[1, 5, 3, ], col="blue", lwd=2) 181 | legend("bottomright", c("5 <-- 6", "1 <-- 3", "1 <-- 5"), 182 | lwd = c(2,2,2), col=c("black", "red", "blue")) 183 | 184 | 185 | # d) Predict values / compute nodewise error 186 | 187 | mvar_pred_w <- predict.mgm(object=mvar_obj, 188 | data=dlist$data, 189 | tvMethod = "weighted") 190 | 191 | mvar_pred_cM <- predict.mgm(object=mvar_obj, 192 | data=dlist$data, 193 | tvMethod = "closestModel") 194 | 195 | mvar_pred_w$errors 196 | mvar_pred_cM$errors 197 | 198 | # For more examples see https://github.com/jmbh/mgmDocumentation 199 | 200 | 201 | } 202 | 203 | } 204 | 205 | 206 | 207 | -------------------------------------------------------------------------------- /man/tvmvarsampler.Rd: -------------------------------------------------------------------------------- 1 | \name{tvmvarsampler} 2 | \alias{tvmvarsampler} 3 | 4 | \title{ 5 | Sampling from a time-varying mixed VAR model 6 | } 7 | \description{ 8 | Function to sample from a time-varying mixed VAR (mVAR) model 9 | } 10 | \usage{ 11 | tvmvarsampler(coefarray, lags, thresholds, 12 | sds, type, level, pbar) 13 | } 14 | 15 | \arguments{ 16 | \item{coefarray}{ 17 | A p x p x max(level) x max(level) x n_lags x N array, where p are the number of variables, level is the input argument \code{level} and n_lags is the number of specified lags in \code{lags}, so n_lags = length(n_lags), and N is the number of time points in the time series. The first four dimensions specify the parameters involved in the cross-lagged effects of the lag specified in the 5th dimension. I.e. coefarray[5, 6, 1, 1, 3, 100] indicates the cross-lagged effect of variable 6 on variable 5 (if both are continuous), for the third lag specified in \code{lags} at time point 100. If variable 1 and 3 are categorical with m = 2 and = 4 categories, respectively, then coefarray[1, 3, 1:2, 1:4, 1, 250] indicates the m*s=8 parameters specifying this interaction for the first lag specified in \code{lags} at time point 250. See the examples below for an illustration. 18 | } 19 | \item{lags}{ 20 | A vector indicating the lags in the mVAR model. E.g. \code{lags = c(1, 4, 9)} specifies lags of order 1, 3, 9. The number of specified lags has to match the 5th dimension in \code{coefarray}. 21 | } 22 | \item{thresholds}{ 23 | A list with p entries, each consisting of a matrix indicating a threshold for each category of the given variable (column) and time point (row). For continuous variable, the matrix has 1 column. 24 | } 25 | \item{sds}{ 26 | A N x p matrix specifying the standard deviation of Gaussian variables (columns) at each time point (rows)If non-Gaussian variables are included in the mVAR model, the corresponding columns are ignored. 27 | } 28 | \item{type}{ 29 | p vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" for categorical. 30 | } 31 | \item{level}{ 32 | p vector indicating the number of categories of each variable. For continuous variables set to 1. 33 | } 34 | \item{pbar}{ 35 | If \code{pbar = TRUE}, a progress bar is shown. 36 | } 37 | } 38 | \details{ 39 | We sample from the mVAR model by separately sampling from its corresponding p conditional distributions. 40 | } 41 | \value{ 42 | A list with two entries: 43 | 44 | \item{call}{The function call} 45 | 46 | \item{data}{The sampled n x p data matrix} 47 | 48 | } 49 | \references{ 50 | 51 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08 52 | 53 | } 54 | \author{ 55 | Jonas Haslbeck 56 | } 57 | 58 | 59 | 60 | \examples{ 61 | 62 | 63 | \dontrun{ 64 | 65 | ## We specify a tvmvar model, sample from it and recover it 66 | 67 | # a) Set up time-varying mvar model 68 | 69 | p <- 6 # Six variables 70 | type <- c("c", "c", "c", "c", "g", "g") # 4 categorical, 2 gaussians 71 | level <- c(2, 2, 4, 4, 1, 1) # 2 categoricals with 2 categories, 2 with 5 72 | max_level <- max(level) 73 | 74 | lags <- c(1, 3, 9) # include lagged effects of order 1, 3, 9 75 | n_lags <- length(lags) 76 | 77 | N <- 5000 78 | 79 | # Specify thresholds 80 | thresholds <- list() 81 | thresholds[[1]] <- matrix(0, ncol=2, nrow=N) 82 | thresholds[[2]] <- matrix(0, ncol=2, nrow=N) 83 | thresholds[[3]] <- matrix(0, ncol=4, nrow=N) 84 | thresholds[[4]] <- matrix(0, ncol=4, nrow=N) 85 | thresholds[[5]] <- matrix(0, ncol=1, nrow=N) 86 | thresholds[[6]] <- matrix(0, ncol=1, nrow=N) 87 | 88 | # Specify standard deviations for the Gaussians 89 | sds <- matrix(NA, ncol=6, nrow=N) 90 | sds[,5:6] <- 1 91 | 92 | # Create coefficient array 93 | coefarray <- array(0, dim=c(p, p, max_level, max_level, n_lags, N)) 94 | 95 | # a.1) interaction between continuous 5<-6, lag=3 96 | coefarray[5, 6, 1, 1, 2, ] <- c(rep(.5, N/2), rep(0, N/2)) 97 | # a.2) interaction between 1<-3, lag=1 98 | m1 <- matrix(0, nrow=level[2], ncol=level[4]) 99 | m1[1, 1:2] <- 1 100 | m1[2, 3:4] <- 1 101 | coefarray[1, 3, 1:level[2], 1:level[4], 1, ] <- m1 102 | # a.3) interaction between 1<-5, lag=9 103 | coefarray[1, 5, 1:level[1], 1:level[5], 3, ] <- c(0, 1) 104 | 105 | dim(coefarray) 106 | 107 | 108 | # b) Sample 109 | set.seed(1) 110 | dlist <- tvmvarsampler(coefarray = coefarray, 111 | lags = lags, 112 | thresholds = thresholds, 113 | sds = sds, 114 | type = type, 115 | level = level, 116 | pbar = TRUE) 117 | 118 | 119 | # c) Recover: time-varying mVAR model 120 | set.seed(1) 121 | tvmvar_obj <- tvmvar(data = dlist$data, 122 | type = type, 123 | level = level, 124 | lambdaSel = "CV", 125 | lags = c(1, 3, 9), 126 | estpoints = seq(0, 1, length=10), 127 | bandwidth = .05) 128 | 129 | tvmvar_obj$wadj[5, 6, 2, ] # parameter goes down, as specified 130 | tvmvar_obj$wadj[1, 3, 1, ] 131 | tvmvar_obj$wadj[1, 5, 3, ] 132 | 133 | # For more examples see https://github.com/jmbh/mgmDocumentation 134 | 135 | } 136 | 137 | } 138 | 139 | 140 | -------------------------------------------------------------------------------- /mgm.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(mgm) 3 | 4 | test_check("mgm") 5 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/tests/testthat/Rplots.pdf -------------------------------------------------------------------------------- /tests/testthat/test_MNM.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_that("MNM with k-specification", { 4 | 5 | data(autism_data) 6 | 7 | # Fit MGM with pairwise & three-way interactions 8 | set.seed(1) 9 | fit_k3 <- mgm(data = autism_data$data, 10 | type = autism_data$type, 11 | level = autism_data$lev, 12 | lambdaSel = "CV", 13 | lambdaFolds = 10, 14 | k = 3, 15 | pbar = FALSE, 16 | signInfo = FALSE) 17 | 18 | # Number of 3-way interactions = 11 19 | expect_equal(nrow(fit_k3$interactions$indicator[[2]]), 10) 20 | 21 | 22 | # Plot Factor Graph (only to catch errors ...) 23 | FactorGraph(object = fit_k3, 24 | PairwiseAsEdge = FALSE, 25 | labels = autism_data$colnames) 26 | 27 | 28 | }) -------------------------------------------------------------------------------- /tests/testthat/test_mgm.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | test_that("Pairwise mgm examples return expected estimates", { 4 | 5 | # --- Fit pairwise with EBIC --- 6 | fit_k2 <- mgm(data = autism_data$data, 7 | type = autism_data$type, 8 | level = autism_data$lev, 9 | lambdaSel = "EBIC", 10 | lambdaGam = 0.25, 11 | k = 2, 12 | pbar = FALSE, 13 | signInfo = FALSE) 14 | 15 | # check one pairwise interactiion 16 | expect_equal(round(fit_k2$pairwise$wadj[1, 4], 3), 0.152) 17 | # and one intercept 18 | expect_equal(round(fit_k2$intercepts[[1]][[1]], 3), 0.664) 19 | 20 | 21 | # --- Fit pairwise with CV --- 22 | set.seed(1) 23 | fit_k2 <- mgm(data = autism_data$data, 24 | type = autism_data$type, 25 | level = autism_data$lev, 26 | lambdaSel = "CV", 27 | lambdaFolds = 10, 28 | k = 2, 29 | pbar = FALSE, 30 | signInfo = FALSE) 31 | 32 | # check one pairwise interactiion 33 | expect_equal(round(fit_k2$pairwise$wadj[1, 4], 3), 0.173) 34 | # and one intercept 35 | expect_equal(round(fit_k2$intercepts[[1]][[1]], 3), 0.72) 36 | 37 | 38 | }) --------------------------------------------------------------------------------