├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS
├── R
    ├── DrawFG.R
    ├── FactorGraph.R
    ├── FlagSymmetric.R
    ├── ModelMatrix.R
    ├── ModelMatrix_standard.R
    ├── Reg2Graph.R
    ├── applyTauAND.R
    ├── beepday2consec.R
    ├── bwSelPredict.R
    ├── bwSelect.R
    ├── calcLL.R
    ├── calcNeighbors.R
    ├── condition.R
    ├── condition_core.R
    ├── defunct_msg.R
    ├── getSign.R
    ├── glmnetRequirements.R
    ├── lagData.R
    ├── mgm.R
    ├── mgmsampler.R
    ├── mvar.R
    ├── mvarsampler.R
    ├── nodeEst.R
    ├── plotRes.R
    ├── predict.mgm.R
    ├── predictCore_stat.R
    ├── print.int.R
    ├── print.mgm.R
    ├── print.resample.R
    ├── resample.R
    ├── showInteraction.R
    ├── startup_msg.R
    ├── tvmgm.R
    ├── tvmgmsampler.R
    ├── tvmvar.R
    └── tvmvarsampler.R
├── README.md
├── data
    ├── B5MS.RData
    ├── Fried2015.RData
    ├── PTSD_data.RData
    ├── autism_data.RData
    ├── autism_data_large.RData
    ├── dataGD.RData
    ├── fruitfly_data.RData
    ├── mgm_data.RData
    ├── modnw.RData
    ├── msq_p3.RData
    ├── msq_p5.RData
    ├── mvar_data.RData
    ├── restingstate_data.RData
    └── symptom_data.RData
├── inst
    └── CITATION
├── man
    ├── FactorGraph.Rd
    ├── bwSelect.Rd
    ├── condition.Rd
    ├── datasets.Rd
    ├── mgm-internal.Rd
    ├── mgm-package.Rd
    ├── mgm.Rd
    ├── mgmsampler.Rd
    ├── mvar.Rd
    ├── mvarsampler.Rd
    ├── plotRes.Rd
    ├── predict.mgm.Rd
    ├── print.int.Rd
    ├── print.mgm.Rd
    ├── resample.Rd
    ├── showInteraction.Rd
    ├── tvmgm.Rd
    ├── tvmgmsampler.Rd
    ├── tvmvar.Rd
    └── tvmvarsampler.Rd
├── mgm.Rproj
└── tests
    ├── testthat.R
    └── testthat
        ├── Rplots.pdf
        ├── test_MNM.R
        └── test_mgm.R


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | src/*.o
5 | src/*.so
6 | src/*.dll
7 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: mgm
 2 | Type: Package
 3 | Title: Estimating Time-Varying k-Order Mixed Graphical Models
 4 | Version: 1.2-16
 5 | Date: 2023-09-04
 6 | Author: Jonas Haslbeck
 7 | Maintainer: Jonas Haslbeck  <jonashaslbeck@gmail.com>
 8 | Depends: R (>= 3.5.0)
 9 | Description: Estimation of k-Order time-varying Mixed Graphical Models and mixed VAR(p) models via elastic-net regularized neighborhood regression. For details see Haslbeck & Waldorp (2020) <doi:10.18637/jss.v093.i08>.
10 | URL: https://www.jstatsoft.org/article/view/v093i08
11 | BugReports: https://github.com/jmbh/mgm/issues
12 | License: GPL (>= 2)
13 | Imports: glmnet, stringr, Hmisc, qgraph, gtools
14 | LazyData: true
15 | Packaged: 2025-11-03 00:00:01 CET
16 | Suggests: 
17 |     testthat (>= 2.0.0)
18 | Config/testthat/edition: 2
19 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # exportPattern("^[[:alpha:]]+")
 2 | 
 3 | 
 4 | # --- Packages mgm depends on ---
 5 | # import(matrixcalc) # archived on CRAN
 6 | import(stringr)
 7 | import(Hmisc)
 8 | import(qgraph) # for FactorGraph()
 9 | import(gtools)
10 | #import(glmnet) # now specificly below
11 | 
12 | #importFrom("gtools", "permutations")
13 | importFrom("glmnet", "coef.glmnet", "glmnet", "cv.glmnet")
14 | 
15 | 
16 | # Recommended by build check
17 | importFrom("stats", "as.formula", "coef", "dnorm", "dpois",
18 |              "model.matrix", "var", "quantile")
19 | importFrom("utils", "combn", "setTxtProgressBar", "txtProgressBar")
20 | importFrom("stats", "rnorm", "rpois")
21 | importFrom("graphics", "abline", "axis", "layout", "par", "plot.new",
22 |              "plot.window", "points", "segments", "text")
23 | importFrom("graphics", "rect")
24 | importFrom("stats", "sd")
25 | 
26 | 
27 | # --- Exported functions ---
28 | export(mgm)
29 | export(mvar)
30 | 
31 | export(tvmgm)
32 | export(tvmvar)
33 | export(bwSelect)
34 | 
35 | 
36 | export(mgmsampler)
37 | export(mvarsampler)
38 | export(tvmgmsampler)
39 | export(tvmvarsampler)
40 | 
41 | export(resample)
42 | export(showInteraction)
43 | export(plotRes)
44 | 
45 | export(FactorGraph)
46 | export(condition)
47 | 
48 | 
49 | # Exported S3 Methods
50 | S3method(predict, mgm)
51 | S3method(print, mgm)
52 | S3method(print, resample)
53 | S3method(print, int)
54 | 
55 | 
56 | # Export old functions with defunct msg
57 | export(mgmfit)
58 | export(var.mgm)
59 | export(tv.mgmfit)
60 | export(tv_var.mgm)
61 | 
62 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
  1 | Changes in Version 1.2-16
  2 |   o 
  3 | 
  4 | Changes in Version 1.2-14
  5 |   o When setting the argument binarySign=TRUE, now also the signs of higher-order (i.e., 3-way and higher) interactions involving binary variables and continuous variables are reported in the output
  6 |   o Fixed new issue creating errors on Unix/Windows machines on CRAN
  7 | 
  8 | Changes in Version 1.2-13
  9 |   o Fixed bug: in EBIC calculations, the standard deviation in the likelihood function was set equal to 1. Now it is set to the standard deviation of the residual as it should be.
 10 |   o Use inherits(X, "??") instead of class(X)="??" throughout, since this seems to be required by CRAN now
 11 | 
 12 | Changes in Version 1.2-12
 13 |   o Removed dependency on matrixcalc package which as been archived
 14 |   o Improved mvar() input checks
 15 | 
 16 | Changes in Version 1.2-11
 17 |   o Fixed bug: switch off thresholding when using using internal function Reg2Graph() in condition()
 18 |   o Extend condition() to Poisson variables
 19 |   o Fixed bug: The processing of certain custom moderation models created an error
 20 |   o Fixed bug: In plotRes() negative moderation effects of binary variables were positive even when binarySign=TRUE
 21 | 
 22 | Changes in Version 1.2-10
 23 |   o Added a simulated example data set to illustrate estimating group differences using moderation
 24 |   o Fixed problems in computing binary signs created by R 4.0.0
 25 |   o Added colorblind color scheme to pairwise output
 26 |   
 27 | Changes in Version 1.2-9
 28 |   o Fixed bug in bwSelect() + tvmgm() when specifying k manually
 29 | 
 30 | Changes in Version 1.2-8
 31 |   o Fixed bug in computing predictions for Poisson variables
 32 |   o Fixed bug that only printed 10 pairwise interactions in plotRes() for MNMs
 33 |   o resample(): removed seed as an argument; now seeds are randomly drawn; for MGM objects, seeds are resampled until the MGM can be estimated on each of the bootstrap samples; the final seeds are provided in the output
 34 |   o Fixed bug in Reg2Graph()/mgm(): moderators were not shown in the output when specified as a matrix
 35 |   o Fixed bug in tvmvarsampler()
 36 |   o Fixed bug: Adapted condition() to custom moderator input in mgm()
 37 |   
 38 | Changes in Version 1.2-7
 39 |   o Fixed bug in bootstrap scheme for time-varying VAR models
 40 |   o Provide optional output of chains of Gibbs sampler from mgmsampler()
 41 |   o New function condition() allows to condition on values of a set of variables. The main motivation for this function is to make 3-way interactions / moderation more accessible.
 42 |   o mgm() now allows one to specify custom sets of moderation effects via the "moderators" argument
 43 |   o Fixed bug in bwSelect in combination with mgm() opjects
 44 |   o Extended plotRes() to moderated MGMs with only a single moderator variable
 45 | 
 46 | Changes in Version 1.2-6
 47 |   o Fix aliasing of internal functions
 48 | 
 49 | Changes in Version 1.2-5
 50 |   o Bug fix in resample()
 51 |   o Improved input checks in mgmsampler()
 52 |   o Bug fix in prediction with mvar() objects
 53 |   o Allow for prediction with time-varying models in new data sets
 54 | 
 55 | Changes in Version 1.2-4
 56 |   o Updated help files
 57 |   o Added two example data sets for higher-order (moderated) MGMs
 58 | 
 59 | Changes in Version 1.2-3
 60 |   o Bug fix in bwSelect()
 61 |   o Added verbatim option in resample() to detect zero variance bootstrap samples
 62 |   o Fixed warning due to R 3.4.0: "Recycling array of length 1 in vector-array arithmetic is deprecatd.Use c() or as.vector() instead."
 63 |   o Bug fix: resampling did not work together with new mvar() arguments dayvar/beepvar since Version 1.2-2
 64 |   o Added argument "moderators" to the mgm() function that allows to ...
 65 |   o Change default: in earlier versions if overparameterize=TRUE, all thresholds (intercepts) of categorical variables were set to zero. From this version on this threshold is estimated. The thresholds of categoricals can still be set to zero with the new argument "thresholdCat"
 66 |   o If scale=TRUE, now interaction terms (k>2 or moderation) are scaled after computing the design matrix to ensure that all predictors have the same mean and standard deviation, which avoids different penalization depending on the scaling of a predictor
 67 |   o Add CES-D depression dataset taken from Fried et al. (2015) to example datasets
 68 |   o Added function FactorGraph() that computes and plots a factor graph from stationary or time-varying MGMs. The factorgraph output has been removed from the mgm() and tvmgm() functions.
 69 |   o Added support of showInteraction() to continuous 3-way interactions
 70 |   o For all time-varying models the estimation points are now specified on the unit interval [0,1]; for tvmvar() models, 0 is associated with the first data point that is not trivially excluded due to the maximum lag
 71 | 
 72 | 
 73 | Changes in Version 1.2-2
 74 |   o Add running time to output of resample() function
 75 |   o Added a note and references on the normalizability of MGMs
 76 |   o Added function plotRes() to plot summaries of sampling distributions resampled with the resample() function
 77 |   o The consecutiveness of measurements in mvar() and tvmvar() models now can be specified alternatively to the consec argument with the arguments beepvar and dayvar. This is tailored to the typical time stamps of ecological momentary assessment (EMA) studies, where the consecutiveness is defined by the number of notification on a given day (beepvar) and the number of that given day (dayvar).
 78 |   o New function showInteraction() retrieves details of interactions from mgm model objects
 79 |   o The print method now also shows number of rows in the VAR design matrix compared to the number of rows of the original data matrix.
 80 |   o All estimation functions require a matrix object as data input to avoid problems with properties of data.frame objects
 81 |   o Corrected row-order in dataset "symptom_data"  
 82 |   o In mVAR models we use an adjusted sample size that is equal to the number of rows in the design matrix. The adjusted sample size can be smaller than nrow(data) - max(lags) if measurements are not consecutive, or if non-uniform weighting is used. In earlier versions the sample size was only adjusted for non-uniform weighting. The adjusted sample size is used in the additional thresholding of parameter estimates (see ?mgm or ?mvar).
 83 |   o Added function resample() that fits any model class to a specified number of bootstrap samples and provides the bootstrapped sampling distribution in an array. For time series data, the block bootstrap is used, where the number of blocks can be specified.
 84 |   
 85 | Changes in Version 1.2-1
 86 |   o Added consec argument to predict.mgm(), which allows prediction for VAR models in time series with unequal time intervals by excluding time points that are not preceded by enough measurements (see ?mvar)
 87 | 
 88 | Changes in Version 1.2-0
 89 |   o The whole package has been rewritten from scratch to reduce susceptibility to bugs and to allow new features
 90 |   o Every function now has many input checks and associated informative error messages
 91 |   o Major syntax changes: mgmfit() -> mgm(), var.mgm() -> mvar(), tv.mgmfit() -> tvmgm() and tv_var.mgm() -> tvmvar()
 92 |   o The names of function arguments are now consistent across the package and therefore had to be changed considerably
 93 |   o There is no more missing argument in the estimation functions
 94 |   o All estimation functions allow to search a sequence of the (alpha) elastic net paramter, using the EBIC or cross-validation; so far alpha was fixed to 1
 95 |   o mgm() and tvmgm() now presents all paramters involved in higher (than pairwise) order interactions in the output; this includes a factor-graph representation that is easy to visualize
 96 |   o The d parameter for the largest order of interaction in the neighborhood of a given node has been replaced with the k parameter, the largest order of interaction in the whole graph. Note that d = k - 1
 97 |   o All estimation functions allow the standard parameterization for categorical variables, but also an overparameterization. This is necessary to correctly identify higher order interactions between categorical variables.
 98 |   o The sampling functions mgmsampler() and tvmgmsampler() were extended to k-order MGMs (before only pairwise / k = 2)
 99 |   o mvar() and tvmvar() now allow the specification of any number of lags
100 |   o The new functions mvarsampler() and tvmvarsampler() now allow to sample from mVAR models any number of lags
101 |   o There is no more function provided for resampling. Instead, we provide an interface with the bootnet package.
102 |   o The function bwSelect() allows the selection of an optimal bandwidth parameter for timer-varying MGM or mVAR models using cross-validation
103 |   o predict.mgm() now allows two different ways to predict from time-varying models, see ?predict.mgm
104 |   o Fixed bug in mgmsampler() that was present in binary-Gaussian graphs
105 |   o Fixed bug in predict.mgm() which caused the prediction of incorrect category labels in some situations
106 |   o Fixed bug in mgm() which did not use the weight-argument in case of lambdaSel = 'CV'
107 |   o Default for argument lambdaSel changed from 'EBIC' to 'CV'
108 | 
109 | Changes in Version 1.1-7
110 |   o The predict() function now returns the predicted probabilities in addition to the predicted category for categorical variables
111 |   o Added a message for all estimation functions indicating where edge weights (if defined) can be found: fitobject$signs
112 |   o Added a startup message with a link to report bugs
113 |   o The predict() function now computes predicted values and a prediction error for each variable in the graph
114 |   o The print() function now returns a small summary of the model type when printing a mgm object
115 |   o Added subsampling scheme to evaluate edge-stability for non-time-varying models (MGM and mixed VAR)
116 |       o Added summary() & plot() for the bootstrap object to summarize edge-stability
117 |   o Added argument 'binary.sign': If binary.sign=TRUE, the sign of the interactions of all binary variables coded (0,1) with other binary variables and continuous variables will be returned in the sign matrix fit$signs
118 | 
119 | 


--------------------------------------------------------------------------------
/R/FactorGraph.R:
--------------------------------------------------------------------------------
  1 | # jonashaslbeck@gmail.com; March 2016
  2 | 
  3 | FactorGraph <- function(object, 
  4 |                         labels,
  5 |                         PairwiseAsEdge = FALSE, 
  6 |                         Nodewise = FALSE,
  7 |                         DoNotPlot = FALSE, 
  8 |                         FactorLabels = TRUE, 
  9 |                         colors,
 10 |                         shapes, 
 11 |                         shapeSizes = c(8, 4), 
 12 |                         estpoint = NULL,
 13 |                         negDashed = FALSE,
 14 |                         ...)
 15 | 
 16 | {
 17 |   
 18 |   # --------- Compute Aux Variables ---------
 19 |   
 20 |   if(Nodewise) PairwiseAsEdge <- FALSE
 21 |   
 22 |   p <- length(object$call$level)
 23 |   n_estpoints <- length(object$call$estpoints)
 24 |   
 25 |   # --------- Input Checks ---------
 26 |   
 27 |   if(!missing(labels)) if(length(labels) != p) stop("Number of provided labels has to match the number of variables.")
 28 |   
 29 |   # Checks for time-varying FactorGraph  
 30 |   if("tvmgm" %in% class(object)) {
 31 |     if(missing(estpoint)) stop("Specify the estimation point for which the factor graph should be visualized.")
 32 |     if(estpoint > n_estpoints) stop(paste0("The provided fit object has only ", n_estpoints, " estimation points."))
 33 |   } 
 34 |   
 35 |   if(object$call$k > 4) stop("Please specify additional colors/shapes for interactions with order > 4.")
 36 |   
 37 |   
 38 |   # --------- Create FractorGraph object ---------
 39 |   
 40 |   call <- list("object" = object)
 41 |   
 42 |   FG_object <- list("call" = call,
 43 |                     "graph" = NULL,
 44 |                     "nodetype" = NULL,
 45 |                     "order" = NULL,
 46 |                     "signs" = NULL,
 47 |                     "edgecolor" = NULL, 
 48 |                     "nonzero" = NULL,
 49 |                     "qgraph" = NULL)
 50 |   
 51 |   
 52 |   # --------- Fill in defaults ---------
 53 |   
 54 |   if(missing(labels)) labels <- 1:p
 55 |   if(missing(colors)) colors <- c("white", "tomato", "lightblue", "orange")
 56 |   if(missing(shapes)) shapes <- c("circle", "square", "triangle", "diamond")
 57 |   layout <- "circle"
 58 |   cut <- 0
 59 |   
 60 |   
 61 |   # --------- Compute Factor Graph ----------
 62 |   
 63 |   # Call different DrawFG() version for stationary/time-varying
 64 |   if("tvmgm" %in% class(object)) {
 65 |     
 66 |     # Time-varying
 67 |     FG <- DrawFGtv(object = object,
 68 |                    PairwiseAsEdge = PairwiseAsEdge, 
 69 |                    Nodewise = Nodewise,
 70 |                    estpoint = estpoint)
 71 |     
 72 |   } else {
 73 |     
 74 |     # Stationary
 75 |     FG <- DrawFG(object = object,
 76 |                  PairwiseAsEdge = PairwiseAsEdge, 
 77 |                  Nodewise = Nodewise)
 78 |     
 79 |   }
 80 |   
 81 |   # Save into FG_object
 82 |   FG_object$graph <- FG$weightedgraph
 83 |   FG_object$nodetype <- FG$nodetype
 84 |   FG_object$order <- FG$order
 85 |   FG_object$signs <- FG$signs
 86 |   FG_object$edgecolor <- edge.color <- FG$signcolor
 87 |   FG_object$nonzero <- FG$nonzero
 88 |   
 89 |   
 90 |   
 91 |   # Allow overwriting ...
 92 |   args <- list(...)
 93 |   if(!is.null(args$cut)) cut <- args$cut
 94 |   if(!is.null(args$layout)) layout <- args$layout
 95 |   if(!is.null(args$edge.color)) edge.color <- args$edge.color
 96 |   
 97 |   
 98 |   # browser()
 99 |   
100 |   # Adapt edge labels for zero edges in Nodewise=TRUE
101 |   if(!is.null(args$edge.labels)) { # if specified, otherwise set to FALSE
102 |     if(is.logical(args$edge.labels)) { # if specified and logical, then adapt for nonzero or FALSE
103 |       if(args$edge.labels) {
104 |         edge.labels <- FG_object$graph
105 |         edge.labels[FG_object$nonzero == 2] <- 0
106 |         edge.labels <- round(edge.labels, 2)
107 |       } else {
108 |         edge.labels = FALSE
109 |       }
110 |     } else {
111 |       # if not logical, take the input
112 |       edge.labels <- args$edge.labels
113 |     }
114 |   } else {
115 |     edge.labels = FALSE
116 |   }
117 |   
118 |   
119 |   # Edge lty: allow negative edges to be dashed for greyscale images
120 |   edge_lty <- FG_object$nonzero
121 |   if(negDashed) edge_lty[edge.color == "red"] <- 2
122 |   
123 |   # --------- Plot & Return ---------
124 |   
125 |   if(!DoNotPlot){
126 |     
127 |     # ----- Compute stuff necessary for plotting -----
128 |     
129 |     # Create labels for factors (label = order of factor/interaction)
130 |     ifelse(PairwiseAsEdge, ek <- 1, ek <- 0)
131 |     if(FactorLabels) {
132 |       tb <- table(FG_object$order)[-1]
133 |       
134 |       if(length(tb)==0) { # For the case PairwiseAsEdge=FALSE and no 3-way interactions
135 |         FL <- NULL
136 |       } else {
137 |         l_lf <- list()
138 |         for(k in 1:length(tb)) l_lf[[k]] <- rep(k+1+ek, tb[k])
139 |         FL <- unlist(l_lf)
140 |       }
141 |       
142 |       labels_ex <- c(labels, FL)
143 |     } else {
144 |       labels_ex <- c(labels, rep('', sum(FG_object$nodetype)))
145 |     }
146 |     
147 |     # ----- Call qgraph -----
148 |     
149 |     qgraph_object <- qgraph(FG_object$graph,
150 |                             color = colors[FG_object$order + 1],
151 |                             edge.color = edge.color,
152 |                             lty = edge_lty,
153 |                             layout = layout,
154 |                             labels =  labels_ex,
155 |                             shape = shapes[FG_object$order + 1],
156 |                             vsize = shapeSizes[FG_object$nodetype + 1], 
157 |                             edge.labels = edge.labels,
158 |                             cut = cut,
159 |                             ...)
160 |     
161 |     FG_object$qgraph <- qgraph_object
162 |     
163 |     
164 |     invisible(FG_object) # return output object invisible
165 |     
166 |   } else {
167 |     return(FG_object)
168 |   }
169 |   
170 |   
171 |   
172 | } # eoF


--------------------------------------------------------------------------------
/R/FlagSymmetric.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # Gives identical interactions (e.g. 1-2-3 and 3-1-2) the same flag
 4 | 
 5 | # Input: Matrix with row = number of interactions, col = involved variables
 6 | # Output: numeric indicator vector
 7 | 
 8 | 
 9 | 
10 | FlagSymmetric <- function(x) {
11 |   
12 |   vec_sim <- rep(NA, nrow(x))
13 |   ind_ord <- ncol(x)
14 |   
15 |   counter <- 1
16 |   
17 |   for(i in 1:nrow(x)) {
18 |     
19 |     if(is.na(vec_sim[i])) {
20 |       
21 |       vec_sim[i] <- counter
22 |       
23 |       for(j in (i+1):nrow(x)) {
24 |         
25 |         if( (i+1) > nrow(x) ) next # in case of very few interactions
26 |         
27 |         ind <- x[j, ] %in% x[i, ]
28 |         if(sum(ind)==ind_ord) vec_sim[j] <- counter
29 |         
30 |       }
31 |       
32 |       counter <- counter + 1
33 |       
34 |     }
35 |   }
36 |   return(vec_sim)
37 | }
38 | 
39 | 
40 | 
41 | FlagSymmetricFast <- function(x) {
42 |   
43 |   x <- data.frame(x)
44 |   flag <- as.numeric(factor(apply(x, 1, function(x) paste0(sort(x), collapse = "-"))))
45 |   flag_consec <- as.numeric(factor(flag, levels = unique(flag))) # I think unnecessary
46 |   
47 |   return(flag_consec)
48 | }
49 | 
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/R/ModelMatrix.R:
--------------------------------------------------------------------------------
  1 | # jonashaslbeck@gmail.com; May 2018
  2 | 
  3 | 
  4 | ModelMatrix <- function(data,  # matrix
  5 |                         type,  # type vector (I think not needed, level should be sufficient)
  6 |                         level, # level vector
  7 |                         labels,
  8 |                         d, # largest neighborhood size
  9 |                         moderators = NULL,
 10 |                         v = NULL,
 11 |                         allCats = FALSE # if true, the model matrix does not use all unique categories, but the categories specified in level, this exists because I use this function within the sampling function
 12 | )
 13 |   
 14 |   
 15 | {
 16 |   
 17 |   
 18 |   # Delete variable v: only for MGMs!
 19 |   if(!is.null(v)) {
 20 |     data <- data[, -v]
 21 |     type <- type[-v]
 22 |     level <- level[-v]
 23 |     labels <- labels[-v]
 24 |   }
 25 |   
 26 |   
 27 |   # ---------- Calculate Auxilliary variables ----------
 28 |   
 29 |   p <- ncol(data) # note that we have only the predictors here!!
 30 |   n <- nrow(data)
 31 |   
 32 |   # mSpec <- ifelse(class(moderators) %in% c("integer", "numeric"), "vector", "matrix")
 33 |   mSpec <- ifelse(any(class(moderators) %in% c("integer","numeric")), "vector", "matrix")
 34 |   
 35 |   # ---------- Input Checks ----------
 36 |   
 37 |   if(p != length(type)) stop('Length of type has to match the number of columns in data.')
 38 |   if(p != length(level)) stop('Length of level has to match the number of columns in data.')
 39 |   if(!(inherits(level, "numeric") | inherits(level, "integer"))) stop('level has to be an integer vector.')
 40 |   
 41 |   # ---------- Calculate Indicator Functions for all Variables ----------
 42 |   
 43 |   l_ind_datasets <- list()
 44 |   for(j in 1:p) {
 45 |     
 46 |     if(type[j] != 'c') {
 47 |       
 48 |       l_ind_datasets[[j]] <- as.matrix(data[, j])
 49 |       colnames(l_ind_datasets[[j]]) <- paste0(labels[j])
 50 |       
 51 |     } else {
 52 |       
 53 |       if(allCats==FALSE) {
 54 |         
 55 |         unique_labels <- unique(data[, j])
 56 |         unique_labels_sorted <- sort(unique_labels)
 57 |         n_labels <- length(unique_labels_sorted)
 58 |         ind_matrix <- matrix(NA, nrow = n, ncol=n_labels)
 59 |         for(s in 1:n_labels) ind_matrix[, s] <- data[, j] == unique_labels_sorted[s]
 60 |         
 61 |       } else {
 62 |         
 63 |         # This is used in mvarsampler() and avoids that the design matrix is too small cases early in the time series, where not all categories are seen yet
 64 |         unique_labels <- 1:level[j]
 65 |         n_labels <- level[j]
 66 |         ind_matrix <- matrix(NA, nrow = n, ncol=n_labels)
 67 |         for(s in 1:n_labels) ind_matrix[, s] <- data[, j] == unique_labels[s]
 68 |         
 69 |       }
 70 |       
 71 |       # Add Var names
 72 |       cn <- paste0(labels[j], 1:n_labels)
 73 |       colnames(ind_matrix) <- cn
 74 |       l_ind_datasets[[j]] <- ind_matrix
 75 |       
 76 |     }
 77 |     
 78 |   }
 79 |   
 80 |   # ---------- Collect d = 1 interaction Terms ----------
 81 |   
 82 |   # In this case we just use the indicator functions computed above
 83 |   
 84 |   l_ind_datasets_nV <- l_ind_datasets
 85 |   Xd1 <- do.call(cbind, l_ind_datasets_nV)
 86 |   
 87 |   
 88 |   # ---------- Compute all d>1 Interaction Terms ----------
 89 |   
 90 |   # Here we loop over all orders, in each order over all interactions, and in each interaction over all combinations of the levels of all variables
 91 |   
 92 |   
 93 |   if(!is.null(moderators)) d <- 2 # If first-order moderation (three way interactions) is specified, we fix k = 3, d = k - 1 = 2
 94 |   
 95 |   if(d > 1) {
 96 |     
 97 |     ## List all possible Interactions
 98 |     l_interactions <- vector('list', length = d)
 99 |     
100 |     # Case I: No Moderation
101 |     if(is.null(moderators)) {
102 |       for(ord in 1:d) l_interactions[[ord]] <- combn((1:p), ord, simplify = FALSE) # note that the numbers here refer to the all variables minus variable v
103 |     } else {
104 |       
105 |       l_interactions[[1]] <- list()
106 |       for(i in 1:p) l_interactions[[1]][[i]] <- i
107 |       
108 |       # Case II: Moderation
109 |       
110 |       if(mSpec == "vector") {
111 |         
112 |         if(v %in% moderators) {
113 |           
114 |           l_interactions[[2]] <- combn(1:p, 2, simplify = FALSE) # all combinations of the remaining (here denoted by 1:p) variables
115 |           
116 |         } else {
117 |           
118 |           ind_mod_MM <- (1:(p+1) %in% moderators)[-v] # indicator(moderator?) for 1:p predictors
119 |           n_mods <- sum(ind_mod_MM)
120 |           which_mod <- which(ind_mod_MM)
121 |           
122 |           l_mods <- list()
123 |           for(i in 1:n_mods) l_mods[[i]] <- expand.grid((1:p)[-which_mod[i]], which_mod[i]) # loop over moderators
124 |           m_mods <- do.call(rbind, l_mods)
125 |           m_mods <- as.matrix(m_mods)
126 |           
127 |           l_mods_combn <- list()
128 |           for(i in 1:nrow(m_mods)) l_mods_combn[[i]] <- m_mods[i, ] # turn each row into list entry
129 |           
130 |           l_interactions[[2]] <- l_mods_combn
131 |           
132 |         } # end: v = moderator?
133 |         
134 |       } # end if: vector specification?
135 |       
136 |       
137 |       if(mSpec == "matrix") {
138 |         
139 |         nrow_mods <- nrow(moderators)
140 |         ind_v_inMod <- as.logical(apply(moderators, 1, function(x) v %in% x  ))
141 |         
142 |         if(sum(ind_v_inMod)>0) { # if variable v is involved in at least one interaction
143 |           # get interaction terms for node v
144 |           int_terms <- t(apply(matrix(moderators[ind_v_inMod], ncol=3), 1, function(x) x[x!=v]))
145 |           
146 |           # To get the predictors on that weird "other variable" vector I use in this script
147 |           for(i in 1:nrow(int_terms)) for(j in 1:2) if(int_terms[i, j] > v) int_terms[i, j] <- int_terms[i, j] - 1
148 |         
149 |           # fill in existing structure:
150 |           for(i in 1:nrow(int_terms)) l_interactions[[2]][[i]] <- int_terms[i, ]
151 |         } else {
152 |           d <- 1 # for the present node v, to skip the below which takes l_interactions[[2]] as input
153 |         }
154 |         
155 |       } # end if: matrix specification?
156 |       
157 |     } # end if: moderators?
158 |     
159 |   } # end if: d>1    
160 |   
161 |   
162 |   
163 |   
164 |   # if statement again here, because it can happen that there is no interaction term in regression on variable v    
165 |   if(d > 1) {
166 |     
167 |     # storage for all interactions of all orders
168 |     l_collect_terms <- list()
169 |     
170 |     # Loop over order of interactions;
171 |     for(ord in 2:d) {
172 |       
173 |       n_terms <- length(l_interactions[[ord]])
174 |       
175 |       # storage for all interactions of fixed order
176 |       l_ord_terms <- list()
177 |       
178 |       # Loop over the interactions of a given order
179 |       for(it in 1:n_terms) {
180 |         
181 |         # Storage: collect here all interaction terms of one interaction
182 |         l_it_terms <- list()
183 |         
184 |         # For fixed interaction: which variables are involved?
185 |         inter_it <- l_interactions[[ord]][[it]] # select interactions one by one
186 |         
187 |         # Compute amount of levels of each variable (continuous=1)
188 |         l_indicator_it <- list()
189 |         for(i in 1:ord) l_indicator_it[[i]] <- 1:(level[inter_it[i]])
190 |         
191 |         # List all combination of levels of variables in the interaction it
192 |         all_combs <- expand.grid(l_indicator_it)
193 |         n_combs <- nrow(all_combs)
194 |         
195 |         # Loop over all combinations of levels of variables in interaction and collect in matrix
196 |         for(comb in 1:n_combs) {
197 |           tarmat <- matrix(NA, nrow = n, ncol = ord)
198 |           for(i in 1:ord) tarmat[, i] <- as.matrix(l_ind_datasets[[inter_it[i]]])[,all_combs[comb, i]]
199 |           l_it_terms[[comb]] <- apply(tarmat, 1, prod)
200 |         }
201 |         
202 |         # combine all level combinations for interaction it
203 |         it_data <- do.call(cbind, l_it_terms)
204 |         
205 |         # Create Names for all (combinations of) interactions
206 |         all_combs_char <- apply(all_combs, 2, function(x) {
207 |           if(length(unique(x))==1) {
208 |             out <- rep("", length(x))
209 |           } else {
210 |             out <- x
211 |           }
212 |         })
213 |         cn <- rep(NA, n_combs)
214 |         
215 |         # DEV: check whether this still works:
216 |         
217 |         for(comb in 1:n_combs) cn[comb] <- paste0(labels[inter_it], matrix(all_combs_char, ncol=ord)[comb,], collapse=':')
218 |         # the point above added to deliminate category flag
219 |         
220 |         colnames(it_data) <- cn
221 |         l_ord_terms[[it]] <- it_data
222 |         
223 |       }
224 |       
225 |       # Collapse over interactions of fixed order
226 |       l_collect_terms[[ord]] <- do.call(cbind, l_ord_terms)
227 |       
228 |     }
229 |     
230 |     # Collapse across order of interactions
231 |     # l_collect_terms[[1]] <- NULL
232 |     all_HOI_terms <- do.call(cbind, l_collect_terms)
233 |     
234 |   } # end if: d>1
235 |   
236 |   # Combine with d=1 size neighborhoods (singletons)
237 |   
238 |   if(d > 1) {
239 |     X <- cbind(Xd1, all_HOI_terms)
240 |   } else {
241 |     X <- Xd1
242 |   }
243 |   
244 |   
245 |   # ---------- Output----------
246 |   
247 |   return(X)
248 |   
249 |   
250 | } # end of function
251 | 
252 | 
253 | 


--------------------------------------------------------------------------------
/R/ModelMatrix_standard.R:
--------------------------------------------------------------------------------
  1 | # jonashaslbeck@gmail.com; May 2018
  2 | 
  3 | ModelMatrix_standard <- function(data, 
  4 |                                  type,
  5 |                                  d, # maximum neighborhood size in model = k-1
  6 |                                  v, # node on which current nodewise regression is performed
  7 |                                  moderators # moderators, if specified
  8 |                                  
  9 | )
 10 | 
 11 |   
 12 | {
 13 |   
 14 |   # -------- Input checks --------
 15 |   
 16 |   # ...
 17 |   
 18 |   # -------- Case I: No Moderation --------
 19 |   
 20 |   p <- ncol(data)
 21 |   for(i in 1:p) if(type[i] == "c") data[, i] <- as.factor(data[, i])
 22 |   
 23 |   if(is.null(moderators)) {
 24 |     
 25 |     if(d > (p - 1)) {
 26 |       stop("Order of interactions cannot be larger than the number of predictors.")
 27 |     } else if (d == 1){ form <- as.formula(paste(colnames(data)[v],"~ (.)"))
 28 |     } else { form <- as.formula(paste(colnames(data)[v],"~ (.)^", d)) }
 29 |     
 30 |     # Construct standard design matrix (to get number of parameters for tau threshold)
 31 |     X_standard <- model.matrix(form, data = data)[, -1] # delete intercept (added by glmnet later)
 32 |     
 33 |   } else {
 34 |     
 35 |     
 36 |     # -------- Case II: Moderation --------
 37 |     
 38 |     # mSpec <- ifelse(class(moderators) %in% c("integer","numeric"), "vector", "matrix")
 39 |     mSpec <- ifelse(any(class(moderators) %in% c("integer","numeric")), "vector", "matrix")
 40 |     # browser()
 41 |     
 42 |     
 43 |     # Terms for interactions/moderation
 44 |     n_mods <- ifelse(mSpec == "vector", length(moderators), nrow(moderators))
 45 |     
 46 |     # IIa) Moderator specification: Vector
 47 |     if(mSpec == "vector") {
 48 |       
 49 |       n_l_mods <- 0
 50 |       l_mods <- list()
 51 |       
 52 |       if(v %in% moderators) {
 53 |         other_comb_pairw <- t(combn((1:p)[-v], 2))
 54 |         l_mods[[1]] <- paste0("V", other_comb_pairw[, 1], ".",  " * ", "V", other_comb_pairw[, 2], ".", collapse = " + ")
 55 |       } else {      
 56 |         for(i in 1:n_mods) {  # loop over moderators
 57 |           l_mods[[i]] <- paste0(colnames(data)[-c(v, moderators[i])], "*", "V", moderators[i], ".", collapse = " + ")
 58 |           n_l_mods <- n_l_mods + 1
 59 |         }
 60 |       } # end if: v = moderator?
 61 |       
 62 |       if(n_l_mods > 1) for(i in 1:(n_l_mods - 1)) l_mods[[i]] <- paste0(l_mods[[i]], " + ") # add plus between terms but not in the end
 63 |       v_mods <- do.call(paste0, l_mods)
 64 |       
 65 |     } # end if: vector specification
 66 |     
 67 |     
 68 |     # IIb) Moderator specification: Matrix
 69 |     if(mSpec == "matrix") {
 70 |       
 71 |       nrow_mods <- nrow(moderators)
 72 |       ind_v_inMod <- as.logical(apply(moderators, 1, function(x) v %in% x  ))
 73 |       
 74 |       if(sum(ind_v_inMod)>0) { # if variable v is involved in at least one interaction
 75 |       # get interaction terms for node v
 76 |       int_terms <- t(apply(matrix(moderators[ind_v_inMod], ncol=3), 1, function(x) x[x!=v]))
 77 |       n_terms <- length(int_terms)
 78 |       v_mods <- paste0("V", int_terms[, 1], ".",  " * ", "V", int_terms[, 2], ".", collapse = " + ")
 79 |       } else {
 80 |         v_mods <- NULL
 81 |       }
 82 |       
 83 |       
 84 |     } # end if: matrix specification
 85 |     
 86 |     
 87 |     
 88 |     # -------- Final Model formula: main effects + interactions --------
 89 |     
 90 |     # Terms for main effects
 91 |     pred_simple <- paste(colnames(data)[-v], collapse = " + ")
 92 |     
 93 |     # Put together
 94 |     if(length(v_mods) == 0) { # if there is no moderation for that variable (the case if v is the only moderator)
 95 |       form <- paste(colnames(data)[v], "~",
 96 |                     pred_simple)
 97 |       
 98 |     } else {
 99 |       form <- paste(colnames(data)[v], "~",
100 |                     pred_simple,
101 |                     " + ",
102 |                     v_mods)
103 |     }
104 |     
105 |     form <- as.formula(form)
106 |     
107 |     # Create design matrix
108 |     X_standard <- model.matrix(form, 
109 |                                data = data)[, -1] # delete intercept (added by glmnet later)
110 |     
111 |   } # end if: moderation?
112 | 
113 |   
114 |   # -------- Output --------
115 |   
116 | 
117 |   return(X_standard)
118 |   
119 |   
120 | } # end of function
121 | 
122 | 
123 | 
124 | 


--------------------------------------------------------------------------------
/R/applyTauAND.R:
--------------------------------------------------------------------------------
  1 | # jonashaslbeck@gmail.com; July 2019
  2 | 
  3 | # Input:
  4 | # - fitted mgm model object (mgm() output)
  5 | # - node i at hand
  6 | # Output:
  7 | # - Nodemodels with tau-threshold and AND rule applied
  8 | 
  9 | # This is used in condition() (for now) and in predict() (planned later)
 10 | 
 11 | 
 12 | applyTauAND <- function(i, 
 13 |                         object, 
 14 |                         model_i) {
 15 |   
 16 |   
 17 |   # ----- Apply: tau-threshold -----
 18 |   
 19 |   model_i[abs(model_i) < object$nodemodels[[i]]$tau] <- 0
 20 |   
 21 |   
 22 |   # ----- Apply AND-rule ------
 23 |   
 24 |   # if(i==12) browser()
 25 |   
 26 |   if(object$call$ruleReg == "AND") {
 27 |     
 28 |     # print(i)
 29 |     
 30 |     ## This creates an object with the same dimensionality as the glmnet output matrix
 31 |     ## Below we fill into this matrix whether the corresponding parameter has been set to zero by the AND rule in the fitted model
 32 |     ## We take that information from the indicator matrix
 33 |     
 34 |     # Aux matrix
 35 |     aux_m_AND <- matrix(NA, nrow(model_i)-1, 5) # columns: parameter value, variable 1, variable 2, order of interaction, parameter value after AND rule
 36 |     aux_m_AND[, 1] <- model_i[-1, 1] # copy parameters, without intercept
 37 |     
 38 |     # get variables out:
 39 |     names_i <- rownames(model_i)[-1]
 40 |     names_aux1 <- strsplit(names_i, ":")
 41 |     names_aux2 <- lapply(names_aux1, function(x) gsub("V", "", x))
 42 |     names_aux3 <- lapply(names_aux2, function(x) {
 43 |       out <- strsplit(x, "\\.")
 44 |       if(length(out) == 1) c(out[[1]][1], NA) else c(out[[1]][1], out[[2]][1])
 45 |     })
 46 |     names_aux4 <- do.call(rbind, names_aux3)
 47 |     names_aux4 <- apply(names_aux4, 2, as.numeric)
 48 |     
 49 |     aux_m_AND[, 2:3] <- names_aux4
 50 |     aux_m_AND[, 4] <- apply(aux_m_AND[, 2:3], 1, function(x) sum(!is.na(x))) # order of interaction
 51 |     n_2way <- sum(aux_m_AND[, 4] == 1)
 52 |     n_3way <- sum(aux_m_AND[, 4] == 2)
 53 |     
 54 |     
 55 |     # -- Loop over indicator matrix --
 56 |     
 57 |     # 2-way interactions
 58 |     ind_2way <- object$interactions$indicator[[1]]
 59 |     ind_2way <- matrix(ind_2way, ncol=2) # for the case of: only 1 e-way interaction
 60 |     out <- apply(ind_2way, 1, function(x) {
 61 |       if(i %in% x) {
 62 |         TRUE
 63 |       } else { 
 64 |         FALSE
 65 |       }
 66 |     })
 67 |     
 68 |     if(sum(out)>0) { # all of this needed only if at least one pairwise interaction is estimated to be nonzero
 69 |       
 70 |       ind_2way_i <- matrix(ind_2way[out, ], ncol=2) # subset nonzero estimated 2-way interactions that contain variable i in indicator matrix
 71 |       preds_2way <- aux_m_AND[aux_m_AND[, 4] == 1, 2] # subset 2-way interactions in model object
 72 |       
 73 |       for(v in 1:n_2way) {
 74 |         out_v <- apply(ind_2way_i, 1, function(x) preds_2way[v] %in% x)
 75 |         if(sum(out_v) > 0) aux_m_AND[v, 5] <- 1 else aux_m_AND[v, 5] <- 0
 76 |       }
 77 |       
 78 |     } else {
 79 |       
 80 |       aux_m_AND[1:n_2way, 5] <- 0
 81 |       
 82 |     }
 83 |     
 84 |     # 3-way interactions
 85 |     
 86 |     # only execute if there are any 3-way interactions _specified_ involving variable i
 87 |     # (it can happen that no 3-way interaction is specified if the moderators are specified by matrix input)
 88 |     if(n_3way > 0) {
 89 |       
 90 |       ind_3way <- object$interactions$indicator[[2]]
 91 |       ind_3way <- matrix(ind_3way, ncol=3) # for the case of: only one 3-way interaction
 92 |       out <- apply(ind_3way, 1, function(x) {
 93 |         if(i %in% x) {
 94 |           TRUE
 95 |         } else { 
 96 |           FALSE
 97 |         }
 98 |       })
 99 |       
100 |       if(sum(out)>0) { # all of this needed only if at least one 3-way interaction is estimated to be nonzero
101 |         
102 |         ind_3way_i <- matrix(ind_3way[out, ], ncol=3)
103 |         preds_3way <- matrix(aux_m_AND[aux_m_AND[, 4] == 2, 2:3], ncol=2)
104 |         
105 |         for(v in 1:n_3way) {
106 |           out_v <- apply(ind_3way_i, 1, function(x) preds_3way[v, 1] %in% x & preds_3way[v, 2] %in% x)
107 |           if(sum(out_v) > 0) aux_m_AND[v+n_2way, 5] <- 1 else aux_m_AND[v+n_2way, 5] <- 0
108 |         }
109 |         
110 |       } else {
111 |         
112 |         aux_m_AND[(n_2way+1):(n_2way+n_3way), 5] <- 0
113 |         
114 |       }
115 |       
116 |     }
117 |     
118 |     
119 |     # Finally, threshold
120 |     model_i[-1, 1][aux_m_AND[, 5] == 0] <- 0
121 |     
122 |     
123 |   } # end if: ruleReg = "AND" ?
124 |   
125 |   return(model_i)
126 |   
127 | } # eoF
128 | 


--------------------------------------------------------------------------------
/R/beepday2consec.R:
--------------------------------------------------------------------------------
 1 | 
 2 | beepday2consec <- function(beepvar, # Beep number in EMA study
 3 |                            dayvar) # Day Number
 4 |   
 5 | {
 6 |   
 7 |   # Input Checks
 8 |   if(!all(dayvar == round(dayvar))) stop("beepvar has to be a vector of non-negative integers")
 9 |   if(!all(beepvar == round(beepvar))) stop("beepvar has to be a vector of non-negative integers")
10 |   if(length(beepvar) != length(dayvar)) stop("beepvar has to have the same length as dayvar")
11 |   
12 |   # Compute Aux variables
13 |   fillin_beep <- max(beepvar) + 2 # fill in an integer for day/night shifts that ensures that lagData() treats this data point as non-consecutive
14 |   n <- length(dayvar)
15 |   
16 |   # Get day breaks
17 |   ind_sameday <- dayvar[-1] == dayvar[-n]
18 |   ind_sameday[1] <- TRUE # dont change beep of first measurement
19 |   
20 |   consec <- rep(NA, n)
21 |   consec[1] <- 1
22 |   
23 |   # check consecutiveness for each (consecutive) row-pair
24 |   counter <- 1
25 |   for(i in 2:n) {
26 |     beep_diff <- beepvar[i] - beepvar[i-1]
27 |     day_diff <- dayvar[i] - dayvar[i-1]
28 |     if(beep_diff == 1 & day_diff == 0) counter <- counter + 1 else counter <- counter + 2
29 |     consec[i] <- counter
30 |   }
31 | 
32 |   return(consec)
33 |   
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/R/bwSelPredict.R:
--------------------------------------------------------------------------------
  1 | 
  2 | bwSelPredict <- function(data,
  3 |                          type,
  4 |                          level,
  5 |                          obj,
  6 |                          test,
  7 |                          modeltype,
  8 |                          # k = NULL,
  9 |                          # consec = NULL,
 10 |                          ... ) # is lags passed on?
 11 | 
 12 | 
 13 | 
 14 | {
 15 | 
 16 |   # -------------------- Input checks & calc Aux Vars -------------------
 17 | 
 18 |   p <- ncol(data)
 19 | 
 20 |   # Fetch arguments
 21 |   args <- list(...)
 22 |   
 23 |   
 24 |   # if(is.null(args$k)) args$k <- 2
 25 |   args$d <- args$k - 1
 26 |   n_test <- length(test)
 27 |   consec <- args$consec
 28 | 
 29 |   # Scale Gaussians
 30 |   ind_Gauss <- which(type == 'g')
 31 |   for(i in ind_Gauss) data[, i] <- scale(data[, i])
 32 | 
 33 |   data_df <- as.data.frame(data)
 34 | 
 35 |   # Categoricals into factors (Needed to use formula to construct design matrix)
 36 |   for(i in which(type=='c')) data_df[, i] <- as.factor(data_df[, i])
 37 | 
 38 | 
 39 |   # -------------------- Get VAR data structure -------------------
 40 | 
 41 |   if(modeltype == 'mvar') {
 42 | 
 43 |     # Glue all lagged variables together
 44 | 
 45 |     n_lags <- length(args$lags)
 46 | 
 47 |     # Prepare Data (already cuts max(lags) first observations to compute design matrix)
 48 |     data_lagged <- lagData(data = data, 
 49 |                            lags = args$lags, 
 50 |                            consec = consec)
 51 |     
 52 |     # out of list
 53 |     data_response <- data_lagged$data_response
 54 |     l_data_lags <- data_lagged$l_data_lags
 55 |     data_response <- apply(data_response, 2, as.numeric) # to avoid confusion with labels of categories if there are factors
 56 |     
 57 |     # Detele cases
 58 |     data_response <- data_response[data_lagged$included, ]
 59 |     l_data_lags <- lapply(l_data_lags, function(z) z <- z[data_lagged$included, ])
 60 |     
 61 |     data_response <- apply(data_response, 2, as.numeric) # to avoid confusion with labels of categories if there are factors
 62 | 
 63 |   }
 64 | 
 65 |   # -------------------- Compute Predictions -------------------
 66 | 
 67 |   m_pred <- matrix(NA, nrow = n_test, ncol = p)
 68 | 
 69 |   for(v in 1:p) {
 70 | 
 71 |     # ----- Create Design Matrices -----
 72 | 
 73 |     if(modeltype == 'mvar') {
 74 | 
 75 |       # append response with predictors
 76 |       y <- data_response[,v] # response variable v
 77 |       data_v <- cbind(do.call(cbind, l_data_lags)) # combine
 78 |       data_v <- as.data.frame(data_v) # because model.matrix() below requries a data.frame
 79 |       
 80 |       # Dummy coding
 81 |       form <- as.formula('y ~ (.)')
 82 | 
 83 |       # Construct standard design matrix (to get number of parameters for tau threshold)
 84 |       X_standard <- model.matrix(form, data = data_v)[, -1] # delete intercept (added by glmnet later)
 85 | 
 86 |       if(args$overparameterize) {
 87 | 
 88 |         # # Compute augmented type and level vectors
 89 |         type_aug <- rep(type, n_lags)
 90 |         level_aug <- rep(level, n_lags)
 91 | 
 92 |         # Construct over-parameterized design matrix
 93 |         X_over <- ModelMatrix(data = data_v,
 94 |                               type = type_aug,
 95 |                               level = level_aug,
 96 |                               labels = colnames(data_v),
 97 |                               d = 1, 
 98 |                               v = NULL)
 99 |         X <- X_over
100 | 
101 |       } else {
102 | 
103 |         X <- X_standard
104 | 
105 |       }
106 | 
107 |     }
108 | 
109 |     if(modeltype == 'mgm') {
110 | 
111 |       # Create HOI design matrix (copied from mgm)
112 | 
113 |       if(args$d > (p - 1)) {
114 |         stop("Order of interactions cannot be larger than the number of predictors.")
115 |       } else if (args$d == 1){ form <- as.formula(paste(colnames(data_df)[v],"~ (.)"))
116 |       } else { form <- as.formula(paste(colnames(data_df)[v],"~ (.)^", args$d)) }
117 | 
118 |       # Construct standard design matrix (to get number of parameters for tau threshold)
119 |       X_standard <- model.matrix(form, data = data_df)[, -1] # delete intercept (added by glmnet later)
120 | 
121 |       if(args$overparameterize) {
122 | 
123 |         # Construct over-parameterized design matrix
124 |         X_over <- ModelMatrix(data = data,
125 |                               type = type,
126 |                               level = level,
127 |                               labels = colnames(data),
128 |                               d = args$d, 
129 |                               v = v)  # defined above from  k
130 |         X <- X_over
131 |       } else {
132 |         X <- X_standard
133 |       }
134 | 
135 |       y <- data[, v]
136 | 
137 |     } # end if: modeltype mgm
138 | 
139 | 
140 | 
141 | 
142 | 
143 |     # ----- Make Predictions for test points -----
144 | 
145 |     for(i in 1:n_test) {
146 | 
147 |       coefs <- obj$tvmodels[[i]]$nodemodels[[v]]$model
148 |       row <- test[i]
149 | 
150 |       if(type[v] == 'c') {
151 | 
152 |         n_cat <- level[v]
153 | 
154 |         # Compute exp(potentials)
155 |         Potentials <- rep(NA, n_cat)
156 |         for(k in 1:n_cat)  Potentials[k] <- exp(coefs[[k]][1] + X[row,] %*% coefs[[k]][-1])
157 | 
158 |         # compute category-probabilities
159 |         Probabilities <- Potentials / sum(Potentials)
160 |         # Classify
161 |         m_pred[i, v] <- sort(unique(y))[which.max(Probabilities)]
162 | 
163 | 
164 |       } else {
165 | 
166 |         ## Prediction Continuous
167 |         coefs <- as.numeric(coefs) # get coefficients
168 |         m_pred[i, v] <- coefs[1] + X[row,] %*% coefs[-1] # predict
169 | 
170 |       }
171 | 
172 |     } # end for: i (over test locations)
173 | 
174 |   } # end for: v
175 | 
176 | 
177 |   # -------------------- Compute Errors -------------------
178 | 
179 |   if(modeltype == 'mvar')  {
180 |     if(n_test==1) { 
181 |       m_true <- matrix(data_response[test, ], nrow = n_test)  # for the case n_test=1
182 |     } else {
183 |       m_true <- data_response[test, ]
184 |     }
185 |   }
186 |   
187 |   if(modeltype == 'mgm') {
188 |     if(n_test==1) { 
189 |       m_true <- matrix(data[test, ], nrow = n_test)  # for the case n_test=1
190 |     } else {
191 |       m_true <- data[test, ]
192 |     }
193 |   }
194 | 
195 |   m_error <- matrix(NA, nrow = n_test, ncol = p)
196 | 
197 |   # 0/1 loss for categorical, RMSE for continuous (which here is just absolute error, because 1 element)
198 |   for(i in 1:p) {
199 |     if(type[v] == 'c') {
200 |       m_error[, i] <- abs(m_true[, i] - m_pred[, i])  
201 |     } else {
202 |       m_error[, i] <- abs(m_true[, i] - m_pred[, i]) # RMSE for 1 case = abs error
203 |     }
204 |   }
205 |   
206 |   
207 | 
208 |   # -------------------- Output -------------------
209 | 
210 |   predOutlist <- list()
211 |   predOutlist$errors <- m_error
212 |   predOutlist$error_time <- rowMeans(m_error)
213 |   predOutlist$error_mean <- mean(m_error)
214 | 
215 | 
216 |   return(predOutlist)
217 | 
218 | 
219 | }
220 | 


--------------------------------------------------------------------------------
/R/calcLL.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | calcLL <- function(X,
  4 |                    y,
  5 |                    fit, #glmnet fit object
  6 |                    type,
  7 |                    level,
  8 |                    v,
  9 |                    weights,
 10 |                    lambda,
 11 |                    LLtype = 'model')
 12 | 
 13 | 
 14 |   {
 15 | 
 16 |   if(missing(level)) stop('No levels passed to calcLL !')
 17 | 
 18 |   # This function calculates three different LL:
 19 |   # 1) LLtype = 'model': The LL of a given model via fit
 20 |   # 2) LLtype = 'nullmodel': The LL of the Null (Intercept) Model
 21 |   # 3) LLtype = 'saturated': The LL of the saturated model
 22 | 
 23 |   n <- nrow(X)
 24 | 
 25 |   if(LLtype == 'model') {
 26 | 
 27 |     if(type[v] == 'g') {
 28 |       beta_vector <- matrix(coef(fit, s = lambda), ncol = 1)
 29 |       predicted_mean <- cbind(rep(1, n), X) %*% as.vector(beta_vector)
 30 |       sd_residual <- sd(y-predicted_mean)
 31 |       LL_model <- dnorm(y, mean = predicted_mean, sd = sd_residual, log = TRUE)
 32 |       mean_LL_model <- sum(LL_model * weights)
 33 |     }
 34 | 
 35 |     if(type[v] == 'p') {
 36 |       beta_vector <- matrix(coef(fit, s = lambda), ncol = 1)
 37 |       predicted_mean <- cbind(rep(1, n), X) %*% as.vector(beta_vector)
 38 |       LL_model <- dpois(y, exp(predicted_mean), log = TRUE)
 39 |       mean_LL_model <- sum(LL_model * weights)
 40 |     }
 41 | 
 42 |     if(type[v] == 'c') {
 43 | 
 44 |       n_cats <- level[v] # number of levels
 45 | 
 46 |       ## Compute LL (see http://www.stanford.edu/~hastie/Papers/glmnet.pdf, equation 22)
 47 |       m_respdum <- matrix(NA, n, n_cats) # dummy for data
 48 |       m_coefs <- matrix(NA, n, n_cats) # dummy for coefficients
 49 |       cats <- unique(y)
 50 | 
 51 |       LL_n <- rep(NA, n) # Storage
 52 |       m_LL_parts <- matrix(NA, nrow = n, ncol=n_cats+1)
 53 | 
 54 |       for(catIter in 1:n_cats) {
 55 |         m_respdum[,catIter] <- (y==cats[catIter])*1 # dummy matrix for categories
 56 |         m_coefs[,catIter] <- cbind(rep(1, n), X) %*% matrix(coef(fit, s = lambda)[[catIter]], ncol = 1)
 57 |         m_LL_parts[,catIter] <- m_respdum[, catIter] * m_coefs[, catIter]
 58 |       }
 59 | 
 60 |       m_LL_parts[, n_cats+1] <- - log(rowSums(exp(m_coefs))) # the log part, see eq (22)
 61 |       LL_n <- rowSums(m_LL_parts) # sum up n_cat + 1 parts
 62 |       mean_LL_model <- sum(LL_n * weights) # apply weighting
 63 | 
 64 |     }
 65 | 
 66 |   }
 67 | 
 68 |   if(LLtype == 'nullmodel') {
 69 | 
 70 |     if(type[v] == 'g') {
 71 |       beta_vector <- matrix(coef(fit, s = 1)[1], ncol = 1) # only intercept here
 72 |       predicted_mean <- rep(1, n) * as.vector(beta_vector)
 73 |       sd_residual <- sd(y-predicted_mean)
 74 |       LL_model <- dnorm(y, mean = predicted_mean, sd = sd_residual, log = TRUE)
 75 |       mean_LL_model <- sum(LL_model * weights)
 76 |     }
 77 | 
 78 |     if(type[v] == 'p') {
 79 |       beta_vector <- matrix(coef(fit, s = 1)[1], ncol = 1)
 80 |       predicted_mean <- rep(1, n) * as.vector(beta_vector) # log mean actually
 81 |       LL_model <- dpois(y, exp(predicted_mean), log = TRUE)
 82 |       mean_LL_model <- sum(LL_model * weights)
 83 |     }
 84 | 
 85 |     if(type[v] == 'c') {
 86 | 
 87 |       n_cats <- level[v] # number of levels
 88 | 
 89 |       ## Compute LL (see http://www.stanford.edu/~hastie/Papers/glmnet.pdf, equation 22)
 90 |       m_respdum <- matrix(NA, n, n_cats) # dummy for data
 91 |       m_coefs <- matrix(NA, n, n_cats) # dummy for coefficients
 92 |       cats <- unique(y)
 93 | 
 94 |       LL_n <- rep(NA, n) # Storage
 95 |       m_LL_parts <- matrix(NA, nrow = n, ncol=n_cats+1)
 96 | 
 97 |       for(catIter in 1:n_cats) {
 98 |         m_respdum[,catIter] <- (y==cats[catIter])*1 # dummy matrix for categories
 99 |         m_coefs[,catIter] <- cbind(rep(1, n), X) %*% matrix(coef(fit, s = 1)[[catIter]], ncol = 1)
100 |         m_LL_parts[,catIter] <- m_respdum[, catIter] * m_coefs[, catIter]
101 |       }
102 | 
103 |       m_LL_parts[, n_cats+1] <- - log(rowSums(exp(m_coefs))) # the log part, see eq (22)
104 |       LL_n <- rowSums(m_LL_parts) # sum up n_cat + 1 parts
105 |       mean_LL_model <- sum(LL_n * weights) # apply weighting
106 | 
107 |     }
108 | 
109 |   }
110 | 
111 | 
112 |   # if(LLtype == 'saturated') {
113 |   # 
114 |   #   if(type[v] == 'g') {
115 |   #     predicted_mean <- y
116 |   #     LL_model <- dnorm(y, mean = predicted_mean, sd = 1, log = TRUE)
117 |   #     mean_LL_model <- sum(LL_model * weights)
118 |   #   }
119 |   # 
120 |   #   if(type[v] == 'p') {
121 |   #     predicted_mean <- y
122 |   #     LL_model <- dpois(y, exp(predicted_mean), log = TRUE)
123 |   #     mean_LL_model <- sum(LL_model * weights)
124 |   #   }
125 |   # 
126 |   #   if(type[v] == 'c') {
127 |   # 
128 |   #     mean_LL_model <- 0
129 |   # 
130 |   #     # For discrete RVs,the saturated model has Likelihood = 1 and LL = log(1) = 0
131 |   #     # e.g. http://stats.stackexchange.com/questions/114073/logistic-regression-how-to-obtain-a-saturated-model
132 |   # 
133 |   #   }
134 |   # 
135 |   # }
136 | 
137 | 
138 | 
139 |   return(mean_LL_model)
140 | 
141 | }
142 | 


--------------------------------------------------------------------------------
/R/calcNeighbors.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | calcNeighbors <- function(fit, lambda, type, level, v) {
 4 | 
 5 |   n_cats <- level[v]
 6 | 
 7 |   if(type[v]!="c") { #continuous case
 8 |     coefs_bin <- as.matrix(coef(fit, s = lambda)[-1, ]) != 0 #nonzero?
 9 |     n_neighbors <- colSums(coefs_bin)
10 |   }
11 |   if(type[v]=="c"){ #categorical case
12 |     m_neighbors <- matrix(0, ncol = length(fit$lambda), nrow = n_cats)
13 |     coefs_bin <- vector("list", length=n_cats)
14 |     for(ca in 1:n_cats){
15 |       coefs_bin[[ca]] <- as.matrix(coef(fit, s = lambda)[[ca]][-1,]) != 0 #nonzero?
16 |     }
17 |     n_neighbors <- colSums(Reduce('+', coefs_bin)!=0) # rule: a predictor has a nonzero parameter with 1 category of the y, then we have a neighborhood relation
18 |   }
19 | 
20 |   return(n_neighbors)
21 | 
22 | }
23 | 
24 | 
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/R/condition.R:
--------------------------------------------------------------------------------
  1 | # jonashaslbeck@gmail.com; July 2019
  2 | 
  3 | # input:
  4 | # - any mgm object with k <= 3
  5 | # - a list of variables and values at which to fix those variables
  6 | 
  7 | # output:
  8 | # - the conditional model (possibly still including 3-way interactions)
  9 | 
 10 | condition <- function(object, 
 11 |                       values) {
 12 |   
 13 |   # ---------- Basic Info -----------
 14 |   
 15 |   type <- object$call$type
 16 |   level <- object$call$level
 17 |   p <- length(type) # Number of Variables
 18 |   nCond <- length(values)
 19 |   
 20 |   # values: turn list into matrix
 21 |   m_fixed_values <- matrix(NA, nrow=nCond, ncol=2)
 22 |   cond_names <- as.numeric(names(values))
 23 |   m_fixed_values[, 1] <- cond_names
 24 |   m_fixed_values[, 2] <- unlist(values)
 25 |   
 26 |   
 27 |   # ---------- Create Output Object-----------
 28 |   
 29 |   object_new <- object
 30 |   object_new$call$condition <- values
 31 |   
 32 |   
 33 |   # ---------- Input Checks -----------
 34 |   
 35 |   # Check whether variables are specified via column name
 36 |   if(any(is.na(cond_names))) stop("Variables to condition on have to be specified by column number (not, for example column name). See also ?condition.")
 37 |   
 38 |   if(object$call$k>3) stop("This function is only implemented for 'first-order' moderation (i.e., 3-way interactions).")
 39 |   if(! ("core" %in% class(object)) ) stop("condition() is currently only implemented for mgm() objects.")
 40 |   
 41 |   # Categorical variables: only condition on categories that exist
 42 |   for(cat in 1:nCond) {
 43 |     if(type[m_fixed_values[cat, 1]] == "c") {
 44 |       if(!(m_fixed_values[cat, 2] %in% object$call$unique_cats[[m_fixed_values[cat, 1]]])) stop("Fixed category does not exist in the data.")
 45 |     }  
 46 |   }
 47 |   
 48 |   # TODO: Continuous variables: give warning if one conditions outside 99% quantiles
 49 |   
 50 |   
 51 |   
 52 |   # ---------- Loop over response variables -----------
 53 |   
 54 |   
 55 |   for(i in 1:p) {
 56 |     
 57 |     # ----- Case I) Gaussian response -----
 58 |     
 59 |     if(type[i] == "g") {
 60 |       
 61 |       # Access node model
 62 |       model_i <- object$nodemodels[[i]]$model
 63 |       
 64 |       
 65 |       # Apply tau-thresholding & AND rule
 66 |       model_i <- applyTauAND(i = i,
 67 |                              object = object, 
 68 |                              model_i = model_i)
 69 |       
 70 |       
 71 |       # Condition / fix values
 72 |       model_i_new <- condition_core(i = i,
 73 |                                     model_i = model_i, 
 74 |                                     m_fixed_values = m_fixed_values)
 75 |       
 76 |       # Overwrite model object    
 77 |       object_new$nodemodels[[i]]$model <- model_i_new
 78 |       
 79 |       
 80 |     } # end if: response gaussian?
 81 |     
 82 |     
 83 |     
 84 |     # ----- Case III: Poisson responses -----
 85 |     # (actually exactly the same handling as "g" above)
 86 |     
 87 |     if(type[i] == "p") {
 88 |       
 89 |       # Retrieve nodemodel i
 90 |       model_i <- object$nodemodels[[i]]$model
 91 |       n_resp <- length(model_i)
 92 |       
 93 |       # Apply tau-thresholding & AND rule
 94 |       model_i <- applyTauAND(i = i,
 95 |                              object = object, 
 96 |                              model_i = model_i)
 97 |       
 98 |       # Condition / fix values
 99 |       model_i_new <- condition_core(i = i,
100 |                                     model_i = model_i, 
101 |                                     m_fixed_values = m_fixed_values)
102 |       
103 |       # Overwrite model object    
104 |       object_new$nodemodels[[i]]$model <- model_i_new
105 |       
106 |     } # end if: response Poisson?
107 |     
108 |     
109 |     
110 |     # ----- Case II: Categorical response -----
111 |     
112 |     if(type[i] == "c") {
113 |       
114 |       # Retrieve nodemodel i
115 |       model_i <- object$nodemodels[[i]]$model
116 |       n_resp <- length(model_i)
117 |       
118 |       # Loop over response categories
119 |       for(cat in 1:n_resp) {
120 |         
121 |         model_i_cat <- model_i[[cat]]
122 |         
123 |         # Apply tau-thresholding & AND rule
124 |         model_i_cat <- applyTauAND(i = i,
125 |                                    object = object,
126 |                                    model_i = model_i_cat)
127 |         
128 |         # Condition / fix values
129 |         model_i_new <- condition_core(i = i,
130 |                                       model_i = model_i_cat, 
131 |                                       m_fixed_values = m_fixed_values)
132 |         
133 |         # Overwrite model object    
134 |         object_new$nodemodels[[i]]$model[[cat]] <- model_i_new
135 |         
136 |       } # end for: response cats
137 |       
138 |     } # end if: response categorical?
139 |     
140 |   } # end for: response variables
141 |   
142 | 
143 |   # ---------- Aggregation across regressions -----------
144 |   
145 |   
146 |   
147 |   object_new2 <- Reg2Graph(object_new, thresholding=FALSE)
148 |   
149 |   
150 |   # ---------- Prepare output & return -----------
151 |   
152 |   return(object_new2)
153 |   
154 |   
155 | } # eoF
156 | 
157 | 
158 | 


--------------------------------------------------------------------------------
/R/condition_core.R:
--------------------------------------------------------------------------------
  1 | # jonashaslbeck@gmail.com; August 2020
  2 | 
  3 | # Input:
  4 | # - the nodemodel of node i
  5 | # - matrix of fixed values
  6 | # Output: 
  7 | # - conditioned nodemodel object
  8 | 
  9 | 
 10 | condition_core <- function(i = i,
 11 |                            model_i, 
 12 |                            m_fixed_values) {
 13 |   
 14 |   
 15 |   # ----- Distinguish between two cases: variable i is involved in 3-way interaction or not --
 16 |   
 17 |   names_i <- rownames(model_i)
 18 |   n_terms <- nrow(model_i)
 19 |   nCond <- nrow(m_fixed_values)
 20 |   
 21 |   # --- Get a nicer object for main effects / interactions ---
 22 |   
 23 |   # This lists all non-zero terms and matches them with the fixed values
 24 |   effects <- matrix(NA, nrow = n_terms-1, ncol=7)
 25 |   colnames(effects) <- c("Variable1", "Variable2", "Fixed1", "Fixed2", "Parameter", "Type1", "Type2")
 26 |   
 27 |   names_aux1 <- strsplit(names_i[-1], ":")
 28 |   names_aux2 <- lapply(names_aux1, function(x) gsub("V", "", x))
 29 |   
 30 |   names_aux3 <- lapply(names_aux2, function(x) {
 31 |     x_out <- rep(NA, length(x))
 32 |     for(v in 1:length(x)) if(substr(x[v], start = nchar(x[v]), nchar(x[v])) == ".") x_out[v] <- gsub("\\.", "", x[v]) else x_out[v] <- x[v]
 33 |     return(x_out)
 34 |   })
 35 |   
 36 |   n_var_i <- unlist(lapply(names_aux3, length))
 37 |   for(q in 1:length(n_var_i)) effects[q, 1:n_var_i[q]] <- as.numeric(unlist(names_aux3[[q]]))
 38 |   
 39 |   
 40 |   # --- Fill in fixed values ---
 41 |   
 42 |   # type of predictor (cat (coded=0) vs con (coded=1))?
 43 |   for(q in 1:(n_terms-1)) effects[q, 6] <- ifelse(effects[q, 1] == round(effects[q, 1]), 1, 0) # 1 = continuous, 0 = categorical
 44 |   for(q in which(n_var_i==2)) effects[q, 7] <- ifelse(effects[q, 2] == round(effects[q, 2]), 1, 0) # 1 = continuous, 0 = categorical
 45 |   
 46 |   # Fill in continuous fixed values
 47 |   for(q in 1:(n_terms-1)) for(f in 1:nCond) if(effects[q, 1] == m_fixed_values[f, 1]) effects[q, 3] <- m_fixed_values[f, 2]
 48 |   for(q in which(n_var_i==2)) for(f in 1:nCond) if(effects[q, 2] == m_fixed_values[f, 1]) effects[q, 4] <- m_fixed_values[f, 2]
 49 |   
 50 |   # Fill in categorical fixed values
 51 |   for(q in 1:(n_terms-1)) {
 52 |     if(effects[q, 6] == 0) {
 53 |       var_cat <- strsplit(as.character(effects[q, 1]), "\\.")[[1]]
 54 |       for(f in 1:nCond) if(as.numeric(var_cat[1]) == m_fixed_values[f, 1]) if(as.numeric(var_cat[2]) == m_fixed_values[f, 2]) effects[q, 3] <- 1 else effects[q, 3] <- 0  
 55 |     }
 56 |   }
 57 |   
 58 |   for(q in which(n_var_i==2)) {
 59 |     if(effects[q, 7] == 0) {
 60 |       var_cat <- strsplit(as.character(effects[q, 2]), "\\.")[[1]]
 61 |       for(f in 1:nCond) if(as.numeric(var_cat[1]) == m_fixed_values[f, 1]) if(as.numeric(var_cat[2]) == m_fixed_values[f, 2]) effects[q, 4] <- 1 else effects[q, 4] <- 0  
 62 |     }
 63 |   }
 64 |   
 65 |   
 66 |   # Fill in parameter values
 67 |   effects[, 5] <- model_i[-1, 1]
 68 |   
 69 |   
 70 |   
 71 |   # --- Fill (new) conditioned model i ---
 72 |   
 73 |   l_cPars <- vector("list", length = n_terms)
 74 |   l_cPars <- lapply(l_cPars, function(x) list() ) # list structure, since we don't "know" in advance how many terms we'll have
 75 |   
 76 |   # Only calculate new parameters for regressions on variables that are not fixed
 77 |   if(!i %in% m_fixed_values[, 1]) {
 78 |     
 79 |     # Copy intercept
 80 |     l_cPars[[1]][[1]] <- model_i[1, 1]
 81 |     
 82 |     for(q in 1:(n_terms-1)) {
 83 |       
 84 |       # main effects
 85 |       if(n_var_i[q] == 1) {
 86 |         
 87 |         if(is.na(effects[q, 3])) {
 88 |           # I) no fixed value: just copy main effect
 89 |           l_cPars[[q+1]][[length(l_cPars[[q+1]])+1]] <- effects[q, 5]
 90 |         } else {
 91 |           # II) fixed value: multiply times fixed valye & copy to intercept
 92 |           l_cPars[[1]][[length(l_cPars[[1]])+1]] <- effects[q, 5] * effects[q, 3]
 93 |         }
 94 |         
 95 |         
 96 |       } # end if: main effects
 97 |       
 98 |       # interaction effects
 99 |       if(n_var_i[q] == 2) {
100 |         
101 |         ind_spec <- sum(c(is.na(effects[q, 3]), is.na(effects[q, 4])))
102 |         
103 |         # I) nothing: just copy interaction effect
104 |         if(ind_spec == 2) 
105 |           l_cPars[[q+1]][[length(l_cPars[[q+1]])+1]] <- effects[q, 5]
106 |         
107 |         # II) one of the two: add to respective main effect
108 |         if(ind_spec==1) {
109 |           ind_specified <- !is.na(c(effects[q, 3], effects[q, 4]))
110 |           
111 |           ind_leftover_mainE <- which(effects[n_var_i==1, 1]==effects[q, 1:2][!ind_specified]) # indicates the row of the main effect to which we add the present moderation effect
112 |           
113 |           l_cPars[[ind_leftover_mainE+1]][[length(l_cPars[[ind_leftover_mainE+1]])+1]] <- effects[q, 5] * effects[q, 3:4][ind_specified]
114 |         } 
115 |         
116 |         # III) both: add to intercept
117 |         if(ind_spec == 0) l_cPars[[1]][[length(l_cPars[[1]])+1]] <- effects[q, 5] * effects[q, 3] * effects[q, 4]
118 |         
119 |       } # end if: interaction effects
120 | 
121 |     } # end for: loop parameters
122 |     
123 |   } # end if: variable still random (not fixed)?
124 |   
125 |   
126 |   # --- Collapse lists into new model object  ---
127 |   
128 |   model_i_new <- matrix(NA, nrow=n_terms, ncol=1)
129 |   
130 |   rownames(model_i_new) <- names_i
131 |   for(q in 1:n_terms) model_i_new[q, 1] <- sum(unlist(l_cPars[[q]]))
132 |   
133 |   return(model_i_new)
134 |   
135 | } # eoF
136 | 
137 | 
138 | 
139 | 
140 | 


--------------------------------------------------------------------------------
/R/defunct_msg.R:
--------------------------------------------------------------------------------
 1 | 
 2 | mgmfit <- function(...) {
 3 |   .Defunct('mgm')
 4 | }
 5 | 
 6 | var.mgm <- function(...) {
 7 |   .Defunct('mvar')
 8 | }
 9 | 
10 | tv.mgmfit <- function(...) {
11 |   .Defunct('tvmgm')
12 | }
13 | 
14 | tv_var.mgm <- function(...) {
15 |   .Defunct('tvmvar')
16 | }
17 | 
18 | 


--------------------------------------------------------------------------------
/R/getSign.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | getSign <- function(l_w_ind,
  4 |                     l_w_par,
  5 |                     type,
  6 |                     set_signdefined,
  7 |                     overparameterize,
  8 |                     ord)
  9 |   
 10 |   
 11 |   
 12 | {
 13 |   
 14 |   
 15 |   # ---------- Compute Aux Variables ----------
 16 |   
 17 |   pair <- l_w_ind[[1]]
 18 |   
 19 |   outlist <- list("voteSign" = NULL,
 20 |                   "Signs" = NULL)
 21 |   
 22 |   # ---------- A) For overparameterize = TRUE ----------
 23 |   
 24 |   if(overparameterize) {
 25 |     
 26 |     # We have to take care of 3 cases: con-con, cat-cat, con-cat
 27 |     
 28 |     # I) ----- continuous-continuous -----
 29 |     
 30 |     if(all(type[pair]!='c')) {
 31 |       v_sign <- rep(NA, ord+1)
 32 |       for(u in 1:(ord+1)) v_sign[u] <-  sign(l_w_par[[u]])
 33 |       int_sign <- sign(mean(v_sign)) # Majority vote; if equal, we get 0 = undefined
 34 |       
 35 |       outlist$voteSign <- int_sign
 36 |       outlist$Signs <- v_sign
 37 |       
 38 |     } # end if: I)
 39 |     
 40 |     
 41 |     # II) ----- binary-binary -----
 42 |     
 43 |     if(all(type[pair]=='c')) {
 44 |       
 45 |       # Loop through terms (there are "ord" terms)
 46 |       v_sign_ord_j <- rep(NA, ord+1)
 47 |       for(ord_j in 1:(ord+1)) {
 48 |         
 49 |         sign_j <- 0 #  default in case regression1 led to a zero estimate
 50 |         if(l_w_par[[ord_j]][[1]][1] != 0) sign_j <- sign(l_w_par[[ord_j]][[1]][1])
 51 |         if(l_w_par[[ord_j]][[1]][2] != 0) sign_j <- - sign(l_w_par[[ord_j]][[1]][2])
 52 |         
 53 |         v_sign_ord_j[ord_j] <- sign_j
 54 |         
 55 |       } # end loop: ord_j
 56 |       
 57 |       # Majority vote
 58 |       int_sign <- sign(mean(v_sign_ord_j))
 59 |       
 60 |       outlist$Signs <- NA
 61 |       outlist$voteSign <- int_sign
 62 |       
 63 |       # if(ord == 1) {
 64 |       #   ## only need to check parameters for one category, because of symmetry
 65 |       #   # regression1
 66 |       #   sign1 <- 0 #  default in case regression1 led to a zero estimate
 67 |       #   if(l_w_par[[1]][[1]][1] != 0) sign1 <- sign(l_w_par[[1]][[1]][1])
 68 |       #   if(l_w_par[[1]][[1]][2] != 0) sign1 <- - sign(l_w_par[[1]][[1]][2])
 69 |       #   # regression2
 70 |       #   sign2 <- 0 #  default in case regression1 led to a zero estimate
 71 |       #   if(l_w_par[[2]][[1]][1] != 0) sign2 <- sign(l_w_par[[2]][[1]][1])
 72 |       #   if(l_w_par[[2]][[1]][2] != 0) sign2 <- - sign(l_w_par[[2]][[1]][2])
 73 |       #   int_sign <- sign(mean(c(sign1, sign2))) # Majority vote
 74 |       #   
 75 |       #   outlist$voteSign <- int_sign
 76 |       #   outlist$Signs <- c(sign1, sign2)
 77 |       #   
 78 |       # } else {
 79 |       #   int_sign <- 0 # no sign defined for interactions of order k>2
 80 |       #   
 81 |       #   outlist$Signs <- NA
 82 |       #   outlist$voteSign <- int_sign
 83 |       #   
 84 |       # }
 85 |       
 86 |       
 87 |     } # end if: II)
 88 |     
 89 |     
 90 |     # III) ----- continuous-binary -----
 91 |     
 92 |     if(any(type[pair] %in% 'c') & any(type[pair] %in% c('p', 'g')) ) {
 93 |       
 94 |       
 95 |       # Loop through terms (there are "ord" terms)
 96 |       v_sign_ord_j <- rep(NA, ord+1)
 97 |       for(ord_j in 1:(ord+1)) {
 98 | 
 99 |         if(!any(class(l_w_par[[ord_j]])=="list"))  { #is.null -> continuous, else binary
100 |           sign_j <- sign(as.numeric(l_w_par[[ord_j]][[2]]))
101 |         } else {
102 |           sign_j <- 0 #  default in case regression1 led to a zero estimate
103 |           if(l_w_par[[ord_j]][[1]][1] != 0) sign2 <- - sign(l_w_par[[ord_j]][[1]][1]) # positive value for state 1 means negative 'pairwise relationship'
104 |           if(l_w_par[[ord_j]][[2]][1] != 0) sign2 <- sign(l_w_par[[ord_j]][[2]][1])
105 |         }
106 | 
107 |         v_sign_ord_j[ord_j] <- sign_j
108 |         
109 |       } # end loop: ord_j
110 |       
111 |       # Majority vote
112 |       int_sign <- sign(mean(v_sign_ord_j))
113 |       
114 |       outlist$Signs <- NA
115 |       outlist$voteSign <- int_sign
116 |       
117 |       # if(ord == 1) {
118 |       #   sign1 <- sign2 <- 0 # set default in case one direction has zero estimates
119 |       #   # need to know which list entry in l_w_par corresponds to which regression: cont <- binary or cont -> binary; I do that by the fixed dimensionality of the parameter vector/matrix
120 |       #   if(is.null(dim(l_w_par[[1]])))  { #is.null -> continuous, else binary
121 |       #     sign1 <- sign(as.numeric(l_w_par[[1]][[2]]))
122 |       #   } else {
123 |       #     if(l_w_par[[1]][1, 1] != 0) sign1 <- - sign(l_w_par[[1]][1,1]) # positive value for state 1 means negative 'pairwise relationship'
124 |       #     if(l_w_par[[1]][2, 1] != 0) sign1 <- sign(l_w_par[[1]][2,1])
125 |       #   }
126 |       #   if(is.null(dim(l_w_par[[2]])))  {
127 |       #     sign2 <- sign(l_w_par[[2]][2])
128 |       #   } else {
129 |       #     if(l_w_par[[2]][1,1] != 0) sign2 <- - sign(l_w_par[[2]][1,1]) # positive value for state 1 means negative 'pairwise relationship'
130 |       #     if(l_w_par[[2]][2,1] != 0) sign2 <- sign(l_w_par[[2]][2,1])
131 |       #   }
132 |       #   int_sign <- sign(mean(c(sign1,sign2))) # Majority vote
133 |       #   
134 |       #   outlist$voteSign <- int_sign
135 |       #   outlist$Signs <- c(sign1, sign2)
136 |       #   
137 |       # } else {
138 |       #   int_sign <- 0 # no sign defined for interactions of order k>2
139 |       #   
140 |       #   outlist$Signs <- NA
141 |       #   outlist$voteSign <- int_sign
142 |       # }
143 |       
144 |     } # end if: III)
145 |     
146 |     
147 |   } # end if: overparameterize?
148 |   
149 |   
150 |   
151 |   
152 |   # ---------- B) For overparameterize = FALSE ----------
153 |   
154 |   if(!overparameterize) {
155 |     
156 |     
157 |     # I) ----- continuous-continuous -----
158 |     
159 |     if(all(type[pair]!='c')) {
160 |       v_sign <- rep(NA, ord+1)
161 |       for(u in 1:(ord+1)) v_sign[u] <-  sign(l_w_par[[u]])
162 |       int_sign <- sign(mean(v_sign)) # Majority vote; if equal, we get 0 = undefined
163 |       
164 |       outlist$voteSign <- int_sign
165 |       outlist$Signs <- v_sign
166 |       
167 |     } # end if: I)
168 |     
169 |     
170 |     # II) ----- binary-binary -----
171 |     
172 |     if(all(type[pair]=='c')) {
173 |       
174 |       # Loop through terms (there are "ord" terms)
175 |       v_sign_ord_j <- rep(NA, ord+1)
176 |       for(ord_j in 1:(ord+1)) {
177 |         
178 |         sign_j <- sign(as.numeric(l_w_par[[ord_j]][[2]])) # if interaction A is binary <- cont
179 |         v_sign_ord_j[ord_j] <- sign_j
180 |         
181 |       } # end loop: ord_j
182 |       
183 |       # Majority vote
184 |       int_sign <- sign(mean(v_sign_ord_j))
185 |       
186 |       outlist$Signs <- NA
187 |       outlist$voteSign <- int_sign
188 |       
189 |       # if(ord == 1) {
190 |       #   sign1 <- sign(as.numeric(l_w_par[[1]][[2]]))
191 |       #   sign2 <- sign(as.numeric(l_w_par[[2]][[2]]))
192 |       #   int_sign <- sign(mean(c(sign1, sign2)))
193 |       #   
194 |       #   outlist$voteSign <- int_sign
195 |       #   outlist$Signs <- c(sign1, sign2)
196 |       #   
197 |       # } else {
198 |       #   int_sign <- 0 # no sign defined for interactions of order k>2
199 |       #   
200 |       #   outlist$Signs <- NA
201 |       #   outlist$voteSign <- int_sign
202 |       # }
203 |       
204 |     } # end if: II)
205 |     
206 |     
207 |     # III) ----- continuous-binary -----
208 |     
209 |     if(any(type[pair] %in% 'c') & any(type[pair] %in% c('p', 'g')) ) {
210 |       
211 |       # if(ord == 1) {
212 |       
213 |       # if(length(l_w_par[[1]]) == 1) {
214 |       #   sign1 <- sign(as.numeric(l_w_par[[1]])) # if interaction A is cont <- binary
215 |       # } else {
216 |       #   sign1 <- sign(as.numeric(l_w_par[[1]][[2]])) # if interaction A is binary <- cont
217 |       # }
218 |       # 
219 |       # # same for second interaction
220 |       # if(length(l_w_par[[2]]) == 1) {
221 |       #   sign2 <- sign(as.numeric(l_w_par[[2]]))
222 |       # } else {
223 |       #   sign2 <- sign(as.numeric(l_w_par[[2]][[2]]))
224 |       # }
225 |       # 
226 |       # int_sign <- sign(mean(c(sign1, sign2)))
227 |       # 
228 |       # outlist$voteSign <- int_sign
229 |       # outlist$Signs <- c(sign1, sign2)
230 |       
231 |       # } else {
232 |       
233 |       # Loop through terms (there are "ord" terms)
234 |       v_sign_ord_j <- rep(NA, ord+1)
235 |       for(ord_j in 1:(ord+1)) {
236 |         
237 |         if(length(l_w_par[[ord_j]]) == 1) {
238 |           sign_j <- sign(as.numeric(l_w_par[[ord_j]])) # if interaction A is cont <- binary
239 |         } else {
240 |           sign_j <- sign(as.numeric(l_w_par[[ord_j]][[2]])) # if interaction A is binary <- cont
241 |         }
242 |         
243 |         v_sign_ord_j[ord_j] <- sign_j
244 |         
245 |       } # end loop: ord_j
246 |       
247 |       # Majority vote
248 |       int_sign <- sign(mean(v_sign_ord_j))
249 |       
250 |       outlist$Signs <- NA
251 |       outlist$voteSign <- int_sign
252 |       
253 |       # }
254 |       
255 |     } # end if: III)
256 |     
257 |     
258 |   } # end if: overparameterize?
259 |   
260 |   # ---------- Return Sign ----------
261 |   
262 |   return(outlist)
263 |   
264 | } # eoF
265 | 


--------------------------------------------------------------------------------
/R/glmnetRequirements.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | glmnetRequirements <- function(data,
 4 |                                type,
 5 |                                weights, 
 6 |                                bootstrap = FALSE,
 7 |                                b = NULL, 
 8 |                                seed_b = NULL, 
 9 |                                silent = FALSE) {
10 |   
11 |   # if silent = FALSE; the function returns error messages
12 |   # if silent = TRUE: the function returns a logical with TRUE=at least one error; 
13 |   #   this is used in resampling schemes to discard bootstrap samples that do not allow to fit the model
14 |   
15 |   var_names <- colnames(data)
16 |   n <- nrow(data)
17 |   
18 |   # 1) Nonzero variance
19 |   var_check <- apply(data, 2, var)
20 |   ind_zero_var <- which(var_check == 0)
21 |   
22 |   check_var_1 <- length(ind_zero_var) > 0
23 | 
24 |   if(!silent)  if(check_var_1) {
25 |     if(bootstrap) cat(paste0("In boostrap sample ", b, " with seed ", seed_b, " the following error occured:\n"))
26 |     stop(paste0('Please only provide variables with nonzero variance. Variable(s) with zero variance: ', paste(var_names[ind_zero_var], collapse = ', ')))
27 |   } 
28 | 
29 |   
30 |   # 2) > 1 events per category
31 |   
32 |   check_var_2 <- FALSE # for the case of: (1) no categorical variables and (2) silent = TRUE (when called in resample() )
33 |   
34 |   if('c' %in% type) {
35 |     ind_cat <- which(type == 'c')
36 |     l_frqu <- list()
37 |     for(i in 1:length(ind_cat)) l_frqu[[i]] <- table(data[,ind_cat[i]]) # this does not catch the case where one category is not present at all; but this is catched by comparing specified levels and real levels
38 |     v_check <- unlist(lapply(l_frqu, function(x) {
39 |       frq_norm <- x / sum(x)
40 |       ind_min <- which.min(frq_norm)
41 |       check1 <- !(x[ind_min] > 1)
42 |     }))
43 |     # Error Msg Check 1:
44 |     
45 |     check_var_2 <- sum(v_check) > 0
46 |     
47 |     if(!silent) if(check_var_2) {
48 |       ind_check1 <- ind_cat[v_check == TRUE]
49 |       stop(paste0('At least 2 events required for each category. Requirement not met for variable(s): ',paste(var_names[ind_cat[ind_check1]], collapse = ', ')))
50 |     }
51 |   }
52 |   
53 |   
54 |   # 3) For each category: p(K=l) > 10^-5
55 |   
56 |   check_var_3 <- FALSE # for the case of: (1) no categorical variables and (2) silent = TRUE (when called in resample() )
57 |   
58 |   if('c' %in% type) {
59 |     
60 |     # Function to compute weighted table:
61 |     wtable <- function(x, weights) {
62 |       n_level <- length(unique(x))
63 |       v_level <- unique(x)
64 |       n_obs <- length(x)
65 |       v_wfrq <- rep(NA, n_level)
66 |       for(i in 1:n_level) v_wfrq[i] <- sum(rep(1, sum(x == v_level[i])) * weights[x == v_level[i]]) / n
67 |       return(v_wfrq)
68 |     }
69 |     
70 |     
71 |     ind_cat <- which(type == 'c')
72 |     check2 <- rep(NA, length(ind_cat))
73 |     for(i in 1:length(ind_cat)) check2[i] <- min(wtable(data[,ind_cat[i]], weights)) < 10^-5 # smaller = error
74 |     
75 |     check_var_3 <- sum(check2)>0
76 |     
77 |     if(!silent) if(check_var_3) stop(paste0('Each category has to have probability > 10^-5. Requirement not met for variable(s): ',paste(var_names[ind_cat[check2]], collapse = ', ')))
78 |     
79 |   }
80 |   
81 |   
82 |   # If any of the three checks fails, return TRUE
83 |   if(silent) return(any(check_var_1, check_var_2, check_var_3))
84 |   
85 |   
86 | } # eoF
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 


--------------------------------------------------------------------------------
/R/lagData.R:
--------------------------------------------------------------------------------
 1 | 
 2 | lagData <- function(data, 
 3 |                     lags, 
 4 |                     consec = NULL) {
 5 | 
 6 | 
 7 |   # ---------- Compute Aux Variables ----------
 8 | 
 9 |   data <- as.matrix(data) # turn into matrix
10 |   
11 |   max_lag <- max(lags) # maximum lag
12 |   lags_ext <- 1:max(lags) # makes it easier to delete right columns
13 | 
14 |   n <- nrow(data)
15 |   p <- ncol(data)
16 |   n_var <- nrow(data) - max(lags)
17 |   n_lags <- length(lags_ext)
18 | 
19 |   data_response <- data #[-c(1:max_lag), ]
20 | 
21 |   if(!is.null(consec)) m_consec <- matrix(NA, 
22 |                                           nrow = n, 
23 |                                           ncol = n_lags)
24 | 
25 |   # ---------- Lag Variables ----------
26 | 
27 |   # Storage
28 |   l_data_lags <- list()
29 | 
30 |   # Loop through lags
31 |   lag_pos <- 1 # to make sure that the list is filled successively, if not a full sequence (e.g. lags = c(1,5)); otherwise this leads to problems later in the code  
32 |   for(lag in lags) {
33 |     
34 |     lagged_data <- matrix(NA, nrow = n, ncol=p)
35 |     lagged_data[(lag+1):n, ] <- data[-((n-lag+1) : n), ]
36 |     lagged_data <- matrix(lagged_data, 
37 |                           ncol = p, 
38 |                           nrow = n) 
39 |     colnames(lagged_data) <- paste("V", 1:p, '.lag', lag, '.', sep = "")
40 |     
41 |     l_data_lags[[lag_pos]] <- lagged_data
42 | 
43 |     lag_pos <- lag_pos + 1
44 |   }
45 | 
46 |   # ---------- Knock Out if not consecutive ----------
47 | 
48 |   
49 |   # browser()
50 |   
51 |   if(!is.null(consec)) {
52 | 
53 |     for(lag in lags_ext)  m_consec[(lag+1):n, lag] <- consec[-((n-lag+1) : n)]
54 | 
55 |     # Calculate which cases to knock out
56 |     m_consec_check <- cbind(consec, m_consec)
57 |     
58 |     v_check <- apply(m_consec_check, 1, function(x) {
59 |       
60 |       if(any(is.na(x))) {
61 |         FALSE
62 |       } else {
63 |         check_row <- x[1] - x[-1] == 1:length(x[-1]) # check for extended lags 1:max(lags)
64 |         check_row_relevant <- check_row[lags_ext %in% lags] # but then compute check only over the lags that are actually specified
65 |         if(any(check_row_relevant == FALSE)) FALSE else TRUE # and return test result: any required previous measurement missing? => FALSE
66 |       }
67 | 
68 |     })
69 | 
70 |   } else {
71 |     
72 |     v_check <- rep(TRUE, n)
73 |     v_check[1:n_lags] <- FALSE
74 |     
75 |   }
76 | 
77 | 
78 |   # ---------- Output ----------
79 |   
80 |   outlist <- list()
81 |   outlist$data_response <- data_response
82 |   outlist$l_data_lags <- l_data_lags
83 |   outlist$included <- v_check
84 | 
85 | 
86 |   return(outlist)
87 | 
88 | }
89 | 


--------------------------------------------------------------------------------
/R/mvarsampler.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | mvarsampler <- function(coefarray, # v x v2 x cat(v) x cat(v2) x lag array specifying lagged parameters
  4 |                         lags, # vector specifiying the lags
  5 |                         thresholds, # p list containint vector with a threshold for each category
  6 |                         sds, # p vector of sds
  7 |                         type,
  8 |                         level,
  9 |                         N, # number of cases that should be sampled from the model
 10 |                         pbar)
 11 | 
 12 | {
 13 | 
 14 |   # ---------- Input Checks ----------
 15 | 
 16 |   # Does coefarray have the right dimensions?
 17 |   if(length(dim(coefarray))!=5) stop('coefarray has to have 5 dimensions: p x p x max(level) x max(level) x n_lags.')
 18 |   if(dim(coefarray)[5] != length(lags)) stop('Dimensions for lags in coefarray have to match the number of lags specified in lags.')
 19 |   if(dim(coefarray)[1] != length(thresholds)) stop('Dimensions for variables in coefarray have to match the number of specified thresholds.')
 20 |   if(any(type=='g')) if(dim(coefarray)[1] != length(sds)) stop('Dimensions for variables in coefarray have to match the number of specified standard deviations.')
 21 |   if(dim(coefarray)[1] != length(type)) stop('Dimensions for variables in coefarray have to match the number of specified types.')
 22 |   if(dim(coefarray)[1] != length(level)) stop('Dimensions for variables in coefarray have to match the number of specified levels.')
 23 |   if(dim(coefarray)[1] != dim(coefarray)[2]) stop('The first two dimensions specifying the cross-lagged effects in coefarray have to have the same dimensionality.')
 24 | 
 25 | 
 26 |   # ---------- Compute Auxilliary Variables ----------
 27 | 
 28 |   p <- dim(coefarray)[1]
 29 |   n_lags <- length(lags)
 30 |   labels <- paste0('V', 1:p, '.')
 31 |   max_lag <- max(lags)
 32 | 
 33 | 
 34 |   # ---------- Create Output Object & copy the call ----------
 35 | 
 36 |   # Create Output Object
 37 |   mvarsamp_obj <- list('call' = NULL,
 38 |                        'data' = NULL)
 39 | 
 40 |   # Copy the call
 41 |   mvarsamp_obj$call <- list('coefarray' = coefarray,
 42 |                             'lags' = lags,
 43 |                             'thresholds' = thresholds,
 44 |                             'sds' = sds,
 45 |                             'type' = type,
 46 |                             'level' = level,
 47 |                             'N' = N,
 48 |                             'pbar' = pbar)
 49 | 
 50 | 
 51 |   # ---------- Sample Data ----------
 52 | 
 53 |   data <- matrix(NA, ncol = p, nrow = N + max_lag)
 54 | 
 55 |   # Create random starting values
 56 |   for(r in 1:max_lag) {
 57 |     for(v in 1:p) {
 58 |       if(type[v] == 'c') data[r, v] <- sample(1:level[v], size = 1)
 59 |       if(type[v] == 'g') data[r, v] <- rnorm(1)
 60 |       if(type[v] == 'p') data[r, v] <- rpois(1, 1)
 61 |     }
 62 |   }
 63 | 
 64 | 
 65 |   if(pbar==TRUE) pb <- txtProgressBar(min = 0, max=N, initial=0, char = "-", style = 3)
 66 | 
 67 | 
 68 |   for(r in (max_lag+1):(N+max_lag)) {
 69 | 
 70 |     for(v in 1:p) {
 71 | 
 72 |       if(type[v] != 'c') {
 73 | 
 74 |         # Get design matrix (always use overparamterized version)
 75 |         design_mat <- ModelMatrix(data, 
 76 |                                   type, 
 77 |                                   level, 
 78 |                                   labels, 
 79 |                                   d = 1, 
 80 |                                   allCats = TRUE)
 81 | 
 82 |         potential_lag <- list()
 83 |         for(lag in 1:n_lags) {
 84 | 
 85 |           # Get parameters in order
 86 |           l_parms <- list()
 87 |           for(par in (1:p)[-v]) l_parms[[par]] <- coefarray[v, par, 1:level[v], 1:level[par], lag]
 88 |           v_parms <- unlist(l_parms)
 89 | 
 90 |           # browser()
 91 |           
 92 |           # Compute part of potential for given lag
 93 |           potential_lag[[lag]] <- design_mat[r - lags[lag], ] * v_parms
 94 | 
 95 |         } # end for: lag
 96 | 
 97 |         mu <- thresholds[[v]] + sum(unlist(potential_lag))
 98 | 
 99 |         # if(v==3) browser()
100 |         
101 |         if(type[v] == 'g') data[r, v] <- rnorm(1, mean = mu, sd = sds[v])
102 |         if(type[v] == 'p') data[r, v] <- rpois(1, lambda = mu)
103 | 
104 |       } # end if: !='c'
105 | 
106 | 
107 | 
108 |       if(type[v] == 'c') {
109 | 
110 |         # Get design matrix
111 |         design_mat <- ModelMatrix(data, 
112 |                                   type, 
113 |                                   level, 
114 |                                   labels, 
115 |                                   d = 1, 
116 |                                   v = v,
117 |                                   allCats = TRUE)
118 | 
119 |         # Loop over categories
120 |         l_potentials <- list()
121 |         for(cat in 1:level[v]) {
122 | 
123 |           potential_lag <- list()
124 |           for(lag in 1:n_lags) {
125 | 
126 |             # Get parameters in order
127 |             l_parms <- list()
128 |             for(par in (1:p)[-v]) l_parms[[par]] <- coefarray[v, par, (1:level[v])[cat], 1:level[par], lag]
129 |             v_parms <- unlist(l_parms)
130 | 
131 |             # Compute part of potential for given lag
132 |             potential_lag[[lag]] <- design_mat[r - lags[lag], ] * v_parms
133 | 
134 |           } # end for: lag
135 | 
136 |           # put potential-parts from different lags together
137 |           l_potentials[[cat]] <- thresholds[[v]][cat] + sum(unlist(potential_lag))
138 | 
139 |         } # end for: cat
140 | 
141 | 
142 |         # compute probabilities
143 |         v_potentials <- exp(unlist(l_potentials))
144 |         probabilities <- v_potentials / sum(v_potentials)
145 | 
146 |         data[r, v] <- sample(1:level[v], prob = probabilities, size = 1)
147 | 
148 |       } # end if: =='c'?
149 | 
150 | 
151 |     } # end for: v variables
152 | 
153 |     if(pbar==TRUE) setTxtProgressBar(pb, r)
154 | 
155 |   } # end for: r rows
156 | 
157 | 
158 | 
159 |   # ---------- Output ----------
160 | 
161 |   # browser()
162 | 
163 |   mvarsamp_obj$data <- data[(max_lag+1):(N+max_lag), ]
164 | 
165 |   return(mvarsamp_obj)
166 | 
167 | } # eoF
168 | 
169 | 
170 | 
171 | 


--------------------------------------------------------------------------------
/R/nodeEst.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | nodeEst <- function(y,
  4 |                     X,
  5 |                     fam,
  6 |                     lambdaSeq,
  7 |                     lambdaSel,
  8 |                     lambdaFolds,
  9 |                     lambdaGam,
 10 |                     alpha,
 11 |                     weights,
 12 |                     n,
 13 |                     nadj,
 14 |                     v,
 15 |                     type,
 16 |                     level,
 17 |                     emp_lev,
 18 |                     overparameterize, 
 19 |                     thresholdCat)
 20 | 
 21 | 
 22 | {
 23 |   
 24 |   # ---------- Calc Aux Variables ----------
 25 |   
 26 |   # Define Exponential Family of Node at Hand
 27 |   if(type[v] == 'c') fam = 'multinomial'
 28 |   if(type[v] == 'g') fam = 'gaussian'
 29 |   if(type[v] == 'p') fam = 'poisson'
 30 |   
 31 |   
 32 |   # Set threshold (intercept) parameter to zero?
 33 |   if(type[v] == 'c') {
 34 |     intercept <- thresholdCat
 35 |   } else {
 36 |     intercept <- TRUE # for continuous variables always estimated
 37 |   }
 38 |   
 39 |   
 40 |   # ---------- Lambda selection via EBIC ----------
 41 |   
 42 |   if(lambdaSel == 'EBIC') {
 43 |     
 44 |     
 45 |     # if(v==3) browser()FA
 46 |     
 47 |     # ----- Fit Model -----
 48 |     
 49 |     fit <- glmnet(x = X,
 50 |                   y = y,
 51 |                   family = fam,
 52 |                   alpha = alpha,
 53 |                   weights = weights,
 54 |                   lambda = lambdaSeq,
 55 |                   intercept = intercept)
 56 |     
 57 |     
 58 |     
 59 |     n_lambdas <- length(fit$lambda) # length of fitted lambda sequence
 60 | 
 61 |     # ----- Calc EBIC of model: Fast Alternative by using -----
 62 |     
 63 |     # Calculate LL of Null Model
 64 |     LL_null <- calcLL(X = X,
 65 |                       y = y,
 66 |                       fit = fit,
 67 |                       type = type,
 68 |                       level = level,
 69 |                       v = v,
 70 |                       weights = weights,
 71 |                       lambda = fit$lambda[1], # any is fine, lambda has no influence on null (intercept) model
 72 |                       LLtype = 'nullmodel')
 73 |     
 74 |     LL_sat <- 1/2 * fit$nulldev + LL_null # calculate LL of saturated model
 75 |     deviance <- (1 - fit$dev.ratio) * fit$nulldev # note: dangerous in glmnet: fit$dev = fit$dev.ratio
 76 |     LL_lambda_models <- - 1/2 * deviance + LL_sat # length = length of lambda sequence
 77 |     
 78 |     
 79 |     n_neighbors <- rep(NA, n_lambdas)
 80 |     for(i in 1:n_lambdas)  n_neighbors[i] <- calcNeighbors(fit = fit,
 81 |                                                            lambda = fit$lambda[i],
 82 |                                                            type = type,
 83 |                                                            level = level,
 84 |                                                            v = v)
 85 |     # Note: n_neighbors = fit$df
 86 |     EBIC_lambda <- - 2 * LL_lambda_models + n_neighbors * log(nadj) + 2 * lambdaGam * n_neighbors * log(ncol(X))
 87 |     
 88 |     EBIC_min <- min(EBIC_lambda)
 89 |     
 90 |     ind_lambda_min <- which.min(EBIC_lambda)
 91 |     lambda_min <- fit$lambda[ind_lambda_min]
 92 |     lambad_min_model <- coef(fit, s = lambda_min)
 93 |     
 94 |     # ----- Output -----
 95 |     
 96 |     outlist <- list('EBIC' = EBIC_min,
 97 |                     'deviance' = deviance[which.min(EBIC_lambda)],
 98 |                     'lambda' = lambda_min,
 99 |                     'alpha' = alpha,
100 |                     'model' = lambad_min_model,
101 |                     'fitobj' = fit)
102 |     
103 |   } # end if: EBIC
104 |   
105 |   
106 |   # ---------- Lambda selection via CV ----------
107 |   
108 |   if(lambdaSel == 'CV') {
109 |     
110 |     # ----- Fit Model -----
111 |     
112 |     fit <- cv.glmnet(x = X,
113 |                      y = y,
114 |                      family = fam,
115 |                      alpha = alpha,
116 |                      weights = weights,
117 |                      nfolds = lambdaFolds,
118 |                      type.measure = "deviance",
119 |                      lambda = lambdaSeq,
120 |                      intercept = intercept)
121 |     
122 |     lambda_min <-  fit$lambda.min
123 |     lambad_min_model <- coef(fit, s = lambda_min)
124 |     
125 |     # ----- Calc Deviance of model -----
126 |     # (used in alpha selection via EBIC)
127 |     
128 |     LL_model <- calcLL(X = X,
129 |                        y = y,
130 |                        fit = fit,
131 |                        type = type,
132 |                        level = level,
133 |                        v = v,
134 |                        weights = weights,
135 |                        lambda = lambda_min,
136 |                        LLtype = 'model')
137 |     
138 |     n_neighbors <- calcNeighbors(fit = fit,
139 |                                  lambda = lambda_min,
140 |                                  type = type,
141 |                                  level = level,
142 |                                  v = v)
143 |     
144 |     EBIC <- - 2 * LL_model + n_neighbors * log(nadj) + 2 * lambdaGam * log(ncol(X))
145 |     
146 |     
147 |     # ----- Output -----
148 |     
149 |     outlist <- list('EBIC' = EBIC,
150 |                     'deviance' = NULL,
151 |                     'lambda' = lambda_min,
152 |                     'alpha' = alpha,
153 |                     'model' = lambad_min_model,
154 |                     'fitobj' = fit)
155 |     
156 |   } # end if: CV
157 |   
158 |   
159 |   # ----- Return -----
160 |   
161 |   return(outlist)
162 |   
163 |   
164 |   
165 | } # eoF
166 | 


--------------------------------------------------------------------------------
/R/predictCore_stat.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | predictCore_stat <- function(object,
  4 |                              data, 
  5 |                              consec = NULL)
  6 |   
  7 |   
  8 | {
  9 |   
 10 |   
 11 |   # ----- Compute Aux Variables -----
 12 |   
 13 |   
 14 |   cobj <- class(object)[2]
 15 |   nNodes <- ncol(data)
 16 |   nCases <- nrow(data)
 17 |   call <- object$call
 18 |   type <- call$type
 19 |   level <- call$level
 20 |   k <- call$k
 21 |   n_lags <- length(call$lags)
 22 |   if(cobj == "mvar") max_lags <- max(call$lags)
 23 |   
 24 |   
 25 |   # Create outlist and storage
 26 |   predCoreObj <- list('pred' = vector('list', length = nNodes),
 27 |                       'prob' = vector('list', length = nNodes),
 28 |                       'true' = NULL,
 29 |                       'included' = NULL)
 30 |   
 31 |   
 32 |   # Some Generic Data preparation
 33 |   data <- data.frame(data)
 34 |   colnames(data)[1:nNodes] <- paste("V", 1:nNodes, '.', sep = "")
 35 |   for(sc in which(type=='g')) data[,sc] <- scale(data[,sc]) # scale gaussians
 36 |   data_df <- data
 37 |   for(sc2 in which(type=='c')) data_df[,sc2] <- as.factor(data_df[,sc2]) # categoricals as factors
 38 |   
 39 |   nodeModels <- object$nodemodels
 40 |   d <- object$call$k - 1
 41 |   
 42 |   # ----- A.1) mgm -----
 43 |   
 44 |   if(cobj == 'core') {
 45 |     
 46 |     for(v in 1:nNodes) {
 47 |       
 48 |       # -- Define model matrix --
 49 |       
 50 |       # ----- Construct Design Matrix -----
 51 |       
 52 |       X_standard <- X <- ModelMatrix_standard(data = data,
 53 |                                               type = type,
 54 |                                               d = d, 
 55 |                                               v = v, 
 56 |                                               moderators = object$call$moderators)
 57 |       
 58 |       if(object$call$overparameterize) {
 59 |         
 60 |         X_over <- ModelMatrix(data = data, # fix that input, that's stupid
 61 |                               type = type,
 62 |                               level = level,
 63 |                               labels = colnames(data),
 64 |                               d = d, 
 65 |                               moderators = object$call$moderators,
 66 |                               v = v)
 67 |         
 68 |         X <- X_over
 69 |         
 70 |       } # end if: overparameterize?
 71 |       
 72 |       data_df <- as.data.frame(data_df)
 73 |       y <- as.numeric(data[, v])
 74 |       
 75 |       
 76 |       if(type[v]=='c') {
 77 |         
 78 |         ## Prediction Categorical
 79 |         coefs <- nodeModels[[v]]$model
 80 |         n_cat <- length(coefs)
 81 |         Potentials <- matrix(NA, nCases, n_cat)
 82 |         
 83 |         # loop over categories & compute potentials
 84 |         for(cat in 1:n_cat) Potentials[,cat] <- exp(coefs[[cat]][1] + X %*% matrix(coefs[[cat]][-1], nrow=length(coefs[[cat]][-1])))
 85 |         
 86 |         # compute category-probabilities
 87 |         Probabilities <- Potentials[,1:n_cat] / rowSums(Potentials[,1:n_cat])
 88 |         pred_class_id <-  apply(Probabilities, 1, which.max) # classify
 89 |         predCoreObj$pred[[v]] <- sort(unique(data[,v]))[pred_class_id] # matches the ordering of predicted categories in glmnet(), which also orders with increasing integers
 90 |         predCoreObj$prob[[v]] <- Probabilities
 91 |         
 92 |         
 93 |       } else {
 94 |         
 95 |         ## Prediction Continuous (same for Gauss and Pois, because in both cases the natural parameter is equal to the expectation)
 96 |         # predicitions
 97 |         coefs <- as.numeric(nodeModels[[v]]$model) # get coefficients
 98 |         predCoreObj$pred[[v]] <- coefs[1] + X %*% matrix(coefs[-1], nrow=length(coefs[-1])) # predict
 99 |         
100 |         if(type[v]=="p") predCoreObj$pred[[v]] <- exp(predCoreObj$pred[[v]]) # coefficients of poisson regresison are on log scale
101 |         
102 |       }
103 |       
104 |     } # end of variable loop 'mgm'
105 |     
106 |     # Save ground truth
107 |     predCoreObj$true <- data
108 |     
109 |   }
110 |   
111 |   
112 |   
113 |   # ----- A.2) mvar -----
114 |   
115 |   if(cobj == 'mvar') {
116 |     
117 |     # Prepare Data (already cuts max(lags) first observations to compute design matrix)
118 |     data_lagged <- lagData(data = data, 
119 |                            lags = object$call$lags, 
120 |                            consec = consec)
121 |     
122 |     predCoreObj$included <- data_lagged$included # this specifies additionally, whether measurements are successive after the max(lags) measurement
123 |     
124 |     data_response <- data_lagged$data_response
125 |     l_data_lags <- data_lagged$l_data_lags
126 |     data_response <- apply(data_response, 2, as.numeric) # to avoid confusion with labels of categories if there are factors
127 |     
128 |     data_response <- data_response[predCoreObj$included, ]
129 |     l_data_lags <- lapply(l_data_lags, function(z) z <- z[predCoreObj$included, ])
130 |     
131 |     nCases <- nrow(data_response)
132 |     
133 |     for(v in 1:nNodes) {
134 |       
135 |       # ----- Create VAR Design Matrix -----
136 |       
137 |       # append response with predictors
138 |       y <- data_response[, v] # response variable v
139 |       data_input_MM <- do.call(cbind, l_data_lags)
140 |       data_input_MM <- as.data.frame(data_input_MM)
141 |       
142 |       # turn categoricals into factors for model.matrix()
143 |       for(i in which(type=='c')) data_input_MM[, i] <- as.factor(data_input_MM[, i])  
144 |       
145 |       # need to have response and predictors in one dataframe for model.matrix()
146 |       data_v <- cbind(y, data_input_MM) 
147 |       
148 |       # Dummy coding
149 |       form <- as.formula('y ~ (.)')
150 |       
151 |       # Construct standard design matrix (to get number of parameters for tau threshold)
152 |       X_standard <- model.matrix(form, data = data_v)[, -1] # delete intercept (added by glmnet later)
153 |       
154 |       if(object$call$overparameterize) {
155 |         
156 |         # Compute augmented type and level vectors
157 |         type_aug <- rep(type, n_lags)
158 |         level_aug <- rep(level, n_lags)
159 |         
160 |         # Construct over-parameterized design matrix
161 |         X_over <- ModelMatrix(data = data_input_MM,
162 |                               type = type_aug,
163 |                               level = level_aug,
164 |                               labels = colnames(data_input_MM),
165 |                               d = 1, 
166 |                               v = NULL)
167 |         X <- X_over
168 |         
169 |       } else {
170 |         
171 |         X <- X_standard
172 |         
173 |       }
174 |       
175 |       # ---- Prediction -----
176 |       
177 |       if(type[v]=='c') {
178 |         
179 |         ## Prediction Categorical
180 |         coefs <- nodeModels[[v]]$model
181 |         n_cat <- length(coefs)
182 |         Potentials <- matrix(NA, nCases, n_cat)
183 |         
184 |         # loop over categories & compute potentials
185 |         for(cat in 1:n_cat) Potentials[,cat] <- exp(coefs[[cat]][1] + X %*% matrix(coefs[[cat]][-1], nrow=length(coefs[[cat]][-1])))
186 |         
187 |         # compute category-probabilities
188 |         Probabilities <- Potentials[, 1:n_cat] / rowSums(Potentials[, 1:n_cat])
189 |         pred_class_id <-  apply(Probabilities, 1, which.max) # classify
190 |         predCoreObj$pred[[v]] <- sort(unique(data[,v]))[pred_class_id] # matches the ordering of predicted categories in glmnet(), which also orders with increasing integers
191 |         predCoreObj$prob[[v]] <- Probabilities
192 |         
193 |       } else {
194 |         
195 |         ## Prediction Continuous (same for Gauss and Pois, because in both cases the natural parameter is equal to the expectation)
196 |         # predicitions
197 |         coefs <- as.numeric(nodeModels[[v]]$model) # get coefficients
198 |         predCoreObj$pred[[v]] <- coefs[1] + X %*% matrix(coefs[-1], nrow=length(coefs[-1])) # predict
199 |         
200 |         if(type[v]=="p") predCoreObj$pred[[v]] <- exp(predCoreObj$pred[[v]])
201 |         
202 |       }
203 |       
204 |     } # end loop: v
205 |     
206 |     
207 |     # Save true (reduced) data
208 |     predCoreObj$true <- data_response
209 |     
210 |   } # end if: mvar?
211 |   
212 |   
213 |   # ----- Return -----
214 |   
215 |   return(predCoreObj)
216 |   
217 |   
218 | } # eoF
219 | 
220 | 
221 | 
222 | 
223 | 
224 | 


--------------------------------------------------------------------------------
/R/print.int.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | print.int <- function(x,
 5 |                       ...)
 6 |  
 7 |   
 8 |   
 9 | {
10 |   
11 |   # Sign Defined?  
12 |   if(x$sign == 0) sign <- "NA" else sign <- x$sign 
13 |   
14 |   # Signtext
15 |   if(x$sign == 0) signtext <- " (Not defined)"
16 |   if(x$sign == 1) signtext <- " (Positive)"
17 |   if(x$sign == -1) signtext <- " (Negative)"
18 |   
19 |   # Create Console Output
20 |   cat('Interaction:', paste(x$variables, collapse = "-"),
21 |         '\nWeight: ', x$edgeweight,
22 |         '\nSign: ' , sign, signtext)
23 | 
24 | }
25 |   
26 |   


--------------------------------------------------------------------------------
/R/print.mgm.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | print.mgm <- function(x,
  4 |                       ...)
  5 |   
  6 |   
  7 |   
  8 | {
  9 |   
 10 |   model_classes <- c('Mixed Graphical Model (MGM)',
 11 |                      'mixed Vector Autoregressive (mVAR) model',
 12 |                      'Time-varying Mixed Graphical Model (tv-MGM)',
 13 |                      'Time-varying mixed Vector Autoregressive (tv-mVAR) model')
 14 |   
 15 |   # ---------- print for fit objects ----------
 16 |   
 17 |   if(!('predicted' %in% class(x)) & !('bwSelect' %in% class(x))) {
 18 |     
 19 |     if('core' %in% class(x)) {
 20 |       
 21 |       msg_basic <- paste0('mgm fit-object',
 22 |                           '\n\nModel class: ', model_classes[1],
 23 |                           '\nOrder: ' , x$call$k,
 24 |                           '\nNodes: ' , length(x$call$type))
 25 |       
 26 |       
 27 |       if(!is.null(x$call$moderators))  {
 28 |         
 29 |         x$call$k <- 3
 30 |         
 31 |         msg_basic <- paste0('mgm fit-object',
 32 |                             '\n\nModel class: ', model_classes[1],
 33 |                             '\nOrder: ' , x$call$k,
 34 |                             '\nNodes: ' , length(x$call$type)) # call again with updated x$call$k
 35 |         
 36 |         
 37 |         if(is.matrix(x$call$moderators)) mod_text <- "Custom specification" else mod_text <- paste(x$call$moderators, collapse = ", ")
 38 | 
 39 |         msg_basic <- paste0(msg_basic,
 40 |                             paste0('\nModerators: ' , mod_text)) 
 41 |         
 42 |     
 43 |       }
 44 |         
 45 |       
 46 |       if(!is.null(x$call$condition)) {
 47 |         
 48 |         nCond <- length(x$call$condition)
 49 |         names <- names(x$call$condition)
 50 |         msgCond <- paste0(names, "=", unlist(x$call$condition))
 51 |         msg_basic <- paste0(msg_basic, "\nFixed: ", paste(msgCond, collapse = ", "))
 52 |         
 53 |       }
 54 |       
 55 |       cat(msg_basic)
 56 | 
 57 |       
 58 |     } # end if: basic mgm object?
 59 |     
 60 |     
 61 |     
 62 |     
 63 |     
 64 |     
 65 |     if('mvar' %in% class(x)) {
 66 |       
 67 |       n_incl <- sum(x$call$data_lagged$included == TRUE)
 68 |       n_exp <- sum(x$call$data_lagged$included == FALSE)
 69 |       n <- n_incl + n_exp
 70 |       perc <- n_incl / n
 71 |       perc <- round(perc, 4) * 100
 72 |       
 73 |       cat('mgm fit-object',
 74 |           '\n\nModel class: ', model_classes[2],
 75 |           '\nLags: ' , x$call$lags,
 76 |           '\nRows included in VAR design matrix: ' , n_incl ,'/', n, '(', perc, '%)',
 77 |           '\nNodes: ' , length(x$call$type))
 78 |     }
 79 |     
 80 |     
 81 |     
 82 |     if('tvmgm' %in% class(x)) {
 83 |       
 84 |       if(!is.null(x$call$moderators)) {
 85 |         
 86 |         x$call$k <- 3
 87 |         
 88 |         cat('mgm fit-object',
 89 |             '\n\nModel class: ', model_classes[3],
 90 |             '\nOrder: ' , x$call$k,
 91 |             '\nNodes: ' , length(x$call$type),
 92 |             '\nModerators: Variable ' , x$call$moderators,
 93 |             '\nEstimation points: ' , length(x$call$estpoints))
 94 |         
 95 |       } else {
 96 |         
 97 |         cat('mgm fit-object',
 98 |             '\n\nModel class: ', model_classes[3],
 99 |             '\nOrder: ' , x$call$k,
100 |             '\nNodes: ' , length(x$call$type),
101 |             '\nEstimation points: ' , length(x$call$estpoints))
102 |         
103 |       }
104 |       
105 |       
106 |       
107 |     }
108 |     
109 |     
110 |     if('tvmvar' %in% class(x)) {
111 |       
112 |       
113 |       n_incl <- sum(x$call$data_lagged$included == TRUE)
114 |       n_exp <- sum(x$call$data_lagged$included == FALSE)
115 |       n <- n_incl + n_exp
116 |       perc <- n_incl / n
117 |       perc <- round(perc, 4) * 100
118 |       
119 |       
120 |       cat('mgm fit-object',
121 |           '\n\nModel class: ', model_classes[4],
122 |           '\nLags: ' , x$call$lags,
123 |           '\nRows included in VAR design matrix: ' , n_incl ,'/', n, '(', perc, '%)',
124 |           '\nNodes: ' , length(x$call$type),
125 |           '\nEstimation points: ' , length(x$call$estpoints))
126 |     }
127 |     
128 |   }
129 |   
130 |   
131 |   # ---------- print for prediction object ----------
132 |   
133 |   if('predicted' %in% class(x)) {
134 |     
135 |     if('mgm' %in% class(x)) mc <- model_classes[1]
136 |     if('mvar' %in% class(x)) mc <- model_classes[2]
137 |     if('tvmgm' %in% class(x)) mc <- model_classes[3]
138 |     if('tvmvar' %in% class(x)) mc <- model_classes[4]
139 |     
140 |     
141 |     cat('mgm prediction-object',
142 |         '\n\nModel class: ', mc,
143 |         '\nError Types:', paste(names(x$call$errorCon), names(x$call$errorCat)))
144 |     
145 |   }
146 |   
147 |   
148 |   # ---------- print for bwSelect object ----------
149 |   
150 |   
151 |   if('bwSelect' %in% class(x)) {
152 |     
153 |     if('mgm' %in% class(x)) mc <- model_classes[3]
154 |     if('mvar' %in% class(x)) mc <- model_classes[4]
155 |     
156 |     cat('mgm bandwidth selection-object',
157 |         '\n\nModel class: ', mc,
158 |         '\nBandwith path: ', paste(x$call$bwSeq),
159 |         '\nNumber of Folds: ', paste(x$call$bwFolds),
160 |         '\nFoldsize: ', paste(x$call$bwFoldsize),
161 |         '\nOptimal Bandwidth: ', x$call$bwSeq[which.min(x$meanError)])
162 |     
163 |   }
164 |   
165 |   
166 |   
167 |   
168 | } # eoF
169 | 


--------------------------------------------------------------------------------
/R/print.resample.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # Gives summary of resampling object
 3 | 
 4 | print.resample <- function(x, # output from resample()
 5 |                              ...)
 6 |   
 7 |   
 8 | {
 9 |   
10 |   obj_class <- class(x$call$object)
11 |   
12 |   model_classes <- c('Mixed Graphical Model (MGM)',
13 |                      'mixed Vector Autoregressive (mVAR) model',
14 |                      'Time-varying Mixed Graphical Model (tv-MGM)',
15 |                      'Time-varying mixed Vector Autoregressive (tv-mVAR) model')
16 |   
17 |   if('core' %in% obj_class) obj_class_display <- model_classes[1]
18 |   if('mvar' %in% obj_class) obj_class_display <- model_classes[2]
19 |   if('tvmgm' %in% obj_class) obj_class_display <- model_classes[3]
20 |   if('tvmvar' %in% obj_class) obj_class_display <- model_classes[4]
21 | 
22 |   
23 |   if('tvmvar' %in% obj_class | 'tvmgm' %in% obj_class) {
24 |     
25 |     cat('resample-object',
26 |         '\n\nModel class: ', obj_class_display,
27 |         '\nBootstrap samples: ' , x$call$nB,
28 |         '\nBlocks: ' , x$call$blocks)
29 |     
30 |   } else {
31 |     
32 |     cat('resample-object',
33 |         '\n\nModel class: ', obj_class_display,
34 |         '\nBootstrap samples: ' , x$call$nB)
35 |     
36 |   }
37 |   
38 |     
39 |     
40 | } # eoF
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/R/showInteraction.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | showInteraction <- function(object, 
  4 |                             int) {
  5 |   
  6 |   
  7 |   # ----- Input Checks -----
  8 |   
  9 |   class_obj <- class(object)[2]
 10 |   
 11 |   # Check whether object is mgm model object
 12 |   if(!(class_obj %in% c("core", "tvmgm", "mvar", "tvmvar") )) stop("Please provide an mgm model object (mgm, tvmgm, mvar, tvmvar) as input.")
 13 |   
 14 |   # Check whether interactions are specified as integers
 15 |   if(!all(int == round(int))) stop("Interactions have to be specified as sets of integers in 1:p")
 16 |   if(any(int < 1)) stop("Interactions have to be specified as sets of integers in 1:p")
 17 |   
 18 |   # Current limitation of function:
 19 |   if(class_obj %in% c("core", "tvmgm")) if(object$call$k > 2) stop("showInteraction() currently only supports pairwise interactions.")
 20 |   if(class_obj %in% c("tvmgm", "mvar", "tvmvar")) stop("showInteraction() currently only supports mgm() objects.")
 21 |   
 22 |   if(length(unique(int)) != length(int)) stop("A k-order interaction have to be specified by k distinct variables.")
 23 |   p <- length(object$call$level)
 24 |   if(any(int > p)) stop("Please specify variables in {1, 2, ..., p}.")
 25 |   
 26 |   
 27 |   # ----- Compute Aux variables -----
 28 |   
 29 |   int <- sort(int) # sort, because interactions are saved sorted 1:p in "object$interactions$weights[[n_order-1]][[int_row]]"
 30 |   levelNames <- list()
 31 |   levelNames[[1]] <- object$call$levelNames[[int[1]]]
 32 |   levelNames[[2]] <- object$call$levelNames[[int[2]]]
 33 |   
 34 |   
 35 |   # ------------------------------------------------------------------------------------------------
 36 |   # ---------- a) mgm-objects ----------------------------------------------------------------------
 37 |   # ------------------------------------------------------------------------------------------------
 38 |   
 39 |   if(class_obj == "core") {
 40 |     
 41 |     n_order <- length(int)
 42 |     type_int <- object$call$type[int]
 43 |     level_int <- object$call$level[int]
 44 |     
 45 |     
 46 |     # Input checks
 47 |     # if(n_order > object$call$k) stop("The order of requested interactions has to match the order of interactions in the model.")
 48 |     
 49 |     # Create outlist
 50 |     outlist <- list("variables" = int, 
 51 |                     "order" = n_order, 
 52 |                     "type" = type_int, 
 53 |                     "level" = level_int, 
 54 |                     "edgeweight" = NULL, 
 55 |                     "sign" = NULL,
 56 |                     "parameters" = list())
 57 |     
 58 |     # Get row of specified interaction
 59 |     int_row <- which(apply(matrix(object$interactions$indicator[[n_order-1]], ncol=n_order), 1, function(x) all(x %in% int))) # get row of interaction in "int" in interaction list
 60 |     
 61 |     # Is the interaction nonzero?
 62 |     if(length(int_row) == 0) {
 63 |       
 64 |       outlist$edgeweight <- 0
 65 |       outlist$sign <- 0
 66 |       
 67 |     } else {
 68 |       
 69 |       outlist$edgeweight <- object$interactions$weightsAgg[[n_order-1]][[int_row]]
 70 |       outlist$sign <- object$interactions$sign[[n_order-1]][int_row]
 71 |       
 72 |       if(n_order > 2) {
 73 |         
 74 |         outlist$parameters = NULL
 75 |         
 76 |       } else {
 77 |         
 78 |         # ------- Collect & label parameter estimates -------
 79 |         
 80 |         for(i in 1:n_order) {
 81 |           
 82 |           # Get parameters of regression on i
 83 |           int_i  <- object$interactions$weights[[n_order-1]][[int_row]][[i]]
 84 |           
 85 |           # Create empty array with correct dimensions
 86 |           par_mat <- matrix(NA, level_int[i], level_int[-i])
 87 |           
 88 |           # Fill array
 89 |           if(type_int[i] == "c") { # if response = categorical
 90 |             
 91 |             if(type_int[-i] == "c") { # if predictor = categorical (only works for k=2 order MGMs ..)
 92 |               for(i_resp in 1:length(int_i)) {
 93 |                 if(object$call$overparameterize) par_mat[i_resp, ] <- int_i[[i_resp]] else par_mat[i_resp, -1] <- int_i[[i_resp]]
 94 |               } 
 95 |             } else {
 96 |               int_i_ul <- unlist(int_i)
 97 |               par_mat[, 1] <- int_i_ul
 98 |             }
 99 |             
100 |           } else { # if response = continuous
101 |             
102 |             if(type_int[-i] == "c") { # if predictor = categorical
103 |               par_mat[1, -1] <- int_i # first column = dummy coded
104 |             } else {
105 |               par_mat[1, 1] <- int_i
106 |             }
107 |           }
108 |           
109 |           # browser()
110 |           # Set dimension names in array
111 |           if(type_int[i] == "c") row.names(par_mat) <- paste0(int[i], ".", levelNames[[i]]) else row.names(par_mat) <- int[i]
112 |           if(type_int[-i] == "c") colnames(par_mat) <- paste0(int[-i], ".", levelNames[[abs(i-3)]]) else colnames(par_mat) <- int[-i]
113 |           
114 |           # Save array in output list
115 |           outlist$parameters[[paste0("Predict_", int[i])]] <- par_mat
116 |           
117 |         } # end for: n_order
118 |         
119 |       } # if: order == 2
120 |       
121 |     } # end if: estimated to zero?
122 |     
123 |   } # end if: core?
124 |   
125 |   
126 |   # ------------------------------------------------------------------------------------------------
127 |   # ---------- b) tvmgm-objects ----------------------------------------------------------------------
128 |   # ------------------------------------------------------------------------------------------------
129 |   
130 |   
131 |   # ------------------------------------------------------------------------------------------------
132 |   # ---------- c) mvar-objects ----------------------------------------------------------------------
133 |   # ------------------------------------------------------------------------------------------------
134 |   
135 |   
136 |   # ------------------------------------------------------------------------------------------------
137 |   # ---------- d) tvmvar-objects ----------------------------------------------------------------------
138 |   # ------------------------------------------------------------------------------------------------
139 |   
140 |   
141 |   # ----- Return -----
142 |   
143 |   class(outlist) <- "int"
144 |   
145 |   return(outlist)
146 |   
147 | } # eoF


--------------------------------------------------------------------------------
/R/startup_msg.R:
--------------------------------------------------------------------------------
1 | # I copied this piece of code from the bootnet package where it was copied from Lavaan mainly:
2 | 
3 | .onAttach <- function(libname, pkgname) {
4 |   version <- read.dcf(file=system.file("DESCRIPTION", package=pkgname),
5 |                       fields="Version")
6 |   packageStartupMessage("This is ",paste(pkgname, version))
7 |   packageStartupMessage("Please report issues on Github: https://github.com/jmbh/mgm/issues")
8 | }
9 | 


--------------------------------------------------------------------------------
/R/tvmgmsampler.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | tvmgmsampler <- function(factors,
  4 |                          interactions,
  5 |                          thresholds,
  6 |                          sds,
  7 |                          type,
  8 |                          level,
  9 |                          nIter = 250,
 10 |                          pbar = TRUE,
 11 |                          ...)
 12 | 
 13 | 
 14 | {
 15 | 
 16 |   # ---------- Input Checks ----------
 17 | 
 18 | 
 19 |   # ----- Calc Aux Variables -----
 20 | 
 21 |   n_timepoints <- nrow(thresholds[[1]])
 22 |   n_order <- length(factors)
 23 |   p <- length(level)
 24 | 
 25 |   data <- matrix(NA,
 26 |                  nrow = n_timepoints,
 27 |                  ncol = p)
 28 | 
 29 |   # ----- Input Checks -----
 30 | 
 31 |   # - same nrow for all thresholds
 32 |   # - match time dimension for factors, interaction, thresholds and sds
 33 |   # - check time vector: is it a integer sequence 1:N
 34 |   # - only checks on sd if at least one gaussian present!
 35 | 
 36 |   if(missing(sds)) sds <- NULL
 37 | 
 38 | 
 39 |   # ----- Create Outout object -----
 40 | 
 41 |   tvmgmsamp_obj <- list('call' = NULL,
 42 |                         'data' = NULL)
 43 | 
 44 |   # Copy the call
 45 |   tvmgmsamp_obj$call <- list('factors' = factors,
 46 |                              'interactions' = interactions,
 47 |                              'thresholds' = thresholds,
 48 |                              'sds' = sds,
 49 |                              'type' = type,
 50 |                              'level' = level,
 51 |                              'nIter' = nIter,
 52 |                              'pbar' = pbar)
 53 | 
 54 | 
 55 |   if(pbar==TRUE) pb <- txtProgressBar(min = 0, max = n_timepoints, initial=0, char="-", style = 3)
 56 | 
 57 |   for(ts in 1:n_timepoints) {
 58 | 
 59 |     # ---------- Select Time dimension ----------
 60 | 
 61 | 
 62 |     # a) Take out time point from: interactions
 63 | 
 64 |     factors_ts <- list()
 65 |     interactions_ts <- list()
 66 | 
 67 |     for(ord in 1:n_order) {
 68 | 
 69 |       n_row <- dim(factors[[ord]])[1]
 70 | 
 71 |       if(is.null(n_row)) {
 72 | 
 73 |         interactions_ts[[ord]] <- NULL
 74 | 
 75 |       } else {
 76 | 
 77 |         # factors_ts[[ord]] <- matrix(NA, nrow = n_row, ncol = ord+1)
 78 |         interactions_ts[[ord]] <- list()
 79 | 
 80 |         for(row in 1:n_row) {
 81 | 
 82 |           # factors_ts[[ord]][row, ] <- fr <- factors[[ord]][row, , ts]
 83 |           fr <- factors[[ord]][row, ]
 84 | 
 85 |           # create list we use to access dynamic array: interactions
 86 |           l_query <- list()
 87 |           for(i2 in 1:(ord+1)) l_query[[i2]] <- 1:level[fr[i2]]
 88 |           l_query[[(ord+2)]] <- ts
 89 | 
 90 |           A <- interactions[[ord]][[row]]
 91 |           query_out <- do.call(function(...)A[...], l_query)
 92 |           dim_ts_fix_array <- unlist(lapply(l_query, length))[-(ord+2)]
 93 |           interactions_ts[[ord]][[row]] <- array(query_out, dim = dim_ts_fix_array)
 94 | 
 95 |         }
 96 | 
 97 |       } # end if else: at least 1 row?
 98 | 
 99 |     } # end for: ord
100 | 
101 | 
102 |     # b) Take out time point of: thresholds and sds
103 | 
104 |     thresholds_ts <- list()
105 |     for(v in 1:p) thresholds_ts[[v]] <- as.numeric(thresholds[[v]][ts, ])
106 |     sds_ts <- sds[ts, ]
107 | 
108 |     # c) factors stays the same
109 |     factors_ts <- factors
110 | 
111 | 
112 |     # ---------- Call mgmsampler() ----------
113 | 
114 |     one_case <- mgmsampler(factors = factors_ts,
115 |                              interactions = interactions_ts,
116 |                              thresholds = thresholds_ts,
117 |                              sds = sds_ts,
118 |                              type = type,
119 |                              level = level,
120 |                              N = 1,
121 |                              nIter = nIter,
122 |                              pbar = FALSE)
123 | 
124 |     data[ts, ] <- one_case$data
125 | 
126 | 
127 | 
128 |     if(pbar==TRUE) setTxtProgressBar(pb, ts)
129 | 
130 |   } # end for: time steps
131 | 
132 |   # ---------- Output ----------
133 | 
134 |   tvmgmsamp_obj$data <- data
135 | 
136 |   class(tvmgmsamp_obj) <- 'tvmgm'
137 | 
138 |   return(tvmgmsamp_obj)
139 | 
140 | 
141 | } # eoF
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 


--------------------------------------------------------------------------------
/R/tvmvarsampler.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | tvmvarsampler <- function(coefarray, # v x v2 x cat(v) x cat(v2) x lag array x N specifying lagged parameters
  5 |                           lags, # vector specifiying the lags
  6 |                           thresholds, # N x p matrix containint vector with a threshold for each category
  7 |                           sds, # N x p matrix of sds
  8 |                           type,
  9 |                           level,
 10 |                           pbar)
 11 |   
 12 |   
 13 |   
 14 |   
 15 | {
 16 |   
 17 |   # ---------- Input Checks ----------
 18 |   
 19 |   # Does coefarray have the right dimensions?
 20 |   if(length(dim(coefarray))!=6) stop('coefarray has to have 5 dimensions: p x p x max(level) x max(level) x n_lags x N.')
 21 |   if(dim(coefarray)[5] != length(lags)) stop('Dimensions for lags in coefarray have to match the number of lags specified in lags.')
 22 |   if(dim(coefarray)[1] != length(thresholds)) stop('Dimensions for variables in coefarray have to match the number of specified thresholds.')
 23 |   if(dim(coefarray)[1] != ncol(sds)) stop('Dimensions for variables in coefarray have to match the number of specified standard deviations.')
 24 |   if(dim(coefarray)[1] != length(type)) stop('Dimensions for variables in coefarray have to match the number of specified types.')
 25 |   if(dim(coefarray)[1] != length(level)) stop('Dimensions for variables in coefarray have to match the number of specified levels.')
 26 |   if(dim(coefarray)[1] != dim(coefarray)[2]) stop('The first two dimensions specifying the cross-lagged effects in coefarray have to have the same dimensionality.')
 27 |   
 28 | 
 29 |   if(!inherits(thresholds, "list")) stop('The thresholds have to be provided via a list with p entries. See ?tvmvarsampler')
 30 |   if(!inherits(sds, "matrix")) stop('The standard deviations of Gaussian variables have to be provided via a p x n matrix.')
 31 | 
 32 |   
 33 |   # ---------- Fill in Defaults ----------
 34 |   
 35 |   if(missing(pbar)) pbar <- TRUE
 36 |   
 37 |   
 38 |   # ---------- Create Output Object & copy the call ----------
 39 |   
 40 |   # Create Output Object
 41 |   mvarsamp_obj <- list('call' = NULL,
 42 |                        'data' = NULL)
 43 |   
 44 |   # Copy the call
 45 |   mvarsamp_obj$call <- list('coefarray' = coefarray,
 46 |                             'lags' = lags,
 47 |                             'thresholds' = thresholds,
 48 |                             'sds' = sds,
 49 |                             'type' = type,
 50 |                             'level' = level,
 51 |                             'pbar' = pbar)
 52 |   
 53 |   
 54 |   
 55 |   # ---------- Compute Auxilliary Variables ----------
 56 |   
 57 |   p <- dim(coefarray)[1]
 58 |   N <- dim(coefarray)[6]
 59 |   n_lags <- length(lags)
 60 |   labels <- paste0('V', 1:p, '.')
 61 |   max_lag <- max(lags)
 62 |   
 63 |   
 64 |   
 65 |   # ---------- Sample Data ----------
 66 |   
 67 |   data <- matrix(NA, ncol = p, nrow = N + max_lag)
 68 |   
 69 |   # Create random starting values
 70 |   for(r in 1:max_lag) {
 71 |     for(v in 1:p) {
 72 |       if(type[v] == 'c') data[r, v] <- sample(1:level[v], size = 1)
 73 |       if(type[v] == 'g') data[r, v] <- rnorm(1)
 74 |       if(type[v] == 'p') data[r, v] <- rpois(1, 1)
 75 |     }
 76 |   }
 77 |   
 78 |   
 79 |   if(pbar==TRUE) pb <- txtProgressBar(min = 0, max=N, initial=0, char="-", style = 3)
 80 |   
 81 |   
 82 |   # Loop over time and variables
 83 |   
 84 |   for(r in 1:N) {
 85 |     r2 <- ((max_lag+1):(N+max_lag))[r]
 86 |     
 87 |     for(v in 1:p) {
 88 |       
 89 |       # Get design matrix
 90 |       design_mat <- ModelMatrix(data = data, 
 91 |                                 type = type, 
 92 |                                 level = level, 
 93 |                                 labels = labels, 
 94 |                                 d = 1, 
 95 |                                 allCats=TRUE)
 96 |       
 97 |       if(type[v] != 'c') {
 98 |         
 99 |         potential_lag <- list()
100 |         for(lag in 1:n_lags) {
101 |           
102 |           # Get parameters in order
103 |           l_parms <- list()
104 |           for(par in 1:p) l_parms[[par]] <- coefarray[v, par, 1:level[v], 1:level[par], lag, r]
105 |           v_parms <- unlist(l_parms)
106 |           
107 |           # Compute part of potential for given lag
108 |           potential_lag[[lag]] <- design_mat[r2 - lags[lag], ] * v_parms
109 |           
110 |         } # end for: lag
111 |         
112 |         mu <- thresholds[[v]][r, 1] + sum(unlist(potential_lag))
113 |         
114 |         # if(r==50)        browser()
115 |         
116 |         if(type[v] == 'g') data[r2, v] <- rnorm(1, mean = mu, sd = sds[r, v])
117 |         if(type[v] == 'p') data[r2, v] <- rpois(1, lambda = mu)
118 |         
119 |       } # end if: !='c'
120 |       
121 |       if(type[v] == 'c') {
122 |         
123 |         # Loop over categories
124 |         l_potentials <- list()
125 |         for(cat in 1:level[v]) {
126 |           
127 |           potential_lag <- list()
128 |           for(lag in 1:n_lags) {
129 |             
130 |             # Get parameters in order
131 |             l_parms <- list()
132 |             for(par in 1:p) l_parms[[par]] <- coefarray[v, par, (1:level[v])[cat], 1:level[par], lag, r]
133 |             v_parms <- unlist(l_parms)
134 |             
135 |             # Compute part of potential for given lag
136 |             potential_lag[[lag]] <- design_mat[r2 - lags[lag], ] * v_parms
137 |             
138 |           } # end for: lag
139 |           
140 |           # put potential-parts from different lags together
141 |           l_potentials[[cat]] <- thresholds[[v]][r, cat] + sum(unlist(potential_lag))
142 |           
143 |         } # end for: cat
144 |         
145 |         # compute probabilities
146 |         v_potentials <- exp(unlist(l_potentials))
147 |         probabilities <- v_potentials / sum(v_potentials)
148 |         
149 |         data[r2, v] <- sample(1:level[v], prob = probabilities, size = 1)
150 |         
151 |       } # end if: =='c'?
152 |       
153 |       
154 |     } # end for: v variables
155 |     
156 |     if(pbar==TRUE) setTxtProgressBar(pb, r)
157 |     
158 |   } # end for: r rows
159 |   
160 |   
161 |   # ---------- Output ----------
162 |   
163 |   
164 |   mvarsamp_obj$data <- data[(max_lag+1):(N+max_lag), ]
165 |   
166 |   class(mvarsamp_obj) <- 'tvmvar'
167 |   
168 |   
169 |   return(mvarsamp_obj)
170 |   
171 |   
172 |   
173 |   
174 | } # eoF
175 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # mgm
 2 | 
 3 | [![CRAN
 4 | Version](http://www.r-pkg.org/badges/version/mgm)](https://cran.r-project.org/package=mgm)
 5 | [![Downloads](https://cranlogs.r-pkg.org/badges/mgm)](https://cran.r-project.org/package=mgm)
 6 | 
 7 | The package includes functions to estimate, visualize and resample time-varying k-order Mixed Graphical Models (MGMs) and mixed Vector Autoregressive (mVAR) models.
 8 | 
 9 | 
10 | Here is a paper describing the package: https://arxiv.org/abs/1510.06871
11 | 
12 | And here are a couple of blog posts about some functions: https://jmbh.github.io/
13 | 
14 | 
15 | The developmental version can be installed from within R using the devtools-package:
16 | 
17 | library(devtools)
18 | install_github("jmbh/mgm")
19 | 


--------------------------------------------------------------------------------
/data/B5MS.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/B5MS.RData


--------------------------------------------------------------------------------
/data/Fried2015.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/Fried2015.RData


--------------------------------------------------------------------------------
/data/PTSD_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/PTSD_data.RData


--------------------------------------------------------------------------------
/data/autism_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/autism_data.RData


--------------------------------------------------------------------------------
/data/autism_data_large.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/autism_data_large.RData


--------------------------------------------------------------------------------
/data/dataGD.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/dataGD.RData


--------------------------------------------------------------------------------
/data/fruitfly_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/fruitfly_data.RData


--------------------------------------------------------------------------------
/data/mgm_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/mgm_data.RData


--------------------------------------------------------------------------------
/data/modnw.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/modnw.RData


--------------------------------------------------------------------------------
/data/msq_p3.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/msq_p3.RData


--------------------------------------------------------------------------------
/data/msq_p5.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/msq_p5.RData


--------------------------------------------------------------------------------
/data/mvar_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/mvar_data.RData


--------------------------------------------------------------------------------
/data/restingstate_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/restingstate_data.RData


--------------------------------------------------------------------------------
/data/symptom_data.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/data/symptom_data.RData


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | bibentry(bibtype = "Article",
 2 |   title        = "{mgm}: Estimating Time-Varying Mixed Graphical Models in High-Dimensional Data",
 3 |   author       = c(person(given = c("Jonas", "M.", "B."),
 4 |                           family = "Haslbeck",
 5 |                           email = "jonashaslbeck@gmail.com"),
 6 |                    person(given = c("Lourens", "J."),
 7 |                           family = "Waldorp")),
 8 |   journal      = "Journal of Statistical Software",
 9 |   year         = "2020",
10 |   volume       = "93",
11 |   number       = "8",
12 |   pages        = "1--46",
13 |   doi          = "10.18637/jss.v093.i08",
14 | 
15 |   header       = "To cite mgm in publications use:"
16 | )
17 | 
18 | 


--------------------------------------------------------------------------------
/man/FactorGraph.Rd:
--------------------------------------------------------------------------------
  1 | \name{FactorGraph}
  2 | \alias{FactorGraph}
  3 | 
  4 | \title{
  5 | Draws a factor graph of a (time-varying) MGM
  6 | }
  7 | \description{
  8 | Wrapper function around qgraph() that draws factor graphs for (time-varying) MGMs
  9 | }
 10 | \usage{
 11 | FactorGraph(object, labels, PairwiseAsEdge = FALSE, 
 12 |             Nodewise = FALSE, DoNotPlot = FALSE, 
 13 |             FactorLabels = TRUE, colors, shapes,
 14 |             shapeSizes = c(8, 4), estpoint = NULL, 
 15 |             negDashed = FALSE, ...)
 16 | }
 17 | 
 18 | 
 19 | \arguments{
 20 |   \item{object}{
 21 | The output object of \code{mgm()} or \code{tvmgm()}.
 22 | }
 23 |   \item{labels}{
 24 | A character vector of (variable) node labels.
 25 | }
 26 |   \item{PairwiseAsEdge}{
 27 | If \code{TRUE}, pairwise interactions are not displayed as factors but as simple edges between nodes. Defaults to \code{PairwiseAsEdge = FALSE}.
 28 | }
 29 |   \item{Nodewise}{
 30 | If \code{TRUE}, the estimates from the individual nodewise regressions are displayed as a directed edge towards the node on which the respective nodewise regression was performed. This is useful to identify model misspecification (e.g. moderation effects / interaction parameters with largely different values across nodewise regressions). Defaults to \code{Nodewise = FALSE}.
 31 | }
 32 |   \item{DoNotPlot}{
 33 | If \code{DoNotPlot = TRUE} no factorgraph is plotted. This way the computed factor graph can be obtained without plotting. Defaults to \code{DoNotPlot = FALSE}.
 34 | }
 35 |   \item{FactorLabels}{
 36 | If \code{FactorLabels = TRUE} the factors are labeled by their order. If \code{FactorLabels = FALSE} no label is shown. Defaults to \code{FactorLabels = TRUE}.
 37 | }
 38 |   \item{colors}{
 39 | A character vector of colors for nodes and factors. The first color is for variable-nodes, the second for 2-way interactions, the third for 3-way interactions, etc. Defaults to \code{colors = c("white", "tomato", "lightblue", "orange")}.
 40 | }
 41 |   \item{shapes}{
 42 | A character vector of shapes for for nodes and factors. The first shape is for variable-nodes, the second for 2-way interactions, the third for 3-way interactions, etc. Defaults to \code{shapes = c("circle", "square", "triangle", "diamond")}.
 43 | }
 44 |   \item{shapeSizes}{
 45 | A numeric vector of length two indicating the size of shapes for nodes and factors. Defaults to \code{shapeSizes = c(8, 4)}.
 46 | }
 47 |   \item{estpoint}{
 48 | An integer indicating the estimation point to display if the output object of a time-varying MGM is provided. 
 49 | }
 50 |   \item{negDashed}{
 51 | If \code{negDashed = TRUE}, edges with negative sign are dashed.
 52 | }
 53 |   \item{\dots}{
 54 | Arguments passed to qgraph.
 55 | }
 56 | }
 57 | \details{
 58 | \code{FactorGraph()} is a wrapper around \code{qgraph()} from the qgraph package. Therefore all arguments of \code{qgraph()} are available and can be provided as additional arguments.
 59 | 
 60 | To make time-varying factor graphs comparable across estimation points, the factor graph of each estimation point includes all factors that are estimated nonzero at least at one estimation point.
 61 |  
 62 | }
 63 | \value{
 64 | Plots the factor graph and returns a list including the arguments used to plot the factor graph using qgraph().
 65 | 
 66 | Specifically, a list is returned including:  \code{graph} contains a weighted adjacency matrix of a (bipartide) factor graph. If p is the number of variables and E the number of interactions (factors) in the model, this matrix has dimensions (p+E) x (p+E). The factor graph is furter specified by the following objects: \code{signs} is a matrix of the same dimensions as \code{graph} that indicates the sign of each interaction, if defined (see \code{pairwise} above). \code{edgecolor} is a matrix with the same dimension as \code{graph} that provides edge colors depending on the sign as above. \code{order} is a (p+E) vector indicating the order of interaction. The first p entries are set to zero. \code{qgraph} contains the qgraph object created while plotting.
 67 | 
 68 | }
 69 | %%\references{
 70 | %% ~put references to the literature/web site here ~
 71 | %%}
 72 | 
 73 | \author{
 74 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 75 | }
 76 | 
 77 | 
 78 | \seealso{
 79 | \code{mgm()}, \code{tvmgm()}, \code{qgraph()}
 80 | }
 81 | 
 82 | 
 83 | \examples{
 84 | 
 85 | 
 86 | \dontrun{
 87 | 
 88 | # Fit MGM with pairwise & threeway interactions to Autism Dataset
 89 | fit_k3 <- mgm(data = autism_data$data,
 90 |               type = autism_data$type,
 91 |               level = autism_data$lev,
 92 |               k = 3, 
 93 |               overparameterize = TRUE,
 94 |               lambdaSel = "EBIC", 
 95 |               lambdaGam = .5) 
 96 | 
 97 | # List of estimated interactions
 98 | fit_k3$interactions$indicator
 99 | 
100 | FactorGraph(object = fit_k3, 
101 |             PairwiseAsEdge = FALSE, 
102 |             DoNotPlot = FALSE, 
103 |             labels = 1:7, 
104 |             layout="circle")
105 | 
106 | # For more examples see https://github.com/jmbh/mgmDocumentation
107 | }
108 | 
109 | }
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/man/bwSelect.Rd:
--------------------------------------------------------------------------------
  1 | 
  2 | \name{bwSelect}
  3 | \alias{bwSelect}
  4 | 
  5 | \title{
  6 | Select optimal bandwidth for time-varying MGMs and mVAR Models
  7 | }
  8 | 
  9 | \description{
 10 | Selects the bandwidth parameter with lowest out of sample prediction error for MGMs and mVAR Models.
 11 | }
 12 | 
 13 | \usage{
 14 | bwSelect(data, type, level, bwSeq, bwFolds,
 15 |          bwFoldsize, modeltype, pbar, ...)
 16 | }
 17 | 
 18 | \arguments{
 19 |   \item{data}{
 20 | A n x p data matrix.
 21 | }
 22 |   \item{type}{
 23 | p vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" for categorical.
 24 | }
 25 |   \item{level}{
 26 | p vector indicating the number of categories of each variable. For continuous variables set to 1.
 27 | }
 28 |   \item{bwSeq}{
 29 | A sequence with candidate bandwidth values (0, s] with s < Inf. Note that the bandwidth is applied relative to the unit time interval [0,1] and hence a banwidth of > 2 corresponds roughly to equal weights for all time points and hence gives similar estimates as the stationary model estimated via \code{mvar()}.
 30 | }
 31 |   \item{bwFolds}{
 32 | The number of folds (see details below).
 33 | }
 34 |   \item{bwFoldsize}{
 35 | The size of each fold (see details below).
 36 | }
 37 |   \item{modeltype}{
 38 | If \code{modeltype = "mvar"} model, the optimal bandwidth parameter for a \code{tvmvar()} model is selected. If \code{modeltype = "mgm"} model, the optimal bandwidth parameter for a \code{tvmgm()} model is selected. Additional arguments to \code{tvmvar()} or \code{tvmgm()} can be passed via the \code{\dots} argument.
 39 | }
 40 | 
 41 |   \item{pbar}{
 42 | If TRUE a progress bar is shown. Defaults to \code{pbar = "TRUE"}.
 43 | }
 44 | 
 45 |   \item{\dots}{
 46 | Arguments passed to \code{tvmgm} or \code{tvmvar}.
 47 | }
 48 | }
 49 | 
 50 | \details{
 51 | 
 52 | Performs a cross-validation scheme that is specified by \code{bwFolds} and \code{bwFoldsize}. In the first fold, the test set is defined by an equally spaced sequence between [1,  n - \code{bwFolds}] of length \code{bwFoldsize}. In the second fold, the test set is defined by an equally spaced sequence between [2, n - \code{bwFolds} + 1] of length \code{bwFoldsize}, etc. . Note that if \code{bwFoldsize} = n / \code{bwFolds}, this procedure is equal to \code{bwFolds}-fold cross valildation. However, full cross validation is computationally very expensive and a single split in test/training set by setting \code{bwFolds = 1} is sufficient in many situations. The procedure selects the bandwidth with the lowest prediction error, averaged over variables and time points in the test set.
 53 | 
 54 | \code{bwSelect} computes the absolute error (continuous) or 0/1-loss (categorical) for each time point in the test set defined by \code{bwFoldsize} as described in the previous paragraph for every fold specified in \code{bwFolds}, separately for each variable. The computed errors are returned in different levels of aggregation in the output list (see below). Note that continuous variables are scaled (centered and divided by their standard deviation), hence the absolute error and 0/1-loss are roughly on the scale scale.
 55 | 
 56 | Note that selecting the bandwidth with the EBIC is no alternative. This is because the EBIC always selects the intercept model with the lowest bandwidth.  The reason is that the unregularized intercept closely models the noise in the data and hence the penalty sets all other parameters to zero. This problem is solved by using out of sample prediction error in the cross validation scheme.
 57 | 
 58 | 
 59 | }
 60 | \value{
 61 | 
 62 | The function returns a list with the following entries:
 63 | 
 64 |   \item{call}{
 65 | Contains all provided input arguments. If \code{saveData = TRUE}, it also contains the data.
 66 | }
 67 | 
 68 |   \item{bwModels}{
 69 | Contains the models estimated at the time points in the tests set. For details see \code{tvmvar} or \code{tvmgm}.
 70 | }
 71 | 
 72 |   \item{fullErrorFolds}{
 73 | List with number of entries equal to the length of \code{bwSeq} entries. Each entry contains a list with \code{bwFolds} entries. Each of those entries contains a contains a \code{bwFoldsize} times p matrix of out of sample prediction errors.
 74 | }
 75 | 
 76 |   \item{fullError}{
 77 | The same as \code{fullErrorFolds} but pooled over folds.
 78 | }
 79 | 
 80 |   \item{meanError}{
 81 | List with number of entries equal to the length of \code{bwSeq} entries. Each entry contains the average prediction error over variables and time points in the test set.
 82 | }
 83 | 
 84 |   \item{testsets}{
 85 | List with \code{bwFolds} entries, which contain the rows of the test sample for each fold.
 86 | }
 87 | 
 88 |   \item{zeroweights}{
 89 | List with \code{bwFolds} entries, which contains the observation weights used to fit the model at the \code{bwFoldsize} time points.
 90 | }
 91 | 
 92 | 
 93 | }
 94 | \references{
 95 | 
 96 | Barber, R. F., & Drton, M. (2015). High-dimensional Ising model selection with Bayesian information criteria. Electronic Journal of Statistics, 9(1), 567-607.
 97 | 
 98 | Foygel, R., & Drton, M. (2010). Extended Bayesian information criteria for Gaussian graphical models. In Advances in neural information processing systems (pp. 604-612).
 99 | 
100 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
101 | 
102 | 
103 | 
104 | }
105 | \author{
106 | Jonas Haslbeck <jonashaslbeck@gmail.com>
107 | }
108 | 
109 | 
110 | 
111 | 
112 | \examples{
113 | 
114 | \dontrun{
115 | 
116 | 
117 | ## A) bwSelect for tvmgm() 
118 | 
119 | # A.1) Generate noise data set
120 | p <- 5
121 | n <- 100
122 | data_n <- matrix(rnorm(p*n), nrow=100)
123 | head(data_n)
124 | 
125 | type <- c("c", "c", rep("g", 3))
126 | level <- c(2, 2, 1, 1, 1)
127 | x1 <- data_n[,1]
128 | x2 <- data_n[,2]
129 | data_n[x1>0,1] <- 1
130 | data_n[x1<0,1] <- 0
131 | data_n[x2>0,2] <- 1
132 | data_n[x2<0,2] <- 0
133 | 
134 | head(data_n)
135 | 
136 | # A.2) Estimate optimal bandwidth parameter
137 | 
138 | bwobj_mgm <- bwSelect(data = data_n,
139 |                       type = type,
140 |                       level = level,
141 |                       bwSeq = seq(0.05, 1, length=3),
142 |                       bwFolds = 1,
143 |                       bwFoldsize = 3,
144 |                       modeltype = "mgm",
145 |                       k = 3,
146 |                       pbar = TRUE,
147 |                       overparameterize = TRUE)
148 | 
149 | 
150 | print.mgm(bwobj_mgm)
151 | 
152 | 
153 | 
154 | ## B) bwSelect for tvmVar() 
155 | 
156 | # B.1) Generate noise data set
157 | 
158 | p <- 5
159 | n <- 100
160 | data_n <- matrix(rnorm(p*n), nrow=100)
161 | head(data_n)
162 | 
163 | type <- c("c", "c", rep("g", 3))
164 | level <- c(2, 2, 1, 1, 1)
165 | x1 <- data_n[,1]
166 | x2 <- data_n[,2]
167 | data_n[x1>0,1] <- 1
168 | data_n[x1<0,1] <- 0
169 | data_n[x2>0,2] <- 1
170 | data_n[x2<0,2] <- 0
171 | 
172 | head(data_n)
173 | 
174 | # B.2) Estimate optimal bandwidth parameter
175 | 
176 | bwobj_mvar <- bwSelect(data = data_n,
177 |                        type = type,
178 |                        level = level,
179 |                        bwSeq = seq(0.05, 1, length=3),
180 |                        bwFolds = 1,
181 |                        bwFoldsize = 3,
182 |                        modeltype = "mvar",
183 |                        lags = 1:3,
184 |                        pbar = TRUE,
185 |                        overparameterize = TRUE)
186 | 
187 | 
188 | print.mgm(bwobj_mvar)
189 | 
190 | # For more examples see https://github.com/jmbh/mgmDocumentation
191 | 
192 | 
193 | }
194 | 
195 | }
196 | 
197 | 
198 | 


--------------------------------------------------------------------------------
/man/condition.Rd:
--------------------------------------------------------------------------------
  1 | \name{condition}
  2 | \alias{condition}
  3 | 
  4 | \title{
  5 | Computes mgm object conditional on a set of variables
  6 | }
  7 | \description{
  8 | The function takes an mgm object and a set of variables fixed to given values as input and returns the conditional mgm object.
  9 | }
 10 | \usage{
 11 | condition(object, values)
 12 | }
 13 | 
 14 | \arguments{
 15 |   \item{object}{
 16 | An mgm object, the output of the \code{mgm()} function.
 17 | }
 18 |   \item{values}{
 19 | A list, where the entry name indicates the column number of the variable that should be fixed, and the entry value indicates the value to which the corresponding variable should be fixed. See below for an example.
 20 | }
 21 | }
 22 | \value{
 23 | The function returns an mgm object that is conditional on the provided values. The new mgm object can again be used as input in \code{predict()}, \code{print()}, \code{showInteraction()}, etc.. Note that code{mgm()} by default standardizes variables to mean=0, SD=1. Therefore, also the values one conditions on should be chosen on the scaled version of the variable to avoid extrapolating out of the dataset.
 24 | }
 25 | 
 26 | \details{
 27 | The new conditional object still contains the variables that were fixed, however, they are not related to any of the random variables anymore. We kept the variables in the object to avoid confusion with variable labels and plotting. Also note that \code{mgm()} by default scales all Gaussian variables to mean=0, sd=1. Thus, fixed values should be selected based on the scaled version of variables.
 28 | }
 29 | 
 30 | \references{
 31 | Haslbeck, J., & Waldorp, L. J. (2019). mgm: Estimating time-varying mixed graphical models in high-dimensional data. arXiv preprint arXiv:1510.06871.
 32 | }
 33 | 
 34 | \author{
 35 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 36 | }
 37 | 
 38 | 
 39 | 
 40 | \seealso{
 41 | \code{mgm}
 42 | }
 43 | 
 44 | 
 45 | \examples{
 46 | 
 47 | \dontrun{
 48 | 
 49 | # --- Create Mixture of two Gaussians ---
 50 | 
 51 | set.seed(1)
 52 | n <- 500
 53 | library(MASS)
 54 | 
 55 | # Component A
 56 | Sigma_a <- diag(2)
 57 | Sigma_a[1, 2] <- Sigma_a[2, 1] <- .5
 58 | Xa <- mvrnorm(n = n, mu = rep(0, 2), Sigma = Sigma_a)
 59 | 
 60 | # Component B
 61 | Sigma_b <- diag(2)
 62 | Sigma_b[1, 2] <- Sigma_b[2, 1] <- 0
 63 | Xb <- mvrnorm(n = n, mu = rep(0, 2), Sigma = Sigma_b)
 64 | 
 65 | data <- cbind(rbind(Xa, Xb), c(rep(0, n), rep(1, n)))
 66 | colnames(data) <- c("x1", "x2", "x3")
 67 | 
 68 | 
 69 | # --- Fit MGM ---
 70 | 
 71 | # with mgm
 72 | mgm_obj <- mgm(data = data, 
 73 |                type = c("g","g","c"), 
 74 |                level = c(1, 1, 2), 
 75 |                moderator = c(3), 
 76 |                lambdaSel = "EBIC")
 77 | 
 78 | # --- Condition on / fix values of variable 3 ---
 79 | 
 80 | # Fix x3=0
 81 | mgm_obj_x3.0 <- condition(object = mgm_obj, 
 82 |                           values = list("3"=0))
 83 | mgm_obj_x3.0$pairwise$wadj
 84 | 
 85 | # Fix x3=1
 86 | mgm_obj_x3.1 <- condition(object = mgm_obj, 
 87 |                           values = list("3"=1))
 88 | mgm_obj_x3.1$pairwise$wadj
 89 | 
 90 | }
 91 | 
 92 | }
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 | 


--------------------------------------------------------------------------------
/man/datasets.Rd:
--------------------------------------------------------------------------------
 1 | \name{datasets}
 2 | \docType{data}
 3 | 
 4 | \alias{datasets}
 5 | 
 6 | \alias{autism_data}
 7 | \alias{autism_data_large}
 8 | \alias{restingstate_data}
 9 | \alias{fruitfly_data}
10 | \alias{symptom_data}
11 | \alias{Fried2015}
12 | \alias{PTSD_data}
13 | \alias{mgm_data}
14 | \alias{mvar_data}
15 | \alias{modnw}
16 | \alias{B5MS}
17 | \alias{msq_p5}
18 | \alias{msq_p3}
19 | \alias{dataGD}
20 | 
21 | 
22 | \title{Example Datasets in the mgm Package}
23 | \description{
24 | The autism dataset (and its short version) are taken from Deserno et al. (2016).
25 | 
26 | The restingstate fMRI data are taken from Schmittmann et al. (2015).
27 | 
28 | The gene expression data across the life span of the fruit fly are taken from Gibberd & Nelson (2017), who took a subset of the data first presented by Arbeitman et al. (2002).
29 | 
30 | The symptom data of the single individual diagnosed with major depression is described in Kossakowski et al. (2017).
31 | 
32 | The PTSD data is taken from McNally et al. (2015).
33 | 
34 | The dataset \code{mgm_data} is generated by example code shown in ?mgmsampler, and \code{mvar_data} is generated by example code shown in ?mvarsampler.
35 | 
36 | The dataset \code{Fried2015} contains 515 cases of the 11 depression symptoms measured by the CES-D and is taken from Fried et al. 2015.
37 | 
38 | The dataset \code{B5MS} contains the mean scores across subscales (48 items each) for the Big Five personality traits. The dataset is taken from the \code{qgraph} package (Epskamp, et al., 2012) and was first used in Dolan et al. (2009).
39 | 
40 | The dataset \code{dataGD} contains 4 continuous variables and 3 categorical variables that are generated from a mixed DAG. This dataset is useful to illustrate estimating group differences in MGMs using moderation.
41 | 
42 | All datasets are loaded automatically. All real data sets come as a list including the data and additional information (names of variables, types of variables, time stamps for time series data, etc.)
43 | 
44 | }
45 | 
46 | 
47 | \references{
48 | 
49 | Deserno, M. K., Borsboom, D., Begeer, S., & Geurts, H. M. (2016). Multicausal systems ask for multicausal approaches: A network perspective on subjective well-being in individuals with autism spectrum disorder. Autism.
50 | 
51 | Dolan, C. V., Oort, F. J., Stoel, R. D., & Wicherts, J. M. (2009). Testing measurement invariance in the target rotated multigroup exploratory factor model. Structural Equation Modeling, 16(2), 295-314.
52 | 
53 | Epskamp, S., Cramer, A. O., Waldorp, L. J., Schmittmann, V. D., & Borsboom, D. (2012). qgraph: Network visualizations of relationships in psychometric data. Journal of Statistical Software, 48(4), 1-18.
54 | 
55 | Schmittmann, V. D., Jahfari, S., Borsboom, D., Savi, A. O., & Waldorp, L. J. (2015). Making large-scale networks from fMRI data. PloS one, 10(9), e0129074.
56 | 
57 | Gibberd, A. J., & Nelson, J. D. (2017). Regularized Estimation of Piecewise Constant Gaussian Graphical Models: The Group-Fused Graphical Lasso. Journal of Computational and Graphical Statistics, (just-accepted).
58 | 
59 | Arbeitman, M. N., Furlong, E. E., Imam, F., Johnson, E., Null, B. H., Baker, B. S., ... & White, K. P. (2002). Gene expression during the life cycle of Drosophila melanogaster. Science, 297(5590), 2270-2275.
60 | 
61 | Kossakowski, J., Groot, P., Haslbeck, J., Borsboom, D., & Whichers, M. (2017). Data from "Critical Slowing Down as a Personalized Early Warning Signal for Depression". Journal of Open Psychology Data, 5(1).
62 | 
63 | McNally, R. J., Robinaugh, D. J., Wu, G. W., Wang, L., Deserno, M. K., & Borsboom, D. (2015). Mental disorders as causal systems a network approach to posttraumatic stress disorder. Clinical Psychological Science, 3(6), 836-849.
64 | 
65 | Fried, E. I., Bockting, C., Arjadi, R., Borsboom, D., Amshoff, M., Cramer, A. O., ... & Stroebe, M. (2015). From loss to loneliness: The relationship between bereavement and depressive symptoms. Journal of abnormal psychology, 124(2), 256.
66 | 
67 | }
68 | 
69 | \keyword{datasets}
70 | 
71 | 


--------------------------------------------------------------------------------
/man/mgm-internal.Rd:
--------------------------------------------------------------------------------
 1 | \name{mgm-internal}
 2 | \title{Internal mgm functions}
 3 | 
 4 | \alias{DrawFG}
 5 | \alias{DrawFGtv}
 6 | \alias{FlagSymmetric}
 7 | \alias{bwSelPredict}
 8 | \alias{calcLL}
 9 | \alias{calcNeighbors}
10 | \alias{glmnetRequirements}
11 | \alias{lagData}
12 | \alias{nodeEst}
13 | \alias{ModelMatrix}
14 | \alias{ModelMatrix_standard}
15 | 
16 | \description{Internal mgm functions.}
17 | 
18 | \author{Jonas Haslbeck}
19 | 
20 | \details{
21 | 
22 | These are internal functions.
23 | 
24 | }
25 | 
26 | \keyword{internal}
27 | 


--------------------------------------------------------------------------------
/man/mgm-package.Rd:
--------------------------------------------------------------------------------
 1 | \name{mgm-package}
 2 | \alias{mgm-package}
 3 | \alias{mgm-package}
 4 | \docType{package}
 5 | \title{
 6 | Estimating Time-Varying k-order Mixed Graphical Models
 7 | }
 8 | \description{
 9 | Estimation of time-varying Mixed Graphical models and mixed VAR models via elastic-net regularized neighborhood regression.
10 | }
11 | 
12 | \author{
13 | Jonas Haslbeck
14 | 
15 | Maintainer: <jonashaslbeck@gmail.com>
16 | }
17 | 
18 | 
19 | \references{
20 | 
21 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
22 | 
23 | Loh, P. L., & Wainwright, M. J. (2013). Structure estimation for discrete graphical models: Generalized covariance matrices and their inverses. The Annals of Statistics, 41(6), 3022-3049.
24 | 
25 | Yang, E., Baker, Y., Ravikumar, P., Allen, G., & Liu, Z. (2014). Mixed graphical models via exponential families. In Proceedings of the Seventeenth International Conference on Artificial Intelligence and Statistics (pp. 1042-1050).
26 | }
27 | 
28 | \keyword{package}
29 | 


--------------------------------------------------------------------------------
/man/mgmsampler.Rd:
--------------------------------------------------------------------------------
  1 | \name{mgmsampler}
  2 | \alias{mgmsampler}
  3 | 
  4 | \title{
  5 | Sample from k-order Mixed Graphical Model
  6 | }
  7 | \description{
  8 | Generates samples from a k-order Mixed Graphical Model
  9 | }
 10 | \usage{
 11 | mgmsampler(factors, interactions, thresholds, sds, type,
 12 |            level, N, nIter = 250, pbar = TRUE, 
 13 |            divWarning = 10^3, returnChains = FALSE)
 14 | }
 15 | 
 16 | 
 17 | \arguments{
 18 |   \item{factors}{
 19 | This object indicates which interactions are present in the model.  It is a list in which the first entry corresponds to 2-way interactions, the second entry corresponds to 3-way interactions, etc. and the kth entry to the k+1-way interaction. Each entry contains a matrix with dimensions order x number of interaction of given order. Each row in the matrix indicates an interaction, e.g. (1, 3, 7, 9) in the matrix in list entry three indicates a 4-way interaction between the variables 1, 3, 7 and 9.
 20 | }
 21 |   \item{interactions}{
 22 | This object specifies the parameters associated to the interactions specified in \code{factors}. Corresponding to the structure in \code{factors}, this object is a list, where the kth entry corresponds to k+1-way interactions. Each list entry contains another list, with entries equal to the number of rows in the corresponding matrix in \code{factors}. Each of these list entries (for a fixed k) contains a k-dimensional array that specifies the parameters of the given k-order interaction. For instance, if we have a 3-way interaction (1, 2, 3) and all variables are binary, we have a 2 x 2 x 2 array specifiying the parameters for each of the 2^3 = 8 possible configurations. If all variables are continuous, we have a 1 x 1 x 1 array, so the interaction is specified by a single parameter. See the examples below for an illustration.
 23 | }
 24 |   \item{thresholds}{
 25 | A list with p entries corresponding to p variables in the model. Each entry contains a vector indicating the threshold for each category (for categorical variables) or a numeric value indicating the threshold/intercept (for continuous variables).
 26 | }
 27 |   \item{sds}{
 28 | A numeric vector with p entries, specifying the variances of Gaussian variables. If variables 6 and 13 are Gaussians, then the corresponding entries of \code{sds} have to contain the corresponding variances. Other entries are ignored.
 29 | }
 30 |   \item{type}{
 31 | p character vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" of each variable.
 32 | }
 33 |   \item{level}{
 34 | p integer vector indicating the number of categories of each variable. For continuous variables set to 1.
 35 | }
 36 |   \item{N}{
 37 | Number of samples that should be drawn from the distribution.
 38 | }
 39 |   \item{nIter}{
 40 | Number of iterations in the Gibbs sampler until a sample is drawn.
 41 | }
 42 |   \item{pbar}{
 43 |   If \code{pbar = TRUE} a progress bar is shown. Defaults to \code{pbar = TRUE}.
 44 | }
 45 |   \item{divWarning}{
 46 |   \code{mgmsampler()} returns a warning message if the absolute value of a continuous variable the chain of the gibbs sampler is larger than \code{divWarning}. To our best knowledge there is no theory yet defining a parameter space that ensures a proper probability density and hence a converging chain. Defaults to \code{divWarning = 10^3}.
 47 | }
 48 | 
 49 |   \item{returnChains}{
 50 |   If \code{returnChains = TRUE}, the sampler provides the entire chain of the Gibbs sampler, for each sampled case. Can be used to check convergence of the Gibbs sampler. Defaults to \code{returnChains = FALSE}.}
 51 | }
 52 | 
 53 | \details{
 54 | We use a Gibbs sampler to sample from the join distribution introduced by Yang and colleageus (2014). Note that the contraints on the parameter space necessary to ensure that the joint distribution is normalizable are to our best knowledge unknown. Yang and colleagues (2014) give these constraints for a number of simple pairwise models. In practice, an "improper joint density" will lead to a sampling process that approaches infinity, and hence \code{mgmsampler()} will return \code{Inf} / \code{-Inf} values.
 55 | }
 56 | 
 57 | 
 58 | \value{
 59 | A list containing:
 60 | 
 61 | \item{call}{
 62 | Contains all provided input arguments.
 63 | }
 64 | 
 65 | \item{data}{
 66 | The N x p data matrix of sampled values
 67 | }
 68 | 
 69 | }
 70 | \references{
 71 | Haslbeck, J., & Waldorp, L. J. (2018). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. arXiv preprint arXiv:1510.06871.
 72 | 
 73 | Yang, E., Baker, Y., Ravikumar, P., Allen, G. I., & Liu, Z. (2014, April). Mixed Graphical Models via Exponential Families. In AISTATS (Vol. 2012, pp. 1042-1050).
 74 | 
 75 | }
 76 | \author{
 77 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 78 | }
 79 | 
 80 | 
 81 | \examples{
 82 | 
 83 | \dontrun{
 84 | 
 85 | # --------- Example 1: p = 10 dimensional Gaussian ---------
 86 | 
 87 | # ----- 1) Specify Model -----
 88 | 
 89 | # a) General Graph Info
 90 | p <- 10 # number of variables
 91 | type = rep("g", p) # type of variables
 92 | level = rep(1, 10) # number of categories for each variable (1 = convention for continuous)
 93 | 
 94 | # b) Define interactions
 95 | factors <- list()
 96 | factors[[1]] <- matrix(c(1,2,
 97 |                          1,3,
 98 |                          4,5,
 99 |                          7,8), ncol=2, byrow = T) # 4 pairwise interactions
100 | interactions <- list()
101 | interactions[[1]] <- vector("list", length = 4)
102 | 
103 | # all pairwise interactions have value .5
104 | for(i in 1:4) interactions[[1]][[i]] <- array(.5, dim=c(1, 1))
105 | 
106 | # c) Define Thresholds
107 | thresholds <- vector("list", length = p)
108 | thresholds <- lapply(thresholds, function(x) 0 ) # all means are zero
109 | 
110 | # d) Define Variances
111 | sds <- rep(1, p) # All variances equal to 1
112 | 
113 | 
114 | # ----- 2) Sample cases -----
115 | 
116 | data <- mgmsampler(factors = factors,
117 |                    interactions = interactions,
118 |                    thresholds = thresholds,
119 |                    sds = sds,
120 |                    type = type,
121 |                    level = level,
122 |                    N = 500,
123 |                    nIter = 100,
124 |                    pbar = TRUE)
125 | 
126 | 
127 | # ----- 3) Recover model from sampled cases -----
128 | 
129 | set.seed(1)
130 | mgm_obj <- mgm(data = data$data,
131 |                type = type,
132 |                level = level,
133 |                k = 2,
134 |                lambdaSel = "EBIC",
135 |                lambdaGam = 0.25)
136 | 
137 | mgm_obj$interactions$indicator # worked!
138 | 
139 | 
140 | 
141 | # --------- Example 2: p = 3 Binary model with one 3-way interaction ---------
142 | 
143 | # ----- 1) Specify Model -----
144 | 
145 | # a) General Graph Info
146 | type = c("c", "c", "c")
147 | level = c(2, 2, 2)
148 | 
149 | # b) Define Interaction
150 | factors <- list()
151 | factors[[1]] <- NULL # no pairwise interactions
152 | factors[[2]] <- matrix(c(1,2,3), ncol=3, byrow = T) # one 3-way interaction
153 | 
154 | interactions <- list()
155 | interactions[[1]] <- NULL
156 | interactions[[2]] <- vector("list", length = 1)
157 | # threeway interaction no1
158 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3]))
159 | theta <- 2
160 | interactions[[2]][[1]][1, 1, 1] <- theta # fill in nonzero entries
161 | # thus: high probability for the case that x1 = x2 = x3 = 1
162 | 
163 | # c) Define Thresholds
164 | thresholds <- list()
165 | thresholds[[1]] <- rep(0, level[1])
166 | thresholds[[2]] <- rep(0, level[2])
167 | thresholds[[3]] <- rep(0, level[3])
168 | 
169 | 
170 | # ----- 2) Sample cases -----
171 | 
172 | set.seed(1)
173 | dlist <- mgmsampler(factors = factors,
174 |                     interactions = interactions,
175 |                     thresholds = thresholds,
176 |                     type = type,
177 |                     level = level,
178 |                     N = 500,
179 |                     nIter = 100,
180 |                     pbar = TRUE)
181 | 
182 | 
183 | # ----- 3) Check: Contingency Table -----
184 | 
185 | dat <- dlist$data
186 | table(dat[,1], dat[,2], dat[,3]) # this is what we expected
187 | 
188 | 
189 | # ----- 4) Recover model from sampled cases -----
190 | 
191 | mgm_obj <- mgm(data = dlist$data,
192 |                type = type,
193 |                level = level,
194 |                k = 3,
195 |                lambdaSel = "EBIC",
196 |                lambdaGam = 0.25, 
197 |                overparameterize = TRUE)
198 | 
199 | mgm_obj$interactions$indicator # recovered, plus small spurious pairwise 1-2
200 | 
201 | 
202 | # --------- Example 3: p = 5 Mixed Graphical Model with two 3-way interaction ---------
203 | 
204 | # ----- 1) Specify Model -----
205 | 
206 | # a) General Graph Info
207 | type = c("g", "c", "c", "g")
208 | level = c(1, 3, 5, 1)
209 | # b) Define Interaction
210 | factors <- list()
211 | factors[[1]] <- NULL # no pairwise interactions
212 | factors[[2]] <- matrix(c(1,2,3,
213 |                          2,3,4), ncol=3, byrow = T) # no pairwise interactions
214 | interactions <- list()
215 | interactions[[1]] <- NULL
216 | interactions[[2]] <- vector("list", length = 2)
217 | # 3-way interaction no1
218 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3]))
219 | interactions[[2]][[1]][,,1:3] <- rep(.8, 3) # fill in nonzero entries
220 | # 3-way interaction no2
221 | interactions[[2]][[2]] <- array(0, dim = c(level[2], level[3], level[4]))
222 | interactions[[2]][[2]][1,1,] <- .3
223 | interactions[[2]][[2]][2,2,] <- .3
224 | interactions[[2]][[2]][3,3,] <- .3
225 | # c) Define Thresholds
226 | thresholds <- list()
227 | thresholds[[1]] <- 0
228 | thresholds[[2]] <- rep(0, level[2])
229 | thresholds[[3]] <- rep(0, level[3])
230 | thresholds[[4]] <- 0
231 | # d) Define Variances
232 | sds <- rep(.1, length(type))
233 | 
234 | 
235 | # ----- 2) Sample cases -----
236 | 
237 | set.seed(1)
238 | data <- mgmsampler(factors = factors,
239 |                    interactions = interactions,
240 |                    thresholds = thresholds,
241 |                    sds = sds,
242 |                    type = type,
243 |                    level = level,
244 |                    N = 500,
245 |                    nIter = 100,
246 |                    pbar = TRUE)
247 | 
248 | 
249 | # ----- 3) Check: Conditional Means -----
250 | 
251 | # We condition on the categorical variables and check whether
252 | # the conditional means match what we expect from the model:
253 | 
254 | dat <- data$data
255 | 
256 | # Check interaction 1
257 | mean(dat[dat[,2] == 1 & dat[,3] == 1, 1]) # (compare with interactions[[2]][[1]])
258 | mean(dat[dat[,2] == 1 & dat[,3] == 5, 1])
259 | # first mean higher, ok!
260 | 
261 | # Check interaction 2
262 | mean(dat[dat[,2] == 1 & dat[,3] == 1, 4]) # (compare with interactions[[2]][[2]])
263 | mean(dat[dat[,2] == 1 & dat[,3] == 2, 4])
264 | # first mean higher, ok!
265 | 
266 | 
267 | }
268 | 
269 | }
270 | 
271 | 


--------------------------------------------------------------------------------
/man/mvarsampler.Rd:
--------------------------------------------------------------------------------
  1 | \name{mvarsampler}
  2 | \alias{mvarsampler}
  3 | 
  4 | \title{
  5 | Sampling from a mixed VAR model
  6 | }
  7 | \description{
  8 | Function to sample from a mixed VAR (mVAR) model
  9 | }
 10 | \usage{
 11 | mvarsampler(coefarray, lags, thresholds, 
 12 |             sds, type, level, N, pbar)
 13 | }
 14 | %- maybe also "usage" for other objects documented here.
 15 | \arguments{
 16 |   \item{coefarray}{
 17 | A p x p x max(level) x max(level) x n_lags array, where p are the number of variables, level is the input argument \code{level} and n_lags is the number of specified lags in \code{lags}, so n_lags = length(n_lags). The first four dimensions specify the parameters involved in the cross-lagged effects of the lag specified in the 5th dimension. I.e. coefarray[5, 6, 1, 1, 3] indicates the cross-lagged effect of variable 6 on variable 5 (if both are continuous), for the third lag specified in \code{lags}. If variable 1 and 3 are categorical with m = 2 and  = 4 categories, respectively, then coefarray[1, 3, 1:2, 1:4, 1] indicates the m*s=8 parameters specifying this interaction for the first lag specified in \code{lags}. See the examples below for an illustration.
 18 | }
 19 |   \item{lags}{
 20 | A vector indicating the lags in the mVAR model. E.g. \code{lags = c(1, 4, 9)} specifies lags of order 1, 3, 9. The number of specified lags has to match the 5th dimension in \code{coefarray}.
 21 | }
 22 |   \item{thresholds}{
 23 | A list with p entries, each consisting of a vector indicating a threshold for each category of the given variable. For continuous variable, the vector has length 1.
 24 | }
 25 |   \item{sds}{
 26 | A vector of length p indicating the standard deviations of the included Gaussian nodes. If non-Gaussian variables are included in the mVAR model, the corresponding entries are ignored.
 27 | }
 28 |   \item{type}{
 29 | p vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" for categorical.
 30 | }
 31 |   \item{level}{
 32 | p vector indicating the number of categories of each variable. For continuous variables set to 1.
 33 | }
 34 |   \item{N}{
 35 | The number of samples to be drawn from the specified mVAR model.
 36 | }
 37 |   \item{pbar}{
 38 | If \code{pbar = TRUE}, a progress bar is shown.
 39 | }
 40 | }
 41 | \details{
 42 | We sample from the mVAR model by separately sampling from its corresponding p conditional distributions.
 43 | }
 44 | \value{
 45 | A list with two entries:
 46 | 
 47 | \item{call}{The function call}
 48 | 
 49 | \item{data}{The sampled n x p data matrix}
 50 | 
 51 | }
 52 | \references{
 53 | 
 54 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 55 | 
 56 | }
 57 | \author{
 58 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 59 | }
 60 | 
 61 | 
 62 | \examples{
 63 | 
 64 | \dontrun{
 65 | 
 66 | 
 67 | ## Generate data from mixed VAR model using mvarsampler() and recover model using mvar()
 68 | 
 69 | # 1) Define mVAR model
 70 | 
 71 | p <- 6 # Six variables
 72 | type <- c("c", "c", "c", "c", "g", "g") # 4 categorical, 2 gaussians
 73 | level <- c(2, 2, 4, 4, 1, 1) # 2 categoricals with m=2, 2 categoricals with m=4, two continuous
 74 | max_level <- max(level)
 75 | 
 76 | lags <- c(1, 3, 9) # include lagged effects of order 1, 3, 9
 77 | n_lags <- length(lags)
 78 | 
 79 | # Specify thresholds
 80 | thresholds <- list()
 81 | thresholds[[1]] <- rep(0, level[1])
 82 | thresholds[[2]] <- rep(0, level[2])
 83 | thresholds[[3]] <- rep(0, level[3])
 84 | thresholds[[4]] <- rep(0, level[4])
 85 | thresholds[[5]] <- rep(0, level[5])
 86 | thresholds[[6]] <- rep(0, level[6])
 87 | 
 88 | # Specify standard deviations for the Gaussians
 89 | sds <- rep(NULL, p)
 90 | sds[5:6] <- 1
 91 | 
 92 | # Create coefficient array
 93 | coefarray <- array(0, dim=c(p, p, max_level, max_level, n_lags))
 94 | 
 95 | # a.1) interaction between continuous 5<-6, lag=3
 96 | coefarray[5, 6, 1, 1, 2] <- .4
 97 | # a.2) interaction between 1<-3, lag=1
 98 | m1 <- matrix(0, nrow=level[2], ncol=level[4])
 99 | m1[1,1:2] <- 1
100 | m1[2,3:4] <- 1
101 | coefarray[1, 3, 1:level[2], 1:level[4], 1] <- m1
102 | # a.3) interaction between 1<-5, lag=9
103 | coefarray[1, 5, 1:level[1], 1:level[5], 3] <- c(0, 1)
104 | 
105 | 
106 | # 2) Sample
107 | set.seed(1)
108 | dlist <- mvarsampler(coefarray = coefarray,
109 |                      lags = lags,
110 |                      thresholds = thresholds,
111 |                      sds = sds,
112 |                      type = type,
113 |                      level = level,
114 |                      N = 200,
115 |                      pbar = TRUE)
116 | 
117 | # 3) Recover
118 | set.seed(1)
119 | mvar_obj <- mvar(data = dlist$data,
120 |                  type = type,
121 |                  level = level,
122 |                  lambdaSel = "CV",
123 |                  lags = c(1, 3, 9),
124 |                  signInfo = FALSE,
125 |                  overparameterize = F)
126 | 
127 | # Did we recover the true parameters?
128 | mvar_obj$wadj[5, 6, 2] # cross-lagged effect of 6 on 5 over lag lags[2]
129 | mvar_obj$wadj[1, 3, 1] # cross-lagged effect of 3 on 1 over lag lags[1]
130 | mvar_obj$wadj[1, 5, 3] # cross-lagged effect of 1 on 5 over lag lags[3]
131 | 
132 | 
133 | # For more examples see https://github.com/jmbh/mgmDocumentation
134 | 
135 | 
136 | }
137 | 
138 | }
139 | 
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 


--------------------------------------------------------------------------------
/man/plotRes.Rd:
--------------------------------------------------------------------------------
  1 | \name{plotRes}
  2 | \alias{plotRes}
  3 | 
  4 | \title{
  5 | Plot summary of resampled sampling distributions
  6 | }
  7 | \description{
  8 | Plots a summary of sampling distributions resampled with the resample() function
  9 | }
 10 | \usage{
 11 | plotRes(object, quantiles = c(.05, .95), labels = NULL, 
 12 |         decreasing = TRUE, cut = NULL, cex.label = 0.75, 
 13 |         lwd.qtl = 2, cex.mean = 0.55, cex.bg = 2.7, 
 14 |         axis.ticks = c(-0.5, -0.25, 0, 0.25, 0.5, 0.75, 1), 
 15 |         axis.ticks.mod = NULL, layout.width.labels = .2,
 16 |         layout.gap.pw.mod = .15, table = FALSE)
 17 | }
 18 | 
 19 | \arguments{
 20 |   \item{object}{
 21 | An output object from the \code{resample()} function.
 22 | }
 23 |   \item{quantiles}{
 24 | A numerical vector of length two, specifying the desired lower/upper quantiles. Defaults to \code{quantiles = c(.05, .95)}.
 25 | }
 26 |   \item{labels}{
 27 | A character vector of length p, containing the label of each variable, where p is the number of variables.
 28 | }
 29 |   \item{decreasing}{
 30 | If \code{TRUE} (default), the edges are ordered by the arithmetic mean of the sampling distribution in decreasing order. If \code{FALSE} they are ordered in increasing order.
 31 | }
 32 |   \item{cut}{
 33 | A sequence of integers, specifying which edges are represented. For instance, if \code{decreasing = TRUE} and \code{cut = 1:10}, summaries for the 10 edges with the largest parameter estimate are displayed. The cut argument can also be used to present the boostrapped CIs in several figures.
 34 | }
 35 |   \item{cex.label}{
 36 | Text size of the labels.
 37 | }
 38 |   \item{lwd.qtl}{
 39 | Line width of line indicating the upper/lower quantiles.
 40 | }
 41 |   \item{cex.mean}{
 42 | Text size of the number indicating the proportion of the estimates whose absolute value is larger than zero.
 43 | }
 44 |   \item{cex.bg}{
 45 | Size of the white background of the number indicating the proportion of the estimates whose absolute value is larger than zero.
 46 | }
 47 |   \item{axis.ticks}{
 48 | A numeric vector indicating the axis ticks and labels for the x-axis.
 49 | }
 50 |   \item{axis.ticks.mod}{
 51 | A numeric vector indicating the axis ticks and labels for the x-axis for moderation effects. If \code{axis.ticks.mod=NULL}, the values from \code{axis.ticks} for pairwise interactions are used.
 52 | }
 53 | \item{layout.width.labels}{
 54 | A positive numeric value which specifies the width of the left-hand-side legend relative to the width of the data panel (or data panels, in case of a moderator model), which have width = 1. Defaults to \code{layout.width.labels = 0.2}.}
 55 | \item{layout.gap.pw.mod}{
 56 | A positive numeric value which specifies the width of the gap between the stability of pairwise effects and moderation effects. Defaults to \code{layout.gap.pw.mod = 0.15}.
 57 | }
 58 | \item{table}{
 59 | If \code{table = TRUE}, the output is presented as a table instead of a figure. Defaults to \code{table = FALSE}.
 60 | }
 61 | }
 62 | 
 63 | \details{
 64 | Currently only supports summaries for resampled \code{mgm()} objects, and moderated MGMs with a single moderator.
 65 | }
 66 | 
 67 | \value{
 68 | Plots a figure that shows summaries of the resampled sampling distribution for (a set of) all edge parameters. These include the mean, a specified upper and lower quantile and the proportion of parameter estimates whose absolute value is larger than zero.
 69 | }
 70 | 
 71 | \author{
 72 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 73 | }
 74 | 
 75 | 
 76 | \seealso{
 77 | \code{resample()}, \code{mgm()}, \code{mvar()}, \code{tvmgm()}, \code{tvmar()}
 78 | }
 79 | 
 80 | 
 81 | \examples{
 82 | 
 83 | \dontrun{
 84 | 
 85 | # Fit initial model
 86 | fit_aut <- mgm(data = as.matrix(autism_data$data),
 87 |                type = autism_data$type,
 88 |                level = autism_data$lev,
 89 |                k = 2)
 90 | 
 91 | 
 92 | # Fit bootstrapped models
 93 | res_aut <- resample(object = fit_aut, 
 94 |                     data = as.matrix(autism_data$data), 
 95 |                     nB = 10) # should be more in real applications 
 96 | 
 97 | # Plot Summary
 98 | plotRes(object = res_aut,
 99 |         quantiles = c(.05, .95), 
100 |         labels = NULL, 
101 |         axis.ticks = c(-.25, 0, .25, .5, .75))
102 | 
103 |   }
104 | }
105 | 
106 | 
107 | 


--------------------------------------------------------------------------------
/man/predict.mgm.Rd:
--------------------------------------------------------------------------------
  1 | \name{predict.mgm}
  2 | \alias{predict.mgm}
  3 | 
  4 | 
  5 | 
  6 | \title{
  7 | Compute predictions from mgm model objects
  8 | }
  9 | \description{
 10 | Computes predictions and prediction errors from a mgm model-object (\code{mgm}, \code{mvar}, \code{tvmgm} or \code{tvmvar}).
 11 | 
 12 | }
 13 | \usage{
 14 | \method{predict}{mgm}(object, data, errorCon, errorCat, 
 15 |         tvMethod, consec, beepvar, dayvar, errordecimals=3, 
 16 |         \dots)
 17 | }
 18 | 
 19 | \arguments{
 20 |   \item{object}{
 21 | An mgm model object (the output of one of the functions \code{mgm()}, \code{mvar()}, \code{tvmgm()} or \code{tvmvar()})
 22 | }
 23 |   \item{data}{
 24 | A n x p data matrix with the same structure (number of variables p and types of variables) as the data used to fit the model.
 25 | }
 26 |   \item{errorCon}{
 27 |   Either a character vector specifying the types of nodewise errors that should be computed, where the two provided error functions for continuous varaibles are \code{errorCon = "RMSE"}, the Root Mean Squared Error, and \code{errorCon = "R2"}, the proportion of explained variance. The default is \code{errorCon = c("RMSE" "R2")}.
 28 | 
 29 | Alternatively, \code{errorCon} can be a list, where each list entry is a custom error function of the form \code{foo(true, pred)}, where \code{true} and \code{pred} are the arguments for the vectors of true and predicted values, respectively.  If predictions are made for a time-varying model and \code{tvMethod = "weighted"}, the weighted R2 or RMSE are computed. If a custom function is used, an additional argument for the weights has to be provided: \code{foo(true, pred, weights)}. Note that custom error functios can also be combined with the buildt-in functions, i.e. \code{errorCon = list("RMSE", "CustomError"=foo)}.}
 30 | 
 31 | 
 32 | \item{errorCat}{
 33 | Either a character vector specifying the types of nodewise errors that should be computed, where the two provided error functions for categorical variables are \code{errorCat = "CC"}, the proportion of correct classification (accuracy) and \code{errorCat = "nCC"}, the proportion of correct classification normalized by the marginal distribution of the variable at hand. Specifically, nCC = (CC - norm_constant) / (1 - norm_constant), where norm_constant is the highest relative frequency across categories. Another provided error is "CCmarg" which returns the accuracy of the intercept/marginal model. The default is to return all types of errors \code{errorCon = c("CC" "nCC", "CCmarg")}.
 34 | 
 35 | Alternatively, \code{errorCat} can be a list, where each list entry is a custom error function of the form \code{foo(true, pred)}, where \code{true} and \code{pred} are the arguments for the vectors of true and predicted values, respectively.  If predictions are made for a time-varying model and \code{tvMethod = "weighted"}, the weighted R2 or RMSE are computed. If a custom function is used, an additional argument for the weights has to be provided: \code{foo(true, pred, weights)}. Note that custom error functios can also be combined with the buildt-in functions, i.e. \code{errorCon = list("nCC", "CustomError"=foo)}.}
 36 | 
 37 | \item{tvMethod}{
 38 | Specifies how predictions and errors are computed for time-varying models: \code{tvMethod = "weighted"} computes errors by computing a weighted error over all cases in the time series at each estimation point specified in \code{estpoints} in \code{tvmgm()} or \code{tvmvar()}. The weighting corresponds to the weighting used for estimation (see \code{?tvmgm} or \code{?tvmvar}). \code{tvMethod = "closestModel"} determines for each time point the closest model and uses that model for prediction. See Details below for a more detailed explanation.
 39 | }
 40 | 
 41 | \item{consec}{Only relevant for (time-varying) mVAR models. An integer vector of length \code{nrow(data)}, indicating the sequence of measurement points in a time series. This is only relevant for mVAR models and time series with unequal time intervals. Defaults to \code{consec = NULL}, which assumes equal time intervals. \code{consec} is ignored if a \code{mgm} or \code{tvmgm} object is provided to \code{predict.mgm()}. For details see \code{?mvar}.}
 42 | 
 43 | \item{beepvar}{Together with the argument \code{dayvar}, this argument is an alternative to the \code{consec} argument (see above) to specify the consecutiveness of measurements. This is tailored to ecological momentary assessment (EMA) studies, where the consectutiveness is defined by the number of notification on a given day (\code{beepvar}) and the given day (\code{dayvar}).
 44 | }
 45 | 
 46 | \item{dayvar}{See \code{beepvar}.}
 47 | 
 48 | \item{errordecimals}{Number of decimals to which predictability / prediction error values are rounded. Defaults to \code{errordecimals = 3}.}
 49 | 
 50 | \item{...}{Additional arguments.}
 51 | 
 52 | 
 53 | }
 54 | 
 55 | 
 56 | 
 57 | \details{
 58 | Nodewise errors in time-varying models can be computed in two different ways: first, one computes the predicted value for each of the N cases in the time series for all models (estimated at different estimation points, see \code{?tvmgm} or \code{?tvmvar}). Then the error of each of the N cases for each of the models is weighted by the weight that has been used to estimate a given model at its estimation point. This means that the error of a data point close to the end of a time-series gets a high weight for models estimated in the end of the time-series and a low weight for models estimated in the beginning of the time series.
 59 | 
 60 | Second, we determine for each case in the time-series the closest estmation point, and use the model estimated at that estimation point to make predictions for that case.
 61 | 
 62 | Note that the error function normalized accuracy (nCC) is negative if the full model performs worse than the intercept model. This can happen if the model overfits the data.
 63 | 
 64 | }
 65 | 
 66 | 
 67 | \value{
 68 | A list with the following entries:
 69 | 
 70 | \item{call}{
 71 | Contains all provided input arguments.
 72 | }
 73 | 
 74 | \item{predicted}{
 75 | A n x p matrix with predicted values, matching the dimension of the true values in \code{true}.
 76 | }
 77 | 
 78 | \item{probabilities}{
 79 | A list with p entries corresponding to p nodes in the data. If a variable is categorical, the corresponding entry contains a n x k matrix with predicted probabilities, where k is the number of categories of the categorical variable. If a variable is continuous, the corresponding entry is empty.
 80 | }
 81 | 
 82 | \item{true}{
 83 | Contains the true values. For \code{mgm} and \code{tvmgm} objects these are equal to the data provided via \code{data}. For \code{mvar} and \code{tvmvar} objects, these are equal to the rows that can be predicted in a VAR model, depending on the largest specified lag and (if specified) the \code{consec} argument. 
 84 | }
 85 | 
 86 | \item{errors}{
 87 | A matrix containing the all types of errors specified via \code{errorCon} and \code{errorCat}, for each variable. If \code{tvMethod = "weighted"}, the matrix becomes an array, with an additional dimension for the estimation point.
 88 | }
 89 | 
 90 | \item{tverrors}{
 91 | If \code{tvMethod = "weighted"}, this list entry contains a list with errors of the format of \code{errors}, separately for each estimation point. The errors are computed from predictions of the model at the given estimation points and weighted by the weight-vector at that estimation point. If \code{tvMethod = "closestModel"}, this entry is empty.
 92 | }
 93 | 
 94 | 
 95 | }
 96 | \references{
 97 | 
 98 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 99 | 
100 | }
101 | \author{
102 | Jonas Haslbeck <jonashaslbeck@gmail.com>
103 | }
104 | 
105 | \examples{
106 | 
107 | \dontrun{
108 | # See examples in ?mgm, ?tvmgm, ?mvar and ?tvmvar.
109 | }
110 | 
111 | }
112 | 
113 | 


--------------------------------------------------------------------------------
/man/print.int.Rd:
--------------------------------------------------------------------------------
 1 | \name{print.int}
 2 | \alias{print.int}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Print method for int objects
 6 | }
 7 | \description{
 8 | Returns basic information about objects created with showInteraction()
 9 | }
10 | \usage{
11 | \method{print}{int}(x, ...)
12 | 
13 | }
14 | %- maybe also 'usage' for other objects documented here.
15 | \arguments{
16 |   \item{x}{
17 | The output object of showInteraction().
18 | }
19 |   \item{\dots}{
20 | Additional arguments.
21 | }
22 | }
23 | \value{
24 | Writes basic information about the object in the console.
25 | }
26 | 
27 | \author{
28 | Jonas Haslbeck <jonashaslbeck@gmail.com>
29 | }
30 | 


--------------------------------------------------------------------------------
/man/print.mgm.Rd:
--------------------------------------------------------------------------------
 1 | \name{print.mgm}
 2 | \alias{print.mgm}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Print method for mgm objects
 6 | }
 7 | \description{
 8 | Returns basic information about fit objects, prediction objects and bandwidth-selection objects.
 9 | }
10 | \usage{
11 | \method{print}{mgm}(x, ...)
12 | 
13 | }
14 | %- maybe also 'usage' for other objects documented here.
15 | \arguments{
16 |   \item{x}{
17 | The output object of \code{mgm()}, \code{mvar()}, \code{tvmgm()}, \code{tvmvar()}, \code{predict.mgm()} or \code{bwSelect()}.
18 | }
19 |   \item{\dots}{
20 | Additional arguments.
21 | }
22 | }
23 | \value{
24 | Writes basic information about the object in the console.
25 | }
26 | 
27 | 
28 | \author{
29 | Jonas Haslbeck <jonashaslbeck@gmail.com>
30 | }
31 | 


--------------------------------------------------------------------------------
/man/resample.Rd:
--------------------------------------------------------------------------------
  1 | \name{resample}
  2 | \alias{resample}
  3 | 
  4 | \title{
  5 | Resampling scheme for mgm objects
  6 | }
  7 | \description{
  8 | Fits mgm model types (mgm, mvar, tvmgm, tvmvar) to a specified number of bootstrap samples.
  9 | }
 10 | \usage{
 11 | resample(object, data, nB, blocks, quantiles, 
 12 |          pbar, verbatim, ...)
 13 | }
 14 | 
 15 | \arguments{
 16 |   \item{object}{
 17 | An mgm model object, the output of mgm(), tvmgm(), mvar(), tvmvar(). The model specifications for all fitted models are taken from this model object.
 18 | }
 19 |   \item{data}{
 20 | The n x p data matrix.
 21 | }
 22 |   \item{nB}{
 23 | The number of bootstrap samples.
 24 | }
 25 |   \item{blocks}{
 26 | The number of blocks for the block bootstrap used for time-varying models.
 27 | }
 28 | 
 29 |   \item{quantiles}{
 30 | A vector with two values in [0, 1], specifying quantiles computed on the bootstrapped sampling distributions. Defaults to \code{quantiles = c(.05, .95)}
 31 | }
 32 | 
 33 | \item{pbar}{
 34 | If \code{TRUE}, a progress bar is shown. Defaults to \code{pbar = TRUE}.
 35 | }
 36 | 
 37 | \item{verbatim}{
 38 | If \code{TRUE}, the seed of the current bootstrap sample is printed in the console. Useful to exclude zero-variance bootstrap samples in datasets with low variance.
 39 | }
 40 |   \item{\dots}{
 41 | Additional arguments.
 42 | }
 43 | }
 44 | 
 45 | \details{
 46 | 
 47 | \code{resample()} fits a model specified via the \code{object} argument to \code{nB} bootstrap samples obtained from the orginial dataset. For stationary models (mgm() and mvar()) objects, we use the standard bootstrap. For time-varying models (tvmgm() and tvmvar()) we use the block bootstrap.
 48 | 
 49 | For mvar models, \code{bootParameters} is a p x p x nlags x nB array, where p is the number of variables, nlags is the number of specified lags, and nB is the number of bootstrap samples. Thus \code{bootParameters[7, 3, 2, ]} returns the bootstrapped sampling distribution of the lagged effect from variable 3 on 7 for the 2nd specified lag. See also \code{?mvar}.
 50 | 
 51 | For tvmar models, \code{bootParameters} is a p x p x nlags x nestpoints x nB array, analogously to mvar models. nestpoints is the number of specified estpoints. See also \code{?tvmvar}.
 52 | 
 53 | Resampling is currently only supported for pairwise MGMs (\code{k = 2}). For mgms, \code{bootParameters} is a p x p x nB array. For tvmgms, \code{bootParameters} is a p x p x nestpoint x nB array.
 54 | 
 55 | The seeds for the bootstrap samples are randomly sampled. For MGMs, the seeds are resampled until there are nB bootstrap samples on which an MGM can be estimated. This resampling has been implemented, because especially for small data sets, one can obtain bootstrap samples in which one or several variables have zero variance. For the other model classes, an informative error is returned in case the respective model cannot be estimated on one or more of the bootstrap samples.
 56 | 
 57 | }
 58 | \value{
 59 | The output consists of a list with the entries
 60 | 
 61 | \item{call}{Contains the function call.}
 62 | 
 63 | \item{models}{A list with \code{nB} entries, containing the models fit to the bootstrapped sampels.}
 64 | 
 65 | \item{bootParameters}{Contains all the bootstrapped sampling distribution of all parameters. The dimension of this object depends on the type of model. Specifically, this object has the same dimension as the main  parameter output of the corresponding estimation function, with one dimension added for the bootstrap iterations. See "Details" above.
 66 | }
 67 | 
 68 | \item{bootQuantiles}{Contains the specified quantiles of the bootstrapped sampling distribution for each parameter. Has the same structure as \code{bootParameters}. See "Details" above. }
 69 | 
 70 | \item{Times}{Returns the running time for each bootstrap model in seconds.}
 71 | 
 72 | \item{totalTime}{Returns the running time for all bootstrap models together in seconds.}
 73 | 
 74 | \item{seeds}{\code{nB} integers indicating the seeds used to sample the \code{nB} bootstrap samples.}
 75 | 
 76 | }
 77 | 
 78 | 
 79 | \references{
 80 | 
 81 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 82 | 
 83 | 
 84 | }
 85 | \author{
 86 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 87 | }
 88 | 
 89 | 
 90 | \examples{
 91 | 
 92 | \dontrun{
 93 | 
 94 | 
 95 | # 1) Fit mgm to example dataset (true edges: 1-4, 2-3, 1-2)
 96 | mgm_obj <- mgm(data = mgm_data$data, 
 97 |                type = c('g', 'c', 'c', 'g'),
 98 |                level = c(1, 2, 4, 1),
 99 |                k = 2, 
100 |                lambdaSel = 'CV', 
101 |                threshold = 'none')
102 | 
103 | # 2) Take 50 bootstrap samples using resample()
104 | res_obj <- resample(object = mgm_obj, 
105 |                     data = mgm_data$data,
106 |                     nB = 50)
107 | 
108 | # 3) Plot histogram of bootstrapped sampling distribution of edge 1-4
109 | hist(res_obj$bootParameters[1, 4, ], 
110 |      main = "", 
111 |      xlab = "Parameter Estimate")
112 | 
113 | # 4) Plot summary of all sampling distributions
114 | plotRes(object = res_obj, 
115 |         quantiles = c(0.05, .95))
116 | 
117 | 
118 | # For more examples see https://github.com/jmbh/mgmDocumentation
119 | 
120 | 
121 | 
122 | }
123 | 
124 | 
125 | }
126 | 


--------------------------------------------------------------------------------
/man/showInteraction.Rd:
--------------------------------------------------------------------------------
 1 | \name{showInteraction}
 2 | \alias{showInteraction}
 3 | 
 4 | \title{
 5 | Retrieving details of interactions
 6 | }
 7 | \description{
 8 | Retrieves details of a specified interaction from mgm model objects.
 9 | }
10 | \usage{
11 | showInteraction(object, int)
12 | }
13 | 
14 | 
15 | \arguments{
16 |   \item{object}{
17 | The output of one of the estimation functions \code{mgm()}, \code{tvmgm()}, \code{mvar()}, \code{tvmvar()}.
18 | }
19 |   \item{int}{
20 | An integer vector specifying the interaction. For mVAR models, this vector has length 2. For MGMs the vector can be larger to request details of interaction of order > 2.
21 | }
22 | }
23 | 
24 | \details{
25 | Currently the function only returns details of pairwise interactions from output objects of \code{mgm()}.
26 | }
27 | \value{
28 |   \item{variables}{Integer vector returning the variables specified via the argument \code{int}}
29 |   \item{type}{Character vector returning the type of the specified variables variables}
30 |   \item{level}{Integer vector returning the number of levels of the specified variables variables}
31 |   \item{parameters}{A list of length equal to the order k of the specified interaction. The entries contain the set of parameters obtained from the nodewise regressions on the k variables. Depending on the type of the variables in the interaction, these sets can obtain one or several parameters. For details see \code{?mgm} or Haslbeck & Waldorp (2017).}
32 |   
33 | 
34 | 
35 | }
36 | \references{
37 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
38 | }
39 | \author{
40 | Jonas Haslbeck <jonashaslbeck@gmail.com>
41 | }
42 | 
43 | \seealso{
44 | \code{mgm}, \code{tvmgm}, \code{mvar}, \code{tvmvar}
45 | }
46 | 
47 | 
48 | \examples{
49 | 
50 | \dontrun{
51 | 
52 | 
53 | ## We fit a pairwise and 3-order MGM to the mixed Autism dataset (?autism_data)
54 | 
55 | # 1) Fit Pairwise MGM
56 | 
57 | # Call mgm()
58 | fit_d2 <- mgm(data = autism_data$data,
59 |               type = autism_data$type,
60 |               level = autism_data$lev,
61 |               k = 2) # ad most pairwise interacitons
62 | 
63 | # Weighted adjacency matrix
64 | fit_d2$pairwise$wadj # for instance, we see there is an interaction 1-2
65 | 
66 | # 2) Look at details of interaction 1-2
67 | showInteraction(object = fit_d2, 
68 |                 int = c(1, 2))
69 | 
70 | # For more examples see https://github.com/jmbh/mgmDocumentation
71 | 
72 | }
73 | 
74 | 
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/man/tvmgm.Rd:
--------------------------------------------------------------------------------
  1 | \name{tvmgm}
  2 | \alias{tvmgm}
  3 | \alias{tv.mgmfit}
  4 | 
  5 | \title{
  6 | Estimating time-varying Mixed Graphical Models
  7 | }
  8 | 
  9 | \description{
 10 | Estimates time-varying k-order Mixed Graphical Models (MGMs) via elastic-net regularized kernel smoothed Generalized Linear Models
 11 | }
 12 | 
 13 | \usage{
 14 | tvmgm(data, type, level, timepoints, estpoints, bandwidth, ...)
 15 | }
 16 | 
 17 | 
 18 | \arguments{
 19 |   \item{data}{
 20 | n x p data matrix.
 21 | }
 22 |   \item{type}{
 23 | p vector indicating the type of variable for each column in \code{data}: "g" for Gaussian, "p" for Poisson, "c" for categorical.
 24 | }
 25 |   \item{level}{
 26 | p vector indicating the number of categories of each variable. For continuous variables set to 1.
 27 | }
 28 |   \item{timepoints}{
 29 | A strictly increasing numeric vector of length \code{nrow(data)} indicating the time points of the measurements in \code{data}. If \code{timepoints} is not specified, it is assumed that the time points are equally spaced. For details, see Haslbeck and Waldorp (2018).
 30 | }
 31 |   \item{estpoints}{
 32 | Vector indicating estimation points on the unit interval [0, 1] (the provided time scale is normalized interally to [0,1]).
 33 | }
 34 |   \item{bandwidth}{
 35 | We use a gaussian density on the unit time-interval [0,1] to determine the weights for each observation at each estimated time point. The bandwidth specifies the standard deviation the Gaussian density. To get some intuition, which bandwidth results in the combination of how many data close in time one can plot Gaussians on [0,1] for different bandwidths. The bandwidth can also be selected in a data driven way using the function (see \code{bwSelect}).
 36 | 
 37 | }
 38 |   \item{\dots}{
 39 | Arguments passed to \code{mgm}, specifying the MGM. See \code{?mgm}.
 40 | }
 41 | }
 42 | 
 43 | \details{
 44 | Estimates a sequence of MGMs at the time points specified at the locations specified via \code{estpoints}. \code{tvmgm()} is a wrapper around \code{mgm()} and estimates a series of MGM with different weightings which are defined by the estimation locations in \code{estpoints} and the bandwidth parameter specified in \code{bandwidth}. For details see Haslbeck and Waldorp (2018).
 45 | 
 46 | Note that MGMs are not normalizable for all parameter values. See Chen, Witten & Shojaie (2015) for an overview of when pairwise MGMs are normalizable. To our best knowledge, for MGMs with interactions of order > 2 that include non-categorical variables, the conditions for normalizablity are unknown.
 47 | }
 48 | \value{
 49 | A list with the following entries:
 50 | 
 51 | \item{call}{
 52 | Contains all provided input arguments. If \code{saveData = TRUE}, it also contains the data.
 53 | }
 54 | 
 55 | \item{pairwise}{
 56 | Contains a list with all information about estimated pairwise interactions. \code{wadj} contains a p x p x estpoints array containing the weighted adjacency matrix for each estimation point specified in \code{estpoints}, if p is the number of variables in the network. \code{signs} has the same dimensions as \code{wadj} and contains the signs for the entries of \code{wadj}: 1 indicates a positive sign, -1 a negative sign and 0 an undefined sign. A sign is undefined if an edge is a function of more than one parameter. This is the case for interactions involving a categorical variable with more than 2 categories. \code{edgecolor} also has the same dimensions as \code{wadj} contains a color for each edge, depending on \code{signs}. It is provided for more convenient plotting. If only pairwise interactions are modeled (\code{k = 2}), \code{wadj} contains all conditional independence relations.
 57 | }
 58 | 
 59 | \item{interactions}{
 60 | Contains a list with one entry for each estimation point specified in \code{estpoints}; each entry is a list with three entries that relate each interaction in the model to all its parameters. \code{indicator} contains a list with k-1 entries, one for each order of modeled interaction, which contain the estimated (nonzero) interactions. \code{weights} contains a list with k-1 entries, which in turn contain R lists, where R is the number of interactions (and rows in the corresponding list entry in\code{indicator}) that were estimated (nonzero) in the given entry. \code{signs} has the same structure as \code{weights} and provides the sign of the interaction, if defined.
 61 | }
 62 | 
 63 | \item{intercepts}{
 64 | Contains a list with one entry for each estimation point specified in \code{estpoints}; each entry is a list with p entries, which contain the intercept/thresholds for each node in the network. In case a given node is categorical with m categories, there are m thresholds for this variable (one for each category).
 65 | }
 66 | 
 67 | 
 68 | \item{tvmodels}{
 69 | Contains the MGM model estimated by \code{mgm()} at each time point specified via \code{estpoints}. See \code{?mgm} for a detailed description of this output.
 70 | }
 71 | 
 72 | }
 73 | 
 74 | 
 75 | \references{
 76 | Chen S, Witten DM & Shojaie (2015). Selection and estimation for mixed graphical models. Biometrika, 102(1), 47.
 77 | 
 78 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 79 | 
 80 | Yang, E., Baker, Y., Ravikumar, P., Allen, G. I., & Liu, Z. (2014, April). Mixed Graphical Models via Exponential Families. In AISTATS (Vol. 2012, pp. 1042-1050).
 81 | 
 82 | }
 83 | 
 84 | \author{
 85 | 
 86 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 87 | 
 88 | }
 89 | 
 90 | 
 91 | 
 92 | \examples{
 93 | 
 94 | \dontrun{
 95 | 
 96 | 
 97 | ## We specify a time-varying MGM and recover it using tvmgm()
 98 | 
 99 | # 1) Specify Model
100 | 
101 | # a) Define Graph
102 | p <- 6
103 | type = c("c", "c", "g", "g", "p", "p")
104 | level = c(2, 3, 1, 1, 1, 1)
105 | n_timepoints <- 1000
106 | 
107 | # b) Define Interaction
108 | factors <- list()
109 | factors[[1]] <- matrix(c(1,2,
110 |                          2,3,
111 |                          3,4), ncol=2, byrow = T)  # no pairwise interactions
112 | factors[[2]] <- matrix(c(1,2,3,
113 |                          2,3,4), ncol=3, byrow = T) # one 3-way interaction
114 | 
115 | interactions <- list()
116 | interactions[[1]] <- vector("list", length = 3)
117 | interactions[[2]] <- vector("list", length = 2)
118 | # 3 2-way interactions
119 | interactions[[1]][[1]] <- array(0, dim = c(level[1], level[2], n_timepoints))
120 | interactions[[1]][[2]] <- array(0, dim = c(level[2], level[3], n_timepoints))
121 | interactions[[1]][[3]] <- array(0, dim = c(level[3], level[4], n_timepoints))
122 | # 2 3-way interactions
123 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3], n_timepoints))
124 | interactions[[2]][[2]] <- array(0, dim = c(level[2], level[3], level[4], n_timepoints))
125 | theta <- .3
126 | interactions[[1]][[1]][1, 1, ] <- theta
127 | interactions[[1]][[2]][1, 1, ] <- theta
128 | interactions[[1]][[3]][1, 1, ] <- seq(0, theta, length = n_timepoints)
129 | interactions[[2]][[1]][1, 1, 1, ] <- theta
130 | interactions[[2]][[2]][1, 1, 1, ] <- theta
131 | # c) Define Thresholds
132 | thresholds <- list()
133 | thresholds[[1]] <- matrix(0, nrow = n_timepoints, ncol= level[1])
134 | thresholds[[2]] <- matrix(0, nrow = n_timepoints, ncol= level[2])
135 | thresholds[[3]] <- matrix(0, nrow = n_timepoints, ncol= level[3])
136 | thresholds[[4]] <- matrix(0, nrow = n_timepoints, ncol= level[4])
137 | thresholds[[5]] <- matrix(.1, nrow = n_timepoints, ncol= level[5])
138 | thresholds[[6]] <- matrix(.1, nrow = n_timepoints, ncol= level[6])
139 | # d) define sds
140 | sds <- matrix(.2, ncol=p, nrow=n_timepoints)
141 | 
142 | # 2) Sample Data
143 | set.seed(1)
144 | d_iter <- tvmgmsampler(factors = factors,
145 |                        interactions = interactions,
146 |                        thresholds = thresholds,
147 |                        sds = sds,
148 |                        type = type,
149 |                        level = level,
150 |                        nIter = 100,
151 |                        pbar = TRUE)
152 | 
153 | data <- d_iter$data
154 | head(data)
155 | # delete inf rows:
156 | ind_finite <- apply(data, 1, function(x) if(all(is.finite(x))) TRUE else FALSE)
157 | table(ind_finite) # all fine for this setup & seed
158 | # in case of inf values (no theory on how to keep k-order MGM well-defined)
159 | data <- data[ind_finite, ] 
160 | 
161 | 
162 | # 3) Recover
163 | mgm_c_cv <- tvmgm(data = data,
164 |                   type = type,
165 |                   level = level,
166 |                   k = 3,
167 |                   estpoints = seq(0, 1, length=10),
168 |                   bandwidth = .1,
169 |                   lambdaSel = "CV",
170 |                   ruleReg = "AND",
171 |                   pbar = TRUE,
172 |                   overparameterize = T,
173 |                   signInfo = FALSE)
174 | 
175 | # Look at time-varying pairwise parameter 3-4
176 | mgm_c_cv$pairwise$wadj[3,4,] # recovers increase
177 | 
178 | # 4) Predict values / compute nodewise Errors
179 | pred_mgm_cv_w <- predict.mgm(mgm_c_cv,
180 |                              data = data,
181 |                              tvMethod = "weighted")
182 | pred_mgm_cv_cM <- predict.mgm(mgm_c_cv,
183 |                               data = data,
184 |                               tvMethod = "closestModel")
185 | 
186 | pred_mgm_cv_w$errors
187 | pred_mgm_cv_cM$errors # Pretty similar!
188 | 
189 | 
190 | # For more examples see https://github.com/jmbh/mgmDocumentation
191 | 
192 | }
193 | 
194 | }
195 | 


--------------------------------------------------------------------------------
/man/tvmgmsampler.Rd:
--------------------------------------------------------------------------------
  1 | \name{tvmgmsampler}
  2 | \alias{tvmgmsampler}
  3 | 
  4 | \title{
  5 | Sample from time-varying k-order Mixed Graphical Model
  6 | }
  7 | \description{
  8 | Generates samples from a time-varying k-order Mixed Graphical Model
  9 | }
 10 | \usage{
 11 | tvmgmsampler(factors, interactions, thresholds, sds, type,
 12 |              level, nIter = 250, pbar = TRUE, ...)
 13 | }
 14 | 
 15 | \arguments{
 16 |   \item{factors}{
 17 | The same object as \code{factors} in \code{mgmsampler()}. An interaction is specified in \code{factors} if it should be nonzero at least at one time point in the time series. The values of each parameter at each time point is specified via \code{interactions}.
 18 | }
 19 |   \item{interactions}{
 20 | The same object as \code{factors} in \code{mgmsampler()}, except that each array indicating the parameters of an interaction has an additional (the last) dimension, indicating time. Corresponding to the time vector in \code{factors}, the time vector has to be a sequence of integers \{1, 2, ..., N\}. For an illustration see the examples below.
 21 | }
 22 |   \item{thresholds}{
 23 | A list with p entries for p variables, each of which contains a N x m matrix. The columns contain the m thresholds for m categories (for continuous variables m = 1 and the entry contains the threshold/intercept). The rows indicate how the thresholds change over time.
 24 | }
 25 |   \item{sds}{
 26 | N x p matrix indicating the standard deviations of Gaussians specified in \code{type} for \{1, ..., N\} time points. Entries not referring to Gaussians are ignored.
 27 | }
 28 |   \item{type}{
 29 | p character vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" of each variable.
 30 | }
 31 |   \item{level}{
 32 | p integer vector indicating the number of categories of each variable. For continuous variables set to 1.
 33 | }
 34 |   \item{nIter}{
 35 | Number of iterations in the Gibbs sampler until a sample is drawn.
 36 | }
 37 |   \item{pbar}{
 38 |   If \code{pbar = TRUE} a progress bar is shown. Defaults to \code{pbar = TRUE}.
 39 | }
 40 | 
 41 |   \item{...}{
 42 |   Additional arguments.
 43 | }
 44 | 
 45 | }
 46 | \details{
 47 | 
 48 | \code{tvmgmsampler} is a wrapper function around \code{mgmsampler}. Its input is the same as for \code{mgmsampler}, except that each object has an additional dimension for time. The number of time points is specified via entries in the additional time dimension.
 49 | 
 50 | }
 51 | \value{
 52 | 
 53 | A list containing:
 54 | 
 55 | \item{call}{
 56 | Contains all provided input arguments.
 57 | }
 58 | 
 59 | \item{data}{
 60 | The N x p data matrix of sampled values
 61 | }
 62 | 
 63 | 
 64 | }
 65 | 
 66 | \references{
 67 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 68 | 
 69 | Yang, E., Baker, Y., Ravikumar, P., Allen, G. I., & Liu, Z. (2014, April). Mixed Graphical Models via Exponential Families. In AISTATS (Vol. 2012, pp. 1042-1050).
 70 | 
 71 | }
 72 | \author{
 73 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 74 | }
 75 | 
 76 | 
 77 | \examples{
 78 | 
 79 | 
 80 | \dontrun{
 81 | 
 82 | # --------- Example 1: p = 4 dimensional Gaussian ---------
 83 | 
 84 | # ----- 1) Specify Model -----
 85 | 
 86 | # a) General Graph Info
 87 | type = c("g", "g", "g", "g") # Four Gaussians
 88 | level = c(1, 1, 1, 1)
 89 | n_timepoints = 500 #  Number of time points
 90 | 
 91 | # b) Define Interaction
 92 | factors <- list()
 93 | factors[[1]] <- array(NA, dim=c(2, 2)) # two pairwise interactions
 94 | factors[[1]][1, 1:2] <- c(3,4)
 95 | factors[[1]][2, 1:2] <- c(1,2)
 96 | 
 97 | # Two parameters, one linearly increasing from 0 to 0.8, another one lin decreasing from 0.8 to 0
 98 | interactions <- list()
 99 | interactions[[1]] <- vector("list", length = 2)
100 | interactions[[1]][[1]] <- array(0, dim = c(level[1], level[2], n_timepoints))
101 | interactions[[1]][[1]][1,1, ] <- seq(.8, 0, length = n_timepoints)
102 | interactions[[1]][[2]] <- array(0, dim = c(level[1], level[2], n_timepoints))
103 | interactions[[1]][[2]][1,1, ] <- seq(0, .8, length = n_timepoints)
104 | 
105 | # c) Define Thresholds
106 | thresholds <- vector("list", length = 4)
107 | thresholds <- lapply(thresholds, function(x) matrix(0, ncol = level[1], nrow = n_timepoints))
108 | 
109 | # d) Define Standard deviations
110 | sds <- matrix(1, ncol = length(type), nrow = n_timepoints) # constant across variables and time
111 | 
112 | 
113 | # ----- 2) Sample cases -----
114 | 
115 | set.seed(1)
116 | dlist <- tvmgmsampler(factors = factors,
117 |                       interactions = interactions,
118 |                       thresholds = thresholds,
119 |                       sds = sds,
120 |                       type = type,
121 |                       level = level,
122 |                       nIter = 75,
123 |                       pbar = TRUE)
124 | 
125 | 
126 | # ----- 3) Recover model from sampled cases -----
127 | 
128 | set.seed(1)
129 | tvmgm_obj <- tvmgm(data = dlist$data,
130 |                    type = type,
131 |                    level = level,
132 |                    estpoints = seq(0, 1, length = 15),
133 |                    bandwidth = .2,
134 |                    k = 2,
135 |                    lambdaSel = "CV",
136 |                    ruleReg = "AND")
137 | 
138 | # How well did we recover those two time-varying parameters?
139 | plot(tvmgm_obj$pairwise$wadj[3,4,], type="l", ylim=c(0,.8))
140 | lines(tvmgm_obj$pairwise$wadj[1,2,], type="l", col="red")
141 | # Looks quite good
142 | 
143 | 
144 | # --------- Example 2: p = 5 binary; one 3-way interaction ---------
145 | 
146 | # ----- 1) Specify Model -----
147 | 
148 | # a) General Graph Info
149 | p <- 5 # number of variables
150 | type = rep("c", p) # all categorical
151 | level = rep(2, p) # all binary
152 | n_timepoints <- 1000
153 | 
154 | # b) Define Interaction
155 | factors <- list()
156 | factors[[1]] <- NULL # no pairwise interactions
157 | factors[[2]] <- array(NA, dim = c(1,3)) # one 3-way interaction
158 | factors[[2]][1, 1:3] <- c(1, 2, 3)
159 | 
160 | interactions <- list()
161 | interactions[[1]] <- NULL # no pairwise interactions
162 | interactions[[2]] <- vector("list", length = 1)  # one 3-way interaction
163 | # 3-way interaction no1
164 | interactions[[2]][[1]] <- array(0, dim = c(level[1], level[2], level[3], n_timepoints))
165 | theta <- 2
166 | interactions[[2]][[1]][1, 1, 1, ] <- seq(0, 2, length = n_timepoints) # fill in nonzero entries
167 | 
168 | # c) Define Thresholds
169 | thresholds <- list()
170 | for(i in 1:p) thresholds[[i]] <- matrix(0, nrow = n_timepoints, ncol = level[i])
171 | 
172 | 
173 | # ----- 2) Sample cases -----
174 | 
175 | set.seed(1)
176 | dlist <- tvmgmsampler(factors = factors,
177 |                       interactions = interactions,
178 |                       thresholds = thresholds,
179 |                       type = type,
180 |                       level = level,
181 |                       nIter = 150,
182 |                       pbar = TRUE)
183 | 
184 | 
185 | # ----- 3) Check Marginals -----
186 | 
187 | dat <- dlist$data[1:round(n_timepoints/2),]
188 | table(dat[,1], dat[,2], dat[,3])
189 | 
190 | dat <- dlist$data[round(n_timepoints/2):n_timepoints,]
191 | table(dat[,1], dat[,2], dat[,3])
192 | 
193 | # Observation: much stronger effect in second hald of the time-series,
194 | # which is what we expect
195 | 
196 | 
197 | # ----- 4) Recover model from sampled cases -----
198 | 
199 | set.seed(1)
200 | tvmgm_obj <- tvmgm(data = dlist$data,
201 |                    type = type,
202 |                    level = level,
203 |                    estpoints = seq(0, 1, length = 15),
204 |                    bandwidth = .2,
205 |                    k = 3,
206 |                    lambdaSel = "CV",
207 |                    ruleReg = "AND")
208 | 
209 | tvmgm_obj$interactions$indicator
210 | # Seems very difficult to recover this time-varying 3-way binary interaction
211 | # See also the corresponding problems in the examples of ?mgmsampler
212 | 
213 | 
214 | # For more examples see https://github.com/jmbh/mgmDocumentation
215 | 
216 | 
217 | }
218 | 
219 | 
220 | }
221 | 
222 | 
223 | 


--------------------------------------------------------------------------------
/man/tvmvar.Rd:
--------------------------------------------------------------------------------
  1 | \name{tvmvar}
  2 | \alias{tvmvar}
  3 | \alias{tv_var.mgm}
  4 | 
  5 | \title{
  6 | Estimating time-varying Mixed Vector Autoregressive Model (mVAR)
  7 | }
  8 | 
  9 | \description{
 10 | Estimates time-varying Mixed Vector Autoregressive Model (mVAR) via elastic-net regularized kernel smoothed Generalized Linear Models
 11 | }
 12 | 
 13 | \usage{
 14 | tvmvar(data, type, level, timepoints, estpoints, bandwidth, ...)
 15 | }
 16 | 
 17 | 
 18 | \arguments{
 19 |   \item{data}{
 20 | n x p data matrix.
 21 | }
 22 |   \item{type}{
 23 | p vector indicating the type of variable for each column in \code{data}: "g" for Gaussian, "p" for Poisson, "c" for categorical.
 24 | }
 25 |   \item{level}{
 26 | p vector indicating the number of categories of each variable. For continuous variables set to 1.
 27 | }
 28 |   \item{timepoints}{
 29 | A strictly increasing numeric vector of length \code{nrow(data)} indicating time points for the measurements in \code{data}. If \code{timepoints} is not specified, it is assumed that the time points are equally spaced. For details, see Haslbeck and Waldorp (2018).
 30 | }
 31 |   \item{estpoints}{
 32 | Vector indicating estimation points on interval [0, 1]. Note that we define this unit interval on the entire time series. This also includes measurements that are excluded because not enough previous measurements are available to fit the model. This ensures that the a model estimated at, for example, estimation point 0.15 is actually estimated on data close to data points around this time point. See Haslbeck and Waldorp (2018) Section 2.5 and 3.4 for a detailed description.
 33 | }
 34 |   \item{bandwidth}{
 35 | We use a gaussian density on the unit time-interval [0,1] to determine the weights for each observation at each estimated time point. The bandwidth specifies the standard deviation the Gaussian density. To get some intuition, which bandwidth results in the combination of how many data close in time one can plot Gaussians on [0,1] for different bandwidths. The bandwidth can also be selected in a data driven way using the function (see \code{bwSelect}).
 36 | 
 37 | }
 38 |   \item{\dots}{
 39 | Arguments passed to \code{mvar}, specifying how each single model should be estimated. See \code{?mvar}.
 40 | }
 41 | }
 42 | 
 43 | 
 44 | \details{
 45 | Estimates a sequence of mVAR models at the time points specified at the locations specified via \code{estpoints}. \code{tvmvar()} is a wrapper around \code{mvar()} and estimates a series of MGM with different weightings which are defined by the estimation locations in \code{estpoints} and the banwdith parameter specified in \code{bandwidth}. For details see Haslbeck and Waldorp (2018)
 46 | 
 47 | 
 48 | }
 49 | \value{
 50 | A list with the following entries:
 51 | 
 52 | 
 53 | \item{call}{
 54 | Contains all provided input arguments. If \code{saveData = TRUE}, it also contains the data.
 55 | }
 56 | 
 57 | 
 58 | \item{wadj}{
 59 | A p x p x n_lags x S array, where n_lags is the number of specified lags in \code{lags} (see \code{?mvar}) and S is the number of estimation points specified in \code{estpoints}. For instance, wadj[1, 2, 1, 10] is the cross-lagged predicting variable 1 at time point t by variable 2 at time point t - z, where z is specified by the first lag specified in \code{lags} (see \code{?mvar}), in the model estimated at estimation point 10.}
 60 | 
 61 | \item{signs}{ Has the same structure as \code{wadj} and specifies the signs corresponding to the parameters in \code{wadj}, if defined. 1/-1 indicate positive and negative relationships, respectively. 0 indicates that no sign is defined, which is the case for interactions that involve a categorical variable where an interaction can have more than one parameter. If \code{binarySign = TRUE}, a sign is calculated for interactions between binary variables and binary and continuous variables, where the interaction is still defined by one parameter and hence a sign can be specified. \code{NA} indicates that the corresponding parameter in \code{wadj} is zero. See also \code{?mvar}.
 62 | }
 63 | 
 64 | \item{intercepts}{
 65 | A list with S entries, where S is the number of estimated time points. Each entry of that list contains a list p entries with the intercept/thresholds for each node in the network. In case a given node is categorical with m categories, there are m thresholds for this variable.
 66 | }
 67 | 
 68 | \item{tvmodels}{
 69 | Contains the mVAR model estimated by \code{mvar()} at each time point specified via \code{estpoints}. See \code{?mvar} for a detailed description of this output.
 70 | }
 71 | 
 72 | }
 73 | 
 74 | 
 75 | \references{
 76 | 
 77 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 78 | 
 79 | }
 80 | 
 81 | \author{
 82 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 83 | }
 84 | 
 85 | 
 86 | 
 87 | \examples{
 88 | 
 89 | \dontrun{
 90 | 
 91 | 
 92 | ## We set up the same model as in the example of mvar(), but
 93 | ## specify one time-varying parameter, and try to recover it with
 94 | ## tvmvar()
 95 | 
 96 | # a) Specify time-varying VAR model
 97 | 
 98 | p <- 6 # Six variables
 99 | type <- c("c", "c", "c", "c", "g", "g") # 4 categorical, 2 gaussians
100 | level <- c(2, 2, 4, 4, 1, 1) # 2 categoricals with 2 categories, 2 with 5
101 | max_level <- max(level)
102 | n_timepoints <- 4000
103 | 
104 | lags <- c(1, 3, 9) # include lagged effects of order 1, 3, 9
105 | n_lags <- length(lags)
106 | 
107 | # Specify thresholds
108 | thresholds <- list()
109 | thresholds[[1]] <- matrix(0, ncol=level[1], nrow=n_timepoints)
110 | thresholds[[2]] <- matrix(0, ncol=level[2], nrow=n_timepoints)
111 | thresholds[[3]] <- matrix(0, ncol=level[3], nrow=n_timepoints)
112 | thresholds[[4]] <- matrix(0, ncol=level[4], nrow=n_timepoints)
113 | thresholds[[5]] <- matrix(0, ncol=level[5], nrow=n_timepoints)
114 | thresholds[[6]] <- matrix(0, ncol=level[6], nrow=n_timepoints)
115 | 
116 | # Specify standard deviations for the Gaussians
117 | sds <- matrix(NA, ncol=p, nrow=n_timepoints)
118 | sds[, 5:6] <- 1
119 | 
120 | # Create coefficient array
121 | coefarray <- array(0, dim=c(p, p, max_level, max_level, n_lags, n_timepoints))
122 | 
123 | # a.1) interaction between continuous 5<-6, lag=3
124 | coefarray[5, 6, 1, 1, 2, ] <- seq(0, .4, length = n_timepoints) # only time-varying parameter
125 | # a.2) interaction between 1<-3, lag=1
126 | m1 <- matrix(0, nrow=level[2], ncol=level[4])
127 | m1[1,1:2] <- 1
128 | m1[2,3:4] <- 1
129 | coefarray[1, 3, 1:level[2], 1:level[4], 1, ] <- m1 # constant across time
130 | # a.3) interaction between 1<-5, lag=9
131 | coefarray[1, 5, 1:level[1], 1:level[5], 3, ] <- c(0, 1) # constant across time
132 | 
133 | 
134 | # b) Sample
135 | set.seed(1)
136 | dlist <- tvmvarsampler(coefarray = coefarray,
137 |                        lags = lags,
138 |                        thresholds = thresholds,
139 |                        sds = sds,
140 |                        type = type,
141 |                        level = level,
142 |                        pbar = TRUE)
143 | 
144 | # c.1) Recover: stationary
145 | set.seed(1)
146 | mvar_obj <- mvar(data = dlist$data,
147 |                  type = type,
148 |                  level = level,
149 |                  lambdaSel = "CV",
150 |                  lags = c(1, 3, 9),
151 |                  signInfo = FALSE)
152 | 
153 | # Did we recover the true parameters?
154 | mvar_obj$wadj[5, 6, 2] # cross-lagged effect of 6 on 5 over lag lags[2] (lag 3)
155 | mvar_obj$wadj[1, 3, 1] # cross-lagged effect of 3 on 1 over lag lags[1] (lag 1)
156 | mvar_obj$wadj[1, 5, 3] # cross-lagged effect of 1 on 5 over lag lags[3] (lag 9)
157 | 
158 | 
159 | # c.2) Recover: time-varying
160 | set.seed(1)
161 | mvar_obj <- tvmvar(data = dlist$data,
162 |                    type = type,
163 |                    level = level,
164 |                    estpoints = seq(0, 1, length=10),
165 |                    bandwidth = .15,
166 |                    lambdaSel = "CV",
167 |                    lags = c(1, 3, 9),
168 |                    signInfo = FALSE)
169 | 
170 | # Did we recover the true parameters?
171 | mvar_obj$wadj[5, 6, 2, ] # true sort of recovered
172 | mvar_obj$wadj[1, 3, 1, ] # yes
173 | mvar_obj$wadj[1, 5, 3, ] # yes
174 | 
175 | # Plotting parameter estimates over time
176 | plot(mvar_obj$wadj[5, 6, 2, ], 
177 |      type="l", ylim=c(-.2,.7), 
178 |      lwd=2, ylab="Parameter value", xlab="Estimation points")
179 | lines(mvar_obj$wadj[1, 3, 1, ], col="red", lwd=2)
180 | lines(mvar_obj$wadj[1, 5, 3, ], col="blue", lwd=2)
181 | legend("bottomright", c("5 <-- 6", "1 <-- 3", "1 <-- 5"), 
182 |        lwd = c(2,2,2), col=c("black", "red", "blue"))
183 | 
184 | 
185 | # d) Predict values / compute nodewise error
186 | 
187 | mvar_pred_w <- predict.mgm(object=mvar_obj,
188 |                            data=dlist$data,
189 |                            tvMethod = "weighted")
190 | 
191 | mvar_pred_cM <- predict.mgm(object=mvar_obj,
192 |                             data=dlist$data,
193 |                             tvMethod = "closestModel")
194 | 
195 | mvar_pred_w$errors
196 | mvar_pred_cM$errors
197 | 
198 | # For more examples see https://github.com/jmbh/mgmDocumentation
199 | 
200 | 
201 | }
202 | 
203 | }
204 | 
205 | 
206 | 
207 | 


--------------------------------------------------------------------------------
/man/tvmvarsampler.Rd:
--------------------------------------------------------------------------------
  1 | \name{tvmvarsampler}
  2 | \alias{tvmvarsampler}
  3 | 
  4 | \title{
  5 | Sampling from a time-varying mixed VAR model
  6 | }
  7 | \description{
  8 | Function to sample from a time-varying mixed VAR (mVAR) model
  9 | }
 10 | \usage{
 11 | tvmvarsampler(coefarray, lags, thresholds,
 12 |               sds, type, level, pbar)
 13 | }
 14 | 
 15 | \arguments{
 16 |   \item{coefarray}{
 17 | A p x p x max(level) x max(level) x n_lags x N array, where p are the number of variables, level is the input argument \code{level} and n_lags is the number of specified lags in \code{lags}, so n_lags = length(n_lags), and N is the number of time points in the time series. The first four dimensions specify the parameters involved in the cross-lagged effects of the lag specified in the 5th dimension. I.e. coefarray[5, 6, 1, 1, 3, 100] indicates the cross-lagged effect of variable 6 on variable 5 (if both are continuous), for the third lag specified in \code{lags} at time point 100. If variable 1 and 3 are categorical with m = 2 and  = 4 categories, respectively, then coefarray[1, 3, 1:2, 1:4, 1, 250] indicates the m*s=8 parameters specifying this interaction for the first lag specified in \code{lags} at time point 250. See the examples below for an illustration.
 18 | }
 19 |   \item{lags}{
 20 | A vector indicating the lags in the mVAR model. E.g. \code{lags = c(1, 4, 9)} specifies lags of order 1, 3, 9. The number of specified lags has to match the 5th dimension in \code{coefarray}.
 21 | }
 22 |   \item{thresholds}{
 23 | A list with p entries, each consisting of a matrix indicating a threshold for each category of the given variable (column) and time point (row). For continuous variable, the matrix has 1 column.
 24 | }
 25 |   \item{sds}{
 26 | A N x p matrix specifying the standard deviation of Gaussian variables (columns) at each time point (rows)If non-Gaussian variables are included in the mVAR model, the corresponding columns are ignored.
 27 | }
 28 |   \item{type}{
 29 | p vector indicating the type of variable for each column in \code{data}. "g" for Gaussian, "p" for Poisson, "c" for categorical.
 30 | }
 31 |   \item{level}{
 32 | p vector indicating the number of categories of each variable. For continuous variables set to 1.
 33 | }
 34 |   \item{pbar}{
 35 | If \code{pbar = TRUE}, a progress bar is shown.
 36 | }
 37 | }
 38 | \details{
 39 | We sample from the mVAR model by separately sampling from its corresponding p conditional distributions.
 40 | }
 41 | \value{
 42 | A list with two entries:
 43 | 
 44 | \item{call}{The function call}
 45 | 
 46 | \item{data}{The sampled n x p data matrix}
 47 | 
 48 | }
 49 | \references{
 50 | 
 51 | Haslbeck, J. M. B., & Waldorp, L. J. (2020). mgm: Estimating time-varying Mixed Graphical Models in high-dimensional Data. Journal of Statistical Software, 93(8), pp. 1-46. DOI: 10.18637/jss.v093.i08
 52 | 
 53 | }
 54 | \author{
 55 | Jonas Haslbeck <jonashaslbeck@gmail.com>
 56 | }
 57 | 
 58 | 
 59 | 
 60 | \examples{
 61 | 
 62 | 
 63 | \dontrun{
 64 | 
 65 | ## We specify a tvmvar model, sample from it and recover it
 66 | 
 67 | # a) Set up time-varying mvar model
 68 | 
 69 | p <- 6 # Six variables
 70 | type <- c("c", "c", "c", "c", "g", "g") # 4 categorical, 2 gaussians
 71 | level <- c(2, 2, 4, 4, 1, 1) # 2 categoricals with 2 categories, 2 with 5
 72 | max_level <- max(level)
 73 | 
 74 | lags <- c(1, 3, 9) # include lagged effects of order 1, 3, 9
 75 | n_lags <- length(lags)
 76 | 
 77 | N <- 5000
 78 | 
 79 | # Specify thresholds
 80 | thresholds <- list()
 81 | thresholds[[1]] <- matrix(0, ncol=2, nrow=N)
 82 | thresholds[[2]] <- matrix(0, ncol=2, nrow=N)
 83 | thresholds[[3]] <- matrix(0, ncol=4, nrow=N)
 84 | thresholds[[4]] <- matrix(0, ncol=4, nrow=N)
 85 | thresholds[[5]] <- matrix(0, ncol=1, nrow=N)
 86 | thresholds[[6]] <- matrix(0, ncol=1, nrow=N)
 87 | 
 88 | # Specify standard deviations for the Gaussians
 89 | sds <- matrix(NA, ncol=6, nrow=N)
 90 | sds[,5:6] <- 1
 91 | 
 92 | # Create coefficient array
 93 | coefarray <- array(0, dim=c(p, p, max_level, max_level, n_lags, N))
 94 | 
 95 | # a.1) interaction between continuous 5<-6, lag=3
 96 | coefarray[5, 6, 1, 1, 2, ] <- c(rep(.5, N/2), rep(0, N/2))
 97 | # a.2) interaction between 1<-3, lag=1
 98 | m1 <- matrix(0, nrow=level[2], ncol=level[4])
 99 | m1[1, 1:2] <- 1
100 | m1[2, 3:4] <- 1
101 | coefarray[1, 3, 1:level[2], 1:level[4], 1, ] <- m1
102 | # a.3) interaction between 1<-5, lag=9
103 | coefarray[1, 5, 1:level[1], 1:level[5], 3, ] <- c(0, 1)
104 | 
105 | dim(coefarray)
106 | 
107 | 
108 | # b) Sample
109 | set.seed(1)
110 | dlist <- tvmvarsampler(coefarray = coefarray,
111 |                        lags = lags,
112 |                        thresholds = thresholds,
113 |                        sds = sds,
114 |                        type = type,
115 |                        level = level,
116 |                        pbar = TRUE)
117 | 
118 | 
119 | # c) Recover: time-varying mVAR model
120 | set.seed(1)
121 | tvmvar_obj <- tvmvar(data = dlist$data,
122 |                      type = type,
123 |                      level = level,
124 |                      lambdaSel = "CV",
125 |                      lags = c(1, 3, 9),
126 |                      estpoints = seq(0, 1, length=10),
127 |                      bandwidth = .05)
128 | 
129 | tvmvar_obj$wadj[5, 6, 2, ] # parameter goes down, as specified
130 | tvmvar_obj$wadj[1, 3, 1, ]
131 | tvmvar_obj$wadj[1, 5, 3, ]
132 | 
133 | # For more examples see https://github.com/jmbh/mgmDocumentation
134 | 
135 | }
136 | 
137 | }
138 | 
139 | 
140 | 


--------------------------------------------------------------------------------
/mgm.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(mgm)
3 | 
4 | test_check("mgm")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/Rplots.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jmbh/mgm/22f9110136943c161be08f7f84353a76f5ac259d/tests/testthat/Rplots.pdf


--------------------------------------------------------------------------------
/tests/testthat/test_MNM.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | test_that("MNM with k-specification", {
 4 |   
 5 |   data(autism_data)
 6 |   
 7 |   # Fit MGM with pairwise & three-way interactions
 8 |   set.seed(1)
 9 |   fit_k3 <- mgm(data = autism_data$data,
10 |                 type = autism_data$type,
11 |                 level = autism_data$lev,
12 |                 lambdaSel = "CV",
13 |                 lambdaFolds = 10,
14 |                 k = 3, 
15 |                 pbar = FALSE, 
16 |                 signInfo = FALSE)
17 |   
18 |   # Number of 3-way interactions = 11 
19 |   expect_equal(nrow(fit_k3$interactions$indicator[[2]]), 10)
20 |   
21 |   
22 |   # Plot Factor Graph (only to catch errors ...)
23 |   FactorGraph(object = fit_k3, 
24 |               PairwiseAsEdge = FALSE, 
25 |               labels = autism_data$colnames)
26 |   
27 | 
28 | })


--------------------------------------------------------------------------------
/tests/testthat/test_mgm.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | test_that("Pairwise mgm examples return expected estimates", {
 4 |   
 5 |   # --- Fit pairwise with EBIC ---
 6 |   fit_k2 <- mgm(data = autism_data$data,
 7 |                 type = autism_data$type,
 8 |                 level = autism_data$lev,
 9 |                 lambdaSel = "EBIC", 
10 |                 lambdaGam = 0.25,
11 |                 k = 2, 
12 |                 pbar = FALSE, 
13 |                 signInfo = FALSE) 
14 |   
15 |   # check one pairwise interactiion
16 |   expect_equal(round(fit_k2$pairwise$wadj[1, 4], 3), 0.152)
17 |   # and one intercept
18 |   expect_equal(round(fit_k2$intercepts[[1]][[1]], 3), 0.664)
19 |   
20 |   
21 |   # --- Fit pairwise with CV ---
22 |   set.seed(1)
23 |   fit_k2 <- mgm(data = autism_data$data,
24 |                 type = autism_data$type,
25 |                 level = autism_data$lev,
26 |                 lambdaSel = "CV", 
27 |                 lambdaFolds = 10,
28 |                 k = 2, 
29 |                 pbar = FALSE, 
30 |                 signInfo = FALSE) 
31 |   
32 |   # check one pairwise interactiion
33 |   expect_equal(round(fit_k2$pairwise$wadj[1, 4], 3), 0.173)
34 |   # and one intercept
35 |   expect_equal(round(fit_k2$intercepts[[1]][[1]], 3), 0.72)
36 |   
37 | 
38 | })


--------------------------------------------------------------------------------