├── DESCRIPTION ├── NAMESPACE ├── NEWS ├── R ├── amelia2mitml.list.R ├── anova.mitml.result.R ├── as.mitml.list.R ├── c.mitml.list.R ├── cbind.mitml.list.R ├── clusterMeans.R ├── coef.mitml.testEstimates.R ├── confint.mitml.testEstimates.R ├── internal-convergence.R ├── internal-methods-estimates.R ├── internal-methods-likelihood.R ├── internal-methods-zzz.R ├── internal-model.R ├── internal-pool.R ├── internal-zzz.R ├── is.mitml.list.R ├── jomo2mitml.list.R ├── jomoImpute.R ├── long2mitml.list.R ├── mids2mitml.list.R ├── mitml.list2mids.R ├── mitmlComplete.R ├── multilevelR2.R ├── panImpute.R ├── plot.mitml.R ├── print.mitml.R ├── print.mitml.anova.R ├── print.mitml.summary.R ├── print.mitml.testConstraints.R ├── print.mitml.testEstimates.R ├── print.mitml.testModels.R ├── rbind.mitml.list.R ├── read.mitml.R ├── sort.mitml.list.R ├── subset.mitml.list.R ├── summary.mitml.R ├── testConstraints.R ├── testEstimates.R ├── testModels.R ├── vcov.mitml.testEstimates.R ├── with.mitml.list.R ├── within.mitml.list.R ├── write.mitml.R ├── write.mitmlMplus.R ├── write.mitmlSAV.R ├── write.mitmlSPSS.R └── zzz.R ├── README.md ├── data ├── justice.rda ├── leadership.rda └── studentratings.rda ├── man ├── amelia2mitml.list.Rd ├── anova.mitml.result.Rd ├── as.mitml.list.Rd ├── clusterMeans.Rd ├── is.mitml.list.Rd ├── jomoImpute.Rd ├── justice.Rd ├── leadership.Rd ├── long2mitml.list.Rd ├── methods-mitml.list.Rd ├── methods-mitml.testEstimates.Rd ├── mids2mitml.list.Rd ├── mitml-package.Rd ├── mitml.list2mids.Rd ├── mitmlComplete.Rd ├── multilevelR2.Rd ├── panImpute.Rd ├── plot.mitml.Rd ├── read.mitml.Rd ├── sort.mitml.list.Rd ├── studentratings.Rd ├── subset.mitml.list.Rd ├── summary.mitml.Rd ├── testConstraints.Rd ├── testEstimates.Rd ├── testModels.Rd ├── with.mitml.list.Rd ├── write.mitml.Rd ├── write.mitmlMplus.Rd ├── write.mitmlSAV.Rd └── write.mitmlSPSS.Rd └── vignettes ├── Analysis.Rmd ├── Introduction.Rmd ├── Level2.Rmd └── css └── vignette.css /DESCRIPTION: 
-------------------------------------------------------------------------------- 1 | Package: mitml 2 | Type: Package 3 | Title: Tools for Multiple Imputation in Multilevel Modeling 4 | Version: 0.4-5 5 | Date: 2023-03-08 6 | Author: Simon Grund [aut,cre], Alexander Robitzsch [aut], Oliver Luedtke [aut] 7 | Maintainer: Simon Grund 8 | BugReports: https://github.com/simongrund1/mitml/issues 9 | Imports: pan, jomo, haven, grDevices, graphics, stats, methods, utils 10 | Suggests: mice, miceadds, Amelia, lme4, nlme, lavaan, geepack, glmmTMB, survival, knitr, rmarkdown 11 | LazyData: true 12 | LazyLoad: true 13 | Description: Provides tools for multiple imputation of missing data in multilevel 14 | modeling. Includes a user-friendly interface to the packages 'pan' and 'jomo', 15 | and several functions for visualization, data management and the analysis 16 | of multiply imputed data sets. 17 | License: GPL (>=2) 18 | VignetteBuilder: knitr 19 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | import(stats) 2 | import(jomo) 3 | import(pan) 4 | 5 | importFrom(graphics, abline, layout, par, plot, plot.new, axTicks, axis, lines, text, title) 6 | importFrom(grDevices, dev.new, dev.off, devAskNewPage) 7 | importFrom(methods, slot) 8 | importFrom(utils, flush.console, tail, write.table) 9 | 10 | export(panImpute, 11 | jomoImpute, 12 | mitmlComplete, 13 | clusterMeans, 14 | plot.mitml, 15 | read.mitml, 16 | summary.mitml, 17 | multilevelR2, 18 | testEstimates, 19 | testModels, 20 | testConstraints, 21 | with.mitml.list, 22 | within.mitml.list, 23 | long2mitml.list, 24 | jomo2mitml.list, 25 | mids2mitml.list, 26 | mitml.list2mids, 27 | amelia2mitml.list, 28 | as.mitml.list, 29 | is.mitml.list, 30 | c.mitml.list, 31 | cbind.mitml.list, 32 | rbind.mitml.list, 33 | sort.mitml.list, 34 | subset.mitml.list, 35 | anova.mitml.result, 36 | 
confint.mitml.testEstimates, 37 | write.mitml, 38 | write.mitmlMplus, 39 | write.mitmlSAV, 40 | write.mitmlSPSS 41 | ) 42 | 43 | S3method(c, mitml.list) 44 | S3method(cbind, mitml.list) 45 | S3method(rbind, mitml.list) 46 | S3method(sort, mitml.list) 47 | S3method(subset, mitml.list) 48 | S3method(with, mitml.list) 49 | S3method(within, mitml.list) 50 | S3method(plot, mitml) 51 | S3method(anova, mitml.result) 52 | S3method(coef, mitml.testEstimates) 53 | S3method(vcov, mitml.testEstimates) 54 | S3method(confint, mitml.testEstimates) 55 | S3method(print, mitml) 56 | S3method(print, mitml.summary) 57 | S3method(print, mitml.testEstimates) 58 | S3method(print, mitml.testModels) 59 | S3method(print, mitml.testConstraints) 60 | S3method(print, mitml.anova) 61 | S3method(summary, mitml) 62 | S3method(summary, mitml.testEstimates) 63 | S3method(summary, mitml.testModels) 64 | S3method(summary, mitml.testConstraints) 65 | 66 | S3method(.getCoef, default) 67 | S3method(.getCoef, merMod) 68 | S3method(.getCoef, lme) 69 | S3method(.getCoef, glmmTMB) 70 | S3method(.getCoef, lavaan) 71 | S3method(.getCoef, coxph.null) 72 | S3method(.getCoef, polr) 73 | S3method(.getVcov, default) 74 | S3method(.getVcov, glmmTMB) 75 | S3method(.getVcov, lavaan) 76 | S3method(.getVcov, coxph.null) 77 | S3method(.getMisc, default) 78 | S3method(.getMisc, lm) 79 | S3method(.getMisc, glm) 80 | S3method(.getMisc, merMod) 81 | S3method(.getMisc, lme) 82 | S3method(.getMisc, glmmTMB) 83 | S3method(.getMisc, geeglm) 84 | S3method(.getMisc, lavaan) 85 | 86 | S3method(.getLL, default) 87 | S3method(.getLL, geeglm) 88 | S3method(.getLL, lavaan) 89 | S3method(.getArgsLL, default) 90 | S3method(.getArgsLL, lm) 91 | S3method(.getArgsLL, glm) 92 | S3method(.getArgsLL, geeglm) 93 | S3method(.getArgsLL, lmerMod) 94 | S3method(.getArgsLL, lme) 95 | S3method(.getArgsLL, lavaan) 96 | S3method(.getUserLL, default) 97 | S3method(.getUserLL, lm) 98 | S3method(.getUserLL, lmerMod) 99 | S3method(.getUserLL, lme) 100 | 
S3method(.getUserLL, lavaan) 101 | S3method(.getDataLL, default) 102 | S3method(.getDataLL, lme) 103 | S3method(.getDataLL, gls) 104 | S3method(.getDataLL, lavaan) 105 | S3method(.updateStackedLL, default) 106 | S3method(.updateStackedLL, merMod) 107 | S3method(.updateStackedLL, lme) 108 | S3method(.updateStackedLL, gls) 109 | S3method(.updateStackedLL, lavaan) 110 | 111 | S3method(.checkREML, default) 112 | S3method(.checkREML, merMod) 113 | S3method(.checkREML, lme) 114 | S3method(.updateML, default) 115 | S3method(.updateML, merMod) 116 | S3method(.updateML, lme) 117 | S3method(.getDFs, default) 118 | S3method(.getDFs, lavaan) 119 | S3method(.getFormula, default) 120 | S3method(.getFormula, lme) 121 | S3method(.getFormula, lavaan) 122 | 123 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | # * RELEASE HISTORY OF THE 'mitml' PACKAGE: 2 | -- 3 | 4 | # Version 0.4-4 (2021-11-09) 5 | -- 6 | 7 | * coef, vcov: new methods for extracting pooled parameter estimates and the 8 | pooled variance-covariance matrix 9 | * testEstimates: no longer requires specifying variance estimates (uhat) when 10 | manually pooling estimated parameters (qhat), fixed spurious 11 | warning for 'coxph' models, added initial support for 12 | 'glmmTMB' 13 | * testModels: added initial support for 'D4' for models fitted with GLS 14 | ('nlme'), added initial support for 'glmmTMB' 15 | 16 | # Version 0.4-3 (2021-10-05) 17 | -- 18 | 19 | * mitml.list2mids: new function (converts objects from 'mitml.list' to 'mids') 20 | 21 | * testEstimates, added features (support for ordinal models estimated with 22 | testConstraints, 'MASS::polr') 23 | testModels: 24 | 25 | * other: bugfix (fixes vignette build errors) 26 | 27 | # Version 0.4-2 (2021-09-10) 28 | -- 29 | 30 | * testModels: bugfix (fixes unneded refits in 'D1', fixes scoping issue in 31 | 'D3' and 'D4') 32 | 33 | # Version 
0.4-1 (2021-02-05) 34 | -- 35 | 36 | * fixed Solaris build error 37 | 38 | # Version 0.4-0 (2021-01-25) 39 | -- 40 | 41 | * testEstimates: added initial support for SEM ('lavaan'); argument 'var.comp' 42 | is now deprecated and was replaced by 'extra.pars' 43 | 44 | * testModels: added new pooling method for LRTs ('D4'); now adopts more 45 | accurate formula for small-sample degrees of freedom ('df.com'); 46 | added initial support for SEM ('lavaan'); expanded support of 47 | 'D3' for multilevel models ('lme4') with arbitrary number of 48 | clusters; added 'ariv' argument for different estimators 49 | 50 | * testConstraints: added initial support for SEM ('lavaan'); now adopts more 51 | accurate formula for small-sample degrees of freedom ('df.com') 52 | 53 | * anova.mitml.results: see 'testModels'; now uses 'D4' and 'D2' as fallback 54 | options (in that order) 55 | 56 | * other: general code improvements 57 | 58 | # Version 0.3-6 (2018-07-10) 59 | -- 60 | 61 | * confint: new function, calculating confidence intervals for pooled 62 | estimates (applicable to testEstimates() objects) 63 | 64 | * jomoImpute, added features (option to save fewer parameters with 65 | panImpute: 'keep.chains') 66 | 67 | * jomoImpute, added features (support for single-level imputation models) 68 | 69 | * testEstimates, added features (support for Cox-PH models using 'survival' 70 | testConstraints, package) 71 | testModels: 72 | 73 | * testConstraints: added features (pooled estimates and SEs of specified 74 | constraints) 75 | 76 | * mitmlComplete: bugfix (fixes row ordering issue) 77 | 78 | * jomoImpute, bugfix (fixes erroneous removal of global seed) 79 | panImpute: 80 | 81 | * other: added vignettes (analysis, level-2 imputation) 82 | 83 | # Version 0.3-5 (2017-03-14) 84 | -- 85 | 86 | * testEstimates: now prints the two-tailed p-value (as opposed to one-tailed 87 | in earlier versions), revised label for p-values, improved 88 | output with constrained variance components 89 | 90 | 
* testModels: revised label for p-values 91 | 92 | * testEstimates: added features (support for GEEs using the 'geepack' package) 93 | 94 | * testModels: added features (support for GEEs using the 'geepack' package) 95 | 96 | * testConstraints: added features (support for GEEs using the 'geepack' package) 97 | 98 | * c.mitml.list: new functions, combining lists of multiply imputed data sets 99 | (and rbind..., by data set (c.mitml.list), row (rbind.mitml.list), or column 100 | cbind...) (cbind.mitml.list) 101 | 102 | * sort.mitml.list: new function, sorting lists of multiply imputed data sets by 103 | one or several variables (similar to '?order') 104 | 105 | * subset.mitml.list: new function, generating subsets for lists of multiply 106 | imputed data sets (similar to '?subset') 107 | 108 | * amelia2mitml.list: new function, converting imputations generated by the 109 | 'Amelia' package to 'mitml.list' 110 | 111 | * justice: updated data set (added categorical variable, missing data at 112 | Level 2) 113 | 114 | # Version 0.3-4 (2016-09-12) 115 | -- 116 | 117 | * mitmlComplete: changed default arguments ('print' now defaults to 'all', 118 | returning list of completed data sets) 119 | 120 | * jomoImpute: bugfix (fixes error in which jomoImpute() rejected correctly 121 | specified priors when 'group=NULL') 122 | 123 | * mitmlComplete: bugfix (fixes error with categorical target variables when 124 | there are no missing data) 125 | 126 | * plot: adjusted warning message for 'pos' argument to include 127 | 'beta2'. 
128 | 129 | # Version 0.3-3 (2016-07-04) 130 | -- 131 | 132 | * jomoImpute: added features (support for imputation of cluster-level 133 | variables, i.e., the two-level procedures "jomo2...") 134 | 135 | * print/summary: revised appearance in two-level models (model summary is 136 | displayed separately by level for two-level imputation 137 | procedures) 138 | 139 | * plot: additional value for print argument ("beta2", denoting the 140 | regression coefficients of the cluster-level imputation 141 | model) 142 | 143 | * jomoImpute: bugfix (fixes error in the usage of starting values in cases 144 | with only continuous/no categorical data) 145 | 146 | * plot: revised formatting of the plot title (order of variables) 147 | 148 | # Version 0.3-2 (2016-05-10) 149 | -- 150 | 151 | * plot: added features (requesting single parameters, thinning of 152 | the chain prior to plotting) 153 | 154 | * summary: added features (summary of autocorrelation) 155 | 156 | * plot: revised appearance and behavior (burn-in printed in grey, 157 | included Rhat and autocorrelation at lag k in the posterior 158 | summary; for trace="burnin", the summary is now calculated 159 | for the burn-in phase, not the imputation phase) 160 | 161 | # Version 0.3-1 (2016-05-10) 162 | -- 163 | 164 | * anova: new function based on objects of class 'mitml.result', 165 | facilitating comparisons for a sequence of models 166 | 167 | * long2mitml.list: new function, converting multiple imputations from "long" 168 | format to 'mitml.list' 169 | 170 | * jomo2mitml.list: new function, converting imputations generated byt the 'jomo' 171 | package to 'mitml.list' 172 | 173 | * multilevelR2: new function, calculating measures of explained variance 174 | (R^2) for multilevel models and based on observed or multiply 175 | imputed data sets 176 | 177 | * justice: new data set, including re-simulated data based on the study 178 | of procedural justice, justice climate, and organizational 179 | satisfaction by Liao and 
Rupp (2005) 180 | 181 | * plot: renamed export directory ("mitmlPlots", formerly "panPlots") 182 | 183 | * testModels: added automatic refit using ML for REML fitted models 184 | 185 | * mitmlComplete: bugfix (fixes error with mixed categorical and continuous 186 | variables) 187 | 188 | * plot: bugfix (fixes handling of latent background variables for 189 | categorical variables) 190 | 191 | # Version 0.3-0 (2016-03-15) 192 | -- 193 | 194 | * jomoImpute: new function, providing an interface to the jomo package for 195 | imputation of missing values at level 1 196 | 197 | - includes adjustments in mitml.complete as well as the 198 | summary, print, and plot methods) 199 | - includes minor changes in the interpretation of the formula 200 | and type arguments 201 | - adds a few slots to the 'mitml' object class 202 | 203 | * summary: bugfix (fixes behavior when applied to fixed parameters 204 | with zero variance) 205 | 206 | * as.mitml.list: bugfix (fixes order of class attributes) 207 | 208 | 209 | # Version 0.2-4 (2015-10-19) 210 | -- 211 | 212 | * clusterMeans: code improvements 213 | 214 | * panImpute: code improvements 215 | 216 | * testConstraints: added features (model-independent input) 217 | 218 | * testEstimates: added features (model-independent input) 219 | 220 | * testModels: comparisons of REML fits through D2 is now permitted 221 | 222 | * summary: bugfix (n.Rhat now takes effect), added features (goodness 223 | of approximation) 224 | 225 | 226 | # Version 0.2-3 (2015-07-09) 227 | -- 228 | 229 | * panImpute: added features (silent mode), bugfix (ordering of variables 230 | with nonstandard priors) 231 | 232 | * summary: added features (details on PSR) 233 | 234 | * plot: revised layout, added features (trend line, posterior 235 | summary), bugfix (plot labels) 236 | 237 | * testModels: bugfix (structural zeros in lmer objects) 238 | 239 | * studentratings: renamed variable (data set) 240 | 241 | 242 | # Version 0.2-2 (2015-05-23) 243 | -- 244 | 245 | 
# * initial release
#
# --------------------------------------------------------------------------------
# /R/amelia2mitml.list.R:
# --------------------------------------------------------------------------------

amelia2mitml.list <- function(x){
# Convert imputations generated by the 'Amelia' package to 'mitml.list'.
#
# x:     output of Amelia::amelia(); only its 'imputations' element is used
# value: unnamed list of imputed data sets, classed c("mitml.list", "list")

  out <- unname(x$imputations)
  class(out) <- c("mitml.list", "list")

  return(out)

}

# --------------------------------------------------------------------------------
# /R/anova.mitml.result.R:
# --------------------------------------------------------------------------------

anova.mitml.result <- function(object, ..., method = c("D3", "D4", "D2"),
                               ariv = c("default", "positive", "robust"),
                               data = NULL){
# Model comparison for a sequence of models fitted to multiply imputed data.
#
# object, ...: lists of fitted models (one list per model, one entry per
#              imputed data set), assumed to be ordered by decreasing complexity
# method:      pooling method for the comparisons ("D3", "D4", "D2"); falls
#              back to the next method in that order if unsupported
# ariv:        estimator of the relative increase in variance (passed on for "D4")
# data:        optional data argument passed on to testModels() (for "D4")
# value:       an object of class "mitml.anova" with pairwise comparisons

  # create list of models
  mod.list <- c(list(object), list(...))

  # ***
  # check input
  #

  # check lists
  m <- length(object)

  if(length(mod.list) == 1) stop("Comparison requires at least two lists of fitted statistical models.")
  if(any(!sapply(mod.list, is.list))) stop("The 'object' and '...' arguments must be lists of fitted statistical models.")
  if(any(sapply(mod.list[-1], length) != m)) stop("The 'object' and '...' arguments must be lists with the same length.")

  # check method
  method.choices <- c("D3", "D4", "D2")
  method <- original.method <- match.arg(method, method.choices)

  # check model classes (only the first fitted model of each list is inspected)
  cls.list <- lapply(mod.list, function(x) class(x[[1]]))

  if(any(sapply(cls.list[-1], "[", 1) != cls.list[[1]][1])) warning("The 'object' and '...' arguments appear to include objects of different classes. 
Results may not be trustworthy.")

  .checkNamespace(unique(unlist(cls.list)))

  # check for REML and refit with ML (if needed), so that likelihoods are comparable
  reml.list <- lapply(mod.list, function(x) sapply(x, .checkREML))
  reml <- any(unlist(reml.list))

  if(reml){
    for(ii in seq_along(mod.list)){
      mod.list[[ii]][reml.list[[ii]]] <- lapply(mod.list[[ii]][reml.list[[ii]]], .updateML)
    }
  }

  # ***
  # check method and possible fallback methods
  #

  # find user-defined method and possible fallback options
  try.method <- method.choices[seq.int(which(method.choices == original.method), length(method.choices))]
  error.msg <- character()

  # try logLik evaluation methods until a working method is found
  for(mm in seq_along(try.method)){

    if(try.method[mm] == "D3") try.fun <- .evaluateUserLogLik
    if(try.method[mm] == "D4") try.fun <- .evaluateStackedLogLik
    if(try.method[mm] == "D2") try.fun <- .evaluateLogLik

    # check if method can be applied to specified objects (trial run on one entry)
    res <- lapply(mod.list, function(x, fun){
      tryCatch(expr = suppressMessages(suppressWarnings(fun(x[1]))),
               error = function(e) e
      )
    }, fun = try.fun)

    # if applicable, proceed; otherwise, save error message and try next method (if any)
    notApplicable <- sapply(res, inherits, what = "error")
    if(any(notApplicable)){

      # save error message
      err <- as.character(res[[which(notApplicable)[1]]])
      error.msg[try.method[mm]] <- sub("^Error in .*: ", "", err)

      # try next method (if any)
      if(mm < length(try.method)){
        next()
      }else{
        stop("The '", original.method, "' method is not supported for the specified models, and no valid alternative was found. 
Problems were due to:\n", paste(error.msg, collapse = ""))
      }

    }else{

      # set method, print warning if needed
      method <- try.method[mm]
      if(method != original.method) warning("The '", original.method, "' method is not supported for the specified models. Switching to '", method, "'.")
      break()

    }

  }

  # ***
  # find order of models
  #

  # try to determine (numerator) degrees of freedom for each model
  df.list <- lapply(lapply(mod.list, "[[", 1), .getDFs)

  # check if models can be ordered (requires df for every model, all extracted
  # with the same method)
  reorderModels <- FALSE
  if(all(!sapply(df.list, is.null))){

    df.method <- sapply(df.list, attr, which = "type")

    # check if extraction method was consistent across models
    if(all(df.method[-1] == df.method[1])){
      reorderModels <- TRUE
    }

  }

  # re-order models (if possible)
  if(reorderModels){
    mod.list <- mod.list[order(unlist(df.list), decreasing = TRUE)]
  }else{
    warning("Could not determine the order of models in 'object' and '...'. The order is therefore assumed to be as specified (with decreasing complexity). 
Please check whether this was intended, and see '?testModels' for specific comparisons between models.")
  }

  # ***
  # perform model comparisons
  #

  # pairwise comparisons of adjacent models
  nmod <- length(mod.list)
  out.list <- vector("list", nmod-1)

  for(ii in seq_len(nmod-1)){

    # make call
    cll <- call("testModels", model = quote(mod.list[[ii]]), null.model = quote(mod.list[[ii+1]]))
    cll[["method"]] <- method

    if(method == "D2") cll[["use"]] <- "likelihood"
    if(method == "D4"){
      if(!is.null(data)) cll[["data"]] <- data
      cll[["ariv"]] <- ariv
    }

    # evaluate call
    out.list[[ii]] <- eval(cll)

  }

  # try to get model formulas
  fml <- character(nmod)
  for(ii in seq_len(nmod)){

    f <- .getFormula(mod.list[[ii]][[1]])
    # FIX: .getFormula() can return NULL; assigning NULL into a character
    # vector is an error, so store NA instead
    fml[ii] <- if(is.null(f)) NA_character_ else f

  }

  out <- list(
    call = match.call(),
    test = out.list,
    m = m,
    method = method,
    use = "likelihood",
    ariv = ariv,
    data = !is.null(data),
    formula = fml,
    # FIX: 'ifelse(reorderModels, df.method[1], NULL)' errored whenever the
    # models could not be reordered ('ifelse' cannot return NULL, and
    # 'df.method' may not exist in that case); plain if/else handles both
    order.method = if(reorderModels) df.method[1] else NULL,
    reml = reml
  )

  class(out) <- "mitml.anova"
  return(out)

}

# --------------------------------------------------------------------------------
# /R/as.mitml.list.R:
# --------------------------------------------------------------------------------

as.mitml.list <- function(x){
# Add the class attribute "mitml.list" to a list of data sets.
#
# x:     a list; entries that are not data frames are converted with
#        as.data.frame() (a note is printed when this happens)
# value: the input list with class c("mitml.list", ...)

  if(!is.list(x)) stop("Argument must be a 'list'.")

  if(any(!sapply(x, is.data.frame))){
    x <- lapply(x, as.data.frame)
    cat("Note: List entries were converted to class 'data.frame'.\n")
  }

  class(x) <- c("mitml.list", class(x))
  return(x)

}

# --------------------------------------------------------------------------------
# /R/c.mitml.list.R:
# --------------------------------------------------------------------------------

c.mitml.list <- function(...){
# Merge objects of class "mitml.list" by appending list entries
# (i.e., concatenating the imputed data sets of several lists).

  as.mitml.list(unlist(list(...), recursive = FALSE))

}

# --------------------------------------------------------------------------------
# /R/cbind.mitml.list.R:
# --------------------------------------------------------------------------------

cbind.mitml.list <- function(...){
# Merge objects of class "mitml.list" by appending columns of list entries.
#
# value: list of column-bound data sets, classed c("mitml.list", "list")

  out <- Map(cbind, ...)
  # FIX: Map() returns a plain list; restore the "mitml.list" class so the
  # result is consistent with c.mitml.list() and still dispatches the
  # mitml.list methods
  class(out) <- c("mitml.list", "list")

  return(out)

}

# --------------------------------------------------------------------------------
# /R/clusterMeans.R:
# --------------------------------------------------------------------------------

clusterMeans <- function(x, cluster, adj = FALSE, group = NULL){
# Calculate cluster means of x, optionally adjusted (leave-one-out) and/or
# within subgroups.
#
# x:       numeric vector, or the name of a variable in the calling frame
# cluster: cluster indicator, or the name of a variable in the calling frame
# adj:     if TRUE, return adjusted (leave-one-out) cluster means
# group:   optional grouping variable (means are computed per cluster-by-group
#          combination), or the name of such a variable
# value:   vector of cluster means, one element per observation

  # get objects if names are given (looked up in the caller's environment)
  isname <- c(length(x) == 1, length(cluster) == 1, length(group) == 1) &
    c(is.character(x), is.character(cluster), is.character(group))
  if(any(isname)){
    parent <- parent.frame()
    if(isname[1]) x <- eval(parse(text = x), parent)
    if(isname[2]) cluster <- eval(parse(text = cluster), parent)
    if(isname[3]) group <- eval(parse(text = group), parent)
  }

  # prepare group (converted to integer codes)
  if(!is.null(group)) {
    if(is.character(group)) group <- as.factor(group)
    if(is.factor(group)) group <- as.integer(group)
    ngr <- length(unique(group))
  }

  # format cluster (and groups): encode cluster-by-group combinations as
  # unique consecutive integers
  if(!is.numeric(cluster)) cluster <- as.integer(cluster)
  if(!is.null(group)) cluster <- cluster + group/(ngr+1)
  cluster <- match(cluster, unique(cluster))

  # cluster means based on observed (non-NA) values only
  n.obs <- rowsum(as.integer(!is.na(x)), cluster)
  gm <- rowsum(x, cluster, na.rm = TRUE)/n.obs
  gm[is.nan(gm)] <- NA  # clusters with no observed values
  gm <- gm[cluster]
  if(adj){
    # leave-one-out (adjusted) cluster means
    n.obs <- n.obs[cluster]
    ((n.obs * gm) - x)/(n.obs - 1)
  }else{
    gm
  }

}
# --------------------------------------------------------------------------------
# /R/coef.mitml.testEstimates.R:
# --------------------------------------------------------------------------------

coef.mitml.testEstimates <- function(object, ...){
# Extract pooled parameter estimates from a testEstimates() object.
#
# object: an object of class "mitml.testEstimates"; only its 'estimates'
#         matrix is used (first column = pooled point estimates)
# value:  named numeric vector of pooled estimates

  est <- object$estimates
  out <- est[, 1, drop = TRUE]
  # column extraction can drop names (e.g., single-row matrices); restore them
  if(is.null(names(out))) names(out) <- rownames(est)

  return(out)

}

# --------------------------------------------------------------------------------
# /R/confint.mitml.testEstimates.R:
# --------------------------------------------------------------------------------

confint.mitml.testEstimates <- function(object, parm, level = 0.95, ...){
# Calculate confidence intervals from pooled estimates.
#
# object: an object of class "mitml.testEstimates"; uses the 'estimates'
#         matrix (column 1 = estimate, column 2 = SE, column "df" = degrees
#         of freedom)
# parm:   parameters for which intervals are computed (names or indices);
#         defaults to all parameters
# level:  confidence level (default 0.95)
# value:  matrix with lower/upper confidence bounds, one row per parameter

  est <- object$estimates

  pnames <- rownames(est)
  if(missing(parm)) parm <- pnames
  if(is.numeric(parm)) parm <- pnames[parm]

  # pooled estimates and standard errors
  cf <- est[parm, 1]
  se <- est[parm, 2]
  # FIX: removed the unused local 'df <- est[parm, 4]'; the degrees of freedom
  # are looked up by column name ("df") below, which is safer than a fixed index

  a <- (1-level)/2
  # t-quantile with the pooled degrees of freedom (vectorized over parameters)
  fac <- qt(1-a, est[parm, "df"])
  pct <- paste(format(100*c(a, 1-a), trim = TRUE, scientific = FALSE, digits = 3), "%")


  ci <- matrix(NA_real_, length(parm), 2, dimnames = list(parm, pct))
  ci[,1] <- cf - se*fac
  ci[,2] <- cf + se*fac

  return(ci)

}

# --------------------------------------------------------------------------------
# /R/internal-convergence.R:
# --------------------------------------------------------------------------------

# Gelman-Rubin (1992) criterion for convergence (Rhat).
#
# x: matrix of parameter draws (one row per parameter, one column per
#    iteration), or a vector for a single parameter
# m: number of segments ("chains") the chain is split into
# value: numeric vector of Rhat values; defined as 1 for zero-variance chains
.GelmanRubin <- function(x, m){

  # check NA
  if(all(is.na(x))) return(NA)

  # convert vector to a one-row matrix
  if(is.vector(x)) x <- matrix(x, 1, length(x))

  # split the iterations into m segments of equal length (remainder dropped)
  iter <- ncol(x)
  mod <- iter %% m
  n <- rep( (iter-mod)/m , m )
  nmat <- matrix(c(cumsum(n)-n+1, cumsum(n)), nrow = m)
  n <- n[1]

  Rhat <- numeric(nrow(x))
  for(ii in 1:nrow(x)){

    # values per chain
    chs <- apply(nmat, 1, function(j) x[ii, j[1]:j[2]])
    mns <- apply(chs, 2, mean)
    vrs <- apply(chs, 2, var)
    Bdivn <- var(mns)   # between-chain variance (divided by n)
    W <- mean(vrs)      # within-chain variance
    muhat <- mean(chs)
    sighat2 <- (n-1)/n * W + Bdivn
    # sampling distribution of the pooled variance estimate
    Vhat <- sighat2 + Bdivn/m
    var.Vhat <- ((n-1)/n)^2*(1/m)*var(vrs) + ((m+1)/(m*n))^2*2/(m-1)*(Bdivn*n)^2 +
      2*((m+1)*(n-1)/(m*n^2)) * (n/m)*(cov(vrs, mns^2)-2*muhat*cov(vrs, mns))
    df <- 2*Vhat^2 / var.Vhat
    # compute Rhat
    if(Bdivn == 0 & identical(vrs, rep(0, m))){ # for zero variance defined as 1
      Rhat[ii] <- 1
    }else{
      Rhat[ii] <- sqrt( (Vhat/W)*df/(df-2) )
    }

  }
  Rhat

}

# criterion for goodness of approximation (Hoff, 2009)
#
# x: matrix of parameter draws (one row per parameter, one column per
#    iteration), or a vector for a single parameter
# value: proportion of the posterior SD attributable to sampling inefficiency,
#        with the effective sample size attached as attribute "n.eff"
.SDprop <- function(x){

  # check NA
  if(all(is.na(x))) return(NA)

  # convert vector to a one-row matrix
  if(is.vector(x)) x <- matrix(x, 1, length(x))

  np <- nrow(x)
  v <- apply(x, 1, var) # variance of chain
  v0 <- v == 0
  sdp <- sp0 <- neff <- numeric(np)
  for(i in 1:np){
    # AR fit may fail for degenerate chains; errors are silently ignored and
    # the chain is then treated via the v0 branch below
    arp <- try( ar(x[i,], aic = TRUE), silent = TRUE )
    if(!v0[i]) sp0[i] <- arp$var.pred/(1 - sum(arp$ar))^2 # spectral density at frequency 0
  }
  n <- ncol(x)
  mcmc.v <- sp0/n # true variance of the mean (correcting for autocorrelation)
  neff[!v0] <- (v/mcmc.v)[!v0] # effective sample size
  neff[v0] <- n
  # proportion of variance due to sampling inefficiency
  sdp[!v0] <- sqrt(mcmc.v / v)[!v0]
  attr(sdp, "n.eff") <- neff
  sdp

}

# --------------------------------------------------------------------------------
# /R/internal-methods-zzz.R:
# --------------------------------------------------------------------------------

# ***
# misc.
methods 3 | # 4 | 5 | # * check for REML fit 6 | 7 | .checkREML <- function(object) UseMethod(".checkREML", object) 8 | .checkREML.default <- function(object) return(FALSE) 9 | .checkREML.merMod <- function(object) return(lme4::isREML(object)) 10 | .checkREML.lme <- function(object) return(object$method == "REML") 11 | 12 | # * update REML fit with ML 13 | 14 | .updateML <- function(object) UseMethod(".updateML", object) 15 | .updateML.default <- function(object) return(object) 16 | .updateML.merMod <- function(object) return(.localUpdate(object, REML = FALSE)) 17 | .updateML.lme <- function(object) return(.localUpdate(object, data = object$data, method = "ML")) 18 | 19 | # * determine degrees of freedom 20 | 21 | .getDFs <- function(object) UseMethod(".getDFs", object) 22 | 23 | .getDFs.default <- function(object){ 24 | 25 | df <- NULL 26 | 27 | # try logLik 28 | df.try <- try(attr(logLik(object), "df"), silent = TRUE) 29 | if(!inherits(df.try, "try-error")){ 30 | df <- df.try 31 | attr(df, "type") <- "logLik" 32 | } 33 | 34 | # try df.residual and sample size (nobs, residuals) 35 | # NOTE: does not account for scale parameters (e.g., residual variance) 36 | if(is.null(df)){ 37 | rdf <- try(df.residual(object), silent = TRUE) 38 | n <- try(nobs(object), silent = TRUE) 39 | if(inherits(n, "try-error")) n <- try(length(predict(object)), silent = TRUE) 40 | if(inherits(n, "try-error")) n <- try(length(residuals(object)), silent = TRUE) 41 | if(!inherits(rdf, "try-error") && !inherits(n, "try-error")){ 42 | df <- n - rdf 43 | attr(df, "type") <- "df.residual" 44 | } 45 | } 46 | 47 | return(df) 48 | 49 | } 50 | 51 | .getDFs.lavaan <- function(object){ 52 | 53 | df <- attr(lavaan::logLik(object), "df") 54 | attr(df, "type") <- "logLik" 55 | return(df) 56 | 57 | } 58 | 59 | 60 | # * extract model formula 61 | 62 | .getFormula <- function(object) UseMethod(".getFormula", object) 63 | 64 | .getFormula.default <- function(object){ 65 | 66 | fml <- 
try(deparse(formula(object))) 67 | if(inherits(fml, "try-error")) fml <- NULL 68 | fml <- Reduce(paste, fml) 69 | 70 | return(fml) 71 | 72 | } 73 | 74 | .getFormula.lme <- function(object){ 75 | 76 | fe.fml <- deparse(formula(object)) 77 | re.fml <- lapply(formula(object$modelStruct$reStruct), deparse) 78 | for(ff in seq_along(re.fml)) re.fml[[ff]] <- paste0(re.fml[[ff]], "|", names(re.fml)[ff]) 79 | fml <- paste(c(fe.fml, unlist(re.fml)), collapse = ", ") 80 | 81 | return(fml) 82 | 83 | } 84 | 85 | .getFormula.lavaan <- function(object){ 86 | 87 | cll <- getCall(object) 88 | fml <- deparse(cll[c(1, match("model", names(cll)))]) 89 | fml <- sub(")$", ", ...)", fml) 90 | 91 | return(fml) 92 | 93 | } 94 | -------------------------------------------------------------------------------- /R/internal-model.R: -------------------------------------------------------------------------------- 1 | # prepare model input by formula 2 | .model.byFormula <- function(data, formula, group, group.original, 3 | method = c("pan", "jomo", "jomo.matrix")){ 4 | 5 | # check model, separate equations 6 | formula <- .check.model(formula) 7 | 8 | isML <- attr(formula, "is.ML") 9 | isL2 <- attr(formula, "is.L2") 10 | 11 | if(isL2){ 12 | formula.L2 <- formula[[2]] 13 | formula <- formula[[1]] 14 | } 15 | 16 | method <- match.arg(method) 17 | 18 | # *** evaluate L1 model 19 | # 20 | 21 | ft <- terms(formula) 22 | tl <- attr(ft, "term.labels") 23 | vrs <- attr(ft, "variables")[-1] 24 | nms <- colnames(data) 25 | 26 | # responses 27 | yvrs <- as.character(vrs)[attr(ft, "response")] 28 | yvrs <- gsub("[\r\n]", "", yvrs) 29 | y.fml <- as.formula(paste0("~", yvrs)) 30 | yvrs <- attr(terms(y.fml), "term.labels") 31 | # check for untransformed yvrs 32 | err <- !(yvrs %in% nms) 33 | if(any(err)) stop("Could not find: ", paste0(yvrs[err], collapse = ", "), "). 
Target variables must be contained in the data set 'as is', and transformations must be applied beforehand.") 34 | 35 | # cluster id 36 | clt <- tl[grep("\\|", tl)] 37 | 38 | if(method == "pan" & !isML) stop("Cluster indicator not found in formula\n\n", .formula2char(formula), "\n\nPlease specify the cluster indicator and at least one random term using the '|' operator. Single-level imputation is supported by jomoImpute().") 39 | 40 | # extract and reorder 41 | if(isML){ 42 | 43 | clt <- strsplit( clt, split = "[[:blank:]]*\\|[[:blank:]]*" )[[1]] 44 | clname <- clt[2] 45 | 46 | # order data and grouping 47 | data <- data[ order(group, data[,clname]), ] 48 | group.original <- group.original[ order(group) ] 49 | group <- group[ order(group) ] 50 | 51 | }else{ 52 | clname <- NULL 53 | } 54 | 55 | # predictors: fixed 56 | pvrs <- c(if(attr(ft, "intercept")){"(Intercept)"}, tl[!grepl("\\|", tl)]) 57 | fe.fml <- c(if(attr(ft, "intercept")){"1"}else{"0"}, tl[!grepl("\\|", tl)]) 58 | fe.fml <- as.formula(paste0("~", paste0(fe.fml, collapse = "+"))) 59 | 60 | # predictors: random 61 | if(isML){ 62 | cl.fml <- as.formula(paste("~", clt[1])) 63 | cl.ft <- terms(cl.fml) 64 | qvrs <- c(if(attr(cl.ft, "intercept")){"(Intercept)"}, attr(cl.ft, "term.labels")) 65 | }else{ 66 | cl.fml <- ~0 67 | qvrs <- NULL 68 | } 69 | 70 | # model matrix for fe and cl 71 | attr(data, "na.action") <- identity 72 | mmp <- suppressWarnings( model.matrix(fe.fml, data = data) ) 73 | mmq <- suppressWarnings( model.matrix(cl.fml, data = data) ) 74 | pnames <- colnames(mmp) 75 | qnames <- colnames(mmq) 76 | psave <- setdiff( c(pnames, qnames), c("(Intercept)", nms) ) 77 | 78 | switch( method , 79 | # panImpute (matrix input) 80 | pan={ 81 | y <- data.matrix(data[yvrs]) 82 | ycat <- NULL 83 | }, 84 | # jomoImpute, for higher-level functions (data frames, uses jomo for preprocessing) 85 | jomo={ 86 | y <- data[yvrs] 87 | ycat <- NULL 88 | }, 89 | # jomoImpute, for higher- and lower-level versions 
(preprocessed matrix input) 90 | jomo.matrix={ 91 | y <- data.matrix(data[yvrs]) 92 | cvrs <- sapply(data[, yvrs, drop = F], is.factor) 93 | ycat <- y[,cvrs, drop = F] 94 | y <- y[,!cvrs, drop = F] 95 | } 96 | ) 97 | 98 | clus <- if(isML) data[,clname] else NULL 99 | pred <- cbind(mmp, mmq[,!(qnames%in%pnames), drop = F]) 100 | xcol <- which(colnames(pred)%in%pnames) 101 | zcol <- which(colnames(pred)%in%qnames) 102 | 103 | # assign to parent.frame 104 | inp <- list( 105 | y = y, ycat = ycat, clus = clus, pred = pred, xcol = xcol, zcol = zcol, data = data, 106 | group = group, group.original = group.original, psave = psave, clname = clname, 107 | yvrs = yvrs, pvrs = pvrs, qvrs = qvrs, pnames = pnames, qnames = qnames 108 | ) 109 | 110 | for(i in names(inp)) assign(i, inp[[i]], pos = parent.frame()) 111 | 112 | # *** evaluate L2 model 113 | # 114 | 115 | if(isL2){ 116 | 117 | ft <- terms(formula.L2) 118 | tl <- attr(ft, "term.labels") 119 | vrs <- attr(ft, "variables")[-1] 120 | 121 | # responses 122 | yvrs <- as.character(vrs)[attr(ft, "response")] 123 | yvrs <- gsub("[\r\n]", "", yvrs) 124 | y.fml <- as.formula(paste0("~", yvrs)) 125 | yvrs <- attr(terms(y.fml), "term.labels") 126 | # check for untransformed yvrs 127 | err <- !(yvrs %in% nms) 128 | if(any(err)) stop("Could not find: ", paste0(yvrs[err], collapse = ", "), "). 
Target variables must be contained in the data set 'as is', and transformations must be applied beforehand.") 129 | 130 | # predictors: fixed only at L2 131 | pvrs <- c(if(attr(ft, "intercept")){"(Intercept)"}, tl[!grepl("\\|", tl)]) 132 | fe.fml <- c(if(attr(ft, "intercept")){"1"}else{"0"}, tl[!grepl("\\|", tl)]) 133 | fe.fml <- as.formula(paste0("~", paste0(fe.fml, collapse = "+"))) 134 | 135 | # model matrix for FE only 136 | attr(data, "na.action") <- identity 137 | mmp <- suppressWarnings( model.matrix(fe.fml, data = data) ) 138 | pnames <- colnames(mmp) 139 | psave <- c( psave, setdiff( c(pnames), c("(Intercept)", nms) ) ) 140 | 141 | switch( method , 142 | jomo={ # jomoImpute, for higher-level functions (data input) 143 | y <- data[yvrs] 144 | ycat <- NULL 145 | }, 146 | jomo.matrix={ # jomoImpute, for lower-level versions (matrix input) 147 | y <- data.matrix(data[yvrs]) 148 | cvrs <- sapply(data[,yvrs, drop = F], is.factor) 149 | ycat <- y[,cvrs, drop = F] 150 | y <- y[,!cvrs, drop = F] 151 | } 152 | ) 153 | 154 | pred <- mmp 155 | xcol <- which(colnames(pred) %in% pnames) 156 | 157 | # assign to parent.frame 158 | inp <- list( 159 | y.L2 = y, ycat.L2 = ycat, pred.L2 = pred, xcol.L2 = xcol, yvrs.L2 = yvrs, 160 | pvrs.L2 = pvrs, pnames.L2 = pnames, psave = psave 161 | ) 162 | 163 | for(i in names(inp)) assign(i, inp[[i]], pos = parent.frame()) 164 | 165 | } 166 | 167 | invisible(NULL) 168 | 169 | } 170 | 171 | # convert formula to character 172 | .formula2char <- function(x){ 173 | 174 | chr <- as.character(x) 175 | paste(chr[c(3, 1, 2)]) 176 | 177 | } 178 | 179 | .check.model <- function(x){ 180 | # check model type and number of levels 181 | 182 | xnew <- x 183 | 184 | # ensure proper list format 185 | if(is.list(x) & length(x) > 2) stop("Cannot determine the number of levels. 
The 'formula' or 'type' argument must indicate either a single-level model, a model for responses at level 1, or two models for responses at level 1 and 2.")

  if(!is.list(x)) x <- list(x)

  # check cluster specification and model type: for formulas, look for a '|'
  # term; for 'type' vectors, look for the cluster code (-2)
  clt <- lapply(x, function(z){
    if(is.language(z)){
      tl <- attr(terms(z), "term.labels")
      tl[grep("\\|", tl)]
    }else{
      which(z == -2)
    }
  })
  isML <- length(clt[[1]]) > 0
  isL2 <- length(x) == 2

  if(isL2 & !isML) stop("No cluster variable found. Imputation models for responses at level 1 and 2 require the specification of a cluster variable in the level-1 equation.")

  attr(xnew, "is.ML") <- isML
  attr(xnew, "is.L2") <- isL2
  xnew

}

.check.variablesL2 <- function(x, clus){
  # check for variables at L2 (constant at L1), i.e., columns equal to their
  # cluster means up to numerical tolerance; NAs are ignored

  apply(x, 2, function(a) all( abs(a-clusterMeans(a, clus)) < sqrt(.Machine$double.eps),
                               na.rm = T))

}

# convert type to formula
.type2formula <- function(data, type){
  # translate a 'type' vector (or list of two vectors for L1/L2 models) into
  # the corresponding model formula(s); codes: 1 = target, 2 = fixed predictor,
  # 3 = fixed+random predictor, -1 = grouping variable, -2 = cluster indicator

  # L2: separate model equations
  type <- .check.model(type)
  isML <- attr(type, "is.ML")
  isL2 <- attr(type, "is.L2")
  if(isL2){
    type.L2 <- type[[2]]
    type <- type[[1]]
  }

  nms <- colnames(data)

  # grouping (may be specified at either level)
  grp <- if(any(type == -1)) nms[type == -1] else NULL
  if(isL2 & is.null(grp)){
    if(any(type.L2 == -1)) grp <- nms[type.L2 == -1]
  }

  # L1 model
  # FIX: error messages previously misspelled "columns" as "colums"
  if(ncol(data) != length(type)) stop("Length of 'type' must be equal to the number of columns in 'data'.")
  if(sum(type == -2) > 1) stop("Only one cluster indicator may be specified.")

  cls <- nms[type == -2]

  yvrs <- paste( nms[type == 1], collapse = "+" )
  pvrs <- paste( c(1, nms[type %in% c(2, 3)]), collapse = "+" )
  qvrs <- if(isML) paste( c(1, nms[type == 3]), collapse = "+" ) else NULL

  # build L1 formula
  cls.fml <- if(isML) paste("+ (", qvrs, "|", cls, ")") else NULL
  fml <- formula( paste(yvrs, "~", pvrs, cls.fml) )

  # L2 model
  if(isL2){

    if(ncol(data) != length(type.L2)) stop("Length of 'type' must be equal to the number of columns in 'data'.")

    yvrs <- paste( nms[type.L2 == 1], collapse = "+" )
    pvrs <- paste( c(1, nms[type.L2 %in% c(2, 3)]), collapse = "+" )

    # build formula (make list)
    fml <- list( fml, formula( paste(yvrs, "~", pvrs) ) )

  }

  attr(fml, "group") <- grp
  attr(fml, "is.ML") <- isML
  attr(fml, "is.L2") <- isL2

  return(fml)

}

--------------------------------------------------------------------------------
/R/internal-pool.R:
--------------------------------------------------------------------------------

.pool.estimates <- function(Qhat, Uhat, m, diagonal = FALSE, df.com = NULL, nms = NULL) {
  # pool point estimates and (if variances are given) inferences across the m
  # imputations following Rubin's rules

  # pool point estimates
  Qbar <- apply(Qhat, 1, mean)

  # pool variances and inferences
  if (!is.null(Uhat)) {

    Ubar <- apply(Uhat, 1:2, mean)
    B <- tcrossprod(Qhat - Qbar) / (m-1)
    T <- Ubar + (1 + m^(-1)) * B

    se <- sqrt(diag(T))
    t <- Qbar/se

    r <- (1 + m^(-1)) * diag(B) / diag(Ubar)

    # compute degrees of freedom
    v <- vm <- (m-1) * (1 + r^(-1))^2
    if (!is.null(df.com)) {
      lam <- r / (r+1)
      vobs <- (1-lam) * ((df.com+1) / (df.com+3)) * df.com
      v <- (vm^(-1) + vobs^(-1))^(-1)
    }

    fmi <- (r + 2 / (v+3)) / (r+1)
    pval <- 2 * (1 - pt(abs(t), df = v))

    # create output
    out <- matrix(c(Qbar, se, t, v, pval, r, fmi), ncol = 7)
    colnames(out) <- c("Estimate", "Std.Error", "t.value", "df", "P(>|t|)", "RIV", "FMI")
    if(!diagonal) attr(out, "T") <- T

  } else {

    # create output
    out <- matrix(Qbar, ncol = 1)
    colnames(out) <- "Estimate"

  }

  # parameter names
  rownames(out) <- nms
  attr(out, "par.labels") <- attr(nms, "par.labels")

  return(out)

}


.D1 <- function(Qhat, Uhat, df.com) {
  # pooling for multidimensional estimands (D1, Li et al., 1991; Reiter, 2007)

  k <- dim(Qhat)[1]
  m <- dim(Qhat)[2]

  # D1
  Qbar <- apply(Qhat, 1, mean)
  Ubar <- apply(Uhat, c(1, 2), mean)

  B <- cov(t(Qhat))
  r <- (1+m^(-1)) * sum(diag(B %*% solve(Ubar))) / k
  Ttilde <- (1 + r) * Ubar

  val <- t(Qbar) %*% solve(Ttilde) %*% Qbar / k

  # compute degrees of freedom (df2)
  t <- k*(m-1)
  if(!is.null(df.com)) {

    # warn about poor behavior for t<=4
    if (t <= 4) {
      warning("Degrees of freedom (df.com) may not be trustworthy, because the number of imputations is too low (m \u2264 5). To obtain trustworthy results, re-run the procedure with a larger number of imputations.")
    }

    # small-sample degrees of freedom (Reiter, 2007; Eq. 
1-2)
    a <- r * t / (t-2)
    vstar <- ( (df.com+1) / (df.com+3) ) * df.com

    c0 <- 1 / (t-4)
    c1 <- vstar - 2 * (1+a)
    c2 <- vstar - 4 * (1+a)

    z <- 1 / c2 +
      c0 * (a^2 * c1 / ((1+a)^2 * c2)) +
      c0 * (8*a^2 * c1 / ((1+a) * c2^2) + 4*a^2 / ((1+a) * c2)) +
      c0 * (4*a^2 / (c2 * c1) + 16*a^2 * c1 / c2^3) +
      c0 * (8*a^2 / c2^2)

    v <- 4 + 1/z

  } else {

    # large-sample degrees of freedom (no df.com supplied)
    if (t > 4) {
      v <- 4 + (t-4) * (1 + (1 - 2*t^(-1)) * (r^(-1)))^2
    } else {
      v <- t * (1 + k^(-1)) * ((1 + r^(-1))^2) / 2
    }

  }

  return(list(F = val, k = k, v = v, r = r))

}

.D2 <- function(d, k) {
  # pooling for multidimensional estimands (D2, Li, Meng et al., 1991)
  # d: vector of m test statistics (one per imputed data set)
  # k: degrees of freedom of each test
  # returns the pooled statistic (F), k, denominator df (v), and RIV (r)

  m <- length(d)

  # D2: pooled statistic from the average statistic and the between-imputation
  # variability of its square root
  dbar <- mean(d)

  r <- (1 + m^(-1)) * var(sqrt(d))

  val <- (dbar/k - (m+1)/(m-1) * r) / (1+r)

  # compute degrees of freedom (df2)
  v <- k^(-3/m) * (m-1) * (1 + r^(-1))^2

  return(list(F = val, k = k, v = v, r = r))

}

--------------------------------------------------------------------------------
/R/internal-zzz.R:
--------------------------------------------------------------------------------

.localUpdate <- function(object, envir = parent.frame(), ...){
  # re-evaluate the call stored in 'object' with updated arguments (...),
  # evaluating the updated call in 'envir'

  cll <- getCall(object)
  # FIX: was 'is.null(call)', which tested the base function 'call' (never
  # NULL), so the guard could never trigger; test the extracted call instead
  if (is.null(cll)) stop("Need an object with a call component.")

  # update call components based on additional arguments (...)
  extras <- match.call(expand.dots = FALSE)$...
  for(i in names(extras)) cll[[i]] <- extras[[i]]

  # update in local environment
  eval(cll, envir = envir)

}

.checkDeprecated <- function(x, arg.list, name){
  # match argument list (arg.list, usually ...)
by name against deprecated (name)
  # and return matching value if match is found, otherwise return original value
  # (x)

  cll <- match.call()

  nms <- names(arg.list)
  m <- sapply(nms, function(n, o){
    m <- try(match.arg(n, o), silent = TRUE)
    return(if(inherits(m, "try-error")) NA else m)
  }, o = name)

  # if match is found, print message and assign value to new name
  if(any(!is.na(m))){
    # FIX: was 'arg.list[[nms[1]]]', which returned the *first* extra argument
    # regardless of which one actually matched the deprecated name
    ans <- arg.list[[ nms[which(!is.na(m))[1]] ]]
    msg <- paste0("The '", name, "' argument is deprecated. Please use '", as.character(cll[[2]]), "' instead.")
    warning(msg)
  }else{
    ans <- x
  }

  return(ans)

}

.checkNamespace <- function(x){
  # check that the packages required for the supported object classes in 'x'
  # are installed; stops with an informative error otherwise

  # specify class-package pairs (regular expressions on class names)
  cls.pkg <- list(
    "lme4" = "^g?l?merMod$",
    "nlme" = "^n?lme$",
    "geepack" = "^geeglm$",
    "survival" = "^coxph$",
    "MASS" = "^polr$"
  )

  # match class to package names
  req.pkg <- lapply(cls.pkg, function(p, x) grep(pattern = p, x = x, value = TRUE), x = x)
  req.pkg <- req.pkg[sapply(req.pkg, length) > 0]

  for(i in seq_along(req.pkg)){
    pkg.name <- names(req.pkg)[i]
    pkg.cls <- paste(req.pkg[[i]], collapse = "|")
    if(!requireNamespace(pkg.name, quietly = TRUE)) stop("The '", pkg.name, "' package must be installed in order to use this function with objects of class '", pkg.cls, "'.")
  }

  invisible(NULL)

}

.formatTable <- function(x, prefix = "%.", postfix = "f", digits = 3, sci.limit = 5, width,
                         col.names = colnames(x), row.names = rownames(x), labels = NULL,
                         labels.sep = 3){
  # format a numeric table with a common sprintf format and fixed column width;
  # returns a character matrix with one extra header row and row-name column

  # format (one sprintf format per column, recycled)
  fmt <- paste0(prefix, digits, postfix)
  if(ncol(x) %% length(fmt)) stop("Format and table dimensions do not match.")
  fmt <- rep_len(fmt, length.out = ncol(x))

  # switch to scientific notation for large values (ignore NA/NaN)
  isLarge <- apply(x, 2, function(z, l){
    y <- any(z >= 10^l)
    y[is.na(y) | is.nan(y)] <- FALSE
    return(y)
  }, l = sci.limit)
  fmt[isLarge] <- sub(paste0(postfix, "$"), "e", fmt[isLarge])

  # make formatted matrix ('+ 0' normalizes negative zero)
  y <- matrix("", nrow(x), ncol(x))
  for(i in seq_len(ncol(x))) y[,i] <- sprintf(fmt[i], x[,i] + 0)

  # find width
  if(missing(width)) width <- max(sapply(c(colnames(x), y), nchar))

  # fill table
  out <- matrix("", nrow(x)+1, ncol(x)+1)
  out[,1] <- format(c("", row.names), justify = "left")
  out[1, -1] <- format(col.names, justify = "right", width = width)
  out[-1, -1] <- format(y, justify = "right", width = width)

  # add labels (if any), parenthesized and appended to the row names
  if(!is.null(labels)){
    labels[nchar(labels) > 0] <- paste0("(", labels[nchar(labels) > 0], ")")
    pl <- format(labels, justify = "left")
    nc <- max(nchar(pl))
    out[-1, 1] <- paste0(out[-1, 1], paste0(rep(" ", labels.sep), collapse = ""), pl)
    out[1, 1] <- paste0(out[1, 1], paste0(rep(" ", nc + labels.sep), collapse = ""))
  }

  return(out)

}


.extractMatrix <- function(x, ...){
  # extract submatrix from array (indexed by ...)

  if(is.null(dim(x))) return(x)

  out <- `[`(x, , , ...)
120 | dim(out) <- dim(x)[1:2] 121 | dimnames(out) <- dimnames(x)[1:2] 122 | 123 | out 124 | 125 | } 126 | 127 | .adiag <- function(x, stacked = FALSE){ 128 | # extract diagonal elements of first two dimensions in three-dimensional array 129 | # containing either square (default) or stacked matrices 130 | 131 | d <- dim(x) 132 | 133 | # indices for diagonal entries (square or stacked-square) 134 | if(stacked){ 135 | i <- seq_len(d[2]) + d[1]*(seq_len(d[2])-1) 136 | i <- outer(i, (seq_len(d[1]/d[2])-1)*d[2], `+`) 137 | i <- outer(i, (seq_len(d[3])-1)*d[1]*d[2], `+`) 138 | }else{ 139 | i <- seq_len(d[1]) + d[1]*(seq_len(d[1])-1) 140 | i <- outer(i, (seq_len(d[3])-1)*d[1]^2, `+`) 141 | } 142 | 143 | x[as.vector(i)] 144 | 145 | } 146 | 147 | -------------------------------------------------------------------------------- /R/is.mitml.list.R: -------------------------------------------------------------------------------- 1 | is.mitml.list <- function(x){ 2 | # checks if the argument is a list of class "mitml.list" 3 | 4 | l <- inherits(x, "mitml.list") & is.list(x) 5 | if(!l){ 6 | return(FALSE) 7 | }else{ 8 | if(any(!sapply(x, is.data.frame))) warning("Does not appear to be a list of data frames.") 9 | return(TRUE) 10 | } 11 | 12 | } 13 | -------------------------------------------------------------------------------- /R/jomo2mitml.list.R: -------------------------------------------------------------------------------- 1 | jomo2mitml.list <- function(x){ 2 | # convert jomo imputations to mitml.list 3 | 4 | long2mitml.list(x, split = "Imputation", exclude = 0) 5 | 6 | } 7 | 8 | -------------------------------------------------------------------------------- /R/long2mitml.list.R: -------------------------------------------------------------------------------- 1 | long2mitml.list <- function(x, split, exclude = NULL){ 2 | # convert data set in "long" format to mitml.list 3 | 4 | i1 <- which(colnames(x) == split) 5 | f <- x[,i1] 6 | 7 | if(!is.null(exclude)){ 8 | i2 <- 
if(length(exclude) == 1) f != exclude else !f %in% exclude
    x <- x[i2, , drop = F]
    f <- f[i2]
    if(is.factor(f)) f <- droplevels(f)
  }

  out <- split(x[, -i1, drop = F], f = f)
  names(out) <- NULL

  class(out) <- c("mitml.list", "list")
  return(out)

}

--------------------------------------------------------------------------------
/R/mids2mitml.list.R:
--------------------------------------------------------------------------------

mids2mitml.list <- function(x){
  # convert a 'mids' object (mice) to "mitml.list"

  if(!requireNamespace("mice", quietly = TRUE)) stop("The 'mice' package must be installed to use this function.")

  # extract the m completed data sets
  # FIX: 'seq_len(x$m)' is safe for degenerate m == 0, where the previous
  # 'for(ii in 1:m)' would have iterated over c(1, 0)
  out <- lapply(seq_len(x$m), function(ii) mice::complete(x, action = ii))

  class(out) <- c("mitml.list", "list")
  return(out)

}

--------------------------------------------------------------------------------
/R/mitml.list2mids.R:
--------------------------------------------------------------------------------

mitml.list2mids <- function(x, data, fill = FALSE, where = NULL){
  # convert objects of class "mitml.list" to "mids"

  # check for 'mice'
  if(!requireNamespace("mice", quietly = TRUE)) stop("The 'mice' package must be installed to use this function.")

  # check variable names
  nms.inc <- names(data)
  nms.imp <- unique(do.call(c, lapply(x, names)))

  if(any(c(".imp", ".id") %in% nms.inc)) stop("Columns named '.imp' or '.id' are not allowed in 'data'.")
  if(any(c(".imp", ".id") %in% nms.imp)) stop("Columns named '.imp' or '.id' are not allowed in 'x'.")

  nms.new <- nms.imp[!nms.imp %in% nms.inc]
  if(length(nms.new) > 0L){
    if(!fill) stop("Some variables in the imputed data ('x') are not present in the original data ('data') Use 'fill = TRUE' to include them.")
    data[, nms.new] <- NA
  }

  # prepare data
  z <- c(list(data), x)
for(i in seq_along(z)){
    z[[i]] <- cbind(.imp = i - 1, .id = seq.int(1, nrow(z[[i]])), z[[i]])
  }

  return(mice::as.mids(long = do.call(rbind, z), where = where))

}

--------------------------------------------------------------------------------
/R/mitmlComplete.R:
--------------------------------------------------------------------------------

mitmlComplete <- function(x, print = "all", force.list = FALSE){
  # extract completed (imputed) data sets from a "mitml" object;
  # 'print' selects data sets by number (values <= 0 denote the original,
  # incomplete data) or requests all of them ("all"/"list")

  if(sum(print <= 0) > 1) stop("Only one negative or zero value is allowed in 'print'.")

  dat <- x$data
  srt <- order( attr(x$data, "sort") )
  method <- class(x)[2]

  m <- x$iter$m
  ind <- x$index.mat
  rpm <- x$replacement.mat

  # build one completed data set (ii > 0) or return the incomplete data
  # (ii <= 0), restored to the original row order in either case
  completeCase <- function(ii){
    if(ii > 0){
      .completeOne(dat, ii, ind, rpm, method)[srt,]
    }else{
      .stripDataAttributes(dat[srt,])
    }
  }

  if(is.numeric(print)){

    if(length(print) == 1){
      out <- completeCase(print)
      if(force.list) out <- list(out)
    }else{
      out <- lapply(print, completeCase)
    }

  }else{

    if(!print %in% c("list", "all")) stop("Invalid 'print' argument.")
    out <- lapply(seq_len(m), completeCase)

  }

  if(is.list(out) && !is.data.frame(out)) class(out) <- c("mitml.list", "list")
  return(out)

}

.completeOne <- function(x, i, ind, rpm, method){

  if(method == "jomo"){

    fac <- which(colnames(x) %in% names(attr(x, "labels")))
    nofac <- !(ind[,2] %in% fac)
    if(any(nofac)) x[ ind[nofac, ,drop = F] ] <- rpm[nofac, i, drop = F]

    for(ff in fac){
      fi <- which(ind[,2] == ff)
      lev <- attr(x, "labels")[[colnames(x)[ff]]]
if(length(fi)>0) x[ ind[fi, ,drop = F] ] <- lev[rpm[fi, i]]
    }

  }else{

    x[ind] <- rpm[,i]

  }
  .stripDataAttributes(x)

}

.stripDataAttributes <- function(x){
  # remove the bookkeeping attributes attached by the imputation functions

  attr(x, "sort") <- NULL
  attr(x, "group") <- NULL
  attr(x, "levels") <- NULL
  attr(x, "labels") <- NULL

  x

}

--------------------------------------------------------------------------------
/R/multilevelR2.R:
--------------------------------------------------------------------------------

multilevelR2 <- function(model, print = c("RB1", "RB2", "SB", "MVP")){
  # multilevel R-squared measures for a fitted lme4/nlme model or a list of
  # such models (averaged across list elements)

  # print argument case insensitive
  print <- toupper(print)
  print <- match.arg(print, several.ok = TRUE)
  method <- NULL

  # select method
  if(is.list(model)){
    cls <- class(model[[1]])
    if(inherits(model[[1]], "merMod")) method <- "lmer"
    if(inherits(model[[1]], "lme")) method <- "nlme"
  }else{
    cls <- class(model)
    if(inherits(model, "merMod")) method <- "lmer"
    if(inherits(model, "lme")) method <- "nlme"
  }
  if(is.null(method)) stop("Calculation of multilevel R-squared statistics not supported for models of class '", paste0(cls, collapse = "|"), "'.")

  # calculate R-squared (averaged across imputations for lists of fits)
  if(is.list(model)){

    out <- sapply(model, .getRsquared, print = print, method = method)
    if(is.null(dim(out))) out <- matrix(out, nrow = 1)
    out <- rowMeans(out)

  }else{

    out <- .getRsquared(model, print, method)

  }
  out

}

.getRsquared <- function(model, print, method){
  # R squared for a single model fit (lme4 or nlme)

  # check if refit is necessary (RB1/RB2/SB require the null model)
  refit <- any(c("RB1", "RB2", "SB") %in% print)

  if(method == "lmer"){

    # model terms
    trm <- terms(model)
    if(!as.logical(attr(trm, "intercept"))) stop("Model must contain intercept.")
    yvr <- as.character(attr(trm, "variables")[-1])[attr(trm, "response")]
    cvr <- names(lme4::getME(model, "flist"))
    # FIX: error message read "only support for"
    if(length(cvr)>1) stop("Calculation of R-squared only supported for models with a single cluster variable.")
    cvr <- cvr[1]

    if(refit){

      # fit null model
      fml0 <- formula(paste0(yvr, "~1+(1|", cvr, ")"))
      model0 <- update(model, fml0)

      # variance components under null
      vc0 <- lme4::VarCorr(model0)
      s0 <- attr(vc0, "sc")^2
      t0.0 <- vc0[[cvr]][1, 1]

    }

    # alternative model components
    beta <- lme4::fixef(model)[-1]
    X <- lme4::getME(model, "X")[, -1, drop = F]
    Z <- lme4::getME(model, "mmList")[[1]][, -1, drop = F]
    muZ <- colMeans(Z)
    vZ <- cov(Z)

    # predicted and total variance
    vc1 <- lme4::VarCorr(model)
    t0.1 <- vc1[[cvr]][1, 1]
    t10.1 <- vc1[[cvr]][1, -1]
    t11.1 <- vc1[[cvr]][-1, -1, drop = F]
    s1 <- attr(vc1, "sc")^2

  }

  if(method == "nlme"){

    # model terms
    trm <- terms(model)
    if(!as.logical(attr(trm, "intercept"))) stop("Model must contain intercept.")
    yvr <- as.character(attr(trm, "variables")[-1])[attr(trm, "response")]
    cvr <- attr(nlme::getGroups(model), "label")
    # FIX: error message read "only support for"
    if(length(nlme::getGroupsFormula(model, asList = T))>1) stop("Calculation of R-squared only supported for models with a single cluster variable.")

    if(refit){

      # fit null model
      ffml0 <- formula(paste0(yvr, "~1"))
      rfml0 <- formula(paste0("~1|", cvr, ""))
      if(is.null(nlme::getData(model))) stop("No data sets found in 'lme' fit. See '?testModels' for an example.")
      model0 <- update(model, fixed = ffml0, random = rfml0, data = model$data)

      # variance components under null
      vc0 <- nlme::getVarCov(model0)
      s0 <- model0$sigma^2
      t0.0 <- vc0[1, 1]

    }

    # alternative model components
    beta <- nlme::fixef(model)[-1]
    fe <- model$terms
    X <- model.matrix(fe, nlme::getData(model))[, -1, drop = F]
    re <- attr(model$modelStruct$reStruct[[1]], "formula")
    Z <- model.matrix(re, nlme::getData(model))[, -1, drop = F]
    muZ <- colMeans(Z)
    vZ <- cov(Z)

    # predicted and total variance
    vc1 <- nlme::getVarCov(model)
    t0.1 <- vc1[1, 1]
    t10.1 <- vc1[1, -1]
    t11.1 <- vc1[-1, -1, drop = F]
    s1 <- model$sigma^2

  }

  # calculate R2 (unused 'muX' locals removed; only muZ enters the formulas)
  vyhat <- var( X %*% beta )
  vy <- vyhat + t0.1 + 2*(muZ %*% t10.1) + muZ%*%t11.1%*%muZ + sum(diag(t11.1%*%vZ)) + s1

  if(refit){
    rb1 <- 1 - s1/s0
    rb2 <- 1 - t0.1/t0.0
    sb <- 1 - (s1+t0.1)/(s0+t0.0)
  }else{
    rb1 <- rb2 <- sb <- NA
  }
  mvp <- as.vector(vyhat/vy)

  c(RB1 = rb1, RB2 = rb2, SB = sb, MVP = mvp)[print]

}

--------------------------------------------------------------------------------
/R/panImpute.R:
--------------------------------------------------------------------------------

panImpute <- function(data, type, formula, n.burn = 5000, n.iter = 100, m = 10,
                      group = NULL, prior = NULL, seed = NULL, save.pred = FALSE,
                      keep.chains = c("full", "diagonal"), silent = FALSE){

  # wrapper function for the Gibbs sampler in the pan package

  # *** checks
  if(!missing(type) && !missing(formula)) stop("Only one of 'type' or 'formula' may be specified.")
  if(save.pred && !missing(type)){
    warning("Option 'save.pred' is ignored if 'type' is specified")
    save.pred = FALSE
  }
  keep.chains <- 
match.arg(keep.chains)

  # convert type
  if(!missing(type)){
    formula <- .type2formula(data, type)
    group <- attr(formula, "group")
  }

  # empty objects to assign to (populated by .model.byFormula below)
  clname <- yvrs <- y <- ycat <- zcol <- xcol <- pred <- clus <- psave <-
    pvrs <- qvrs <- pnames <- qnames <- NULL

  # preserve original order
  # FIX: the coercion result was previously discarded
  # ('if(!is.data.frame(data)) as.data.frame(data)'), so non-data-frame
  # input was never actually converted
  if(!is.data.frame(data)) data <- as.data.frame(data)
  data <- cbind(data, original.order = 1:nrow(data))

  # address additional grouping
  grname <- group
  if(is.null(group)){
    group <- rep(1, nrow(data))
  }else{
    group <- data[,group]
    if(length(group) != nrow(data)) stop("Argument 'group' is not correctly specified.")
  }
  group.original <- group
  group <- as.numeric(factor(group, levels = unique(group)))

  # ***
  # model input

  # populate local frame
  .model.byFormula(data, formula, group, group.original, method = "pan")

  # check model input
  if(any(is.na(group)))
    stop("Grouping variable must not contain missing data.")
  if(any(is.na(pred)))
    stop("Predictor variables must not contain missing data.")
  if(sum(is.na(y)) == 0)
    stop("Target variables do not contain any missing data.")
  if(any(!sapply(y, is.numeric)))
    stop("Target variables must be numeric. You may either convert them or use jomoImpute() instead.")
  if(any(duplicated(yvrs)))
    stop("Found duplicate target variables.")

  # reorder colums
  cc <- which(colnames(data) %in% c(clname, grname, yvrs))
  data.ord <- cbind(data[c(clname, grname, yvrs)], data[-cc])

  # ***
  # pan setup

  if(is.null(prior)){
    prior <- list( a = ncol(y), Binv = diag(1, ncol(y)),
                   c = ncol(y)*length(zcol), Dinv = diag(1, ncol(y)*length(zcol)) )
  }

  if(is.null(seed)){
    set.seed(as.integer(runif(1, 0, 10^6)))
  }else{
    set.seed(as.integer(seed))
  }
  rns <- sapply(unique(group), function(x, m) as.integer(runif(m+1, 0, 10^6)), m = m)

  # prepare output (row/column indices of missing target values)
  ind <- which(is.na(data.ord), arr.ind = TRUE, useNames = FALSE)
  ind <- ind[ ind[,2] %in% which(colnames(data.ord) %in% colnames(y)), ,drop = FALSE ]
  rpm <- matrix(NA, nrow(ind), m)

  # standard dimensions
  ng <- length(unique(group))
  np <- length(xcol)
  nq <- length(zcol)
  nr <- ncol(y)

  # reduced dimensions (only diagonals of psi/sigma are stored)
  dpsi <- nr*nq
  dsig <- nr
  if(keep.chains == "diagonal"){
    dpsi <- dsig <- 1
  }

  bpar <- list(beta = array( NA, c(np, nr, n.burn, ng) ),
               psi = array( NA, c(nr*nq, dpsi, n.burn, ng) ),
               sigma = array( NA, c(nr, dsig, n.burn, ng) ))
  ipar <- list(beta = array( NA, c(np, nr, n.iter*m, ng) ),
               psi = array( NA, c(nr*nq, dpsi, n.iter*m, ng) ),
               sigma = array( NA, c(nr, dsig, n.iter*m, ng) ))

  # burn-in
  if(!silent){
    cat("Running burn-in phase ...\n")
    flush.console()
  }
  glast <- as.list(unique(group))
  for(gg in unique(group)){

    gi <- group == gg
    gy <- y[gi,]
    gpred <- pred[gi,]
    gclus <- clus[gi]
    # sort 1, ..., k
    gclus <- match(gclus, unique(gclus))

    cur <- pan::pan(gy, subj = gclus, gpred, xcol, zcol, prior, seed = rns[1, gg], iter = n.burn)
    glast[[gg]] <- cur$last

    # save parameter chains
    bpar[["beta"]][,,,gg] <- cur$beta
    if(keep.chains == "diagonal"){
      bpar[["psi"]][,,,gg] <- .adiag( cur$psi )
      bpar[["sigma"]][,,,gg] <- .adiag( cur$sigma )
    }else{
      bpar[["psi"]][,,,gg] <- cur$psi
      bpar[["sigma"]][,,,gg] <- cur$sigma
    }

  }

  # imputation (m chains of n.iter iterations each, continuing from burn-in)
  for(ii in 1:m){
    if(!silent){
      cat("Creating imputed data set (", ii, "/", m,") ...\n")
      flush.console()
    }

    gy.imp <- as.list(unique(group))
    for(gg in unique(group)){

      gi <- group == gg
      gy <- y[gi,]
      gpred <- pred[gi,]
      gclus <- clus[gi]
      # sort 1, ..., k
      gclus <- match(gclus, unique(gclus))

      cur <- pan::pan(gy, subj = gclus, gpred, xcol, zcol, prior, seed = rns[ii+1, gg], iter = n.iter,
                      start = glast[[gg]])
      glast[[gg]] <- cur$last

      # save imputations
      gy.imp[[gg]] <- cur$y

      # save parameter chains
      i0 <- seq.int(n.iter*(ii-1)+1, n.iter*ii)
      ipar[["beta"]][,,i0, gg] <- cur$beta
      if(keep.chains == "diagonal"){
        ipar[["psi"]][,,i0, gg] <- .adiag( cur$psi )
        ipar[["sigma"]][,,i0, gg] <- .adiag( cur$sigma )
      }else{
        ipar[["psi"]][,,i0, gg] <- cur$psi
        ipar[["sigma"]][,,i0, gg] <- cur$sigma
      }

    }
    y.imp <- do.call(rbind, gy.imp)
    rpm[,ii] <- y.imp[is.na(y)]

  }

  if(!silent){
    cat("Done!\n")
  }

  # clean up (drop the helper 'original.order' column, keep it as 'sort')
  srt <- data.ord[,ncol(data.ord)]
  data.ord <- data.ord[,-ncol(data.ord)]

  # prepare output data
  if( save.pred && !missing(formula) ) data.ord <- cbind(data.ord, pred[, psave, drop = F])
  # ordering
  attr(data.ord, "sort") <- srt
  attr(data.ord, "group") <- group.original
  # model summary
  model <- list(clus = clname, yvrs = yvrs, pvrs = pvrs, qvrs = qvrs)
  attr(model, "is.ML") <- TRUE
  attr(model, "is.L2") <- FALSE
  attr(model, "full.names") <- list(pvrs = pnames, qvrs = qnames)

  out <- list(
    data = data.ord,
    replacement.mat = rpm,
    index.mat = ind,
    call = match.call(),
    model = model,
    random.L1 = "none",
    prior = prior,
    iter = list(burn = n.burn, iter = n.iter, m = m),
    keep.chains = keep.chains,
    par.burnin = bpar,
    par.imputation = ipar
  )
  class(out) <- c("mitml", "pan")
  return(out)

}

--------------------------------------------------------------------------------
/R/print.mitml.R:
--------------------------------------------------------------------------------

print.mitml <- function(x, ...){
  # print method for objects of class "mitml"

  cl <- x$call
  vrs <- x$model
  itr <- x$iter
  ngr <- length(unique(attr(x$data, "group")))
  isML <- attr(x$model, "is.ML")
  isL2 <- attr(x$model, "is.L2")

  cat("\nCall:\n", paste(deparse(cl)), sep = "\n")
  cat("\n")

  if(isL2) cat("Level 1:\n", collapse = "\n")
  if(isML) cat(formatC("Cluster variable:", width=-25), vrs$clus, sep = " ", collapse = "\n")
  cat(formatC("Target variables:", width=-25), vrs$yvrs, collapse = "\n")
  cat(formatC("Fixed effect predictors:", width=-25), vrs$pvrs, collapse = "\n")
  if(isML) cat(formatC("Random effect predictors:", width=-25), vrs$qvrs, collapse = "\n")

  if(isL2){
    cat("\n")
    cat(formatC("Level 2:\n", width=-25), collapse = "\n")
    cat(formatC("Target variables:", width=-25), vrs$yvrs.L2, collapse = "\n")
    cat(formatC("Fixed effect predictors:", width=-25), vrs$pvrs.L2, collapse = "\n")
  }

  # FIX: user-facing typo "seperately" corrected to "separately"
  cat("\nPerformed", sprintf("%.0f", itr$burn), "burn-in iterations, and generated", sprintf("%.0f", itr$m),
      "imputed data sets,\neach", sprintf("%.0f", itr$iter), "iterations apart.",
      if(ngr>1){c("\nImputations were carried out separately within", sprintf("%.0f", ngr), "groups.\n")}, "\n")
| 31 | invisible(NULL) 32 | 33 | } 34 | -------------------------------------------------------------------------------- /R/print.mitml.anova.R: -------------------------------------------------------------------------------- 1 | print.mitml.anova <- function(x, digits = 3, sci.limit = 5, ...){ 2 | # print method for anova method 3 | 4 | cll <- x$call 5 | test <- x$test 6 | fml <- x$formula 7 | method <- x$method 8 | data <- x$data 9 | ariv <- x$ariv 10 | order.method <- x$order.method 11 | use <- x$use 12 | reml <- x$reml 13 | m <- x$test[[1]]$m 14 | 15 | n.tests <- length(fml) 16 | 17 | # print header 18 | cat("\nCall:\n", paste(deparse(cll)), sep = "\n") 19 | cat("\nModel comparison calculated from", m, "imputed data sets.") 20 | 21 | # print method 22 | cat("\nCombination method:", method) 23 | if(method == "D2") cat(" (", use, ")", sep = "") 24 | if(method == "D4" && ariv == "robust") cat(" (robust)", sep = "") 25 | cat("\n") 26 | 27 | # print model formulas 28 | cat("\n") 29 | for(mm in seq.int(1, n.tests)) cat("Model ", mm, ": ", fml[mm], "\n", sep = "") 30 | cat("\n") 31 | 32 | # combine multiple tests in one table 33 | test.tab <- lapply(test, "[[", "test") 34 | test.tab <- do.call(rbind, test.tab) 35 | rn <- paste0(seq.int(1, n.tests - 1), " vs ", seq.int(2, n.tests), " ") 36 | rownames(test.tab) <- rn 37 | 38 | # format table 39 | test.digits <- c(digits, 0, rep(digits, ncol(test.tab)-2)) 40 | out <- .formatTable(test.tab, digits = test.digits, sci.limit = sci.limit) 41 | for(i in seq_len(nrow(out))) cat(" ", out[i,], "\n") 42 | 43 | cat("\n") 44 | 45 | # print footer 46 | if(is.null(order.method)){ 47 | cat("Models were ordered as provided by the user (by decreasing complexity).\n") 48 | }else{ 49 | cat("Models were automatically ordered via '", order.method, "' (by decreasing complexity).\n", sep = "") 50 | } 51 | 52 | if(method == "D4"){ 53 | if(data){ 54 | cat("Data for stacking were extracted from the `data` argument.\n") 55 | }else{ 56 | cat("Data 
for stacking were automatically extracted from the fitted models.\n")
    }
  }

  if(reml){
    cat("Models originally fit with REML were automatically refit using ML.\n")
  }

  cat("\n")

  invisible()

}

--------------------------------------------------------------------------------
/R/print.mitml.summary.R:
--------------------------------------------------------------------------------

print.mitml.summary <- function(x, ...){
# print method for objects of class "summary.mitml"
# Displays the imputation call and model, iteration settings, optional
# convergence diagnostics (Rhat, SDprop, ACF) when present in x$conv,
# and per-variable missing-data rates. Returns NULL invisibly.

  cl <- x$call
  vrs <- x$model
  itr <- x$iter
  ngr <- x$ngr
  mdr <- x$missing.rates
  conv <- x$conv
  isML <- attr(x$model, "is.ML")
  isL2 <- attr(x$model, "is.L2")

  # print general information
  cat("\nCall:\n", paste(deparse(cl)), sep = "\n")
  cat("\n")

  # NOTE(review): 'collapse' is not an argument of cat(); it is printed as an
  # extra "\n" item. Kept as-is to preserve the existing output spacing.
  if(isL2) cat("Level 1:\n", collapse = "\n")
  if(isML) cat(formatC("Cluster variable:", width=-25), vrs$clus, sep = " ", collapse = "\n")
  cat(formatC("Target variables:", width=-25), vrs$yvrs, collapse = "\n")
  cat(formatC("Fixed effect predictors:", width=-25), vrs$pvrs, collapse = "\n")
  if(isML) cat(formatC("Random effect predictors:", width=-25), vrs$qvrs, collapse = "\n")

  if(isL2){
    cat("\n")
    cat(formatC("Level 2:\n", width=-25), collapse = "\n")
    cat(formatC("Target variables:", width=-25), vrs$yvrs.L2, collapse = "\n")
    cat(formatC("Fixed effect predictors:", width=-25), vrs$pvrs.L2, collapse = "\n")
  }

  # FIX: corrected user-facing typo "seperately" -> "separately"
  cat("\nPerformed", sprintf("%.0f", itr$burn), "burn-in iterations, and generated", sprintf("%.0f", itr$m),
      "imputed data sets,\neach", sprintf("%.0f", itr$iter), "iterations apart.",
      if(ngr>1){c("\nImputations were carried out separately within", sprintf("%.0f", ngr), "groups.")}, "\n")

  # print convergence diagnostics
  if(!is.null(conv)){

    # note for reduced chains
    if(x$keep.chains != "full"){
      cat("\nNote: Convergence criteria were calculated from a reduced set of\nparameters (setting: ",
          x$keep.chains, ").\n", sep = "")
    }

    for(cc in attr(conv, "stats")){

      # summary for Rhat and SDprop
      if(cc == "Rhat" || cc == "SDprop"){

        # six-number summary (min/quartiles/mean/median/max) per parameter block
        cout <- matrix(c( sapply(conv, function(z) min(z[,cc])),
                          sapply(conv, function(z) quantile(z[,cc], .25)),
                          sapply(conv, function(z) mean(z[,cc])),
                          sapply(conv, function(z) median(z[,cc])),
                          sapply(conv, function(z) quantile(z[,cc], .75)),
                          sapply(conv, function(z) max(z[,cc])) ), ncol = 6 )
        rownames(cout) <- c("Beta:", if(isL2) "Beta2:", if(isML) "Psi:", "Sigma:")
        colnames(cout) <- c("Min", "25%", "Mean", "Median", "75%", "Max")
        clab <- switch(cc, Rhat = "\nPotential scale reduction (Rhat, imputation phase):\n",
                       SDprop = "\nGoodness of approximation (imputation phase):\n")
        cat(clab, "\n")
        print.table(round(cout, 3))

        clab <- switch(cc, Rhat = "\nLargest potential scale reduction:\n",
                       SDprop = "\nPoorest approximation:\n")
        cat(clab)
        # report the matrix indices (columns 1:2) of the worst entry per block
        maxval <- lapply(conv, function(a) a[which.max(a[,cc]), 1:2])
        cat("Beta: [", paste(maxval$beta, collapse = ",") ,"], ",
            if(isL2) paste0("Beta2: [", paste(maxval$beta2, collapse = ",") ,"], "),
            if(isML) paste0("Psi: [", paste(maxval$psi, collapse = ",") ,"], "),
            "Sigma: [", paste(maxval$sigma, collapse = ",") ,"]\n", sep = "")

      }

      # summary for ACF
      if(cc == "ACF"){

        cout <- c( sapply(conv, function(z) mean(z[,"lag-1"])),
                   sapply(conv, function(z) mean(z[,"lag-k"])),
                   sapply(conv, function(z) mean(z[,"lag-2k"])),
                   sapply(conv, function(z) max(z[,"lag-1"])),
                   sapply(conv, function(z) max(z[,"lag-k"])),
                   sapply(conv, function(z) max(z[,"lag-2k"])) )
        # strip leading zero for compact fixed-width display (e.g. "-.123", " .123")
        neg <- cout<0
        cout <- sprintf(cout, fmt = "%.3f")
        cout[neg] <- gsub("^-0", "-", cout[neg])
        cout[!neg] <- gsub("^0", " ", cout[!neg])
        cout <- matrix(cout, 2+isML+isL2, 6)
        cout <- rbind(c(" Lag1", " Lagk", "Lag2k", " Lag1", " Lagk", "Lag2k"), cout)
        rownames(cout) <- c("", "Beta:", if(isL2) "Beta2:", if(isML) "Psi:", "Sigma:")
        colnames(cout) <- c(" Mean", "", "", " Max", "", "")
        cat("\nAutocorrelation (ACF, imputation phase):\n\n")
        print.table(cout)

        cat("\nLargest autocorrelation at lag k:\n")
        maxval <- lapply(conv, function(a) a[which.max(abs(a[,"lag-k"])), 1:2])
        cat("Beta: [", paste(maxval$beta, collapse = ",") ,"], ",
            if(isL2) paste0("Beta2: [", paste(maxval$beta2, collapse = ",") ,"], "),
            if(isML) paste0("Psi: [", paste(maxval$psi, collapse = ",") ,"], "),
            "Sigma: [", paste(maxval$sigma, collapse = ",") ,"]\n", sep = "")

      }
    }
  }

  # missing data rates
  mdrout <- t(as.matrix(mdr))
  rownames(mdrout) <- "MD%"
  cat("\nMissing data per variable:\n")
  print.table(mdrout)

  cat("\n")

  invisible(NULL)

}

--------------------------------------------------------------------------------
/R/print.mitml.testConstraints.R:
--------------------------------------------------------------------------------

print.mitml.testConstraints <- function(x, digits = 3, sci.limit = 5, ...){
# print method for MI estimates

  cll <- x$call
  test <- x$test
  constraints <- x$constraints
  method <- x$method
  m <- x$m
  adj.df <- x$adj.df
  df.com <- x$df.com

  # print header
  cat("\nCall:\n", paste(deparse(cll)), sep = "\n")
  cat("\nHypothesis test calculated from", m, "imputed data sets. The following\nconstraints were specified:\n\n")

  # print constrained estimates
  est <- cbind(x$Qbar, sqrt(diag(x$T)))
  colnames(est) <- c("Estimate", "Std.
Error")
  rownames(est) <- paste0(constraints, ":")

  # fixed-width numeric formatting; sci.limit controls scientific notation
  out <- .formatTable(est, digits = digits, sci.limit = sci.limit)
  for(i in seq_len(nrow(out))) cat(" ", out[i,], "\n")

  # print method
  cat("\nCombination method:", method, "\n\n")

  # print test results (first column = F value, second = df1, rest = digits)
  test.digits <- c(digits, 0, rep(digits, ncol(test)-2))
  out <- .formatTable(test, digits = test.digits, sci.limit = sci.limit)
  for(i in seq_len(nrow(out))) cat(" ", out[i,], "\n")

  # print footer (only D1 supports complete-data df adjustment)
  if(method == "D1"){
    cat("\n")
    if(adj.df){
      cat(c("Hypothesis test adjusted for small samples with",
            paste0("df=[", paste(df.com, collapse = ","), "]\ncomplete-data degrees of freedom.")))
    }else{
      cat("Unadjusted hypothesis test as appropriate in larger samples.")
    }
    cat("\n")
  }

  cat("\n")

  invisible()

}

summary.mitml.testConstraints <- function(object, ...){
# summary method for objects of class mitml.testConstraints
# (delegates to the print method; extra arguments are passed through)

  print.mitml.testConstraints(object, ...)

}

--------------------------------------------------------------------------------
/R/print.mitml.testEstimates.R:
--------------------------------------------------------------------------------

print.mitml.testEstimates <- function(x, digits = 3, sci.limit = 5, ...){
# print method for MI estimates
# Prints the pooled parameter estimates (x$estimates) and, if present,
# additionally pooled extra parameters (x$extra.pars), followed by a note
# on the degrees-of-freedom adjustment. Returns NULL invisibly.

  cll <- x$call
  est <- x$estimates
  ep <- x$extra.pars
  m <- x$m
  adj.df <- x$adj.df
  df.com <- x$df.com

  # print header
  cat("\nCall:\n", paste(deparse(cll)), sep = "\n")
  cat("\nFinal parameter estimates and inferences obtained from", m, "imputed data sets.\n")
  cat("\n")

  # print results
  if(!is.null(est)){

    # format numeric results
    pl <- attr(est, "par.labels")
    out <- .formatTable(est, digits = digits, sci.limit = sci.limit, labels = pl)
    for(i in seq_len(nrow(out))) cat(out[i,], "\n")

  }

  # print other results
  if(!is.null(ep)){

    if(!is.null(est)) cat("\n")

    # format numeric results
    pl <- attr(ep, "par.labels")
    out <- .formatTable(ep, digits = digits, sci.limit = sci.limit, labels = pl)
    for(i in seq_len(nrow(out))) cat(out[i,], "\n")

  }

  cat("\n")

  # print footer
  if(adj.df){
    cat(c("Hypothesis test adjusted for small samples with",
          paste0("df=[", paste(df.com, collapse = ","), "]\ncomplete-data degrees of freedom.")))
  }else{
    cat("Unadjusted hypothesis test as appropriate in larger samples.")
  }

  cat("\n\n")

  invisible()

}

summary.mitml.testEstimates <- function(object, ...){
# summary method for objects of class mitml.testEstimates
# (delegates to the print method; extra arguments are passed through)

  print.mitml.testEstimates(object, ...)

}

--------------------------------------------------------------------------------
/R/print.mitml.testModels.R:
--------------------------------------------------------------------------------

print.mitml.testModels <- function(x, digits = 3, sci.limit = 5, ...){
# print method for model comparisons
# Prints the pooled model-comparison test (D1-D4) together with notes on the
# combination method, df adjustment, data stacking (D4), and REML refitting.

  cll <- x$call
  test <- x$test
  method <- x$method
  use <- x$use
  reml <- x$reml
  refit <- x$refit
  m <- x$m
  data <- x$data
  ariv <- x$ariv
  adj.df <- x$adj.df
  df.com <- x$df.com

  # print header
  cat("\nCall:\n", paste(deparse(cll)), sep = "\n")
  cat("\nModel comparison calculated from", m, "imputed data sets.")

  # print method
  cat("\nCombination method:", method)
  if(method == "D2") cat(" (", use, ")", sep = "")
  if(method == "D4" && ariv == "robust") cat(" (robust)", sep = "")
  cat("\n\n")

  # print test results
  test.digits <- c(digits, 0, rep(digits, ncol(test)-2))
  out <- .formatTable(test, digits = test.digits, sci.limit = sci.limit)
  for(i in seq_len(nrow(out))) cat(" ", out[i,], "\n")

  cat("\n")

  # print footer (if any)
  footer <- FALSE

  if(method == "D1"){
    footer <- TRUE
    if(adj.df){
      cat("Hypothesis test adjusted for small samples with ",
          paste0("df=[", paste(df.com, collapse = ","), "]\ncomplete-data degrees of freedom."),
          "\n", sep = "")
    }else{
      cat("Unadjusted hypothesis test as appropriate in larger samples.\n")
    }
  }

  if(method == "D4"){
    footer <- TRUE
    if(data){
      cat("Data for stacking were extracted from the `data` argument.\n")
    }else{
      cat("Data for stacking were automatically extracted from the fitted models.\n")
    }
  }

  if(reml){
    footer <- TRUE
    if(refit){
      cat("Models originally fitted with REML were refitted using ML.\n")
    }else{
      cat("Models fitted with REML were used as is.\n")
    }
  }

  if(footer) cat("\n")

  invisible()

}

summary.mitml.testModels <- function(object, ...){
# summary method for objects of class mitml.testModels
# (delegates to the print method; extra arguments are passed through)

  print.mitml.testModels(object, ...)

}

--------------------------------------------------------------------------------
/R/rbind.mitml.list.R:
--------------------------------------------------------------------------------

rbind.mitml.list <- function(...){
# merges two objects of class "mitml.list" by appending rows of list entries
# (element-wise rbind across the supplied lists)
# NOTE(review): Map() returns a plain list; the result is not re-classed as
# "mitml.list" here -- confirm whether callers expect the class attribute.

  Map(rbind, ...)

}

--------------------------------------------------------------------------------
/R/read.mitml.R:
--------------------------------------------------------------------------------

read.mitml <- function(filename){
# read mitml objects from file
# Loads the .Rdata-style file into a private environment and returns the
# stored object. NOTE(review): if the file contains several objects, ls()
# yields a vector and eval(parse(...)) returns only the last one -- assumes
# files written by write.mitml() contain a single object; verify.

  env <- new.env(parent = parent.frame())
  load(filename, env)
  obj <- ls(env)
  eval(parse(text = obj), env)

}

--------------------------------------------------------------------------------
/R/sort.mitml.list.R:
--------------------------------------------------------------------------------

sort.mitml.list <- function(x, decreasing = FALSE, by, ...){
# sort list of multiply imputed data sets
# 'by' is an unevaluated expression (or list of expressions) evaluated within
# each data set and passed to order(); each data set is reordered accordingly.

  expr <- substitute(by)
  args0 <- list(decreasing = decreasing, ...)

  res <- lapply(x, function(i){
    # NOTE(review): parent.frame() here is the frame of the lapply callback,
    # not the caller of sort.mitml.list -- confirm intended lookup for symbols
    # in 'by' that are not columns of the data set.
    args <- eval(expr, i, parent.frame())
    if(!is.list(args)) args <- list(args)
    ind <- do.call("order", c(args, args0))
    i[ind,]
  })

  as.mitml.list(res)

}

--------------------------------------------------------------------------------
/R/subset.mitml.list.R:
--------------------------------------------------------------------------------

subset.mitml.list <- function(x, subset, select, ...){
# subset list of multiply imputed data sets
# NOTE: code adapted from subset.data.frame (by Peter Dalgaard and Brian Ripley)
# 'subset' is a logical row condition, 'select' a column selection expression;
# both are evaluated non-standardly within each imputed data set.

  # row indices: logical per data set; NA rows are dropped
  rind <- if (missing(subset)) {
    lapply(x, function(i) rep(TRUE, nrow(i)))
  } else {
    ss <- substitute(subset)
    rind <- lapply(x, function(i) eval(ss, i, parent.frame()))
    if (!is.logical(unlist(rind))) stop("'subset' must be logical")
    lapply(rind, function(i) i & !is.na(i))
  }

  # column indices: evaluate 'select' against a name -> position lookup
  cind <- if (missing(select)) {
    lapply(x, function(i) TRUE)
  } else {
    nl <- lapply(x, function(i){
      l <- as.list(seq_along(i))
      names(l) <- names(i)
      l
    })
    se <- substitute(select)
    lapply(nl, function(i) eval(se, i, parent.frame()))
  }

  res <- lapply(seq_along(x), function(i) x[[i]][rind[[i]], cind[[i]], drop = FALSE])
  as.mitml.list(res)

}

--------------------------------------------------------------------------------
/R/summary.mitml.R:
--------------------------------------------------------------------------------

summary.mitml <- function(object, n.Rhat = 3, goodness.of.appr = FALSE,
                          autocorrelation = FALSE, ...){

  # summary method for objects of class "mitml"
  # Computes missing-data rates and (optionally) convergence diagnostics
  # (Rhat, goodness of approximation, autocorrelation) from the parameter
  # chains of the imputation phase; returns a "mitml.summary" object.

  inc <- object$data
  ngr <- length(unique(attr(object$data, "group")))
  prm <- object$par.imputation
  iter <- dim(prm[[1]])[3]   # chain length of the imputation phase
  k <- object$iter$iter      # thinning interval between imputations
  isML <- attr(object$model, "is.ML")
  isL2 <- attr(object$model, "is.L2")

  # parameter chains (for backwards
compatibility)
  if(is.null(object$keep.chains)) object$keep.chains <- "full"

  # percent missing (formatted as strings, e.g. "12.5"; exact zero -> "0")
  mdr <- sapply(inc, FUN = function(x){mean(is.na(x))})
  mdr[] <- sprintf(mdr*100, fmt = "%.1f")
  mdr <- gsub("^0.0$", "0", mdr)

  # convergence for imputation phase
  conv <- NULL
  # Rhat requires at least 2 chain segments
  Rhat <- ifelse(is.null(n.Rhat), FALSE, n.Rhat >= 2)

  SDprop <- goodness.of.appr
  ACF <- autocorrelation
  if(Rhat|SDprop|ACF){

    # one diagnostic table per parameter block (beta, beta2, psi, sigma)
    conv <- c(list(beta = NULL), if(isL2) list(beta2 = NULL), if(isML) list(psi = NULL),
              list(sigma = NULL))
    for(pp in names(conv)){

      # chain array dims: [i1, i2, iteration, group]
      ni <- dim(prm[[pp]])[1]
      nj <- dim(prm[[pp]])[2]
      nl <- dim(prm[[pp]])[4]
      cmat <- matrix(NA_real_, ni*nj*nl, 3+Rhat+SDprop+3*ACF)
      cmat[,1] <- rep(1:ni, nj*nl)
      cmat[,2] <- rep(1:nj, each = ni, times = nl)
      cmat[,3] <- rep(1:nl, each = ni*nj)
      colnames(cmat) <- c("i1", "i2", "grp", if(Rhat) "Rhat", if(SDprop) "SDprop",
                          if(ACF) c("lag-1", "lag-k", "lag-2k"))

      for(ll in 1:nl){ # by group

        for(jj in 1:nj){
          for(ii in 1:ni){

            # check for redundant entries (symmetric/stacked matrices)
            if(pp == "psi"){
              if(jj > ii) next
            }
            if(pp == "sigma"){
              if(jj > ((ii-1)%%nj)+1) next
            }
            ind <- ( cmat[,1] == ii & cmat[,2] == jj & cmat[,3] == ll )
            chn <- matrix(prm[[pp]][ii,jj,,ll], 1, iter)
            # potential scale reduction (Rhat)
            if(Rhat) cmat[ind, "Rhat"] <- .GelmanRubin(chn, n.Rhat)
            # goodness of approximation
            if(SDprop) cmat[ind, "SDprop"] <- .SDprop(chn)
            # autocorrelation (at lag 1, at the thinning interval k, and 2k)
            if(ACF){
              cmat[ind, "lag-1"] <- .reducedACF(chn, lag = 1, smooth = 0)
              cmat[ind, "lag-k"] <- .reducedACF(chn, lag = k, smooth = 2, sd=.5)
              cmat[ind, "lag-2k"] <- .reducedACF(chn, lag = 2*k, smooth = 2, sd=.5)
            }
          }
        }
      }
      # drop rows that were skipped as redundant (still all-NA)
      conv[[pp]] <- cmat[!apply(cmat, 1, function(x) any(is.na(x))),,drop = F]
    }

    attr(conv, "stats") <- c("Rhat", "SDprop", "ACF")[c(Rhat, SDprop, ACF)]
  }

  smr <- list(
    call = object$call,
    model = object$model,
    prior = object$prior,
    iter = object$iter,
    keep.chains = object$keep.chains,
    ngr = ngr,
    missing.rates = mdr,
    conv = conv
  )

  class(smr) <- "mitml.summary"
  return(smr)

}

.reducedACF <- function(x, lag, smooth = 0, sd=.5){
# autocorrelation of a single chain at 'lag', optionally smoothed over
# neighboring lags (lag +/- smooth) with normal-density weights

  # check NA
  if(all(is.na(x))) return(NA)

  n <- length(x)
  lag0 <- lag
  lag <- lag + (-smooth:smooth)

  ac <- numeric(length(lag))
  y <- x - mean(x)
  ss.y <- sum(y^2)

  for(li in 1:length(lag)){
    ll <- lag[li]
    # leave at 0 for constant value
    ac[li] <- if(ss.y>0) sum( y[1:(n-ll)] * y[1:(n-ll)+ll] ) / ss.y else 0
  }

  if(smooth>0){
    # weights based on normal density
    w <- dnorm(-smooth:smooth, 0, sd)
    ac <- sum( ac * (w/sum(w)) )
  }

  ac

}

--------------------------------------------------------------------------------
/R/testConstraints.R:
--------------------------------------------------------------------------------

testConstraints <- function(model, qhat, uhat, constraints, method = c("D1", "D2"), ariv = c("default", "positive"), df.com = NULL){
# test constraints with multiply imputed data
# Applies the delta method to the constraint expressions in each imputed data
# set, then pools the transformed estimates with method D1 or D2.

  # ***
  # check input
  #

  # exactly one input form: either fitted models, or qhat+uhat
  if(missing(model) == (missing(qhat) || missing(uhat))){
    stop("Either 'model' or both 'qhat' and 'uhat' must be supplied.")
  }

  # match methods
  method <- match.arg(method)
  ariv <- match.arg(ariv)

  # warnings for ignored arguments
  if(!is.null(df.com) && method == "D2") warning("Complete-data degrees of freedom are not available for use with 'D2', and thus were ignored.")
  if(ariv == "positive" && method == "D1") warning("The 'positive' option is not available with method 'D1' and was ignored.")

  # clean names in constraints
  constraints <-
gsub("\\(Intercept\\)", "Intercept", constraints)
  k <- length(constraints)

  # ***
  # process matrix, array or list arguments
  #

  if(!missing(qhat)){

    # check input
    if(missing(uhat)) stop("Either 'model' or both 'qhat' and 'uhat' must be supplied.")
    if(!is.matrix(qhat) && is.array(qhat)) stop("The 'qhat' argument must be either a matrix or a list.")
    if(is.matrix(uhat)) stop("The 'uhat' argument must be either an array or a list.")

    # convert point estimates (matrix columns -> list of vectors)
    if(is.matrix(qhat)){
      qhat <- lapply(seq_len(ncol(qhat)), function(i, Q) Q[,i], Q = qhat)
    }

    # convert variance estimates (array slices -> list of matrices)
    if(is.array(uhat)){
      uhat <- lapply(seq_len(dim(uhat)[3]), function(i, U) as.matrix(U[,,i]), U = uhat)
    }

    # ensure proper format: m imputations, p parameters, p x p covariances
    m <- length(qhat)
    p <- length(qhat[[1]])
    if(m != length(uhat) || !is.matrix(uhat[[1]]) || p != ncol(uhat[[1]]) || p != nrow(uhat[[1]])) stop("Dimensions of 'qhat' and 'uhat' do not match.")

    Qhat <- matrix(unlist(qhat), ncol = m)
    Uhat <- array(unlist(uhat), dim = c(p, p, m))
    if(any(!is.finite(Uhat))) stop("Missing values in 'uhat' are not allowed.")

    nms <- names(qhat[[1]])

  }

  # ***
  # process fitted models
  #

  if(!missing(model)){

    if(!is.list(model)) stop("The 'model' argument must be a list of fitted statistical models.")

    # get class (check required packages)
    cls <- class(model[[1]])
    .checkNamespace(cls)

    # extract parameter estimates
    est <- .extractParameters(model)

    Qhat <- est$Qhat
    Uhat <- est$Uhat
    nms <- est$nms

    m <- length(model)
    p <- nrow(Qhat)

  }

  # ***
  # delta method
  #

  # prepare parameter names (constraints refer to "Intercept", not "(Intercept)")
  if(is.null(nms)) stop("Could not determine parameter names.")
  nms <- gsub("\\(Intercept\\)", "Intercept", nms)
  rownames(Qhat) <- nms
= match.call(), 167 | constraints = constraints, 168 | test = out, 169 | Qbar = Qbar, 170 | T = Ttilde, 171 | m = m, 172 | adj.df = !is.null(df.com), 173 | df.com = df.com, 174 | method = method 175 | ) 176 | 177 | class(out) <- "mitml.testConstraints" 178 | return(out) 179 | 180 | } 181 | 182 | -------------------------------------------------------------------------------- /R/testEstimates.R: -------------------------------------------------------------------------------- 1 | testEstimates <- function(model, qhat, uhat = NULL, extra.pars = FALSE, df.com = NULL, ...){ 2 | # combine scalar estimates from the analysis of multiply imputed data 3 | 4 | # *** 5 | # check input 6 | # 7 | 8 | # handle deprecated arguments 9 | dots <- list(...) 10 | extra.pars <- .checkDeprecated(extra.pars, arg.list = dots, name = "var.comp") 11 | 12 | # check model specification 13 | if(missing(model) == missing(qhat)){ 14 | stop("Either 'model' or 'qhat' must be supplied.") 15 | } 16 | 17 | # check extra parameters 18 | if(!extra.pars) pooled.ep.est <- NULL 19 | 20 | # *** 21 | # process matrix, array or list input 22 | # 23 | 24 | if(!missing(qhat)){ 25 | 26 | # check arguments 27 | if(extra.pars) warning("The 'extra.pars' argument is ignored when 'qhat' is used.") 28 | pooled.ep.est <- NULL 29 | cls <- NULL 30 | 31 | # check qhat 32 | if(!is.matrix(qhat) && is.array(qhat)) stop("The 'qhat' argument must be either a matrix or a list.") 33 | if(is.matrix(qhat)){ 34 | qhat <- lapply(seq_len(ncol(qhat)), function(i, Q) Q[,i], Q = qhat) 35 | } 36 | 37 | # check uhat 38 | if(!is.null(uhat)){ 39 | 40 | if(is.matrix(uhat)){ 41 | uhat <- lapply(seq_len(ncol(uhat)), function(i, U) U[,i], U = uhat) 42 | } else 43 | if(is.array(uhat)){ 44 | uhat <- lapply(seq_len(dim(uhat)[3]), function(i, U) as.matrix(U[,,i]), U = uhat) 45 | } 46 | 47 | # check uhat entries 48 | if(is.list(uhat) && is.null(dim(uhat[[1]]))){ 49 | uhat <- lapply(uhat, diag) 50 | uhat.diag <- TRUE 51 | }else{ 52 | uhat.diag <- 
FALSE 53 | } 54 | 55 | } 56 | 57 | # convert to standard format 58 | m <- length(qhat) 59 | Qhat <- matrix(unlist(qhat), ncol = m) 60 | p <- nrow(Qhat) 61 | if(is.null(uhat)){ 62 | Uhat <- NULL 63 | }else{ 64 | Uhat <- array(unlist(uhat), dim = c(p, p, m)) 65 | if(any(!is.finite(Uhat))) stop("Missing values in 'uhat' are not allowed.") 66 | } 67 | 68 | # get parameter names 69 | nms <- names(qhat[[1]]) 70 | if(is.null(nms)) nms <- paste0("Parameter.", 1:nrow(Qhat)) 71 | 72 | # pool results 73 | pooled.est <- .pool.estimates(Qhat = Qhat, Uhat = Uhat, m = m, diagonal = uhat.diag, df.com = df.com, nms = nms) 74 | 75 | } 76 | 77 | # *** 78 | # process model input 79 | # 80 | 81 | if(!missing(model)){ 82 | 83 | if(!is.list(model)) stop("The 'model' argument must be a list of fitted statistical models.") 84 | m <- length(model) 85 | 86 | # get class (check required packages) 87 | cls <- class(model[[1]]) 88 | .checkNamespace(cls) 89 | 90 | # extract parameter estimates 91 | est <- .extractParameters(model, include.extra.pars = TRUE) 92 | 93 | Qhat <- est$Qhat 94 | Uhat <- est$Uhat 95 | nms <- est$nms 96 | 97 | # pool estimates 98 | pooled.est <- .pool.estimates(Qhat = Qhat, Uhat = Uhat, m = m, df.com = df.com, nms = nms) 99 | 100 | # handle extra parameters 101 | if(extra.pars){ 102 | 103 | # extract parameter estimates 104 | ep.est <- .extractMiscParameters(model) 105 | ep.Qhat <- ep.est$Qhat 106 | ep.nms <- ep.est$nms 107 | 108 | # pool estimates 109 | if(is.null(ep.Qhat)){ 110 | pooled.ep.est <- NULL 111 | warning("Computation of variance components not supported for objects of class '", paste(cls, collapse = "|"), "' (see ?with.mitml.list for manual calculation).") 112 | }else if(length(ep.Qhat) == 0){ 113 | pooled.ep.est <- NULL 114 | }else{ 115 | pooled.ep.est <- .pool.estimates(Qhat = ep.Qhat, Uhat = NULL, nms = ep.nms) 116 | } 117 | 118 | } 119 | 120 | } 121 | 122 | # *** 123 | # pool results 124 | # 125 | 126 | # pool estimates of extra parameters 127 | 
if(extra.pars && !missing(model)){ 128 | 129 | } 130 | 131 | out <- list( 132 | call = match.call(), 133 | estimates = pooled.est, 134 | extra.pars = pooled.ep.est, 135 | m = m, 136 | adj.df = !is.null(df.com), 137 | df.com = df.com, 138 | cls.method = cls 139 | ) 140 | 141 | class(out) <- "mitml.testEstimates" 142 | return(out) 143 | 144 | } 145 | 146 | -------------------------------------------------------------------------------- /R/testModels.R: -------------------------------------------------------------------------------- 1 | testModels <- function(model, null.model, method = c("D1", "D2", "D3", "D4"), use = c("wald", "likelihood"), ariv = c("default", "positive", "robust"), df.com = NULL, data = NULL){ 2 | # model comparison and hypothesis tests for k-dimensional estimands 3 | 4 | # *** 5 | # check input 6 | # 7 | 8 | # check model specification 9 | m <- length(model) 10 | if(!(is.list(model) && is.list(null.model))) stop("The 'model' and 'null.model' arguments must be lists of fitted statistical models.") 11 | if(length(null.model) != m) stop("The 'model' and 'null.model' arguments must be lists with the same length.") 12 | 13 | # match methods 14 | method <- match.arg(method) 15 | use <- match.arg(use) 16 | ariv <- match.arg(ariv) 17 | 18 | # check for incompatible arguments 19 | if(!is.null(df.com) && method != "D1") warning("Complete-data degrees of freedom are not available for use with '", method, "' and were ignored.") 20 | if(use == "likelihood" && method != "D2") warning("The 'likelihood' option is not available with method '", method ,"' and was ignored.") 21 | if(!is.null(data) && method != "D4") warning("The 'data' argument is not used with method '", method ,"' and was ignored.") 22 | if(ariv == "positive" && method == "D1") warning("The 'positive' option is not available with method 'D1' and was ignored.") 23 | if(ariv == "robust" && method != "D4") warning("The 'robust' option is not available with method '", method ,"' and was ignored.") 24 
| 25 | # check model classes 26 | cls <- class(model[[1]]) 27 | cls.null <- class(null.model[[1]]) 28 | 29 | if(!inherits(null.model[[1]], cls)) warning("The 'model' and 'null.model' arguments appear to include objects of different classes. Results may not be trustworthy.") 30 | 31 | .checkNamespace(union(cls, cls.null)) 32 | 33 | # check for REML and refit (if needed) 34 | reml.model <- sapply(model, .checkREML) 35 | reml.null.model <- sapply(null.model, .checkREML) 36 | reml <- any(reml.model, reml.null.model) 37 | 38 | need.refit <- FALSE 39 | 40 | if(reml){ 41 | need.refit <- (method == "D2" && use == "likelihood") || method == "D3" || method == "D4" 42 | if(need.refit){ 43 | model[reml.model] <- lapply(model[reml.model], .updateML) 44 | null.model[reml.null.model] <- lapply(null.model[reml.null.model], .updateML) 45 | } 46 | } 47 | 48 | # *** 49 | # D1 50 | # 51 | 52 | if(method == "D1"){ 53 | 54 | # FIXME: better way to handle this? 55 | if(inherits(model[[1]], "lavaan")) stop("The 'D1' method is currently not supported for objects of class 'lavaan'. Please see '?testModels' for a list of supported model types.") 56 | 57 | est <- .extractParameters(model, diagonal = FALSE) 58 | est.null <- .extractParameters(null.model, diagonal = FALSE) 59 | 60 | par.diff <- est$nms[!(est$nms %in% est.null$nms)] 61 | par.ind <- match(par.diff, est$nms) 62 | if(length(par.diff) == 0L) stop("The 'model' and 'null.model' appear not to be nested or include the same set of parameters.") 63 | 64 | k <- length(par.diff) 65 | Qhat <- est$Qhat[par.ind,, drop = FALSE] 66 | Uhat <- est$Uhat[par.ind, par.ind,, drop = FALSE] 67 | 68 | # D1 (Li et al., 1991) 69 | D <- .D1(Qhat = Qhat, Uhat = Uhat, df.com = df.com) 70 | 71 | r <- D$r 72 | val <- D$F 73 | v <- D$v 74 | 75 | } 76 | 77 | # *** 78 | # D2 79 | # 80 | 81 | if(method == "D2"){ 82 | 83 | if(use == "wald"){ 84 | 85 | # FIXME: better way to handle this? 
86 | if(inherits(model[[1]], "lavaan")) stop("The 'D2' method currently only supports likelihood-based comparisons for objects of class 'lavaan'. Please see '?testModels' for a list of supported model types.") 87 | 88 | # extract parameter estimates 89 | est <- .extractParameters(model, diagonal = FALSE) 90 | est.null <- .extractParameters(null.model, diagonal = FALSE) 91 | 92 | par.diff <- est$nms[!(est$nms %in% est.null$nms)] 93 | par.ind <- match(par.diff, est$nms) 94 | if(length(par.diff) == 0L) stop("The 'model' and 'null.model' appear not to be nested or include the same set of parameters.") 95 | 96 | # Wald tests 97 | k <- length(par.diff) 98 | Qhat <- est$Qhat[par.ind,, drop = FALSE] 99 | Uhat <- est$Uhat[par.ind, par.ind,, drop = FALSE] 100 | 101 | dW <- sapply(seq_len(m), function(z) t(Qhat[,z]) %*% solve(Uhat[,,z]) %*% Qhat[,z]) 102 | 103 | } 104 | 105 | if(use == "likelihood"){ 106 | 107 | # extract logLik 108 | ll <- .evaluateLogLik(model) 109 | ll.null <- .evaluateLogLik(null.model) 110 | ll.diff <- ll.null$LL - ll$LL 111 | 112 | if(is.null(ll$df) || is.null(ll.null$df)) stop("Degrees of freedom for the model comparison could not be detected.") 113 | k <- ll$df - ll.null$df 114 | 115 | # account for numerical imprecision 116 | isEqual <- mapply(function(x, y) isTRUE(all.equal(x, y)), x = ll$LL, y = ll.null$LL) 117 | ll.diff[isEqual] <- 0L 118 | 119 | # LR tests 120 | dW <- -2 * (ll.diff) 121 | 122 | } 123 | 124 | # D2 (Li, Meng et al., 1991) 125 | D <- .D2(d = dW, k = k) 126 | 127 | r <- D$r 128 | if(ariv == "positive") r <- max(0, r) 129 | val <- D$F 130 | v <- D$v 131 | 132 | } 133 | 134 | # *** 135 | # D3 136 | # 137 | 138 | if(method == "D3"){ 139 | 140 | # evaluate log-likelihood at estimated and pooled values of model parameters 141 | ll <- .evaluateUserLogLik(model) 142 | ll.null <- .evaluateUserLogLik(null.model) 143 | 144 | k <- ll$df - ll.null$df 145 | 146 | # D3 (Meng & Rubin, 1992) 147 | dL.bar <- mean(-2 * (ll.null$LL - ll$LL)) 148 | 
dL.tilde <- mean(-2 * (ll.null$LL.pooled - ll$LL.pooled)) 149 | 150 | r <- (m+1) * (k*(m-1))^(-1) * (dL.bar - dL.tilde) 151 | if(ariv == "positive") r <- max(0, r) 152 | val <- dL.tilde / (k*(1+r)) 153 | 154 | t <- k*(m-1) 155 | if( t > 4 ){ 156 | v <- 4 + (t-4) * (1 + (1-2*t^(-1)) * r^(-1))^2 157 | }else{ 158 | v <- t * (1+k^(-1)) * (1+r^(-1))^2 / 2 159 | } 160 | 161 | use <- "likelihood" 162 | 163 | } 164 | 165 | # *** 166 | # D4 167 | # 168 | 169 | if(method == "D4"){ 170 | 171 | # evaluate log-likelihood at estimated and pooled values of model parameters 172 | ll <- .evaluateStackedLogLik(model, datalist = data) 173 | ll.null <- .evaluateStackedLogLik(null.model, datalist = data) 174 | ll.diff <- ll.null$LL - ll$LL 175 | 176 | ll.stacked.diff <- ll.null$LL.stacked - ll$LL.stacked 177 | 178 | k <- ll$df - ll.null$df 179 | h <- ll$df 180 | 181 | # account for numerical imprecision 182 | if(isTRUE(all.equal(ll.stacked.diff[1], 0))) ll.stacked.diff <- 0L 183 | isEqual <- mapply(function(x, y) isTRUE(all.equal(x, y)), x = ll$LL, y = ll.null$LL) 184 | ll.diff[isEqual] <- 0L 185 | 186 | # D4 (Chan & Meng, 2019) 187 | dbar <- mean(-2 * ll.diff) 188 | dhat <- -2 * ll.stacked.diff 189 | 190 | if(ariv == "robust"){ 191 | 192 | deltabar <- 2 * mean(ll$LL) 193 | deltahat <- 2 * ll$LL.stacked 194 | r <- (m+1) / (h*(m-1)) * (deltabar - deltahat) 195 | v <- (h*(m-1)) * (1 + 1/r)^2 196 | 197 | }else{ 198 | 199 | r <- (m+1) / (k*(m-1)) * (dbar - dhat) 200 | if(ariv == "positive") r <- max(0, r) 201 | v <- (k*(m-1)) * (1 + r^(-1))^2 202 | 203 | } 204 | 205 | val <- dhat / (k*(1+r)) 206 | use <- "likelihood" 207 | 208 | } 209 | 210 | # create output 211 | pval <- pf(val, k, v, lower.tail = FALSE) 212 | out <- matrix(c(val, k, v, pval, r), ncol = 5) 213 | colnames(out) <- c("F.value", "df1", "df2", "P(>F)", "RIV") # new label for p-value, SiG 2017-02-09 214 | 215 | out <- list( 216 | call = match.call(), 217 | test = out, 218 | m = m, 219 | method = method, 220 | adj.df = 
vcov.mitml.testEstimates <- function(object, ...){
  # Extract the pooled variance-covariance matrix of the parameter estimates
  # from a 'mitml.testEstimates' object. If only standard errors are stored,
  # a diagonal matrix of squared SEs is returned (with a warning); otherwise
  # an error is thrown.

  est <- object$estimates
  vc <- attr(est, "T")

  if(!is.null(vc)){

    # full pooled variance-covariance matrix stored alongside the estimates
    out <- vc
    dimnames(out) <- list(rownames(est), rownames(est))

  }else if("Std.Error" %in% colnames(est)){

    # only the standard errors are available: fill the diagonal with squared
    # SEs and leave the (unknown) off-diagonal elements as NA
    warning("Could find only diagonal elements of the pooled variance-covariance matrix.")
    p <- nrow(est)
    out <- matrix(NA_real_, nrow = p, ncol = p)
    diag(out) <- est[, "Std.Error", drop = TRUE]^2
    dimnames(out) <- list(rownames(est), rownames(est))

  }else{

    stop("Could not find the pooled variance-covariance matrix.")

  }

  return(out)

}
within.mitml.list <- function(data, expr, ignore = NULL, ...){
  # evaluate an expression for a list of data sets, then return altered data sets
  #
  # data:   a list of data sets (class 'mitml.list')
  # expr:   an (unevaluated) expression that modifies variables in each data set
  # ignore: (optional) character vector of variable names created by 'expr' that
  #         should NOT be carried over into the returned data sets
  #
  # Returns the list of modified data sets with class c("mitml.list", "list").
  # The logic mirrors base::within.data.frame: the expression is evaluated in an
  # environment whose frame is the data set, and changes are written back.

  # capture the expression unevaluated and remember the caller's frame so that
  # names not found in the data set resolve in the calling environment
  expr <- substitute(expr)
  parent <- parent.frame()

  out <- lapply(data, function(x){
    # build an environment containing the columns of 'x', with 'parent' as its
    # enclosure, and evaluate the user's expression inside it
    e <- evalq(environment(), x, parent)
    eval(expr, e)
    # collect the (possibly modified) variables back into a list
    l <- as.list(e)
    # variables assigned NULL by the expression are treated as deleted
    l <- l[!sapply(l, is.null)]
    # drop variables the caller asked to ignore
    l[ignore] <- NULL
    # nD = number of original columns deleted by the expression; nl = surviving names
    nD <- length(del <- setdiff(names(x), (nl <- names(l))))
    x[nl] <- l
    if(nD){
      # remove deleted columns; for several deletions a list of NULLs is
      # assigned so each targeted column is dropped (base::within idiom)
      x[del] <- if(nD == 1){ NULL } else { vector("list", nD) }
    }
    x
  })

  class(out) <- c("mitml.list", "list")
  return(out)

}
write.mitmlMplus <- function(x, filename, suffix = "list", sep = "\t", dec = ".", na.value=-999){
  # write text files that can be read into Mplus
  #
  # x:        object of class 'mitml' or 'mitml.list'
  # filename: basename for the output files; produces one .dat file per data
  #           set, a list file ("<filename><suffix>.dat"), a .log file, and a
  #           template .inp input file
  # na.value: numeric code substituted for missing values in the output

  if(!inherits(x, "mitml") && !inherits(x, "mitml.list")) stop("'x' must be of class 'mitml' or 'mitml.list'.")

  # convert 'mitml' objects to a list of completed data sets
  if(inherits(x, "mitml")){
    x <- mitmlComplete(x, "all", force.list = TRUE)
  }
  m <- length(x)

  if(!is.list(x)) x <- list(x)

  # one data file per imputation, plus the list file Mplus reads them from
  dnames <- paste(filename, 1:m, ".dat", sep = "")
  lname <- paste(filename, suffix, ".dat", sep = "")

  write.table(dnames, file = lname, col.names = FALSE, row.names = FALSE, quote = FALSE)

  for(ii in 1:m){

    out <- x[[ii]]
    # convert factors
    # non-numeric columns are coerced to factors and recoded as their integer
    # level codes; 'conv' records the level-to-code mapping for the log file
    notnum <- which(sapply(out, function(z) !is.numeric(z)))
    conv <- as.list(notnum)
    for(nn in notnum){
      out[,nn] <- as.factor(out[,nn])
      conv[[colnames(out)[nn]]] <- matrix(c(levels(out[,nn]), 1:nlevels(out[,nn])), ncol = 2)
      out[,nn] <- as.numeric(out[,nn])
    }
    # write (Mplus data files carry no header row)
    out[is.na(out)] <- na.value
    write.table(out, file = dnames[ii], sep = sep, dec = dec, col.names = F, row.names = F,
                quote = FALSE)

  }

  # log file
  # NOTE: 'out' and 'conv' here refer to the last data set; all data sets share
  # the same columns, so the variable names and recodings apply to every file
  cname <- paste(filename, ".log", sep = "")
  cat(file = cname, "The data set featured the following variables:")
  cat(file = cname, "\n\n", paste(colnames(out), collapse = " "), sep = "", append = T)
  if(length(conv)>0){
    cat(file = cname, "\n\n", "Factors were converted to numeric values as follows:\n ", sep = "", append = T)
    for(cc in 1:length(conv)){
      cat(file = cname, "\n", names(conv[cc]), ":\n", sep = "", append = T)
      write.table(conv[[cc]], file = cname, row.names = F, col.names = F, sep = " = ", quote = F, append = T)
    }
  }

  # input file (Mplus template with TYPE = IMPUTATION pointing at the list file)
  iname <- paste(filename, ".inp", sep = "")
  cat(file = iname, sep = "",
      "TITLE:\n This Mplus input file for multiply imputed data sets was generated by mitml in R.\n",
      "DATA:\n file = ", lname, ";\n",
      " type = imputation;\n",
      "VARIABLE:\n names = ", paste(colnames(out), collapse = " "), ";\n",
      " missing = all (", na.value, ");"
  )

  invisible()

}
write.mitmlSAV <- function(x, filename){
  # write to native SPSS format
  #
  # Stacks all imputed data sets into one data frame with a leading
  # "Imputation_" indicator column (0-based) and writes it to a single
  # SPSS .sav file via haven::write_sav().
  #
  # x:        object of class 'mitml' or 'mitml.list'
  # filename: destination file; ".sav" is appended if not already present

  if(!inherits(x, "mitml") && !inherits(x, "mitml.list")) stop("'x' must be of class 'mitml' or 'mitml.list'.")
  # FIX: escape the dot so only a literal ".sav" extension is recognized
  # (the previous pattern ".sav$" also matched names merely ending in "sav")
  if(!grepl("\\.sav$", tolower(filename))) filename <- paste(filename, ".sav", sep = "")

  # convert mitml to mitml.list
  if(inherits(x, "mitml")){
    x <- mitmlComplete(x, "all", force.list = TRUE)
  }

  # add imputation indicator (first data set is numbered 0)
  for(ii in seq_along(x)){
    x[[ii]] <- cbind(ii-1, x[[ii]])
    colnames(x[[ii]])[1] <- "Imputation_"
  }

  # write to file
  out <- do.call(rbind, x)
  haven::write_sav(out, filename)

  invisible()

}
write.mitmlSPSS <- function(x, filename, sep = "\t", dec = ".", na.value=-999, syntax = TRUE, locale = NULL){
  # write text file to be read into SPSS
  #
  # Stacks the imputed data sets (with a 0-based "Imputation_" indicator
  # column) into "<filename>.dat" and, if syntax = TRUE, also writes an SPSS
  # syntax file "<filename>.sps" that reads the data, declares variable
  # formats/measurement levels, value labels, and missing values.
  #
  # x:        object of class 'mitml' or 'mitml.list'
  # sep/dec:  field and decimal separators for the text file
  # na.value: code substituted for missing values
  # locale:   (optional) locale string inserted into the syntax file

  if(!inherits(x, "mitml") && !inherits(x, "mitml.list")) stop("'x' must be of class 'mitml' or 'mitml.list'.")
  if(!dec %in% c(",", ".")) stop("Only a dot '.' or a comma ',' may be specified as decimal separator.")

  if(inherits(x, "mitml")){
    x <- mitmlComplete(x, "all", force.list = TRUE)
  }

  # add imputation indicator and stack all data sets
  for(ii in seq_along(x)){
    x[[ii]] <- cbind(ii-1, x[[ii]])
    colnames(x[[ii]])[1] <- "Imputation_"
  }
  out <- do.call(rbind, x)
  # column types, used below for SPSS formats and measurement levels
  num <- sapply(out, is.numeric)
  chr <- sapply(out, is.character)
  fac <- sapply(out, is.factor)
  ord <- sapply(out, is.ordered)
  # convert factors
  # factors are recoded as integer level codes; 'conv' keeps the mapping
  # so value labels can be generated in the syntax file
  conv <- as.list(which(fac))
  for(ff in which(fac)){
    out[,ff] <- as.factor(out[,ff])
    conv[[colnames(out)[ff]]] <- matrix(c(levels(out[,ff]), 1:nlevels(out[,ff])), ncol = 2)
    out[,ff] <- as.numeric(out[,ff])
  }

  ds <- paste(filename, ".dat", sep = "")
  out[is.na(out)] <- na.value
  write.table(out, file = ds, sep = sep, dec = dec, col.names = T, row.names = F, quote = F)

  # generate syntax
  if(syntax){

    sf <- paste(filename, ".sps", sep = "")
    if(dec == ".") d <- "DOT" else d <- "COMMA"
    cat(file = sf, "SET DECIMAL", d, ".\n")
    if(!is.null(locale)) cat(file = sf, "SET LOCALE", locale, ".\n", append = T)
    cat(file = sf, "\n", append = T)

    cat(file = sf, append = T,
      "GET DATA\n",
      "/TYPE=TXT\n",
      paste("/FILE=\"", ds, "\"\n", sep = ""),
      "/DELCASE=LINE\n",
      paste("/DELIMITERS=\"", sub("\t", "\\\\t", sep), "\"\n", sep = ""),
      "/ARRANGEMENT=DELIMITED\n",
      "/FIRSTCASE=2\n",
      "/IMPORTCASE=ALL\n",
      "/VARIABLES="
    )

    # class specific format
    # widths are taken from the first written row (character columns use the
    # widest entry); numeric-like columns get F<w>.2, strings get A<w>
    width <- sapply(as.matrix(out)[1,], nchar, type = "width")
    width[chr] <- sapply(out[,chr, drop = FALSE], function(z) max(nchar(z, type = "width")))
    fmt <- data.frame(v = colnames(out), f = character(ncol(out)), stringsAsFactors = F)
    fmt[num|fac|ord, "f"] <- paste("F", width[num|fac|ord]+3, ".2", sep = "")
    fmt[chr, "f"] <- paste("A", width[chr], sep = "")
    fmt[num, "l"] <- "SCALE"
    fmt[fac|chr, "l"] <- "NOMINAL"
    fmt[ord, "l"] <- "ORDINAL"
    fmt[1, "l"] <- "NOMINAL"

    cat(file = sf, "\n ", append = T)
    cat(file = sf, paste(fmt$v, fmt$f, collapse = "\n "), ".\n\n", append = T)

    cat(file = sf, append = T, sep = "",
      "CACHE .\n",
      "EXECUTE .\n",
      "DATASET NAME panImpute1 WINDOW=FRONT .\n\n"
    )

    # value labels
    # FIX: use seq_along/seq_len instead of 1:length(conv); with no factor
    # columns, 1:length(conv) evaluated to c(1, 0) and conv[[1]] raised a
    # "subscript out of bounds" error
    cat(file = sf, "VALUE LABELS", append = T)
    for(cc in seq_along(conv)){
      cat(file = sf, "\n", paste("/", names(conv)[cc], sep = ""), append = T)
      for(rr in seq_len(nrow(conv[[cc]]))){
        cat(file = sf, "\n", conv[[cc]][rr, 2], paste("\'", conv[[cc]][rr, 1], "\'", sep = ""), append = T)
      }
    }
    cat(file = sf, " .\n\n", append = T)

    # missing values
    cat(file = sf, append = T,
      "MISSING VALUES\n",
      paste(fmt$v[num|fac|ord], collapse = " "), paste("(", na.value, ")", sep = ""), "\n",
      paste(fmt$v[chr], collapse = " "), paste("(\"", na.value, "\")", sep = ""),
      ".\n"
    )

  }

  invisible()

}
6 | 7 | The purpose of `mitml` is to provide users with a set of effective and user-friendly tools for multiple imputation of multilevel data without requiring advanced knowledge of its statistical underpinnings. 8 | Examples and additional information can be found in the official [documentation](https://cran.r-project.org/package=mitml/mitml.pdf) of the package and in the [Wiki](https://github.com/simongrund1/mitml/wiki) pages on GitHub. 9 | 10 | If you use `mitml` and have suggestions for improvement, please email me (see [here](https://cran.r-project.org/package=mitml)) or file an [issue](https://github.com/simongrund1/mitml/issues) at the GitHub repository. 11 | 12 | #### CRAN version 13 | 14 | The official version of `mitml` is hosted on CRAN and may be found [here](https://cran.r-project.org/package=mitml). The CRAN version can be installed from within R using: 15 | 16 | ```r 17 | install.packages("mitml") 18 | ``` 19 | 20 | [![CRAN release](http://www.r-pkg.org/badges/version/mitml)](https://cran.r-project.org/package=mitml) [![CRAN downloads](http://cranlogs.r-pkg.org/badges/mitml)](https://cran.r-project.org/package=mitml) 21 | 22 | #### GitHub version 23 | 24 | The version hosted here is the development version of `mitml`, allowing better tracking of [issues](https://github.com/simongrund1/mitml/issues) and possibly containing features and changes in advance. 
The GitHub version can be installed using `devtools` as: 25 | 26 | ```r 27 | install.packages("devtools") 28 | devtools::install_github("simongrund1/mitml") 29 | ``` 30 | 31 | ![Github commits](https://img.shields.io/github/commits-since/simongrund1/mitml/latest.svg?colorB=green) 32 | -------------------------------------------------------------------------------- /data/justice.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongrund1/mitml/1b347258e44dda005a4e24db51534360a480daca/data/justice.rda -------------------------------------------------------------------------------- /data/leadership.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongrund1/mitml/1b347258e44dda005a4e24db51534360a480daca/data/leadership.rda -------------------------------------------------------------------------------- /data/studentratings.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/simongrund1/mitml/1b347258e44dda005a4e24db51534360a480daca/data/studentratings.rda -------------------------------------------------------------------------------- /man/amelia2mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{amelia2mitml.list} 2 | \alias{amelia2mitml.list} 3 | \title{Convert objects of class \code{amelia} to \code{mitml.list}} 4 | \description{This function converts a \code{amelia} class object (as produced by the \code{Amelia} package) to \code{mitml.list}. The resulting object may be used in further analyses.} 5 | \usage{ 6 | 7 | amelia2mitml.list(x) 8 | 9 | } 10 | \arguments{ 11 | 12 | \item{x}{An object of class \code{amelia} as produced by \code{amelia} (see the \code{Amelia} package).} 13 | 14 | } 15 | \value{ 16 | 17 | A list of imputed data sets with an additional class attribute \code{mitml.list}. 
18 | 19 | } 20 | \author{Simon Grund} 21 | \seealso{\code{\link{mitmlComplete}}} 22 | 23 | \examples{ 24 | data(studentratings) 25 | 26 | require(Amelia) 27 | imp <- amelia(x = studentratings[, c("ID", "MathAchiev", "ReadAchiev")], cs = "ID") 28 | 29 | implist <- amelia2mitml.list(imp) 30 | } 31 | 32 | -------------------------------------------------------------------------------- /man/anova.mitml.result.Rd: -------------------------------------------------------------------------------- 1 | \name{anova.mitml.result} 2 | \alias{anova.mitml.result} 3 | \title{Compare several nested models} 4 | \description{ 5 | Performs model comparisons for a series of nested statistical models fitted using \code{with.mitml.list}. 6 | } 7 | 8 | \usage{ 9 | 10 | \method{anova}{mitml.result}(object, ..., method = c("D3", "D4", "D2"), 11 | ariv = c("default", "positive", "robust"), 12 | data = NULL) 13 | 14 | } 15 | 16 | \arguments{ 17 | 18 | \item{object}{An object of class \code{mitml.result} as produced by \code{\link{with.mitml.list}}.} 19 | \item{\dots}{Additional objects of class \code{mitml.result} to be compared.} 20 | \item{method}{A character string denoting the method used for the model comparison. Can be \code{"D3"}, \code{"D4"} or \code{"D2"} (see 'Details'). Default is \code{"D3"}.} 21 | \item{ariv}{A character string denoting how the ARIV is calculated. Can be \code{"default"}, \code{"positive"}, or \code{"robust"} (see 'Details').} 22 | \item{data}{(optional) A list of imputed data sets (see 'Details').} 23 | 24 | } 25 | 26 | \details{ 27 | 28 | This function performs likelihood-based comparisons between multiple statistical models fitted with \code{\link{with.mitml.list}}. 29 | If possible, the models are compared using the \eqn{D_3} statistic (Meng & Rubin, 1992). 30 | If this method is unavailable, the \eqn{D_4} or \eqn{D_2} statistic is used instead (Chan & Meng, 2019; Li, Meng, Raghunathan, & Rubin, 1991). 
31 | 32 | This function is essentially a wrapper for \code{\link{testModels}} with the advantage that several models can be compared simultaneously. 33 | For a list of supported models and further options for more specific model comparisons, see \code{testModels}. 34 | 35 | The \code{ariv} argument affects how the average relative increase in variance is calculated (see also \code{testModels}). 36 | Note that the \eqn{D_4} method can fail if the data to which the model was fitted cannot be found. 37 | In such a case, the \code{data} argument can be used to specify the list of imputed data sets directly (see also \code{testModels}). 38 | 39 | } 40 | 41 | \value{ 42 | 43 | A list containing the results of each model comparison. 44 | A \code{print} method is used for more readable output. 45 | 46 | } 47 | 48 | \author{Simon Grund} 49 | \seealso{\code{\link{with.mitml.list}}, \code{\link{testModels}}} 50 | 51 | \examples{ 52 | require(lme4) 53 | data(studentratings) 54 | 55 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 56 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 57 | 58 | implist <- mitmlComplete(imp) 59 | 60 | # simple comparison (same as testModels) 61 | fit0 <- with(implist, lmer(ReadAchiev ~ (1|ID), REML = FALSE)) 62 | fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + (1|ID), REML = FALSE)) 63 | anova(fit1, fit0) 64 | 65 | \dontrun{ 66 | # multiple comparisons 67 | fit2 <- with(implist, lmer(ReadAchiev ~ ReadDis + (1 + ReadDis|ID), REML = FALSE)) 68 | anova(fit2, fit1, fit0) 69 | } 70 | } 71 | 72 | \references{ 73 | Meng, X.-L., & Rubin, D. B. (1992). Performing likelihood ratio tests with multiply-imputed data sets. \emph{Biometrika, 79}, 103-111. 74 | 75 | Laird, N., Lange, N., & Stram, D. (1987). Maximum likelihood computations with repeated measures: Application of the em algorithm. \emph{Journal of the American Statistical Association, 82}, 97-105. 76 | 77 | Li, K. H., Raghunathan, T. E., & Rubin, D. B. (1991). 
Large-sample significance levels from multiply imputed data using moment-based statistics and an F reference distribution. \emph{Journal of the American Statistical Association, 86}, 1065-1073. 78 | } 79 | \keyword{methods} 80 | -------------------------------------------------------------------------------- /man/as.mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{as.mitml.list} 2 | \alias{as.mitml.list} 3 | \title{Convert a list of data sets to \code{mitml.list}} 4 | \description{ 5 | This function adds a \code{mitml.list} class attribute to a list of data frames. The resulting object can be used in further analyses. 6 | } 7 | 8 | \usage{ 9 | 10 | as.mitml.list(x) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{A list of data frames.} 17 | 18 | } 19 | 20 | \value{ 21 | 22 | The original list with an additional class attribute \code{mitml.list}. 23 | The list entries are converted into a \code{data.frame} if necessary, in which case a note is printed. 24 | 25 | } 26 | 27 | \author{Simon Grund} 28 | \seealso{\code{\link{is.mitml.list}}, \code{\link{long2mitml.list}}} 29 | 30 | \examples{ 31 | # data frame with 'imputation' indicator 32 | dat <- data.frame(imputation = rep(1:10, each = 20), x = rnorm(200)) 33 | 34 | # split into a list and convert to 'mitml.list' 35 | l <- split(dat, dat$imputation) 36 | l <- as.mitml.list(l) 37 | 38 | is.mitml.list(l) 39 | # TRUE 40 | } 41 | -------------------------------------------------------------------------------- /man/clusterMeans.Rd: -------------------------------------------------------------------------------- 1 | \name{clusterMeans} 2 | \alias{clusterMeans} 3 | \title{Calculate cluster means} 4 | \description{ 5 | Calculates the mean of a given variable within each cluster, possibly conditioning on an additional grouping variable. 
6 | } 7 | 8 | \usage{ 9 | 10 | clusterMeans(x, cluster, adj = FALSE, group = NULL) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{A numeric vector for which cluster means should be calculated. Can also be supplied as a character string denoting a variable in the current environment (see 'Details').} 17 | \item{cluster}{A numeric vector or a factor denoting the cluster membership of each unit in \code{x}. Can also be supplied as a character string (see 'Details').} 18 | \item{adj}{Logical flag indicating if person-adjusted group means should be calculated. If \code{TRUE}, cluster means are calculated for each unit by excluding that unit from calculating the cluster mean. Default is \code{FALSE}.} 19 | \item{group}{(optional) A grouping factor or a variable that can be interpreted as such. If specified, cluster means are calculated separately within the sub-groups defined by \code{group}. Can also be supplied as a character string (see 'Details').} 20 | 21 | } 22 | 23 | \details{ 24 | 25 | This function calculates the mean of a variable within each level of a cluster variable. 26 | Any \code{NA} are omitted during calculation. 27 | 28 | The three main arguments of the function can also be supplied as (single) character strings, denoting the name of the respective variables in the current environment. 29 | This is especially useful for calculating several cluster means simultaneously, for example using \code{\link{within.mitml.list}} (see 'Example 2' below). 30 | 31 | } 32 | 33 | \value{ 34 | 35 | A numeric vector with the same length as \code{x} containing the cluster mean for all units. 
36 | 37 | } 38 | 39 | \author{Simon Grund, Alexander Robitzsch} 40 | \seealso{\code{\link{within.mitml.list}}} 41 | \examples{ 42 | data(studentratings) 43 | 44 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 45 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 46 | 47 | implist <- mitmlComplete(imp) 48 | 49 | # * Example 1: single cluster means 50 | 51 | # calculate cluster means (for each data set) 52 | with(implist, clusterMeans(ReadAchiev, ID)) 53 | 54 | # ... person-adjusted cluster means 55 | with(implist, clusterMeans(ReadAchiev, ID, adj = TRUE)) 56 | 57 | # ... groupwise cluster means 58 | with(implist, clusterMeans(ReadAchiev, ID, group = Sex)) 59 | 60 | # * Example 2: automated cluster means using 'for' and 'assign' 61 | 62 | # calculate multiple cluster means within multiply imputed data sets 63 | within(implist,{ 64 | vars <- c("ReadAchiev", "MathAchiev", "CognAbility") 65 | for(i in vars) assign(paste(i, "Mean", sep = "."), clusterMeans(i, ID)) 66 | rm(i, vars) 67 | }) 68 | } 69 | -------------------------------------------------------------------------------- /man/is.mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{is.mitml.list} 2 | \alias{is.mitml.list} 3 | \title{Check if an object is of class \code{mitml.list}} 4 | \description{ 5 | This function checks if its argument is a list of class \code{mitml.list}. 6 | } 7 | 8 | \usage{ 9 | 10 | is.mitml.list(x) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{An R object.} 17 | 18 | } 19 | 20 | \value{ 21 | 22 | \code{TRUE} or \code{FALSE}. 23 | A warning message is displayed if the contents of \code{x} do not appear to be data frames. 
24 | 25 | } 26 | 27 | \author{Simon Grund} 28 | \seealso{\code{\link{as.mitml.list}}} 29 | 30 | \examples{ 31 | l <- list(data.frame(x = rnorm(20))) 32 | l <- as.mitml.list(l) 33 | is.mitml.list(l) 34 | # TRUE 35 | 36 | l <- as.list(1:10) 37 | is.mitml.list(l) 38 | # FALSE 39 | 40 | class(l) <- "mitml.list" 41 | is.mitml.list(l) 42 | # TRUE, with a warning 43 | } 44 | -------------------------------------------------------------------------------- /man/justice.Rd: -------------------------------------------------------------------------------- 1 | \name{justice} 2 | \alias{justice} 3 | \docType{data} 4 | \title{Example data set on employees' justice perceptions and satisfaction} 5 | \description{ 6 | Data set containing simulated data for employees nested within organizations, featuring employees' sex, ratings on individual justice orientation and ratings on job satisfaction. 7 | The data set also includes scores for justice climate in each organization (defined at the level of organizations, level 2). 8 | Different organizations are denoted by the variable \code{id}. 9 | 10 | The data were simulated based on the results by Liao and Rupp (2005), as well as the secondary analyses of the same data given in Mathieu, Aguinis, Culpepper, and Chen, (2012). 11 | } 12 | \format{A data frame containing 1400 observations on 4 variables.} 13 | \usage{data(justice)} 14 | \references{ 15 | Liao, H., & Rupp, D. E. (2005). The impact of justice climate and justice orientation on work outcomes: A cross-level multifoci framework. \emph{Journal of Applied Psychology}, 90, 242.256. 16 | 17 | Mathieu, J. E., Aguinis, H., Culpepper, S. A., & Chen, G. (2012). Understanding and estimating the power to detect cross-level interaction effects in multilevel modeling. \emph{Journal of Applied Psychology}, 97, 951-966. 
18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /man/leadership.Rd: -------------------------------------------------------------------------------- 1 | \name{leadership} 2 | \alias{leadership} 3 | \docType{data} 4 | \title{Example data set on leadership style and job satisfaction} 5 | \description{ 6 | Data set based on the data simulated by Paul D. Bliese as described in Klein et al. (2000) with slight modifications. 7 | The data set consists of 750 employees, nested within 50 work groups, and includes employees' ratings on negative leadership style, job satisfaction, and workload as well as a measure for each work group's cohesion. 8 | 9 | The original data set is available in the \code{multilevel} package and was modified by (a) transforming workload into a categorical variable, (b) transforming cohesion into a group-level variable, and (c) by inducing missing values. 10 | } 11 | \usage{data(leadership)} 12 | \format{A data frame containing 750 observations on 5 variables.} 13 | \references{ 14 | 15 | Bliese, P. D. (2013). multilevel: Multilevel functions (Version 2.5) [Computer software]. Retrieved from \code{http://CRAN.R-project.org/package=multilevel} 16 | 17 | Klein, K. J., Bliese, P. D., Kozlowski, S. W. J., Dansereau, F., Gavin, M. B., Griffin, M. A., ... Bligh, M. C. (2000). Multilevel analytical techniques: Commonalities, differences, and continuing questions. In K. J. Klein & S. W. J. Kozlowski (Eds.), \emph{Multilevel theory, research, and methods in organizations: Foundations, extensions, and new directions} (pp. 512-553). San Francisco, CA: Jossey-Bass. 
18 | 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /man/long2mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{long2mitml.list} 2 | \alias{long2mitml.list} 3 | \alias{jomo2mitml.list} 4 | \title{Convert imputations from long format to \code{mitml.list}} 5 | \description{ 6 | These functions convert data sets containing multiple imputations in long format to objects of class \code{mitml.list}. The resulting object can be used in further analyses. 7 | } 8 | 9 | \usage{ 10 | 11 | long2mitml.list(x, split, exclude = NULL) 12 | 13 | jomo2mitml.list(x) 14 | 15 | } 16 | 17 | \arguments{ 18 | 19 | \item{x}{A data frame in long format containing multiple imputations (see 'Details').} 20 | \item{split}{A character string denoting the column in \code{x} that identifies different imputations (see 'Details').} 21 | \item{exclude}{A vector denoting the values of \code{split} that should be excluded.} 22 | 23 | } 24 | 25 | \details{ 26 | 27 | The function \code{long2mitml.list} converts data frames from the long format to \code{mitml.list} (i.e., a list of imputed data sets). 28 | In long format, all imputations are contained in a single data frame, where different imputations are denoted by \code{split}. 29 | This function splits the data frame into a list of imputed data sets according to \code{split}, excluding the values specified by \code{exclude} (see the 'Examples'). 30 | 31 | The \code{jomo2mitml.list} function is a special case of \code{long2mitml.list} which converts imputations that have been generated with \code{jomo} (see the \code{jomo} package)). 32 | } 33 | 34 | \value{ 35 | 36 | A list of imputed data sets with class \code{mitml.list}. 
37 | 38 | } 39 | 40 | \author{Simon Grund} 41 | \seealso{\code{\link{mitmlComplete}}} 42 | 43 | \examples{ 44 | data(studentratings) 45 | require(jomo) 46 | 47 | # impute data using jomo (native functions) 48 | clus <- studentratings[, "ID"] 49 | Y <- studentratings[, c("ReadAchiev", "ReadDis")] 50 | imp <- jomo(Y = Y, clus = clus, nburn = 1000, nbetween = 100, nimp = 5) 51 | 52 | # split imputations 53 | impList <- long2mitml.list(imp, split = "Imputation", exclude = 0) 54 | impList <- jomo2mitml.list(imp) 55 | } 56 | -------------------------------------------------------------------------------- /man/methods-mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{c.mitml.list} 2 | \alias{c.mitml.list} 3 | \alias{rbind.mitml.list} 4 | \alias{cbind.mitml.list} 5 | \title{Concatenate lists of imputed data sets} 6 | \description{ 7 | These functions allow concatenating lists of imputed data sets by data set, row, or column. 8 | } 9 | 10 | \usage{ 11 | 12 | \method{c}{mitml.list}(...) 13 | \method{rbind}{mitml.list}(...) 14 | \method{cbind}{mitml.list}(...) 15 | 16 | } 17 | 18 | \arguments{ 19 | 20 | \item{\dots}{One or several lists of imputed data sets with class \code{mitml.list} as produced by \code{mitmlComplete} (or similar).} 21 | 22 | } 23 | 24 | \details{ 25 | 26 | The \code{c}, \code{cbind}, and \code{rbind} functions allow combining multiple lists of imputed data sets in different ways. 27 | The \code{c} method concatenates by data set (i.e., by appending additional data sets to the list), \code{rbind} concatenates by row (i.e., appending additional rows to each data set), and \code{cbind} concatenates by column (i.e., by appending additional columns to each data set). 28 | 29 | These functions are intended for experienced users and should be used with caution. 30 | Appending rows or columns from multiple imputation procedures is usually unsafe unless in special applications (see 'Examples'). 
31 | } 32 | 33 | \value{ 34 | 35 | A list of imputed data sets with an additional class attribute \code{mitml.list}. 36 | 37 | } 38 | 39 | \author{Simon Grund} 40 | \examples{ 41 | # Example 1: manual imputation by grouping variable 42 | 43 | data(studentratings) 44 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 45 | 46 | imp1 <- panImpute(subset(studentratings, FedState == "SH"), formula = fml, 47 | n.burn = 1000, n.iter = 100, m = 5) 48 | 49 | imp2 <- panImpute(subset(studentratings, FedState == "B"), formula = fml, 50 | n.burn = 1000, n.iter = 100, m = 5) 51 | 52 | implist1 <- mitmlComplete(imp1) 53 | implist2 <- mitmlComplete(imp2) 54 | 55 | rbind(implist1, implist2) 56 | 57 | # Example 2: predicted values from linear model 58 | 59 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 60 | implist <- mitmlComplete(imp) 61 | 62 | pred <- with(implist, predict(lm(ReadDis ~ ReadAchiev))) 63 | cbind(implist, pred.ReadDis = pred) 64 | } 65 | \keyword{methods} 66 | -------------------------------------------------------------------------------- /man/methods-mitml.testEstimates.Rd: -------------------------------------------------------------------------------- 1 | \name{confint.mitml.testEstimates} 2 | \alias{coef.mitml.testEstimates} 3 | \alias{vcov.mitml.testEstimates} 4 | \alias{confint.mitml.testEstimates} 5 | \title{Summarize and extract pooled parameter estimates} 6 | \description{ 7 | Functions for extracting results and computing confidence intervals from the pooled parameter estimates computed with \code{\link{testEstimates}}. 8 | } 9 | 10 | \usage{ 11 | 12 | \method{coef}{mitml.testEstimates}(object, ...) 13 | \method{vcov}{mitml.testEstimates}(object, ...) 14 | \method{confint}{mitml.testEstimates}(object, parm, level = 0.95, ...) 
15 | 16 | } 17 | \arguments{ 18 | 19 | \item{object}{An object of class \code{mitml.testEstimates} as produced by \code{testEstimates}.} 20 | \item{parm}{(optional) A reference to the parameters for which to calculate confidence intervals. Can be a character or integer vector denoting names or positions of parameters, respectively. 21 | If missing, all parameters are considered (the default).} 22 | \item{level}{The confidence level. Default is \code{0.95} (i.e., 95\%).} 23 | \item{\dots}{Not being used.} 24 | 25 | } 26 | \details{ 27 | 28 | These functions can be used to extract information and compute additional results from pooled parameter estimates. 29 | The \code{coef} and \code{vcov} methods extract the pooled parameter estimates and their pooled variance-covariance matrix (with the squared standard errors in the diagonal). 30 | The \code{confint} method computes confidence intervals with the given confidence level for the pooled parameters on the basis of a \eqn{t}-distribution, with estimates, standard errors, and degrees of freedom as returned by \code{\link{testEstimates}}. 31 | 32 | } 33 | \value{ 34 | 35 | 36 | For \code{coef}: A vector containing the pooled parameter estimates. 37 | For \code{vcov}: A matrix containing the pooled variance-covariance matrix of the parameter estimates. 38 | For \code{confint}: A matrix containing the lower and upper bounds of the confidence intervals. 
39 | 40 | } 41 | 42 | \author{Simon Grund} 43 | 44 | \seealso{\code{\link{testEstimates}}} 45 | \examples{ 46 | data(studentratings) 47 | 48 | fml <- ReadDis ~ ReadAchiev + (1|ID) 49 | imp <- panImpute(studentratings, formula = fml, n.burn = 500, n.iter = 100, m = 5) 50 | 51 | implist <- mitmlComplete(imp) 52 | 53 | # fit regression model 54 | fit <- with(implist, lm(ReadDis ~ 1 + ReadAchiev)) 55 | est <- testEstimates(fit) 56 | 57 | # extract parameter estimates and variance-covariance matrix 58 | coef(est) 59 | vcov(est) 60 | 61 | # compute confidence intervals 62 | confint(est) 63 | 64 | # ... with different confidence levels 65 | confint(est, level = 0.90) 66 | confint(est, level = 0.999) 67 | } 68 | 69 | -------------------------------------------------------------------------------- /man/mids2mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{mids2mitml.list} 2 | \alias{mids2mitml.list} 3 | \title{Convert objects of class \code{mids} to \code{mitml.list}} 4 | \description{ 5 | This function converts a \code{mids} class object (as produced by the \code{mice} package) to \code{mitml.list}. The resulting object may be used in further analyses. 6 | } 7 | 8 | \usage{ 9 | 10 | mids2mitml.list(x) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{An object of class \code{mids} as produced by \code{mice} (see the \code{mice} package).} 17 | 18 | } 19 | 20 | \value{ 21 | 22 | A list of imputed data sets with class \code{mitml.list}. 
23 | 24 | } 25 | 26 | \author{Simon Grund} 27 | \seealso{\code{\link{mitmlComplete}}} 28 | 29 | \examples{ 30 | data(studentratings) 31 | 32 | # imputation using mice 33 | require(mice) 34 | imp <- mice(studentratings) 35 | 36 | implist <- mids2mitml.list(imp) 37 | } 38 | -------------------------------------------------------------------------------- /man/mitml-package.Rd: -------------------------------------------------------------------------------- 1 | \name{mitml-package} 2 | \alias{mitml-package} 3 | \docType{package} 4 | \title{mitml: Tools for multiple imputation in multilevel modeling} 5 | \description{ 6 | Provides tools for multiple imputation of missing data in multilevel modeling. 7 | This package includes a user-friendly interface to the algorithms implemented in the R packages \code{pan} and \code{jomo} as well as several functions for visualizing, managing, and analyzing multiply imputed data sets. 8 | 9 | The main interface to \code{pan} is the function \code{\link{panImpute}}, which allows specifying imputation models for continuous variables with missing data at level 1. 10 | In addition, the function \code{\link{jomoImpute}} provides an interface to \code{jomo}, which allows specifying imputation models for both continuous and categorical variables with missing data at levels 1 and 2 as well as single-level data. 11 | The imputations and parameter values are stored in objects of class \code{mitml}. 12 | To obtain the completed (i.e., imputed) data sets, \code{\link{mitmlComplete}} is used, producing a list of imputed data sets of class \code{mitml.list} that can be used in further analyses. 13 | 14 | Several additional functions allow for convenient analysis of multiply imputed data sets including (but not limited to) multilevel analyses with \code{lme4} or \code{nlme} and structural equation modeling with \code{lavaan}. 
15 | The functions \code{\link[=with.mitml.list]{within}}, \code{\link[=sort.mitml.list]{sort}}, and \code{\link[=subset.mitml.list]{subset}} can be used to manage and manipulate multiply imputed data sets. 16 | Statistical models are fitted using \code{\link[=with.mitml.list]{with}}. 17 | Pooled parameter estimates can be extracted with \code{\link{testEstimates}}, and model comparisons as well as single- and multi-parameter hypotheses tests can be performed using the functions \code{\link{testModels}} and \code{\link{testConstraints}}. 18 | In addition, the \code{\link[=anova.mitml.result]{anova}} method provides a simple interface to model comparisons. 19 | 20 | Data sets can be imported and exported from or to different statistical software packages. 21 | Currently, \code{\link{mids2mitml.list}}, \code{\link{amelia2mitml.list}}, \code{\link{jomo2mitml.list}}, and \code{\link{long2mitml.list}} can be used for importing imputations for other packages in R. 22 | In addition, \code{\link{write.mitmlMplus}}, \code{\link{write.mitmlSAV}}, and \code{\link{write.mitmlSPSS}} export data sets to M\emph{plus} and SPSS, respectively. 23 | 24 | Finally, the package provides tools for summarizing and visualizing imputation models, which is useful for the assessment of convergence and the reporting of results. 25 | 26 | The data sets contained in this package are published under the same license as the package itself. 27 | They contain simulated data and may be used by anyone free of charge as long as reference to this package is given. 
28 | } 29 | \author{ 30 | Authors: Simon Grund, Alexander Robitzsch, Oliver Luedtke 31 | 32 | Maintainer: Simon Grund 33 | } 34 | \keyword{package} 35 | -------------------------------------------------------------------------------- /man/mitml.list2mids.Rd: -------------------------------------------------------------------------------- 1 | \name{mitml.list2mids} 2 | \alias{mitml.list2mids} 3 | \title{Convert objects of class \code{mitml.list} to \code{mids}} 4 | \description{ 5 | This function converts a \code{mitml.list} class object to \code{mids} (as used in the \code{mice} package). 6 | } 7 | 8 | \usage{ 9 | mitml.list2mids(x, data, fill = FALSE, where = NULL) 10 | } 11 | 12 | \arguments{ 13 | 14 | 15 | \item{x}{A list of imputed data sets with class \code{mitml.list} (as produced by \code{\link{mitmlComplete}}, \code{\link{mids2mitml.list}}, or similar).} 16 | \item{data}{A data frame containing the original (incomplete) data (see 'Details').} 17 | \item{fill}{A logical flag indicating whether variables in the imputed data that are not in the original data should be added and filled with \code{NA} (default is \code{FALSE}).} 18 | \item{where}{(optional) A data frame or matrix of logicals indicating the location of missing values (see 'Details').} 19 | 20 | } 21 | 22 | \details{ 23 | 24 | This function converts objects of class \code{mitml.list} into \code{mids} objects (as used in the \code{mice} package). 25 | The conversion requires a list of imputed data sets and the original (incomplete) data set. 26 | If the imputed data sets have been appended with new variables (e.g., by \code{\link{within.mitml.list}}), the new variables can be added to the original data set by setting \code{fill = TRUE}. 27 | 28 | This function is essentially a wrapper around \code{\link[mice:as.mids]{as.mids}} that sets the case and imputation identifiers automatically and passes the \code{where} argument as is (see also the documentation of \code{\link[mice:as.mids]{as.mids}}). 
29 | 30 | } 31 | 32 | \value{ 33 | 34 | An object of class \code{mids}. 35 | 36 | } 37 | 38 | \author{Simon Grund} 39 | \seealso{\code{\link{mitmlComplete}}, \code{\link{mids2mitml.list}}, \code{\link{within.mitml.list}}} 40 | 41 | \examples{ 42 | data(studentratings) 43 | 44 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 45 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 46 | 47 | implist <- mitmlComplete(imp) 48 | 49 | # * Example 1: simple conversion 50 | 51 | # convert to 'mids' 52 | impmids <- mitml.list2mids(implist, data = studentratings) 53 | 54 | # * Example 2: conversion with additional variables 55 | 56 | # compute new variables 57 | implist <- within(implist, { 58 | M.ReadAchiev <- clusterMeans(ReadAchiev, ID) 59 | C.ReadAchiev <- ReadAchiev - M.ReadAchiev 60 | }) 61 | 62 | # convert to 'mids' 63 | impmids <- mitml.list2mids(implist, data = studentratings, fill = TRUE) 64 | } 65 | -------------------------------------------------------------------------------- /man/mitmlComplete.Rd: -------------------------------------------------------------------------------- 1 | \name{mitmlComplete} 2 | \alias{mitmlComplete} 3 | \title{Extract imputed data sets} 4 | \description{ 5 | This function extracts imputed data sets from \code{mitml} class objects as produced by \code{panImpute} and \code{jomoImpute}. 6 | } 7 | 8 | \usage{ 9 | 10 | mitmlComplete(x, print = "all", force.list = FALSE) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{An object of class \code{mitml} as produced by \code{panImpute} or \code{jomoImpute}.} 17 | \item{print}{Either an integer vector, \code{"list"}, or \code{"all"} denoting which data sets to extract. If set to \code{"list"} or \code{"all"}, all imputed data sets will be returned as a list. Negative values and zero return the original (incomplete) data set. Default is \code{"all"}.} 18 | \item{force.list}{(optional) Logical flag indicating if single data sets should be enclosed in a list. 
Default is \code{FALSE}.} 19 | 20 | } 21 | 22 | \value{ 23 | 24 | Usually a list of imputed data sets with class \code{mitml.list}. 25 | If only one data set is extracted: a data frame unless \code{force.list = TRUE}. 26 | 27 | } 28 | 29 | \author{Simon Grund} 30 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}} 31 | 32 | \examples{ 33 | data(studentratings) 34 | 35 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 36 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 37 | 38 | # extract original (incomplete) data set 39 | mitmlComplete(imp, print = 0) 40 | 41 | # extract first imputed data set (returned as mitml.list) 42 | mitmlComplete(imp, print = 1, force.list = TRUE) 43 | 44 | # extract all imputed data sets at once 45 | implist <- mitmlComplete(imp, print = "all") 46 | 47 | \dontrun{ 48 | # ... alternatives with same results 49 | implist <- mitmlComplete(imp, print = 1:5) 50 | implist <- mitmlComplete(imp, print = "list") 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /man/multilevelR2.Rd: -------------------------------------------------------------------------------- 1 | \name{multilevelR2} 2 | \alias{multilevelR2} 3 | \title{Calculate R-squared measures for multilevel models} 4 | \description{ 5 | Calculates several measures for the proportion of explained variance in a fitted linear mixed-effects or multilevel model (or a list of fitted models). 6 | } 7 | 8 | \usage{ 9 | 10 | multilevelR2(model, print = c("RB1", "RB2", "SB", "MVP")) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{model}{Either a fitted linear mixed-effects model as produced by \code{lme4} or \code{nlme}, or a list of fitted models as produced by \code{with.mitml.list}.} 17 | \item{print}{A character vector denoting which measures should be calculated (see details). 
Default is to print all measures.} 18 | 19 | } 20 | 21 | \details{ 22 | 23 | This function calculates several measures of explained variance (\eqn{R^2}) for linear mixed-effects models. 24 | It can be used with a single model, as produced by the packages \code{lme4} or \code{nlme}, or a list of fitted models produced by \code{with.mitml.list}. 25 | In the latter case, the \eqn{R^2} measures are calculated separately for each imputed data set and then averaged across data sets. 26 | 27 | Different \eqn{R^2} measures can be requested using the \code{print} argument. 28 | Specifying \code{RB1} and \code{RB2} returns the explained variance at level 1 and level 2, respectively, according to Raudenbush and Bryk (2002, pp. 74 and 79). 29 | Specifying \code{SB} returns the total variance explained according to Snijders and Bosker (2012, p. 112). 30 | Specifying \code{MVP} returns the total variance explained based on ``multilevel variance partitioning'' as proposed by LaHuis, Hartman, Hakoyama, and Clark (2014). 31 | 32 | } 33 | 34 | \value{ 35 | 36 | A numeric vector containing the \eqn{R^2} measures requested in \code{print}. 37 | 38 | } 39 | 40 | \note{ 41 | 42 | Calculating \eqn{R^2} measures is currently only supported for two-level models with a single cluster variable. 43 | 44 | } 45 | 46 | \author{Simon Grund} 47 | \references{ 48 | LaHuis, D. M., Hartman, M. J., Hakoyama, S., & Clark, P. C. (2014). Explained variance measures for multilevel models. \emph{Organizational Research Methods}, 17, 433-451. 49 | 50 | Raudenbush, S. W., & Bryk, A. S. (2002). Hierarchical linear models: Applications and data analysis methods (2nd ed.). Thousand Oaks, CA: Sage. 51 | 52 | Snijders, T. A. B., & Bosker, R. J. (2012). Multilevel analysis: An introduction to basic and advanced multilevel modeling. Thousand Oaks, CA: Sage. 
53 | } 54 | 55 | \examples{ 56 | require(lme4) 57 | data(studentratings) 58 | 59 | fml <- MathAchiev + ReadAchiev + CognAbility ~ 1 + (1|ID) 60 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 61 | 62 | implist <- mitmlComplete(imp) 63 | 64 | fit <- with(implist, lmer(MathAchiev ~ 1 + CognAbility + (1|ID))) 65 | multilevelR2(fit) 66 | } 67 | -------------------------------------------------------------------------------- /man/plot.mitml.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.mitml} 2 | \alias{plot.mitml} 3 | \title{Print diagnostic plots} 4 | \description{ 5 | Generates diagnostic plots for assessing the convergence and autocorrelation behavior of \code{pan}'s and \code{jomo}'s MCMC algorithms. 6 | } 7 | 8 | \usage{ 9 | 10 | \method{plot}{mitml}(x, print = c("beta", "beta2", "psi", "sigma"), pos = NULL, group = "all", 11 | trace = c("imputation", "burnin", "all"), thin = 1, smooth = 3, n.Rhat = 3, 12 | export = c("none", "png", "pdf"), dev.args = list(), ...) 13 | 14 | } 15 | 16 | \arguments{ 17 | 18 | \item{x}{An object of class \code{mitml} as produced by \code{panImpute} or \code{jomoImpute}.} 19 | \item{print}{A character vector containing one or several of \code{"beta"}, \code{"beta2"}, \code{"psi"} or \code{"sigma"} denoting which parameters should be plotted. Default is to plot all parameters.} 20 | \item{pos}{Either \code{NULL} or an integer vector denoting a specific entry in \code{"beta"}, \code{"beta2"}, \code{"psi"} or \code{"sigma"}. Default is \code{NULL}, which plots all entries.} 21 | \item{group}{Either \code{"all"} or an integer denoting for which group the plots should be generated. Used only when groupwise imputation was used. Default is \code{"all"}.} 22 | \item{trace}{One of \code{"imputation"}, \code{"burnin"} or \code{"all"} denoting which part of the chain should be used for the trace plot. 
Default is \code{"imputation"}, which plots only the iterations after burn-in.} 23 | \item{thin}{An integer denoting the thinning factor that is applied before plotting. Default is \code{1}, which corresponds to no thinning.} 24 | \item{smooth}{A numeric value denoting the smoothing factor for the trend line in trace plots. Higher values correspond to less smoothing. Default is \code{3}. If set to \code{0} or \code{NULL}, the trend line is suppressed.} 25 | \item{n.Rhat}{An integer denoting the number of segments of each chain used for calculating the potential scale reduction factor. Default is \code{3}.} 26 | \item{export}{(optional) A character string specifying if plots should be exported to a file. If \code{"png"} or \code{"pdf"}, then plots are printed into a folder named "mitmlPlots" in the current directory using either the \code{png} or the \code{pdf} device. Default is \code{"none"}, which does not export files.} 27 | \item{dev.args}{(optional) A named list containing additional arguments that are passed to the graphics device.} 28 | \item{\dots}{Parameters passed to the plotting functions.} 29 | 30 | } 31 | 32 | \details{ 33 | 34 | The \code{plot} method generates a series of plots for the parameters of the imputation model which can be used for diagnostic purposes. 35 | In addition, a short summary of the parameter chain is displayed. 36 | 37 | Setting \code{print} to \code{"beta"}, \code{"beta2"}, \code{"psi"} and \code{"sigma"} will plot the fixed effects, the variances and covariances of the random effects, and the variances and covariances of the residuals, respectively. 38 | In this context, \code{"beta2"} refers to the fixed effects for target variables at level 2 and is only used when a two-part model was specified in (\code{\link{jomoImpute}}). 
39 | Each plotting window contains a trace plot (upper left), an autocorrelation plot (lower left), a kernel density approximation of the posterior distribution (upper right), and a posterior summary (lower right). 40 | The summary includes the following quantities: 41 | \describe{ 42 | \item{\code{EAP}:}{Expected value a posteriori (i.e., the mean of the parameter chain)} 43 | \item{\code{MAP}:}{Mode a posteriori (i.e., the mode of the parameter chain)} 44 | \item{\code{SD}:}{Standard deviation of the parameter chain} 45 | \item{\code{2.5\%}:}{The 2.5\% quantile of parameter values} 46 | \item{\code{97.5\%}:}{The 97.5\% quantile of parameter values} 47 | \item{\code{Rhat}:}{Estimated potential scale reduction factor (\eqn{\hat{R}})} 48 | \item{\code{ACF-k}:}{Smoothed autocorrelation at lag \eqn{k}, where \eqn{k} is the number of iterations between imputations (see \code{\link{summary.mitml}})} 49 | } 50 | The \code{trace} and \code{smooth} arguments can be used to influence how the trace plot is drawn and what part of the chain should be used for it. 51 | The \code{thin} argument can be used for thinning the chain before plotting, in which case the number of data points is reduced in the trace plot, and the autocorrelation is calculated up to lag \eqn{k/thin} (see above). 52 | The \code{n.Rhat} argument controls the number of segments that are used for calculating the potential scale reduction factor (\eqn{\hat{R}}) in each plot (see \code{summary.mitml}). 53 | Further arguments to the graphics device are supplied using the \code{dev.args} argument. 54 | 55 | The \code{plot} function computes and displays diagnostic information primarily for the imputation phase (i.e., for iterations after burn-in). 56 | This is the default in the \code{plot} function and the recommended method for most users. 
57 | If \code{trace = "all"}, the full chain is displayed with emphasis on the imputation phase, and the posterior summary is calculated based on only the iterations after burn-in (as recommended). 58 | If \code{trace = "burnin"}, the posterior summary and the trace plots are calculated based on only the burn-in iterations, which is generally not sufficient to establish convergence and should be used with caution. 59 | 60 | } 61 | \note{ 62 | 63 | The plots are presented on-screen one at a time. 64 | To proceed with the next plot, the user may left-click in the plotting window or press the "enter" key while in the R console, depending on the operating system. 65 | No plots are displayed when exporting to file. 66 | 67 | } 68 | 69 | \value{ 70 | 71 | None (invisible \code{NULL}). 72 | 73 | } 74 | 75 | \author{Simon Grund} 76 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{summary.mitml}}} 77 | \examples{ 78 | \dontrun{ 79 | data(studentratings) 80 | 81 | # * Example 1: simple imputation 82 | 83 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 84 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 85 | 86 | # plot all parameters (default) 87 | plot(imp) 88 | 89 | # plot fixed effects only 90 | plot(imp, print = "beta") 91 | 92 | # export plots to file (using pdf device) 93 | plot(imp, export = "pdf", dev.args = list(width = 9, height = 4, pointsize = 12)) 94 | 95 | # * Example 2: groupwise imputation 96 | 97 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 98 | imp <- panImpute(studentratings, formula = fml, group = FedState, n.burn = 1000, 99 | n.iter = 100, m = 5) 100 | 101 | # plot fixed effects for all groups (default for 'group') 102 | plot(imp, print = "beta", group = "all") 103 | 104 | # plot fixed effects for first group only 105 | plot(imp, print = "beta", group = 1) 106 | } 107 | } 108 | \keyword{methods} 109 | -------------------------------------------------------------------------------- 
/man/read.mitml.Rd: -------------------------------------------------------------------------------- 1 | \name{read.mitml} 2 | \alias{read.mitml} 3 | \title{Read \code{mitml} objects from file} 4 | \description{ 5 | This function loads \code{mitml} class objects from R binary formats (similar to \code{?load}), usually produced by \code{write.mitml}. 6 | } 7 | 8 | \usage{ 9 | 10 | read.mitml(filename) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{filename}{Name of the file to read, to be specified with file extension (e.g., \code{.R}, \code{.Rdata}).} 17 | 18 | } 19 | 20 | \value{ 21 | 22 | An object of class \code{mitml}. 23 | 24 | } 25 | 26 | \author{Simon Grund} 27 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{write.mitml}}} 28 | \examples{ 29 | \dontrun{ 30 | data(studentratings) 31 | 32 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 33 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 34 | 35 | # write 'mitml' object 36 | write.mitml(imp, filename = "imputation.R") 37 | 38 | # read previously saved 'mitml' object 39 | old.imp <- read.mitml("imputation.R") 40 | 41 | class(old.imp) 42 | old.imp 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /man/sort.mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{sort.mitml.list} 2 | \alias{sort.mitml.list} 3 | \title{Sort a list of imputed data sets} 4 | \description{ 5 | Sorts a list of multiply imputed data sets according to an R expression. 6 | } 7 | 8 | \usage{ 9 | 10 | \method{sort}{mitml.list}(x, decreasing = FALSE, by, ...) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{A list of imputed data sets with class \code{mitml.list} as produced by \code{mitmlComplete} (or similar).} 17 | \item{decreasing}{Logical flag indicating if data sets should be sorted in decreasing (i.e., reversed) order. 
Default is `\code{FALSE}`.} 18 | \item{by}{An R expression or a list of multiple expressions by which to sort the imputed data sets (see 'Examples').} 19 | \item{\dots}{Further arguments to `\code{order}' (see 'Details').} 20 | 21 | } 22 | 23 | \details{ 24 | 25 | This function sorts a list of imputed data sets according to the R expression given in the \code{by} argument. 26 | The function is based on \code{order} and works in a similar manner. 27 | Note that sorting is performed individually for each data set. 28 | For this reason, the order of cases may differ across data sets if the variables used for sorting contain different values. 29 | 30 | } 31 | 32 | \value{ 33 | 34 | A list of imputed data sets with class \code{mitml.list}. 35 | 36 | } 37 | 38 | \author{Simon Grund} 39 | \examples{ 40 | data(studentratings) 41 | 42 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 43 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 44 | 45 | implist <- mitmlComplete(imp) 46 | 47 | # * Example 1: sort by ID 48 | sort(implist, by = ID) 49 | 50 | # * Example 2: sort by combination of variables 51 | sort(implist, by = list(FedState, ID, -SES)) 52 | } 53 | \keyword{methods} 54 | -------------------------------------------------------------------------------- /man/studentratings.Rd: -------------------------------------------------------------------------------- 1 | \name{studentratings} 2 | \alias{studentratings} 3 | \docType{data} 4 | \title{Example data set on student ratings and achievement} 5 | \description{ 6 | Contains simulated data for students nested within schools, featuring students' ratings of their teachers' behavior (i.e., disciplinary problems in mathematics and reading class) and their general learning environment (school climate) as well as mathematics and reading achievement scores, and scores for socio-economic status and cognitive ability. 
7 | 8 | In addition, the data set features the ID of 50 different schools (i.e., clusters), the biological sex of all students, and a broad, additional grouping factor. 9 | Different amounts of missing data have been inserted into the data set in a completely random fashion. 10 | } 11 | \usage{data(studentratings)} 12 | \format{A data frame containing 750 observations on 10 variables.} 13 | \keyword{datasets} 14 | -------------------------------------------------------------------------------- /man/subset.mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{subset.mitml.list} 2 | \alias{subset.mitml.list} 3 | \title{Subset a list of imputed data sets} 4 | \description{ 5 | Creates data subsets for a list of multiply imputed data sets. 6 | } 7 | 8 | \usage{ 9 | 10 | \method{subset}{mitml.list}(x, subset, select, ...) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{A list of imputed data sets with class \code{mitml.list} as produced by \code{mitmlComplete} (or similar).} 17 | \item{subset}{An R expression by which to subset each data set.} 18 | \item{select}{An R expression by which to select columns.} 19 | \item{\dots}{Not used.} 20 | 21 | } 22 | 23 | \details{ 24 | 25 | This function can be used to create subsets and select variables for a list of multiply imputed data sets according to the R expressions given in the \code{subset} and \code{select} arguments. 26 | The function is based on the \code{subset} function for regular data sets and works in a similar manner. 27 | Note that subsetting is performed individually for each data set. 28 | For this reason, the cases included may differ across data sets if the variables used for subsetting contain different values. 29 | 30 | } 31 | 32 | \value{ 33 | 34 | A list of imputed data sets with class \code{mitml.list}. 
35 | 36 | } 37 | 38 | \author{Simon Grund} 39 | \examples{ 40 | data(studentratings) 41 | 42 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 43 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 44 | 45 | implist <- mitmlComplete(imp) 46 | 47 | # * Example 1: subset by SES, select variables by name 48 | subset(implist, SES < 25, select = c(ID, FedState, Sex, SES, ReadAchiev, ReadDis)) 49 | 50 | # * Example 2: subset by FedState, select variables by column number 51 | subset(implist, FedState == "SH", select = -c(6:7, 9:10)) 52 | 53 | \dontrun{ 54 | # * Example 3: subset by ID and Sex 55 | subset(implist, ID %in% 1001:1005 & Sex == "Boy") 56 | 57 | # * Example 4: select variables by name range 58 | subset(implist, select = ID:Sex) 59 | } 60 | } 61 | \keyword{methods} 62 | -------------------------------------------------------------------------------- /man/summary.mitml.Rd: -------------------------------------------------------------------------------- 1 | \name{summary.mitml} 2 | \alias{summary.mitml} 3 | \title{Summary measures for imputation models} 4 | \description{ 5 | Provides summary statistics and additional information on imputations in objects of class \code{mitml}. 6 | } 7 | 8 | \usage{ 9 | 10 | \method{summary}{mitml}(object, n.Rhat = 3, goodness.of.appr = FALSE, autocorrelation = FALSE, ...) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{object}{An object of class \code{mitml} as produced by \code{panImpute} or \code{jomoImpute}.} 17 | \item{n.Rhat}{(optional) An integer denoting the number of segments used for calculating the potential scale reduction factor. Default is \code{3}.} 18 | \item{goodness.of.appr}{(optional) A logical flag indicating if the goodness of approximation should be printed. Default is \code{FALSE} (see 'Details').} 19 | \item{autocorrelation}{(optional) A logical flag indicating if the autocorrelation should be printed. 
Default is \code{FALSE} (see 'Details').} 20 | \item{\dots}{Not used.} 21 | 22 | } 23 | 24 | \details{ 25 | 26 | The \code{summary} method calculates summary statistics for objects of class \code{mitml} as produced by \code{\link{panImpute}} or \code{\link{jomoImpute}}. 27 | The output includes the potential scale reduction factor (PSRF, or \eqn{\hat{R}}) and (optionally) the goodness of approximation and autocorrelation. 28 | 29 | The PSRF is calculated for each parameter of the imputation model and can be used as a convergence diagnostic (Gelman and Rubin, 1992). 30 | Calculation of the PSRFs can be suppressed by setting \code{n.Rhat = NULL}. 31 | The PSRFs are not computed from different chains but by dividing each chain from the imputation phase into a number of segments as denoted by \code{n.Rhat}. 32 | This is slightly different from the original method proposed by Gelman and Rubin. 33 | 34 | The goodness of approximation measure indicates what proportion of the posterior standard deviation is due to simulation error. 35 | This is useful for assessing the accuracy of the posterior summaries (e.g., the EAP). 36 | The autocorrelation includes estimates of the autocorrelation in the chains at lag 1 (i.e., for consecutive draws) and for lags \eqn{k} and \eqn{2k}, where \eqn{k} is the number of iterations between imputations. 37 | For lag \eqn{k} and \eqn{2k}, the autocorrelation is slightly smoothed to reduce the influence of noise on the estimates (see \code{\link{plot.mitml}}). 38 | 39 | } 40 | 41 | \value{ 42 | 43 | An object of class \code{summary.mitml}. 44 | A print method is used for more readable output. 45 | 46 | } 47 | 48 | \references{ 49 | Gelman, A., and Rubin, D. B. (1992). Inference from iterative simulation using multiple sequences. \emph{Statistical Science, 7}, 457-472. 50 | 51 | Hoff, P. D. (2009). \emph{A first course in Bayesian statistical methods}. New York, NY: Springer. 
52 | } 53 | 54 | \author{Simon Grund} 55 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{plot.mitml}}} 56 | \examples{ 57 | data(studentratings) 58 | 59 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 60 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 61 | 62 | # print summary 63 | summary(imp) 64 | } 65 | \keyword{methods} 66 | -------------------------------------------------------------------------------- /man/testConstraints.Rd: -------------------------------------------------------------------------------- 1 | \name{testConstraints} 2 | \alias{testConstraints} 3 | \title{Test functions and constraints of model parameters} 4 | \description{ 5 | Performs hypothesis tests for arbitrary functions of the model parameters using the Delta method. 6 | } 7 | 8 | \usage{ 9 | 10 | testConstraints(model, qhat, uhat, constraints, method = c("D1", "D2"), 11 | ariv = c("default", "positive"), df.com = NULL) 12 | 13 | } 14 | \arguments{ 15 | 16 | \item{model}{A list of fitted statistical models as produced by \code{\link{with.mitml.list}} or similar.} 17 | \item{qhat}{A matrix or list containing the point estimates of the parameters for each imputed data set (see 'Details').} 18 | \item{uhat}{An array or list containing the variance-covariance matrix of the parameters for each imputed data set (see 'Details').} 19 | \item{constraints}{A character vector specifying constraints or functions of the vector of model parameters to be tested (see 'Details').} 20 | \item{method}{A character string denoting the method by which the test is performed. Can be \code{"D1"} or \code{"D2"} (see 'Details'). Default is \code{"D1"}.} 21 | \item{ariv}{A character string denoting how the ARIV is calculated. Can be \code{"default"} or \code{"positive"} (see 'Details').} 22 | \item{df.com}{(optional) A single number or a numeric vector denoting the complete-data degrees of freedom for the hypothesis test (see 'Details'). 
Only used if \code{method = "D1"}.} 23 | 24 | } 25 | \details{ 26 | 27 | This function performs tests of arbitrary functions (or constraints) of the model parameters using similar methods as \code{\link{testModels}}. 28 | The function relies on the Delta method (e.g., Casella & Berger, 2002) for testing functions of the parameters and assumes that their sampling distribution is approximately normal. 29 | The parameters can either be extracted automatically from the fitted statistical models (\code{model}) or provided manually as matrices, arrays, or lists (\code{qhat} and \code{uhat}, see 'Examples'). 30 | 31 | Constraints and other functions of the model parameters are specified in the \code{constraints} argument. 32 | The constraints must be supplied as a character vector, where each string denotes a function or a constraint to be tested (see 'Examples'). 33 | 34 | The Wald-like tests that are carried out by \code{testConstraints} are pooled across the imputed data sets with the \eqn{D_1} (Li, Raghunathan & Rubin, 1991) or \eqn{D_2} (Li, Meng, Raghunathan & Rubin, 1991) method, where \eqn{D_1} operates on the constrained point and variance estimates, and \eqn{D_2} operates on the Wald-statistics (for additional details, see \code{testModels}). 35 | The pooled estimates and standard errors reported in the output are always based on \eqn{D_1}. 36 | 37 | For \eqn{D_1}, the complete-data degrees of freedom can be adjusted for smaller samples by specifying \code{df.com} (see \code{testModels}). 38 | 39 | This function supports general statistical models that define \code{coef} and \code{vcov} methods (e.g., \code{lm}, \code{glm}, \code{lavaan} and others) as well as multilevel models estimated with \code{lme4} or \code{nlme} and GEEs estimated with \code{geepack}. 40 | The arguments \code{qhat} and \code{uhat} provide a general method for pooling parameter estimates regardless of model type (see 'Examples'). 
41 | Support for further models may be added in future releases. 42 | 43 | The \code{ariv} argument determines how the average relative increase in variance (ARIV) is calculated (see \code{testModels}). 44 | If \code{ariv = "default"}, the default estimators are used. 45 | If \code{ariv = "positive"}, the default estimators are used but constrained to take on strictly positive values. 46 | 47 | } 48 | \value{ 49 | 50 | A list containing the results of the model comparison. 51 | A \code{print} method is used for more readable output. 52 | 53 | } 54 | 55 | \references{ 56 | Casella, G., & Berger, R. L. (2002). \emph{Statistical inference (2nd. Ed.)}. Pacific Grove, CA: Duxbury. 57 | 58 | Li, K.-H., Meng, X.-L., Raghunathan, T. E., & Rubin, D. B. (1991). Significance levels from repeated p-values with multiply-imputed data. \emph{Statistica Sinica, 1}, 65-92. 59 | 60 | Li, K. H., Raghunathan, T. E., & Rubin, D. B. (1991). Large-sample significance levels from multiply imputed data using moment-based statistics and an F reference distribution. \emph{Journal of the American Statistical Association, 86}, 1065-1073. 61 | } 62 | 63 | \author{Simon Grund} 64 | 65 | \seealso{\code{\link{testModels}}, \code{\link{with.mitml.list}}} 66 | \examples{ 67 | data(studentratings) 68 | 69 | fml <- MathDis + ReadDis + SchClimate ~ (1|ID) 70 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 71 | 72 | implist <- mitmlComplete(imp) 73 | 74 | # fit simple regression model 75 | fit.lm <- with(implist, lm(SchClimate ~ ReadDis + MathDis)) 76 | 77 | # apply Rubin's rules 78 | testEstimates(fit.lm) 79 | 80 | # * Example 1: test 'identity' function of two parameters (automatic) 81 | # test equivalent to model comparison with a restricted model (without 'ReadDis' 82 | # and 'MathDis') 83 | 84 | cons <- c("ReadDis", "MathDis") 85 | testConstraints(fit.lm, constraints = cons) 86 | 87 | # ... 
adjusting for finite samples 88 | testConstraints(fit.lm, constraints = cons, df.com = 749) 89 | 90 | # ... using D2 91 | testConstraints(fit.lm, constraints = cons, method = "D2") 92 | 93 | # * Example 2: test for equality of two parameters 94 | # tests the hypothesis that the coefficients pertaining to 'ReadDis' and 'MathDis' 95 | # are equal (ReadDis = MathDis) 96 | 97 | cons <- c("ReadDis-MathDis") 98 | testConstraints(fit.lm, constraints = cons) 99 | 100 | # * Example 3: test against a fixed value 101 | # tests the hypothesis that the coefficient of "ReadDis" is equal to one 102 | # (i.e., 'ReadDis' - 1 == 0) 103 | 104 | cons <- c("ReadDis-1") 105 | testConstraints(fit.lm, constraints = cons) 106 | 107 | # * Example 4: test 'identity' function of two parameters (qhat, uhat) 108 | 109 | fit.lm <- with(implist, lm(SchClimate ~ ReadDis + MathDis)) 110 | 111 | qhat <- sapply(fit.lm, coef) 112 | uhat <- sapply(fit.lm, function(x) vcov(x), simplify = "array") 113 | 114 | cons <- c("ReadDis", "MathDis") 115 | testConstraints(qhat = qhat, uhat = uhat, constraints = cons) 116 | } 117 | -------------------------------------------------------------------------------- /man/testEstimates.Rd: -------------------------------------------------------------------------------- 1 | \name{testEstimates} 2 | \alias{testEstimates} 3 | \title{Compute final estimates and inferences} 4 | \description{ 5 | Computes final parameter estimates and inferences from multiply imputed data sets. 6 | } 7 | 8 | \usage{ 9 | 10 | testEstimates(model, qhat, uhat, extra.pars = FALSE, df.com = NULL, ...) 
11 | 12 | } 13 | \arguments{ 14 | 15 | \item{model}{A list of fitted statistical models as produced by \code{\link{with.mitml.list}} or similar.} 16 | \item{qhat}{A matrix or list containing the point estimates of the parameters for each imputed data set (see 'Details').} 17 | \item{uhat}{(optional) An array, matrix, or list containing the variance estimates (i.e., squared standard errors) of the parameters for each imputed data set (see 'Details').} 18 | \item{extra.pars}{A logical flag indicating if estimates of additional parameters (e.g., variance components) should be calculated. Default is \code{FALSE}.} 19 | \item{df.com}{(optional) A numeric vector denoting the complete-data degrees of freedom for the hypothesis tests (see 'Details').} 20 | \item{\dots}{Not used.} 21 | 22 | } 23 | \details{ 24 | 25 | This function calculates pooled parameter estimates and inferences as suggested by Rubin (1987, "Rubin's rules") for each parameter of the fitted model. 26 | The parameters can either be extracted automatically from the fitted statistical models (\code{model}) or provided manually as matrices, arrays, or lists (\code{qhat} and \code{uhat}, see 'Examples'). 27 | 28 | Rubin's original method assumes that the complete-data degrees of freedom are infinite, which is reasonable in larger samples. 29 | Alternatively, the degrees of freedom can be adjusted for smaller samples by specifying \code{df.com} (Barnard & Rubin, 1999). 30 | The \code{df.com} argument can either be a single number if the degrees of freedom are equal for all parameters being tested, or a numeric vector with one element per parameter. 31 | 32 | Using the \code{extra.pars} argument, pooled estimates for additional parameters can be requested (e.g., variance components). 33 | This option is available for a number of models but may not provide estimates for all parameters in all model types. 
34 | In such a case, users may extract the estimates of additional parameters by hand and pool them with the \code{qhat} argument (see 'Examples'). 35 | No inferences are calculated for pooled additional parameters. 36 | 37 | Currently, the procedure supports automatic extraction of model parameters from models that define \code{coef} and \code{vcov} methods (e.g., \code{lm}, \code{glm}, \code{lavaan} and others) as well as multilevel models estimated with \code{lme4} or \code{nlme} and GEEs estimated with \code{geepack}. 38 | The arguments \code{qhat} and \code{uhat} provide a general method for pooling parameter estimates regardless of model type (see 'Examples'). 39 | Support for further models may be added in future releases. 40 | 41 | } 42 | \value{ 43 | 44 | A list containing the pooled parameter estimates and inferences. 45 | A \code{print} method is used for more readable output. 46 | 47 | } 48 | 49 | \references{ 50 | Barnard, J., & Rubin, D. B. (1999). Small-sample degrees of freedom with multiple imputation. \emph{Biometrika, 86}, 948-955. 51 | 52 | Rubin, D. B. (1987). \emph{Multiple imputation for nonresponse in surveys}. Hoboken, NJ: Wiley. 53 | } 54 | 55 | 56 | \author{Simon Grund} 57 | 58 | \seealso{\code{\link{with.mitml.list}}, \code{\link{confint.mitml.testEstimates}}} 59 | \examples{ 60 | data(studentratings) 61 | 62 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 63 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 64 | 65 | implist <- mitmlComplete(imp) 66 | 67 | # fit multilevel model using lme4 68 | require(lme4) 69 | fit.lmer <- with(implist, lmer(SES ~ (1|ID))) 70 | 71 | # * Example 1: pool estimates of fitted models (automatic) 72 | # pooled estimates and inferences separately for each parameter (Rubin's rules) 73 | testEstimates(fit.lmer) 74 | 75 | # ... adjusted df for finite samples 76 | testEstimates(fit.lmer, df.com = 49) 77 | 78 | # ...
with additional table for variance components and ICCs 79 | testEstimates(fit.lmer, extra.pars = TRUE) 80 | 81 | # * Example 2: pool estimates using matrices or lists (qhat, uhat) 82 | fit.lmer <- with(implist, lmer(SES ~ ReadAchiev + (1|ID))) 83 | 84 | qhat <- sapply(fit.lmer, fixef) 85 | uhat <- sapply(fit.lmer, function(x) diag(vcov(x))) 86 | 87 | testEstimates(qhat = qhat) 88 | testEstimates(qhat = qhat, uhat = uhat) 89 | } 90 | -------------------------------------------------------------------------------- /man/testModels.Rd: -------------------------------------------------------------------------------- 1 | \name{testModels} 2 | \alias{testModels} 3 | \title{Test multiple parameters and compare nested models} 4 | \description{ 5 | Performs multi-parameter hypothesis tests for a vector of statistical parameters and compares nested statistical models obtained from multiply imputed data sets. 6 | } 7 | 8 | \usage{ 9 | 10 | testModels(model, null.model, method = c("D1", "D2", "D3", "D4"), 11 | use = c("wald", "likelihood"), ariv = c("default", "positive", "robust"), 12 | df.com = NULL, data = NULL) 13 | 14 | } 15 | \arguments{ 16 | 17 | \item{model}{A list of fitted statistical models (``full'' model) as produced by \code{\link{with.mitml.list}} or similar.} 18 | \item{null.model}{A list of fitted statistical models (``restricted'' model) as produced by \code{\link{with.mitml.list}} or similar.} 19 | \item{method}{A character string denoting the method by which the test is performed. Can be \code{"D1"}, \code{"D2"}, \code{"D3"}, or \code{"D4"} (see 'Details'). Default is \code{"D1"}.} 20 | \item{use}{A character string denoting Wald- or likelihood-based tests. Can be either \code{"wald"} or \code{"likelihood"}. Only used if \code{method = "D2"}.} 21 | \item{ariv}{A character string denoting how the ARIV is calculated.
Can be \code{"default"}, \code{"positive"}, or \code{"robust"} (see 'Details').} 22 | \item{df.com}{(optional) A number denoting the complete-data degrees of freedom for the hypothesis test. Only used if \code{method = "D1"}.} 23 | \item{data}{(optional) A list of imputed data sets (see 'Details'). Only used if \code{method = "D4"}.} 24 | 25 | } 26 | \details{ 27 | 28 | This function compares two nested statistical models fitted to multiply imputed data sets by pooling Wald-like or likelihood-ratio tests. 29 | 30 | Pooling methods for Wald-like tests of multiple parameters were introduced by Rubin (1987) and further developed by Li, Raghunathan and Rubin (1991). 31 | The pooled Wald test is referred to as \eqn{D_1} and can be used by setting \code{method = "D1"}. 32 | \eqn{D_1} is the multi-parameter equivalent of \code{\link{testEstimates}}, that is, it tests multiple parameters simultaneously. 33 | For \eqn{D_1}, the complete-data degrees of freedom are assumed to be infinite, but they can be adjusted for smaller samples by supplying \code{df.com} (Reiter, 2007). 34 | 35 | An alternative method for Wald-like hypothesis tests was suggested by Li, Meng, Raghunathan and Rubin (1991). 36 | The procedure is called \eqn{D_2} and can be used by setting \code{method = "D2"}. 37 | \eqn{D_2} calculates the Wald-test directly for each data set and then pools the resulting \eqn{\chi^2} values. 38 | The source of these values is specified by the \code{use} argument. 39 | If \code{use = "wald"} (the default), then a Wald test similar to \eqn{D_1} is performed. 40 | If \code{use = "likelihood"}, then the two models are compared with a likelihood-ratio test instead. 41 | 42 | Pooling methods for likelihood-ratio tests were suggested by Meng and Rubin (1992). 43 | This procedure is referred to as \eqn{D_3} and can be used by setting \code{method = "D3"}. 44 | \eqn{D_3} compares the two models by pooling the likelihood-ratio test across multiply imputed data sets.
45 | 46 | Finally, an improved method for pooling likelihood-ratio tests was recommended by Chan & Meng (2019). 47 | This method is referred to as \eqn{D_4} and can be used by setting \code{method = "D4"}. 48 | \eqn{D_4} also compares models by pooling the likelihood-ratio test but does so in a more general and efficient manner. 49 | 50 | The function supports different classes of statistical models depending on which \code{method} is chosen. 51 | \eqn{D_1} supports models that define \code{coef} and \code{vcov} methods (or similar) for extracting the parameter estimates and their estimated covariance matrix. 52 | \eqn{D_2} can be used for the same models (if \code{use = "wald"}) and models that define a \code{logLik} method (if \code{use = "likelihood"}). 53 | \eqn{D_3} supports linear models, linear mixed-effects models (see Laird, Lange, & Stram, 1987) with an arbitrary cluster structure if estimated with \code{lme4} or a single cluster if estimated by \code{nlme}, and structural equation models estimated with \code{lavaan} (requires ML estimator, see 'Note'). 54 | Finally, \eqn{D_4} supports models that define a \code{logLik} method but can fail if the data to which the model was fitted cannot be found. 55 | In such a case, users can provide the list of imputed data sets directly by specifying the \code{data} argument or refit with the \code{include.data} argument in \code{\link{with.mitml.list}}. 56 | Support for other statistical models may be added in future releases. 57 | 58 | The \eqn{D_4}, \eqn{D_3}, and \eqn{D_2} methods support different estimators of the average relative increase in variance (ARIV), which can be specified with the \code{ariv} argument. 59 | If \code{ariv = "default"}, the default estimators are used. 60 | If \code{ariv = "positive"}, the default estimators are used but constrained to take on strictly positive values. 61 | This is useful if the estimated ARIV is negative.
62 | If \code{ariv = "robust"}, which is available only for \eqn{D_4}, the "robust" estimator proposed by Chan & Meng (2019) is used. 63 | This method should be used with caution, because it requires much stronger assumptions and may result in liberal inferences if these assumptions are violated. 64 | 65 | } 66 | \value{ 67 | 68 | A list containing the results of the model comparison. 69 | A \code{print} method is used for more readable output. 70 | 71 | } 72 | 73 | \note{ 74 | 75 | The methods \eqn{D_4}, \eqn{D_3}, and the likelihood-based \eqn{D_2} assume that models were fit using maximum likelihood (ML). 76 | Models fit using REML are automatically refit using ML. 77 | Models fit in \code{'lavaan'} using the MLR estimator or similar techniques that require scaled \eqn{\chi^2} difference tests are currently not supported. 78 | 79 | } 80 | 81 | \references{ 82 | Chan, K. W., & Meng, X.-L. (2019). Multiple improvements of multiple imputation likelihood ratio tests. ArXiv:1711.08822 [Math, Stat]. \url{https://arxiv.org/abs/1711.08822} 83 | 84 | Laird, N., Lange, N., & Stram, D. (1987). Maximum likelihood computations with repeated measures: Application of the EM algorithm. \emph{Journal of the American Statistical Association, 82}, 97-105. 85 | 86 | Li, K.-H., Meng, X.-L., Raghunathan, T. E., & Rubin, D. B. (1991). Significance levels from repeated p-values with multiply-imputed data. \emph{Statistica Sinica, 1}, 65-92. 87 | 88 | Li, K. H., Raghunathan, T. E., & Rubin, D. B. (1991). Large-sample significance levels from multiply imputed data using moment-based statistics and an F reference distribution. \emph{Journal of the American Statistical Association, 86}, 1065-1073. 89 | 90 | Meng, X.-L., & Rubin, D. B. (1992). Performing likelihood ratio tests with multiply-imputed data sets. \emph{Biometrika, 79}, 103-111. 91 | 92 | Reiter, J. P. (2007). Small-sample degrees of freedom for multi-component significance tests with multiple imputation for missing data.
\emph{Biometrika, 94}, 502-508. 93 | 94 | Rubin, D. B. (1987). \emph{Multiple imputation for nonresponse in surveys}. Hoboken, NJ: Wiley. 95 | } 96 | 97 | \author{Simon Grund} 98 | \seealso{\code{\link{testEstimates}}, \code{\link{testConstraints}}, \code{\link{with.mitml.list}}, \code{\link{anova.mitml.result}}} 99 | \examples{ 100 | data(studentratings) 101 | 102 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 103 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 104 | 105 | implist <- mitmlComplete(imp) 106 | 107 | # * Example 1: multiparameter hypothesis test for 'ReadDis' and 'SES' 108 | # This tests the hypothesis that both effects are zero. 109 | 110 | require(lme4) 111 | fit0 <- with(implist, lmer(ReadAchiev ~ (1|ID), REML = FALSE)) 112 | fit1 <- with(implist, lmer(ReadAchiev ~ ReadDis + (1|ID), REML = FALSE)) 113 | 114 | # apply Rubin's rules 115 | testEstimates(fit1) 116 | 117 | # multiparameter hypothesis test using D1 (default) 118 | testModels(fit1, fit0) 119 | 120 | # ... adjusting for finite samples 121 | testModels(fit1, fit0, df.com = 47) 122 | 123 | # ... using D2 ("wald", using estimates and covariance-matrix) 124 | testModels(fit1, fit0, method = "D2") 125 | 126 | # ... using D2 ("likelihood", using likelihood-ratio test) 127 | testModels(fit1, fit0, method = "D2", use = "likelihood") 128 | 129 | # ... using D3 (likelihood-ratio test, requires ML fit) 130 | testModels(fit1, fit0, method = "D3") 131 | 132 | # ... 
using D4 (likelihood-ratio test, requires ML fit) 133 | testModels(fit1, fit0, method = "D4") 134 | } 135 | -------------------------------------------------------------------------------- /man/with.mitml.list.Rd: -------------------------------------------------------------------------------- 1 | \name{with.mitml.list} 2 | \alias{with.mitml.list} 3 | \alias{within.mitml.list} 4 | \title{Evaluate an expression in a list of imputed data sets} 5 | \description{ 6 | The \code{with} and \code{within} methods evaluate R expressions in a list of multiply imputed data sets. 7 | } 8 | 9 | \usage{ 10 | 11 | \method{with}{mitml.list}(data, expr, include.data = FALSE, ...) 12 | \method{within}{mitml.list}(data, expr, ignore = NULL, ...) 13 | 14 | } 15 | 16 | \arguments{ 17 | 18 | \item{data}{A list of imputed data sets with class \code{mitml.list} as produced by \code{mitmlComplete} or \code{as.mitml.list}.} 19 | \item{expr}{An R expression to be evaluated for each data set.} 20 | \item{include.data}{Either a logical flag or a character string denoting how the data are included when \code{expr} is evaluated (see 'Details'). If \code{FALSE}, an environment is created from \code{data}, and \code{expr} is evaluated therein. If \code{TRUE}, a call is constructed from \code{expr} and evaluated with the imputed data in the \code{"data"} slot. If character, a call is constructed from \code{expr} and evaluated with the imputed data in the slot named by \code{include.data}. Default is \code{FALSE}.} 21 | \item{ignore}{A character vector naming objects that are created but should not be saved (see 'Details').} 22 | \item{\dots}{Not used.} 23 | 24 | } 25 | 26 | \details{ 27 | 28 | The two functions provide \code{with} and \code{within} methods for objects of class \code{mitml.list}. 
29 | They evaluate an R expression repeatedly for each of the imputed data sets but return different values: \code{with} returns the result of the evaluated expression; \code{within} returns the resulting data sets. 30 | 31 | The \code{within} function is useful for transforming and computing variables in the imputed data (e.g., centering, calculating cluster means, etc.). 32 | The \code{with} function is useful, for example, for fitting statistical models. 33 | The list of fitted models can be analyzed using \code{\link{testEstimates}}, \code{\link{testModels}}, \code{\link{testConstraints}}, or \code{\link[=anova.mitml.result]{anova}}. 34 | 35 | The \code{include.data} argument can be used to include the imputed data sets in the call to fit statistical models (\code{expr}) using \code{with}. 36 | This is useful for fitting models that require that the fitting function be called with a proper \code{data} argument (e.g., \code{lavaan} or \code{nlme}; see 'Examples'). 37 | Setting \code{include.data = TRUE} will fit the model with the imputed data sets used as the \code{data} argument. 38 | Setting \code{include.data = "df"} (or similar) will fit the model with the imputed data sets as the \code{df} argument (useful if the function refers to the data by a nonstandard name, such as \code{"df"}). 39 | 40 | The \code{ignore} argument can be used to declare objects that are not to be saved in the data sets created by \code{within}. 41 | 42 | } 43 | 44 | \value{ 45 | 46 | \code{with}: A list of class \code{mitml.results} containing the evaluated expression for each data set. 47 | 48 | \code{within}: A list of class \code{mitml.list} containing the imputed data modified by the evaluated expression. 
49 | 50 | } 51 | 52 | \author{Simon Grund} 53 | \seealso{\code{\link{mitmlComplete}}, \code{\link{anova.mitml.result}}, \code{\link{testEstimates}}, \code{\link{testModels}}, \code{\link{testConstraints}}} 54 | \examples{ 55 | data(studentratings) 56 | 57 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 58 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 59 | 60 | implist <- mitmlComplete(imp) 61 | 62 | # * Example 1: data transformation 63 | 64 | # calculate and save cluster means 65 | new1.implist <- within(implist, Means.ReadAchiev <- clusterMeans(ReadAchiev, ID)) 66 | 67 | # center variables, calculate interaction terms, ignore byproducts 68 | new2.implist <- within(implist, { 69 | M.SES <- mean(SES) 70 | M.CognAbility <- mean(CognAbility) 71 | C.SES <- SES - M.SES 72 | C.CognAbility <- CognAbility - M.CognAbility 73 | SES.CognAbility <- C.SES * C.CognAbility 74 | }, ignore = c("M.SES", "M.CognAbility")) 75 | 76 | # * Example 2: fitting statistical models 77 | 78 | # fit regression model 79 | fit.lm <- with(implist, lm(ReadAchiev ~ ReadDis)) 80 | 81 | # fit multilevel model with lme4 82 | require(lme4) 83 | fit.lmer <- with(implist, lmer(ReadAchiev ~ ReadDis + (1|ID))) 84 | 85 | \dontrun{ 86 | # fit structural equation model with lavaan (with include.data = TRUE) 87 | require(lavaan) 88 | mod <- "ReadAchiev ~ ReadDis" 89 | fit.sem <- with(implist, 90 | sem(model = mod, cluster = "ID", estimator = "MLR"), 91 | include.data = TRUE) 92 | } 93 | } 94 | \keyword{methods} 95 | -------------------------------------------------------------------------------- /man/write.mitml.Rd: -------------------------------------------------------------------------------- 1 | \name{write.mitml} 2 | \alias{write.mitml} 3 | \title{Write \code{mitml} objects to file} 4 | \description{ 5 | Saves objects of class \code{mitml} in R binary formats (similar to \code{?save}). 
6 | } 7 | 8 | \usage{ 9 | 10 | write.mitml(x, filename, drop = FALSE) 11 | 12 | } 13 | 14 | \arguments{ 15 | 16 | \item{x}{An object of class \code{mitml} as produced by \code{panImpute} or \code{jomoImpute}.} 17 | \item{filename}{Name of the destination file, specified with file extension (e.g., \code{.R}, \code{.Rdata}).} 18 | \item{drop}{Logical flag indicating if the parameters of the imputation model should be dropped to reduce file size. Default is \code{FALSE}.} 19 | 20 | } 21 | 22 | \value{ 23 | 24 | None (invisible \code{NULL}). 25 | 26 | } 27 | 28 | \author{Simon Grund} 29 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{read.mitml}}} 30 | \examples{ 31 | \dontrun{ 32 | data(studentratings) 33 | 34 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 35 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 36 | 37 | # write full 'mitml' object (default) 38 | write.mitml(imp, filename = "imputation.Rdata") 39 | 40 | # drop parameters of the imputation model 41 | write.mitml(imp, filename = "imputation.Rdata", drop = TRUE) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /man/write.mitmlMplus.Rd: -------------------------------------------------------------------------------- 1 | \name{write.mitmlMplus} 2 | \alias{write.mitmlMplus} 3 | \title{Write \code{mitml} objects to Mplus format} 4 | \description{ 5 | Saves objects of class \code{mitml} as a series of text files which can be processed by the statistical software M\emph{plus} (Muthen & Muthen, 2012).
6 | } 7 | 8 | \usage{ 9 | 10 | write.mitmlMplus(x, filename, suffix = "list", sep = "\t", dec = ".", 11 | na.value = -999) 12 | 13 | } 14 | 15 | \arguments{ 16 | 17 | \item{x}{An object of class \code{mitml} or \code{mitml.list} as produced by \code{panImpute}, \code{jomoImpute}, \code{mitmlComplete}, or similar.} 18 | \item{filename}{File base name for the text files containing the imputed data sets, specified without file extension.} 19 | \item{suffix}{File name suffix for the index file.} 20 | \item{sep}{The field separator.} 21 | \item{dec}{The decimal separator.} 22 | \item{na.value}{A numeric value coding the missing data in the resulting data files.} 23 | 24 | } 25 | 26 | \details{ 27 | 28 | The M\emph{plus} format for multiply imputed data sets comprises a set of text files, each containing one imputed data set, and an index file containing the names of all data files. 29 | During export, factors and character variables are converted to numeric. 30 | To make this more transparent, \code{write.mitmlMplus} produces a log file which contains information about the data set and the factors that have been converted. 31 | 32 | In addition, a basic M\emph{plus} input file is generated that can be used for setting up subsequent analysis models. 33 | 34 | } 35 | 36 | \value{ 37 | 38 | None (invisible \code{NULL}). 39 | 40 | } 41 | 42 | \references{ 43 | Muthen, L. K., & Muthen, B. O. (2012). \emph{Mplus User's Guide. Seventh Edition.} Los Angeles, CA: Muthen & Muthen.
44 | } 45 | 46 | \author{Simon Grund} 47 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{mitmlComplete}}} 48 | \examples{ 49 | \dontrun{ 50 | data(studentratings) 51 | 52 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 53 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 54 | 55 | # write imputation files, index file, and log file 56 | write.mitmlMplus(imp, filename = "imputation", suffix = "list", na.value = -999) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /man/write.mitmlSAV.Rd: -------------------------------------------------------------------------------- 1 | \name{write.mitmlSAV} 2 | \alias{write.mitmlSAV} 3 | \title{Write \code{mitml} objects to native SPSS format} 4 | \description{ 5 | Saves objects of class \code{mitml} in the \code{.sav} format used by the statistical software SPSS (IBM Corp., 2013). 6 | The function serves as a front-end for \code{write_sav} from the \code{haven} package. 7 | } 8 | 9 | \usage{ 10 | 11 | write.mitmlSAV(x, filename) 12 | 13 | } 14 | 15 | \arguments{ 16 | 17 | \item{x}{An object of class \code{mitml} or \code{mitml.list} as produced by \code{panImpute}, \code{jomoImpute}, \code{mitmlComplete}, or similar.} 18 | \item{filename}{Name of the destination file. The file extension (\code{.sav}) is appended if needed.} 19 | 20 | } 21 | 22 | \details{ 23 | 24 | This function exports multiply imputed data sets to a single \code{.sav} file, in which an \code{Imputation_} variable separates the original data and the various imputed data sets. 25 | This allows exporting imputed data directly to the native SPSS format. 26 | 27 | Alternatively, \code{\link{write.mitmlSPSS}} may be used for creating separate text and SPSS syntax files, which offers more control over the data format. 28 | 29 | } 30 | 31 | \value{ 32 | 33 | None (invisible \code{NULL}). 34 | 35 | } 36 | 37 | \references{ 38 | IBM Corp. (2013).
\emph{IBM SPSS Statistics for Windows, Version 22.0}. Armonk, NY: IBM Corp. 39 | } 40 | 41 | \author{Simon Grund} 42 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{mitmlComplete}}, \code{\link{write.mitmlSPSS}}} 43 | \examples{ 44 | \dontrun{ 45 | data(studentratings) 46 | 47 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 48 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 49 | 50 | # write data file and SPSS syntax 51 | write.mitmlSAV(imp, filename = "imputation") 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /man/write.mitmlSPSS.Rd: -------------------------------------------------------------------------------- 1 | \name{write.mitmlSPSS} 2 | \alias{write.mitmlSPSS} 3 | \title{Write \code{mitml} objects to SPSS compatible format} 4 | \description{ 5 | Saves objects of class \code{mitml} as a text and a syntax file which can be processed by the statistical software SPSS (IBM Corp., 2013). 6 | } 7 | 8 | \usage{ 9 | 10 | write.mitmlSPSS(x, filename, sep = "\t", dec = ".", na.value = -999, syntax = TRUE, 11 | locale = NULL) 12 | 13 | } 14 | 15 | \arguments{ 16 | 17 | \item{x}{An object of class \code{mitml} or \code{mitml.list} as produced by \code{panImpute}, \code{jomoImpute}, \code{mitmlComplete}, or similar.} 18 | \item{filename}{File base name of the data and syntax files, specified without file extension.} 19 | \item{sep}{The field separator.} 20 | \item{dec}{The decimal separator.} 21 | \item{na.value}{A numeric value coding the missing data in the resulting data file.} 22 | \item{syntax}{A logical flag indicating if an SPSS syntax file should be generated. This file contains instructions for SPSS for reading in the data file.
Default is \code{TRUE}.} 23 | \item{locale}{(optional) A character string specifying the localization to be used in SPSS (e.g., \code{"en_US"}, \code{"de_DE"}; see 'Details').} 24 | 25 | } 26 | 27 | \details{ 28 | 29 | In SPSS, multiply imputed data are contained in a single file, in which an \code{Imputation_} variable separates the original data and the various imputed data sets. 30 | During export, factors are converted to numeric, whereas character variables are left ``as is''. 31 | 32 | By default, \code{write.mitmlSPSS} generates a raw text file containing the data, along with a syntax file containing instructions for SPSS. 33 | This syntax file mimics SPSS's functionality to read text files with sensible settings. 34 | In order to read in the data, the syntax file must be opened and executed using SPSS, or opened using the GUI. 35 | Manual changes to the syntax file can be required, for example, if the file path of the data file is not correctly represented in the syntax. 36 | The \code{locale} argument can be used to ensure that SPSS reads the data in the correct locale. 37 | 38 | Alternatively, \code{\link{write.mitmlSAV}} may be used for exporting directly to the SPSS native \code{.sav} format. 39 | } 40 | 41 | \value{ 42 | 43 | None (invisible \code{NULL}). 44 | 45 | } 46 | 47 | \references{ 48 | IBM Corp. (2013). \emph{IBM SPSS Statistics for Windows, Version 22.0}.
Armonk, NY: IBM Corp 49 | } 50 | 51 | \author{Simon Grund} 52 | \seealso{\code{\link{panImpute}}, \code{\link{jomoImpute}}, \code{\link{mitmlComplete}}, \code{\link{write.mitmlSAV}}} 53 | \examples{ 54 | \dontrun{ 55 | data(studentratings) 56 | 57 | fml <- ReadDis + SES ~ ReadAchiev + (1|ID) 58 | imp <- panImpute(studentratings, formula = fml, n.burn = 1000, n.iter = 100, m = 5) 59 | 60 | # write data file and SPSS syntax 61 | write.mitmlSPSS(imp, filename = "imputation", sep = "\t", dec = ".", 62 | na.value = -999, locale = "en_US") 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /vignettes/Analysis.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Analysis of Multiply Imputed Data Sets" 3 | output: 4 | rmarkdown::html_vignette: 5 | css: "css/vignette.css" 6 | vignette: > 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteIndexEntry{Analysis of Multiply Imputed Data Sets} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include=FALSE, cache=FALSE} 13 | library(knitr) 14 | set.seed(123) 15 | options(width=87) 16 | opts_chunk$set(background="#ffffff", comment="#", collapse=FALSE, 17 | fig.width=9, fig.height=9, warning=FALSE, 18 | message=FALSE) 19 | ``` 20 | 21 | This vignette is intended to provide an overview of the analysis of multiply imputed data sets with `mitml`. 22 | Specifically, this vignette addresses the following topics: 23 | 24 | 1. Working with multiply imputed data sets 25 | 2. Rubin's rules for pooling individual parameters 26 | 3. Model comparisons 27 | 4. Parameter constraints 28 | 29 | Further information can be found in the other [vignettes](https://github.com/simongrund1/mitml/wiki) and the package [documentation](https://cran.r-project.org/package=mitml/mitml.pdf). 
30 | 31 | ## Example data (`studentratings`) 32 | 33 | For the purposes of this vignette, we make use of the `studentratings` data set, which contains simulated data from 750 students in 50 schools including scores on reading and math achievement, socioeconomic status (SES), and ratings on school and classroom environment. 34 | 35 | The package and the data set can be loaded as follows. 36 | 37 | ```{r} 38 | library(mitml) 39 | library(lme4) 40 | data(studentratings) 41 | ``` 42 | 43 | As evident from its `summary`, most variables in the data set contain missing values. 44 | 45 | ```{r} 46 | summary(studentratings) 47 | ``` 48 | 49 | In the present example, we investigate the differences in mathematics achievement that can be attributed to differences in SES when controlling for students' sex. 50 | Specifically, we are interested in the following model. 51 | 52 | $$ 53 | \mathit{MA}_{ij} = \gamma_{00} + \gamma_{10} \mathit{Sex}_{ij} + \gamma_{20} (\mathit{SES}_{ij}-\overline{\mathit{SES}}_{\bullet j}) + \gamma_{01} \overline{\mathit{SES}}_{\bullet j} + u_{0j} + e_{ij} 54 | $$ 55 | 56 | Note that this model also employs group-mean centering to separate the individual and group-level effects of SES. 57 | 58 | ## Generating imputations 59 | 60 | In the present example, we generate 20 imputations from the following imputation model. 61 | 62 | ```{r, results="hide"} 63 | fml <- ReadDis + SES ~ 1 + Sex + (1|ID) 64 | imp <- panImpute(studentratings, formula = fml, n.burn = 5000, n.iter = 200, m = 20) 65 | ``` 66 | 67 | The completed data are then extracted with `mitmlComplete`. 68 | 69 | ```{r} 70 | implist <- mitmlComplete(imp, "all") 71 | ``` 72 | 73 | ## Transforming the imputed data sets 74 | 75 | In empirical research, the raw data rarely enter the analyses but often require to be transformed beforehand. 76 | For this purpose, the `mitml` package provides the `within` function, which applies a given transformation directly to each data set. 
77 | 78 | In the following, we use this to (a) calculate the group means of SES and (b) center the individual scores around their group means. 79 | 80 | ```{r} 81 | implist <- within(implist, { 82 | G.SES <- clusterMeans(SES, ID) # calculate group means 83 | I.SES <- SES - G.SES # center around group means 84 | }) 85 | ``` 86 | 87 | This method can be used to apply arbitrary transformations to all of the completed data sets simultaneously. 88 | 89 | > **Note regarding** `dplyr`**:** 90 | > Due to how it is implemented, `within` cannot be used directly with `dplyr`. 91 | > Instead, users may use `with` instead of `within` with the following workaround. 92 | >```{r, eval=FALSE} 93 | >implist <- with(implist,{ 94 | > df <- data.frame(as.list(environment())) 95 | > df <- ... # dplyr commands 96 | > df 97 | >}) 98 | >implist <- as.mitml.list(implist) 99 | >``` 100 | > Advanced users may also consider using `lapply` for a similar workaround. 101 | 102 | ## Fitting the analysis model 103 | 104 | In order to analyze the imputed data, each data set is analyzed using regular complete-data techniques. 105 | For this purpose, `mitml` offers the `with` function. 106 | In the present example, we use it to fit the model of interest with the R package `lme4`. 107 | 108 | ```{r} 109 | fit <- with(implist, { 110 | lmer(MathAchiev ~ 1 + Sex + I.SES + G.SES + (1|ID)) 111 | }) 112 | ``` 113 | 114 | This results in a list of fitted models, one for each of the imputed data sets. 115 | 116 | ## Pooling 117 | 118 | The results obtained from the imputed data sets must be pooled in order to obtain a set of final parameter estimates and inferences. 119 | In the following, we employ a number of different pooling methods that can be used to address common statistical tasks, for example, for (a) estimating and testing individual parameters, (b) model comparisons, and (c) tests of constraints about one or several parameters.
120 | 121 | #### Parameter estimates 122 | 123 | Individual parameters are commonly pooled with the rules developed by Rubin (1987). 124 | In `mitml`, Rubin's rules are implemented in the `testEstimates` function. 125 | 126 | ```{r} 127 | testEstimates(fit) 128 | ``` 129 | 130 | In addition, the argument `extra.pars = TRUE` can be used to obtain pooled estimates of variance components, and `df.com` can be used to specify the complete-data degrees of freedom, which provides more appropriate (i.e., conservative) inferences in smaller samples. 131 | 132 | For example, using a conservative value for the complete-data degrees of freedom for the fixed effects in the model of interest (Snijders & Bosker, 2012), the output changes as follows. 133 | 134 | ```{r} 135 | testEstimates(fit, extra.pars = TRUE, df.com = 46) 136 | ``` 137 | 138 | #### Multiple parameters and model comparisons 139 | 140 | Oftentimes, statistical inference concerns more than one parameter at a time. 141 | For example, the combined influence of SES (within and between groups) on mathematics achievement is represented by two parameters in the model of interest. 142 | 143 | Multiple pooling methods for Wald and likelihood ratio tests (LRTs) are implemented in the `testModels` function. 144 | This function requires the specification of a full model and a restricted model, which are then compared using (pooled) Wald tests or LRTs. 145 | Specifically, `testModels` allows users to pool Wald tests ($D_1$), $\chi^2$ test statistics ($D_2$), and LRTs ($D_3$ and $D_4$; for a comparison of these methods, see also Grund, Lüdtke, & Robitzsch, 2016b). 146 | 147 | To examine the combined influence of SES on mathematics achievement, the following restricted model can be specified and compared with the model of interest (using $D_1$). 
148 | 149 | ```{r} 150 | fit.null <- with(implist, { 151 | lmer(MathAchiev ~ 1 + Sex + (1|ID)) 152 | }) 153 | 154 | testModels(fit, fit.null) 155 | ``` 156 | > **Note regarding the order of arguments:** 157 | > Please note that `testModels` expects that the first argument contains the full model, and the second argument contains the restricted model. 158 | > If the order of the arguments is reversed, the results will not be interpretable. 159 | 160 | Similar to the test for individual parameters, smaller samples can be accommodated with `testModels` (with method $D_1$) by specifying the complete-data degrees of freedom for the denominator of the $F$ statistic. 161 | 162 | ```{r} 163 | testModels(fit, fit.null, df.com = 46) 164 | ``` 165 | 166 | The pooling method used by `testModels` is determined by the `method` argument. 167 | For example, to calculate the pooled LRT corresponding to the Wald test above (i.e., $D_3$), the following command can be issued. 168 | 169 | ```{r} 170 | testModels(fit, fit.null, method="D3") 171 | ``` 172 | 173 | #### Constraints on parameters 174 | 175 | Finally, it is often useful to investigate functions (or constraints) of the parameters in the model of interest. 176 | In complete data sets, this can be achieved with a test of linear hypotheses or the delta method. 177 | The `mitml` package implements a pooled version of the delta method in the `testConstraints` function. 178 | 179 | For example, the combined influence of SES on mathematics achievement can also be tested without model comparisons by testing the constraint that the parameters pertaining to `I.SES` and `G.SES` are both zero. 180 | This constraint is defined and tested as follows. 181 | 182 | ```{r} 183 | c1 <- c("I.SES", "G.SES") 184 | testConstraints(fit, constraints = c1) 185 | ``` 186 | 187 | This test is identical to the Wald test given in the previous section. 
188 | Arbitrary constraints on the parameters can be specified and tested in this manner, where each character string denotes an expression to be tested against zero. 189 | 190 | In the present example, we are also interested in the *contextual* effect of SES on mathematics achievement (e.g., Snijders & Bosker, 2012). 191 | The contextual effect is simply the difference between the coefficients pertaining to `G.SES` and `I.SES` and can be tested as follows. 192 | 193 | ```{r} 194 | c2 <- c("G.SES - I.SES") 195 | testConstraints(fit, constraints = c2) 196 | ``` 197 | 198 | Similar to model comparisons, constraints can be tested with different methods ($D_1$ and $D_2$) and can accommodate smaller samples by a value for `df.com`. 199 | Further examples for the analysis of multiply imputed data sets with `mitml` are given by Enders (2016) and Grund, Lüdtke, and Robitzsch (2016a). 200 | 201 | ###### References 202 | 203 | Enders, C. K. (2016). Multiple imputation as a flexible tool for missing data handling in clinical research. *Behaviour Research and Therapy*. doi: 10.1016/j.brat.2016.11.008 ([Link](https://doi.org/10.1016/j.brat.2016.11.008)) 204 | 205 | Grund, S., Lüdtke, O., & Robitzsch, A. (2016a). Multiple imputation of multilevel missing data: An introduction to the R package pan. *SAGE Open*, *6*(4), 1–17. doi: 10.1177/2158244016668220 ([Link](https://doi.org/10.1177/2158244016668220)) 206 | 207 | Grund, S., Lüdtke, O., & Robitzsch, A. (2016b). Pooling ANOVA results from multiply imputed datasets: A simulation study. *Methodology*, *12*, 75–88. doi: 10.1027/1614-2241/a000111 ([Link](https://doi.org/10.1027/1614-2241/a000111)) 208 | 209 | Rubin, D. B. (1987). *Multiple imputation for nonresponse in surveys*. Hoboken, NJ: Wiley. 210 | 211 | Snijders, T. A. B., & Bosker, R. J. (2012). *Multilevel analysis: An introduction to basic and advanced multilevel modeling*. Thousand Oaks, CA: Sage. 
212 | 213 | --- 214 | 215 | ```{r, echo=F} 216 | cat("Author: Simon Grund (simon.grund@uni-hamburg.de)\nDate: ", as.character(Sys.Date())) 217 | ``` 218 | 219 | -------------------------------------------------------------------------------- /vignettes/Introduction.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction" 3 | output: 4 | rmarkdown::html_vignette: 5 | css: "css/vignette.css" 6 | vignette: > 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteIndexEntry{Introduction} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | --- 13 | 14 | ```{r setup, include=FALSE, cache=FALSE} 15 | library(knitr) 16 | set.seed(123) 17 | options(width=87) 18 | opts_chunk$set(background="#ffffff", comment="#", collapse=FALSE, 19 | fig.width=9, fig.height=9, warning=FALSE, 20 | message=FALSE) 21 | ``` 22 | 23 | This vignette is intended to provide a first introduction to the R package `mitml` for generating and analyzing multiple imputations for multilevel missing data. 24 | A usual application of the package may consist of the following steps. 25 | 26 | 1. Imputation 27 | 2. Assessment of convergence 28 | 3. Completion of the data 29 | 4. Analysis 30 | 5. Pooling 31 | 32 | The `mitml` package offers a set of tools to facilitate each of these steps. 33 | This vignette is intended as a step-by-step illustration of the basic features of `mitml`. 34 | Further information can be found in the other [vignettes](https://github.com/simongrund1/mitml/wiki) and the package [documentation](https://cran.r-project.org/package=mitml/mitml.pdf). 35 | 36 | ## Example data 37 | 38 | For the purposes of this vignette, we employ a simple example that makes use of the `studentratings` data set, which is provided with `mitml`. 39 | To use it, the `mitml` package and the data set must be loaded as follows. 
40 | 41 | ```{r} 42 | library(mitml) 43 | data(studentratings) 44 | ``` 45 | 46 | More information about the variables in the data set can be obtained from its `summary`. 47 | 48 | ```{r} 49 | summary(studentratings) 50 | ``` 51 | 52 | In addition, the correlations between variables (based on pairwise observations) may be useful for identifying possible sources of information that may be used during the treatment of missing data. 53 | 54 | ```{r, echo=FALSE} 55 | round(cor(studentratings[,-(1:3)], use="pairwise"),3) 56 | ``` 57 | 58 | This illustrates that (a) most variables in the data set are affected by missing data, but also (b) that substantial relations exist between variables. 59 | For simplicity, we focus on only a subset of these variables. 60 | 61 | ## Model of interest 62 | 63 | For the present example, we focus on the two variables `ReadDis` (disciplinary problems in reading class) and `ReadAchiev` (reading achievement). 64 | 65 | Assume we are interested in the relation between these variables. 66 | Specifically, we may be interested in the following analysis model 67 | 68 | $$ 69 | \mathit{ReadAchiev}_{ij} = \gamma_{00} + \gamma_{10} \mathit{ReadDis}_{ij} + u_{0j} + e_{ij} 70 | $$ 71 | 72 | On the basis of the syntax used in the R package `lme4`, this model may be written as follows. 73 | 74 | ```{r, results="hide"} 75 | ReadAchiev ~ 1 + ReadDis + (1|ID) 76 | ``` 77 | 78 | In this model, the relation between `ReadDis` and `ReadAchiev` is represented by a single fixed effect of `ReadDis`, and a random intercept is included to account for the clustered structure of the data and the group-level variance in `ReadAchiev` that is not explained by `ReadDis`. 79 | 80 | ## Generating imputations 81 | 82 | The `mitml` package includes wrapper functions for the R packages `pan` (`panImpute`) and `jomo` (`jomoImpute`). 83 | Here, we will use the first option. 84 | To generate imputations with `panImpute`, the user must specify (at least): 85 | 86 | 1. 
an imputation model 87 | 2. the number of iterations and imputations 88 | 89 | The easiest way of specifying the imputation model is to use the `formula` argument of `panImpute`. 90 | Generally speaking, the imputation model should include all variables that are either (a) part of the model of interest, (b) related to the variables in the model, or (c) related to whether the variables in the model are missing. 91 | 92 | In this simple example, we include only `ReadDis` and `ReadAchiev` as the main target variables and `SchClimate` as an auxiliary variable. 93 | 94 | ```{r} 95 | fml <- ReadAchiev + ReadDis + SchClimate ~ 1 + (1|ID) 96 | ``` 97 | 98 | Note that, in this specification of the imputation model, all variables are included on the left-hand side of the model, whereas the right-hand side is left "empty". 99 | This model allows for all relations between variables at Level 1 and 2 and is thus suitable for most applications of the multilevel random intercept model (for further discussion, see also Grund, Lüdtke, & Robitzsch, 2016, 2018). 100 | 101 | The imputation procedure is then run for 5,000 iterations (burn-in), after which 100 imputations are drawn every 100 iterations. 102 | 103 | ```{r, results="hide"} 104 | imp <- panImpute(studentratings, formula = fml, n.burn = 5000, n.iter = 100, m = 100) 105 | ``` 106 | 107 | This step may take a few seconds. 108 | Once the process is completed, the imputations are saved in the `imp` object. 109 | 110 | ## Assessing convergence 111 | 112 | In `mitml`, there are two options for assessing the convergence of the imputation procedure. 113 | First, the `summary` calculates the "potential scale reduction factor" ($\hat{R}$) for each parameter in the imputation model. 114 | If this value is noticeably larger than 1 for some parameters (say $>1.05$), a longer burn-in period may be required. 115 | 116 | ```{r} 117 | summary(imp) 118 | ``` 119 | 120 | Second, diagnostic plots can be requested with the `plot` function.
121 | These plots consist of a trace plot, an autocorrelation plot, and some additional information about the posterior distribution. 122 | Convergence can be assumed if the trace plot is stationary (i.e., does not "drift"), and the autocorrelation is within reasonable bounds for the chosen number of iterations between imputations. 123 | 124 | For this example, we examine only the plot for the parameter `Beta[1,2]` (i.e., the intercept of `ReadDis`). 125 | 126 | ```{r conv, echo=FALSE} 127 | plot(imp, trace = "all", print = "beta", pos = c(1,2), export = "png", 128 | dev.args = list(width=720, height=380, pointsize=16)) 129 | ``` 130 | ```{r, eval=FALSE} 131 | plot(imp, trace = "all", print = "beta", pos = c(1,2)) 132 | ``` 133 | 134 | ![](mitmlPlots/BETA_ReadDis_ON_Intercept.png) 135 | 136 | Taken together, both $\hat{R}$ and the diagnostic plots indicate that the imputation model converged, setting the basis for the analysis of the imputed data sets. 137 | 138 | ## Completing the data 139 | 140 | In order to work with and analyze the imputed data sets, the data sets must be completed with the imputations generated in the previous steps. 141 | To do so, `mitml` provides the function `mitmlComplete`. 142 | 143 | ```{r} 144 | implist <- mitmlComplete(imp, "all") 145 | ``` 146 | 147 | This resulting object is a list that contains the 100 completed data sets. 148 | 149 | 150 | ## Analysis and pooling 151 | 152 | In order to obtain estimates for the model of interest, the model must be fit separately to each of the completed data sets, and the results must be pooled into a final set of estimates and inferences. 153 | The `mitml` package offers the `with` function to fit various statistical models to a list of completed data sets. 154 | 155 | In this example, we use the `lmer` function from the R package `lme4` to fit the model of interest. 
156 | 157 | ```{r, message=FALSE} 158 | library(lme4) 159 | fit <- with(implist, lmer(ReadAchiev ~ 1 + ReadDis + (1|ID))) 160 | ``` 161 | 162 | The resulting object is a list containing the 100 fitted models. 163 | To pool the results of these models into a set of final estimates and inferences, `mitml` offers the `testEstimates` function. 164 | 165 | ```{r} 166 | testEstimates(fit, extra.pars = TRUE) 167 | ``` 168 | 169 | The estimates can be interpreted in a manner similar to the estimates from the corresponding complete-data procedure. 170 | In addition, the output includes diagnostic quantities such as the fraction of missing information (FMI), which can be helpful for interpreting the results and understanding problems with the imputation procedure. 171 | 172 | ###### References 173 | 174 | Grund, S., Lüdtke, O., & Robitzsch, A. (2016). Multiple imputation of multilevel missing data: An introduction to the R package pan. *SAGE Open*, *6*(4), 1–17. doi: 10.1177/2158244016668220 ([Link](https://doi.org/10.1177/2158244016668220)) 175 | 176 | Grund, S., Lüdtke, O., & Robitzsch, A. (2018). Multiple imputation of missing data for multilevel models: Simulations and recommendations. *Organizational Research Methods*, *21*(1), 111–149. 
doi: 10.1177/1094428117703686 ([Link](https://doi.org/10.1177/1094428117703686)) 177 | 178 | --- 179 | 180 | ```{r, echo=F} 181 | cat("Author: Simon Grund (simon.grund@uni-hamburg.de)\nDate: ", as.character(Sys.Date())) 182 | ``` 183 | 184 | -------------------------------------------------------------------------------- /vignettes/Level2.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Imputation of Missing Data at Level 2" 3 | output: 4 | rmarkdown::html_vignette: 5 | css: "css/vignette.css" 6 | vignette: > 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteIndexEntry{Imputation of Missing Data at Level 2} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | --- 13 | 14 | ```{r setup, include=FALSE, cache=FALSE} 15 | library(knitr) 16 | set.seed(123) 17 | options(width=87) 18 | opts_chunk$set(background="#ffffff", comment="#", collapse=FALSE, 19 | fig.width=9, fig.height=9, warning=FALSE, 20 | message=FALSE) 21 | ``` 22 | 23 | This vignette illustrates the use of `mitml` for the treatment of missing data at Level 2. 24 | Specifically, the vignette addresses the following topics: 25 | 26 | 1. Specification of the two-level imputation model for missing data at both Level 1 and 2 27 | 2. Running the imputation procedure 28 | 29 | Further information can be found in the other [vignettes](https://github.com/simongrund1/mitml/wiki) and the package [documentation](https://cran.r-project.org/package=mitml/mitml.pdf). 30 | 31 | ## Example data 32 | 33 | For purposes of this vignette, we make use of the `leadership` data set, which contains simulated data from 750 employees in 50 groups including ratings on job satisfaction, leadership style, and work load (Level 1) as well as cohesion (Level 2). 34 | 35 | The package and the data set can be loaded as follows. 
36 | 37 | ```{r} 38 | library(mitml) 39 | data(leadership) 40 | ``` 41 | 42 | In the `summary` of the data, it becomes visible that all variables are affected by missing data. 43 | 44 | ```{r} 45 | summary(leadership) 46 | ``` 47 | 48 | The following data segment illustrates this fact, including cases with missing data at Level 1 (e.g., job satisfaction) and 2 (e.g., cohesion). 49 | 50 | ```{r, echo=FALSE} 51 | leadership[73:78,] 52 | ``` 53 | 54 | In the following, we will employ a two-level model to address missing data at both levels simultaneously. 55 | 56 | ## Specifying the imputation model 57 | 58 | The specification of the two-level model involves two components, one pertaining to the variables at each level of the sample (Goldstein, Carpenter, Kenward, & Levin, 2009; for further discussion, see also Enders, Mistler, & Keller, 2016; Grund, Lüdtke, & Robitzsch, 2018). 59 | 60 | Specifically, the imputation model is specified as a list with two components, where the first component denotes the model for the variables at Level 1, and the second component denotes the model for the variables at Level 2. 61 | 62 | For example, using the `formula` interface, an imputation model targeting all variables in the data set can be written as follows. 63 | 64 | ```{r} 65 | fml <- list( JOBSAT + NEGLEAD + WLOAD ~ 1 + (1|GRPID) , # Level 1 66 | COHES ~ 1 ) # Level 2 67 | ``` 68 | 69 | The first component of this list includes the three target variables at Level 1 and a fixed (`1`) as well as a random intercept (`1|GRPID`). 70 | The second component includes the target variable at Level 2 with a fixed intercept (`1`).
71 | 72 | From a statistical point of view, this specification corresponds to the following model 73 | $$ 74 | \begin{aligned} 75 | \mathbf{y}_{1ij} &= \boldsymbol\mu_{1} + \mathbf{u}_{1j} + \mathbf{e}_{ij} \\ 76 | \mathbf{y}_{2j} &= \boldsymbol\mu_{2} + \mathbf{u}_{2j} \; , 77 | \end{aligned} 78 | $$ 79 | where $\mathbf{y}_{1ij}$ denotes the target variables at Level 1, $\mathbf{y}_{2j}$ the target variables at Level 2, and the right-hand side of the model contains the fixed effects, random effects, and residual terms as mentioned above. 80 | 81 | Note that, even though the two components of the model appear to be separated, they define a single (joint) model for all target variables at both Level 1 and 2. 82 | Specifically, this model employs a two-level covariance structure, which allows for relations between variables at both Level 1 (i.e., correlated residuals at Level 1) and 2 (i.e., correlated random effects residuals at Level 2). 83 | 84 | ## Generating imputations 85 | 86 | Because the data contain missing values at both levels, imputations will be generated with `jomoImpute` (and not `panImpute`). 87 | Except for the specification of the two-level model, the syntax is the same as in applications with missing data only at Level 1. 88 | 89 | Here, we will run 5,000 burn-in iterations and generate 20 imputations, each 250 iterations apart. 90 | 91 | ```{r, results="hide"} 92 | imp <- jomoImpute(leadership, formula = fml, n.burn = 5000, n.iter = 250, m = 20) 93 | ``` 94 | 95 | By looking at the `summary`, we can then review the imputation procedure and verify that the imputation model converged. 96 | 97 | ```{r} 98 | summary(imp) 99 | ``` 100 | 101 | Due to the greater complexity of the two-level model, the output includes more information than in applications with missing data only at Level 1. 102 | For example, the output features the model specification for variables at both Level 1 and 2.
103 | Furthermore, it provides convergence statistics for the additional regression coefficients for the target variables at Level 2 (i.e., `Beta2`). 104 | 105 | Finally, it also becomes visible that the two-level model indeed allows for relations between target variables at Level 1 and 2. 106 | This can be seen from the fact that the potential scale reduction factor ($\hat{R}$) for the covariance matrix at Level 2 (`Psi`) was largest for `Psi[4,3]`, which is the covariance between cohesion and the random intercept of work load. 107 | 108 | ## Completing the data 109 | 110 | The completed data sets can then be extracted with `mitmlComplete`. 111 | 112 | ```{r} 113 | implist <- mitmlComplete(imp, "all") 114 | ``` 115 | 116 | When inspecting the completed data, it is easy to verify that the imputations for variables at Level 2 are constant within groups as intended, thus preserving the two-level structure of the data. 117 | 118 | ```{r, echo=FALSE} 119 | implist[[1]][73:78,] 120 | ``` 121 | 122 | ###### References 123 | 124 | Enders, C. K., Mistler, S. A., & Keller, B. T. (2016). Multilevel multiple imputation: A review and evaluation of joint modeling and chained equations imputation. *Psychological Methods*, *21*, 222–240. doi: 10.1037/met0000063 ([Link](https://doi.org/10.1037/met0000063)) 125 | 126 | Goldstein, H., Carpenter, J. R., Kenward, M. G., & Levin, K. A. (2009). Multilevel models with multivariate mixed response types. *Statistical Modelling*, *9*, 173–197. doi: 10.1177/1471082X0800900301 ([Link](https://doi.org/10.1177/1471082X0800900301)) 127 | 128 | Grund, S., Lüdtke, O., & Robitzsch, A. (2018). Multiple imputation of missing data for multilevel models: Simulations and recommendations. *Organizational Research Methods*, *21*(1), 111–149. 
doi: 10.1177/1094428117703686 ([Link](https://doi.org/10.1177/1094428117703686)) 129 | 130 | --- 131 | 132 | ```{r, echo=F} 133 | cat("Author: Simon Grund (simon.grund@uni-hamburg.de)\nDate: ", as.character(Sys.Date())) 134 | ``` 135 | 136 | -------------------------------------------------------------------------------- /vignettes/css/vignette.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #fff; 3 | max-width: 720px; 4 | overflow: visible; 5 | padding-left: 2em; 6 | padding-right: 2em; 7 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif; 8 | font-size: 16px; 9 | font-weight: 500; 10 | line-height: 1.65; 11 | text-align: justify; 12 | text-justify: inter-word; 13 | margin: 2em auto; 14 | } 15 | 16 | #header { 17 | text-align: center; 18 | } 19 | 20 | #TOC { 21 | clear: both; 22 | margin: 0 0 10px 10px; 23 | padding: 4px; 24 | width: 400px; 25 | border: 1px solid #CCCCCC; 26 | border-radius: 5px; 27 | 28 | background-color: #f6f6f6; 29 | font-size: 16px; 30 | line-height: 1.5; 31 | text-align: left; 32 | } 33 | #TOC .toctitle { 34 | font-weight: bold; 35 | font-size: 18px; 36 | margin-left: 5px; 37 | } 38 | 39 | #TOC ul { 40 | padding-left: 40px; 41 | margin-left: -1.5em; 42 | margin-top: 5px; 43 | margin-bottom: 5px; 44 | } 45 | #TOC ul ul { 46 | margin-left: -2em; 47 | } 48 | #TOC li { 49 | line-height: 16px; 50 | } 51 | 52 | p { 53 | margin: 0.6em 0; 54 | } 55 | 56 | blockquote { 57 | border-left:3px dotted #e5e5e5; 58 | background-color: #fff; 59 | padding: 0 1em; 60 | margin: 0.9em 0; 61 | } 62 | 63 | 64 | hr { 65 | border-style: solid; 66 | border: none; 67 | border-top: 1px solid #777; 68 | margin: 28px 0; 69 | } 70 | 71 | dl { 72 | margin-left: 0; 73 | } 74 | dl dd { 75 | margin-bottom: 13px; 76 | margin-left: 13px; 77 | } 78 | dl dt { 79 | font-weight: bold; 80 | } 81 | 82 | ul, ol { 83 | text-align: left; 84 | } 85 | ul { 86 | margin-top: 0; 87 | } 88 | ul li { 89 | 
list-style: circle outside; 90 | } 91 | ul ul { 92 | margin-bottom: 0; 93 | } 94 | 95 | pre, code { 96 | background-color: #f7f7f7; 97 | line-height: 1.2; 98 | border-radius: 3px; 99 | color: #333; 100 | padding: 0px; 101 | white-space: pre; /* or: pre-wrap */ 102 | overflow-x: auto; 103 | } 104 | pre { 105 | border-radius: 3px; 106 | margin: 5px 0px 10px 0px; 107 | padding: 10px; 108 | } 109 | 110 | code { 111 | font-family: Consolas, monospace; 112 | font-size: 85%; 113 | } 114 | p > code, li > code { 115 | padding: 2px 2px; 116 | } 117 | 118 | h1, h2, h3, h4, h5, h6 { 119 | text-align: left; 120 | line-height: 1.2; 121 | } 122 | 123 | h1 { 124 | font-size: 2em; 125 | font-weight: 600; 126 | } 127 | 128 | h2 { 129 | color: #191919; 130 | font-size: 1.5em; 131 | font-weight: 600; 132 | } 133 | 134 | h3, h4, h5 { 135 | color: #292929; 136 | font-weight: 600; 137 | } 138 | 139 | /* Reference list */ 140 | h6 { 141 | color:#191919; 142 | font-size: 1.5em; 143 | font-weight: 600; 144 | margin-top: 0.83em; 145 | margin-bottom: 0.83em; 146 | } 147 | h6 ~ p { 148 | text-align: left; 149 | } 150 | 151 | a { 152 | color: #777; 153 | text-decoration: none; 154 | } 155 | a:hover { 156 | color: #aaa; 157 | text-decoration: underline; 158 | } 159 | /* 160 | a:visited { 161 | color: #777; 162 | } 163 | a:visited:hover { 164 | color: #aaa; 165 | text-decoration: underline; 166 | } 167 | */ 168 | 169 | /* tables */ 170 | 171 | table, table th, table td { 172 | border-left-style: none; 173 | border-right-style: none; 174 | } 175 | 176 | table { 177 | margin-top: 25px; 178 | margin-bottom: 25px; 179 | margin-left: auto; 180 | margin-right: auto; 181 | border-collapse: collapse; 182 | border-spacing: 0; 183 | } 184 | 185 | th { 186 | padding:5px 10px; 187 | border: 1px solid #b2b2b2; 188 | } 189 | 190 | td { 191 | padding:5px 10px; 192 | border: 1px solid #e5e5e5; 193 | } 194 | 195 | dt { 196 | color:#444; 197 | font-weight:500; 198 | } 199 | 200 | th { 201 | color:#444; 202 | } 203 
| table thead, table tr.even { 204 | background-color: #f7f7f7; 205 | } 206 | 207 | /* images */ 208 | 209 | img { 210 | display: block; 211 | margin-left: auto; 212 | margin-right: auto; 213 | max-width:100%; 214 | } 215 | div.figure { 216 | text-align: center; 217 | } 218 | 219 | /* hovering behavior for images (e.g., play/pause GIFs) */ 220 | 221 | .gif_play, #gif:hover .gif_stop{ 222 | display:none 223 | } 224 | .gif_stop, #gif:hover .gif_play{ 225 | display:block 226 | } 227 | 228 | /* code highlighting */ 229 | 230 | pre code { color: #707070; } /* General Code w/o Class */ 231 | pre code.r { color: #333333; } /* General Code */ 232 | code span.kw { color: #558200; font-weight: normal; } /* Keyword */ 233 | code span.co { color: #707070; font-style: normal; } /* Comment */ 234 | code span.dt { color: #333333; } /* Data Type */ 235 | code span.fu { color: #558200; } /* Function calls */ 236 | code span.dv { color: #007878; } /* Decimal Values */ 237 | code span.bn { color: #007878; } /* Base N */ 238 | code span.fl { color: #007878; } /* Float */ 239 | code span.ch { color: #985b00; } /* Character */ 240 | code span.st { color: #985b00; } /* String */ 241 | code span.ot { color: #007878; } /* Other Token */ 242 | code span.al { color: #a61717; font-weight: bold; } /* Alert Token */ 243 | code span.er { color: #a61717; background-color: #e3d2d2; } /* Error Token */ 244 | 245 | --------------------------------------------------------------------------------