├── .Rbuildignore ├── .github └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── 00class.R ├── blav_adapt_quad.R ├── blav_compare.R ├── blav_cond_utils.R ├── blav_fit.R ├── blav_fit_measures.R ├── blav_model_loglik.R ├── blav_object_inspect.R ├── blav_object_methods.R ├── blav_predict.R ├── blav_test.R ├── blav_utils.R ├── blavaan.R ├── ctr_bayes_fit.R ├── ctr_ppmc.R ├── dpriors.R ├── jags2r.R ├── lav_export_mcmc.R ├── lav_export_stanclassic.R ├── lav_export_stancond.R ├── lav_export_stanmarg.R ├── lvgqs.R ├── margloglik.R ├── postpred.R ├── set_inits.R ├── set_partable.R ├── set_priors.R ├── set_stancovs.R ├── set_stanpars.R ├── stanmarg_data.R └── zzz.R ├── README.md ├── _pkgdown.yml ├── blavaan.Rproj ├── configure ├── configure.win ├── inst ├── CITATION ├── include │ └── stan_meta_header.hpp ├── stan │ ├── include │ │ └── license.stan │ └── stanmarg.stan ├── stanfuns │ ├── fill_lower.stan │ ├── sem_lv.stan │ ├── sem_lv_missing.stan │ ├── sem_mean.stan │ └── sem_mean_eta.stan ├── testdata │ └── sysdata.rda └── tinytest │ ├── tests.blavaan.R │ └── tests.blavaanobject-methods.R ├── man ├── bcfa.Rd ├── bgrowth.Rd ├── blavCompare.Rd ├── blavFitIndices.Rd ├── blavInspect.Rd ├── blavPredict.Rd ├── blav_internal.Rd ├── blavaan-class.Rd ├── blavaan.Rd ├── bsem.Rd ├── dpriors.Rd ├── plot.blavaan.Rd ├── ppmc.Rd ├── sampleData.Rd └── standardizedPosterior.Rd ├── tests ├── blavaan_examples.R └── tinytest.R └── vignettes ├── approx_fi.Rmd ├── convergence_efficiency.Rmd ├── convergence_loop.Rmd ├── cross_loadings_strong_priors.Rmd ├── estimate.Rmd ├── invariance.Rmd ├── mod_indices.Rmd ├── model_comparison.Rmd ├── multilevel.Rmd ├── ordinal.Rmd ├── plotting.Rmd ├── prior.Rmd ├── prior_pred_checks.Rmd ├── probability_direction.Rmd ├── refs.bib ├── resources.Rmd ├── start.Rmd └── summaries.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 
2 | ^tests/blavaan_examples\.R$ 3 | ^.*\.Rhistory$ 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | ^.*\.so$ 7 | ^.*\.o$ 8 | ^.*\.h$ 9 | ^src/stan_files$ 10 | ^\.github$ 11 | ^_pkgdown\.yml$ 12 | ^docs$ 13 | ^pkgdown$ 14 | ^vignettes$ 15 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | # - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | # - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: rcmdcheck, rjags, runjags 45 | dependencies: NA 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | env: 49 | _R_CHECK_FORCE_SUGGESTS_: false 50 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived 
from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | extra-repositories: | 31 | https://mc-stan.org/r-packages/ | 32 | https://ecmerkle.github.io/drat/ 33 | 34 | - uses: r-lib/actions/setup-r-dependencies@v2 35 | with: 36 | extra-packages: any::pkgdown, local::. 37 | needs: website 38 | 39 | - name: Build site 40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 41 | shell: Rscript {0} 42 | 43 | - name: Deploy to GitHub pages 🚀 44 | if: github.event_name != 'pull_request' 45 | uses: JamesIves/github-pages-deploy-action@v4.4.1 46 | with: 47 | clean: false 48 | branch: gh-pages 49 | folder: docs 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/* -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: blavaan 2 | Title: Bayesian Latent Variable Analysis 3 | Version: 0.5-8.1354 4 | Authors@R: c(person(given = "Edgar", family = "Merkle", 5 | role = c("aut", "cre"), 6 | email = "merklee@missouri.edu", 7 | comment = c(ORCID = "0000-0001-7158-0653")), 8 | person(given = 
"Yves", family = "Rosseel", 9 | role = "aut", 10 | email = "Yves.Rosseel@UGent.be"), 11 | person(given = "Ben", family = "Goodrich", role = "aut", 12 | email = "benjamin.goodrich@columbia.edu"), 13 | person(given = "Mauricio", 14 | family = "Garnier-Villarreal", 15 | role = "ctb", 16 | email = "mgv@pm.me", 17 | comment = c(ORCID = "0000-0002-2951-6647", 18 | "R/blav_compare.R", "R/ctr_bayes_fit.R", "vignettes")), 19 | person(given = c("Terrence", "D."), family = "Jorgensen", 20 | role = "ctb", 21 | email = "TJorgensen314@gmail.com", 22 | comment = c(ORCID = "0000-0001-5111-6773", 23 | "R/ctr_bayes_fit.R", "R/ctr_ppmc.R", "R/blav_predict.R")), 24 | person(given = "Huub", family="Hoofs", role = "ctb", 25 | comment = "R/ctr_bayes_fit.R"), 26 | person(given = "Rens", family="van de Schoot", 27 | role = "ctb", comment = "R/ctr_bayes_fit.R"), 28 | person(given = "Andrew", family="Johnson", role="ctb", 29 | comment = "Makevars"), 30 | person(given = "Matthew", family="Emery", role="ctb", 31 | comment = "loo moment_match") 32 | ) 33 | Description: Fit a variety of Bayesian latent variable models, including confirmatory 34 | factor analysis, structural equation models, and latent growth curve models. References: Merkle & Rosseel (2018) ; Merkle et al. (2021) . 
35 | License: GPL (>= 3) 36 | ByteCompile: true 37 | Depends: R(>= 3.5.0), methods, Rcpp(>= 0.12.15) 38 | Imports: stats, utils, graphics, lavaan(>= 0.6-18), coda, mnormt, nonnest2(>= 0.5-7), loo(>= 2.0), rstan(>= 2.26.0), rstantools(>= 1.5.0), RcppParallel (>= 5.0.1), bayesplot, Matrix, future.apply, tmvnsim, igraph 39 | LinkingTo: StanHeaders (>= 2.26.0), rstan (>= 2.26.0), BH (>= 1.69.0), Rcpp (>= 0.12.15), RcppEigen (>= 0.3.3.4.0), RcppParallel (>= 5.0.1) 40 | Suggests: runjags(>= 2.0.4-3), modeest(>= 2.3.3), rjags, cmdstanr, semTools, blavsam, tinytest 41 | SystemRequirements: GNU make 42 | NeedsCompilation: yes 43 | URL: https://ecmerkle.github.io/blavaan/, https://github.com/ecmerkle/blavaan 44 | BugReports: https://github.com/ecmerkle/blavaan/issues 45 | Additional_repositories: https://stan-dev.r-universe.dev, https://ecmerkle.github.io/drat 46 | Config/Needs/website: brms 47 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | importFrom("methods", 2 | "is", "new", "slot", "slotNames", "callNextMethod", "getMethod") 3 | 4 | importFrom("utils", 5 | # "sessionInfo", 6 | "packageDescription", "str", "write.table", "packageVersion", 7 | "capture.output", "head", "tail", "getFromNamespace", "compareVersion") 8 | 9 | importFrom("stats", 10 | "approx", "density", "median", 11 | "dbinom", "dnorm", "pnorm", "rgamma", "rnorm", 12 | "runif", "sd", "quantile", "rWishart", "cov", "cor", 13 | "coef", "logLik", 14 | "residuals", "resid", 15 | "fitted.values", "fitted", "na.omit", 16 | "predict", 17 | "update", 18 | "anova", 19 | "vcov", "nobs", "cov2cor") 20 | 21 | importFrom("graphics", 22 | "plot", "hist", "pairs", "legend", "par", "plot.new", 23 | "polygon") 24 | 25 | importFrom("grDevices", 26 | "adjustcolor") 27 | 28 | importFrom("lavaan", 29 | "lavaan", "logLik", 30 | "fitMeasures", "fitmeasures", 31 | "inspect", "lavInspect", "lavTech", 
"lavNames", 32 | "lavParseModelString", "lavMatrixRepresentation", 33 | "lav_func_jacobian_complex", "lav_func_jacobian_simple", 34 | "lav_partable_labels", "lavaanify", 35 | "lav_model_get_parameters", "lav_model_implied", 36 | "lav_model_set_parameters", "lav_model_vcov_se", 37 | "lav_partable_attributes", 38 | "modificationIndices", "parTable", "parameterEstimates", 39 | "lavPredict", "standardizedSolution", "lav_data_update") 40 | 41 | importFrom("coda", 42 | "mcmc.list", 43 | "mcmc", "as.mcmc", 44 | "HPDinterval") 45 | 46 | importFrom("mnormt", 47 | "dmnorm", 48 | "rmnorm", 49 | "sadmvn") 50 | 51 | importFrom("nonnest2", 52 | "llcont") 53 | 54 | importFrom("rstan", 55 | "sampling", "stan", "vb") 56 | 57 | importFrom("loo", 58 | "loo", "waic", "loo_compare", "relative_eff") 59 | 60 | importFrom("Matrix", 61 | "Matrix") 62 | 63 | importFrom("future.apply", 64 | "future_lapply", "future_sapply") 65 | 66 | importFrom("tmvnsim", 67 | "tmvnsim") 68 | 69 | importFrom("igraph", 70 | "graph_from_adjacency_matrix", "components") 71 | 72 | ## need to import something, though others could be used 73 | importFrom("bayesplot", 74 | "mcmc_trace") 75 | ## "mcmc_acf", "mcmc_acf_bar", "mcmc_areas", 76 | ## "mcmc_areas_data", "mcmc_areas_ridges", 77 | ## "mcmc_areas_ridges_data", "mcmc_combo", "mcmc_dens", 78 | ## "mcmc_dens_chains", "mcmc_dens_chains_data", 79 | ## "mcmc_dens_overlay", "mcmc_hex", "mcmc_hist", 80 | ## "mcmc_hist_by_chain", "mcmc_intervals", 81 | ## "mcmc_intervals_data", "mcmc_neff", "mcmc_neff_data", 82 | ## "mcmc_neff_hist", "mcmc_nuts_acceptance", 83 | ## "mcmc_nuts_divergence", "mcmc_nuts_energy", 84 | ## "mcmc_nuts_stepsize", "mcmc_nuts_treedepth", "mcmc_pairs", 85 | ## "mcmc_parcoord", "mcmc_parcoord_data", "mcmc_rank_hist", 86 | ## "mcmc_rank_overlay", "mcmc_recover_hist", 87 | ## "mcmc_recover_intervals", "mcmc_recover_scatter", 88 | ## "mcmc_rhat", "mcmc_rhat_data", "mcmc_rhat_hist", 89 | ## "mcmc_scatter", "mcmc_trace", "mcmc_trace_data", 90 | ## 
"mcmc_trace_highlight", "mcmc_violin") 91 | 92 | import(Rcpp) 93 | 94 | importFrom("RcppParallel", "CxxFlags", "RcppParallelLibs") 95 | 96 | import(rstantools) 97 | 98 | export("blavaan", "bcfa", "bsem", "bgrowth", "dpriors", "BF", "blavCompare", 99 | "blavTech", "blavInspect", "blavFitIndices", "labelfun", "standardizedposterior", 100 | "standardizedPosterior", "ppmc", "blavPredict", "sampleData") 101 | 102 | exportClasses("blavaan", "blavPPMC", "blavFitIndices") 103 | 104 | exportMethods("summary", "coef", "show", "predict") 105 | 106 | S3method(plot, blavaan) 107 | #S3method(summary, blavaan) 108 | S3method(summary, blavPPMC) 109 | S3method(plot, blavPPMC) 110 | S3method(hist, blavPPMC) 111 | S3method(pairs, blavPPMC) 112 | S3method(summary, blavFitIndices) 113 | 114 | useDynLib(blavaan, .registration = TRUE) 115 | -------------------------------------------------------------------------------- /R/00class.R: -------------------------------------------------------------------------------- 1 | #setOldClass("runjags") 2 | setClass("blavaan", 3 | #slots = c( 4 | # runjags = "runjags" # output from run.jags() 5 | #), 6 | contains = "lavaan" 7 | ) 8 | -------------------------------------------------------------------------------- /R/blav_adapt_quad.R: -------------------------------------------------------------------------------- 1 | ## post-sampling, "adapted" gauss-hermite quadrature 2 | adapted_ghq <- function(fit, ngq, samprow = NULL) { 3 | samps <- do.call("rbind", make_mcmc(fit@external$mcmcout, fit@external$stanlvs)) 4 | 5 | lavmodel <- fill_params(samps[samprow, , drop = FALSE], fit@Model, fit@ParTable) 6 | GLIST <- lavmodel@GLIST 7 | if (any(GLIST$theta[lower.tri(GLIST$theta)] != 0L)) stop("blavaan ERROR: The quadrature method cannot be used with non-diagonal theta matrix.") 8 | alphas <- GLIST[which(names(GLIST) == "alpha")] 9 | psis <- GLIST[which(names(GLIST) == "psi")] 10 | N <- fit@SampleStats@ntotal 11 | 12 | grpidx <- rep(1, N) 13 | 
if(fit@Data@ngroups > 1) grpidx <- fit@Data@group 14 | 15 | ## compute mean and cov of each case i 16 | etamncov <- eta_moments(samps, N) 17 | 18 | ## get weights 19 | aws <- adapted_weights(samps, ngq, alphas, psis, grpidx, etamncov[[1]], etamncov[[2]], N) 20 | x.star.list <- aws[[1]]; w.star.list <- aws[[2]] 21 | 22 | ## loop thru quadrature points 23 | nqpt <- NROW(x.star.list[[1]]) 24 | out <- matrix(NA, length(etamncov[[1]]), nqpt) 25 | 26 | for(i in 1:nqpt){ 27 | samps[samprow,grep("^eta", colnames(samps))] <- as.numeric( sapply(1:length(etamncov[[1]]), function(k) x.star.list[[k]][i,]) ) 28 | 29 | out[,i] <- get_ll(postsamp = samps[samprow,], fit, 30 | casewise = TRUE, conditional = TRUE) 31 | } 32 | 33 | out <- exp(out) * do.call("rbind", w.star.list) 34 | 35 | return( log( rowSums(out) ) ) 36 | } 37 | 38 | ## fixed gauss-hermite quadrature, to reuse quadrature points across cases 39 | fixed_ghq <- function(fit, ngq, samprow = NULL) { 40 | GLIST <- fit@Model@GLIST 41 | if (any(GLIST$theta[lower.tri(GLIST$theta)] != 0L)) stop("blavaan ERROR: The quadrature method cannot be used with non-diagonal theta matrix.") 42 | ndim <- NROW(GLIST$alpha) 43 | 44 | if (blavInspect(fit, 'ngroups') > 1) stop("blavaan ERROR: The quadrature method currently does not support multiple groups.") 45 | 46 | samps <- do.call("rbind", make_mcmc(fit@external$mcmcout, fit@external$stanlvs)) 47 | if(length(samprow) > 0) samps <- samps[samprow, , drop = FALSE] 48 | 49 | lavigh <- getFromNamespace("lav_integration_gauss_hermite", "lavaan") 50 | XW <- lavigh(n = ngq, ndim = ndim, dnorm = TRUE) 51 | x.star <- XW$x 52 | x.star.eval <- apply(XW$x, 2, unique) 53 | w.star <- XW$w 54 | 55 | ## response patterns 56 | standata <- fit@external$mcmcdata 57 | if(length(standata$YX) > 0) stop("blavaan ERROR: The fixed quadrature method cannot handle mixes of continuous variables yet.") 58 | YX <- matrix(NA, NROW(standata$YX), NCOL(standata$YX) + NCOL(standata$YXo)) 59 | YX[, standata$contidx] <- 
standata$YX 60 | YX[, standata$ordidx] <- standata$YXo 61 | rpatts <- apply(standata$YXo, 1, paste0, collapse = "") 62 | upatts <- as.numeric(as.factor(rpatts)) 63 | ulocs <- which(!duplicated(upatts)) 64 | ## FIXME: also need to consider Ng > 1 in response patterns: 65 | YXou <- standata$YXo[!duplicated(upatts), , drop = FALSE] 66 | deltas <- which(names(fit@Model@GLIST) == "delta") 67 | th.idx <- fit@Model@th.idx 68 | Ng <- blavInspect(fit, 'ngroups') 69 | TH.idx <- lapply(1:Ng, function(g) th.idx[[g]][th.idx[[g]] > 0]) 70 | 71 | origlm <- fit@Model 72 | out <- matrix(NA, NROW(samps), NROW(YX)) 73 | 74 | for(i in 1:NROW(samps)) { 75 | lavmodel <- fill_params(samps[i, , drop = FALSE], origlm, fit@ParTable) 76 | lavmodel@GLIST[[deltas]] <- NULL 77 | ## fit@Model <- lavmodel 78 | ## mnvec <- lavPredict(fit, type = "ov", newdata = fakedat, 79 | ## ETA = x.star.eval) 80 | ## if(inherits(mnvec, "matrix")) mnvec <- list(mnvec) 81 | 82 | ## for each entry in mnvec, compute univariate likelihoods for each set of thresholds 83 | ## a matrix per column of mnvec: number of rows in x.star.eval by number of ordered categories 84 | likevals <- array(NA, dim = c(NROW(x.star.eval), max(standata$YXo), NCOL(standata$YXo), Ng)) 85 | 86 | for(g in 1:Ng) { 87 | mm.in.group <- 1:lavmodel@nmat[g] + cumsum(c(0,lavmodel@nmat[g]))[g] 88 | mms <- lavmodel@GLIST[mm.in.group] 89 | mnvec <- mms$lambda %*% t(x.star.eval) 90 | mnvec <- sweep(mnvec, 1, mms$nu, FUN = "+") 91 | 92 | for(j in 1:NCOL(standata$YXo)) { 93 | tmpidx <- unique(TH.idx[[g]])[j] 94 | tau <- c(-Inf, mms$tau[TH.idx[[g]] == tmpidx], Inf) 95 | utau <- tau[2:length(tau)] 96 | ltau <- tau[1:(length(tau) - 1)] 97 | 98 | for(k in 1:max(standata$YXo[,tmpidx])) { 99 | tmpprob <- pnorm(utau[k], mean = mnvec[tmpidx,], sd = sqrt(mms$theta[tmpidx, tmpidx])) - 100 | pnorm(ltau[k], mean = mnvec[tmpidx,], sd = sqrt(mms$theta[tmpidx, tmpidx])) 101 | 102 | #tmpprob[tmpprob == 0] <- 1e-300 103 | 104 | likevals[, k, j, g] <- log(tmpprob) 105 | } 
106 | } 107 | } 108 | 109 | ## for each response pattern, use x.star to pull values out of the above matrices and sum 110 | qpt.uniq <- matrix(NA, NROW(YXou), NROW(x.star)) 111 | diment <- apply(mms$lambda != 0, 1, which) ## FIXME only works for no cross-loadings 112 | tmpmatch <- sapply(1:ndim, function(j) match(x.star[,j], x.star.eval[,j])) 113 | 114 | ## all entries we need, which could replace the loop. but summing the right entries 115 | ## might take just as long. 116 | ##tmpent <- cbind(as.numeric(t(tmpmatch[,diment])), rep(as.numeric(t(YXou)), nrow(x.star)), 117 | ## rep(1:9, nrow(x.star) * nrow(YXou)), 1) 118 | ## tmpeval <- likevals[tmpent] 119 | for(p in 1:NROW(x.star)) { 120 | tmpeval <- t(sapply(1:NROW(YXou), function(ii) likevals[cbind(tmpmatch[p,diment], YXou[ii,], 1:NCOL(YXou), 1)])) ## FIXME last index is currently fixed at 1 for group 121 | 122 | qpt.uniq[,p] <- rowSums(tmpeval) 123 | } 124 | 125 | qpt.uniq <- sweep(exp(qpt.uniq), 2, w.star, FUN = "*") 126 | 127 | ## FIXME deal with continuous data here 128 | 129 | ## assign values to full data matrix, for each response pattern 130 | full.lik <- rep(NA, NROW(YX)) 131 | for(j in 1:length(ulocs)) { 132 | tmpidx <- which(rpatts == rpatts[ulocs[j]]) 133 | full.lik[tmpidx] <- log(sum(qpt.uniq[j,])) 134 | } 135 | 136 | out[i,] <- full.lik 137 | } 138 | 139 | out 140 | } 141 | 142 | adapted_weights <- function(samps, ngq, alphas, psis, grpidx, etamns, etacovs, N) { 143 | ## adapt gh nodes/weights to each case 144 | ndim <- NROW(alphas[[1]]) 145 | lavigh <- getFromNamespace("lav_integration_gauss_hermite", "lavaan") 146 | lavdmvnorm <- getFromNamespace("lav_mvnorm_dmvnorm", "lavaan") 147 | 148 | XW <- lavigh(n = ngq, ndim = ndim, dnorm = TRUE) 149 | eXWxcp <- exp(0.5 * apply(XW$x, 1, crossprod)) 150 | 151 | x.star.list <- vector("list", length(etamns)) 152 | w.star.list <- vector("list", length(etamns)) 153 | XW2pi <- XW$w * (2*pi)^(ndim/2) 154 | 155 | for(i in 1:N) { 156 | C <- t(chol(etacovs[[i]])) 157 
| tmpmn <- as.numeric(etamns[[i]]) 158 | 159 | x.star.list[[i]] <- t(as.matrix(C %*% t(XW$x)) + tmpmn) 160 | w.star.list[[i]] <- XW2pi * eXWxcp * prod(diag(C)) * ## = det(C) for triangular matrix 161 | lavdmvnorm(x.star.list[[i]], Mu = alphas[[grpidx[i]]], 162 | Sigma = psis[[grpidx[i]]], log = FALSE) 163 | } 164 | 165 | list(x.star.list, w.star.list) 166 | } 167 | 168 | eta_moments <- function(samps, N) { 169 | ## columns containing etas 170 | etasamps <- samps[, grep("^eta", colnames(samps))] 171 | 172 | etamns <- etacovs <- vector("list", N) 173 | 174 | for (i in 1:N) { 175 | tmpcol <- grep(paste0("eta[", i, ","), colnames(etasamps), fixed = TRUE) 176 | 177 | etamns[[i]] <- colMeans(etasamps[, tmpcol]) 178 | 179 | etacovs[[i]] <- cov(etasamps[, tmpcol]) 180 | } 181 | 182 | list(etamns, etacovs) 183 | } 184 | -------------------------------------------------------------------------------- /R/blav_compare.R: -------------------------------------------------------------------------------- 1 | blavCompare <- function(object1, object2, ...) 
{ 2 | ## loo compare code from Mauricio Garnier-Villarreal + old BF() code 3 | ## possible TODO: compare using conditional likelihoods, in addition to marginal 4 | lavopt1 <- blavInspect(object1, "Options") 5 | lavopt2 <- blavInspect(object2, "Options") 6 | if((lavopt1$test == "none" & lavopt1$target != "stan") | 7 | (lavopt2$test == "none" & lavopt2$target != "stan")){ 8 | stop("blavaan ERROR: Models cannot be compared when test='none'") 9 | } 10 | targ1 <- lavopt1$target; targ2 <- lavopt2$target 11 | 12 | ## Bayes factor approximation based on marginal log-likelihoods 13 | bf <- object1@test[[1]]$stat - object2@test[[1]]$stat 14 | res <- c(bf, object1@test[[1]]$stat, object2@test[[1]]$stat) 15 | names(res) <- c("bf", "mll1", "mll2") 16 | 17 | if(targ1 == "stan" && blavInspect(object1, "meanstructure")){ 18 | ll1 <- loo::extract_log_lik(object1@external$mcmcout) 19 | } else if(blavInspect(object1, "categorical") && lavopt1$test != "none"){ 20 | if("llnsamp" %in% names(lavopt1)){ 21 | cat("blavaan NOTE: These criteria involve likelihood approximations that may be imprecise.\n", 22 | "You could try running the model again to see how much the criteria fluctuate.\n", 23 | "You can also manually set llnsamp for greater accuracy (but also greater runtime).\n\n") 24 | } 25 | ll1 <- object1@external$casells 26 | } else { 27 | lavopt1$estimator <- "ML" 28 | ll1 <- case_lls(object1@external$mcmcout, make_mcmc(object1@external$mcmcout), 29 | object1) 30 | } 31 | nchain1 <- blavInspect(object1, "n.chains") 32 | niter1 <- nrow(ll1)/nchain1 33 | cid1 <- rep(1:nchain1, each=niter1) 34 | ref1 <- relative_eff(exp(ll1), chain_id = cid1) 35 | 36 | if(targ2 == "stan" && blavInspect(object2, "meanstructure")){ 37 | ll2 <- loo::extract_log_lik(object2@external$mcmcout) 38 | } else if(blavInspect(object2, "categorical") && lavopt2$test != "none"){ 39 | if("llnsamp" %in% names(lavopt2)){ 40 | cat("blavaan NOTE: These criteria involve likelihood approximations that may be imprecise.\n", 
41 | "You could try running the model again to see how much the criteria fluctuate.\n", 42 | "You can also manually set llnsamp for greater accuracy (but also greater runtime).\n\n") 43 | } 44 | ll2 <- object2@external$casells 45 | } else { 46 | lavopt2$estimator <- "ML" 47 | ll2 <- case_lls(object2@external$mcmcout, make_mcmc(object2@external$mcmcout), 48 | object2) 49 | } 50 | nchain2 <- blavInspect(object1, "n.chains") 51 | niter2 <- nrow(ll2)/nchain2 52 | cid2 <- rep(1:nchain2, each=niter2) 53 | ref2 <- relative_eff(exp(ll2), chain_id = cid2) 54 | 55 | loo1 <- loo(ll1, r_eff=ref1, ...) 56 | loo2 <- loo(ll2, r_eff=ref2, ...) 57 | waic1 <- waic(ll1); waic2 <- waic(ll2) 58 | 59 | diff_loo <- loo_compare(loo1, loo2) 60 | diff_waic <- loo_compare(waic1, waic2) 61 | 62 | cat("\nWAIC estimates: \n", 63 | paste("object1: ", round( waic1$estimates[3,1], 3) ), "\n", 64 | paste("object2: ", round( waic2$estimates[3,1], 3) ), "\n" ) 65 | 66 | cat("\n ELPD difference & SE: \n", 67 | sprintf("%8.3f", diff_waic[2, 1]), 68 | sprintf("%8.3f", diff_waic[2, 2]), "\n") 69 | 70 | cat("\nLOO estimates: \n", 71 | paste("object1: ", round( loo1$estimates[3,1], 3) ), "\n", 72 | paste("object2: ", round( loo2$estimates[3,1], 3) ), "\n" ) 73 | 74 | cat("\n ELPD difference & SE: \n", 75 | sprintf("%8.3f", diff_loo[2, 1]), 76 | sprintf("%8.3f", diff_loo[2, 2]), "\n\n") 77 | 78 | cat("Laplace approximation to the log-Bayes factor\n(experimental; positive values favor object1):", 79 | sprintf("%8.3f", bf), "\n\n") 80 | 81 | looobj <- list(loo1, loo2) 82 | waicobj <- list(waic1, waic2) 83 | 84 | res <- list(bf = res, loo = looobj, 85 | diff_loo = diff_loo, 86 | waic = waicobj, 87 | diff_waic = diff_waic) 88 | 89 | invisible(res) 90 | } 91 | 92 | BF <- function(object1, object2, ...) { 93 | cat("BF() is deprecated. 
Use blavCompare() instead.\n\n") 94 | 95 | blavCompare(object1, object2) 96 | } 97 | -------------------------------------------------------------------------------- /R/blav_cond_utils.R: -------------------------------------------------------------------------------- 1 | ## compute undirected K-L divergence across all draws 2 | ## (each draw paired with one from another chain) 3 | samp_kls <- function(lavjags = NULL, 4 | lavmodel = NULL, 5 | lavpartable = NULL, 6 | lavsamplestats = NULL, 7 | lavoptions = NULL, 8 | lavcache = NULL, 9 | lavdata = NULL, 10 | lavmcmc = NULL, 11 | lavobject = NULL, 12 | thin = 1, 13 | conditional = FALSE){ 14 | 15 | ## need to implement plummer's approach of generating y_rep 16 | ##mis <- FALSE 17 | ##if(any(is.na(unlist(lavdata@X)))) mis <- TRUE 18 | ##if(mis | lavInspect(lavobject, "categorical")) stop("blavaan ERROR: K-L divergence not implemented for missing data or ordinal variables.") 19 | 20 | itnums <- sampnums(lavjags, thin = thin) 21 | lavmcmc <- lapply(lavmcmc, function(x) x[itnums,]) 22 | draws <- do.call("rbind", lavmcmc) 23 | 24 | ndraws <- nrow(draws) 25 | halfdraws <- floor(ndraws/2) 26 | ngroups <- lavsamplestats@ngroups 27 | 28 | klres <- rep(NA, halfdraws) 29 | for(i in 1:halfdraws){ 30 | lavmodel0 <- fill_params(draws[i,], lavmodel, lavpartable) 31 | lavmodel1 <- fill_params(draws[(halfdraws + i),], lavmodel, 32 | lavpartable) 33 | 34 | if(conditional){ 35 | eta0 <- fill_eta(draws[i,], lavmodel, lavpartable, 36 | lavsamplestats, lavdata) 37 | eta1 <- fill_eta(draws[(halfdraws + i),], lavmodel, 38 | lavpartable, lavsamplestats, lavdata) 39 | 40 | lavobject@Model <- lavmodel0 41 | mnvec0 <- lavPredict(lavobject, type="ov", ETA=eta0) 42 | if(inherits(mnvec0, "matrix")) mnvec0 <- list(mnvec0) 43 | cmat0 <- lavInspect(lavobject, 'theta') 44 | if(inherits(cmat0, "matrix")) cmat0 <- list(cmat0) 45 | 46 | lavobject@Model <- lavmodel1 47 | mnvec1 <- lavPredict(lavobject, type="ov", ETA=eta1) 48 | if(inherits(mnvec1, 
"matrix")) mnvec1 <- list(mnvec1) 49 | cmat1 <- lavInspect(lavobject, 'theta') 50 | if(inherits(cmat1, "matrix")) cmat1 <- list(cmat1) 51 | 52 | implied0 <- list(cov = cmat0, mean = mnvec0, 53 | slopes = vector("list", ngroups), 54 | th = vector("list", ngroups), 55 | group.w = vector("list", ngroups)) 56 | implied1 <- list(cov = cmat1, mean = mnvec1, 57 | slopes = vector("list", ngroups), 58 | th = vector("list", ngroups), 59 | group.w = vector("list", ngroups)) 60 | } else { 61 | implied0 <- lav_model_implied(lavmodel0, delta = (lavmodel0@parameterization == "delta")) 62 | implied1 <- lav_model_implied(lavmodel1, delta = (lavmodel1@parameterization == "delta")) 63 | } 64 | 65 | tmpkl <- 0 66 | for(g in 1:lavsamplestats@ngroups){ 67 | ## ensure symmetric: 68 | cmat0 <- (implied0$cov[[g]] + t(implied0$cov[[g]]))/2 69 | invcmat0 <- solve(cmat0) 70 | det0 <- det(cmat0) 71 | cmat1 <- (implied1$cov[[g]] + t(implied1$cov[[g]]))/2 72 | invcmat1 <- solve(cmat1) 73 | det1 <- det(cmat1) 74 | if(conditional){ 75 | mnvec0 <- implied0$mean[[g]] 76 | mnvec1 <- implied1$mean[[g]] 77 | 78 | for(j in 1:nrow(mnvec0)){ 79 | tmpkl <- tmpkl + kl_und(mnvec0[j,], mnvec1[j,], 80 | cmat0, invcmat0, cmat1, 81 | invcmat1, det0, det1) 82 | } 83 | } else { 84 | mnvec0 <- as.numeric(implied0$mean[[g]]) 85 | mnvec1 <- as.numeric(implied1$mean[[g]]) 86 | if(length(mnvec0) == 0) { 87 | mnvec0 <- mnvec1 <- lavsamplestats@mean[[g]] 88 | } 89 | 90 | tmpkl <- tmpkl + lavsamplestats@nobs[[g]] * 91 | kl_und(mnvec0, mnvec1, cmat0, invcmat0, 92 | cmat1, invcmat1, det0, det1) 93 | } 94 | } 95 | klres[i] <- tmpkl 96 | } 97 | klres 98 | } 99 | 100 | ## fill in eta matrices (1 per group, in list) 101 | fill_eta <- function(postsamp, lavmodel, lavpartable, lavsamplestats, lavdata){ 102 | nlv <- nrow(lavmodel@GLIST$psi) 103 | etapars <- grepl("^eta", names(postsamp)) 104 | cnums <- strsplit(names(postsamp)[etapars], "\\[|,|\\]") 105 | cnums <- sapply(cnums, function(x) as.numeric(x[3])) 106 | etavec <- 
postsamp[etapars][order(cnums)] 107 | 108 | ## need to worry about (1) excluding phantom lvs 109 | ## and (2) including dummy lvs 110 | foundlvs <- sum(etapars)/lavsamplestats@ntotal 111 | etamat <- matrix(etavec, lavsamplestats@ntotal, foundlvs) 112 | if(foundlvs < nlv) etamat <- cbind(etamat, matrix(0, lavsamplestats@ntotal, (nlv - foundlvs))) 113 | 114 | ## fulleta needs to have rows for any excluded cases 115 | if(sum(unlist(lavdata@norig)) > lavsamplestats@ntotal){ 116 | fulleta <- matrix(NA, sum(unlist(lavdata@norig)), ncol(etamat)) 117 | empties <- unlist(sapply(lavdata@Mp, function(x) x$empty.idx)) 118 | fulleta[-empties,] <- etamat 119 | } else { 120 | fulleta <- etamat 121 | } 122 | 123 | ngroups <- lavsamplestats@ngroups 124 | eta <- vector("list", ngroups) 125 | for(g in 1:ngroups){ 126 | eta[[g]] <- fulleta[lavdata@case.idx[[g]], 1:nlv, drop = FALSE] 127 | 128 | ## fill in eta with dummys, if needed 129 | dummyov <- c(lavmodel@ov.x.dummy.ov.idx[[g]], lavmodel@ov.y.dummy.ov.idx[[g]]) 130 | dummylv <- c(lavmodel@ov.x.dummy.lv.idx[[g]], lavmodel@ov.y.dummy.lv.idx[[g]]) 131 | if(length(dummyov) > 0){ 132 | eta[[g]][, dummylv] <- lavdata@X[[g]][, dummyov] 133 | } 134 | } 135 | 136 | eta 137 | } 138 | 139 | ## compute undirected K-L divergence between two normal distributions 140 | kl_und <- function(mn0, mn1, cov0, invcov0, cov1, invcov1, 141 | det0, det1){ 142 | k <- nrow(cov0) 143 | 144 | kl01 <- sum(diag(invcov1 %*% cov0)) + 145 | t(mn1 - mn0) %*% invcov1 %*% (mn1 - mn0) - 146 | k + log(det1/det0) 147 | 148 | kl10 <- sum(diag(invcov0 %*% cov1)) + 149 | t(mn0 - mn1) %*% invcov0 %*% (mn0 - mn1) - 150 | k + log(det0/det1) 151 | 152 | (1/2) * (kl01 + kl10) 153 | } 154 | 155 | cond_moments <- function(postsamp, lavmodel, lavpartable, lavsamplestats, lavdata, lavobject){ 156 | eta <- fill_eta(postsamp, lavmodel, lavpartable, lavsamplestats, lavdata) 157 | 158 | ## implied meanvec + covmat 159 | ##mnvec <- lavaan:::computeYHAT(lavmodel, lavmodel@GLIST, 160 | ## 
lavsamplestats, ETA = eta) 161 | lavobject@Model <- lavmodel 162 | mnvec <- lavPredict(lavobject, type="ov", ETA = eta) 163 | if(inherits(mnvec, "matrix")) mnvec <- list(mnvec) 164 | 165 | covmat <- lavInspect(lavobject, 'theta') 166 | if(inherits(covmat, "matrix")) covmat <- list(covmat) 167 | ## to avoid warnings from mnormt::pd.solve 168 | covmat <- lapply(covmat, function(x){ 169 | class(x) <- "matrix" 170 | zvar <- which(diag(x) == 0L) 171 | if(length(zvar) > 0) diag(x)[zvar] <- 1e-4 172 | x}) 173 | 174 | ngroups <- lavsamplestats@ngroups 175 | implied <- list(cov = covmat, mean = mnvec, 176 | slopes = vector("list", ngroups), 177 | th = vector("list", ngroups), 178 | group.w = vector("list", ngroups)) 179 | 180 | implied 181 | } 182 | -------------------------------------------------------------------------------- /R/blav_fit.R: -------------------------------------------------------------------------------- 1 | blav_model_fit <- function(lavpartable = NULL, 2 | lavmodel = NULL, 3 | lavjags = NULL, 4 | x = NULL, 5 | VCOV = NULL, 6 | TEST = NULL) { 7 | 8 | stopifnot(is.list(lavpartable), inherits(lavmodel, c("Model", 9 | "lavModel"))) 10 | if(!inherits(lavjags, "NULL")){ 11 | lavmcmc <- make_mcmc(lavjags) 12 | } else { 13 | lavmcmc <- NULL 14 | } 15 | 16 | # extract information from 'x' 17 | iterations <- attr(x, "iterations") 18 | converged <- attr(x, "converged") 19 | fx <- attr(x, "fx") 20 | fx.group <- as.numeric(NA) 21 | #fx.group = attr(fx, "fx.group") 22 | logl.group <- as.numeric(NA) 23 | logl <- as.numeric(NA) 24 | 25 | #print(fx.group) 26 | control <- attr(x, "control") 27 | attributes(fx) <- NULL 28 | x.copy <- x # we are going to change it (remove attributes) 29 | attributes(x.copy) <- NULL 30 | est <- lav_model_get_parameters(lavmodel = lavmodel, type = "user") 31 | 32 | # did we compute standard errors? 
33 | blaboot <- rearr_params(lavmcmc, lavpartable) 34 | se <- lav_model_vcov_se(lavmodel = lavmodel, lavpartable = lavpartable, 35 | VCOV = VCOV, BOOT = blaboot) 36 | 37 | # did we compute test statistics 38 | if(is.null(TEST)) { 39 | test <- list() 40 | } else { 41 | test <- TEST 42 | } 43 | 44 | # for convenience: compute lavmodel-implied Sigma and Mu 45 | implied <- lav_model_implied(lavmodel, delta = (lavmodel@parameterization == "delta")) 46 | # change names back if conditional.x (see lav_model_implied.R) 47 | if(lavmodel@conditional.x) { 48 | names(implied) <- c("cov", "mean", "slopes", "th", "group.w") 49 | } 50 | 51 | # partrace? 52 | if(!is.null(attr(x, "partrace"))) { 53 | PARTRACE <- attr(x, "partrace") 54 | } else { 55 | PARTRACE <- matrix(0, 0L, 0L) 56 | } 57 | 58 | new("Fit", 59 | npar = as.integer(max(lavpartable$free)), 60 | x = x.copy, 61 | partrace = PARTRACE, 62 | start = lavpartable$start, # needed? 63 | est = est, 64 | se = se, 65 | fx = fx, 66 | fx.group = fx.group, 67 | logl = logl, 68 | logl.group = logl.group, 69 | iterations = as.integer(iterations), 70 | converged = converged, 71 | control = control, 72 | Sigma.hat = implied$cov, 73 | Mu.hat = implied$mean, 74 | TH = implied$th, 75 | test = test 76 | ) 77 | } 78 | -------------------------------------------------------------------------------- /R/blav_predict.R: -------------------------------------------------------------------------------- 1 | ## predictions from blavaan object; similar to lavPredict, but lavPredict is never called 2 | ## overload standard R function `predict' 3 | setMethod("predict", "blavaan", 4 | function(object, newdata = NULL) { 5 | blavPredict(object, newdata = newdata) 6 | }) 7 | 8 | blavPredict <- function(object, newdata = NULL, type = "lv", level = 1L) { 9 | 10 | stopifnot(inherits(object, "blavaan")) 11 | blavmodel <- object@Model 12 | blavpartable <- object@ParTable 13 | blavsamplestats <- object@SampleStats 14 | blavdata <- object@Data 15 | standata <- 
object@external$mcmcdata

  ## normalize the requested prediction type to a canonical keyword
  type <- tolower(type)
  if(type %in% c("latent", "lv", "factor", "factor.score", "factorscore"))
    type <- "lv"
  if(type %in% c("ov","yhat"))
    type <- "yhat"
  if(type %in% c("ypred", "ydist"))
    type <- "ypred"
  if(type %in% c("ymis", "ovmis")){
    type <- "ymis"
    ## ymis only makes sense when the data actually contain missing values
    if(all(!is.na(unlist(blavdata@X)))) stop("blavaan ERROR: No missing data are present.", call. = FALSE)
  }

  lavopt <- lavInspect(object, "options")
  stantarget <- lavopt$target == "stan"

  ## scalar condition: use && (was &)
  if(lavInspect(object, "categorical") && type == "ymis") stop("blavaan ERROR: ymis is not yet implemented for ordinal models.", call. = FALSE)

  if(level == 2L){
    if(all(unlist(lavInspect(object, "nclusters")) == 1)) stop("blavaan ERROR: level 2 was requested but this does not appear to be a 2-level model.", call. = FALSE)
    ## FIX: stop() concatenates its ... arguments with no separator, so the
    ## old message read "optionyhatis not yet implemented"; add the spaces.
    if(type %in% c("yhat", "ypred", "ymis")) stop("blavaan ERROR: option ", type, " is not yet implemented for two-level models.", call. = FALSE)
  }

  if(!is.null(newdata)) {
    if(!stantarget) stop("blavaan ERROR: newdata is currently only available for target='stan'")
    if(lavInspect(object, "categorical")) stop("blavaan ERROR: newdata is not yet available for ordinal data.")
    object <- blav_fill_newdata(object, newdata)

    ## refresh the slots that depend on the (new) data
    blavsamplestats <- object@SampleStats
    blavdata <- object@Data
    standata <- object@external$mcmcdata
  }


  ## lv: posterior dist of lvs (use blavInspect functionality); matrix frame
  ## lvmeans: use blavInspect functionality; matrix
  ## yhat: posterior expected value of ovs conditioned on lv samples; mcmc list
  ## ypred: posterior predictive distribution of ovs conditioned on lv samples; mcmc list
  ## ymis: posterior predictive distribution of missing values conditioned on observed values; matrix
  if(type == "lv") {
    FS <- do.call("rbind", blavInspect(object, 'lvs', level = level))

    ## N and latent variable names, to set dimensions
    lvmn <- lavInspect(object, "mean.lv")
    if(!inherits(lvmn, "list")){
      lvmn <- list(lvmn)
    }
    if(level == 1L){
      nlv <- length(lvmn[[1]])
      N <- sum(lavInspect(object, "ntotal"))
      etas <- names(lvmn[[1]])
    } else {
      nlv <- length(lvmn[[2]])
      N <- sum(unlist(lavInspect(object, "nclusters")))
      etas <- names(lvmn[[2]])
    }

    ## one N x nlv matrix per posterior draw
    ## (seq_len instead of 1:NROW guards the zero-draw case)
    out <- lapply(seq_len(NROW(FS)), function(i) {
      rowmat <- matrix(FS[i,], N, nlv)
      colnames(rowmat) <- etas
      rowmat } )
  } else if(type == "lvmeans") {
    out <- blavInspect(object, 'lvmeans')
  } else if(type %in% c("yhat", "ypred", "ymis")) {
    if(!stantarget) stop(paste0("blavaan ERROR: '", type, "' is only supported for target='stan'"))

    if(type %in% c("yhat", "ypred")) {
      if(is.null(object@external$stanlvs)) stop("blavaan ERROR: for predictions, save.lvs must be TRUE during model estimation")
      lavmcmc <- make_mcmc(blavInspect(object, 'mcobj'),
object@external$stanlvs) 85 | itnums <- sampnums(object@external$mcmcout, thin = 1) 86 | nsamps <- length(itnums) 87 | nchain <- length(lavmcmc) 88 | ng <- blavInspect(object, 'ngroups') 89 | 90 | tmpres <- vector("list", nchain) 91 | for(j in 1:nchain) { 92 | loop.args <- list(X = 1:nsamps, FUN = function(i, j){ 93 | cond_moments(lavmcmc[[j]][itnums[i],], 94 | blavmodel, 95 | blavpartable, 96 | blavsamplestats, 97 | blavdata, 98 | object)}, j = j, future.seed = TRUE) 99 | tmpres[[j]] <- do.call("future_lapply", loop.args) 100 | } 101 | tmpres <- unlist(tmpres, recursive = FALSE) 102 | 103 | if(type == "ypred") { 104 | ## use mean and cov from each entry of tmpres to randomly sample 105 | tmpres <- lapply(tmpres, function(x){ 106 | lapply(1:ng, function(g){ 107 | sigchol <- chol(x$cov[[g]]) 108 | t(apply(x$mean[[g]], 1, function(y) mnormt::rmnorm(n=1, mean=y, sqrt=sigchol))) 109 | }) 110 | }) 111 | } else { 112 | tmpres <- lapply(tmpres, function(x) x$mean) 113 | } 114 | 115 | ## these are now lists by group; rearrange to match original data 116 | cids <- unlist(blavInspect(object, 'case.idx')) 117 | cnms <- lavNames(object) 118 | yres <- lapply(tmpres, function(x) do.call("rbind", x)[cids,]) 119 | 120 | out <- yres 121 | } 122 | 123 | if(type == "ymis") { 124 | out <- samp_data(object@external$mcmcout, blavmodel, blavpartable, standata, blavdata) 125 | } 126 | } else { 127 | stop("blavaan ERROR: unknown type supplied; use lv lvmeans yhat ypred ymis") 128 | } 129 | 130 | out 131 | } 132 | 133 | ## fill blavaan object with newdata, then sample lvs given already-sampled parameters 134 | blav_fill_newdata <- function(object, newdat, lvs = TRUE) { 135 | 136 | lavd <- getFromNamespace("lavData", "lavaan") 137 | olddata <- object@Data 138 | OV <- olddata@ov 139 | object@Data <- lavd(data = newdat, 140 | group = olddata@group, 141 | ov.names = olddata@ov.names, 142 | ov.names.x = olddata@ov.names.x, 143 | ordered = OV$names[ OV$type == "ordered" ], 144 | lavoptions = 
object@Options, allow.single.case = TRUE) 145 | object@SampleStats@ntotal <- NROW(newdat) 146 | 147 | ## Stan-formatted newdata 148 | l2s <- lav2stanmarg(object, dp = blavInspect(object, 'options')$dp, 149 | n.chains = blavInspect(object, 'nchains'), inits = "simple") 150 | l2slev2 <- lav2stanmarg(object, dp = blavInspect(object, 'options')$dp, 151 | n.chains = blavInspect(object, 'nchains'), 152 | inits = "simple", level = 2, indat = l2s$dat) 153 | l2s$dat <- c(l2s$dat, l2slev2$dat) 154 | l2s$dat <- l2s$dat[!duplicated(names(l2s$dat))] 155 | l2s$free2 <- c(l2s$free2, l2slev2$free2) 156 | l2s$lavpartable <- rbind(l2s$lavpartable, l2slev2$lavpartable) 157 | l2s$wigpris <- c(l2s$wigpris, l2slev2$wigpris) 158 | l2s$init <- lapply(1:length(l2s$init), function(i) c(l2s$init[[i]], l2slev2$init[[i]])) 159 | ldargs <- c(l2s$dat, list(lavpartable = l2s$lavpartable, dumlv = l2s$dumlv, dumlv_c = l2slev2$dumlv, 160 | save_lvs = TRUE, do_test = FALSE)) 161 | smd <- do.call("stanmarg_data", ldargs) 162 | object@external$mcmcdata <- smd 163 | 164 | if (lvs) { 165 | newlvs <- samp_lvs(object@external$mcmcout, object@Model, object@ParTable, smd, eeta = NULL, categorical = FALSE) 166 | lvsumm <- as.matrix(rstan::monitor(newlvs, print=FALSE)) 167 | cmatch <- match(colnames(object@external$stansumm), colnames(lvsumm)) 168 | stansumm <- object@external$stansumm 169 | lvcols <- grep("^eta", rownames(stansumm)) 170 | if (length(lvcols) > 0) stansumm <- stansumm[-lvcols, ] 171 | object@external$stansumm <- rbind(stansumm, lvsumm[,cmatch]) 172 | object@external$stanlvs <- newlvs 173 | } 174 | 175 | object 176 | } 177 | -------------------------------------------------------------------------------- /R/blav_test.R: -------------------------------------------------------------------------------- 1 | blav_model_test <- function(lavmodel = NULL, 2 | lavpartable = NULL, 3 | lavsamplestats = NULL, 4 | lavoptions = NULL, 5 | x = NULL, 6 | VCOV = NULL, 7 | lavcache = NULL, 8 | lavdata = NULL, 9 | 
lavjags = NULL, 10 | lavobject = NULL, 11 | samplls = NULL, 12 | jagextra = NULL, 13 | stansumm = NULL, 14 | domll = NULL, 15 | control = list()) { 16 | 17 | 18 | TEST <- list() 19 | 20 | ## marginal log-likelihood approximation 21 | ## needs original partable with rhos 22 | if("syntax" %in% names(jagextra)){ 23 | warning("blavaan WARNING: Marginal log-likelihood cannot be approximated when there is additional JAGS syntax.", call. = FALSE) 24 | mll <- NA 25 | } else if(domll) { 26 | mll <- try(margloglik(lavpartable, lavmodel, lavoptions, 27 | lavsamplestats, lavdata, lavcache, 28 | lavjags, VCOV, x, stansumm), 29 | silent=TRUE) 30 | if(inherits(mll, "try-error")) mll <- NA 31 | } else { 32 | mll <- NA # not tested, priors may cause problems 33 | } 34 | 35 | if(lavoptions$target == "stan") { 36 | ppp <- stansumm['ppp', 'mean'] 37 | } else { 38 | ppp <- postpred(samplls, lavobject)$ppval 39 | } 40 | 41 | TEST[[1]] <- list(test="mloglik", 42 | stat=as.numeric(mll), 43 | stat.group=as.numeric(NA), 44 | df=as.integer(NA), 45 | refdistr="NA", 46 | pvalue=as.numeric(NA)) 47 | 48 | TEST[[2]] <- list(test="ppp", 49 | ## DIC: 2*ll(theta_hat) - 4*mean(ll(theta_samp)) 50 | stat=as.numeric(ppp), 51 | stat.group=as.numeric(NA), 52 | df=as.integer(NA), 53 | refdistr="NA", 54 | pvalue=as.numeric(NA)) 55 | 56 | TEST 57 | } 58 | -------------------------------------------------------------------------------- /R/dpriors.R: -------------------------------------------------------------------------------- 1 | dpriors <- function(..., target="stan"){ 2 | userspec <- list(...) 
3 | 4 | if(length(userspec) > 0 && is.null(names(userspec))) stop("blavaan ERROR: dpriors() arguments require names (nu, lambda, etc)") 5 | 6 | jagpres <- pkgcheck("runjags") 7 | stanpres <- pkgcheck("rstan") 8 | 9 | if(jagpres & !stanpres){ 10 | dp <- do.call("jagpriors", userspec) 11 | } else if(stanpres & !jagpres){ 12 | dp <- do.call("stanpriors", userspec) 13 | } else if(length(userspec) > 0){ 14 | ## check whether they are supplying jags or stan distributions 15 | jagdists <- transtables()$disttrans[,'jags'] 16 | ## add other jags dists not in the translation table 17 | jagdists <- c(jagdists, 'dbetabin', 'ddirch', 'dmnorm', 18 | 'dwish', 'dmt', 'dmulti', 19 | 'dbinom', 'dchisq', 'dggamma', # aliases 20 | 'dnbinom', 'dweibull', 'ddirich') 21 | 22 | userjags <- sapply(jagdists, function(x) grep(x, userspec)) 23 | 24 | ## > 1 match can occur for things like ddexp: 25 | if(length(unlist(userjags)) >= length(userspec)){ 26 | if(target == "jags"){ 27 | dp <- do.call("jagpriors", userspec) 28 | } else { 29 | stop("blavaan ERROR: JAGS distributions sent to dpriors(), but target != 'jags'") 30 | } 31 | } else if(length(unlist(userjags)) == 0){ 32 | if(target == "jags") stop("blavaan ERROR: target='jags', but no jags distributions were found") 33 | ## assume they wanted stan 34 | if(target %in% c("stanclassic", "stancond")){ 35 | dp <- do.call("stanclassicpriors", userspec) 36 | } else { 37 | dp <- do.call("stanpriors", userspec) 38 | } 39 | } else { 40 | stop("blavaan ERROR: Distributions sent to dpriors() do not match target.") 41 | } 42 | } else { 43 | ## nothing is user specified, just use target 44 | if(target == "jags"){ 45 | dp <- do.call("jagpriors", userspec) 46 | } else if(target %in% c("stanclassic", "stancond")){ 47 | dp <- do.call("stanclassicpriors", userspec) 48 | } else { 49 | dp <- do.call("stanpriors", userspec) 50 | } 51 | } 52 | 53 | dp 54 | } 55 | 56 | jagpriors <- function(nu="dnorm(0,1e-3)", alpha="dnorm(0,1e-2)", 57 | lambda="dnorm(0,1e-2)", 
beta="dnorm(0,1e-2)", 58 | itheta="dgamma(1,.5)[prec]", ipsi="dgamma(1,.5)[prec]", 59 | rho="dbeta(1,1)", ibpsi="dwish(iden,3)", 60 | tau="dnorm(0,.44)"){ 61 | 62 | dp <- c(nu=nu, alpha=alpha, lambda=lambda, beta=beta, 63 | itheta=itheta, ipsi=ipsi, rho=rho, ibpsi=ibpsi, 64 | tau=tau) 65 | 66 | dp 67 | } 68 | 69 | ## see ?stan::expose_stan_functions for obtaining margloglik info 70 | stanpriors <- function(nu="normal(0,32)", 71 | alpha="normal(0,10)", lambda="normal(0,10)", 72 | beta="normal(0,10)", theta="gamma(1,.5)[sd]", 73 | psi="gamma(1,.5)[sd]", rho="beta(1,1)", 74 | ibpsi="wishart(3,iden)", 75 | tau="normal(0,1.5)"){ 76 | 77 | dp <- c(nu=nu, alpha=alpha, lambda=lambda, beta=beta, 78 | theta=theta, psi=psi, rho=rho, ibpsi=ibpsi, 79 | tau=tau) 80 | 81 | dp 82 | } 83 | 84 | stanclassicpriors <- function(nu="normal(0,1000^.5)", 85 | alpha="normal(0,10)", lambda="normal(0,10)", 86 | beta="normal(0,10)", itheta="gamma(1,.5)[prec]", 87 | ipsi="gamma(1,.5)[prec]", rho="beta(1,1)", 88 | ibpsi="wishart(3,iden)", 89 | tau="normal(0,1.5)"){ 90 | 91 | dp <- c(nu=nu, alpha=alpha, lambda=lambda, beta=beta, 92 | itheta=itheta, ipsi=ipsi, rho=rho, ibpsi=ibpsi, 93 | tau=tau) 94 | 95 | dp 96 | } 97 | -------------------------------------------------------------------------------- /R/jags2r.R: -------------------------------------------------------------------------------- 1 | jagsdist2r <- function(priors, direction = 'jags2r'){ 2 | ## Convert univariate JAGS distributions to R, relying 3 | ## on extra packages if needed. Partially inspired by 4 | ## LeBauer et al (2013), R Journal, 207-209. 
5 | 6 | tabs <- transtables() 7 | disttrans <- tabs$disttrans 8 | jag2rfuns <- tabs$jag2rfuns 9 | 10 | priargs <- strsplit(priors, "[, ()]+") 11 | 12 | newargs <- lapply(priargs, function(x){ 13 | rnum <- match(x[1], disttrans[,1]) 14 | if(length(x) == 0){ 15 | res <- "" 16 | } else if(grepl("dwish", x[1])){ 17 | res <- x 18 | } else { 19 | trun <- which(x == "T") 20 | sdvar <- grep("\\[", x) 21 | if(length(trun) > 0 | length(sdvar) > 0){ 22 | trun <- min(c(trun, sdvar)) 23 | trunargs <- x[trun[1]:length(x)] 24 | x <- x[1:(trun[1]-1)] 25 | } 26 | ## distribution name 27 | if(is.na(rnum)){ 28 | ## Table 6.4 of jags manual 29 | aliases <- c("dbin","dbinom","dchisqr","dchisq", 30 | "dnegbin","dnbinom","dweib", "dweibull", 31 | "ddirch", "ddirich") 32 | aliases <- t(matrix(aliases, 2, 5)) 33 | rnum <- match(x[1], aliases[,2]) 34 | if(is.na(rnum)){ 35 | stop("blavaan ERROR: Bad prior specification.") 36 | } else { 37 | rname <- aliases[rnum] 38 | } 39 | } else { 40 | rname <- disttrans[rnum,2] 41 | } 42 | 43 | ## parameter changes 44 | rpars <- jag2rfuns[[rnum]](as.numeric(x[2:length(x)])) 45 | 46 | res <- c(rname, rpars) 47 | 48 | ## add truncation info 49 | if(length(trun) > 0){ 50 | res <- c(res, trunargs) 51 | } 52 | } 53 | res 54 | }) 55 | 56 | newargs 57 | } 58 | 59 | ## Might be interesting to also convert R distributions 60 | ## to JAGS, but I don't see an immediate application. 
61 | ##rdist2jags <- function(priors){ 62 | ## res <- jagsdist2r(priors, direction = 'r2jags') 63 | ## res 64 | ##} 65 | 66 | transtables <- function(){ 67 | ## maintain name/parameter translations between r and jags 68 | 69 | ## distribution names for jags and R: 70 | disttrans <- c("dbeta","dbeta","dchisqr","dchisq","ddexp","ddexp","dexp","dexp","df","df","dgamma","dgamma","dgen.gamma",NA #"rmutil::dggamma", 71 | ,"dlogis","dlogis","dlnorm","dlnorm","dnchisqr","dchisq","dnorm","dnorm","dpar",NA,"dt","dt","dunif","dunif","dweib","dweibull",#"dbetabin","VGAM::dbetabinom", 72 | "dbern","dbinom","dbin","dbinom","dcat",NA #TODO "dmultinom" 73 | ,"dhyper","dhyper","dnegbin","dnbinom","dpois","dpois")#,"ddirch","MCMCpack::ddirichlet","dmnorm","dmnorm","dwish","dwish","dmt",NA,"dmulti","dmultinom") 74 | ## TODO: How to input non-scalar parameters 75 | ## for multivariate distributions? 76 | disttrans <- data.frame(t(matrix(disttrans,2,length(disttrans)/2)), stringsAsFactors = FALSE) 77 | 78 | names(disttrans) <- c("jags","r") 79 | 80 | ## functions translating jags parameters to R 81 | ## element 11 is dnorm, still need others 82 | jag2rfuns <- vector("list", length=nrow(disttrans)) 83 | for(i in 1:length(jag2rfuns)){ 84 | jag2rfuns[[i]] <- identity 85 | } 86 | 87 | ## for dnorm + dlnorm: 88 | dnloc <- which(disttrans$jags == "dnorm") 89 | jag2rfuns[[dnloc]] <- function(x){ 90 | x[2] <- 1/sqrt(x[2]) 91 | x 92 | } 93 | jag2rfuns[[which(disttrans$jags == "dlnorm")]] <- jag2rfuns[[dnloc]] 94 | 95 | ## dlogis + ddexp: 96 | dlogloc <- which(disttrans$jags == "dlogis") 97 | jag2rfuns[[dlogloc]] <- function(x){ 98 | x[2] <- 1/x[2] 99 | x 100 | } 101 | jag2rfuns[[which(disttrans$jags == "ddexp")]] <- jag2rfuns[[dlogloc]] 102 | 103 | ## dbin/dbern 104 | binloc <- which(disttrans$jags == "dbin") 105 | jag2rfuns[[binloc]] <- function(x) x[2:1] 106 | bernloc <- which(disttrans$jags == "dbern") 107 | jag2rfuns[[bernloc]] <- function(x) c(1,x) 108 | 109 | ## others with reversed 
parameters 110 | nbloc <- which(disttrans$jags == "dnegbin") 111 | jag2rfuns[[nbloc]] <- jag2rfuns[[binloc]] 112 | ## dgamma does not have reversed parameters, 113 | ## despite LeBauer: 114 | ##gloc <- which(disttrans$jags == "dgamma") 115 | ##jag2rfuns[[gloc]] <- jag2rfuns[[binloc]] 116 | 117 | ## weibull 118 | wbloc <- which(disttrans$jags == "dweib") 119 | jag2rfuns[[wbloc]] <- function(x){ 120 | ## LeBauer, p. 208 121 | par2 <- x[2]^(-1/x[1]) 122 | c(x[1], par2) 123 | } 124 | 125 | ## hypergeometric: what to do with noncentral? 126 | hyloc <- which(disttrans$jags == "dhyper") 127 | jag2rfuns[[hyloc]] <- function(x){ 128 | if(as.numeric(x[4]) != 1){ 129 | warning("blavaan WARNING: Fit measures with noncentral hypergeometric priors are inaccurate.", call. = FALSE) 130 | } 131 | x[1:3] 132 | } 133 | 134 | ## t distribution 135 | tloc <- which(disttrans$jags == "dt") 136 | jag2rfuns[[tloc]] <- function(x){ 137 | x[c(3,1,2)] 138 | } 139 | 140 | ## beta, poisson, exponential, uniform identical 141 | 142 | list(disttrans = disttrans, jag2rfuns = jag2rfuns) 143 | } 144 | 145 | ## define ddexp(), rdexp() here 146 | ddexp <- function(x, mu = 0, scale = 1, log = FALSE){ 147 | if(scale <= 0) stop("blavaan ERROR: Negative scale parameter to ddexp().") 148 | 149 | dens <- -log(2*scale) - abs(x - mu)/scale 150 | if(!log) dens <- exp(dens) 151 | 152 | dens 153 | } 154 | 155 | rdexp <- function(n, mu = 0, scale = 1){ 156 | if(scale <= 0) stop("blavaan ERROR: Negative scale parameter to rdexp().") 157 | 158 | U <- runif(n, -.5, .5) 159 | 160 | X <- mu - scale * sign(U) * log(1 - 2 * abs(U)) 161 | 162 | X 163 | } 164 | 165 | ## Empirical comparison of univariate distributions in jags vs r 166 | if(FALSE){ 167 | library(runjags) 168 | source("jags2r.R") 169 | tt <- transtables() 170 | 171 | compres <- vector("list", 21) 172 | for(i in 1:21){ 173 | tmppri <- paste(tt$disttrans$jags[i], "(.75", sep="") 174 | if(tt$disttrans$jags[i] %in% c("dchisqr", "dexp", "dbern", "dpois", 
"dcat")){ 175 | tmppri <- paste(tmppri, ")", sep="") 176 | } else if (tt$disttrans$jags[i] %in% c("dgen.gamma", "dbetabin", "dt")){ 177 | tmppri <- paste(tmppri, ",1,5)", sep="") 178 | } else if (tt$disttrans$jags[i] == "dhyper"){ 179 | tmppri <- paste(tt$disttrans$jags[i], "(3,5,4,1)", sep="") 180 | } else { 181 | tmppri <- paste(tmppri, ",5)", sep="") 182 | } 183 | tmpmod <- paste("model{\n y ~ ", tmppri, "\n}", sep="") 184 | 185 | jagres <- run.jags(tmpmod, monitor="y", data=list(x=rep(1,10)), adapt=10, 186 | burnin=10, sample=4000, n.chains=1) 187 | 188 | rparam <- jagsdist2r(tmppri) 189 | if(is.na(rparam[[1]][1])) next 190 | rfun <- gsub("^d", "r", rparam[[1]][1]) 191 | rargs <- list(10000, as.numeric(rparam[[1]][2])) 192 | if(length(rparam[[1]]) > 2){ 193 | for(j in 3:length(rparam[[1]])){ 194 | rargs <- c(rargs, list(as.numeric(rparam[[1]][j]))) 195 | } 196 | } 197 | ## special handling of t distribution because R 198 | ## doesn't allow us to set mean/variance 199 | ## Could also use rt.scaled() from metRology package. 
200 | if(rfun == "rt"){ 201 | mnprec <- c(rargs[[3]][1], rargs[[4]][1]) 202 | rargs[[4]] <- NULL 203 | rargs[[3]] <- NULL 204 | } 205 | rres <- do.call(rfun, rargs) 206 | if(rfun == "rt"){ 207 | rres <- rres/sqrt(as.numeric(mnprec[2])) + as.numeric(mnprec[1]) 208 | } 209 | tmpres <- t(matrix(c(summary(as.numeric(jagres$mcmc[[1]])), 210 | summary(rres)), 6, 2)) 211 | 212 | compres[[i]] <- tmpres 213 | } 214 | 215 | ## check results 216 | for(i in 1:21){ 217 | if(is.null(compres[[i]])) next 218 | tmpd <- diff(compres[[i]][,3]) 219 | if(tmpd > .05) print(i) 220 | } 221 | } 222 | -------------------------------------------------------------------------------- /R/set_inits.R: -------------------------------------------------------------------------------- 1 | set_inits <- function(partable, ov.cp, lv.cp, n.chains, inits){ 2 | ## Generate initial values for each chain 3 | ## TODO write start values to new columns of coefvec, so can include in partable 4 | initvals <- vector("list", n.chains) 5 | names(initvals) <- paste("c", 1:n.chains, sep="") 6 | pveclen <- max(partable$freeparnums[partable$mat != ""], na.rm = TRUE) 7 | 8 | for(i in 1:n.chains){ 9 | initvals[[i]] <- list(parvec = rep(NA, pveclen)) 10 | } 11 | 12 | ## find parameters arising from wishart 13 | ## TODO this currently skips over priors that have been placed on 14 | ## variances/sds; could instead set them with some extra handling 15 | wps <- grep("dwish", partable$prior) 16 | 17 | ## handle wishart inits separately 18 | if(length(wps) > 0){ 19 | wdimen <- sum(grepl("dwish", partable$prior) & 20 | partable$group == 1 & 21 | partable$lhs == partable$rhs) 22 | ngroups <- length(wps)/(wdimen*(wdimen+1)/2) 23 | ## generate values 24 | for(i in 1:n.chains){ 25 | ## get something close to an identity matrix, otherwise 26 | ## the chains can go crazy places 27 | wvals <- rWishart(ngroups, wdimen*500, diag(wdimen))/(wdimen*500) 28 | initvals[[i]] <- c(initvals[[i]], list(ibpsi = wvals)) 29 | } 30 | } 31 | 32 | 
for(i in 1:nrow(partable)){ 33 | eqcons <- which(partable$lhs == partable$label[i] & 34 | partable$op %in% c("==", ":=", ">", "<")) 35 | if((i %in% wps) | partable$free[i] == 0 | partable$prior[i] == "") next 36 | ## next unless it is a simple equality constraint: 37 | if(length(eqcons) > 0 & !grepl('^\\.p', partable$rhs[eqcons[1]])) next 38 | 39 | tmppri <- partable$prior[i] 40 | 41 | pricom <- unlist(strsplit(tmppri, "[, ()]+")) 42 | 43 | if(inits == "prior"){ 44 | ## Try to set sensible starting values, using some of the 45 | ## prior information 46 | if(grepl("dnorm", pricom[1])){ 47 | pricom[3] <- "1" 48 | ## keep loadings/regressions on one side 49 | if(grepl("lambda", partable$mat[i]) | grepl("beta", partable$prior[i])){ 50 | pricom[1] <- "dunif" 51 | pricom[2] <- ".75" 52 | pricom[3] <- "2" 53 | } 54 | } 55 | ## Extreme correlations lead to errors, so keep them close to 0 56 | if(grepl("dbeta", pricom[1])){ 57 | pricom[2] <- "100" 58 | pricom[3] <- "100" 59 | } 60 | 61 | ## Switch to r instead of d for random inits 62 | pricom[1] <- gsub("^d", "r", pricom[1]) 63 | 64 | ## Generate initial values 65 | ## FIXME do something smarter upon failure 66 | ivs <- try(do.call(pricom[1], list(n.chains, as.numeric(pricom[2]), 67 | as.numeric(pricom[3]))), silent = TRUE) 68 | if(inherits(ivs, "try-error") | all(is.na(ivs))) ivs <- rep(partable$start[i], n.chains) 69 | } else { 70 | ivs <- rep(partable$start[i], n.chains) 71 | } 72 | 73 | ## now (try to) ensure the jittered values won't crash on us 74 | ## and converge 75 | if(grepl("\\[sd\\]", partable$prior[i]) | 76 | grepl("\\[var\\]", partable$prior[i])){ 77 | powval <- ifelse(grepl("\\[sd\\]", partable$prior[i]), -.5, -1) 78 | ivs[ivs <= 0] <- -ivs[ivs <= 0] 79 | ivs <- ivs^powval 80 | } 81 | if(grepl("dbeta", partable$prior[i])){ 82 | ivs <- rep(.5, n.chains) 83 | } 84 | 85 | ## extract matrix, dimensions 86 | for(j in 1:n.chains){ 87 | initvals[[j]][["parvec"]][partable$freeparnums[i]] <- ivs[j] 88 | } 89 | } 90 | 
91 | ## if an entire init matrix/array is NA, remove it. This can happen when 92 | ## all lvs are covered by dmnorm/dwish (vs invpsi) or when phantom lvs 93 | ## are used in tandem with dmnorm/dwish. 94 | all.na <- FALSE 95 | if(all(is.na(as.numeric(initvals[[1]][["parvec"]])))) all.na <- TRUE 96 | 97 | if(all.na){ 98 | for(j in 1:n.chains){ 99 | initvals[[j]][["parvec"]] <- NULL 100 | } 101 | } 102 | initvals 103 | } 104 | 105 | set_inits_stan <- function(partable, nfree, n.chains, inits, 106 | ntot = NULL, nlvno0 = 0){ 107 | ## Generate initial values for each chain 108 | initvals <- vector("list", n.chains) 109 | names(initvals) <- paste("c", 1:n.chains, sep="") 110 | pveclen <- nfree[nfree > 0] 111 | 112 | initmats <- list() 113 | for(i in 1:length(pveclen)){ 114 | initmats <- c(initmats, list(array(NA, dim=pveclen[i]))) 115 | ## if(pveclen[i] == 1){ 116 | ## initmats <- c(initmats, list(as.vector(NA))) 117 | ## } else { 118 | ## initmats <- c(initmats, list(rep(NA, pveclen[i]))) 119 | ## } 120 | } 121 | names(initmats) <- paste0(names(pveclen), "free") 122 | if(nlvno0 > 0){ 123 | initmats <- c(initmats, list(etafree = array(1, dim = c(ntot, nlvno0)))) 124 | } 125 | 126 | for(i in 1:n.chains){ 127 | initvals[[i]] <- initmats 128 | } 129 | 130 | partable$freeparnums[is.na(partable$freeparnums)] <- 0 131 | freepartable <- partable[partable$freeparnums > 0,] 132 | if("rhoidx" %in% names(freepartable)){ 133 | rhorows <- which(!is.na(freepartable$rhoidx) & 134 | freepartable$free > 0 & 135 | freepartable$mat == "rho" & !grepl("lkj", freepartable$prior)) 136 | if(length(rhorows) > 0){ 137 | freepartable$freeparnums[rhorows] <- 1:length(rhorows) 138 | } 139 | lvrhorows <- which(!is.na(freepartable$rhoidx) & 140 | freepartable$free > 0 & 141 | freepartable$mat == "lvrho" & !grepl("lkj", freepartable$prior)) 142 | if(length(rhorows) > 0){ 143 | freepartable$freeparnums[lvrhorows] <- 1:length(lvrhorows) 144 | } 145 | } 146 | 147 | ## TODO need exported, or reverse 
rstan::lookup() 148 | ## rosetta <- rstan:::rosetta 149 | ## alternate way to possibly get around export 150 | rloc <- paste0(system.file("R", package="rstan"), "/sysdata") 151 | lazyLoad(rloc) 152 | rosetta <- rosetta 153 | 154 | prilist <- dist2r(freepartable$prior, target = "stan") 155 | for(i in 1:nrow(freepartable)){ 156 | if(inits == "prior"){ 157 | ## Try to set sensible starting values, using some of the 158 | ## prior information 159 | pricom <- prilist[[i]] 160 | if(grepl("dnorm", pricom[1])){ 161 | pricom[3] <- "1" 162 | ## keep loadings/regressions on one side 163 | if(grepl("lambda", freepartable$mat[i]) | grepl("beta", freepartable$prior[i])){ 164 | pricom[1] <- "dunif" 165 | pricom[2] <- ".75" 166 | pricom[3] <- "2" 167 | } 168 | } 169 | ## Extreme correlations lead to errors, so keep them close to 0 170 | if(grepl("dbeta", pricom[1])){ 171 | pricom[2] <- "100" 172 | pricom[3] <- "100" 173 | } 174 | 175 | ## Switch to r instead of d for random inits 176 | pricom[1] <- gsub("^d", "r", pricom[1]) 177 | 178 | ## Generate initial values 179 | ## FIXME do something smarter upon failure 180 | ivs <- try(do.call(pricom[1], list(n.chains, as.numeric(pricom[2]), 181 | as.numeric(pricom[3]))), silent = TRUE) 182 | 183 | if(inherits(ivs, "try-error")){ 184 | ivs <- rep(1, n.chains) 185 | } else if(pricom[1] == "rgamma" & !grepl("[sd]", freepartable$prior[i], fixed = TRUE) & 186 | !grepl("[var]", freepartable$prior[i], fixed = TRUE)){ 187 | ## free parameter is a precision, not a variance/sd: 188 | ivs <- 1/ivs 189 | } 190 | } else { 191 | ivs <- rep(freepartable$start[i], n.chains) 192 | } 193 | 194 | ## now (try to) ensure the jittered values won't crash on us 195 | ## and converge 196 | ## if(grepl("\\[sd\\]", freepartable$prior[i]) | 197 | ## grepl("\\[var\\]", freepartable$prior[i])){ 198 | ## powval <- ifelse(grepl("\\[sd\\]", freepartable$prior[i]), -.5, -1) 199 | ## ivs <- ivs^powval 200 | ## ivs[ivs <= 0] <- -ivs[ivs <= 0] 201 | ## } 202 | 
if(grepl("beta", freepartable$prior[i]) | grepl("lkj", freepartable$prior[i])){ 203 | ivs <- rep(.5, n.chains) 204 | } 205 | 206 | ## extract matrix, dimensions 207 | for(j in 1:n.chains){ 208 | matidx <- which(names(initvals[[j]]) == paste0(freepartable$mat[i], "free")) 209 | initvals[[j]][[matidx]][freepartable$freeparnums[i]] <- ivs[j] 210 | } 211 | } 212 | 213 | initvals 214 | } 215 | -------------------------------------------------------------------------------- /R/set_stancovs.R: -------------------------------------------------------------------------------- 1 | set_stancovs <- function(partable, std.lv) { 2 | ## Add phantom lvs for covariance parameters 3 | 4 | ## add prior column if it doesn't exist 5 | if(is.na(match("prior", names(partable)))) partable$prior <- rep("", length(partable$id)) 6 | 7 | ## parameter matrices + indexing 8 | partable <- lavMatrixRepresentation(partable, add.attributes = TRUE) 9 | ## for defined parameters 10 | defpar <- which(partable$op == ":=") 11 | if(length(defpar) > 0){ 12 | partable$mat[defpar] <- "def" 13 | partable$row[defpar] <- 1:length(defpar) 14 | partable$col[defpar] <- 1 15 | partable$group[defpar] <- 1 16 | } 17 | 18 | ## must be psiUNC if std.lv 19 | if(std.lv){ 20 | partable$mat[partable$mat == "psi"] <- "psiUNC" 21 | } 22 | 23 | covpars <- which(partable$op == "~~" & 24 | partable$lhs != partable$rhs & 25 | partable$free > 0L) 26 | 27 | partable$rhoidx <- rep(NA, length(partable$id)) 28 | blkrow <- rep(NA, ncol(partable)) #length(partable$id)) 29 | 30 | ## Only do this if covpars exist 31 | if(length(covpars) > 0){ 32 | mvcov <- 0 33 | lvcov <- 0 34 | 35 | for(i in 1:length(covpars)){ 36 | ## Is this constrained equal to a previous parameter? 37 | eq.const <- FALSE 38 | eq.idx <- which(partable$op == "==" & partable$rhs == partable$plabel[covpars[i]]) 39 | if(length(eq.idx) > 0){ 40 | eq.const <- TRUE 41 | ## TODO? 
assumes it is equal to another covariance; do any models 42 | ## restrict covariances to be equal to other types of parameters? 43 | full.idx <- which(partable$plabel == partable$lhs[eq.idx]) 44 | old.idx <- partable$rhoidx[full.idx] 45 | } 46 | 47 | tmprows <- nrow(partable) + 1 48 | partable <- rbind(partable, blkrow) 49 | 50 | ## TODO? should 'block' ever differ from 'group'? 51 | partable$group[tmprows] <- partable$block[tmprows] <- 52 | partable$group[covpars[i]] 53 | 54 | partable$lhs[tmprows] <- partable$lhs[covpars[i]] 55 | partable$rhs[tmprows] <- partable$rhs[covpars[i]] 56 | 57 | ## Decide on =~ (ov) vs ~ (lv) 58 | if(partable$mat[covpars[i]] == "theta"){ 59 | if(!eq.const){ 60 | mvcov <- mvcov + 1 61 | covidx <- mvcov 62 | } 63 | partable$mat[tmprows] <- "rho" 64 | } else { 65 | if(!eq.const){ 66 | lvcov <- lvcov + 1 67 | covidx <- lvcov 68 | } 69 | partable$mat[tmprows] <- "lvrho" 70 | } 71 | partable$op[tmprows] <- "~~" 72 | partable$row[tmprows] <- partable$row[covpars[i]] 73 | partable$col[tmprows] <- partable$col[covpars[i]] 74 | partable$group[tmprows] <- partable$group[covpars[i]] 75 | 76 | v1var <- which(partable$lhs == partable$lhs[covpars[i]] & 77 | partable$rhs == partable$lhs[covpars[i]] & 78 | partable$group == partable$group[covpars[i]] & 79 | partable$op == "~~") 80 | tmpv1 <- paste(partable$mat[v1var], "[", partable$row[v1var], ",", partable$col[v1var], ",", partable$group[v1var], 81 | "]", sep="") 82 | 83 | v2var <- which(partable$lhs == partable$rhs[covpars[i]] & 84 | partable$rhs == partable$rhs[covpars[i]] & 85 | partable$group == partable$group[covpars[i]] & 86 | partable$op == "~~") 87 | tmpv2 <- paste(partable$mat[v2var], "[", partable$row[v2var], ",", partable$col[v2var], ",", partable$group[v2var], "]", sep="") 88 | 89 | if(partable$prior[covpars[i]] != ""){ 90 | partable$prior[tmprows] <- partable$prior[covpars[i]] 91 | } else { 92 | partable$prior[tmprows] <- "" 93 | } 94 | 95 | if(eq.const){ 96 | partable$ustart[covpars[i]] 
<- paste0(partable$mat[full.idx], 97 | "[", 98 | partable$row[full.idx], 99 | ",", partable$col[full.idx], 100 | ",", partable$group[full.idx], 101 | "]") 102 | partable$ustart[tmprows] <- paste0(partable$ustart[covpars[i]], "/sqrt(", tmpv1, "*", tmpv2, ")") 103 | } else { 104 | partable$rhoidx[tmprows] <- partable$rhoidx[covpars[i]] <- covidx 105 | partable$ustart[covpars[i]] <- paste0(partable$mat[tmprows], 106 | "[", partable$row[tmprows], 107 | ",", partable$col[tmprows], 108 | ",", partable$group[tmprows], 109 | "] * sqrt(", tmpv1, 110 | " * ", tmpv2, ")") 111 | partable$start[tmprows] <- partable$start[covpars[i]] 112 | } 113 | partable$free[tmprows] <- as.integer(partable$free[covpars[i]]) 114 | partable$free[covpars[i]] <- 0L 115 | partable$plabel[tmprows] <- paste(".p", tmprows, ".", sep="") 116 | partable$label[tmprows] <- "" 117 | partable$exo[tmprows] <- 0L 118 | } 119 | 120 | ## put covariances last, so that they appear last in 121 | ## the defined parameter block (they are functions of 122 | ## other parameters) 123 | ptcov <- partable[covpars,] 124 | partable <- partable[-covpars,] 125 | partable <- rbind(partable, ptcov) 126 | } 127 | 128 | ## FIXME? 
129 | ## Remove covariances associated with fixed x 130 | ## covpars <- which(partable$op == "~~" & 131 | ## partable$lhs != partable$rhs & 132 | ## partable$group == 1 & 133 | ## partable$lhs %in% ov.names.x & 134 | ## partable$free == 0) 135 | ## if(length(covpars) > 0) partable <- partable[-covpars,] 136 | 137 | partable 138 | } 139 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(libname, pkgname) { 2 | version <- read.dcf(file=system.file("DESCRIPTION", package=pkgname), 3 | fields="Version") 4 | packageStartupMessage("This is ",paste(pkgname, version)) 5 | packageStartupMessage('On multicore systems, we suggest use of future::plan("multicore") or\n', ' future::plan("multisession") for faster post-MCMC computations.') 6 | } 7 | 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # blavaan 2 | 3 | 4 | [![R build status](https://github.com/ecmerkle/blavaan/workflows/R-CMD-check/badge.svg)](https://github.com/ecmerkle/blavaan/actions) 5 | 6 | 7 | blavaan is a free, open source R package for Bayesian latent variable analysis. It relies on JAGS and Stan to estimate models via MCMC. 8 | 9 | The blavaan functions and syntax are similar to lavaan. 
For example, consider the Political Democracy example from Bollen (1989): 10 | 11 | ```r 12 | library(lavaan) # for the PoliticalDemocracy data 13 | library(blavaan) 14 | 15 | model <- ' 16 | # latent variable definitions 17 | ind60 =~ x1 + x2 + x3 18 | dem60 =~ y1 + y2 + y3 + y4 19 | dem65 =~ y5 + y6 + y7 + y8 20 | # regressions 21 | dem60 ~ ind60 22 | dem65 ~ ind60 + dem60 23 | # residual covariances 24 | y1 ~~ y5 25 | y2 ~~ y4 + y6 26 | y3 ~~ y7 27 | y4 ~~ y8 28 | y6 ~~ y8 29 | ' 30 | fit <- bsem(model, data = PoliticalDemocracy) 31 | summary(fit) 32 | ``` 33 | 34 | The development version of blavaan (containing updates not yet on CRAN) can be installed via the command below. Compilation is required; this may be a problem for users who currently rely on a binary version of blavaan from CRAN. A potential alternative is to install a binary from [the r-universe repo](https://ecmerkle.r-universe.dev/blavaan), which stays up to date with this repo. 35 | 36 | ```r 37 | remotes::install_github("ecmerkle/blavaan", INSTALL_opts = "--no-multiarch") 38 | ``` 39 | 40 | For further information, see: 41 | 42 | Merkle, E. C., Fitzsimmons, E., Uanhoro, J., & Goodrich, B. (2021). [Efficient Bayesian structural equation modeling in Stan](https://doi.org/10.18637/jss.v100.i06). Journal of Statistical Software, 100(6), 1–22. 43 | 44 | Merkle, E. C., & Rosseel, Y. (2018). [blavaan: Bayesian structural equation models via parameter expansion](https://doi.org/10.18637/jss.v085.i04). Journal of Statistical Software, 85(4), 1–30. 45 | 46 | blavaan is supported by the Institute of Education Sciences, U.S. Department of Education, Grant R305D210044, as well as NSF grants SES-1061334 and 1460719. 47 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: http://ecmerkle.github.io/blavaan/ 2 | 3 | destination: "." 
4 | 5 | template: 6 | bootstrap: 5 7 | params: 8 | bootswatch: cerulean 9 | 10 | resource_files: 11 | - vignettes/refs.bib 12 | 13 | news: 14 | one_page: true 15 | 16 | navbar: 17 | title: "blavaan" 18 | left: 19 | - text: Basics 20 | menu: 21 | - text: Getting Started 22 | href: articles/start.html 23 | - text: Prior Specification 24 | href: articles/prior.html 25 | - text: Estimation 26 | href: articles/estimate.html 27 | - text: Convergence and Efficiency Evaluation 28 | href: articles/convergence_efficiency.html 29 | - text: Model Summaries 30 | href: articles/summaries.html 31 | - text: Plots 32 | href: articles/plotting.html 33 | - text: Examples/Details 34 | menu: 35 | - text: Estimation with Ordinal Data 36 | href: articles/ordinal.html 37 | - text: Two-level Estimation 38 | href: articles/multilevel.html 39 | - text: Measurement Invariance 40 | href: articles/invariance.html 41 | - text: Approximate Fit Indices 42 | href: articles/approx_fi.html 43 | - text: Model Comparison 44 | href: articles/model_comparison.html 45 | - text: Cross-loadings with Strong Priors 46 | href: articles/cross_loadings_strong_priors.html 47 | - text: Modification Indices 48 | href: articles/mod_indices.html 49 | - text: Prior Predictive Checks 50 | href: articles/prior_pred_checks.html 51 | - text: Convergence Loop 52 | href: articles/convergence_loop.html 53 | - text: Probability of Direction 54 | href: articles/probability_direction.html 55 | - text: News 56 | href: news/index.html 57 | - text: Resources 58 | href: articles/resources.html 59 | - text: Functions 60 | href: reference/index.html 61 | right: 62 | - icon: "fab fa-github fa-lg" 63 | href: https://github.com/ecmerkle/blavaan 64 | - icon: fa-users 65 | href: https://groups.google.com/d/forum/blavaan 66 | -------------------------------------------------------------------------------- /blavaan.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | 
RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageInstallArgs: --no-multiarch --with-keep.source 17 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | #! /bin/sh 4 | "${R_HOME}/bin/Rscript" -e "rstantools::rstan_config()" 5 | -------------------------------------------------------------------------------- /configure.win: -------------------------------------------------------------------------------- 1 | # Generated by rstantools. Do not edit by hand. 2 | 3 | #! /bin/sh 4 | "${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe" -e "rstantools::rstan_config()" 5 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("There are two publications about blavaan, one describing the Stan methods and one describing the JAGS methods.") 2 | 3 | bibentry(bibtype = "Article", 4 | title = "Efficient {Bayesian} Structural Equation Modeling in {Stan}", 5 | author = c(person(given = c("Edgar", "C."), 6 | family = "Merkle", 7 | email = "merklee@missouri.edu"), 8 | person(given = "Ellen", 9 | family = "Fitzsimmons"), 10 | person(given = "James", 11 | family = "Uanhoro"), 12 | person(given = "Ben", 13 | family = "Goodrich")), 14 | journal = "Journal of Statistical Software", 15 | year = "2021", 16 | volume = "100", 17 | number = "6", 18 | pages = "1--22", 19 | doi = "10.18637/jss.v100.i06", 20 | 21 | header = "If you use Stan, please cite:" 22 | ) 23 | 24 | bibentry(bibtype = "Article", 25 | title = "{blavaan: Bayesian} Structural Equation Models via Parameter 
Expansion", 26 | author = c(person(given = c("Edgar", "C."), 27 | family = "Merkle", 28 | email = "merklee@missouri.edu"), 29 | person(given = "Yves", 30 | family = "Rosseel")), 31 | journal = "Journal of Statistical Software", 32 | year = "2018", 33 | volume = "85", 34 | number = "4", 35 | pages = "1--30", 36 | doi = "10.18637/jss.v085.i04", 37 | 38 | header = "If you use target='jags', please cite:" 39 | ) 40 | 41 | -------------------------------------------------------------------------------- /inst/include/stan_meta_header.hpp: -------------------------------------------------------------------------------- 1 | // Insert all #include statements here 2 | -------------------------------------------------------------------------------- /inst/stan/include/license.stan: -------------------------------------------------------------------------------- 1 | /* 2 | blavaan is free software: you can redistribute it and/or modify 3 | it under the terms of the GNU General Public License as published by 4 | the Free Software Foundation, either version 3 of the License, or 5 | (at your option) any later version. 6 | 7 | blavaan is distributed in the hope that it will be useful, 8 | but WITHOUT ANY WARRANTY; without even the implied warranty of 9 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 10 | GNU General Public License for more details. 11 | 12 | You should have received a copy of the GNU General Public License 13 | along with blavaan. If not, see <https://www.gnu.org/licenses/>.
14 | */ 15 | -------------------------------------------------------------------------------- /inst/stanfuns/fill_lower.stan: -------------------------------------------------------------------------------- 1 | matrix fill_lower(matrix x){ 2 | matrix[rows(x),cols(x)] newx; 3 | 4 | newx = x; 5 | for(i in 1:(rows(x) - 1)){ 6 | for(j in (i+1):rows(x)){ 7 | newx[j,i] = x[i,j]; 8 | } 9 | } 10 | return newx; 11 | } 12 | -------------------------------------------------------------------------------- /inst/stanfuns/sem_lv.stan: -------------------------------------------------------------------------------- 1 | real sem_lv_lpdf(matrix x, array[,,] real alpha, array[,,] real B, array[,,] real psi, array[,,] real gamma, int gamind, array[,] real meanx, array[] int g, int k, int N, int Ng, int diagpsi, int fullbeta, int nlv, array[] int lvind, int nlvno0){ 2 | array[Ng] real ldetcomp; 3 | matrix[k,k] iden; 4 | array[Ng] vector[k] alpha2; 5 | array[Ng] vector[k] psivecinv; 6 | array[Ng] matrix[k,k] psimatinv; 7 | array[Ng] matrix[k,k] psimat; 8 | array[Ng] matrix[k,k] siginv; 9 | vector[k] xvec; 10 | array[Ng] vector[k] evlv; 11 | array[(k-nlv+nlvno0)] int idx; 12 | real xvectm; 13 | real ldetsum; 14 | int nov; 15 | int nidx; 16 | 17 | nov = k - nlv; 18 | nidx = nov + nlvno0; 19 | 20 | iden = diag_matrix(rep_vector(1.0, k)); 21 | 22 | if(nlvno0 > 0){ 23 | idx[1:nlvno0] = lvind; 24 | } 25 | if(nov > 0){ 26 | for(j in 1:nov){ 27 | idx[nlvno0+j] = nlv + j; //nlvno0 + j? 
28 | } 29 | } 30 | 31 | for(j in 1:Ng){ 32 | alpha2[j] = to_vector(alpha[,1,j]); 33 | } 34 | 35 | evlv = sem_mean(alpha2, B, gamma, g, k, Ng, gamind, meanx); 36 | 37 | if(diagpsi){ 38 | for(j in 1:Ng){ 39 | for(i in 1:nidx){ 40 | psivecinv[j,idx[i]] = 1/psi[idx[i],idx[i],j]; 41 | } 42 | psimatinv[j] = diag_matrix(psivecinv[j]); 43 | 44 | siginv[j,1:nidx,1:nidx] = (iden[idx,idx] - to_matrix(B[idx,idx,j])') * psimatinv[j,idx,idx] * (iden[idx,idx] - to_matrix(B[idx,idx,j])); 45 | 46 | if(fullbeta){ 47 | ldetcomp[j] = log_determinant(iden[idx,idx] - to_matrix(B[idx,idx,j])); 48 | ldetcomp[j] = -2 * ldetcomp[j] + sum(log(diagonal(to_matrix(psi[idx,idx,j])))); 49 | } else { 50 | ldetcomp[j] = sum(log(diagonal(to_matrix(psi[idx,idx,j])))); 51 | } 52 | } 53 | } else { 54 | for(j in 1:Ng){ 55 | psimat[j] = to_matrix(psi[,,j]) + to_matrix(psi[,,j])' - diag_matrix(diagonal(to_matrix(psi[,,j]))); 56 | 57 | ldetcomp[j] = log_determinant(psimat[j,idx,idx]); 58 | if(fullbeta){ 59 | ldetcomp[j] = ldetcomp[j] - 2 * log_determinant(iden[idx,idx] - to_matrix(B[idx,idx,j])); 60 | } 61 | 62 | psimatinv[j] = psimat[j]; 63 | psimatinv[j,1:nidx,1:nidx] = inverse_spd(psimat[j,idx,idx]); 64 | siginv[j,1:nidx,1:nidx] = (iden[idx,idx] - to_matrix(B[idx,idx,j])') * psimatinv[j,1:nidx,1:nidx] * (iden[idx,idx] - to_matrix(B[idx,idx,j])); 65 | } 66 | } 67 | 68 | xvectm = 0; 69 | ldetsum = 0; 70 | for(i in 1:N){ 71 | xvec = x[i,]'; 72 | xvectm = xvectm + (xvec[idx] - evlv[g[i],idx])' * siginv[g[i],1:nidx,1:nidx] * (xvec[idx] - evlv[g[i],idx]); 73 | ldetsum = ldetsum + ldetcomp[g[i]]; 74 | } 75 | 76 | return -0.5 * (ldetsum + xvectm); 77 | } 78 | -------------------------------------------------------------------------------- /inst/stanfuns/sem_lv_missing.stan: -------------------------------------------------------------------------------- 1 | real sem_lv_missing_lpdf(matrix x, array[,,] real alpha, array[,,] real B, array[,,] real psi, array[,,] real gamma, int gamind, array[,] real meanx, 
array[] int g, int k, int N, int Ng, int diagpsi, int fullbeta, int nlv, array[] int lvind, int nlvno0, array[,] int nseen, array[,,] int obsvar, array[] int obspatt, array[] int gpatt){ 2 | array[Ng,max(gpatt)] real ldetcomp; 3 | matrix[k,k] iden; 4 | array[Ng] vector[k] alpha2; 5 | array[Ng] vector[k] psivecinv; 6 | array[Ng] matrix[k,k] psimatinv; 7 | array[Ng] matrix[k,k] psimat; 8 | array[Ng,max(gpatt)] matrix[k,k] siginv; 9 | vector[k] xvec; 10 | array[Ng] vector[k] evlv; 11 | array[(k-nlv+nlvno0)] int idx; 12 | array[k] int tmpobs; 13 | real xvectm; 14 | real ldetsum; 15 | int nov; 16 | int nidx; 17 | 18 | nov = k - nlv; 19 | 20 | iden = diag_matrix(rep_vector(1.0, k)); 21 | 22 | for(j in 1:Ng){ 23 | alpha2[j] = to_vector(alpha[,1,j]); 24 | } 25 | 26 | evlv = sem_mean(alpha2, B, gamma, g, k, Ng, gamind, meanx); 27 | 28 | // compute siginv, ldetcomp by missingness pattern 29 | // siginv: matrix[k,k] siginv[Ng,max(gpatt)] 30 | // ldetcomp: vector[max(gpatt)] ldetcomp[Ng] 31 | for(gg in 1:Ng){ 32 | for(m in 1:gpatt[gg]){ 33 | if(nlvno0 > 0){ 34 | idx[1:nlvno0] = lvind; 35 | } 36 | if(nov > 0){ 37 | for(j in 1:nseen[gg,m]){ 38 | idx[nlvno0+j] = nlv + obsvar[gg,m,j]; //nlv + obsvar[i,(j - nlv)]; 39 | } 40 | } 41 | nidx = nlvno0 + nseen[gg,m]; 42 | 43 | if(diagpsi){ 44 | for(j in 1:nidx){ 45 | psivecinv[gg,idx[j]] = 1/psi[idx[j],idx[j],gg]; 46 | } 47 | psimatinv[gg] = diag_matrix(psivecinv[gg]); 48 | 49 | siginv[gg,m,1:nidx,1:nidx] = (iden[idx[1:nidx],idx[1:nidx]] - to_matrix(B[idx[1:nidx],idx[1:nidx],gg])') * psimatinv[gg,idx[1:nidx],idx[1:nidx]] * (iden[idx[1:nidx],idx[1:nidx]] - to_matrix(B[idx[1:nidx],idx[1:nidx],gg])); 50 | 51 | if(fullbeta){ 52 | ldetcomp[gg,m] = log_determinant(iden[idx[1:nidx],idx[1:nidx]] - to_matrix(B[idx[1:nidx],idx[1:nidx],gg])); 53 | ldetcomp[gg,m] = -2 * ldetcomp[gg,m] + sum(log(diagonal(to_matrix(psi[idx[1:nidx],idx[1:nidx],gg])))); 54 | } else { 55 | ldetcomp[gg,m] = sum(log(diagonal(to_matrix(psi[idx[1:nidx],idx[1:nidx],gg])))); 
56 | } 57 | } else { 58 | psimat[gg] = to_matrix(psi[,,gg]) + to_matrix(psi[,,gg])' - diag_matrix(diagonal(to_matrix(psi[,,gg]))); 59 | 60 | ldetcomp[gg,m] = log_determinant(psimat[gg,idx[1:nidx],idx[1:nidx]]); 61 | if(fullbeta){ 62 | ldetcomp[gg,m] = ldetcomp[gg,m] - 2 * log_determinant(iden[idx[1:nidx],idx[1:nidx]] - to_matrix(B[idx[1:nidx],idx[1:nidx],gg])); 63 | } 64 | 65 | psimatinv[gg,1:nidx,1:nidx] = inverse_spd(psimat[gg,idx[1:nidx],idx[1:nidx]]); 66 | siginv[gg,m,1:nidx,1:nidx] = (iden[idx[1:nidx],idx[1:nidx]] - to_matrix(B[idx[1:nidx],idx[1:nidx],gg])') * psimatinv[gg,1:nidx,1:nidx] * (iden[idx[1:nidx],idx[1:nidx]] - to_matrix(B[idx[1:nidx],idx[1:nidx],gg])); 67 | } 68 | } 69 | } 70 | 71 | // now that ldetcomp and siginv computed for each pattern, 72 | // obtain log-likelihood 73 | xvectm = 0; 74 | ldetsum = 0; 75 | for(i in 1:N){ 76 | if(nlvno0 > 0){ 77 | idx[1:nlvno0] = lvind; 78 | } 79 | if(nov > 0){ 80 | for(j in 1:nseen[g[i],obspatt[i]]){ 81 | idx[nlvno0+j] = nlv + obsvar[g[i],obspatt[i],j]; //nlv + obsvar[i,(j - nlv)]; 82 | } 83 | } 84 | nidx = nlvno0 + nseen[g[i],obspatt[i]]; 85 | 86 | xvec[1:nidx] = x[i,1:nidx]'; 87 | xvectm = xvectm + (xvec[1:nidx] - evlv[g[i],idx[1:nidx]])' * siginv[g[i],obspatt[i],1:nidx,1:nidx] * (xvec[1:nidx] - evlv[g[i],idx[1:nidx]]); 88 | ldetsum = ldetsum + ldetcomp[g[i],obspatt[i]]; 89 | } 90 | 91 | return -0.5 * (ldetsum + xvectm); 92 | } 93 | -------------------------------------------------------------------------------- /inst/stanfuns/sem_mean.stan: -------------------------------------------------------------------------------- 1 | array[] vector sem_mean(array[] vector alpha, array[,,] real B, array[,,] real gamma, array[] int g, int k, int Ng, int gamind, array[,] real meanx){ 2 | matrix[k,k] iden; 3 | array[Ng] vector[k] evlv; 4 | 5 | iden = diag_matrix(rep_vector(1.0, k)); 6 | 7 | for(j in 1:Ng){ 8 | if(gamind == 1){ 9 | evlv[j] = inverse(iden - to_matrix(B[,,j])) * (alpha[j] + to_matrix(gamma[,,j]) * 
to_vector(meanx[,j])); 10 | 11 | } else { 12 | evlv[j] = inverse(iden - to_matrix(B[,,j])) * alpha[j]; 13 | } 14 | } 15 | 16 | return evlv; 17 | } 18 | -------------------------------------------------------------------------------- /inst/stanfuns/sem_mean_eta.stan: -------------------------------------------------------------------------------- 1 | array[] vector sem_mean_eta(array[,,] real alpha, matrix eta, array[,,] real B, array[,,] real gamma, array[] int g, int k, int N, int Ng, int nlv, array[] int lvind, array[] int lv0ind){ 2 | matrix[k,k] iden; 3 | array[Ng] matrix[k,k] ibinv; 4 | array[N] vector[k] evlv; 5 | array[k,1,Ng] real alphvec; 6 | array[(k - nlv + size(lvind))] int idx; 7 | int nov; 8 | int nlvno0; 9 | 10 | nov = k - nlv; 11 | nlvno0 = size(lvind); 12 | 13 | iden = diag_matrix(rep_vector(1.0, k)); 14 | 15 | alphvec = alpha; 16 | 17 | if(size(lvind) > 0){ 18 | idx[1:nlvno0] = lvind; 19 | } 20 | if(nov > 0){ 21 | for(j in 1:nov){ 22 | idx[nlvno0+j] = nlv + j; //nlvno0 + j? 
23 | } 24 | } 25 | 26 | for(j in 1:Ng){ 27 | ibinv[j,lv0ind,lv0ind] = inverse(iden[lv0ind,lv0ind] - to_matrix(B[lv0ind,lv0ind,j])); 28 | } 29 | 30 | for(i in 1:N){ 31 | // this line took way too long to get right: 32 | evlv[i,lv0ind] = ibinv[g[i],lv0ind,lv0ind] * (to_vector(alphvec[lv0ind,1,g[i]]) + to_matrix(B[lv0ind,idx,g[i]]) * eta[i,idx]'); 33 | } 34 | 35 | return evlv; 36 | } 37 | -------------------------------------------------------------------------------- /inst/testdata/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ecmerkle/blavaan/e60a1a4152cf93a6996b34b2a79be42714319b85/inst/testdata/sysdata.rda -------------------------------------------------------------------------------- /inst/tinytest/tests.blavaan.R: -------------------------------------------------------------------------------- 1 | set.seed(8675309) 2 | library(lavaan, quietly = TRUE) 3 | x1 <- rnorm(100) 4 | x2 <- rnorm(100) 5 | y1 <- 0.5 + 2*x1 + rnorm(100) 6 | Data <- data.frame(y1 = y1, x1 = x1, x2 = x2) 7 | 8 | model <- ' y1 ~ x1 ' 9 | 10 | ## auto convergence in stan 11 | expect_error(bsem(model, data=Data, fixed.x=TRUE, target="stan", convergence="auto")) 12 | 13 | ## seed length != # chains for jags 14 | expect_error(bsem(model, data=Data, fixed.x=TRUE, seed=1, target="jags")) 15 | 16 | ## unknown cp 17 | expect_error(bsem(model, data=Data, ov.cp="blah", fixed.x=TRUE)) 18 | 19 | ## cp/std.lv clash 20 | expect_error(bsem(model, data=Data, fixed.x=TRUE, std.lv=TRUE, cp="fa")) 21 | 22 | model2 <- ' y1 ~ b1*x1 + b2*x2 23 | b1 + b2 == 0 ' 24 | 25 | ## equality constraint with multiple variables on lhs 26 | expect_error(bsem(model2, data=Data, fixed.x=TRUE)) 27 | 28 | model2 <- ' y1 ~ b1*x1 + b2*x2 29 | b1 == -b2/2 ' 30 | fit <- bsem(model2, data=Data, target='jags', adapt=1, 31 | burnin=1, sample=3) 32 | ## ensure that == constraints are being respected 33 | expect_true(round(2*fit@Fit@x[1] + fit@Fit@x[2], 5) == 
0L) 34 | 35 | ## do.fit=FALSE 36 | fit <- bsem(model, data=Data, fixed.x=TRUE, adapt=2, 37 | burnin=2, sample=2, do.fit=FALSE) 38 | expect_equal(class(fit)[1], "blavaan") 39 | 40 | ## mcmcextra 41 | fit <- bsem(model, data=Data, save.lvs=TRUE, do.fit=FALSE, 42 | mcmcextra=list(data=list(emiter=101, llnsamp=78))) 43 | expect_equal(class(fit)[1], "blavaan") 44 | expect_equal(fit@external$mcmcdata$emiter, 101L) 45 | expect_equal(fit@Options$llnsamp, 78L) 46 | 47 | ## vb 48 | fit <- bsem(model, data=Data, target="vb") 49 | expect_equal(class(fit)[1], "blavaan") 50 | 51 | ## named variable that clashes 52 | names(Data)[1] <- "lambda" 53 | model2 <- ' lambda ~ b1*x1 + b2*x2 ' 54 | expect_error(bsem(model2, data=Data)) 55 | 56 | ## one prior on variance, one on sd (problem for target="stan" only) 57 | ## and check that defined parameters translate 58 | names(Data)[1] <- "y1" 59 | model3 <- ' y1 ~ a*x1 60 | x2 ~ b*x1 61 | y1 ~~ prior("gamma(1,.5)[sd]")*y1 62 | x2 ~~ prior("gamma(1,.5)[var]")*x2 63 | pprod := a/b ' 64 | expect_error(bsem(model3, data=Data, target="stan")) 65 | 66 | ## priors are wrong form but will not throw error until estimation 67 | fit <- bsem(model3, data=Data, target="jags", do.fit=FALSE) 68 | expect_equal(class(fit)[1], "blavaan") 69 | 70 | fit <- bsem(model3, data=Data, target="stanclassic", do.fit=FALSE) 71 | expect_equal(class(fit)[1], "blavaan") 72 | 73 | ## unknown prior 74 | expect_error(bsem(model, data=Data, dp=dpriors(psi="mydist(1,.5)"))) 75 | 76 | ## wiggle argument 77 | expect_error(bsem(model3, data=Data, wiggle='a', wiggle.sd=0)) ## sd=0 not allowed 78 | expect_error(bsem(model3, data=Data, wiggle='sponge')) ## sd is string 79 | expect_error(bsem(model3, data=Data, wiggle='b', wiggle.sd=c(1,2))) ## 2 sds, but 1 wiggle 80 | expect_error(bsem(model3, data=Data, wiggle=c('a','b'), wiggle.sd=c(.2,.3), target='jags')) 81 | expect_error(bsem(model3, data=Data, wiggle=c('a','b'), wiggle.sd=c(.2,.3), target='stanclassic')) ## wiggle.sd of 
length > 1 not allowed for these targets 82 | 83 | HS.model <- ' visual =~ x1 + x2 + x3 ' 84 | 85 | expect_equal(class(bcfa(HS.model, data=HolzingerSwineford1939, target="stan", do.fit=FALSE, group="school", group.equal=c("intercepts","loadings"), wiggle=c("intercepts"), wiggle.sd=.1))[1], "blavaan") 86 | expect_equal(class(bcfa(HS.model, data=HolzingerSwineford1939, target="stanclassic", do.fit=FALSE, group="school", group.equal=c("intercepts","loadings"), wiggle=c("intercepts"), wiggle.sd=.1))[1], "blavaan") 87 | expect_equal(class(bcfa(HS.model, data=HolzingerSwineford1939, target="jags", do.fit=FALSE, group="school", group.equal=c("intercepts","loadings"), wiggle=c("intercepts"), wiggle.sd=.1))[1], "blavaan") 88 | 89 | ## moment match mcmcextra 90 | set.seed(341) 91 | 92 | x1 <- rnorm(100) 93 | y1 <- 0.5 + 2*x1 + rnorm(100) 94 | g <- rep(1:2, each=50) 95 | Data <- data.frame(y1 = y1, x1 = x1, g = g) 96 | 97 | model <- ' y1 ~ prior("normal(0,1)")*x1 ' 98 | fitstanmomentmatch <- bsem( 99 | model, 100 | data=Data, 101 | fixed.x=TRUE, 102 | burnin=20, 103 | sample=20, 104 | mcmcextra=list(data=list(moment_match_k_threshold=0.5)), 105 | target="stan", 106 | seed=1 107 | ) 108 | momentmatch_mcobj <- blavInspect(fitstanmomentmatch, "mcobj") 109 | expect_true("Lambda_y_free" %in% names(momentmatch_mcobj@par_dims)) 110 | expect_equal( 111 | fitstanmomentmatch@external$mcmcdata$moment_match_k_threshold, 112 | 0.5 113 | ) 114 | -------------------------------------------------------------------------------- /inst/tinytest/tests.blavaanobject-methods.R: -------------------------------------------------------------------------------- 1 | if(requireNamespace("rstan", quietly = TRUE) & 2 | requireNamespace("runjags", quietly = TRUE)){ 3 | load(system.file("testdata", "sysdata.rda", package="blavaan")) 4 | library("lavaan", quietly = TRUE) 5 | 6 | # classes 7 | expect_equal(class(fitjags@external), "list") 8 | expect_equal(class(fitstan@external), "list") 9 | 
expect_equal(class(fitstanc@external), "list") 10 | 11 | ## parameter summaries 12 | expect_equal(dim(parTable(fitjags)), c(10,20)) 13 | expect_equal(dim(parTable(fitstan)), c(10,24)) 14 | expect_equal(dim(parTable(fitstanc)), c(10,21)) 15 | 16 | expect_equal(sum(fitjags@ParTable$free > 0, na.rm = TRUE), 17 | length(blavInspect(fitjags, 'psrf'))) 18 | expect_equal(sum(fitstan@ParTable$free > 0, na.rm = TRUE), 19 | length(blavInspect(fitstan, 'psrf'))) 20 | expect_equal(sum(fitstanc@ParTable$free > 0, na.rm = TRUE), 21 | length(blavInspect(fitstanc, 'psrf'))) 22 | expect_equal(fitjags@ParTable$free, fitstan@ParTable$free) 23 | expect_equal(fitjags@ParTable$free, fitstanc@ParTable$free) 24 | expect_equal(nrow(parTable(fitjags)), nrow(parTable(fitstan))) 25 | expect_equal(nrow(parTable(fitjags)), nrow(parTable(fitstanc))) 26 | 27 | expect_error(blavInspect(fitjags, 'blah')) 28 | 29 | ## fitMeasures 30 | expect_equal(length(fitMeasures(fitjags)), 31 | length(fitMeasures(fitstan))) 32 | expect_equal(length(fitMeasures(fitjags)), 33 | length(fitMeasures(fitstanc))) 34 | 35 | ## this is how summary() obtains its results, but have not figured out 36 | ## how to get S4 methods to directly work in testthat 37 | expect_equal(dim(parameterEstimates(fitjags)), c(10, 6)) 38 | expect_equal(dim(parameterEstimates(fitstan)), c(10, 6)) 39 | expect_equal(dim(parameterEstimates(fitstanc)), c(10, 6)) 40 | 41 | ## various blavInspect args 42 | expect_equal(length(blavInspect(fitjags, 'psrf')), 43 | length(blavInspect(fitstan, 'psrf'))) 44 | 45 | expect_equal(length(blavInspect(fitjags, 'psrf')), 46 | length(blavInspect(fitstanc, 'psrf'))) 47 | 48 | expect_equal(length(blavInspect(fitjags, 'neff')), 49 | length(blavInspect(fitstan, 'neff'))) 50 | 51 | expect_equal(length(blavInspect(fitjags, 'neff')), 52 | length(blavInspect(fitstanc, 'neff'))) 53 | 54 | expect_equal(length(blavInspect(fitjags, 'mcmc')), 55 | length(blavInspect(fitstan, 'mcmc'))) 56 | 57 | 
expect_equal(length(blavInspect(fitjags, 'mcmc')), 58 | length(blavInspect(fitstanc, 'mcmc'))) 59 | 60 | expect_equal(length(blavInspect(fitjags, 'start')), 61 | length(blavInspect(fitstan, 'start'))) 62 | 63 | expect_equal(length(blavInspect(fitjags, 'start')), 64 | length(blavInspect(fitstanc, 'start'))) 65 | 66 | expect_equal(dim(blavInspect(fitjags, 'hpd')), 67 | dim(blavInspect(fitstan, 'hpd'))) 68 | 69 | expect_equal(dim(blavInspect(fitjags, 'hpd')), 70 | dim(blavInspect(fitstanc, 'hpd'))) 71 | 72 | expect_equal(dim(standardizedposterior(fitjags)), 73 | dim(standardizedposterior(fitstan))) 74 | 75 | expect_equal(dim(standardizedposterior(fitjags)), 76 | dim(standardizedposterior(fitstanc))) 77 | 78 | expect_equal(dim(blavInspect(fitstanfs, 'lvmeans')), 79 | c(301, 2)) 80 | 81 | expect_equal(dim(blavInspect(fitstanfs, 'lvs')[[2]]), 82 | c(10, 602)) 83 | 84 | HS.model <- ' visual =~ x1 + x2 + x3 85 | textual =~ x4 + x5 + x6 ' 86 | fitlav <- cfa(HS.model, data=HolzingerSwineford1939, 87 | meanstructure=TRUE) 88 | expect_true(cor(blavInspect(fitstanfs, 'lvmeans')[,1], 89 | lavPredict(fitlav, type='lv')[,1]) > .95) 90 | 91 | ## plots 92 | expect_silent(p <- plot(fitstan, showplot = FALSE)) 93 | expect_false(any(p$data$value[grep('~~', p$data$parameter)] < 0)) # check of parameter labels 94 | expect_silent(p <- plot(fitstan, 1:4, showplot = FALSE)) 95 | expect_silent(p <- plot(fitstan, plot.type = "hist", showplot = FALSE)) 96 | expect_silent(p <- plot(fitstan, 1:4, plot.type = "dens", showplot = FALSE)) 97 | expect_silent(p <- plot(fitstan, c(2,4), plot.type = "scatter", showplot = FALSE)) 98 | 99 | expect_silent(p <- plot(fitstanc, showplot = FALSE)) 100 | expect_silent(p <- plot(fitstanc, 1:4, showplot = FALSE)) 101 | expect_silent(p <- plot(fitstanc, plot.type = "hist", showplot = FALSE)) 102 | expect_silent(p <- plot(fitstanc, 1:4, plot.type = "dens", showplot = FALSE)) 103 | expect_silent(p <- plot(fitstanc, c(2,4), plot.type = "scatter", showplot = FALSE)) 
104 | 105 | expect_silent(p <- plot(fitjags, showplot = FALSE)) 106 | expect_silent(p <- plot(fitjags, 1:4, showplot = FALSE)) 107 | expect_silent(p <- plot(fitjags, plot.type = "hist", showplot = FALSE)) 108 | expect_silent(p <- plot(fitjags, 1:4, plot.type = "dens", showplot = FALSE)) 109 | expect_silent(p <- plot(fitjags, c(2,4), plot.type = "scatter", showplot = FALSE)) 110 | 111 | ## blavFit + ppmc 112 | discFUN <- list(global = function(fit) { 113 | fitMeasures(fit, fit.measures = c("cfi","rmsea","srmr","chisq")) 114 | }, 115 | std.cov.resid = function(fit) lavResiduals(fit, zstat = FALSE, 116 | summary = FALSE)$`1`$cov) 117 | 118 | ppmc_res <- ppmc(fitstan, discFUN = discFUN) 119 | expect_equal(class(ppmc_res)[1], "blavPPMC") 120 | ppmc_summ <- summary(ppmc_res, "global", cent = "EAP") 121 | expect_equal(class(ppmc_summ)[1], "lavaan.data.frame") 122 | ppmc_summ <- summary(ppmc_res, "std.cov.resid", cent = "MAP", 123 | to.data.frame = TRUE, sort.by = "MAP", 124 | decreasing = TRUE) 125 | bf_res <- blavFitIndices(fitstan) 126 | expect_equal(class(bf_res)[1], "blavFitIndices") 127 | expect_equal(class(summary(bf_res))[1], "lavaan.data.frame") 128 | 129 | bf_res <- blavFitIndices(fitstan, rescale = "ppmc") 130 | expect_equal(class(bf_res)[1], "blavFitIndices") 131 | expect_equal(class(summary(bf_res))[1], "lavaan.data.frame") 132 | 133 | bf_res <- blavFitIndices(fitstan, rescale = "mcmc") 134 | expect_equal(class(bf_res)[1], "blavFitIndices") 135 | expect_equal(class(summary(bf_res))[1], "lavaan.data.frame") 136 | 137 | set.seed(341) 138 | 139 | x1 <- rnorm(100) 140 | y1 <- 0.5 + 2*x1 + rnorm(100) 141 | g <- rep(1:2, each=50) 142 | Data <- data.frame(y1 = y1, x1 = x1, g = g) 143 | 144 | model <- ' y1 ~ prior("normal(0,1)")*x1 ' 145 | fitstanmomentmatch <- bsem( 146 | model, 147 | data=Data, 148 | fixed.x=TRUE, 149 | burnin=20, 150 | sample=20, 151 | mcmcextra=list(data=list(moment_match_k_threshold=0.5)), 152 | target="stan", 153 | seed=1 154 | ) 155 | bf_mm_res 
<- blavFitIndices(fitstanmomentmatch, fit.measures = c("looic")) 156 | expect_equal(class(bf_mm_res)[1], "blavFitIndices") 157 | expect_equal(class(summary(bf_mm_res))[1], "lavaan.data.frame") 158 | expect_true("p_loo" %in% names(bf_mm_res@details$pD)) 159 | 160 | 161 | ## blavPredict 162 | expect_error(blavPredict(fitstanc)) 163 | expect_error(blavPredict(fitjags)) 164 | 165 | expect_equal(dim(blavPredict(fitstanfs)[[1]]), c(301,2)) 166 | expect_equal(length(blavPredict(fitstanfs)), 20) 167 | expect_equal(dim(blavPredict(fitstanfs, type="lvmeans")), c(301,2)) 168 | expect_equal(dim(blavPredict(fitstanfs, type="ov")[[1]]), c(301,6)) 169 | expect_equal(dim(blavPredict(fitstanfs, type="ypred")[[1]]), c(301,6)) 170 | expect_error(blavPredict(fitstanfs, type="ymis")) 171 | } 172 | -------------------------------------------------------------------------------- /man/bcfa.Rd: -------------------------------------------------------------------------------- 1 | \name{bcfa} 2 | \alias{bcfa} 3 | \title{Fit Confirmatory Factor Analysis Models} 4 | \description{ 5 | Fit a Confirmatory Factor Analysis (CFA) model.} 6 | \usage{ 7 | bcfa(..., cp = "srs", 8 | dp = NULL, n.chains = 3, burnin, sample, 9 | adapt, mcmcfile = FALSE, mcmcextra = list(), inits = "simple", 10 | convergence = "manual", target = "stan", save.lvs = FALSE, 11 | wiggle = NULL, wiggle.sd = 0.1, prisamp = FALSE, jags.ic = FALSE, 12 | seed = NULL, bcontrol = list()) 13 | } 14 | \arguments{ 15 | \item{...}{Default lavaan arguments. See \code{\link[lavaan]{lavaan}}.} 16 | \item{cp}{Handling of prior distributions on covariance parameters: 17 | possible values are \code{"srs"} (default) or 18 | \code{"fa"}. Option \code{"fa"} is only available for \code{target="jags"}.} 19 | \item{dp}{Default prior distributions on different types of 20 | parameters, typically the result of a call to \code{dpriors()}. 
21 | See the \code{dpriors()} help file for more information.} 22 | \item{n.chains}{Number of desired MCMC chains.} 23 | \item{burnin}{Number of burnin/warmup iterations (not including the adaptive 24 | iterations, for target="jags"). Defaults to 4000 or target="jags" and 25 | 500 for Stan targets.} 26 | \item{sample}{The total number of samples to take after burnin. Defaults 27 | to 10000 for target="jags" and 1000 for Stan targets.} 28 | \item{adapt}{For target="jags", the number of adaptive iterations to use at the start of 29 | sampling. Defaults to 1000.} 30 | \item{mcmcfile}{If \code{TRUE}, the JAGS/Stan model will be written to file 31 | (in the lavExport directory). Can also supply a character 32 | string, which serves as the name of the directory to which files will be written.} 33 | \item{mcmcextra}{A list with potential names \code{syntax} (unavailable 34 | for target=\code{"stan"}), 35 | \code{monitor}, \code{data}, and \code{llnsamp}. The \code{syntax} object is a text string containing extra 36 | code to insert in the JAGS/Stan model syntax. The \code{data} object 37 | is a list of extra data to send to the JAGS/Stan model. If 38 | \code{moment_match_k_threshold} is specified within \code{data} the looic of 39 | the model will be calculated using moment matching. The \code{monitor} object 40 | is a character vector containing extra JAGS/Stan parameters to 41 | monitor. The \code{llnsamp} object is only relevant to models with ordinal 42 | variables, and specifies the number of samples that should be drawn to approximate 43 | the model log-likelihood (larger numbers imply higher accuracy and 44 | longer time). This log-likelihood is specifically used to compute 45 | information criteria.} 46 | \item{inits}{If it is a character string, the options are currently 47 | \code{"simple"} (default), \code{"Mplus"}, \code{"prior"}, or \code{"jags"}. 
In the first two 48 | cases, parameter values are set as though they will be estimated via 49 | ML (see \code{\link[lavaan]{lavaan}}). The starting parameter value for 50 | each chain is then perturbed from the original values through the 51 | addition of random uniform noise. If \code{"prior"} is used, the starting 52 | parameter values are obtained based on the prior distributions 53 | (while also trying to ensure that the starting values will not crash 54 | the model estimation). If \code{"jags"}, no starting values are 55 | specified and JAGS will choose values on its own (and this will probably 56 | crash Stan targets). You can also supply 57 | a list of starting values for each chain, where the list format can 58 | be obtained from, e.g., \code{blavInspect(fit, "inits")}. Finally, 59 | you can specify starting values in a similar way to lavaan, 60 | using the lavaan \code{start} argument (see the lavaan 61 | documentation for all the options there). In this case, you should also set 62 | \code{inits="simple"}, and be aware that the same starting values 63 | will be used for each chain.} 64 | \item{convergence}{Useful only for \code{target="jags"}. If \code{"auto"}, parameters are 65 | sampled until convergence is achieved (via \code{autorun.jags()}). In 66 | this case, the arguments \code{burnin} and \code{sample} are passed to 67 | \code{autorun.jags()} as \code{startburnin} and \code{startsample}, 68 | respectively. Otherwise, parameters 69 | are sampled as specified by the user (or by the \code{run.jags} 70 | defaults).} 71 | \item{target}{Desired MCMC sampling, with \code{"stan"} (pre-compiled 72 | marginal approach) as 73 | default. Also available is \code{"vb"}, which calls the rstan function 74 | \code{vb()}. Other options include \code{"jags"}, \code{"stancond"}, and 75 | \code{"stanclassic"}, which sample latent variables and provide some 76 | greater functionality (because syntax is written "on the fly"). 
But 77 | they are slower and less efficient.} 78 | \item{save.lvs}{Should sampled latent variables (factor scores) be 79 | saved? Logical; defaults to FALSE} 80 | \item{wiggle}{Labels of equality-constrained parameters that should be 81 | "approximately" equal. Can also be "intercepts", "loadings", 82 | "regressions", "means".} 83 | \item{wiggle.sd}{The prior sd (of normal distribution) to be used in approximate equality 84 | constraints. Can be one value, or (for target="stan") a numeric vector 85 | of values that is the same length as wiggle.} 86 | \item{prisamp}{Should samples be drawn from the prior, instead of the 87 | posterior (\code{target="stan"} only)? Logical; defaults to FALSE} 88 | \item{jags.ic}{Should DIC be computed the JAGS way, in addition to the BUGS way? Logical; defaults to FALSE} 89 | \item{seed}{A vector of length \code{n.chains} (for target 90 | \code{"jags"}) or an integer (for target \code{"stan"}) containing random 91 | seeds for the MCMC run. If \code{NULL}, seeds will be chosen randomly.} 92 | \item{bcontrol}{A list containing additional parameters passed to 93 | \code{run.jags} (or \code{autorun.jags}) or \code{stan}. See the manpage of those functions for an 94 | overview of the additional parameters that can be set.} 95 | } 96 | \details{ 97 | The \code{bcfa} function is a wrapper for the more general 98 | \code{\link{blavaan}} function, using the following default 99 | \code{\link[lavaan]{lavaan}} arguments: 100 | \code{int.ov.free = TRUE}, \code{int.lv.free = FALSE}, 101 | \code{auto.fix.first = TRUE} (unless \code{std.lv = TRUE}), 102 | \code{auto.fix.single = TRUE}, \code{auto.var = TRUE}, 103 | \code{auto.cov.lv.x = TRUE}, 104 | \code{auto.th = TRUE}, \code{auto.delta = TRUE}, 105 | and \code{auto.cov.y = TRUE}. 106 | } 107 | \value{ 108 | An object that inherits from class \link[lavaan:lavaan-class]{lavaan}, for which several methods 109 | are available, including a \code{summary} method. 
110 | } 111 | \references{ 112 | Edgar C. Merkle, Ellen Fitzsimmons, James Uanhoro, & Ben Goodrich (2021). Efficient Bayesian Structural Equation Modeling in Stan. Journal of Statistical 113 | Software, 100(6), 1-22. URL http://www.jstatsoft.org/v100/i06/. 114 | 115 | Edgar C. Merkle & Yves Rosseel (2018). blavaan: Bayesian Structural 116 | Equation Models via Parameter Expansion. Journal of Statistical 117 | Software, 85(4), 1-30. URL http://www.jstatsoft.org/v85/i04/. 118 | 119 | Yves Rosseel (2012). lavaan: An R Package for Structural Equation 120 | Modeling. Journal of Statistical Software, 48(2), 1-36. URL 121 | http://www.jstatsoft.org/v48/i02/. 122 | } 123 | \seealso{ 124 | \code{\link{blavaan}} 125 | } 126 | \examples{ 127 | data(HolzingerSwineford1939, package = "lavaan") 128 | 129 | # The Holzinger and Swineford (1939) example 130 | HS.model <- ' visual =~ x1 + x2 + x3 131 | textual =~ x4 + x5 + x6 132 | speed =~ x7 + x8 + x9 ' 133 | 134 | \dontrun{ 135 | fit <- bcfa(HS.model, data = HolzingerSwineford1939) 136 | summary(fit) 137 | } 138 | 139 | # A short run for rough results 140 | fit <- bcfa(HS.model, data = HolzingerSwineford1939, burnin = 100, sample = 100, 141 | n.chains = 2) 142 | summary(fit) 143 | } 144 | -------------------------------------------------------------------------------- /man/bgrowth.Rd: -------------------------------------------------------------------------------- 1 | \name{bgrowth} 2 | \alias{bgrowth} 3 | \title{Fit Growth Curve Models} 4 | \description{ 5 | Fit a Growth Curve model.} 6 | \usage{ 7 | bgrowth(..., cp = "srs", dp = NULL, n.chains = 3, 8 | burnin, sample, adapt, mcmcfile = FALSE, mcmcextra = list(), 9 | inits = "simple", convergence = "manual", target = "stan", 10 | save.lvs = FALSE, wiggle = NULL, wiggle.sd = 0.1, prisamp = FALSE, 11 | jags.ic = FALSE, seed = NULL, bcontrol = list()) 12 | } 13 | \arguments{ 14 | \item{...}{Default lavaan arguments. 
See \code{\link[lavaan]{lavaan}}.} 15 | \item{cp}{Handling of prior distributions on covariance parameters: 16 | possible values are \code{"srs"} (default) or \code{"fa"}. Option \code{"fa"} is only available for \code{target="jags"}.} 17 | \item{dp}{Default prior distributions on different types of 18 | parameters, typically the result of a call to \code{dpriors()}. 19 | See the \code{dpriors()} help file for more information.} 20 | \item{n.chains}{Number of desired MCMC chains.} 21 | \item{burnin}{Number of burnin/warmup iterations (not including the adaptive 22 | iterations, for target="jags"). Defaults to 4000 for target="jags" and 23 | 500 for Stan targets.} 24 | \item{sample}{The total number of samples to take after burnin. Defaults 25 | to 10000 for target="jags" and 1000 for Stan targets.} 26 | \item{adapt}{For target="jags", the number of adaptive iterations to use at the start of 27 | sampling. Defaults to 1000.} 28 | \item{mcmcfile}{If \code{TRUE}, the JAGS/Stan model will be written to file 29 | (in the lavExport directory). Can also supply a character 30 | string, which serves as the name of the directory to which files will be written.} 31 | \item{mcmcextra}{A list with potential names \code{syntax} (unavailable 32 | for target=\code{"stan"}), 33 | \code{monitor}, \code{data}, and \code{llnsamp}. The \code{syntax} object is a text string containing extra 34 | code to insert in the JAGS/Stan model syntax. The \code{data} object 35 | is a list of extra data to send to the JAGS/Stan model. If 36 | \code{moment_match_k_threshold} is specified within \code{data} the looic of 37 | the model will be calculated using moment matching. The \code{monitor} object 38 | is a character vector containing extra JAGS/Stan parameters to 39 | monitor.
The \code{llnsamp} object is only relevant to models with ordinal 40 | variables, and specifies the number of samples that should be drawn to approximate 41 | the model log-likelihood (larger numbers imply higher accuracy and 42 | longer time). This log-likelihood is specifically used to compute 43 | information criteria.} 44 | \item{inits}{If it is a character string, the options are currently 45 | \code{"simple"} (default), \code{"Mplus"}, \code{"prior"}, or \code{"jags"}. In the first two 46 | cases, parameter values are set as though they will be estimated via 47 | ML (see \code{\link[lavaan]{lavaan}}). The starting parameter value for 48 | each chain is then perturbed from the original values through the 49 | addition of random uniform noise. If \code{"prior"} is used, the starting 50 | parameter values are obtained based on the prior distributions 51 | (while also trying to ensure that the starting values will not crash 52 | the model estimation). If \code{"jags"}, no starting values are 53 | specified and JAGS will choose values on its own (and this will probably 54 | crash Stan targets). You can also supply 55 | a list of starting values for each chain, where the list format can 56 | be obtained from, e.g., \code{blavInspect(fit, "inits")}. Finally, 57 | you can specify starting values in a similar way to lavaan, 58 | using the lavaan \code{start} argument (see the lavaan 59 | documentation for all the options there). In this case, you should also set 60 | \code{inits="simple"}, and be aware that the same starting values 61 | will be used for each chain.} 62 | \item{convergence}{Useful only for \code{target="jags"}. If \code{"auto"}, parameters are 63 | sampled until convergence is achieved (via \code{autorun.jags()}). In 64 | this case, the arguments \code{burnin} and \code{sample} are passed to 65 | \code{autorun.jags()} as \code{startburnin} and \code{startsample}, 66 | respectively. 
Otherwise, parameters 67 | are sampled as specified by the user (or by the \code{run.jags} 68 | defaults).} 69 | \item{target}{Desired MCMC sampling, with \code{"stan"} (pre-compiled 70 | marginal approach) as 71 | default. Also available is \code{"vb"}, which calls the rstan function 72 | \code{vb()}. Other options include \code{"jags"}, \code{"stancond"}, and 73 | \code{"stanclassic"}, which sample latent variables and provide some 74 | greater functionality (because syntax is written "on the fly"). But 75 | they are slower and less efficient.} 76 | \item{save.lvs}{Should sampled latent variables (factor scores) be 77 | saved? Logical; defaults to FALSE} 78 | \item{wiggle}{Labels of equality-constrained parameters that should be 79 | "approximately" equal. Can also be "intercepts", "loadings", 80 | "regressions", "means".} 81 | \item{wiggle.sd}{The prior sd (of normal distribution) to be used in approximate equality 82 | constraints. Can be one value, or (for target="stan") a numeric vector 83 | of values that is the same length as wiggle.} 84 | \item{prisamp}{Should samples be drawn from the prior, instead of the 85 | posterior (\code{target="stan"} only)? Logical; defaults to FALSE} 86 | \item{jags.ic}{Should DIC be computed the JAGS way, in addition to the BUGS way? Logical; defaults to FALSE} 87 | \item{seed}{A vector of length \code{n.chains} (for target 88 | \code{"jags"}) or an integer (for target \code{"stan"}) containing random 89 | seeds for the MCMC run. If \code{NULL}, seeds will be chosen randomly.} 90 | \item{bcontrol}{A list containing additional parameters passed to 91 | \code{run.jags} (or \code{autorun.jags}) or \code{stan}. 
See the manpage of those functions for an 92 | overview of the additional parameters that can be set.} 93 | } 94 | \details{ 95 | The \code{bgrowth} function is a wrapper for the more general 96 | \code{\link{blavaan}} function, using the following default 97 | \code{\link[lavaan]{lavaan}} arguments: 98 | \code{meanstructure = TRUE}, 99 | \code{int.ov.free = FALSE}, \code{int.lv.free = TRUE}, 100 | \code{auto.fix.first = TRUE} (unless \code{std.lv = TRUE}), 101 | \code{auto.fix.single = TRUE}, \code{auto.var = TRUE}, 102 | \code{auto.cov.lv.x = TRUE}, 103 | \code{auto.th = TRUE}, \code{auto.delta = TRUE}, 104 | and \code{auto.cov.y = TRUE}. 105 | } 106 | \value{ 107 | An object of class \code{\link{blavaan}}, for which several methods 108 | are available, including a \code{summary} method. 109 | } 110 | \references{ 111 | Edgar C. Merkle, Ellen Fitzsimmons, James Uanhoro, & Ben Goodrich (2021). Efficient Bayesian Structural Equation Modeling in Stan. Journal of Statistical 112 | Software, 100(6), 1-22. URL http://www.jstatsoft.org/v100/i06/. 113 | 114 | Edgar C. Merkle & Yves Rosseel (2018). blavaan: Bayesian Structural 115 | Equation Models via Parameter Expansion. Journal of Statistical 116 | Software, 85(4), 1-30. URL http://www.jstatsoft.org/v85/i04/. 117 | 118 | Yves Rosseel (2012). lavaan: An R Package for Structural Equation 119 | Modeling. Journal of Statistical Software, 48(2), 1-36. URL 120 | http://www.jstatsoft.org/v48/i02/. 
121 | } 122 | \seealso{ 123 | \code{\link{blavaan}} 124 | } 125 | \examples{ 126 | \dontrun{ 127 | ## linear growth model with a time-varying covariate 128 | data(Demo.growth, package = "lavaan") 129 | 130 | model.syntax <- ' 131 | # intercept and slope with fixed coefficients 132 | i =~ 1*t1 + 1*t2 + 1*t3 + 1*t4 133 | s =~ 0*t1 + 1*t2 + 2*t3 + 3*t4 134 | 135 | # regressions 136 | i ~ x1 + x2 137 | s ~ x1 + x2 138 | 139 | # time-varying covariates 140 | t1 ~ c1 141 | t2 ~ c2 142 | t3 ~ c3 143 | t4 ~ c4 144 | ' 145 | 146 | fit <- bgrowth(model.syntax, data = Demo.growth) 147 | summary(fit) 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /man/blavCompare.Rd: -------------------------------------------------------------------------------- 1 | \name{blavCompare} 2 | \alias{blavCompare} 3 | \alias{BF} 4 | \title{ 5 | Bayesian model comparisons 6 | } 7 | \description{ 8 | Bayesian model comparisons, including WAIC, LOO, and Bayes factor approximation. 9 | } 10 | \usage{ 11 | blavCompare(object1, object2, ...) 12 | } 13 | \arguments{ 14 | \item{object1}{An object of class \code{blavaan}.} 15 | \item{object2}{A second object of class \code{blavaan}.} 16 | \item{\dots}{Other arguments to loo().} 17 | } 18 | \details{ 19 | This function computes Bayesian model comparison metrics, including a 20 | Bayes factor approximation, WAIC, and LOOIC. 21 | 22 | The log-Bayes factor of the two models is based on the Laplace 23 | approximation to each model's marginal log-likelihood. 24 | 25 | The WAIC and LOOIC metrics come from the loo package. The ELPD 26 | difference and SE specifically come from loo::loo_compare(). 27 | } 28 | \value{ 29 | A list containing separate results for log-Bayes factor, WAIC, LOOIC, 30 | and differences between WAIC and LOOIC. 31 | } 32 | \references{ 33 | Raftery, A. E. (1993). Bayesian model selection in structural equation models. In K. A. Bollen & J. S. 
Long (Eds.), Testing structural equation models (pp. 163-180). Beverly Hills, CA: Sage. 34 | 35 | Vehtari A., Gelman A., Gabry J. (2017). Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing, 27, 1413-1432. 36 | } 37 | \examples{ 38 | \dontrun{ 39 | data(HolzingerSwineford1939, package = "lavaan") 40 | 41 | hsm1 <- ' visual =~ x1 + x2 + x3 + x4 42 | textual =~ x4 + x5 + x6 43 | speed =~ x7 + x8 + x9 ' 44 | 45 | fit1 <- bcfa(hsm1, data = HolzingerSwineford1939) 46 | 47 | hsm2 <- ' visual =~ x1 + x2 + x3 48 | textual =~ x4 + x5 + x6 + x7 49 | speed =~ x7 + x8 + x9 ' 50 | 51 | fit2 <- bcfa(hsm2, data = HolzingerSwineford1939) 52 | 53 | blavCompare(fit1, fit2) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /man/blavInspect.Rd: -------------------------------------------------------------------------------- 1 | \name{blavInspect} 2 | \alias{blavInspect} 3 | \alias{blavTech} 4 | \title{Inspect or Extract Information from a Fitted blavaan Object} 5 | \description{ 6 | The \code{blavInspect()} and \code{blavTech()} functions can be used to 7 | inspect/extract information that is stored inside (or can be computed from) a 8 | fitted blavaan object. This is similar to lavaan's \code{lavInspect()} function.} 9 | \usage{ 10 | blavInspect(blavobject, what, ...) 11 | 12 | blavTech(blavobject, what, ...) 13 | } 14 | \arguments{ 15 | \item{blavobject}{An object of class blavaan.} 16 | \item{what}{Character. What needs to be inspected/extracted? See Details for Bayes-specific options, and see \code{\link[lavaan]{lavaan}}'s \code{lavInspect()} for additional options. 
Note: the \code{what} argument is not case-sensitive (everything is converted to lower case).} 17 | \item{...}{lavaan arguments supplied to \code{lavInspect()}; see \code{\link[lavaan]{lavaan}}.} 18 | } 19 | \details{ 20 | Below is a list of Bayesian-specific values for the \code{what} 21 | argument; additional values can be found in the \code{lavInspect()} 22 | documentation. 23 | 24 | \describe{ 25 | \item{\code{"start"}:}{A list of starting values for each chain, unless \code{inits="jags"} is used during model estimation. Aliases: \code{"starting.values"}, \code{"inits"}.} 26 | \item{\code{"rhat"}:}{Each parameter's potential scale reduction 27 | factor for convergence assessment. Can also use "psrf" instead of "rhat".} 28 | \item{\code{"ac.10"}:}{Each parameter's estimated lag-10 autocorrelation.} 29 | \item{\code{"neff"}:}{Each parameter's effective sample size, taking into account autocorrelation.} 30 | \item{\code{"mcmc"}:}{An object of class \code{mcmc} containing the individual parameter draws from the MCMC run. Aliases: \code{"draws"}, \code{"samples"}.} 31 | \item{\code{"mcobj"}:}{The underlying run.jags or stan object that resulted from the MCMC run.} 32 | \item{\code{"n.chains"}:}{The number of chains sampled.} 33 | \item{\code{"cp"}:}{The approach used for estimating covariance 34 | parameters (\code{"srs"} or \code{"fa"}); these are only relevant if 35 | using JAGS.} 36 | \item{\code{"dp"}:}{Default prior distributions used for each type of model parameter.} 37 | \item{\code{"postmode"}:}{Estimated posterior mode of each free parameter.} 38 | \item{\code{"postmean"}:}{Estimated posterior mean of each free parameter.} 39 | \item{\code{"postmedian"}:}{Estimated posterior median of each free parameter.} 40 | \item{\code{"lvs"}:}{An object of class \code{mcmc} containing latent variable (factor score) draws.
In two-level models, use \code{level = 1} or \code{level = 2} to specify which factor scores you want.} 41 | \item{\code{"lvmeans"}:}{A matrix of mean factor scores (rows are observations, columns are variables). Use the additional \code{level} argument in the same way.} 42 | \item{\code{"hpd"}:}{HPD interval of each free parameter. In this case, the \code{prob} argument can be used to specify a number in (0,1) reflecting the desired percentage of the interval.} 43 | } 44 | } 45 | \seealso{ 46 | \code{\link[lavaan]{lavInspect}}, \code{\link{bcfa}}, \code{\link{bsem}}, \code{\link{bgrowth}} 47 | } 48 | \examples{ 49 | \dontrun{ 50 | # The Holzinger and Swineford (1939) example 51 | data(HolzingerSwineford1939, package = "lavaan") 52 | 53 | HS.model <- ' visual =~ x1 + x2 + x3 54 | textual =~ x4 + x5 + x6 55 | speed =~ x7 + x8 + x9 ' 56 | 57 | fit <- bcfa(HS.model, data = HolzingerSwineford1939, 58 | bcontrol = list(method = "rjparallel")) 59 | 60 | # extract information 61 | blavInspect(fit, "psrf") 62 | blavInspect(fit, "hpd", prob = .9) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /man/blavPredict.Rd: -------------------------------------------------------------------------------- 1 | \name{blavPredict} 2 | \alias{blavPredict} 3 | \alias{blavpredict} 4 | \title{Predict the values of latent variables, observed variables, and missing variables.} 5 | \description{ 6 | The purpose of the \code{blavPredict()} function is to compute various 7 | types of model predictions, conditioned on observed data. This differs 8 | somewhat from \code{lavPredict()} in lavaan.} 9 | \usage{ 10 | blavPredict(object, newdata = NULL, type = "lv", level = 1L) 11 | } 12 | \arguments{ 13 | \item{object}{An object of class \code{\linkS4class{blavaan}}.} 14 | \item{newdata}{An optional data.frame, containing the same variables as 15 | the data.frame used when fitting the model in object.} 16 | \item{type}{A character string. 
If \code{"lv"}, estimated values for the latent 17 | variables in the model are computed. If \code{"ov"} or \code{"yhat"}, predicted means for 18 | the observed variables in the model are computed. If 19 | \code{"ypred"} or \code{"ydist"}, predicted values for the 20 | observed variables (including residual noise) are computed. If 21 | \code{"ymis"} or \code{"ovmis"}, model predicted values ("imputations") 22 | for the missing data are computed. See details for further information.} 23 | \item{level}{For \code{type = "lv"}, used to specify whether one desires 24 | the level 1 latent variables or level 2 latent variables.} 25 | } 26 | \details{ 27 | The \code{predict()} function calls the \code{blavPredict()} function 28 | with its default options. 29 | 30 | Below, we provide more information about each \code{type} option. Most 31 | options only work for target="stan", and "number of samples" is defined 32 | as the number of posterior samples across all chains. 33 | 34 | \code{type="lv"}: The posterior distribution of latent variables 35 | conditioned on observed variables. Returns a list with 36 | "number of samples" entries, where each entry is a matrix where rows 37 | are observations and columns are latent variables. 38 | 39 | \code{type="yhat"}: The posterior expected value of observed variables 40 | conditioned on the sampled latent variables. Returns a list with 41 | "number of samples" entries, where each entry is a matrix where rows 42 | are observations and columns are observed variables. 43 | 44 | \code{type="ypred"}: The posterior predictive distribution of observed 45 | variables conditioned on the sampled latent variables (including 46 | residual variability). Returns a list with "number of samples" entries, 47 | where each entry is a data frame where rows are observations and columns 48 | are observed variables. 49 | 50 | \code{type="ymis"}: The posterior predictive distribution of missing 51 | values conditioned on observed variables. 
Returns a matrix with 52 | "number of samples" rows and "number of missing variables" columns. 53 | 54 | } 55 | \seealso{ 56 | Users may also wish to generate the posterior predictive distribution of 57 | observed data, not conditioned on the latent variables. This 58 | would often be viewed as data from new clusters (people) that were not 59 | observed in the original dataset. For that, see \code{sampleData()}. 60 | } 61 | \examples{ 62 | \dontrun{ 63 | data(HolzingerSwineford1939, package = "lavaan") 64 | 65 | ## fit model 66 | HS.model <- ' visual =~ x1 + x2 + x3 67 | textual =~ x4 + x5 + x6 68 | speed =~ x7 + x8 + x9 ' 69 | 70 | fit <- bcfa(HS.model, data = HolzingerSwineford1939, save.lvs = TRUE) 71 | lapply(blavPredict(fit)[1:2], head) # first 6 rows of first 10 posterior samples 72 | head(blavPredict(fit, type = "yhat")[[1]]) # top of first posterior sample 73 | 74 | ## multigroup models return a list of factor scores (one per group) 75 | mgfit <- bcfa(HS.model, data = HolzingerSwineford1939, group = "school", 76 | group.equal = c("loadings","intercepts"), save.lvs = TRUE) 77 | 78 | lapply(blavPredict(fit)[1:2], head) 79 | head(blavPredict(fit, type = "ypred")[[1]]) 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /man/blav_internal.Rd: -------------------------------------------------------------------------------- 1 | \name{blav_internal} 2 | \alias{blav_internal} 3 | \alias{blav_model_test} 4 | \alias{coeffun} 5 | \alias{set_phantoms} 6 | \alias{set_inits} 7 | \alias{set_priors} 8 | \alias{labelfun} 9 | \title{blavaan internal functions} 10 | \description{Internal functions related to Bayesian model estimation. 
11 | Not to be called by the user.} 12 | -------------------------------------------------------------------------------- /man/blavaan-class.Rd: -------------------------------------------------------------------------------- 1 | \name{blavaan-class} 2 | \docType{class} 3 | \alias{blavaan-class} 4 | \alias{coef,blavaan-method} 5 | \alias{show,blavaan-method} 6 | \alias{predict,blavaan-method} 7 | %\alias{fitted,blavaan-method} 8 | %\alias{fitted.values,blavaan-method} %% NOTE: no white space here! 9 | \alias{summary,blavaan-method} 10 | 11 | \title{Class For Representing A (Fitted) Bayesian Latent Variable Model} 12 | \description{The \code{blavaan} class contains the \code{lavaan} 13 | class, representing a (fitted) Bayesian latent variable 14 | model. It contains a description of the model as specified by the user, 15 | a summary of the data, an internal matrix representation, and if the model 16 | was fitted, the fitting results.} 17 | \section{Objects from the Class}{ 18 | Objects can be created via the 19 | \code{\link{bcfa}}, \code{\link{bsem}}, \code{\link{bgrowth}} or 20 | \code{\link{blavaan}} functions. 21 | } 22 | \section{Slots}{ 23 | \describe{ 24 | \item{\code{version}:}{The lavaan package version used to create this object.} 25 | \item{\code{call}:}{The function call as returned by \code{match.call()}.} 26 | \item{\code{timing}:}{The elapsed time (user+system) for various parts of 27 | the program as a list, including the total time.} 28 | \item{\code{Options}:}{Named list of options that were provided by 29 | the user, or filled-in automatically.} 30 | \item{\code{ParTable}:}{Named list describing the model parameters. Can be coerced to a data.frame.
In the documentation, this is called the `parameter table'.} 31 | \item{\code{pta}:}{Named list containing parameter table attributes.} 32 | \item{\code{Data}:}{Object of internal class \code{"Data"}: information 33 | about the data.} 34 | \item{\code{SampleStats}:}{Object of internal class \code{"SampleStats"}: sample 35 | statistics} 36 | \item{\code{Model}:}{Object of internal class \code{"Model"}: the 37 | internal (matrix) representation of the model} 38 | \item{\code{Cache}:}{List using objects that we try to compute only once, and reuse many times.} 39 | \item{\code{Fit}:}{Object of internal class \code{"Fit"}: the 40 | results of fitting the model. No longer used.} 41 | \item{\code{boot}:}{List. Unused for Bayesian models.} 42 | \item{\code{optim}:}{List. Information about the optimization.} 43 | \item{\code{loglik}:}{List. Information about the loglikelihood of the model (if maximum likelihood was used).} 44 | \item{\code{implied}:}{List. Model implied statistics.} 45 | \item{\code{vcov}:}{List. Information about the variance matrix (vcov) of the model parameters.} 46 | \item{\code{test}:}{List. Different test statistics.} 47 | \item{\code{h1}:}{List. Information about the unrestricted h1 model (if available).} 48 | \item{\code{baseline}:}{List. Information about a baseline model (often 49 | the independence model) (if available).} 50 | \item{\code{external}:}{List. Includes Stan or JAGS objects used for MCMC.} 51 | } 52 | } 53 | \section{Methods}{ 54 | \describe{ 55 | \item{coef}{\code{signature(object = "blavaan", type = "free")}: Returns 56 | the estimates of the parameters in the model as a named numeric vector. 57 | If \code{type="free"}, only the free parameters are returned. 
58 | If \code{type="user"}, all parameters listed in the parameter table 59 | are returned, including constrained and fixed parameters.} 60 | \item{vcov}{\code{signature(object = "lavaan")}: returns the 61 | covariance matrix of the estimated parameters.} 62 | \item{show}{\code{signature(object = "blavaan")}: Print a short summary 63 | of the model fit.} 64 | % \item{plot}{\code{signature(object = "blavaan")}: S4 method for 65 | % creating plots. Also see \code{?plot.blavaan}.} 66 | \item{summary}{\code{signature(object = "blavaan", header = TRUE, 67 | fit.measures = FALSE, estimates = TRUE, ci = TRUE, 68 | standardized = FALSE, rsquare = FALSE, std.nox = FALSE, 69 | psrf = TRUE, neff = FALSE, postmedian = FALSE, postmode = FALSE, 70 | priors = TRUE, bf = FALSE, nd = 3L)}: 71 | Print a nice summary of the model estimates. 72 | If \code{header = TRUE}, the header section (including fit measures) is 73 | printed. 74 | If \code{fit.measures = TRUE}, additional fit measures are added to the 75 | header section. 76 | If \code{estimates = TRUE}, print the parameter estimates section. 77 | If \code{ci = TRUE}, add confidence intervals to the parameter estimates 78 | section. 79 | If \code{standardized = TRUE}, 80 | the standardized solution is also printed. Note that \emph{SE}s and 81 | tests are still based on unstandardized estimates. Use 82 | \code{\link[lavaan]{standardizedSolution}} to obtain \emph{SE}s and test 83 | statistics for standardized estimates. 84 | If \code{rsquare=TRUE}, the R-Square values for the dependent variables 85 | in the model are printed. 86 | If \code{std.nox = TRUE}, the \code{std.all} column contains 87 | the \code{std.nox} column from the parameterEstimates() output. 88 | If \code{psrf = TRUE}, potential scale reduction factors (Rhats) 89 | are printed. 90 | If \code{neff = TRUE}, effective sample sizes are printed.
91 | If \code{postmedian} or \code{postmode} are TRUE, posterior 92 | medians or modes are printed instead of posterior means. 93 | If \code{priors = TRUE}, parameter prior distributions are 94 | printed. 95 | If \code{bf = TRUE}, Savage-Dickey approximations of the Bayes 96 | factor are printed for certain parameters. 97 | Nothing is returned (use 98 | \code{lavInspect} or another extractor function 99 | to extract information from a fitted model).} 100 | } 101 | } 102 | \references{ 103 | Edgar C. Merkle, Ellen Fitzsimmons, James Uanhoro, & Ben Goodrich (2021). Efficient Bayesian Structural Equation Modeling in Stan. Journal of Statistical 104 | Software, 100(6), 1-22. URL http://www.jstatsoft.org/v100/i06/. 105 | 106 | Edgar C. Merkle & Yves Rosseel (2018). blavaan: Bayesian Structural 107 | Equation Models via Parameter Expansion. Journal of Statistical 108 | Software, 85(4), 1-30. URL http://www.jstatsoft.org/v85/i04/. 109 | 110 | Yves Rosseel (2012). lavaan: An R Package for Structural Equation 111 | Modeling. Journal of Statistical Software, 48(2), 1-36. URL 112 | http://www.jstatsoft.org/v48/i02/. 
113 | } 114 | \seealso{ 115 | \code{\link{bcfa}}, \code{\link{bsem}}, \code{\link{bgrowth}} 116 | } 117 | \examples{ 118 | \dontrun{ 119 | HS.model <- ' visual =~ x1 + x2 + x3 120 | textual =~ x4 + x5 + x6 121 | speed =~ x7 + x8 + x9 ' 122 | 123 | fit <- bcfa(HS.model, data=HolzingerSwineford1939) 124 | 125 | summary(fit, standardized=TRUE, fit.measures=TRUE, rsquare=TRUE) 126 | coef(fit) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /man/blavaan.Rd: -------------------------------------------------------------------------------- 1 | \name{blavaan} 2 | \alias{blavaan} 3 | \title{Fit a Bayesian Latent Variable Model} 4 | \description{ 5 | Fit a Bayesian latent variable model.} 6 | \usage{ 7 | blavaan(..., cp = "srs", 8 | dp = NULL, n.chains = 3, burnin, sample, 9 | adapt, mcmcfile = FALSE, mcmcextra = list(), inits = "simple", 10 | convergence = "manual", target = "stan", save.lvs = FALSE, 11 | wiggle = NULL, wiggle.sd = 0.1, prisamp = FALSE, jags.ic = FALSE, 12 | seed = NULL, bcontrol = list()) 13 | } 14 | \arguments{ 15 | \item{...}{Default lavaan arguments. See \code{\link[lavaan]{lavaan}}.} 16 | \item{cp}{Handling of prior distributions on covariance parameters: 17 | possible values are \code{"srs"} (default) or \code{"fa"}. Option 18 | \code{"fa"} is only available for \code{target="jags"}.} 19 | \item{dp}{Default prior distributions on different types of 20 | parameters, typically the result of a call to \code{dpriors()}. 21 | See the \code{dpriors()} help file for more information.} 22 | \item{n.chains}{Number of desired MCMC chains.} 23 | \item{burnin}{Number of burnin/warmup iterations (not including the adaptive 24 | iterations, for target="jags"). Defaults to 4000 for target="jags" and 25 | 500 for Stan targets.} 26 | \item{sample}{The total number of samples to take after burnin.
Defaults 27 | to 10000 for target="jags" and 1000 for Stan targets.} 28 | \item{adapt}{For target="jags", the number of adaptive iterations to use at the start of 29 | sampling. Defaults to 1000.} 30 | \item{mcmcfile}{If \code{TRUE}, the JAGS/Stan model and data will be written 31 | to files (in the lavExport directory). Can also supply a character 32 | string, which serves as the name of the directory to which files will be written.} 33 | \item{mcmcextra}{A list with potential names \code{syntax} (unavailable 34 | for target=\code{"stan"}), 35 | \code{monitor}, \code{data}, and \code{llnsamp}. The \code{syntax} object is a text string containing extra 36 | code to insert in the JAGS/Stan model syntax. The \code{data} object 37 | is a list of extra data to send to the JAGS/Stan model. If 38 | \code{moment_match_k_threshold} is specified within \code{data} the looic of 39 | the model will be calculated using moment matching. The \code{monitor} object 40 | is a character vector containing extra JAGS/Stan parameters to 41 | monitor. The \code{llnsamp} object is only relevant to models with ordinal 42 | variables, and specifies the number of samples that should be drawn to approximate 43 | the model log-likelihood (larger numbers imply higher accuracy and 44 | longer time). This log-likelihood is specifically used to compute 45 | information criteria.} 46 | \item{inits}{If it is a character string, the options are currently 47 | \code{"simple"} (default), \code{"Mplus"}, \code{"prior"}, or \code{"jags"}. In the first two 48 | cases, parameter values are set as though they will be estimated via 49 | ML (see \code{\link[lavaan]{lavaan}}). The starting parameter value for 50 | each chain is then perturbed from the original values through the 51 | addition of random uniform noise. 
If \code{"prior"} is used, the starting 52 | parameter values are obtained based on the prior distributions 53 | (while also trying to ensure that the starting values will not crash 54 | the model estimation). If \code{"jags"}, no starting values are 55 | specified and JAGS will choose values on its own (and this will probably 56 | crash Stan targets). You can also supply 57 | a list of starting values for each chain, where the list format can 58 | be obtained from, e.g., \code{blavInspect(fit, "inits")}. Finally, 59 | you can specify starting values in a similar way to lavaan, 60 | using the lavaan \code{start} argument (see the lavaan 61 | documentation for all the options there). In this case, you should also set 62 | \code{inits="simple"}, and be aware that the same starting values 63 | will be used for each chain.} 64 | \item{convergence}{Useful only for \code{target="jags"}. If \code{"auto"}, parameters are 65 | sampled until convergence is achieved (via \code{autorun.jags()}). In 66 | this case, the arguments \code{burnin} and \code{sample} are passed to 67 | \code{autorun.jags()} as \code{startburnin} and \code{startsample}, 68 | respectively. Otherwise, parameters 69 | are sampled as specified by the user (or by the \code{run.jags} 70 | defaults).} 71 | \item{target}{Desired MCMC sampling, with \code{"stan"} (pre-compiled 72 | marginal approach) as 73 | default. Also available is \code{"vb"}, which calls the rstan function 74 | \code{vb()}. Other options include \code{"jags"}, \code{"stancond"}, and 75 | \code{"stanclassic"}, which sample latent variables and provide some 76 | greater functionality (because syntax is written "on the fly"). But 77 | they are slower and less efficient.} 78 | \item{save.lvs}{Should sampled latent variables (factor scores) be 79 | saved? Logical; defaults to FALSE} 80 | \item{wiggle}{Labels of equality-constrained parameters that should be 81 | "approximately" equal. 
Can also be "intercepts", "loadings", 82 | "regressions", "means".} 83 | \item{wiggle.sd}{The prior sd (of normal distribution) to be used in approximate equality 84 | constraints. Can be one value, or (for target="stan") a numeric vector 85 | of values that is the same length as wiggle.} 86 | \item{prisamp}{Should samples be drawn from the prior, instead of the 87 | posterior (\code{target="stan"} only)? Logical; defaults to FALSE} 88 | \item{jags.ic}{Should DIC be computed the JAGS way, in addition to the BUGS way? Logical; defaults to FALSE} 89 | \item{seed}{A vector of length \code{n.chains} (for target 90 | \code{"jags"}) or an integer (for target \code{"stan"}) containing random 91 | seeds for the MCMC run. If \code{NULL}, seeds will be chosen randomly.} 92 | \item{bcontrol}{A list containing additional parameters passed to 93 | \code{run.jags} (or \code{autorun.jags}) or \code{stan}. See the manpage of those functions for an 94 | overview of the additional parameters that can be set.} 95 | } 96 | \value{ 97 | An object that inherits from class \link[lavaan:lavaan-class]{lavaan}, for which several methods 98 | are available, including a \code{summary} method. 99 | } 100 | \references{ 101 | Edgar C. Merkle, Ellen Fitzsimmons, James Uanhoro, & Ben Goodrich (2021). Efficient Bayesian Structural Equation Modeling in Stan. Journal of Statistical 102 | Software, 100(6), 1-22. URL http://www.jstatsoft.org/v100/i06/. 103 | 104 | Edgar C. Merkle & Yves Rosseel (2018). blavaan: Bayesian Structural 105 | Equation Models via Parameter Expansion. Journal of Statistical 106 | Software, 85(4), 1-30. URL http://www.jstatsoft.org/v85/i04/. 107 | 108 | Yves Rosseel (2012). lavaan: An R Package for Structural Equation 109 | Modeling. Journal of Statistical Software, 48(2), 1-36. URL 110 | http://www.jstatsoft.org/v48/i02/. 
111 | } 112 | \seealso{ 113 | \code{\link{bcfa}}, \code{\link{bsem}}, \code{\link{bgrowth}} 114 | } 115 | \examples{ 116 | \dontrun{ 117 | data(HolzingerSwineford1939, package = "lavaan") 118 | 119 | # The Holzinger and Swineford (1939) example 120 | HS.model <- ' visual =~ x1 + x2 + x3 121 | textual =~ x4 + x5 + x6 122 | speed =~ x7 + x8 + x9 ' 123 | 124 | fit <- blavaan(HS.model, data = HolzingerSwineford1939, 125 | auto.var = TRUE, auto.fix.first = TRUE, 126 | auto.cov.lv.x = TRUE) 127 | summary(fit) 128 | coef(fit) 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /man/bsem.Rd: -------------------------------------------------------------------------------- 1 | \name{bsem} 2 | \alias{bsem} 3 | \title{Fit Structural Equation Models} 4 | \description{ 5 | Fit a Structural Equation Model (SEM).} 6 | \usage{ 7 | bsem(..., cp = "srs", 8 | dp = NULL, n.chains = 3, burnin, sample, 9 | adapt, mcmcfile = FALSE, mcmcextra = list(), inits = "simple", 10 | convergence = "manual", target = "stan", save.lvs = FALSE, 11 | wiggle = NULL, wiggle.sd = 0.1, prisamp = FALSE, jags.ic = FALSE, 12 | seed = NULL, bcontrol = list()) 13 | } 14 | \arguments{ 15 | \item{...}{Default lavaan arguments. See \code{\link[lavaan]{lavaan}}.} 16 | \item{cp}{Handling of prior distributions on covariance parameters: 17 | possible values are \code{"srs"} (default) or \code{"fa"}. Option \code{"fa"} is only available for \code{target="jags"}.} 18 | \item{dp}{Default prior distributions on different types of 19 | parameters, typically the result of a call to \code{dpriors()}. 20 | See the \code{dpriors()} help file for more information.} 21 | \item{n.chains}{Number of desired MCMC chains.} 22 | \item{burnin}{Number of burnin/warmup iterations (not including the adaptive 23 | iterations, for target="jags"). Defaults to 4000 for target="jags" and 24 | 500 for Stan targets.} 25 | \item{sample}{The total number of samples to take after burnin.
Defaults 26 | to 10000 for target="jags" and 1000 for Stan targets.} 27 | \item{adapt}{For target="jags", the number of adaptive iterations to use at the start of 28 | sampling. Defaults to 1000.} 29 | \item{mcmcfile}{If \code{TRUE}, the JAGS/Stan model will be written to file 30 | (in the lavExport directory). Can also supply a character 31 | string, which serves as the name of the directory to which files will be written.} 32 | \item{mcmcextra}{A list with potential names \code{syntax} (unavailable 33 | for target=\code{"stan"}), 34 | \code{monitor}, \code{data}, and \code{llnsamp}. The \code{syntax} object is a text string containing extra 35 | code to insert in the JAGS/Stan model syntax. The \code{data} object 36 | is a list of extra data to send to the JAGS/Stan model. If 37 | \code{moment_match_k_threshold} is specified within \code{data} the looic of 38 | the model will be calculated using moment matching. The \code{monitor} object 39 | is a character vector containing extra JAGS/Stan parameters to 40 | monitor. The \code{llnsamp} object is only relevant to models with ordinal 41 | variables, and specifies the number of samples that should be drawn to approximate 42 | the model log-likelihood (larger numbers imply higher accuracy and 43 | longer time). This log-likelihood is specifically used to compute 44 | information criteria.} 45 | \item{inits}{If it is a character string, the options are currently 46 | \code{"simple"} (default), \code{"Mplus"}, \code{"prior"}, or \code{"jags"}. In the first two 47 | cases, parameter values are set as though they will be estimated via 48 | ML (see \code{\link[lavaan]{lavaan}}). The starting parameter value for 49 | each chain is then perturbed from the original values through the 50 | addition of random uniform noise. 
If \code{"prior"} is used, the starting 51 | parameter values are obtained based on the prior distributions 52 | (while also trying to ensure that the starting values will not crash 53 | the model estimation). If \code{"jags"}, no starting values are 54 | specified and JAGS will choose values on its own (and this will probably 55 | crash Stan targets). You can also supply 56 | a list of starting values for each chain, where the list format can 57 | be obtained from, e.g., \code{blavInspect(fit, "inits")}. Finally, 58 | you can specify starting values in a similar way to lavaan, 59 | using the lavaan \code{start} argument (see the lavaan 60 | documentation for all the options there). In this case, you should also set 61 | \code{inits="simple"}, and be aware that the same starting values 62 | will be used for each chain.} 63 | \item{convergence}{Useful only for \code{target="jags"}. If \code{"auto"}, parameters are 64 | sampled until convergence is achieved (via \code{autorun.jags()}). In 65 | this case, the arguments \code{burnin} and \code{sample} are passed to 66 | \code{autorun.jags()} as \code{startburnin} and \code{startsample}, 67 | respectively. Otherwise, parameters 68 | are sampled as specified by the user (or by the \code{run.jags} 69 | defaults).} 70 | \item{target}{Desired MCMC sampling, with \code{"stan"} (pre-compiled 71 | marginal approach) as 72 | default. Also available is \code{"vb"}, which calls the rstan function 73 | \code{vb()}. Other options include \code{"jags"}, \code{"stancond"}, and 74 | \code{"stanclassic"}, which sample latent variables and provide some 75 | greater functionality (because syntax is written "on the fly"). But 76 | they are slower and less efficient.} 77 | \item{save.lvs}{Should sampled latent variables (factor scores) be saved? Logical; defaults to FALSE} 78 | \item{wiggle}{Labels of equality-constrained parameters that should be 79 | "approximately" equal. 
Can also be "intercepts", "loadings", 80 | "regressions", "means".} 81 | \item{wiggle.sd}{The prior sd (of normal distribution) to be used in approximate equality 82 | constraints. Can be one value, or (for target="stan") a numeric vector 83 | of values that is the same length as wiggle.} 84 | \item{prisamp}{Should samples be drawn from the prior, instead of the 85 | posterior (\code{target="stan"} only)? Logical; defaults to FALSE} 86 | \item{jags.ic}{Should DIC be computed the JAGS way, in addition to the BUGS way? Logical; defaults to FALSE} 87 | \item{seed}{A vector of length \code{n.chains} (for target 88 | \code{"jags"}) or an integer (for target \code{"stan"}) containing random 89 | seeds for the MCMC run. If \code{NULL}, seeds will be chosen randomly.} 90 | \item{bcontrol}{A list containing additional parameters passed to 91 | \code{run.jags} (or \code{autorun.jags}) or \code{stan}. See the manpage of those functions for an 92 | overview of the additional parameters that can be set.} 93 | } 94 | \details{ 95 | The \code{bsem} function is a wrapper for the more general 96 | \code{\link{blavaan}} function, using the following default 97 | \code{\link[lavaan]{lavaan}} arguments: 98 | \code{int.ov.free = TRUE}, \code{int.lv.free = FALSE}, 99 | \code{auto.fix.first = TRUE} (unless \code{std.lv = TRUE}), 100 | \code{auto.fix.single = TRUE}, \code{auto.var = TRUE}, 101 | \code{auto.cov.lv.x = TRUE}, 102 | \code{auto.th = TRUE}, \code{auto.delta = TRUE}, 103 | and \code{auto.cov.y = TRUE}. 104 | } 105 | \value{ 106 | An object of class \link[lavaan:lavaan-class]{lavaan}, for which several methods 107 | are available, including a \code{summary} method. 108 | } 109 | \references{ 110 | Edgar C. Merkle, Ellen Fitzsimmons, James Uanhoro, & Ben Goodrich (2021). Efficient Bayesian Structural Equation Modeling in Stan. Journal of Statistical 111 | Software, 100(6), 1-22. URL http://www.jstatsoft.org/v100/i06/. 112 | 113 | Edgar C. Merkle & Yves Rosseel (2018). 
blavaan: Bayesian Structural 114 | Equation Models via Parameter Expansion. Journal of Statistical 115 | Software, 85(4), 1-30. URL http://www.jstatsoft.org/v85/i04/. 116 | 117 | Yves Rosseel (2012). lavaan: An R Package for Structural Equation 118 | Modeling. Journal of Statistical Software, 48(2), 1-36. URL 119 | http://www.jstatsoft.org/v48/i02/. 120 | } 121 | \seealso{ 122 | \code{\link{blavaan}} 123 | } 124 | \examples{ 125 | # The industrialization and Political Democracy Example 126 | # Bollen (1989), page 332 127 | data(PoliticalDemocracy, package = "lavaan") 128 | 129 | model <- ' 130 | # latent variable definitions 131 | ind60 =~ x1 + x2 + x3 132 | dem60 =~ y1 + a*y2 + b*y3 + c*y4 133 | dem65 =~ y5 + a*y6 + b*y7 + c*y8 134 | 135 | # regressions 136 | dem60 ~ ind60 137 | dem65 ~ ind60 + dem60 138 | 139 | # residual correlations 140 | y1 ~~ y5 141 | y2 ~~ y4 + y6 142 | y3 ~~ y7 143 | y4 ~~ y8 144 | y6 ~~ y8 145 | ' 146 | 147 | \dontrun{ 148 | # mildly informative priors for mv intercepts and loadings 149 | fit <- bsem(model, data = PoliticalDemocracy, 150 | dp = dpriors(nu = "normal(5,10)", lambda = "normal(1,.5)")) 151 | summary(fit) 152 | } 153 | 154 | # A short run for rough results 155 | fit <- bsem(model, data = PoliticalDemocracy, burnin = 100, sample = 100, 156 | dp = dpriors(nu = "normal(5,10)", lambda = "normal(1,.5)"), 157 | n.chains = 2) 158 | summary(fit) 159 | } 160 | -------------------------------------------------------------------------------- /man/dpriors.Rd: -------------------------------------------------------------------------------- 1 | \name{dpriors} 2 | \alias{dpriors} 3 | \title{Specify Default Prior Distributions} 4 | \description{ 5 | Specify "default" prior distributions for classes of model parameters. 
6 | } 7 | \usage{ 8 | dpriors(..., target = "stan") 9 | } 10 | \arguments{ 11 | \item{...}{Parameter names paired with desired priors (see example 12 | below).} 13 | \item{target}{Are the priors for jags, stan (default), or stanclassic?} 14 | } 15 | \details{The prior distributions always use JAGS/Stan syntax and parameterizations. For 16 | example, the normal distribution in JAGS is parameterized via the 17 | precision, whereas the normal distribution in Stan is parameterized 18 | via the standard deviation. 19 | 20 | User-specified prior distributions for specific parameters 21 | (using the \code{prior()} operator within the model syntax) always 22 | override prior distributions set using \code{dpriors()}. 23 | 24 | The parameter names are: 25 | \itemize{ 26 | \item{nu: Observed variable intercept parameters.} 27 | \item{alpha: Latent variable intercept parameters.} 28 | \item{lambda: Loading parameters.} 29 | \item{beta: Regression parameters.} 30 | \item{itheta: Observed variable precision parameters.} 31 | \item{ipsi: Latent variable precision parameters.} 32 | \item{rho: Correlation parameters (associated with covariance parameters).} 33 | \item{ibpsi: Inverse covariance matrix of 34 | blocks of latent variables (used for \code{target="jags"}).} 35 | \item{tau: Threshold parameters (ordinal data only).} 36 | \item{delta: Delta parameters (ordinal data only).} 37 | } 38 | } 39 | \value{ 40 | A character vector containing the prior distribution for each type of parameter. 41 | } 42 | \references{ 43 | Edgar C. Merkle, Ellen Fitzsimmons, James Uanhoro, & Ben Goodrich (2021). Efficient Bayesian Structural Equation Modeling in Stan. Journal of Statistical 44 | Software, 100(6), 1-22. URL http://www.jstatsoft.org/v100/i06/. 45 | 46 | Edgar C. Merkle & Yves Rosseel (2018). blavaan: Bayesian Structural 47 | Equation Models via Parameter Expansion. Journal of Statistical 48 | Software, 85(4), 1-30. URL http://www.jstatsoft.org/v85/i04/. 
49 | } 50 | \seealso{ 51 | \code{\link{bcfa}}, \code{\link{bsem}}, \code{\link{bgrowth}} 52 | } 53 | \examples{ 54 | dpriors(nu = "normal(0,10)", lambda = "normal(0,1)", rho = "beta(3,3)") 55 | } 56 | -------------------------------------------------------------------------------- /man/plot.blavaan.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.blavaan} 2 | \alias{plot.blavaan} 3 | \title{blavaan Diagnostic Plots} 4 | \description{ 5 | Convenience functions to create plots of blavaan objects, via the 6 | bayesplot package. 7 | } 8 | \usage{ 9 | \method{plot}{blavaan}(x, pars = NULL, plot.type = "trace", showplot = TRUE, ...) 10 | } 11 | \arguments{ 12 | \item{x}{An object of class \code{blavaan}.} 13 | \item{pars}{Parameter numbers to plot, where the numbers correspond to the order of parameters as reported by \code{coef()} (also as shown in the 'free' column of the parTable). If no numbers are provided, all free parameters will be plotted.} 14 | \item{plot.type}{The type of plot desired. This should be the name of a \code{\link[bayesplot:MCMC-overview]{MCMC}} function, without the \code{mcmc_} prefix.} 15 | \item{showplot}{Should the plot be sent to the graphic device? Defaults to \code{TRUE}.} 16 | \item{...}{Other arguments sent to the bayesplot function.} 17 | } 18 | \details{In previous versions of blavaan, the plotting functionality was 19 | handled separately for JAGS and for Stan (using plot functionality in 20 | packages runjags and rstan, respectively). For uniformity, all 21 | plotting functionality is now handled by bayesplot. If users desire 22 | additional functionality that is not immediately available, they can extract the matrix of MCMC draws via \code{as.matrix(blavInspect(x, 'mcmc'))}. 23 | } 24 | \value{ 25 | An invisible ggplot object that, if desired, can be further customized. 
26 | } 27 | \examples{ 28 | \dontrun{ 29 | data(HolzingerSwineford1939, package = "lavaan") 30 | 31 | HS.model <- ' visual =~ x1 + x2 + x3 32 | textual =~ x4 + x5 + x6 33 | speed =~ x7 + x8 + x9 ' 34 | 35 | fit <- bcfa(HS.model, data = HolzingerSwineford1939) 36 | 37 | # trace plots of free loadings 38 | plot(fit, pars = 1:6) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/sampleData.Rd: -------------------------------------------------------------------------------- 1 | \name{sampleData} 2 | \alias{sampleData} 3 | \alias{sampledata} 4 | \title{Sample data from the posterior (or prior) distribution.} 5 | \description{ 6 | The purpose of the \code{sampleData()} function is to simulate new data 7 | from a model that has already been estimated. This can facilitate 8 | posterior predictive checks, as well as prior predictive checks (setting 9 | prisamp = TRUE during model estimation).} 10 | \usage{ 11 | sampleData(object, nrep = NULL, conditional = FALSE, type = "response", 12 | simplify = FALSE, ...) 13 | } 14 | \arguments{ 15 | \item{object}{An object of class \code{\linkS4class{blavaan}}.} 16 | \item{nrep}{How many datasets to generate? If not supplied, defaults to 17 | the total number of posterior samples.} 18 | \item{conditional}{Logical indicating whether to sample from the 19 | distribution that is marginal over latent variables (\code{FALSE}; 20 | default) or from the distribution that conditions on latent variables 21 | (\code{TRUE}). For \code{TRUE}, you must set \code{save.lvs = TRUE} 22 | during model estimation.} 23 | \item{type}{The type of data desired (only relevant to ordinal 24 | data). The \code{type = "response"} option generates ordinal data. The 25 | \code{type = "link"} option generates continuous variables underlying 26 | ordinal data (which would be cut by thresholds to yield ordinal data).} 27 | \item{simplify}{For single-group models, should the list structure be 28 | simplified?
This makes each dataset a single list entry, instead of a 29 | list within a list (which reflects group 1 of dataset 1). Defaults to \code{FALSE}.} 30 | \item{...}{Other arguments, which for now is only \code{parallel}. Parallelization via \code{future_lapply()} is available by setting \code{parallel = TRUE}.} 31 | } 32 | \details{ 33 | This is a convenience function to generate data for posterior or prior 34 | predictive checking. The underlying code is also used to generate data 35 | for posterior predictive p-value computation. 36 | } 37 | \seealso{ 38 | This function overlaps with \code{blavPredict()}. The 39 | \code{blavPredict()} function is more focused on generating pieces of 40 | data conditioned on other pieces of observed data (i.e., latent 41 | variables conditioned on observed variables; missing variables 42 | conditioned on observed variables). In contrast, the \code{sampleData()} 43 | function is more focused on generating new data given the sampled model parameters. 44 | } 45 | \examples{ 46 | \dontrun{ 47 | data(HolzingerSwineford1939, package = "lavaan") 48 | 49 | ## fit model 50 | HS.model <- ' visual =~ x1 + x2 + x3 51 | textual =~ x4 + x5 + x6 52 | speed =~ x7 + x8 + x9 ' 53 | 54 | fit <- bcfa(HS.model, data = HolzingerSwineford1939) 55 | 56 | ## 1 dataset generated from the posterior 57 | out <- sampleData(fit, nrep = 1) 58 | 59 | ## nested lists: 1 list entry per nrep. 
60 | ## then, within a rep, 1 list entry per group 61 | ## so our dataset is here: 62 | dim(out[[1]][[1]]) 63 | 64 | ## 1 posterior dataset per posterior sample: 65 | out <- sampleData(fit) 66 | 67 | ## obtain the data on x1 across reps and summarize: 68 | x1dat <- sapply(out, function(x) x[[1]][,1]) 69 | summary( as.numeric(x1dat) ) 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /man/standardizedPosterior.Rd: -------------------------------------------------------------------------------- 1 | \name{standardizedPosterior} 2 | \alias{standardizedPosterior} 3 | \alias{standardizedposterior} 4 | \title{Standardized Posterior} 5 | \description{ 6 | Standardized posterior distribution of a latent variable model.} 7 | \usage{ 8 | standardizedPosterior(object, ...) 9 | } 10 | \arguments{ 11 | \item{object}{An object of class \code{\linkS4class{blavaan}}.} 12 | \item{...}{Additional arguments passed to lavaan's 13 | \code{standardizedSolution()}} 14 | } 15 | \note{ 16 | The only allowed \code{standardizedSolution()} arguments are type, 17 | cov.std, remove.eq, remove.ineq, and remove.def. Other arguments are not 18 | immediately suited to posterior distributions. 19 | } 20 | \value{ 21 | A matrix containing standardized posterior draws, where rows are draws 22 | and columns are parameters. 
23 | } 24 | \examples{ 25 | \dontrun{ 26 | data(PoliticalDemocracy, package = "lavaan") 27 | 28 | model <- ' 29 | # latent variable definitions 30 | ind60 =~ x1 + x2 + x3 31 | dem60 =~ y1 + a*y2 + b*y3 + c*y4 32 | dem65 =~ y5 + a*y6 + b*y7 + c*y8 33 | 34 | # regressions 35 | dem60 ~ ind60 36 | dem65 ~ ind60 + dem60 37 | 38 | # residual correlations 39 | y1 ~~ y5 40 | y2 ~~ y4 + y6 41 | y3 ~~ y7 42 | y4 ~~ y8 43 | y6 ~~ y8 44 | ' 45 | 46 | fit <- bsem(model, data = PoliticalDemocracy, 47 | dp = dpriors(nu = "normal(5, 10)")) 48 | 49 | standardizedPosterior(fit) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /tests/blavaan_examples.R: -------------------------------------------------------------------------------- 1 | ## short examples to test functionality 2 | set.seed(341) 3 | 4 | ## seems to be needed for stanclassic: 5 | oopts <- options(future.globals.maxSize = 1.0 * 1e9) 6 | on.exit(options(oopts)) 7 | 8 | x1 <- rnorm(100) 9 | y1 <- 0.5 + 2*x1 + rnorm(100) 10 | g <- rep(1:2, each=50) 11 | Data <- data.frame(y1 = y1, x1 = x1, g = g) 12 | 13 | ## seemed to help if running this via R CMD check: 14 | ## Sys.unsetenv('R_TESTS') 15 | library("blavaan") 16 | ## NB! unload DBI package or problems with CRAN! 
17 | unloadNamespace("DBI") 18 | 19 | ## don't care that models are not converged, keeping file size small 20 | model <- ' y1 ~ prior("dnorm(0,1)")*x1 ' 21 | fitjags <- bsem(model, data=Data, fixed.x=TRUE, burnin=20, 22 | sample=20, target="jags", group="g", seed=1:3) 23 | 24 | model <- ' y1 ~ prior("normal(0,1)")*x1 ' 25 | fitstan <- bsem(model, data=Data, fixed.x=TRUE, burnin=20, 26 | sample=20, target="stan", group="g", seed=1, meanstructure=TRUE) 27 | 28 | fitstanc <- bsem(model, data=Data, fixed.x=TRUE, burnin=20, 29 | sample=20, target="stanclassic", group="g", seed=1) 30 | 31 | ## for checking factor score functionality 32 | HS.model <- ' visual =~ x1 + x2 + x3 33 | textual =~ x4 + x5 + x6 ' 34 | 35 | fitstanfs <- bcfa(HS.model, data=lavaan::HolzingerSwineford1939, 36 | burnin=30, sample=10, target="stan", 37 | save.lvs=TRUE, n.chains=2, seed=1, meanstructure=TRUE) 38 | 39 | ## this really blows up file size if kept: 40 | attr(fitstan@external$mcmcout, 'stanmodel') <- NULL 41 | #attr(fitstan@external$mcmcout, 'sim') <- NULL 42 | attr(fitstan@external$mcmcout, 'inits') <- NULL 43 | attr(fitstanc@external$mcmcout, 'stanmodel') <- NULL 44 | #attr(fitstanc@external$mcmcout, 'sim') <- NULL 45 | attr(fitstanc@external$mcmcout, 'inits') <- NULL 46 | attr(fitstanfs@external$mcmcout, 'stanmodel') <- NULL 47 | #attr(fitstanfs@external$mcmcout, 'sim') <- NULL 48 | attr(fitstanfs@external$mcmcout, 'inits') <- NULL 49 | 50 | save(list=c("fitjags", "fitstan", "fitstanc", "fitstanfs"), 51 | file="../inst/testdata/sysdata.rda") 52 | 53 | -------------------------------------------------------------------------------- /tests/tinytest.R: -------------------------------------------------------------------------------- 1 | ## test only if it is a development version 2 | if (length(strsplit(packageDescription("blavaan")$Version, "\\.")[[1]]) > 2 & 3 | requireNamespace("tinytest", quietly=TRUE)) { 4 | tinytest::test_package("blavaan") 5 | } 
-------------------------------------------------------------------------------- /vignettes/approx_fi.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Approximate fit indices" 3 | author: Mauricio Garnier-Villarreal 4 | bibliography: 5 | - refs.bib 6 | --- 7 | 8 | ```{r pkgld, include=FALSE} 9 | library(blavaan, quietly=TRUE) 10 | library(lavaan, quietly=TRUE) 11 | library(bayesplot) 12 | ``` 13 | 14 | 15 | ### Introduction 16 | In SEM, one of the first steps is to evaluate the model's global fit. This is commonly done by presenting multiple fit indices, with some of the most common being based on the model's $\chi^2$. We have developed Bayesian versions of these indices [@garnier_adapting_2020] that can be computed with *blavaan*. 17 | 18 | ### Noncentrality-Based Fit Indices 19 | 20 | This group of indices compares the hypothesized model against the perfect saturated model. It specifically uses the noncentrality parameter $\hat{\lambda} = \chi^2 - df$, with the df being adjusted by different model/data characteristics. 21 | Specific indices include Root Mean Square Error of approximation (RMSEA), McDonald’s centrality index (Mc), gamma-hat ($\hat{\Gamma}$), and adjusted gamma-hat ($\hat{\Gamma}_{adj}$). 22 | 23 | We will show an example with the @holswi39 example.
You first estimate your SEM/CFA model as usual 24 | 25 | ```{r, include=FALSE, cache=TRUE} 26 | HS.model <- ' visual =~ x1 + x2 + x3 27 | textual =~ x4 + x5 + x6 28 | speed =~ x7 + x8 + x9 ' 29 | 30 | fit <- bcfa(HS.model, data=HolzingerSwineford1939, std.lv=TRUE) 31 | ``` 32 | 33 | ```{r, eval=F} 34 | HS.model <- ' visual =~ x1 + x2 + x3 35 | textual =~ x4 + x5 + x6 36 | speed =~ x7 + x8 + x9 ' 37 | 38 | fit <- bcfa(HS.model, data=HolzingerSwineford1939, std.lv=TRUE) 39 | ``` 40 | 41 | 42 | You then need to pass the model to the ```blavFitIndices()``` function 43 | 44 | ```{r bfi1, cache=TRUE, warning=FALSE} 45 | gl_fits <- blavFitIndices(fit) 46 | ``` 47 | 48 | Finally, you can describe the posterior distribution for each of the indices with their ```summary()``` function. With this call, we see the 3 central tendency measures (mean, median, and mode), the standard deviation, and the 90% Credible Interval 49 | 50 | ```{r} 51 | summary(gl_fits, central.tendency = c("mean","median","mode"), prob = .90) 52 | ``` 53 | 54 | ### Incremental Fit Indices 55 | 56 | Another group of fit indices compares the hypothesized model with the *worst* possible model, so they are called incremental indices. Such indices compare your model's $\chi^2_H$ to the *null* model's $\chi^2_0$ in different ways. Indices include the Comparative Fit Index (CFI), Tucker-Lewis Index (TLI), and Normed Fit Index (NFI). 57 | 58 | To estimate these indices we need to define and estimate the respective *null* model. The standard *null* model used by default in frequentist SEM programs (like *lavaan*) includes only the indicator variances and intercepts, and no covariances between items.
59 | 60 | You can specify your *null* model by including only the respective indicator variances in your model syntax, such as 61 | 62 | ```{r nmod, include=FALSE, cache=TRUE} 63 | HS.model_null <- ' 64 | x1 ~~ x1 65 | x2 ~~ x2 66 | x3 ~~ x3 67 | x4 ~~ x4 68 | x5 ~~ x5 69 | x6 ~~ x6 70 | x7 ~~ x7 71 | x8 ~~ x8 72 | x9 ~~ x9 ' 73 | 74 | fit_null <- bcfa(HS.model_null, data=HolzingerSwineford1939) 75 | ``` 76 | 77 | ```{r, eval=F} 78 | HS.model_null <- ' 79 | x1 ~~ x1 80 | x2 ~~ x2 81 | x3 ~~ x3 82 | x4 ~~ x4 83 | x5 ~~ x5 84 | x6 ~~ x6 85 | x7 ~~ x7 86 | x8 ~~ x8 87 | x9 ~~ x9 ' 88 | 89 | fit_null <- bcfa(HS.model_null, data=HolzingerSwineford1939) 90 | ``` 91 | 92 | 93 | Once you have your hypothesized and null models, you pass both to the ```blavFitIndices``` function, and now it will provide both types of fit indices 94 | 95 | ```{r bfi2, cache = TRUE, warning = FALSE} 96 | gl_fits_all <- blavFitIndices(fit, baseline.model = fit_null) 97 | 98 | summary(gl_fits_all, central.tendency = c("mean","median","mode"), prob = .90) 99 | ``` 100 | 101 | The ```summary()``` method now presents the central tendency measure you asked for, standard deviation, and credible interval for the noncentrality and incremental fit indices. 102 | 103 | ### Access the indices posterior distributions 104 | 105 | You can also extract the posterior distributions for the respective indices, this way you can explore further details. For example, diagnostic plots using the ```bayesplot``` package. 106 | 107 | ```{r expi} 108 | dist_fits <- data.frame(gl_fits_all@indices) 109 | head(dist_fits) 110 | ``` 111 | 112 | Once we have saved the posterior distributions, we can explore the histogram and scatterplots between indices. 113 | 114 | ```{r plpi, warning = FALSE} 115 | mcmc_pairs(dist_fits, pars = c("BRMSEA","BGammaHat","BCFI","BTLI"), 116 | diag_fun = "hist") 117 | ``` 118 | 119 | 120 | ### Summary 121 | 122 | You can estimate posterior distributions for $\chi^2$ based global fit indices.
Notice that here we only presented the fit indices based on the recommended method *devM* and with the recommended number of parameters metric *loo*. These can be adjusted by the user if desired. 123 | 124 | The general recommendation is to prefer $\hat{\Gamma}$ and CFI, as these have shown to be less sensitive to model and data characteristics. 125 | 126 | These defaults and recommendations are made based on previous simulation research. For more details about the fit indices please see @garnier_adapting_2020. 127 | 128 | ### References 129 | 130 | -------------------------------------------------------------------------------- /vignettes/convergence_efficiency.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Convergence and Efficiency Evaluation" 3 | author: Mauricio Garnier-Villarreal 4 | bibliography: 5 | - refs.bib 6 | --- 7 | 8 | ```{r pkgld, include=FALSE} 9 | library(blavaan, quietly=TRUE) 10 | library(lavaan, quietly=TRUE) 11 | ``` 12 | 13 | ### Introduction 14 | 15 | When Bayesian models are estimated with a Markov-Chain Monte Carlo (MCMC) sampler, the model estimation doesn't stop when it has achieved some convergence criteria. It will run as long as desired (determined by the `burnin` and `sample` arguments), and then you need to evaluate the convergence and efficiency of the estimated posterior distributions. You should only analyze the results if convergence has been achieved, as judged by the metrics described below. 16 | 17 | For this example we will use the Industrialization and Political Democracy example [@bollen_structural_1989]. 
18 | 19 | ```{r, eval=T, include=FALSE, cache=TRUE} 20 | model <- ' 21 | # latent variable definitions 22 | ind60 =~ x1 + x2 + x3 23 | dem60 =~ a*y1 + b*y2 + c*y3 + d*y4 24 | dem65 =~ a*y5 + b*y6 + c*y7 + d*y8 25 | 26 | # regressions 27 | dem60 ~ ind60 28 | dem65 ~ ind60 + dem60 29 | 30 | # residual correlations 31 | y1 ~~ y5 32 | y2 ~~ y4 + y6 33 | y3 ~~ y7 34 | y4 ~~ y8 35 | y6 ~~ y8 36 | ' 37 | 38 | fit <- bsem(model, data=PoliticalDemocracy, 39 | std.lv=T, meanstructure=T, n.chains=3, 40 | burnin=500, sample=1000) 41 | ``` 42 | 43 | ```{r, eval=F} 44 | model <- ' 45 | # latent variable definitions 46 | ind60 =~ x1 + x2 + x3 47 | dem60 =~ a*y1 + b*y2 + c*y3 + d*y4 48 | dem65 =~ a*y5 + b*y6 + c*y7 + d*y8 49 | 50 | # regressions 51 | dem60 ~ ind60 52 | dem65 ~ ind60 + dem60 53 | 54 | # residual correlations 55 | y1 ~~ y5 56 | y2 ~~ y4 + y6 57 | y3 ~~ y7 58 | y4 ~~ y8 59 | y6 ~~ y8 60 | ' 61 | 62 | fit <- bsem(model, data=PoliticalDemocracy, 63 | std.lv=T, meanstructure=T, n.chains=3, 64 | burnin=500, sample=1000) 65 | ``` 66 | 67 | 68 | ### Convergence 69 | 70 | The primary convergence diagnostic is $\hat{R}$, which compares the between- and within-chain samples of model parameters and other univariate quantities of interest [@new_rhat]. If chains have not mixed well (ie, the between- and within-chain estimates don't agree), $\hat{R}$ is larger than 1. We recommend running at least three chains by default and only using the posterior samples if $\hat{R} < 1.05$ for all the parameters. 71 | 72 | ```blavaan``` presents the $\hat{R}$ reported by the underlying MCMC program, either Stan or JAGS (Stan by default). We can obtain the $\hat{R}$ from the ```summary()``` function, and we can also extract it with the ```blavInspect()``` function 73 | 74 | ```{r} 75 | blavInspect(fit, "rhat") 76 | ``` 77 | 78 | With large models it can be cumbersome to look over all of these entries. 
We can instead find the largest $\hat{R}$ to see if they are all less than $1.05$ 79 | 80 | ```{r} 81 | max(blavInspect(fit, "psrf")) 82 | ``` 83 | 84 | If all $\hat{R} < 1.05$ then we can establish that the MCMC chains have converged to a stable solution. If the model has not converged, you might increase the number of ```burnin``` iterations 85 | 86 | ```{r, eval=F} 87 | fit <- bsem(model, data=PoliticalDemocracy, 88 | std.lv=T, meanstructure=T, n.chains=3, 89 | burnin=1000, sample=1000) 90 | ``` 91 | 92 | and/or change the model priors with the ```dpriors()``` function. These address issues where the model failed to converge due to needing more iterations or due to a model misspecification (such as bad priors). As a rule of thumb, we seldom see a model require more than 1,000 burnin samples in Stan. If your model is not converging after 1,000 burnin samples, it is likely that the default prior distributions clash with your data. This can happen, e.g., if your variables contain values in the 100s or 1000s. 93 | 94 | ### Efficiency 95 | 96 | We should also evaluate the efficiency of the posterior samples. Effective sample size (ESS) is a useful measure for sampling efficiency, and is well defined even if the chains do not have finite mean or variance [@new_rhat]. 97 | 98 | In short, the posterior samples produced by MCMC are autocorrelated. This means that, if you draw 500 posterior samples, you do not have 500 independent pieces of information about the posterior distribution, because the samples are autocorrelated. The ESS metric is like a currency conversion, telling you how much your autocorrelated samples are worth if we were to convert them to independent samples.
In ```blavaan``` we can print it from the ```summary``` function with the ```neff``` argument 99 | 100 | ```{r, eval=F} 101 | summary(fit, neff=T) 102 | ``` 103 | 104 | We can also extract only those with the ```blavInspect()``` function 105 | 106 | ```{r} 107 | blavInspect(fit, "neff") 108 | ``` 109 | 110 | ESS is a sample size, so it should be at least 100 (optimally, much more than 100) times the number of chains in order to be reliable and to indicate that estimates of the posterior quantiles are reliable. In this example, because we have 3 chains, we would want to see at least ```neff=300``` for every parameter. 111 | 112 | And we can easily find the lowest ESS with the ```min()``` function: 113 | 114 | ```{r} 115 | min(blavInspect(fit, "neff")) 116 | ``` 117 | 118 | 119 | ### References 120 | -------------------------------------------------------------------------------- /vignettes/convergence_loop.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Convergence loop" 3 | author: Mauricio Garnier-Villarreal 4 | bibliography: 5 | - refs.bib 6 | --- 7 | 8 | ```{r pkgld, include=FALSE} 9 | library(blavaan, quietly=TRUE) 10 | library(lavaan, quietly=TRUE) 11 | ``` 12 | 13 | 14 | ### Introduction 15 | 16 | In many cases you would need to run the BSEM models multiple times until it has converged. This can take a while and you might want to have *R* do it for you. This tutorial shows how to use a ```while``` loop to increase the number of burnin samples until the model converges, so you can let it run without having to adjust it every time 17 | 18 | ### Convergence loop 19 | 20 | You will start by writing the model syntax as always. Then instead of running the ```blavaan``` functions as usual, we will run them inside the ```while``` loop as follows. 
21 | 22 | Before the loop starts you need to define a starting ```BURN <- 0``` number of iterations, and a convergence value higher than the desired such as ```rhat <- 20```. 23 | 24 | Then the loop will be set to stop when the convergence criteria (```rhat```) is lower than a desired value, like $\hat{R} < 1.05$, we specify this with ```while(rhat > 1.05)```, meaning that the loop will continue as long as rhat is higher than 1.05. 25 | 26 | ```{r, eval=T, include=FALSE, cache=TRUE} 27 | HS.model <- ' visual =~ x1 + x2 + x3 28 | textual =~ x4 + x5 + x6 29 | speed =~ x7 + x8 + x9 ' 30 | 31 | BURN <- 0 32 | rhat <- 20 33 | while(rhat > 1.05) { 34 | 35 | BURN <- BURN + 1000 ### increase burn in by 1000 iterations every time 36 | 37 | fit <- bcfa(HS.model, std.lv=T, 38 | data=HolzingerSwineford1939, 39 | n.chains = 3, burnin = BURN, 40 | sample=1000) 41 | rhat <- max(blavInspect(fit, "psrf"), na.rm=T) 42 | print(paste0("Rhat=",rhat)) 43 | } 44 | 45 | ``` 46 | 47 | ```{r, eval=F} 48 | HS.model <- ' visual =~ x1 + x2 + x3 49 | textual =~ x4 + x5 + x6 50 | speed =~ x7 + x8 + x9 ' 51 | 52 | BURN <- 0 53 | rhat <- 20 54 | while(rhat > 1.05) { 55 | 56 | BURN <- BURN + 1000 ### increase burn in by 1000 iterations every time 57 | 58 | fit <- bcfa(HS.model, std.lv=T, 59 | data=HolzingerSwineford1939, 60 | n.chains = 3, burnin = BURN, 61 | sample=1000) 62 | rhat <- max(blavInspect(fit, "psrf"), na.rm=T) 63 | print(paste0("Rhat=",rhat)) 64 | } 65 | 66 | ``` 67 | 68 | 69 | Then inside the loop we will increase the number of ```BURN``` iterations by 1000 in this example. And after estimating the model, we will evaluate the convergence by getting the highest estimated $\hat{R}$, and printing it on the screen so you will see how far the model is from converging. 70 | 71 | ```{r, eval=T} 72 | print(paste0("Rhat=",rhat)) 73 | ``` 74 | 75 | Note that we are only increasing the number of burnin iterations, and keeping the number of saved samples the same (1000 in this case).
If you want you can increase or decrease the number of saved iterations according to your case. 76 | 77 | And you can visualize the convergence with the trace plots 78 | 79 | ```{r} 80 | plot(fit, pars = 1:9, plot.type = "trace") 81 | ``` 82 | 83 | 84 | ### Convergence criteria 85 | 86 | In this example we use $\hat{R} < 1.05$ as the convergence criteria. We recommend that you use this or $\hat{R} < 1.01$ as the convergence criteria, but not higher. 87 | 88 | As $\hat{R}$ approaches 1, we can argue that the model has converged as the estimates have achieved stability between and within chains [@gelman_bayesian_2014]. 89 | 90 | 91 | ### References -------------------------------------------------------------------------------- /vignettes/cross_loadings_strong_priors.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Cross-loadings with strong priors" 3 | author: Mauricio Garnier-Villarreal 4 | bibliography: 5 | - refs.bib 6 | --- 7 | 8 | ```{r pkgld, include=FALSE} 9 | library(blavaan, quietly=TRUE) 10 | library(lavaan, quietly=TRUE) 11 | ``` 12 | 13 | ### Introduction 14 | 15 | An advantage of BSEM is that we can use priors to set up **soft** constraints in the model, by estimating a parameter with a strong prior. This way the parameter is estimated, but the prior will restrict the possible values. 16 | 17 | This was suggested by @muthen_bayesian_2012, as a way to estimate all possible cross-loadings in a CFA. This way, if the posterior distribution of the restricted parameters includes values outside of the strong prior, it can be interpreted as a model modification. This means that the parameters should be less restricted, or that the prior distribution should be relaxed. 18 | 19 | In this tutorial we present how to estimate a CFA where all possible cross-loadings are restricted by strong priors. 20 | 21 | ### Cross-loadings 22 | 23 | We will show an example with the @holswi39 data.
First we will estimate the regular model with no cross-loadings and default priors. 24 | 25 | ```{r, eval=T, include=FALSE, cache=TRUE} 26 | HS.model <- ' visual =~ x1 + x2 + x3 27 | textual =~ x4 + x5 + x6 28 | speed =~ x7 + x8 + x9 ' 29 | 30 | fit_df <- bcfa(HS.model, data=HolzingerSwineford1939, 31 | std.lv=TRUE, meanstructure=T) 32 | ``` 33 | 34 | ```{r, eval=F} 35 | HS.model <- ' visual =~ x1 + x2 + x3 36 | textual =~ x4 + x5 + x6 37 | speed =~ x7 + x8 + x9 ' 38 | 39 | fit_df <- bcfa(HS.model, data=HolzingerSwineford1939, 40 | std.lv=TRUE, meanstructure=T) 41 | ``` 42 | 43 | We can see the overall model results with the ```summary()``` function, looking at the posterior distribution for the factor loadings, correlations, intercepts and variances. 44 | 45 | ```{r} 46 | summary(fit_df) 47 | ``` 48 | 49 | Next, we will add all possible cross-loadings with a strong prior of $N(0, \sigma = 0.08)$. The prior centers the loadings around 0 and allows them little space to move. 50 | 51 | ```{r, eval=T, include=FALSE, cache=TRUE} 52 | HS.model.cl<-' visual =~ x1 + x2 + x3 53 | textual =~ x4 + x5 + x6 54 | speed =~ x7 + x8 + x9 55 | 56 | ## Cross-loadings 57 | visual =~ prior("normal(0,.08)")*x4 + prior("normal(0,.08)")*x5 + prior("normal(0,.08)")*x6 + prior("normal(0,.08)")*x7 + prior("normal(0,.08)")*x8 + prior("normal(0,.08)")*x9 58 | textual =~ prior("normal(0,.08)")*x1 + prior("normal(0,.08)")*x2 + prior("normal(0,.08)")*x3 + prior("normal(0,.08)")*x7 + prior("normal(0,.08)")*x8 + prior("normal(0,.08)")*x9 59 | speed =~ prior("normal(0,.08)")*x1 + prior("normal(0,.08)")*x2 + prior("normal(0,.08)")*x3 + prior("normal(0,.08)")*x4 + prior("normal(0,.08)")*x5 + prior("normal(0,.08)")*x6' 60 | 61 | fit_cl <- bcfa(HS.model.cl, data=HolzingerSwineford1939, 62 | std.lv=TRUE, meanstructure=T, seed=867) 63 | ``` 64 | 65 | ```{r, eval=F} 66 | HS.model.cl<-' visual =~ x1 + x2 + x3 67 | textual =~ x4 + x5 + x6 68 | speed =~ x7 + x8 + x9 69 | 70 | ## Cross-loadings 71 | visual =~ 
prior("normal(0,.08)")*x4 + prior("normal(0,.08)")*x5 + prior("normal(0,.08)")*x6 + prior("normal(0,.08)")*x7 + prior("normal(0,.08)")*x8 + prior("normal(0,.08)")*x9 72 | textual =~ prior("normal(0,.08)")*x1 + prior("normal(0,.08)")*x2 + prior("normal(0,.08)")*x3 + prior("normal(0,.08)")*x7 + prior("normal(0,.08)")*x8 + prior("normal(0,.08)")*x9 73 | speed =~ prior("normal(0,.08)")*x1 + prior("normal(0,.08)")*x2 + prior("normal(0,.08)")*x3 + prior("normal(0,.08)")*x4 + prior("normal(0,.08)")*x5 + prior("normal(0,.08)")*x6' 74 | 75 | fit_cl <- bcfa(HS.model.cl, data=HolzingerSwineford1939, 76 | std.lv=TRUE, meanstructure=T) 77 | ``` 78 | 79 | It is important that, for each factor, the first variable after `=~` is one whose loading we expect to be far from 0. So, in the above model, we specified the regular cfa first (whose loadings we expect to be larger), then the loadings with small-variance priors on a separate line. This is important because, in blavaan, the first loading is either constrained to be positive or fixed to 1 (depending on `std.lv`). If the posterior distribution of that constrained loading is centered near 0, we may experience identification problems. Reverse-coded variables can also be problematic here, because a positive constraint on a reverse-coded loading can lead other loadings to assume negative values. If you use informative priors in this situation, then you should verify that the prior density is on the correct side of 0. 80 | 81 | After estimation, you can look at the ```summary()``` of this model and evaluate the cross-loadings. You can specifically see whether any of the cross-loadings seem large enough to suggest that they should be kept in the model, by looking at the posterior mean (```Estimate```) and credible interval. 
82 | 83 | ```{r} 84 | summary(fit_cl) 85 | ``` 86 | 87 | We suggest to not simply look at whether the CI excludes 0 (similar to the null hypothesis), but to evaluate whether the minimum value of the CI (the value closer to 0) is far enough away from 0 to be relevant instead of just **different** from 0. 88 | 89 | ### Caveats 90 | 91 | The model with all possible cross-loadings should not be kept as the final analysis model, but should be used as a step to make decisions about model changes. This is for two main reasons: (1) this model is overfitted and would present *good* overall fit just due to the inclusion of a lot of nuisance parameters. In this example the posterior predictive p-value goes from `r paste0('ppp = ', round(fitMeasures(fit_df, 'ppp')[[1]], 3))` to `r paste0('ppp = ', round(fitMeasures(fit_cl, 'ppp')[[1]], 3))`, and it is not that the model is better theoretically but that we are inflating the model fit. And (2), the addition of small-variance priors can prevent detection of important misspecifications in Bayesian confirmatory factor analysis, as it can obscure underlying problems in the model by diluting it through a large number of nuisance parameters [@jorgensen_small_variance_2019]. 92 | 93 | ### References 94 | -------------------------------------------------------------------------------- /vignettes/estimate.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Model Estimation" 3 | --- 4 | 5 | ```{r echo=FALSE, message=FALSE} 6 | library(blavaan, quietly=TRUE) 7 | library(lavaan, quietly=TRUE) 8 | ``` 9 | 10 | Models are specified using *lavaan* syntax, and prior distribution specification has already been covered on the [previous page](prior.html). The specified model can then be estimated via the `bsem()` command, with other models being estimated via `bcfa()`, `bgrowth()`, or `blavaan()`.
Regardless of the command, there are many arguments that allow you to tailor the model estimation to your needs. We discuss here some of the most popular arguments, as well as some easy-to-miss arguments. 11 | 12 | ### Primary arguments 13 | Primary arguments to the model estimation commands include `burnin`, `sample`, `n.chains`, and `target`. The `burnin` and `sample` arguments are used to specify the desired number of burn-in iterations and posterior samples for each of `n.chains` chains (and the `burnin` argument controls the *warm-up* iterations in Stan). The `target` argument, on the other hand, is used to specify the MCMC strategy used for estimation. The default, `target = "stan"`, tends to be fastest and most efficient. Other options are slightly more flexible, including `target = "stanclassic"` and `target = "jags"`. Both of these approaches sample latent variables as if they are model parameters, whereas `target = "stan"` marginalizes out the latent variables. For more detail of these approaches, see the [JSS paper](https://www.jstatsoft.org/article/view/v100i06). 14 | 15 | 16 | ### Secondary arguments 17 | Noteworthy secondary arguments include `save.lvs`, `mcmcfile`, `mcmcextra`, and `inits`. 18 | 19 | The `save.lvs` argument controls whether or not latent variables are sampled during model estimation. It defaults to `FALSE` because the latent variable sampling can take a large amount of memory, and can slow down some post-estimation summaries. But setting `save.lvs = TRUE` allows for model summaries of latent variables and observed variable predictions using `blavPredict()` and other functions. 20 | 21 | By setting `mcmcfile = TRUE`, users can obtain the Stan (JAGS) code and data for the specified model. These files are written to the `lavExport` folder within a user's working directory. One file has extension .jag or .stan, and the second file is an R data file (extension .rda). 
The rda file can be loaded in R (via `load()`) and will be a list including elements `data`, `monitors`, and `inits`. These elements can be supplied to `stan()` for model estimation outside of *blavaan*. 22 | 23 | The `mcmcextra` argument is used to supply extra information to Stan or JAGS. Users can supply a list with element names `monitor`, `data`, `syntax`, or `llnsamp`. These elements are respectively used to specify extra parameters to monitor, extra data to pass to the model estimation, extra syntax to include in the model file (JAGS only), and the number of importance samples for likelihood approximation (which is only relevant to models with ordinal variables). 24 | 25 | The `inits` argument is used to control the starting values for MCMC estimation. It can sometimes salvage a model that immediately crashes. The default, `inits = "simple"`, initializes model parameters to 0 and 1 in fashion similar to *lavaan*'s use of this argument. A second option, `inits = "prior"`, draws initial values from the prior distributions. The user can also specify a list of their own initial values via this argument, though the required list format is somewhat cumbersome. We recommend exporting the model and data using `mcmcfile = TRUE`, loading the resulting rda file, and looking at the format of the initial values that *blavaan* created there. 26 | 27 | 28 | ### Parallelization 29 | Speed is always an issue when we sample via MCMC, especially using software like Stan or JAGS. For computers with multiple cores, the estimation can be sped up by sending each MCMC chain to a separate core. This is accomplished with the `bcontrol` argument, which is a list whose elements correspond to `stan()` or `run.jags()` arguments. For parallelizing the chains in Stan, we would want to use the argument `bcontrol = list(cores = 3)`. Many other arguments are available here to control other aspects of estimation; see `?stan` or `?run.jags` for all the possibilities. 
30 | 31 | Parallelization can also be helpful to speed up post-estimation computations. The *future* package controls this parallelization, which requires an extra command prior to estimation. The most common commands would be 32 | 33 | ```{r parallel, eval = FALSE} 34 | library("future") 35 | plan("multicore") ## mac or linux 36 | plan("multisession") ## windows 37 | ``` 38 | -------------------------------------------------------------------------------- /vignettes/invariance.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Measurement Invariance" 3 | bibliography: 4 | - refs.bib 5 | --- 6 | 7 | ```{r echo=FALSE, message=FALSE} 8 | library(blavaan, quietly=TRUE) 9 | library(lavaan, quietly=TRUE) 10 | ``` 11 | 12 | The overt intent of this example is to illustrate the use of blavaan for studying measurement invariance. Along the way, we will also see how to obtain Bayesian model selection measures and how to send a lavaan object directly to blavaan for re-estimation. 13 | 14 | 15 | ### Model Estimation 16 | Consider a measurement invariance study of the @holswi39 data. In lavaan, we may first estimate two models: 17 | 18 | ```{r, eval=FALSE} 19 | HS.model <- ' visual =~ x1 + x2 + x3 20 | textual =~ x4 + x5 + x6 21 | speed =~ x7 + x8 + x9 ' 22 | 23 | fit1 <- cfa(HS.model, data = HolzingerSwineford1939, group = "school") 24 | 25 | fit2 <- cfa(HS.model, data = HolzingerSwineford1939, group = "school", 26 | group.equal = "loadings") 27 | ``` 28 | and then examine the absolute fit of `fit1`. We could also compare `fit2` to `fit1` via a Likelihood Ratio Test. Instead of this, we wish to do something similar via Bayesian methods. 29 | 30 | To accomplish this via blavaan, we can fit Bayesian versions of `fit1` and `fit2` using similar syntax. 
31 | ```{r bf1, eval=FALSE} 32 | bfit1 <- bcfa(HS.model, data = HolzingerSwineford1939, group = "school") 33 | 34 | bfit2 <- bcfa(HS.model, data = HolzingerSwineford1939, group = "school", 35 | group.equal = "loadings") 36 | ``` 37 | 38 | Model fit and comparison statistics are then available via the `fitMeasures()` and `blavCompare()` functions: 39 | ```{r eval=FALSE} 40 | fitMeasures(bfit1) 41 | 42 | fitMeasures(bfit2) 43 | 44 | blavCompare(bfit1, bfit2) 45 | ``` 46 | 47 | 48 | ### Approximate Invariance 49 | In *approximate* measurement invariance studies, we replace the hard equality constraints with soft constraints by using informative prior distributions. The `wiggle` argument can be used to invoke these types of constraints. For example: 50 | 51 | ```{r, eval=FALSE} 52 | HS.model <- ' visual =~ x1 + c("a", "a")*x2 + c("b", "b")*x3 53 | textual =~ x4 + x5 + x6 54 | speed =~ x7 + x8 + x9 ' 55 | 56 | bfit3 <- bcfa(HS.model, data = HolzingerSwineford1939, group = "school", wiggle = c("a", "b"), 57 | wiggle.sd = 0.05) 58 | ``` 59 | 60 | This constrains the loadings associated with `x2` and `x3` to be approximately equal across groups, where the informative priors associated with these constraints are normal with standard deviations of 0.05. 61 | 62 | Using the above strategy, the syntax can become very cumbersome. In many cases, the `group.equal` argument can help here. For example: 63 | 64 | ```{r, eval=FALSE} 65 | HS.model <- ' visual =~ x1 + x2 + x3 66 | textual =~ x4 + x5 + x6 67 | speed =~ x7 + x8 + x9 ' 68 | 69 | bfit4 <- bcfa(HS.model, data = HolzingerSwineford1939, group = "school", 70 | group.equal = c("intercepts", "loadings"), wiggle = "loadings", 71 | wiggle.sd = 0.05) 72 | ``` 73 | 74 | In the above example, the model intercepts and loadings have across-group constraints. The loadings are approximately equal across groups, due to the argument `wiggle = "loadings"`. And the intercepts are constrained to be exactly equal across groups. 
In this way, it becomes easy to use exact and approximate equality constraints in the same model, if desired. 75 | 76 | 77 | ### References 78 | 79 | -------------------------------------------------------------------------------- /vignettes/mod_indices.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Modification indices" 3 | author: Mauricio Garnier-Villarreal 4 | bibliography: 5 | - refs.bib 6 | --- 7 | 8 | ```{r pkgld, include=FALSE} 9 | library(blavaan, quietly=TRUE) 10 | library(lavaan, quietly=TRUE) 11 | ``` 12 | 13 | ### Introduction 14 | In SEM, one of the first steps is to evaluate the model's global fit. After global fit, we need to evaluate the local fit of a model, meaning how the model reproduces specific correlations between observed variables. 15 | 16 | There are a couple of common methods for this, (a) testing for high residual correlations, or (b) modification indices. This tutorial focuses on the second. Modification indices test the **likely** change in the model fit if a single parameter is added to the model that was not originally included. This test can be carried out for every possible parameter that was not included [@bentler_fit_1990]. 17 | 18 | ### Modification Indices 19 | 20 | Modification indices present different **indices** to quantify the effect of each parameter, and we will focus on two here. These are (a) the modification index (MI) or Lagrange multiplier, which estimates the extent to which the model’s chi-square ($\chi^2$) test statistic would decrease if a parameter were added to the model and freely estimated, and (b) standardized expected parameter change (SEPC), which is the approximated standardized value of the parameter if it were to be estimated in the model [@whittaker_using_2012; @garniervillarreal_evaluating_2024]. 21 | 22 | MI presents the possible effect on the overall model, and SEPC presents the effect size for the missed parameter. 
23 | 24 | We will show an example with the @holswi39 model. You first estimate your SEM/CFA model as usual 25 | 26 | ```{r, eval=T, include=FALSE, cache=TRUE} 27 | HS.model <- ' visual =~ x1 + x2 + x3 28 | textual =~ x4 + x5 + x6 29 | speed =~ x7 + x8 + x9 ' 30 | 31 | fit <- bcfa(HS.model, data=HolzingerSwineford1939, std.lv=TRUE, seed=866) 32 | ``` 33 | 34 | ```{r, eval=F} 35 | HS.model <- ' visual =~ x1 + x2 + x3 36 | textual =~ x4 + x5 + x6 37 | speed =~ x7 + x8 + x9 ' 38 | 39 | fit <- bcfa(HS.model, data=HolzingerSwineford1939, std.lv=TRUE) 40 | ``` 41 | 42 | 43 | Then we would need to write a **discrepancy** function to collect the modification indices. The list below contains two functions that estimate and save the MI and SEPC. 44 | 45 | ```{r, eval=T, include=T} 46 | discFUN <- list( 47 | mod.ind_mi = function(object){ 48 | temp <- modificationindices(object, free.remove = F) 49 | mods <- temp$mi 50 | names(mods) <- paste0(temp$lhs, temp$op, temp$rhs) 51 | return(mods) 52 | }, 53 | mod.ind_sepc.all = function(object){ 54 | temp <- modificationindices(object, free.remove = F) 55 | sepc.all <- temp$sepc.all 56 | names(sepc.all) <- paste0(temp$lhs, temp$op, temp$rhs) 57 | return(sepc.all) 58 | } 59 | ) 60 | ``` 61 | 62 | Then we will pass this function to the ```ppmc()``` function of *blavaan*. With this function, the MI and SEPC are computed for each posterior sample, leading to posterior distributions for each of them. 63 | 64 | ```{r, eval=T, include=FALSE, cache=TRUE} 65 | out <- ppmc(fit, discFUN = discFUN) 66 | ``` 67 | 68 | ```{r, eval=F, include=T} 69 | out <- ppmc(fit, discFUN = discFUN) 70 | ``` 71 | 72 | Then we view the top 5 parameters arranged by the posterior mean (EAP) MI, which in this case shows that the parameter having the highest impact on overall model fit (according to EAP) is **visual=~x9**, the cross-loading from the Visual factor to item **x9**.
73 | 74 | ```{r, eval=T, include=T} 75 | summary(out, prob=.9, discFUN = "mod.ind_mi", sort.by="EAP", decreasing=T)[1:5,] 76 | ``` 77 | 78 | But according to the posterior median, the parameter that would have the highest impact would be the residual correlation between indicators **x7** and **x8** 79 | 80 | ```{r, eval=T, include=T} 81 | summary(out, prob=.9, discFUN = "mod.ind_mi", sort.by="Median", decreasing=T)[1:5,] 82 | ``` 83 | 84 | The MI is still recommended as the best metric to indicate which parameter is best to include next, and we can use the SEPC to evaluate the **likely** effect size for the respective parameters. 85 | 86 | ```{r, eval=T, include=T} 87 | summary(out, prob=.9, discFUN = "mod.ind_sepc.all", sort.by="EAP", decreasing=T)[1:5,] 88 | ``` 89 | 90 | ```{r, eval=T, include=F} 91 | tmptab <- summary(out, prob=.9, discFUN = "mod.ind_sepc.all", sort.by="EAP", decreasing=T)[1:5,] 92 | ``` 93 | 94 | Here we see that for the 2 highest parameters, the likely SEPC is `r paste0('x7~~x8 = ', tmptab['x7~~x8', 'EAP'])` and `r paste0('visual=~x9 = ', tmptab['visual=~x9', 'EAP'])`. With this information we can decide to include one of these new parameters in the model (one at a time). For this example, because factor loadings have a larger impact on the model-implied covariance matrix, I would choose **visual=~x9** 96 | 97 | ```{r, eval=T, include=FALSE, cache=TRUE} 97 | HS.model <- ' visual =~ x1 + x2 + x3 + x9 98 | textual =~ x4 + x5 + x6 99 | speed =~ x7 + x8 + x9 ' 100 | 101 | fit2 <- bcfa(HS.model, data=HolzingerSwineford1939, std.lv=TRUE) 102 | ``` 103 | 104 | 105 | ```{r, eval=F} 106 | HS.model <- ' visual =~ x1 + x2 + x3 + x9 107 | textual =~ x4 + x5 + x6 108 | speed =~ x7 + x8 + x9 ' 109 | 110 | fit2 <- bcfa(HS.model, data=HolzingerSwineford1939, std.lv=TRUE) 111 | ``` 112 | 113 | And you can check if the added parameter has the expected impact on overall fit with the ```blavFitIndices()``` and the ```summary()``` functions.
114 | 115 | It is important to also consider the theoretical relevance of the suggested parameters, and to ensure that they make sense, instead of just adding parameters until having **good** fit. 116 | 117 | 118 | ### Summary 119 | 120 | You can see more details about the application and testing of these indices in Bayesian SEM in @garniervillarreal_evaluating_2024. 121 | 122 | In this tutorial we show how to calculate the MI and SEPC across posterior distributions, and evaluate which parameters can be added. 123 | 124 | With the ```ppmc()``` function we are able to calculate relevant information after model estimation, and build posterior distributions of them. 125 | 126 | The general recommendations are to use MI to identify the most likely parameter to add, and SEPC as the effect size of the new parameter [@garniervillarreal_evaluating_2024]. 127 | 128 | ### References 129 | -------------------------------------------------------------------------------- /vignettes/multilevel.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Two-level SEM" 3 | bibliography: 4 | - refs.bib 5 | --- 6 | 7 | ```{r include=FALSE, echo=FALSE, message=FALSE} 8 | library(blavaan, quietly=TRUE) 9 | library(lavaan, quietly=TRUE) 10 | ``` 11 | 12 | Starting with version 0.5-1, *blavaan* supports two-level SEM with random intercepts. 13 | The specification and estimation commands are similar to those of *lavaan*, including use of `level:` in the model specification and use of the `cluster` argument for estimation. Consequently, examples involving *lavaan* also generally apply to *blavaan*, such as the *lavaan* tutorial example below.
14 | 15 | ```{r ex1, eval = FALSE} 16 | data(Demo.twolevel, package = "lavaan") 17 | 18 | model <- ' 19 | level: within 20 | fw =~ y1 + y2 + y3 21 | fw ~ x1 + x2 + x3 22 | level: between 23 | fb =~ y1 + y2 + y3 24 | fb ~ w1 + w2 25 | ' 26 | 27 | bfit <- bsem(model = model, data = Demo.twolevel, cluster = "cluster") 28 | ``` 29 | 30 | Below, we discuss what is currently covered by *blavaan* and some features that are unique to Bayesian modeling. 31 | 32 | ### *blavaan* Coverage 33 | As of version 0.5-1, *blavaan* handles two-level, random intercept models for complete, continuous data. Handling missing data (assuming missingness at random) will come in a future release. In the meantime, multiple imputation might be used in combination with the current *blavaan* functionality (though there is not currently an automatic way to do it). Alternatively, if there is not much missing data and it occurs only for lower-level units, listwise deletion could work. 34 | 35 | The *blavaan* approach to model estimation mimics the *lavaan* approach, which uses matrix results [see @ros21] that enable us to efficiently evaluate the multilevel SEM likelihood. This will often lead to more efficient MCMC estimation, as compared to sampling all the level 1 and level 2 latent variables and working with conditional likelihoods [see @merfit21 for discussion of marginal vs conditional likelihoods]. 36 | 37 | Similar to single-level models, users can sample latent variables using the `save.lvs = TRUE` argument in their `bcfa/bsem/bgrowth/blavaan` commands. Marginal information criteria (marginal over all latent variables) are also automatically computed, with these information criteria generally being preferred over those than condition on latent variables [see @merfur19 for detail in the context of single-level models]. 38 | 39 | ### Bayes-specific Options 40 | All Bayesian models require prior distributions. 
The previous *blavaan* defaults for single-level models are now used for two-level models. You can continue to use commands like `dpriors(lambda = "normal(1,.5)")` to specify a Normal(1,.5) prior for all factor loadings and, for two-level models, that specification will apply to both the level 1 and level 2 loadings. Depending on the model, it may also be useful to specify priors on individual parameters via the `prior()` argument inside the model specification syntax. The default prior distributions do not always work well for observed variables whose values are far from 0. We continue to encourage users to consider their own prior distributions, possibly using the `prisamp = TRUE` option to draw samples from the prior (which could be further used for prior predictive checking). 41 | 42 | Model checking also differs between Bayesian and frequentist methods. Just like it did for one-level models, *blavaan* reports a posterior predictive p-value for general model assessment. This is computed by comparing the marginal likelihood of the observed data (marginal over all latent variables) to the marginal likelihood of artificial data, for each iteration of MCMC sampling. For finer-grained model assessment, we encourage users to try `ppmc()`. It allows you to compute a posterior predictive p-value using your own, custom model assessment (defined as an R function). 43 | 44 | ### Concluding Thoughts 45 | We think that the new *blavaan* functionality provides a viable option for Bayesian two-level SEM, and it should provide a solid base for future model developments. As always, the underlying Stan files and supporting data are available via the `mcmcfile = TRUE` argument, and all the *blavaan* code is available on Github. Bug reports are appreciated, either at the *blavaan* Google group or as a Github issue. 
46 | 47 | ### References 48 | -------------------------------------------------------------------------------- /vignettes/plotting.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Plot Functionality" 3 | bibliography: 4 | - refs.bib 5 | --- 6 | 7 | ```{r pld, include=FALSE} 8 | library(blavaan, quietly=TRUE) 9 | library(lavaan, quietly=TRUE) 10 | library(ggplot2, quietly=TRUE) 11 | library(bayesplot, quietly=TRUE) 12 | ``` 13 | 14 | ```{r modfit, include=FALSE, cache=TRUE} 15 | HS.model <- ' visual =~ x1 + x2 + x3 16 | textual =~ x4 + x5 + x6 17 | speed =~ x7 + x8 + x9 ' 18 | 19 | fit <- bcfa(HS.model, data=HolzingerSwineford1939, seed=959) 20 | ``` 21 | 22 | A `plot()` method exists for *blavaan* objects, with this method making use of the *bayesplot* package [@bayesplot]. We provide details here about how this functionality works. We will use a 3-factor model for demonstration: 23 | 24 | ```{r moddisp, eval=FALSE} 25 | HS.model <- ' visual =~ x1 + x2 + x3 26 | textual =~ x4 + x5 + x6 27 | speed =~ x7 + x8 + x9 ' 28 | 29 | fit <- bcfa(HS.model, data=HolzingerSwineford1939) 30 | ``` 31 | 32 | ### Basics 33 | Because many *blavaan* models will have many parameters, users generally need to specify which parameters they wish to plot. This is accomplished by supplying numbers to the `pars` argument, where the numbers correspond to the order of parameters from the `coef()` command (the numbers also appear in the `free` column of the parameter table). Users must also specify the type of plot that they desire via the `plot.type` argument. So, for example, a trace plot of the first four model parameters looks like 34 | 35 | ```{r p1} 36 | plot(fit, pars = 1:4, plot.type = "trace") 37 | ``` 38 | 39 | Many other plot types are available, coming from the *bayesplot* package. 
In general, for *bayesplot* functions that begin with `mcmc_`, the corresponding `plot.type` is the remainder of the function name without the leading `mcmc_`. Examples of many of these plots can be found in [this bayesplot vignette](https://cran.r-project.org/web/packages/bayesplot/vignettes/plotting-mcmc-draws.html). 40 | 41 | 42 | ### Customization 43 | Users may wish to customize some aspects of the resulting plots. For this, the `plot()` function will output a *ggplot* object. This makes it possible to modify the plot as if it were any other *ggplot* object, which allows for many possibilities. One starting point for exploring *ggplot2* is [here](https://ggplot2.tidyverse.org/). 44 | 45 | ```{r p2} 46 | p <- plot(fit, pars = 1:4, plot.type = "trace", showplot = FALSE) 47 | 48 | p + facet_text(size=15) + legend_none() 49 | ``` 50 | 51 | Alternatively, users may wish to create a plot that is entirely different from what is available via `plot()`. This can be facilitated by extracting the posterior samples or the Stan model, via `blavInspect()`: 52 | 53 | ```{r eval=FALSE} 54 | ## list of draws 55 | ## (one list entry per chain): 56 | draws <- blavInspect(fit, "mcmc") 57 | 58 | ## convert the list to a matrix 59 | ## (each row is a sample, 60 | ## each column is a parameter) 61 | draws <- do.call("rbind", draws) 62 | 63 | ## Stan (or JAGS) model 64 | modobj <- blavInspect(fit, "mcobj") 65 | ``` 66 | 67 | 68 | ### References 69 | -------------------------------------------------------------------------------- /vignettes/prior.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Specifying Prior Distributions" 3 | bibliography: 4 | - refs.bib 5 | --- 6 | 7 | ```{r echo=FALSE, message=FALSE} 8 | library(blavaan, quietly=TRUE) 9 | library(lavaan, quietly=TRUE) 10 | ``` 11 | 12 | There are two ways to specify prior distributions in blavaan. 
First, each type of model parameter has a default prior distribution that may or may not be suitable for your specific situation. You are free to modify the defaults. Second, the priors for individual model parameters can be specified in the model syntax. Each is discussed below. 13 | 14 | 15 | ### Defaults 16 | The default priors can be seen via 17 | 18 | ```{r} 19 | dpriors() 20 | ``` 21 | 22 | It is important to note that these prior distributions correspond to Stan parameterizations. These are similar to R parameterizations but not necessarily exactly the same. The Greek(ish) names above correspond to the following parameter types (where MV is manifest/observed variable and LV is latent variable): 23 | 24 | ```{r, echo=FALSE} 25 | pnames <- dpriors() 26 | pdesc <- c("MV intercept", "LV intercept", "Loading", "Regression", "MV precision", "LV precision", "Correlation", "Covariance matrix", "Threshold") 27 | pnames[1:9] <- pdesc 28 | pnames 29 | ``` 30 | For further information about priors on thresholds, see the [ordinal modeling details](ordinal.html). 31 | 32 | For `target = "stan"` (the default), priors are currently restricted to one distribution per parameter type. You can change the prior distribution parameters (for example, the mean and standard deviation of a normal), but you cannot change the prior distribution type. The only exceptions here are the "theta" and "psi" parameters: for those, you can use the modifiers "[sd]", "[var]", or "[prec]" to specify whether you want the priors to apply to the standard deviation, variance, or precision. If you require more flexibility in prior specification, you change the target to either `"stanclassic"` (the old Stan approach) or `"jags"` (the JAGS approach). Alternatively, you can export the Stan model via `mcmcfile = TRUE`, edit the file as needed, then fit it via the rstan package. 
33 | 34 | 35 | To modify prior distributions, we could simply supply a new text string to `dpriors()` like this: 36 | 37 | ```{r} 38 | mydp <- dpriors(lambda="normal(1,2)") 39 | mydp 40 | ``` 41 | 42 | so that the default prior for loadings is now normal with mean 1 and standard deviation 2, and the rest of the parameters remain at the original defaults. The next time we estimate a model (via `bsem()`, `bcfa()`, `bgrowth()`, or `blavaan()`), we would add the argument `dp=mydp` to use this new set of default priors. 43 | 44 | 45 | ### Individual Parameters 46 | 47 | In addition to setting the prior for one type of model parameter, the user may wish to set the prior of a specific model parameter. This is accomplished by using the `prior()` modifier within the model specification. For example, consider the following syntax for the @holswi39 confirmatory factor model: 48 | 49 | ```{r eval=FALSE} 50 | HS.model <- ' visual =~ x1 + prior("normal(1,2)")*x2 + x3 51 | textual =~ x4 + x5 + prior("normal(3,1.5)")*x6 52 | speed =~ x7 + x8 + x9 53 | x1 ~~ prior("gamma(3,3)[sd]")*x1 ' 54 | ``` 55 | 56 | The loading from `visual` to `x2` now has a normal prior with mean 1 and standard deviation 2, while the loading from `textual` to `x6` has a normal prior with mean 3 and standard deviation 1.5. All other loadings have the default prior distribution. 57 | 58 | In the above syntax, we have additionally specified a gamma(3,3) prior associated with the residual of `x1`. The `[sd]` text at the end of the distribution says that this prior goes on the residual standard deviation, as opposed to the residual precision or residual variance. There exist two more options here: a `[var]` option for the residual variance, and no brackets for the precision (or you could also use `[prec]`). This bracketed text can be used for any model variance/SD/precision parameter and could also be used in default prior specification if desired. 
59 | 60 | 61 | ### Covariance Parameters 62 | 63 | One additional note on covariance parameters defined in the model syntax: the `prior()` syntax specifies a prior on the correlation associated with the covariance parameter, as opposed to the covariance itself. The specified distribution should have support on (0,1), and blavaan automatically translates the prior to an equivalent distribution with support on (-1,1). It is safest to stick with beta priors here. For example, the syntax 64 | 65 | ```{r eval=FALSE} 66 | HS.model <- ' visual =~ x1 + x2 + x3 67 | textual =~ x4 + x5 + x6 68 | speed =~ x7 + x8 + x9 69 | visual ~~ prior("beta(1,1)")*textual ' 70 | ``` 71 | 72 | places a Beta(1,1) (uniform) prior on the correlation between the `visual` and `textual` factors. If desired, we could also specify priors on the standard deviations (or variances or precisions) of the `visual` and `textual` factors. Together with the prior on the correlation, these priors would imply a prior on the covariance between `visual` and `textual`. 73 | 74 | 75 | ### References 76 | 77 | -------------------------------------------------------------------------------- /vignettes/prior_pred_checks.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Prior Predictive Checks" 3 | author: Mauricio Garnier-Villarreal 4 | bibliography: 5 | - refs.bib 6 | --- 7 | 8 | ```{r pkgld, include=FALSE} 9 | library(blavaan, quietly=TRUE) 10 | library(lavaan, quietly=TRUE) 11 | library(bayesplot, quietly=TRUE) 12 | ``` 13 | 14 | 15 | ### Introduction 16 | 17 | In Bayesian models we need to specify priors for the model parameters. Priors are the distribution that we *think* the parameters would follow, even before having data. 
These can represent **high** or **low** uncertainty, so that a *diffuse* prior indicates that we do not know a lot about how the parameter would behave, while an *informative* prior means that we are quite certain about the expected distribution. 18 | 19 | ### Prior Predictive Checks 20 | 21 | Prior predictive checks (PPC) generate data according to the prior in order to assess whether a prior is appropriate [@Gabry_2019_vis]. A posterior predictive check generates replicated data according to the posterior predictive distribution. In contrast, the prior predictive check generates data according to the prior predictive distribution $y^{sim} \sim p(y)$. 22 | 23 | The prior predictive distribution is just like the posterior predictive distribution with no observed data, so that a prior predictive check is nothing more than the limiting case of a posterior predictive check with no data. 24 | 25 | This is easy to carry out mechanically by simulating parameters $\theta^{sim} \sim p(\theta)$ according to the priors, then simulating data $y^{sim} \sim p(y \mid \theta^{sim})$ according to the sampling distribution given the simulated parameters. The result is a simulation from the joint distribution, $(y^{sim}, \theta^{sim}) \sim p(y, \theta)$, and thus $y^{sim} \sim p(y)$ is a simulation from the prior predictive distribution. 26 | 27 | In *blavaan* we can get PPC when we use the argument ```prisamp=TRUE```; this will tell blavaan to ignore the data and build distributions only from the priors. Here we will start by adjusting the priors, instead of using the default priors. 28 | 29 | #### Weakly informative priors 30 | 31 | We will show an example with the @holswi39 data, first a case with weakly informative priors. Here we are specifying that the observed variable intercepts will have a prior of $N(3, 2)$, the factor loadings will have a prior of $N(0.4, 2)$, and the residual standard deviations will have a prior of $\Gamma(1,1)$.
 32 | 33 | ```{r, eval=T, include=FALSE, cache=TRUE} 34 | 35 | priors <- dpriors(nu="normal(3,2)", 36 | lambda="normal(0.4, 2)", 37 | theta="gamma(1,1)[sd]") 38 | 39 | HS.model <- ' visual =~ x1 + x2 + x3 40 | textual =~ x4 + x5 + x6 41 | speed =~ x7 + x8 + x9 ' 42 | 43 | fit_wi <- bcfa(HS.model, data=HolzingerSwineford1939, 44 | std.lv=TRUE, meanstructure=T, test = "none", 45 | dp=priors, prisamp = T) 46 | ``` 47 | 48 | ```{r, eval=FALSE} 49 | priors <- dpriors(nu="normal(3,2)", 50 | lambda="normal(0.4, 2)", 51 | theta="gamma(1,1)[sd]") 52 | ``` 53 | 54 | Then we estimate the BSEM model with the respective priors in the ```dp``` argument, and with ```prisamp=TRUE```, so getting PPC instead of posterior distributions. 55 | 56 | ```{r, eval=F} 57 | HS.model <- ' visual =~ x1 + x2 + x3 58 | textual =~ x4 + x5 + x6 59 | speed =~ x7 + x8 + x9 ' 60 | 61 | fit_wi <- bcfa(HS.model, data=HolzingerSwineford1939, 62 | std.lv=TRUE, meanstructure=T, test = "none", 63 | dp=priors, prisamp = T) 64 | ``` 65 | 66 | You might get some warning messages about divergent transitions and/or failed convergence. In this case, we can ignore these messages, as such issues are likely when evaluating prior predictions. 67 | 68 | We now have a **blavaan** object with prior predictive distributions, so we can use any of the package functions to describe them, and see if the parameters are within expected ranges. For example, we can get the PPC density distributions for the first 9 parameters (factor loadings in this case).
The basic ```plot()``` method calls the functions from the ```bayesplot``` package [@bayesplot], and with the ```plot.type = "dens"``` argument we can plot the density distributions. 69 | 70 | ```{r, eval=T} 71 | plot(fit_wi, pars=1:9, plot.type = "dens") 72 | ``` 73 | 74 | You can also pick which parameters to plot, like the factor correlations, by choosing parameters ```19:21``` in this case. 75 | 76 | ```{r, eval=T} 77 | plot(fit_wi, pars=19:21, plot.type = "dens") 78 | ``` 79 | 80 | From the factor loading distributions, we see that the first loading on each factor is bounded below by 0 (due to a model identification constraint in blavaan), with a maximum value around 6. All other distributions range between -6 to 6 or -4 to 4, and for all priors the most likely value is around 0. This would be described as weakly informative, as it allows negative and positive values without allowing extremely high/low values. 81 | 82 | What would be a *realistic* range depends on the parameter, model specification, and data. So, consider these priors as a function of those characteristics. 83 | 84 | For the factor correlations we kept the default diffuse priors, so these allowed very high and low correlations, but the prior distributions are not flat across all possible correlation values. 85 | 86 | 87 | #### Default priors 88 | 89 | In this next example, we will estimate the PPC with the package default priors, which would be considered diffuse priors. You can see the blavaan default priors with the function ```dpriors()```. 90 | 91 | ```{r, eval=T} 92 | dpriors() 93 | ``` 94 | 95 | Then we estimate the BSEM model and omit the ```dp``` argument, letting it run with the default priors, and with ```prisamp=TRUE```, so getting PPC instead of posterior distributions.
 96 | 97 | 98 | ```{r, eval=T, include=FALSE, cache=TRUE} 99 | fit_df <- bcfa(HS.model, data=HolzingerSwineford1939, 100 | std.lv=TRUE, meanstructure=T, test = "none", 101 | prisamp = T) 102 | ``` 103 | 104 | ```{r, eval=F} 105 | fit_df <- bcfa(HS.model, data=HolzingerSwineford1939, 106 | std.lv=TRUE, meanstructure=T, test = "none", 107 | prisamp = T) 108 | ``` 109 | 110 | Then we can plot the density distributions and compare them. We see that with the default diffuse priors, the model allows up to very high values, such as -30 to 30. 111 | 112 | ```{r, eval=T} 113 | plot(fit_df, pars=1:9, plot.type = "dens") 114 | ``` 115 | 116 | This way we can see that the more diffuse priors allow a wider range of values. It is up to the researcher to decide which range of priors better represents their expectations. 117 | 118 | It is important to note that these PPC allow us to see the expected distributions based on the priors, but these might not be the same as the priors used in the estimation process, as the priors interact with the model specification and constraints (such as the 0 bound constraint for the first factor loading) [@merkle2023opaque]. 119 | 120 | ### References

8 | 9 | Other resources are listed below: 10 | 11 | - Materials from the 2024 NCME workshop titled *Bayesian latent variable modeling in education research*. pdf R 12 | 13 | - The lavaan resources page is helpful for model specification examples; it is mostly the same specification that is used in blavaan, and the main blavaan commands just add the letter "b" to the start of the main lavaan commands. 14 | 15 | - "Getting started with blavaan" materials by Rens van de Schoot and colleagues: html 16 | 17 | - A Stan case study on *blavaan*, by Feng Ji and colleagues: html 18 | 19 | - Many recorded presentations are available on the internet: 20 | - The longest talk with the most details: Presentation to the Oslo UseR Group, including real-time coding examples near the end. (Code from those examples is here.) 21 | 22 | - Details about how the package works: A virtual presentation to the 2022 International Workshop on Psychometric Computing. 23 | 24 | - An episode of the Learning Bayesian Statistics podcast, discussing blavaan and related topics. 25 | 26 | - Basic overview of the package: useR! 2020 virtual presentation (done early in the pandemic, and demonstrating the author's poor video editing skills.) 27 | 28 | 29 | - Slides from an hour-long seminar presentation to the Michigan State Methods Group, October, 2021. 30 | 31 | - Slides (blavaan and others) from a symposium on the lavaan ecosystem at the 2019 International Meeting of the Psychometric Society, Santiago, Chile. 32 | -------------------------------------------------------------------------------- /vignettes/start.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Getting Started with blavaan" 3 | bibliography: 4 | - refs.bib 5 | --- 6 | 7 | ```{r echo=FALSE, message=FALSE} 8 | library(blavaan, quietly=TRUE) 9 | ``` 10 | 11 | This page contains some tips for getting started with the *blavaan* package. 
12 | 13 | ### Installation 14 | *blavaan* can be installed from CRAN in the usual way: 15 | 16 | ```{r, eval=FALSE} 17 | install.packages("blavaan") 18 | ``` 19 | 20 | In some situations, you may wish to install *blavaan* from GitHub. The GitHub version sometimes contains bug fixes that are not yet on CRAN, though it can also be less stable. To install from GitHub, use the following command. 21 | 22 | ```{r, eval=FALSE} 23 | remotes::install_github("ecmerkle/blavaan", INSTALL_opts = "--no-multiarch") 24 | ``` 25 | 26 | This command requires that your system can compile Stan models, which is not guaranteed if you usually install *blavaan* from CRAN. If you are having trouble, it may help to look at the [RStan Getting Started page.](https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started) 27 | 28 | 29 | ### Commands and Syntax 30 | The *blavaan* package depends on the *lavaan* package for model specification and for some computations. This means that, if you already know *lavaan*, then you should already be able to do many things in *blavaan*. In particular, many *blavaan* commands add the letter "b" to the start of the *lavaan* command. For example, `sem()` becomes `bsem()`, and `lavInspect()` becomes `blavInspect()`. It is also sometimes possible to use a *lavaan* command on a *blavaan* object, though the results may not always be what you expect. 31 | 32 | With these details in mind, look at the [lavaan tutorial](https://lavaan.ugent.be/tutorial/index.html) for many examples of models. You can translate many of those examples to *blavaan* by adding a "b" to the start of the commands. And look at the other pages here, to learn about the additional *blavaan* arguments that are specific to Bayesian methods. 
33 | 34 | -------------------------------------------------------------------------------- /vignettes/summaries.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Model Summaries" 3 | bibliography: 4 | - refs.bib 5 | --- 6 | 7 | ```{r echo=FALSE, message=FALSE} 8 | library(blavaan, quietly=TRUE) 9 | library(lavaan, quietly=TRUE) 10 | ``` 11 | 12 | Say that we specify a model of the Bollen political democracy data and draw posterior samples using the following *blavaan* code (where `save.lvs` saves the latent variable samples for further use): 13 | 14 | ```{r eval=FALSE} 15 | model <- ' 16 | # latent variable definitions 17 | ind60 =~ x1 + x2 + x3 18 | dem60 =~ y1 + a*y2 + b*y3 + c*y4 19 | dem65 =~ y5 + a*y6 + b*y7 + c*y8 20 | 21 | # regressions 22 | dem60 ~ ind60 23 | dem65 ~ ind60 + dem60 24 | 25 | # residual correlations 26 | y1 ~~ y5 27 | y2 ~~ y4 + y6 28 | y3 ~~ y7 29 | y4 ~~ y8 30 | y6 ~~ y8 31 | ' 32 | 33 | fit <- bsem(model, data=PoliticalDemocracy, save.lvs = TRUE) 34 | ``` 35 | 36 | We describe here how to summarize the fitted model. The most obvious functions are `summary()`, `coef()`, and `vcov()`, which all work in a manner similar to the analogous *lavaan* functions. But instead of maximum likelihood estimates and standard errors, *blavaan* reports posterior means and posterior standard deviations. Other summaries that are unique to Bayesian models include model convergence metrics, model fit/comparison metrics, and samples of latent variables. These are discussed below. 37 | 38 | 39 | ### Convergence 40 | Following model estimation, we immediately wish to look at the "goodness" of the posterior samples, including convergence to a stationary distribution and autocorrelation. 
Popular convergence metrics are available via the `blavInspect()` function: 41 | 42 | ```{r eval=FALSE} 43 | blavInspect(fit, 'rhat') 44 | blavInspect(fit, 'neff') 45 | ``` 46 | 47 | where R-hat values near 1.00 indicate convergence, and large effective sample sizes (hundreds or above) are preferred. For details on these metrics, see, e.g., the Posterior Analysis section of the [Stan Reference Manual](https://mc-stan.org/docs/2_28/reference-manual/index.html). 48 | 49 | 50 | If the model has definitely not converged (as judged by Rhat), blavaan will issue multiple warnings. Lack of convergence is sometimes caused by bad initial values or by a chain that strays to an extreme region of the posterior space. In these cases, it can be helpful to re-estimate the model a second time. It is also helpful to specify mildly-informative priors on loading parameters, so that the chains do not wander to extreme loading values. For example, if you expect all your variables to be positively correlated and some loadings are being fixed to 1 for identification, then Normal(1,.5) would often be a mildly-informative prior. Otherwise, lack of convergence may imply prior distributions that severely conflict with the data, or an ill-defined model. It is sometimes helpful to try to fit the same model in *lavaan*, to observe whether errors occur there. 51 | 52 | 53 | ### Model Fit & Comparison 54 | Next, we may wish to examine some model fit metrics. While many metrics are available from the `summary()` output, more are available from the `fitMeasures()` function: 55 | 56 | ```{r eval=FALSE} 57 | summary(fit) 58 | fitMeasures(fit) 59 | ``` 60 | 61 | For judging absolute fit, *blavaan* supplies a posterior predictive p-value that is based on the likelihood ratio statistic. Good-fitting models have values near 0.5 on this metric. For examining models' relative fits, *blavaan* supplies the DIC, WAIC, and LOOIC. The latter two metrics are computed with the help of the *loo* package [@loo]. 
Comparison of multiple models on these criteria is facilitated via `blavCompare()`, which provides standard errors of the difference between two criteria. 62 | 63 | Other notable functions include `blavFitIndices()` for alternative measures of absolute fit and `ppmc()` for general posterior predictive checks. 64 | 65 | 66 | ### Latent Variables & Standardization 67 | An often-discussed advantage of Bayesian models is their ability to describe uncertainty in "random" parameters, including random effects and latent variables. To access this functionality in *blavaan*, users must set `save.lvs = TRUE` during model estimation, as is done at the top of this page. After model estimation, users can access this information via `blavInspect()` or `blavPredict()`. Relevant arguments to `blavInspect()` include `lvmeans` and `lvs`. The former returns posterior means of latent variables, which are similar to the predictions supplied by frequentist models. The latter returns posterior samples of latent variables, so that users could summarize their uncertainties or other functions of latent variables. These posterior samples are returned as a list of length `n.chains`, where each list entry has a row per posterior sample (and the number of columns is the total number of latent variables in the model):
The `type = "yhat"` argument returns expected values of observed variables conditioned on latent variable samples; the `type = "ypred"` argument returns posterior predictions of observed variables including residual noise (essentially `yhat` + error); and the `type = "ymis"` argument returns posterior predictions of missing variables conditioned on observed. These expected values and predictions are returned in list format; for a matrix, see the last line of code below. 81 | 82 | ```{r eval=FALSE} 83 | evpreds <- blavPredict(fit, type = "yhat") 84 | postpreds <- blavPredict(fit, type = "ypred") 85 | mispreds <- blavPredict(fit, type = "ymis") 86 | 87 | ## convert to matrix from list: 88 | evpreds <- do.call("rbind", evpreds) 89 | ``` 90 | 91 | Finally, not fully related to latent variables: the `standardizedPosterior()` function will return standardized posterior draws. It calls the *lavaan* function `standardizedSolution()` in the background and has some of that function's flexibility. 92 | 93 | 94 | ### References 95 | --------------------------------------------------------------------------------