├── .Rbuildignore
├── .github
│   ├── model_addendum.pdf
│   └── workflows
│       ├── R-CMD-check.yaml
│       ├── revdep-check.yaml
│       └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── MAVB_functions.R
│   ├── RcppExports.R
│   ├── aux_functions.R
│   ├── control_functions.R
│   ├── format_functions.R
│   ├── helper_functions.R
│   ├── mgcv_functions.R
│   ├── predict_functions.R
│   ├── print_functions.R
│   ├── spline_functions.R
│   ├── squarem_functions.R
│   ├── superlearner_functions.R
│   └── vglmer_regression.R
├── README.md
├── codecov.yml
├── cran-comments.md
├── man
│   ├── LinRegChol.Rd
│   ├── MAVB.Rd
│   ├── Predict.matrix.randwalk.smooth.Rd
│   ├── custom_glmer_samples.Rd
│   ├── fallback_interpret.gam0.Rd
│   ├── fallback_subbars.Rd
│   ├── formOmega.Rd
│   ├── posterior_samples.vglmer.Rd
│   ├── reexports.Rd
│   ├── simple_EM.Rd
│   ├── sl_vglmer.Rd
│   ├── smooth.construct.randwalk.smooth.spec.Rd
│   ├── v_s.Rd
│   ├── var_mat.Rd
│   ├── vglmer-class.Rd
│   ├── vglmer.Rd
│   ├── vglmer_control.Rd
│   └── vglmer_predict.Rd
├── src
│   ├── RcppExports.cpp
│   ├── eigen_alpha.cpp
│   ├── eigen_helpers.cpp
│   ├── eigen_linalg.cpp
│   └── eigen_px.cpp
└── tests
    ├── testthat.R
    └── testthat
        ├── test-MAVB.R
        ├── test-alpha_px.R
        ├── test-errors.R
        ├── test-format_vglmer.R
        ├── test-gKRLS.R
        ├── test-generic.R
        ├── test-match_glmer.R
        ├── test-predict.R
        ├── test-splines.R
        ├── test-superlearner.R
        ├── test-translation_px.R
        └── test-update_method.R

-------------------------------------------------------------------------------- /.Rbuildignore: --------------------------------------------------------------------------------

1 | ^misc_files/*
2 | ^pres_figures/*
3 | ^\.Rproj\.user$
4 | ^old_versions/*
5 | ^figures/*
6 | ^old_figures/*
7 | ^\.travis\.yml$
8 | ^codecov\.yml$
9 | ^paper_draft/*
10 | ^\.github/*
11 | ^.*\.Rproj$
12 | cran-comments.md

-------------------------------------------------------------------------------- /.github/model_addendum.pdf: --------------------------------------------------------------------------------

https://raw.githubusercontent.com/mgoplerud/vglmer/cfc962c92350b0d343343d039e77ecb4e879e7e7/.github/model_addendum.pdf

-------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: --------------------------------------------------------------------------------

1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
2 | # Need help debugging build failures?
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: 6 | - '**' 7 | pull_request: 8 | branches: [main, master] 9 | 10 | name: R-CMD-check 11 | 12 | jobs: 13 | R-CMD-check: 14 | if: "contains(github.event.head_commit.message, '[run ci]')" 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macOS-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 26 | - {os: ubuntu-latest, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 27 | - {os: ubuntu-latest, r: 'oldrel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | - {os: ubuntu-latest, r: '3.6.3', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | env: 30 | R_KEEP_PKG_SOURCE: yes 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | OLD_PKGS: ${{ matrix.config.r == '3.6.3'}} 35 | 36 | steps: 37 | - uses: actions/checkout@v3 38 | 39 | - uses: r-lib/actions/setup-r@v2 40 | with: 41 | r-version: ${{ matrix.config.r }} 42 | http-user-agent: ${{ matrix.config.http-user-agent }} 43 | 44 | - uses: r-lib/actions/setup-pandoc@v2 45 | 46 | - name: Query dependencies 47 | run: | 48 | install.packages('remotes') 49 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 50 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 51 | shell: Rscript {0} 52 | 53 | - name: Cache R packages 54 | if: runner.os != 'Windows' 55 | uses: actions/cache@v3 56 | with: 57 | path: ${{ env.R_LIBS_USER }} 58 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 59 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 60 | 61 | - name: Install system dependencies 62 | if: runner.os == 'Linux' 63 | run: | 64 | while read -r cmd 65 | do 66 | eval sudo $cmd 67 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 68 | 69 | - name: Install curl (for rcmdcheck) 70 | if: runner.os == 'Linux' 71 | run: | 72 | sudo apt-get install libcurl4-openssl-dev 73 | 74 | - name: Install old version of "evaluate" for "mlr3" and "waldo" for "testthat" 75 | if: ${{ env.OLD_PKGS }} 76 | run: | 77 | Sys.getenv("OLD_PKGS") 78 | if (Sys.getenv("OLD_PKGS")) { 79 | remotes::install_version(package = "waldo", version = "0.5.3") 80 | remotes::install_version(package = "evaluate", version = "0.23") 81 | remotes::install_version(package = "gam", version = "1.20") 82 | } 83 | shell: Rscript {0} 84 | 85 | - name: Install dependencies 86 | run: | 87 | remotes::install_deps(dependencies = TRUE) 88 | remotes::install_cran("rcmdcheck") 89 | shell: Rscript {0} 90 | 91 | - name: Session info 92 | run: | 93 | options(width = 100) 94 | pkgs <- installed.packages()[, "Package"] 95 | sessioninfo::session_info(pkgs, include_base = TRUE) 96 | shell: Rscript {0} 97 | 98 | - name: Check 99 | env: 100 | _R_CHECK_CRAN_INCOMING_: false 101 | run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 102 | shell: Rscript {0} 103 | 104 | - name: Show testthat output 105 | if: always() 106 | run: find check -name 'testthat.Rout*' -exec cat '{}' \; || 
true 107 | shell: bash 108 | 109 | - name: Upload check results 110 | if: failure() 111 | uses: actions/upload-artifact@v3 112 | with: 113 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 114 | path: check 115 | -------------------------------------------------------------------------------- /.github/workflows/revdep-check.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | workflow_dispatch: 3 | inputs: 4 | which: 5 | type: choice 6 | description: Which dependents to check 7 | options: 8 | - strong 9 | - most 10 | 11 | name: Reverse dependency check 12 | 13 | jobs: 14 | revdep_check: 15 | name: Reverse check ${{ inputs.which }} dependents 16 | uses: r-devel/recheck/.github/workflows/recheck.yml@v1 17 | with: 18 | which: ${{ inputs.which }} 19 | subdirectory: '' #if your package is in a git subdir -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master, SQUAREM] 6 | pull_request: 7 | branches: [main, master, SQUAREM] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | if: "contains(github.event.head_commit.message, '[run ci]')||contains(github.event.head_commit.message, '[run covr]')" 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: covr 28 | 29 | - name: Test coverage 30 | run: covr::codecov() 31 | shell: Rscript {0} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | vglmer.Rproj 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: vglmer 2 | Type: Package 3 | Title: Variational Inference for Hierarchical Generalized Linear Models 4 | Version: 1.0.6 5 | Authors@R: person("Max", "Goplerud", email = "mgoplerud@austin.utexas.edu", 6 | role = c("aut", "cre")) 7 | Encoding: UTF-8 8 | License: GPL (>= 2) 9 | Description: Estimates hierarchical models using variational inference. 10 | At present, it can estimate logistic, linear, and negative binomial models. 11 | It can accommodate models with an arbitrary number of random effects and 12 | requires no integration to estimate. It also provides the ability to improve 13 | the quality of the approximation using marginal augmentation. 14 | Goplerud (2022) and Goplerud (2024) 15 | provide details on the variational algorithms. 
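
The Description field above summarizes what the package does; as a quick orientation, a minimal hedged sketch of the basic workflow (simulated data, mirroring the pattern used in the @examples of R/predict_functions.R below):

    library(vglmer)
    set.seed(123)
    sim_data <- data.frame(
      x = rnorm(100),
      y = rbinom(100, 1, 0.5),
      g = sample(letters, 100, replace = TRUE)
    )
    # Logistic model with a random intercept and slope on g;
    # summary() reports the estimated variational distributions.
    est_vglmer <- vglmer(y ~ x + (x | g), data = sim_data, family = "binomial")
    summary(est_vglmer)
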
16 | Imports: 17 | Rcpp (>= 1.0.1), 18 | lme4, 19 | CholWishart, 20 | mvtnorm, 21 | Matrix, 22 | stats, 23 | graphics, 24 | methods, 25 | lmtest, 26 | splines, 27 | mgcv 28 | Depends: 29 | R (>= 3.0.2) 30 | Suggests: 31 | SuperLearner, 32 | MASS, 33 | tictoc, 34 | testthat, 35 | gKRLS 36 | LinkingTo: Rcpp, RcppEigen (>= 0.3.3.4.0) 37 | URL: https://github.com/mgoplerud/vglmer 38 | BugReports: https://github.com/mgoplerud/vglmer/issues 39 | RoxygenNote: 7.3.2 40 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(Predict.matrix,randwalk.smooth) 4 | S3method(coef,vglmer) 5 | S3method(fitted,vglmer) 6 | S3method(fixef,vglmer) 7 | S3method(formula,vglmer) 8 | S3method(predict,SL.glmer) 9 | S3method(predict,SL.vglmer) 10 | S3method(predict,vglmer) 11 | S3method(print,vglmer) 12 | S3method(ranef,vglmer) 13 | S3method(sigma,vglmer) 14 | S3method(smooth.construct,randwalk.smooth.spec) 15 | S3method(summary,vglmer) 16 | S3method(vcov,vglmer) 17 | export(ELBO) 18 | export(MAVB) 19 | export(SL.glmer) 20 | export(SL.vglmer) 21 | export(add_formula_SL) 22 | export(fixef) 23 | export(format_glmer) 24 | export(format_vglmer) 25 | export(posterior_samples.vglmer) 26 | export(predict_MAVB) 27 | export(ranef) 28 | export(v_s) 29 | export(vglmer) 30 | export(vglmer_control) 31 | import(CholWishart) 32 | import(Matrix) 33 | import(lme4) 34 | import(mgcv) 35 | importFrom(Matrix,sparseMatrix) 36 | importFrom(Rcpp,sourceCpp) 37 | importFrom(graphics,plot) 38 | importFrom(lme4,findbars) 39 | importFrom(lme4,fixef) 40 | importFrom(lme4,mkReTrms) 41 | importFrom(lme4,ranef) 42 | importFrom(lme4,subbars) 43 | importFrom(lmtest,coeftest) 44 | importFrom(methods,as) 45 | importFrom(mgcv,Predict.matrix) 46 | importFrom(mgcv,interpret.gam) 47 | importFrom(mgcv,smooth.construct) 48 | importFrom(mvtnorm,rmvnorm) 49 | importFrom(splines,bs) 50 | importFrom(splines,spline.des) 51 | importFrom(stats,.getXlevels) 52 | importFrom(stats,as.formula) 53 | importFrom(stats,delete.response) 54 | importFrom(stats,formula) 55 | importFrom(stats,lm) 56 | importFrom(stats,model.frame) 57 | importFrom(stats,model.matrix) 58 | importFrom(stats,model.response) 59 | importFrom(stats,na.pass) 60 | importFrom(stats,optim) 61 | importFrom(stats,plogis) 62 | importFrom(stats,predict) 63 | importFrom(stats,qlogis) 64 | importFrom(stats,qnorm) 65 | importFrom(stats,quantile) 66 | importFrom(stats,rWishart) 67 | importFrom(stats,reformulate) 68 | importFrom(stats,residuals) 69 | importFrom(stats,rnorm) 70 | importFrom(stats,runif) 71 | importFrom(stats,setNames) 72 | importFrom(stats,sigma) 73 | importFrom(stats,terms) 74 | importFrom(stats,terms.formula) 75 | importFrom(stats,update.formula) 76 | importFrom(stats,var) 77 | importFrom(stats,vcov) 78 | importFrom(utils,getFromNamespace) 79 | useDynLib(vglmer) 80 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # vglmer 1.0.6 2 | 3 | * Removes unnecessary model preparation steps for `parameter_expansion="translation"` and `factorization_method="strong"`. Improves speed on default settings for models with many random effects. 4 | 5 | * Updated references in documentation. 
6 | 
7 | * Small adjustment to tests to prevent failure from an update to the `waldo` package.
8 | 
9 | # vglmer 1.0.5
10 | 
11 | * Add gKRLS as an option for smoothing multiple (continuous) covariates. Chang and Goplerud (2024; https://doi.org/10.1017/pan.2023.27) provide more details.
12 | 
13 | # vglmer 1.0.4
14 | 
15 | * Adjust `predict.vglmer` to allow for faster predictions on large datasets by not copying and filling in a large sparse matrix. Thank you to Michael Auslen for pointing out this issue.
16 | 
17 | * Add the option for `terms` to `predict` to allow for predictions for each random effect separately.
18 | 
19 | * Address a bug where predictions with `NA` in new data.frame would fail for certain splines or for cases where `newdata` had a single row.
20 | 
21 | # vglmer 1.0.3
22 | 
23 | * Adjust `vglmer` to not throw deprecation messages with Matrix 1.5. Thank you to Mikael Jagan for suggestions on how to adapt the code.
24 | 
25 | # vglmer 1.0.2
26 | 
27 | * IMPORTANT: Fixes bug where prediction with only one spline (and no random effects) was wrong; the non-linear part of the spline was ignored.
28 | * Smaller bug fixes around splines (e.g., for using a single knot) have been added as well as updated tests.
29 | 
30 | # vglmer 1.0.1
31 | 
32 | * Patch to address compiler issues on CRAN.
33 | * Add links to GitHub to the description.
34 | 
35 | # vglmer 1.0.0
36 | 
37 | * Initial submission to CRAN. Estimates linear, binomial, and negative binomial (experimental) models.

-------------------------------------------------------------------------------- /R/MAVB_functions.R: --------------------------------------------------------------------------------

1 | #' Perform MAVB after fitting vglmer
2 | #'
3 | #' @description Given a model estimated using \code{vglmer}, this function
4 | #' performs marginally augmented variational Bayes (MAVB) to improve the
5 | #' approximation quality.
6 | #'
7 | #' @details This function returns the improved estimates of the
8 | #' \emph{parameters}. To use MAVB when generating predictions, one should use
9 | #' \link{predict_MAVB}. At present, MAVB is only enabled for binomial models.
10 | #'
11 | #' @return This function returns a matrix with \code{samples} rows and columns
12 | #' for each fixed and random effect.
13 | #'
14 | #' @param object Model fit using \code{vglmer}.
15 | #' @param samples Number of samples to draw.
16 | #' @param var_px Variance of working prior for marginal augmentation. Default
17 | #' (\code{Inf}) is a flat, improper prior.
18 | #' @param verbose Show progress in drawing samples.
19 | #' @import CholWishart
20 | #' @importFrom mvtnorm rmvnorm
21 | #'
22 | #' @references
23 | #'
24 | #' Goplerud, Max. 2022. "Fast and Accurate Estimation of Non-Nested Binomial
25 | #' Hierarchical Models Using Variational Inference." \emph{Bayesian Analysis}. 17(2):
26 | #' 623-650.
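
A hedged usage sketch to accompany the documentation above (assuming `est_vglmer` is a fitted binomial model, as in the @examples of R/predict_functions.R):

    # Draw 500 samples after marginal augmentation; per @return above, the
    # result is a matrix with one named column per fixed and random effect.
    post_mavb <- MAVB(est_vglmer, samples = 500)
    colMeans(post_mavb)
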
27 | #' 28 | #' @export 29 | MAVB <- function(object, samples, verbose = FALSE, var_px = Inf) { 30 | 31 | if (!inherits(object, "vglmer")) { 32 | stop("Must provide object from vglmer") 33 | } 34 | 35 | if (object$family != "binomial") { 36 | stop("MAVB only implemented for binomial at present.") 37 | } 38 | 39 | M_prime <- object$internal_parameters$MAVB_parameters$M_prime 40 | M_prime_one <- object$internal_parameters$MAVB_parameters$M_prime_one 41 | M_mu_to_beta <- object$internal_parameters$MAVB_parameters$M_mu_to_beta 42 | B_j <- object$internal_parameters$MAVB_parameters$B_j 43 | 44 | if (!isDiagonal(B_j)){ 45 | stop('MAVB not set up for non-diagonal mean expansion; do all REs have a corresponding FE?') 46 | }else{ 47 | if (!isTRUE(all.equal(B_j@x, rep(1, nrow(B_j))))){ 48 | stop('B_j is diagonal but not identity matrix; do all REs have a corresponding FE?') 49 | } 50 | } 51 | d_j <- object$internal_parameters$MAVB_parameters$d_j 52 | g_j <- object$internal_parameters$MAVB_parameters$g_j 53 | outer_alpha_RE_positions <- object$internal_parameters$MAVB_parameters$outer_alpha_RE_positions 54 | factorization_method <- object$control$factorization_method 55 | 56 | if (factorization_method == "weak") { 57 | decomp_joint <- object$joint$decomp_var 58 | joint_mean <- rbind(object$beta$mean, object$alpha$mean) 59 | p.XZ <- ncol(decomp_joint) 60 | p.X <- nrow(object$beta$mean) 61 | } else { 62 | decomp_varA <- object$alpha$decomp_var 63 | decomp_varB <- object$beta$decomp_var 64 | p.XZ <- ncol(decomp_varA) + ncol(decomp_varB) 65 | p.X <- nrow(object$beta$mean) 66 | p.Z <- nrow(object$alpha$mean) 67 | } 68 | 69 | MAVB_sims <- matrix(NA, nrow = samples, ncol = p.XZ) 70 | regen_store <- MAVB_diff <- matrix(NA, nrow = samples, ncol = sum(d_j)) 71 | 72 | n_MAVB <- samples 73 | 74 | alpha_mean <- object$alpha$mean 75 | beta_mean <- object$beta$mean 76 | sigma_df <- object$sigma$df 77 | sigma_cov <- object$sigma$cov 78 | 79 | all_sigma <- mapply(sigma_df, sigma_cov, SIMPLIFY = FALSE, FUN = function(i, j) { 80 | rInvWishart(n = n_MAVB, df = i, Sigma = j) 81 | }) 82 | 83 | for (it in 1:n_MAVB) { 84 | if (it %% 1000 == 0 & verbose) { 85 | message(".", appendLF = F) 86 | } 87 | # Sim from VARIATIONAL approximation to posterior. 88 | if (factorization_method == "weak") { 89 | sim_joint <- joint_mean + t(decomp_joint) %*% rnorm(p.XZ) 90 | sim_beta <- sim_joint[1:p.X, , drop = F] 91 | sim_a <- sim_joint[-1:-p.X, , drop = F] 92 | } else { 93 | sim_a <- alpha_mean + t(decomp_varA) %*% rnorm(p.Z) 94 | sim_beta <- beta_mean + t(decomp_varB) %*% rnorm(p.X) 95 | } 96 | sim_sigma <- lapply(all_sigma, FUN = function(i) { 97 | i[, , it] 98 | }) 99 | # Working Prior 100 | if (var_px == Inf) { 101 | sim_px <- rep(0, sum(d_j)) 102 | } else { 103 | sim_px <- rnorm(sum(d_j), 0, sd = sqrt(var_px)) 104 | } 105 | # Transform t^{-1}_a(z) = w 106 | sim_atilde <- sim_a + M_prime_one %*% sim_px 107 | sim_btilde <- sim_beta - t(M_mu_to_beta) %*% sim_px 108 | # Draw sim_px AGAIN from its full conditional 109 | if (var_px == Inf) { 110 | # Use the LIMITING transition. 
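      # As var_px -> Inf, the flat working prior drops out of the full
      # conditional for the expansion parameter: each block j has covariance
      # Sigma_j / g_j and (per the comment below) a mean given by averaging
      # that block's g_j group-level draws via t(M_prime).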
111 | var_redux <- as.matrix(bdiag(mapply(sim_sigma, g_j, SIMPLIFY = FALSE, FUN = function(S, g) { 112 | S / g 113 | }))) 114 | # Get the MEAN not the SUM 115 | mean_redux <- t(M_prime) %*% sim_atilde 116 | } else { 117 | var_redux <- solve(diag(x = 1 / var_px, ncol = sum(d_j), nrow = sum(d_j)) + as.matrix(bdiag(mapply(sim_sigma, g_j, SIMPLIFY = FALSE, FUN = function(S, g) { 118 | solve(S) * g 119 | })))) 120 | # Use the SUM 121 | mean_redux <- var_redux %*% solve(bdiag(sim_sigma)) %*% t(M_prime_one) %*% sim_atilde 122 | } 123 | regen_px <- t(rmvnorm(1, mean_redux, var_redux)) 124 | regen_store[it, ] <- regen_px 125 | MAVB_diff[it, ] <- regen_px - sim_px 126 | final_atilde <- sim_atilde - M_prime_one %*% regen_px 127 | final_btilde <- sim_btilde + t(M_mu_to_beta) %*% regen_px 128 | 129 | MAVB_sims[it, ] <- c(as.vector(final_btilde), as.vector(final_atilde)) 130 | } 131 | colnames(MAVB_sims) <- c(rownames(object$beta$mean), 132 | rownames(object$alpha$mean)) 133 | 134 | return(MAVB_sims) 135 | } 136 | 137 | 138 | #' @import lme4 139 | get_RE_groups <- function(formula, data) { 140 | 141 | if (inherits(formula, 'formula')){ 142 | bars <- findbars(formula) 143 | }else{ 144 | bars <- formula 145 | } 146 | if (is.null(bars)){# Usually if only splines used, then NA. 147 | return(list(factor = NA, design = NA)) 148 | } 149 | 150 | barnames <- utils::getFromNamespace('barnames', 'lme4') 151 | names(bars) <- barnames(bars) 152 | 153 | fr <- data 154 | blist <- lapply(bars, simple_blist, fr, drop.unused.levels = F, reorder.vars = FALSE) 155 | blist <- lapply(blist, FUN=function(i){i[c('ff', 'mm')]}) 156 | 157 | ff <- lapply(blist, FUN=function(i){i$ff}) 158 | ff <- lapply(ff, FUN=function(i){match(i, levels(i))}) 159 | mm <- lapply(blist, FUN=function(i){i$mm}) 160 | return(list(factor = ff, design = mm)) 161 | } 162 | 163 | #' @import lme4 164 | #' @importFrom utils getFromNamespace 165 | simple_blist <- function(x, frloc, drop.unused.levels = TRUE, reorder.vars = FALSE) { 166 | frloc <- factorize(x, frloc) 167 | makeFac <- utils::getFromNamespace('makeFac', 'lme4') 168 | if (is.null(ff <- tryCatch(eval(substitute(makeFac(fac), 169 | list(fac = x[[3]])), frloc), error = function(e) NULL))) 170 | stop("couldn't evaluate grouping factor ", deparse(x[[3]]), 171 | " within model frame:", " try adding grouping factor to data ", 172 | "frame explicitly if possible", call. = FALSE) 173 | if (all(is.na(ff))) 174 | stop("Invalid grouping factor specification, ", deparse(x[[3]]), 175 | call. = FALSE) 176 | if (drop.unused.levels) 177 | ff <- factor(ff, exclude = NA) 178 | nl <- length(levels(ff)) 179 | mm <- model.matrix(eval(substitute(~foo, list(foo = x[[2]]))), 180 | frloc) 181 | if (reorder.vars) { 182 | 183 | colSort <- utils::getFromNamespace("colSort", "lme4") 184 | mm <- mm[colSort(colnames(mm)), ] 185 | } 186 | list(ff = ff, nl = nl, mm = mm, cnms = colnames(mm)) 187 | } 188 | 189 | 190 | #' Variance of Rows or Columns of Matrices 191 | #' 192 | #' Base R implementation for variance. Analogue of rowMeans. 193 | #' @name var_mat 194 | #' @keywords internal 195 | #' @param matrix Matrix of numeric inputs. 196 | rowVar <- function(matrix) { 197 | apply(matrix, MARGIN = 1, var) 198 | } 199 | 200 | #' @importFrom stats var 201 | #' @rdname var_mat 202 | colVar <- function(matrix) { 203 | apply(matrix, MARGIN = 2, var) 204 | } 205 | 206 | #' Get samples from GLMER 207 | #' 208 | #' Order samples from glmer to match names from vglmer. 
209 | #' 210 | #' @param glmer object fitted using glmer 211 | #' @param samples number of samples to draw 212 | #' @param ordering order of output 213 | #' @keywords internal 214 | #' @importFrom stats rnorm 215 | custom_glmer_samples <- function(glmer, samples, ordering) { 216 | fmt_glmer <- format_glmer(glmer) 217 | 218 | glmer_samples <- mapply(fmt_glmer$mean, fmt_glmer$var, FUN = function(m, v) { 219 | rnorm(samples, mean = m, sd = sqrt(v)) 220 | }) 221 | colnames(glmer_samples) <- fmt_glmer$name 222 | 223 | glmer_samples <- glmer_samples[, match(ordering, colnames(glmer_samples))] 224 | return(glmer_samples) 225 | } 226 | 227 | #' Draw samples from the variational distribution 228 | #' 229 | #' @description This function draws samples from the estimated variational 230 | #' distributions. If using \code{MAVB} to improve the quality of the 231 | #' approximating distribution, please use \link{MAVB} or \link{predict_MAVB}. 232 | #' @param object Model fit using \code{vglmer}. 233 | #' @param samples Number of samples to draw. 234 | #' @param verbose Show progress in drawing samples. 235 | #' 236 | #' @return This function returns a matrix with \code{samples} rows and columns 237 | #' for each fixed and random effect. 238 | #' 239 | #' @export 240 | posterior_samples.vglmer <- function(object, samples, verbose = FALSE) { 241 | if (!inherits(object, "vglmer")) { 242 | stop("Must provide object from vglmer") 243 | } 244 | 245 | M_prime <- object$internal_parameters$MAVB_parameters$M_prime 246 | M_prime_one <- object$internal_parameters$MAVB_parameters$M_prime_one 247 | M_mu_to_beta <- object$internal_parameters$MAVB_parameters$M_mu_to_beta 248 | d_j <- object$internal_parameters$MAVB_parameters$d_j 249 | g_j <- object$internal_parameters$MAVB_parameters$g_j 250 | outer_alpha_RE_positions <- object$internal_parameters$MAVB_parameters$outer_alpha_RE_positions 251 | factorization_method <- object$control$factorization_method 252 | if (factorization_method == "weak") { 253 | decomp_joint <- object$joint$decomp_var 254 | joint_mean <- rbind(object$beta$mean, object$alpha$mean) 255 | p.XZ <- ncol(decomp_joint) 256 | p.X <- nrow(object$beta$mean) 257 | } 258 | else { 259 | decomp_varA <- object$alpha$decomp_var 260 | decomp_varB <- object$beta$decomp_var 261 | p.XZ <- ncol(decomp_varA) + ncol(decomp_varB) 262 | p.X <- nrow(object$beta$mean) 263 | p.Z <- nrow(object$alpha$mean) 264 | } 265 | post_sims <- matrix(NA, nrow = samples, ncol = p.XZ) 266 | alpha_mean <- object$alpha$mean 267 | beta_mean <- object$beta$mean 268 | sigma_df <- object$sigma$df 269 | sigma_cov <- object$sigma$cov 270 | for (it in 1:samples) { 271 | if (it%%1000 == 0 & verbose) { 272 | message(".", appendLF = F) 273 | } 274 | if (factorization_method == "weak") { 275 | sim_joint <- joint_mean + t(decomp_joint) %*% rnorm(p.XZ) 276 | sim_beta <- sim_joint[1:p.X, , drop = F] 277 | sim_a <- sim_joint[-1:-p.X, , drop = F] 278 | } 279 | else { 280 | sim_a <- alpha_mean + t(decomp_varA) %*% rnorm(p.Z) 281 | sim_beta <- beta_mean + t(decomp_varB) %*% rnorm(p.X) 282 | } 283 | post_sims[it, ] <- c(as.vector(sim_beta), as.vector(sim_a)) 284 | } 285 | 286 | colnames(post_sims) <- c(rownames(object$beta$mean), 287 | rownames(object$alpha$mean)) 288 | 289 | return(post_sims) 290 | 291 | } 292 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by 
hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 | 
4 | #' Linear Regression by Cholesky
5 | #'
6 | #' Do linear regression of the form solve(X^T O X + P, X^T y), where O is
7 | #' omega and P is the precision.
8 | #'
9 | #' @keywords internal
10 | #'
11 | #' @param X Design Matrix
12 | #' @param omega Polya-Gamma weights
13 | #' @param prior_precision Prior Precision for Regression
14 | #' @param y Outcome
15 | #' @param save_chol Save Cholesky factor
16 | LinRegChol <- function(X, omega, prior_precision, y, save_chol = TRUE) {
17 |     .Call('_vglmer_LinRegChol', PACKAGE = 'vglmer', X, omega, prior_precision, y, save_chol)
18 | }
19 | 
20 | calculate_expected_outer_alpha <- function(L, alpha_mu, re_position_list) {
21 |     .Call('_vglmer_calculate_expected_outer_alpha', PACKAGE = 'vglmer', L, alpha_mu, re_position_list)
22 | }
23 | 
24 | unique_rows <- function(m) {
25 |     .Call('_vglmer_unique_rows', PACKAGE = 'vglmer', m)
26 | }
27 | 
28 | prepare_Z_for_px <- function(Mmap) {
29 |     .Call('_vglmer_prepare_Z_for_px', PACKAGE = 'vglmer', Mmap)
30 | }
31 | 
32 | chol_sparse <- function(X, omega, precision) {
33 |     .Call('_vglmer_chol_sparse', PACKAGE = 'vglmer', X, omega, precision)
34 | }
35 | 
36 | cpp_zVz <- function(Z, V) {
37 |     .Call('_vglmer_cpp_zVz', PACKAGE = 'vglmer', Z, V)
38 | }
39 | 
40 | vecR_ridge_general <- function(L, pg_mean, Z, M, mapping_J, d, start_z, diag_only) {
41 |     .Call('_vglmer_vecR_ridge_general', PACKAGE = 'vglmer', L, pg_mean, Z, M, mapping_J, d, start_z, diag_only)
42 | }
43 | 
44 | vecR_design <- function(alpha_mu, Z, M, mapping_J, d, start_z) {
45 |     .Call('_vglmer_vecR_design', PACKAGE = 'vglmer', alpha_mu, Z, M, mapping_J, d, start_z)
46 | }
47 | 
48 | vecR_fast_ridge <- function(X, omega, prior_precision, y, adjust_y) {
49 |     .Call('_vglmer_vecR_fast_ridge', PACKAGE = 'vglmer', X, omega, prior_precision, y, adjust_y)
50 | }
51 | 
52 | vecR_ridge_new <- function(L, pg_mean, mapping_J, d, store_id, store_re_id, store_design, diag_only) {
53 |     .Call('_vglmer_vecR_ridge_new', PACKAGE = 'vglmer', L, pg_mean, mapping_J, d, store_id, store_re_id, store_design, diag_only)
54 | }
55 | 

-------------------------------------------------------------------------------- /R/control_functions.R: --------------------------------------------------------------------------------

1 | #' Control for vglmer estimation
2 | #'
3 | #' This function controls various estimation options for \code{vglmer}.
4 | #'
5 | #' @param iterations Default of 1000; this sets the maximum number of iterations
6 | #' used in estimation.
7 | #' @param factorization_method Factorization assumption for the variational
8 | #' approximation. Default of \code{"strong"}, i.e. a fully factorized model.
9 | #' Described in detail in Goplerud (2022). \code{"strong"}, \code{"partial"},
10 | #' and \code{"weak"} correspond to Schemes I, II, and III respectively in that
11 | #' paper.
12 | #' @param prior_variance Prior distribution on the random effect variance
13 | #' \eqn{\Sigma_j}. Options are \code{hw}, \code{jeffreys}, \code{mean_exists},
14 | #' \code{uniform}, and \code{limit}. The default (\code{hw}) is the Huang-Wand
15 | #' (2013) prior whose hyper-parameters are \eqn{\nu_j} = 2 and \eqn{A_{j,k}} =
16 | #' 5. Otherwise, the prior is an Inverse Wishart with the following parameters
17 | #' where \eqn{d_j} is the dimensionality of the random effect \eqn{j}.
18 | #' \itemize{
19 | #' \item mean_exists: \eqn{IW(d_j + 1, I)}
20 | #' \item jeffreys: \eqn{IW(0, 0)}
21 | #' \item uniform: \eqn{IW(-[d_j+1], 0)}
22 | #' \item limit: \eqn{IW(d_j - 1, 0)}
23 | #' }
24 | #' Estimation may fail if an improper prior (\code{jeffreys}, \code{uniform},
25 | #' \code{limit}) is used.
26 | #' @param tolerance_elbo Default (\code{1e-8}) sets a convergence threshold if
27 | #' the change in the ELBO is below the tolerance.
28 | #' @param tolerance_parameters Default (\code{1e-5}) sets a convergence
29 | #' threshold that is achieved if no parameter changes by more than the
30 | #' tolerance from the prior estimated value.
31 | #' @param parameter_expansion Default of \code{"translation"} (see Goplerud
32 | #' 2022b). Valid options are \code{"translation"}, \code{"mean"}, or
33 | #' \code{"none"}. \code{"mean"} should be employed if \code{"translation"} is
34 | #' not enabled or is too computationally expensive. For negative binomial
35 | #' estimation or any estimation where \code{factorization_method != "strong"},
36 | #' only \code{"mean"} and \code{"none"} are available.
37 | #' @param px_method When \code{parameter_expansion="translation"}, the default
38 | #' (\code{"dynamic"}) tries a one-step-late update and, if this fails, a
39 | #' numerical improvement by L-BFGS-B. For an Inverse-Wishart prior on
40 | #' \eqn{\Sigma_j}, this is set to \code{"OSL"}, which only attempts a
41 | #' one-step-late update.
42 | #' @param px_numerical_it Default of 10; if L-BFGS-B is needed for a parameter
43 | #' expansion, this sets the number of steps used.
44 | #' @param hw_inner If \code{prior_variance="hw"}, this sets the number of
45 | #' repeated iterations between estimating \eqn{\Sigma_j} and \eqn{a_{j,k}}
46 | #' variational distributions at each iteration. A larger number approximates
47 | #' jointly updating both parameters. Default (10) typically performs well.
48 | #' @param force_whole Default (\code{TRUE}) requires integers for the observed
49 | #' outcome for binomial or count models. \code{FALSE} allows for fractional
50 | #' responses.
51 | #' @param vi_r_method Default (\code{"VEM"}) uses a variational EM algorithm for
52 | #' updating \eqn{r} if \code{family="negbin"}. This assumes a point mass
53 | #' distribution on \eqn{r}. A number can be provided to fix \eqn{r}. These are
54 | #' the only available options.
55 | #' @param init Default (\code{"EM_FE"}) initializes the mean variational
56 | #' parameters for \eqn{q(\beta, \alpha)} by setting the random effects to zero
57 | #' and estimating the fixed effects using a short-running EM algorithm.
58 | #' \code{"EM"} initializes the model with a ridge regression with a guess as
59 | #' to the random effect variance. \code{"random"} initializes the means
60 | #' randomly. \code{"zero"} initializes them at zero.
61 | #' @param debug_param Default (\code{FALSE}) does not store parameters before
62 | #' the final iteration. Set to \code{TRUE} to debug convergence issues.
63 | #' @param debug_ELBO Default (\code{FALSE}) does not store the ELBO after each
64 | #' parameter update. Set to \code{TRUE} to debug convergence issues.
65 | #' @param quiet_rho Default (\code{TRUE}) does not print information about
66 | #' parameter expansions. Set to \code{FALSE} to debug convergence issues.
67 | #' @param debug_px Default (\code{FALSE}) does not store information about
68 | #' whether parameter expansion worked. Set to \code{TRUE} to debug convergence
69 | #' issues.
70 | #' @param linpred_method Default (\code{"joint"}) updates the mean parameters
71 | #' for the fixed and random effects simultaneously. This can improve the speed
72 | #' of estimation but may be costly for large datasets; use \code{"cyclical"}
73 | #' to update each parameter block separately.
74 | #' @param print_prog Default (\code{NULL}) prints a \code{"."} to indicate once
75 | #' 5\% of the total iterations have elapsed. Set to a positive integer
76 | #' \code{int} to print a \code{"."} every \code{int} iterations.
77 | #' @param quiet Default (\code{TRUE}) does not print intermediate output about
78 | #' convergence. Set to \code{FALSE} to debug.
79 | #' @param return_data Default (\code{FALSE}) does not return the original
80 | #' design. Set to \code{TRUE} to debug convergence issues.
81 | #' @param verbose_time Default (\code{FALSE}) does not print the time elapsed
82 | #' for each parameter update. Set to \code{TRUE}, in conjunction with
83 | #' \code{do_timing=TRUE}, to see the time taken for each parameter update.
84 | #' @param do_timing Default (\code{FALSE}) does not estimate timing of each
85 | #' variational update; \code{TRUE} requires the package \code{tictoc}.
86 | #' @param do_SQUAREM Default (\code{TRUE}) accelerates estimation using SQUAREM
87 | #' (Varadhan and Roland 2008).
88 | #' @param verify_columns Default (\code{FALSE}) \bold{does not} verify that all
89 | #' columns are drawn from the data.frame itself versus the environment. Set to
90 | #' \code{TRUE} to debug potential issues.
91 | #'
92 | #' @return This function returns a named list with class \code{vglmer_control}.
93 | #' It is passed to \code{vglmer} in the argument \code{control}. This argument
94 | #' only accepts objects created using \code{vglmer_control}.
95 | #'
96 | #' @references
97 | #'
98 | #' Goplerud, Max. 2022. "Fast and Accurate Estimation of Non-Nested Binomial
99 | #' Hierarchical Models Using Variational Inference." \emph{Bayesian Analysis}.
100 | #' 17(2): 623-650.
101 | #'
102 | #' Goplerud, Max. 2024. "Re-Evaluating Machine Learning for MRP Given the
103 | #' Comparable Performance of (Deep) Hierarchical Models." \emph{American
104 | #' Political Science Review}. 118(1): 529-536.
105 | #'
106 | #' Huang, Alan, and Matthew P. Wand. 2013. "Simple Marginally Noninformative
107 | #' Prior Distributions for Covariance Matrices." \emph{Bayesian Analysis}.
108 | #' 8(2): 439-452.
109 | #'
110 | #' Varadhan, Ravi, and Christophe Roland. 2008. "Simple and Globally Convergent
111 | #' Methods for Accelerating the Convergence of any EM Algorithm."
112 | #' \emph{Scandinavian Journal of Statistics}. 35(2): 335-353.
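
Because several of these options interact (for example, a non-strong factorization forces parameter_expansion to "mean"), a short hedged sketch of building a control object and passing it to vglmer() (reusing the simulated `sim_data` from the package examples):

    # Scheme III ("weak") factorization without SQUAREM acceleration
    ctrl <- vglmer_control(
      factorization_method = "weak",
      do_SQUAREM = FALSE,
      tolerance_elbo = 1e-10
    )
    fit_weak <- vglmer(y ~ x + (x | g), data = sim_data,
                       family = "binomial", control = ctrl)
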
113 | #' @export
114 | vglmer_control <- function(iterations = 1000,
115 |                            prior_variance = "hw",
116 |                            factorization_method = c("strong", "partial", "weak"),
117 |                            parameter_expansion = "translation", do_SQUAREM = TRUE,
118 |                            tolerance_elbo = 1e-8, tolerance_parameters = 1e-5,
119 |                            force_whole = TRUE, print_prog = NULL,
120 |                            do_timing = FALSE, verbose_time = FALSE,
121 |                            return_data = FALSE, linpred_method = "joint",
122 |                            vi_r_method = "VEM", verify_columns = FALSE,
123 |                            debug_param = FALSE, debug_ELBO = FALSE, debug_px = FALSE,
124 |                            quiet = TRUE, quiet_rho = TRUE,
125 |                            px_method = 'dynamic', px_numerical_it = 10,
126 |                            hw_inner = 10,
127 |                            init = "EM_FE") {
128 | 
129 |   factorization_method <- match.arg(factorization_method)
130 |   prior_variance <- match.arg(prior_variance,
131 |     choices = c("hw", "mean_exists", "jeffreys", "limit", "uniform"))
132 |   linpred_method <- match.arg(linpred_method, choices = c("joint", "cyclical", "solve_normal"))
133 |   parameter_expansion <- match.arg(parameter_expansion, choices = c("translation", "mean", "none"))
134 |   # vi_r_method <- match.arg(vi_r_method, choices = c("VEM", "fixed", "Laplace", "delta"))
135 |   init <- match.arg(init, choices = c("EM_FE", "EM", "random", "zero"))
136 |   if (!is.null(print_prog)){
137 |     if (print_prog < 0){stop('print_prog must be non-negative integer or NULL.')}
138 |   }
139 | 
140 |   if (iterations < 0){stop('iterations must be a positive integer')}
141 |   if (tolerance_elbo < 0 | tolerance_parameters < 0){
142 |     stop('tolerance for ELBO and parameters must be non-negative.')
143 |   }
144 | 
145 |   if (factorization_method != "strong" & parameter_expansion != "mean"){
146 |     message('Setting parameter_expansion to mean for non-strong factorization')
147 |     parameter_expansion <- 'mean'
148 |   }
149 |   if (prior_variance != 'hw' & px_method != 'OSL' & parameter_expansion %in% c('diagonal', 'translation')){
150 |     px_method <- 'OSL'
151 |     message('Setting px_method to "OSL" for translation with a non-HW prior.')
152 |   }
153 | 
154 |   output <- mget(ls())
155 | 
156 |   class(output) <- c("vglmer_control")
157 |   return(output)
158 | }

-------------------------------------------------------------------------------- /R/format_functions.R: --------------------------------------------------------------------------------

https://raw.githubusercontent.com/mgoplerud/vglmer/cfc962c92350b0d343343d039e77ecb4e879e7e7/R/format_functions.R

-------------------------------------------------------------------------------- /R/mgcv_functions.R: --------------------------------------------------------------------------------

1 | # Functions for creating custom "smooth.construct" to implement
2 | # in mgcv but also vglmer
3 | 
4 | gen.trend.D <- function(order, l){
5 |   bcoef <- choose(n = order + 1, k = 0:(order+1))
6 |   bcoef <- bcoef * (-1)^(0:(order +1))
7 |   if (length(bcoef) > l){
8 |     stop('Lower the polynomial order: "l" must be at least order + 2')
9 |   }
10 |   n.rows <- l - order - 1
11 |   t.D <- do.call('rbind', lapply(1:n.rows, FUN=function(i){
12 |     cbind(i, i - 1 + 1:length(bcoef), bcoef)
13 |   }))
14 |   t.D <- sparseMatrix(i = t.D[,1], j = t.D[,2], x = t.D[,3])
15 |   if (ncol(t.D) != l){
16 |     stop('Unusual Error')
17 |   }
18 |   return(t.D)
19 | }
20 | 
21 | #' Constructor for random walk smooth
22 | #' @import mgcv
23 | #' @importFrom Matrix sparseMatrix
24 | #' @keywords internal
25 | #' @details See the function \link{v_s} for details.
26 | #' @param object a smooth object; see documentation for other methods in
27 | #' \code{mgcv}.
28 | #' @param data a data.frame; see documentation for other methods in 29 | #' \code{mgcv}. 30 | #' @param knots not used 31 | #' @export 32 | smooth.construct.randwalk.smooth.spec <- function(object, data, knots){ 33 | 34 | if (length(knots) != 0){stop('"knots" not used for random walk.')} 35 | if (is.null(object$xt)) { 36 | object$xt <- NULL 37 | } 38 | 39 | if (is.null(object$xt)){ 40 | smooth_order <- 0 41 | }else{ 42 | smooth_order <- object$xt$order 43 | if (is.null(smooth_order)){ 44 | smooth_order <- 0 45 | } 46 | } 47 | 48 | x <- data[[object$term]] 49 | if (is.character(x) || is.factor(x)){ 50 | if (!is.ordered(x)){ 51 | stop('randwalk requires an *ordered* factor to be provided.') 52 | } 53 | }else{ 54 | stop('randwalk only set up for character or factor; you may consider using "by" for evolution of a continuous covariate over time') 55 | } 56 | 57 | x <- data[[object$term]] 58 | lx <- levels(x) 59 | 60 | X <- sparseMatrix(i = 1:length(x), j = match(x, lx), x = 1, dims = c(length(x), length(lx))) 61 | 62 | # Get the random walk penalty matrix 63 | S <- crossprod(gen.trend.D(order = smooth_order, l = length(lx))) 64 | eS <- eigen(S) 65 | zero_ev <- which(eS$values < sqrt(.Machine$double.eps)) 66 | if (length(zero_ev) != 1 || isTRUE(zero_ev != ncol(S))){ 67 | stop('Invalid eigendecomposition...') 68 | } 69 | X <- X %*% eS$vectors 70 | last_col <- X[,length(lx)] 71 | stopifnot(var(last_col) < .Machine$double.eps) 72 | X <- X[,-length(lx)] 73 | X <- X %*% Diagonal(x=1/sqrt(eS$values[-length(lx)])) 74 | 75 | S <- Diagonal(n = ncol(X)) 76 | X <- as.matrix(X) 77 | S <- as.matrix(S) 78 | 79 | object$X <- X 80 | object$S <- list(S) 81 | # Required elements for prediction 82 | object$transf_mat <- eS$vectors[,-ncol(eS$vectors)] %*% Diagonal(x=1/sqrt(eS$values[-length(lx)])) 83 | object$levels <- lx 84 | # Required elements for "gam" 85 | object$rank <- ncol(S) 86 | object$null.space.dim <- 0 87 | object$df <- ncol(S) 88 | object$te.ok <- 0 89 | object$plot.me <- FALSE 90 | object$C <- matrix(nrow = 0, ncol = ncol(S)) 91 | # If rescale_penalty is NOT true, then set "no.rescale" to TRUE, i.e. to not rescale. 92 | # otherwise, leave null. 93 | if (!is.null(object$xt$rescale_penalty) && !object$xt$rescale_penalty) { 94 | object$no.rescale <- TRUE 95 | } 96 | class(object) <- 'randwalk.smooth' 97 | object 98 | } 99 | 100 | #' Predict Methods for random walk smooth 101 | #' @keywords internal 102 | #' @param object a smooth object; see documentation for other methods in 103 | #' \code{mgcv}. 104 | #' @param data a data.frame; see documentation for other methods in 105 | #' \code{mgcv}. 
106 | #' @export 107 | Predict.matrix.randwalk.smooth <- function(object, data){ 108 | 109 | x <- data[[object$term]] 110 | lx <- object$levels 111 | if (!all(x %in% c(NA, lx))){ 112 | stop('Prediction for random walk cannot work if levels in (new) data are not in original factor levels.') 113 | # if (object$internal_override){ 114 | # xj <- match(x, lx) 115 | # id_valid <- which(!is.na(xj)) 116 | # X <- sparseMatrix(i = (1:length(x))[id_valid], j = xj[id_valid], x = 1, dims = c(length(x), length(lx))) 117 | # }else{ 118 | # stop('Prediction for random walk cannot work if levels not in original factor levels.') 119 | # } 120 | }else{ 121 | xj <- match(x, lx) 122 | is_not_na <- which(!is.na(x)) 123 | X <- sparseMatrix(i = (1:length(x))[is_not_na], j = xj[is_not_na], x = 1, dims = c(length(x), length(lx))) 124 | } 125 | X <- as.matrix(X %*% object$transf_mat) 126 | return(X) 127 | } 128 | -------------------------------------------------------------------------------- /R/predict_functions.R: -------------------------------------------------------------------------------- 1 | #' Predict after vglmer 2 | #' 3 | #' @description These functions calculate the estimated linear predictor using 4 | #' the variational distributions. \code{predict.vglmer} draws predictions 5 | #' using the estimated variational distributions; \code{predict_MAVB} does so 6 | #' using the MAVB procedure described in Goplerud (2022). 7 | #' @name vglmer_predict 8 | #' @param object Model fit using \code{vglmer}. 9 | #' @param newdata Dataset to use for predictions. It cannot be missing. 10 | #' @param samples Number of samples to draw. Using \code{0} (default) gives the 11 | #' expectation of the linear predictor. A positive integer draws 12 | #' \code{samples} samples from the variational distributions and calculates 13 | #' the linear predictor. 14 | #' @param type Default (\code{"link"}) returns the linear predictor; 15 | #' \code{"terms"} returns the predicted value for each random effect (or 16 | #' spline) separately as well as one that collects all fixed effects. At the 17 | #' moment, other options are not enabled. 18 | #' @param samples_only Default (\code{FALSE}) returns the samples from the 19 | #' variational distributions, \bold{not} the prediction. Each row is a sample and 20 | #' each column is a parameter. 21 | #' @param summary Default (\code{TRUE}) returns the mean and variance of the 22 | #' samples for each observation. \code{FALSE} returns a matrix of the sampled 23 | #' linear predictor for each observation. Each row is a sample and each column 24 | #' is an observation. 25 | #' @param allow_missing_levels Default (\code{FALSE}) does not allow prediction 26 | #' for levels not observed in the original data. \code{TRUE} allows for 27 | #' prediction on unseen levels; the value of \code{0} (with no uncertainty) is 28 | #' used for the corresponding random effect. 29 | #' @param ... Not used; included to maintain compatibility with existing 30 | #' methods. 31 | #' 32 | #' @examples 33 | #' 34 | #' set.seed(123) 35 | #' sim_data <- data.frame( 36 | #' x = rnorm(100), 37 | #' y = rbinom(100, 1, 0.5), 38 | #' g = sample(letters, 100, replace = TRUE) 39 | #' ) 40 | #' 41 | #' # Run with defaults 42 | #' est_vglmer <- vglmer(y ~ x + (x | g), data = sim_data, family = "binomial") 43 | #' 44 | #' # Simple prediction 45 | #' predict(est_vglmer, newdata = sim_data) 46 | #' # Return 10 posterior draws of the linear predictor for each observation. 
47 | #' predict_MAVB(est_vglmer, newdata = sim_data, summary = FALSE, samples = 10) 48 | #' # Predict with a new level; note this would fail if 49 | #' # allow_missing_levels = FALSE (the default) 50 | #' predict(est_vglmer, 51 | #' newdata = data.frame(g = "AB", x = 0), 52 | #' allow_missing_levels = TRUE 53 | #' ) 54 | #' @return This function returns an estimate of the linear predictor. The 55 | #' default returns the expected mean, i.e. \eqn{E_{q(\alpha,\beta)}[x_i^T 56 | #' \beta + z_i^T\alpha]}. If \code{samples > 0}, these functions return a 57 | #' summary of the prediction for each observation, i.e. the estimated mean and 58 | #' variance. If \code{summary = FALSE}, the sampled values of the linear 59 | #' predictor are returned as a matrix. \code{predict_MAVB} performs MAVB as 60 | #' described in Goplerud (2022) before returning the linear predictor. 61 | #' 62 | #' If \code{allow_missing_levels = TRUE}, then observations with a new 63 | #' (unseen) level for the random effect are given a value of zero for that 64 | #' term of the prediction. 65 | #' @importFrom stats delete.response terms na.pass 66 | #' @export 67 | predict.vglmer <- function(object, newdata, type = 'link', 68 | samples = 0, samples_only = FALSE, 69 | summary = TRUE, allow_missing_levels = FALSE, ...) { 70 | if (length(list(...)) > 0) { 71 | stop("... not used for predict.vglmer") 72 | } 73 | if (!(type %in% c('link', 'terms'))){ 74 | stop('vglmer only uses "terms" and "link" for "type" in predict.') 75 | } 76 | newdata <- as.data.frame(newdata) 77 | rownames(newdata) <- as.character(1:nrow(newdata)) 78 | 79 | parse_formula <- object$formula$interpret_gam 80 | if (!all(parse_formula$pred.names %in% colnames(newdata))){ 81 | missing_columns <- setdiff(parse_formula$pred.names, colnames(newdata)) 82 | stop( 83 | paste0('The following columns are missing from "newdata": ', 84 | paste(missing_columns, collapse =', ')) 85 | ) 86 | } 87 | fmla <- formula(object, form = 'original') 88 | 89 | newdata_FE <- model.frame(delete.response(object$formula$fe_terms), 90 | data = newdata, xlev = object$formula$fe_Xlevels, na.action = na.pass) 91 | X <- model.matrix( 92 | delete.response(object$formula$fe_terms), newdata_FE, 93 | contrasts.arg = object$formula$fe_contrasts) 94 | 95 | orig_X_names <- rownames(object$beta$mean) 96 | if (!identical(colnames(X), orig_X_names)) { 97 | print(all.equal(colnames(X), orig_X_names)) 98 | stop("Misaligned Fixed Effects") 99 | } 100 | 101 | mk_Z <- model.frame(delete.response(terms(object$formula$interpret_gam$fake.formula)), 102 | data = newdata, drop.unused.levels = TRUE) 103 | rownames_Z <- rownames(mk_Z) 104 | 105 | 106 | if (!is.null(object$formula$re) & (length(object$formula$re) > 0) ){ 107 | 108 | # Extract the Z (Random Effect) design matrix. 109 | mk_Z <- mkReTrms(formula(object, form = 're'), mk_Z, reorder.terms = FALSE, reorder.vars = FALSE) 110 | Z <- t(mk_Z$Zt) 111 | 112 | # RE names and names of variables included for each. 113 | names_of_RE <- mk_Z$cnms 114 | 115 | if (anyDuplicated(names(names_of_RE)) > 0){ 116 | warning('Some random effects names are duplicated. Re-naming for stability by adding "-[0-9]" at end.') 117 | nre <- names(names_of_RE) 118 | unre <- unique(nre) 119 | for (u in unre){ 120 | nre_u <- which(nre == u) 121 | if (length(nre_u) > 1){ 122 | nre[nre_u] <- paste0(nre[nre_u], '-', seq_len(length(nre_u))) 123 | } 124 | } 125 | names(names_of_RE) <- nre 126 | if (anyDuplicated(names(names_of_RE)) > 0){ 127 | stop('Renaming duplicates failed. 
Please rename random effects to proceed.') 128 | } 129 | } 130 | 131 | number_of_RE <- length(mk_Z$Gp) - 1 132 | # The position that demarcates each random effect. 133 | # That is, breaks_for_RE[2] means at that position + 1 does RE2 start. 134 | breaks_for_RE <- c(0, cumsum(diff(mk_Z$Gp))) 135 | # Dimensionality of \alpha_{j,g}, i.e. 1 if random intercept 136 | # 2 if random intercept + random slope 137 | d_j <- lengths(names_of_RE) 138 | # Number of GROUPs for each random effect. 139 | g_j <- diff(mk_Z$Gp) / d_j 140 | 141 | # Empty vector to build the formatted names for each random effect. 142 | fmt_names_Z <- c() 143 | init_Z_names <- colnames(Z) 144 | for (v in 1:number_of_RE) { 145 | name_of_effects_v <- names_of_RE[[v]] 146 | 147 | mod_name <- rep(name_of_effects_v, g_j[v]) 148 | 149 | levels_of_re <- init_Z_names[(1 + breaks_for_RE[v]):breaks_for_RE[v + 1]] 150 | 151 | fmt_names_Z <- c(fmt_names_Z, paste0(names(names_of_RE)[v], " @ ", mod_name, " @ ", levels_of_re)) 152 | } 153 | colnames(Z) <- fmt_names_Z 154 | }else{ 155 | 156 | Z <- drop0(Matrix(nrow = nrow(X), ncol = 0)) 157 | p.X <- ncol(X) 158 | p.Z <- 0 159 | names_of_RE <- c() 160 | number_of_RE <- 0 161 | breaks_for_RE <- c(0) 162 | d_j <- c() 163 | g_j <- c() 164 | fmt_names_Z <- c() 165 | cyclical_pos <- list() 166 | 167 | } 168 | 169 | # Extract the Specials 170 | if (length(parse_formula$smooth.spec) > 0){ 171 | base_specials <- length(parse_formula$smooth.spec) 172 | # Number of splines + one for each "by"... 173 | n.specials <- base_specials + 174 | sum(sapply(parse_formula$smooth.spec, FUN=function(i){i$by}) != "NA") 175 | 176 | 177 | Z.spline <- as.list(rep(NA, n.specials)) 178 | Z.spline.size <- rep(NA, n.specials) 179 | Z.spline.attr <- object$internal_parameters$spline$attr 180 | 181 | special_counter <- 1 182 | store_spline_type <- rep(NA, n.specials) 183 | for (i in 1:base_specials){ 184 | 185 | special_i <- parse_formula$smooth.spec[[i]] 186 | 187 | if (special_i$type %in% c('gKRLS', 'randwalk')){ 188 | all_splines_i <- newdata[special_i$term] 189 | special_i$fmt_term <- paste0('(', paste0(special_i$term, collapse=','), ')') 190 | }else{ 191 | all_splines_i <- newdata[[special_i$term]] 192 | special_i$fmt_term <- special_i$term 193 | } 194 | 195 | all_splines_i <- vglmer_build_spline(x = all_splines_i, 196 | knots = Z.spline.attr[[i]]$knots, 197 | xt = Z.spline.attr[[i]]$xt, 198 | Boundary.knots = Z.spline.attr[[i]]$Boundary.knots, 199 | by = newdata[[Z.spline.attr[[i]]$by]], outer_okay = TRUE, 200 | type = Z.spline.attr[[i]]$type, override_warn = TRUE, 201 | force_vector = TRUE) 202 | 203 | spline_counter <- 1 204 | 205 | for (spline_i in all_splines_i){ 206 | 207 | stopifnot(spline_counter %in% 1:2) 208 | 209 | colnames(spline_i$x) <- paste0('spline @ ', special_i$fmt_term, ' @ ', colnames(spline_i$x)) 210 | 211 | if (spline_counter > 1){ 212 | spline_name <- paste0('spline-',special_i$fmt_term,'-', i, '-int') 213 | }else{ 214 | spline_name <- paste0('spline-', special_i$fmt_term, '-', i, '-base') 215 | } 216 | 217 | Z.spline[[special_counter]] <- spline_i$x 218 | Z.spline.size[special_counter] <- ncol(spline_i$x) 219 | 220 | names_of_RE[[spline_name]] <- spline_name 221 | number_of_RE <- number_of_RE + 1 222 | d_j <- setNames(c(d_j, 1), c(names(d_j), spline_name)) 223 | g_j <- setNames(c(g_j, ncol(spline_i$x)), c(names(g_j), spline_name)) 224 | breaks_for_RE <- c(breaks_for_RE, max(breaks_for_RE) + ncol(spline_i$x)) 225 | fmt_names_Z <- c(fmt_names_Z, colnames(spline_i$x)) 226 | 227 | 
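      # Record whether this column block is the base spline (spline_counter
      # == 1) or its "by"-interaction (== 2); used below to check alignment
      # against the estimation-time splines.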
store_spline_type[special_counter] <- spline_counter 228 | 229 | spline_counter <- spline_counter + 1 230 | special_counter <- special_counter + 1 231 | 232 | } 233 | 234 | } 235 | 236 | Z.spline <- drop0(do.call('cbind', Z.spline)) 237 | rownames(Z.spline) <- rownames(newdata) 238 | 239 | if (ncol(Z) > 0){ 240 | Z.spline <- Z.spline[match(rownames(Z), rownames(Z.spline)),, drop = FALSE] 241 | Z <- drop0(cbind(Z, Z.spline)) 242 | }else{ 243 | Z <- Z.spline 244 | } 245 | 246 | if (!isTRUE(all.equal(names_of_RE, object$internal_parameters$names_of_RE))){ 247 | stop('Names of REs do not match estimation data. This may occur when REs have to be re-named.') 248 | } 249 | 250 | if (!isTRUE(identical(object$internal_parameters$spline$size[store_spline_type %in% 1], 251 | Z.spline.size[store_spline_type %in% 1]))){ 252 | stop('Misalignment of splines in prediction.') 253 | } 254 | if (!isTRUE(identical(names_of_RE, object$internal_parameters$names_of_RE))){ 255 | stop('Misalignment of spline names in prediction.') 256 | } 257 | 258 | }else{ 259 | n.specials <- 0 260 | Z.spline.attr <- NULL 261 | Z.spline <- NULL 262 | Z.spline.size <- NULL 263 | } 264 | 265 | ##### 266 | ### Confirm Alignment of the Z 267 | ##### 268 | orig_Z_names <- rownames(object$alpha$mean) 269 | 270 | not_in_original_Z <- setdiff(fmt_names_Z, orig_Z_names) 271 | not_in_new_Z <- setdiff(orig_Z_names, fmt_names_Z) 272 | 273 | if (length(not_in_original_Z) > 0) { 274 | if (!allow_missing_levels) { 275 | stop("New levels not allowed unless allow_missing_levels = TRUE") 276 | } 277 | } 278 | 279 | # Select overlapping columns 280 | in_both <- intersect(fmt_names_Z, orig_Z_names) 281 | # Find the ones that are missing 282 | missing_cols <- setdiff(orig_Z_names, in_both) 283 | 284 | recons_Z <- Z[, match(in_both, fmt_names_Z), drop = F] 285 | if (length(missing_cols) > 0){ 286 | # Create a matrix of zeros to pad the missing columns 287 | pad_zero <- sparseMatrix(i = 1, j = 1, x = 0, 288 | dims = c(nrow(Z), length(missing_cols))) 289 | colnames(pad_zero) <- missing_cols 290 | # Combine and then reorder to be lined-up correctly 291 | recons_Z <- cbind(recons_Z, pad_zero) 292 | } 293 | recons_Z <- recons_Z[, match(orig_Z_names, colnames(recons_Z)), drop = F] 294 | 295 | # Old method for prediction 296 | # in_both <- intersect(fmt_names_Z, orig_Z_names) 297 | # recons_Z <- drop0(sparseMatrix(i = 1, j = 1, x = 0, dims = c(nrow(Z), length(orig_Z_names)))) 298 | # colnames(recons_Z) <- orig_Z_names 299 | # rownames(recons_Z) <- rownames_Z 300 | # recons_Z[, match(in_both, orig_Z_names)] <- Z[, match(in_both, fmt_names_Z)] 301 | 302 | # Check that the entirely missing columns match those not in the original 303 | checksum_align <- setdiff(not_in_new_Z, 304 | sort(names(which(colSums(recons_Z != 0) == 0)))) 305 | if (length(checksum_align) > 0) { 306 | stop("Alignment Error") 307 | } 308 | 309 | Z <- recons_Z 310 | rm(recons_Z); gc() 311 | 312 | #### 313 | 314 | total_obs <- rownames(newdata) 315 | obs_in_both <- intersect(rownames(X), rownames(Z)) 316 | 317 | if (type == 'terms'){ 318 | if (samples != 0){stop('"terms" only enabled for samples=0.')} 319 | # Calculate the linear predictor separately for each random effect 320 | # (and fixed effects) and report a matrix of those predictions. 
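    # Each column holds one term's contribution ('FE' plus one column per
    # random effect or spline), so rowSums() of the result recovers the
    # full expected linear predictor.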
321 | 322 | X <- X[match(obs_in_both, rownames(X)), , drop = F] 323 | Z <- Z[match(obs_in_both, rownames(Z)), , drop = F] 324 | lp_FE <- as.vector(X %*% object$beta$mean) 325 | vi_alpha_mean <- object$alpha$mean 326 | lp_terms <- lapply(object$internal_parameters$cyclical_pos, FUN=function(i){ 327 | as.vector(Z[,i,drop=F] %*% vi_alpha_mean[i,drop=F]) 328 | }) 329 | lp_terms <- do.call('cbind', lp_terms) 330 | colnames(lp_terms) <- names(object$internal_parameters$names_of_RE) 331 | lp_terms <- cbind('FE' = lp_FE, lp_terms) 332 | lp_terms <- lp_terms[match(total_obs, obs_in_both), , drop = F] 333 | gc() 334 | return(lp_terms) 335 | 336 | }else{ 337 | XZ <- cbind( 338 | X[match(obs_in_both, rownames(X)), , drop = F], 339 | Z[match(obs_in_both, rownames(Z)), , drop = F] 340 | ) 341 | } 342 | gc() 343 | 344 | factorization_method <- object$control$factorization_method 345 | if (is.matrix(samples)) { 346 | if (ncol(samples) != ncol(XZ)) { 347 | stop("Samples must be {m, ncol(Z) + ncol(X)}") 348 | } 349 | samples <- t(samples) 350 | only.lp <- FALSE 351 | } else { 352 | if (samples == 0) { 353 | only.lp <- TRUE 354 | } else { 355 | only.lp <- FALSE 356 | } 357 | if (factorization_method %in% c("strong", "partial")) { 358 | vi_alpha_mean <- object$alpha$mean 359 | vi_alpha_decomp <- object$alpha$decomp_var 360 | 361 | p.Z <- nrow(vi_alpha_mean) 362 | 363 | vi_beta_mean <- object$beta$mean 364 | vi_beta_decomp <- object$beta$decomp_var 365 | 366 | p.X <- nrow(vi_beta_mean) 367 | 368 | if (!only.lp) { 369 | sim_init_alpha <- matrix(rnorm(samples * p.Z), ncol = samples) 370 | sim_init_alpha <- t(vi_alpha_decomp) %*% sim_init_alpha 371 | sim_init_alpha <- sim_init_alpha + kronecker(vi_alpha_mean, t(matrix(1, samples))) 372 | 373 | sim_init_beta <- matrix(rnorm(samples * p.X), ncol = samples) 374 | sim_init_beta <- t(vi_beta_decomp) %*% sim_init_beta 375 | sim_init_beta <- sim_init_beta + kronecker(vi_beta_mean, t(matrix(1, samples))) 376 | } else { 377 | sim_init_alpha <- vi_alpha_mean 378 | sim_init_beta <- vi_beta_mean 379 | } 380 | } else if (factorization_method == "weak") { 381 | vi_alpha_mean <- object$alpha$mean 382 | p.Z <- nrow(vi_alpha_mean) 383 | 384 | vi_beta_mean <- object$beta$mean 385 | p.X <- nrow(vi_beta_mean) 386 | 387 | if (!only.lp) { 388 | vi_joint_decomp <- object$joint$decomp_var 389 | sim_init_joint <- matrix(rnorm(samples * (p.X + p.Z)), ncol = samples) 390 | sim_init_joint <- t(vi_joint_decomp) %*% sim_init_joint 391 | 392 | sim_init_beta <- sim_init_joint[1:p.X, , drop = F] 393 | sim_init_alpha <- sim_init_joint[-1:-p.X, , drop = F] 394 | 395 | rm(sim_init_joint) 396 | 397 | sim_init_alpha <- sim_init_alpha + kronecker(vi_alpha_mean, t(matrix(1, samples))) 398 | sim_init_beta <- sim_init_beta + kronecker(vi_beta_mean, t(matrix(1, samples))) 399 | } else { 400 | sim_init_alpha <- vi_alpha_mean 401 | sim_init_beta <- vi_beta_mean 402 | } 403 | } else { 404 | stop("") 405 | } 406 | 407 | samples <- rbind(sim_init_beta, sim_init_alpha) 408 | rm(sim_init_beta, sim_init_alpha) 409 | } 410 | 411 | if (samples_only) { 412 | return(t(samples)) 413 | } 414 | 415 | lp <- XZ %*% samples 416 | if (summary) { 417 | if (!only.lp) { 418 | lp <- t(apply(lp, MARGIN = 1, FUN = function(i) { 419 | c(mean(i), var(i)) 420 | })) 421 | lp <- data.frame(mean = lp[, 1], var = lp[, 2]) 422 | lp <- lp[match(total_obs, obs_in_both), ] 423 | rownames(lp) <- NULL 424 | } else { 425 | 426 | if (ncol(lp) != 1){ 427 | lp <- as.vector(lp) 428 | }else{ 429 | lp <- as.vector(t(apply(lp, MARGIN = 1, FUN = 
function(i) { 430 | mean(i) 431 | }))) 432 | } 433 | lp <- lp[match(total_obs, obs_in_both)] 434 | rownames(lp) <- NULL 435 | } 436 | return(lp) 437 | } else { 438 | lp <- lp[match(total_obs, obs_in_both), , drop = F] 439 | rownames(lp) <- NULL 440 | return(t(lp)) 441 | } 442 | } 443 | 444 | 445 | #' @inheritParams MAVB 446 | #' @inheritParams vglmer_predict 447 | #' @rdname vglmer_predict 448 | #' @export 449 | predict_MAVB <- function(object, newdata, samples = 0, samples_only = FALSE, 450 | var_px = Inf, summary = TRUE, allow_missing_levels = FALSE) { 451 | pxSamples <- MAVB(object = object, samples = samples, var_px = var_px) 452 | lp <- predict.vglmer(object, 453 | newdata = newdata, samples = pxSamples, samples_only = samples_only, 454 | summary = summary, allow_missing_levels = allow_missing_levels 455 | ) 456 | return(lp) 457 | } 458 | -------------------------------------------------------------------------------- /R/print_functions.R: -------------------------------------------------------------------------------- 1 | 2 | #' Generic Functions after Running vglmer 3 | #' 4 | #' \code{vglmer} uses many standard methods from \code{lm} and \code{lme4} with 5 | #' limited changes. These provide summaries of the estimated variational 6 | #' distributions. 7 | #' 8 | #' @details The accompanying functions are briefly described below. 9 | #' 10 | #' \code{coef} and \code{vcov} return the mean and variance of the fixed effects 11 | #' (\eqn{\beta}). \code{fixef} returns the mean of the fixed effects. 12 | #' 13 | #' \code{ranef} extracts the random effects (\eqn{\alpha}) in a similar, 14 | #' although slightly different format, to \code{lme4}. It includes the estimated 15 | #' posterior mean and variance in a list of data.frames with one entry per 16 | #' random effect \eqn{j}. 17 | #' 18 | #' \code{fitted} extracts the estimated expected \emph{linear predictor}, i.e. 19 | #' \eqn{E_{q(\theta)}[x_i^T \beta + z_i^T \alpha]} at convergence. 20 | #' 21 | #' \code{summary} reports the estimates for all fixed effects as in \code{lm} as 22 | #' well as some summaries of the random effects (if \code{display_re=TRUE}). 23 | #' 24 | #' \code{format_vglmer} collects the mean and variance of the fixed and random 25 | #' effects into a single data.frame. This is useful for examining all of the 26 | #' posterior estimates simultaneously. \code{format_glmer} converts an object 27 | #' estimated with \code{[g]lmer} into a comparable format. 28 | #' 29 | #' \code{ELBO} extracts the ELBO from the estimated model. \code{type} can be 30 | #' set equal to \code{"trajectory"} to get the estimated ELBO at each iteration 31 | #' and assess convergence. 32 | #' 33 | #' \code{sigma} extracts the square root of the posterior mode of 34 | #' \eqn{q(\sigma^2)} if a linear model is used. 35 | #' 36 | #' \code{formula} extracts the formula associated with the \code{vglmer} object. 37 | #' By default, it returns the formula provided. The fixed and random effects 38 | #' portions can be extracted separately using the \code{form} argument. 39 | #' 40 | #' @name vglmer-class 41 | #' @param object Model fit using vglmer 42 | #' 43 | #' @return The functions here return a variety of different objects depending on 44 | #' the specific function. "Details" describes the behavior of each one. Their 45 | #' output is similar to the typical behavior for the corresponding generic 46 | #' functions. 47 | 48 | #' @rdname vglmer-class 49 | #' @export 50 | fixef.vglmer <- function(object, ...) 
{
51 | out <- object$beta$mean
52 | rn <- rownames(out)
53 | out <- as.vector(out)
54 | names(out) <- rn
55 | return(out)
56 | }
57 | 
58 | # Load fixef, ranef, sigma from lme4
59 | #' @export
60 | lme4::fixef
61 | #' @export
62 | lme4::ranef
63 | 
64 | #' @importFrom stats sigma
65 | 
66 | #' @rdname vglmer-class
67 | #' @export
68 | sigma.vglmer <- function(object, ...){
69 | # posterior mode of sigma^2 under IG(a, b) is b / (a + 1); sigma is its square root
70 | 
71 | if (object$family != 'linear'){
72 | stop('sigma from vglmer is only defined for linear models')
73 | }
74 | if (length(list(...)) > 0){
75 | stop('... not used for sigma.vglmer')
76 | }
77 | naive_sigma <- with(object$sigmasq, sqrt(b/(a+1)))
78 | return(naive_sigma)
79 | }
80 | 
81 | #' @rdname vglmer-class
82 | #' @export
83 | ranef.vglmer <- function(object, ...) {
84 | if (length(list(...)) > 0) {
85 | stop("... not used for ranef.vglmer")
86 | }
87 | 
88 | d_j <- object$internal_parameters$d_j
89 | g_j <- object$internal_parameters$g_j
90 | J <- length(d_j)
91 | 
92 | vi_alpha_mean <- as.vector(object$alpha$mean)
93 | vi_alpha_var <- as.vector(object$alpha$dia.var)
94 | 
95 | re_pos <- rep(1:J, d_j * g_j)
96 | 
97 | vi_id <- gsub(rownames(object$alpha$mean), pattern = "^.* @ .* @ ", replacement = "")
98 | vi_id <- split(vi_id, re_pos)
99 | vi_alpha_mean <- split(vi_alpha_mean, re_pos)
100 | vi_alpha_var <- split(vi_alpha_var, re_pos)
101 | 
102 | vi_parsed <- mapply(d_j, g_j, vi_alpha_mean, vi_alpha_var, vi_id, object$internal_parameters$names_of_RE,
103 | SIMPLIFY = F,
104 | FUN = function(d, g, mean_j, var_j, id_j, name_j) {
105 | mat_id <- matrix(id_j, byrow = TRUE, nrow = g, ncol = d)
106 | mat_mean <- matrix(mean_j, byrow = TRUE, nrow = g, ncol = d)
107 | mat_var <- matrix(var_j, byrow = TRUE, nrow = g, ncol = d)
108 | colnames(mat_mean) <- colnames(mat_var) <- name_j
109 | id <- mat_id[, 1]
110 | mat_mean <- data.frame(id, mat_mean, check.names = FALSE, stringsAsFactors = F)
111 | mat_var <- data.frame(id, mat_var, check.names = FALSE, stringsAsFactors = F)
112 | attributes(mat_mean)$"variance" <- mat_var
113 | return(mat_mean)
114 | }
115 | )
116 | return(vi_parsed)
117 | }
118 | 
119 | #' @rdname vglmer-class
120 | #' @method coef vglmer
121 | #' @export
122 | coef.vglmer <- function(object, ...) {
123 | if (length(list(...)) > 0) {
124 | stop("... not used for coef.vglmer")
125 | }
126 | out <- as.vector(object$beta$mean)
127 | names(out) <- rownames(object$beta$mean)
128 | return(out)
129 | }
130 | #' @rdname vglmer-class
131 | #' @export
132 | vcov.vglmer <- function(object, ...) {
133 | if (length(list(...)) > 0) {
134 | stop("... not used for vcov.vglmer")
135 | }
136 | return(as.matrix(object$beta$var))
137 | }
138 | 
139 | #' @rdname vglmer-class
140 | #' @method fitted vglmer
141 | #' @export
142 | fitted.vglmer <- function(object, ...){
143 | if (length(list(...)) > 0) {
144 | stop("... not used for fitted.vglmer")
145 | }
146 | return(object$internal_parameters$lp)
147 | }
148 | 
149 | #' @rdname vglmer-class
150 | #' @param x Model fit using \code{vglmer}.
151 | #' @param ... Not used; included to maintain compatibility with existing
152 | #' methods.
153 | #' @method print vglmer
154 | #' @export
155 | print.vglmer <- function(x, ...) {
156 | if (length(list(...)) > 0) {
157 | warning("print.vglmer does not use ...")
158 | }
159 | N_obs <- x$internal_parameters$N
160 | missing_obs <- x$internal_parameters$missing_obs
161 | it_used <- x$internal_parameters$it_used
162 | it_max <- x$internal_parameters$it_max
163 | final_param_change <- round(max(x$internal_parameters$parameter.change), 6)
164 | final_ELBO_change <- round(tail(diff(x$ELBO_trajectory$ELBO), 1), 8)
165 | converged <- it_max != it_used
166 | p.X <- nrow(x$beta$mean)
167 | p.Z <- nrow(x$alpha$mean)
168 | J <- length(x$sigma$cov)
169 | 
170 | cat(paste0("Formula: J = ", J, ", |Z| = ", p.Z, ", |X| = ", p.X, "\n\n"))
171 | cat(paste(format(formula(x, form = 'original')), collapse = "\n\n"))
172 | cat("\n\n")
173 | if (missing_obs > 0) {
174 | missing_info <- paste0(" after ", missing_obs, " deleted because of missing data and")
175 | } else {
176 | missing_info <- " and"
177 | }
178 | cat(paste0("Model fit with ", N_obs, " observations", missing_info))
179 | if (converged) {
180 | cat(paste0(" converged after ", it_used, " iterations."))
181 | } else {
182 | cat(paste0(" *failed* to converge after ", it_max, " iterations."))
183 | }
184 | cat("\n\n")
185 | cat(paste0("ELBO: ", round(x$ELBO[1], 2), "\n\n"))
186 | cat(paste0("Factorization Method: ", x$control$factorization_method, "\n"))
187 | cat(paste0("Parameter Expansion: ", x$control$parameter_expansion, "\n\n"))
188 | cat(paste0("Largest Parameter Change at Convergence: ", formatC(final_param_change, format = "e", digits = 2), "\n"))
189 | cat(paste0("ELBO Change at Convergence: ", formatC(final_ELBO_change, format = "e", digits = 2), "\n"))
190 | 
191 | 
192 | invisible(list(parameter = final_param_change, ELBO = final_ELBO_change))
193 | }
194 | 
195 | #' @rdname vglmer-class
196 | #' @param display_re Default (\code{TRUE}) prints a summary of the
197 | #' random effects alongside the fixed effects.
198 | #' @importFrom lmtest coeftest
199 | #' @method summary vglmer
200 | #' @export
201 | summary.vglmer <- function(object, display_re = TRUE, ...) 
{
202 | sum_obj <- coeftest(x = object)
203 | 
204 | sum_sigma <- mapply(object$sigma$cov, object$sigma$df, SIMPLIFY = FALSE, FUN = function(a, b) {
205 | fmt_IW_mean(a, b)
206 | })
207 | sum_sigma <- mapply(sum_sigma, object$internal_parameters$names_of_RE, SIMPLIFY = FALSE, FUN = function(i, j) {
208 | rownames(i) <- colnames(i) <- j
209 | return(i)
210 | })
211 | re_names <- names(object$internal_parameters$names_of_RE)
212 | cat(paste0("Output from vglmer using '", object$control$factorization_method, "' factorization.\n"))
213 | cat("\nSummary of Fixed Effects\n")
214 | print(sum_obj)
215 | cat("\n")
216 | if (display_re) {
217 | cat("Summary of Random Effects: Mean of Sigma_j (Variance)")
218 | for (v in seq_len(length(re_names))) {
219 | cat("\n")
220 | cat(re_names[v])
221 | cat("\n")
222 | print(sum_sigma[[v]], quote = FALSE)
223 | }
224 | cat("\n")
225 | }
226 | if (object$family == "negbin") {
227 | r_output <- object$r
228 | # fmt_r <- function(x){formatC(x, format = 'e', digits = 2)}
229 | fmt_r <- function(x) {
230 | round(x, digits = 2)
231 | }
232 | r_ci <- exp(r_output$mu + sqrt(2 * r_output$sigma) * erfinv(c(-0.9, 0.9))) # 5% and 95% lognormal quantiles
233 | r_ci <- paste0("[", paste(fmt_r(r_ci), collapse = ", "), "]")
234 | r_mean <- fmt_r(exp(r_output$mu + r_output$sigma / 2))
235 | 
236 | cat("Summary of Auxiliary Parameters:\n")
237 | cat("Dispersion Parameter r:\n")
238 | if (object$r$method == "VI") {
239 | cat(paste0("Mean (90% Interval): ", r_mean, " ", r_ci))
240 | } else {
241 | cat(paste0("Mean: ", r_mean))
242 | }
243 | cat("\n")
244 | cat("\n")
245 | }
246 | invisible()
247 | }
248 | 
249 | #' @rdname vglmer-class
250 | #' @param form Describes the type of formula to report:
251 | #' \code{"original"} returns the user input, \code{"fe"} returns the fixed
252 | #' effects only, \code{"re"} returns the random effects only.
253 | #' @export
254 | formula.vglmer <- function(x, form = "original", ...) 
{
255 | 
256 | if (form == 'original'){
257 | x$formula$formula
258 | }else if (form == 'fe'){
259 | x$formula$fe
260 | }else if (form == 're'){
261 | x$formula$re
262 | }else{stop('form must be "original", "fe", or "re".')}
263 | 
264 | }
265 | 
266 | #' @importFrom stats qnorm
267 | erfinv <- function(x) {
268 | qnorm((1 + x) / 2) / sqrt(2)
269 | }
270 | 
271 | # Internal function to tidy-up
272 | # inverse Wishart to extract mean
273 | fmt_IW_mean <- function(Phi, nu, digits = 2) {
274 | mean <- solve(as.matrix(Phi)) / (nu - nrow(Phi) - 1)
275 | if (nu - nrow(Phi) - 1 <= 0) { # mean is undefined unless nu - p - 1 > 0
276 | return(matrix(NA, nrow = nrow(Phi), ncol = ncol(Phi)))
277 | } else {
278 | return(formatC(mean, format = "e", digits = digits))
279 | }
280 | }
281 | 
282 | #' @rdname vglmer-class
283 | #' @export
284 | format_vglmer <- function(object) {
285 | beta.output <- data.frame(name = rownames(object$beta$mean), mean = as.vector(object$beta$mean), var = diag(object$beta$var), stringsAsFactors = F)
286 | alpha.output <- data.frame(name = rownames(object$alpha$mean), mean = as.vector(object$alpha$mean), var = as.vector(object$alpha$dia.var), stringsAsFactors = F)
287 | output <- rbind(beta.output, alpha.output)
288 | return(output)
289 | }
290 | 
291 | #' @rdname vglmer-class
292 | #' @importFrom stats vcov
293 | #' @export
294 | format_glmer <- function(object) {
295 | 
296 | output <- do.call('rbind', mapply(ranef(object), names(ranef(object)), SIMPLIFY = FALSE, FUN = function(i,j) {
297 | obj <- data.frame(
298 | var = as.vector(apply(attributes(i)$postVar, MARGIN = 3, FUN = function(i) {
299 | diag(i)
300 | })),
301 | mean = as.vector(t(as.matrix(i))),
302 | name = paste0(rep(colnames(i), nrow(i)), " @ ", rep(rownames(i), each = ncol(i))), stringsAsFactors = F
303 | )
304 | obj[[".re"]] <- j
305 | return(obj)
306 | }))
307 | output$name <- paste0(output[[".re"]], ' @ ', output[["name"]])
308 | output_fe <- data.frame(mean = fixef(object), var = diag(stats::vcov(object)))
309 | output_fe$name <- rownames(output_fe)
310 | output_fe[[".re"]] <- NA
311 | output <- rbind(output, output_fe)
312 | output <- output[, (names(output) != ".re")]
313 | 
314 | rownames(output) <- NULL
315 | 
316 | return(output)
317 | }
318 | 
319 | #' @rdname vglmer-class
320 | #' @param object Model fit using \code{vglmer}.
321 | #' @param type Default (\code{"final"}) gives the ELBO at convergence.
322 | #' \code{"trajectory"} gives the ELBO estimated at each iteration. This is
323 | #' used to assess model convergence. 
324 | #' @export 325 | ELBO <- function(object, type = c('final', 'trajectory')){ 326 | 327 | type <- match.arg(type) 328 | 329 | if (type == 'final'){ 330 | object$ELBO$ELBO 331 | }else{ 332 | object$ELBO_trajectory$ELBO 333 | } 334 | 335 | } 336 | -------------------------------------------------------------------------------- /R/squarem_functions.R: -------------------------------------------------------------------------------- 1 | 2 | prep_lu <- function(M){ 3 | fact_lu <- expand(Matrix::lu(M)) 4 | if (is.null(fact_lu$Q)){ 5 | fact_lu$Q <- Diagonal(n = ncol(fact_lu$U)) 6 | } 7 | fact_lu$L <- drop0(fact_lu$L) 8 | fact_lu$U <- drop0(fact_lu$U) 9 | return(fact_lu) 10 | } 11 | unprep_lu <- function(M){ 12 | recons_M <- t(M$P) %*% M$L %*% M$U %*% M$Q 13 | # recons_M <- t(M$P) %*% drop0(zapsmall(M$L %*% M$U, 15)) %*% M$Q 14 | logdet_M <- 2 * sum(log(abs(diag(M$U)))) 15 | return(list(M = recons_M, logdet_M = logdet_M, diag_U = diag(M$U))) 16 | } 17 | 18 | prep_cholesky <- function(L){ 19 | diag(L) <- log(diag(L)) 20 | return(L) 21 | } 22 | unprep_cholesky <- function(L){ 23 | diag(L) <- exp(diag(L)) 24 | return(L) 25 | } 26 | prep_matrix <- function(M){drop0(chol(as.matrix(M)))} 27 | unprep_matrix <- function(M){t(M) %*% M} 28 | 29 | prep_positive <- function(x){log(x)} 30 | unprep_positive <- function(x){exp(x)} 31 | 32 | squarem_prep_function <- function(x, type){ 33 | if (type == 'real'){ 34 | x 35 | }else if (type == 'lu'){ 36 | prep_lu(x) 37 | }else if (type == 'cholesky'){ 38 | prep_cholesky(x) 39 | }else if (type == 'matrix'){ 40 | prep_matrix(x) 41 | }else if (type == 'positive'){ 42 | prep_positive(x) 43 | }else{stop('Invalid type')} 44 | } 45 | 46 | squarem_unprep_function <- function(x, type){ 47 | if (type == 'real'){ 48 | x 49 | }else if (type == 'lu'){ 50 | unprep_lu(x) 51 | }else if (type == 'cholesky'){ 52 | unprep_cholesky(x) 53 | }else if (type == 'matrix'){ 54 | unprep_matrix(x) 55 | }else if (type == 'positive'){ 56 | unprep_positive(x) 57 | }else{stop('Invalid type')} 58 | } 59 | -------------------------------------------------------------------------------- /R/superlearner_functions.R: -------------------------------------------------------------------------------- 1 | #' SuperLearner with (Variational) Hierarchical Models 2 | #' 3 | #' These functions integrate \code{vglmer} (or \code{glmer}) into 4 | #' \code{SuperLearner}. Most of the arguments are standard for 5 | #' \code{SuperLearner} functions. 6 | #' 7 | #' @param Y From \code{SuperLearner}: The outcome in the training data set. 8 | #' @param X From \code{SuperLearner}: The predictor variables in the training 9 | #' data. 10 | #' @param newX From \code{SuperLearner}: The predictor variables in validation 11 | #' data. 12 | #' @param formula The formula used for estimation. 13 | #' @param family From \code{SuperLearner}: Currently allows \code{gaussian} or 14 | #' \code{binomial}. 15 | #' @param id From \code{SuperLearner}: Optional cluster identification variable. 16 | #' See \code{SuperLearner} for more details. 17 | #' @param obsWeights From \code{SuperLearner}: Weights for each observation. Not 18 | #' permitted for \code{SL.vglmer}. 19 | #' @param control Control object for estimating \code{vglmer} (e.g., 20 | #' \link{vglmer_control}) or \code{[g]lmer}. 21 | #' @param object Used in \code{predict} for \code{SL.glmer} and 22 | #' \code{SL.vglmer}. A model estimated using either \code{SL.vglmer} or 23 | #' \code{SL.glmer}. 24 | #' @param ... 
Not used; included to maintain compatibility with existing
25 | #' methods.
26 | #' @param learner Character name of model from \code{SuperLearner}. See
27 | #' "Details" for how this is used.
28 | #' @param env Environment to assign model. See "Details" for how this is used.
29 | #' @name sl_vglmer
30 | #'
31 | #' @details This documentation describes two types of function.
32 | #'
33 | #' \bold{Estimating Hierarchical Models in SuperLearner}: Two methods for
34 | #' estimating hierarchical models are provided: one for variational methods
35 | #' (\code{SL.vglmer}) and one for non-variational methods (\code{SL.glmer}).
36 | #' The accompanying prediction functions are also provided.
37 | #'
38 | #' \bold{Formula with SuperLearner}: The \code{vglmer} package provides a way
39 | #' to estimate models that require or use a formula with \code{SuperLearner}.
40 | #' This allows for a design to be passed that contains variables that are
41 | #' \emph{not} used in estimation. This can be used as follows (see
42 | #' "Examples"). One calls the function \code{add_formula_SL} around the quoted
43 | #' name of a \code{SuperLearner} model, e.g. \code{add_formula_SL(learner =
44 | #' "SL.knn")}. This creates a new model and predict function with the suffix
45 | #' \code{"_f"}. This \bold{requires} a formula to be provided for estimation.
46 | #'
47 | #' With this in hand, \code{"SL.knn_f"} can be passed to \code{SuperLearner} with the
48 | #' accompanying formula argument and thus one can compare models with
49 | #' different formulas or designs on the same ensemble. The \code{env} argument
50 | #' may need to be manually specified to ensure the created functions can be
51 | #' called by \code{SuperLearner}.
52 | #'
53 | #' @return The functions here return different types of output. \code{SL.vglmer}
54 | #' and \code{SL.glmer} return fitted models with the in-sample predictions as
55 | #' standard for \code{SuperLearner}. The \code{predict} methods return vectors
56 | #' of predicted values. \code{add_formula_SL} creates two objects in the
57 | #' environment (one for estimation \code{model_f} and one for prediction
58 | #' \code{predict.model_f}) used for \code{SuperLearner}. 
59 | #' @examples 60 | #' 61 | #' set.seed(456) 62 | #' 63 | #' if (requireNamespace('SuperLearner', quietly = TRUE)){ 64 | #' require(SuperLearner) 65 | #' sim_data <- data.frame( 66 | #' x = rnorm(100), 67 | #' g = sample(letters, 100, replace = TRUE) 68 | #' ) 69 | #' sim_data$y <- rbinom(nrow(sim_data), 70 | #' 1, plogis(runif(26)[match(sim_data$g, letters)])) 71 | #' sim_data$g <- factor(sim_data$g) 72 | #' sl_vglmer <- function(...){SL.vglmer(..., formula = y ~ x + (1 | g))} 73 | #' SL.glm <- SuperLearner::SL.glm 74 | #' add_formula_SL('SL.glm') 75 | #' sl_glm_form <- function(...){SL.glm_f(..., formula = ~ x)} 76 | # 77 | #' \donttest{ 78 | #' SuperLearner::SuperLearner( 79 | #' Y = sim_data$y, family = 'binomial', 80 | #' X = sim_data[, c('x', 'g')], 81 | #' cvControl = list(V = 2), 82 | #' SL.library = c('sl_vglmer', 'sl_glm_form') 83 | #' ) 84 | #' } 85 | #' } 86 | #' @export 87 | SL.vglmer <- function(Y, X, newX, formula, family, id, obsWeights, control = vglmer_control()) { 88 | if(!requireNamespace('vglmer', quietly = FALSE)) {stop("SL.vglmer requires the vglmer package, but it isn't available")} 89 | 90 | if (is.character(formula)){ 91 | formula <- as.formula(formula) 92 | } 93 | # https://stackoverflow.com/questions/13217322/how-to-reliably-get-dependent-variable-name-from-formula-object 94 | getResponseFromFormula = function(formula) { 95 | if (attr(terms(as.formula(formula)) , which = 'response')) 96 | all.vars(formula)[1] 97 | else 98 | NULL 99 | } 100 | rformula <- getResponseFromFormula(formula) 101 | if (!is.null(rformula)){ 102 | if (rformula %in% names(X)){ 103 | warning(paste0('Outcome "', rformula, '" seems to be in "X". This is likely ill-advised')) 104 | } 105 | } 106 | if ('...Y' %in% names(X)){ 107 | stop('SL.vglmer cannot accept a column in "X" called "...Y". Please rename.') 108 | } 109 | if (!all(obsWeights == 1)){ 110 | warning('SL.vglmer does not use weights') 111 | } 112 | if (family$family == 'binomial'){ 113 | family <- 'binomial' 114 | }else if (family$family == 'gaussian'){ 115 | family <- 'linear' 116 | }else{stop('Family must be binomial or Gaussian for SL.vglmer.')} 117 | X[['...Y']] <- Y 118 | formula <- update.formula(formula, '`...Y` ~ .') 119 | 120 | fit.vglmer <- vglmer::vglmer(formula, data = X, family = family, control = control) 121 | pred <- predict(fit.vglmer, newdata = newX, allow_missing_levels = TRUE) 122 | if (family == 'binomial'){ 123 | pred <- plogis(pred) 124 | }else if (family != 'linear'){ 125 | stop('SuperLearner not set up for non-linear, non-binomial families.') 126 | } 127 | fit <- list(object = fit.vglmer) 128 | out <- list(pred = pred, fit = fit) 129 | class(out$fit) <- c("SL.vglmer") 130 | return(out) 131 | } 132 | 133 | #' @rdname sl_vglmer 134 | #' @param newdata Dataset to use for predictions. 135 | #' @param allow_missing_levels Default (\code{TRUE}) allows prediction for 136 | #' levels not observed in the estimation data; the value of \code{0} (with no 137 | #' uncertainty) is used for the corresponding random effect. \bold{Note:} This 138 | #' default differs from \code{predict.vglmer}. 
138 | #' default differs from \code{predict.vglmer}.
139 | #' @export 140 | predict.SL.vglmer <- function(object, newdata, allow_missing_levels = TRUE, ...){ 141 | if(!requireNamespace('vglmer', quietly = FALSE)) {stop("SL.vglmer requires the vglmer package, but it isn't available")} 142 | 143 | pred <- predict(object$object, newdata = newdata, allow_missing_levels = allow_missing_levels) 144 | if (object$object$family == 'binomial'){ 145 | pred <- plogis(pred) 146 | }else if (object$object$family != 'linear'){ 147 | stop('SuperLearner not set up for non-linear, non-binomial families.') 148 | } 149 | return(pred) 150 | } 151 | 152 | #' @importFrom stats predict 153 | #' @rdname sl_vglmer 154 | #' @export 155 | SL.glmer <- function(Y, X, newX, formula, family, id, obsWeights, control = NULL) { 156 | if(!requireNamespace('lme4', quietly = FALSE)) {stop("SL.glmer requires the lme4 package, but it isn't available")} 157 | 158 | if (is.character(formula)){ 159 | formula <- as.formula(formula) 160 | } 161 | # https://stackoverflow.com/questions/13217322/how-to-reliably-get-dependent-variable-name-from-formula-object 162 | getResponseFromFormula = function(formula) { 163 | if (attr(terms(as.formula(formula)) , which = 'response')) 164 | all.vars(formula)[1] 165 | else 166 | NULL 167 | } 168 | rformula <- getResponseFromFormula(formula) 169 | if (!is.null(rformula)){ 170 | if (rformula %in% names(X)){ 171 | warning(paste0('Outcome "', rformula, '" seems to be in "X". This is likely ill-advised')) 172 | } 173 | } 174 | if ('...Y' %in% names(X)){ 175 | stop('SL.glmer cannot accept a column in "X" called "...Y". Please rename.') 176 | } 177 | X[['...Y']] <- Y 178 | formula <- update.formula(formula, '`...Y` ~ .') 179 | environment(formula) <- environment() 180 | 181 | if (family$family == 'gaussian'){ 182 | if (is.null(control)){ 183 | control <- lmerControl() 184 | } 185 | fit.glmer <- lme4::lmer(formula, data = X, weights = obsWeights, control = control) 186 | }else{ 187 | if (is.null(control)){ 188 | control <- glmerControl() 189 | } 190 | fit.glmer <- lme4::glmer(formula, data = X, weights = obsWeights, family = family, control = control) 191 | } 192 | pred <- stats::predict(fit.glmer, newdata = newX, allow.new.levels = TRUE, type = 'response') 193 | fit <- list(object = fit.glmer) 194 | out <- list(pred = pred, fit = fit) 195 | class(out$fit) <- c("SL.glmer") 196 | return(out) 197 | } 198 | 199 | #' @rdname sl_vglmer 200 | #' @param allow.new.levels From \code{lme4}: Allow levels in prediction that are 201 | #' not in the training data. Default is \code{TRUE} for \code{SuperLearner}. 
202 | #' @export
203 | predict.SL.glmer <- function(object, newdata, allow.new.levels = TRUE, ...){
204 | if(!requireNamespace('lme4', quietly = FALSE)) {stop("SL.glmer requires the lme4 package, but it isn't available")}
205 | 
206 | pred <- predict(object$object, newdata = newdata, allow.new.levels = allow.new.levels, type = 'response')
207 | return(pred)
208 | }
209 | 
210 | #' @rdname sl_vglmer
211 | #' @export
212 | add_formula_SL <- function(learner, env = parent.frame()){
213 | 
214 | base_learner <- get(learner, envir = env)
215 | base_learner_predict <- get(paste0('predict.', learner), envir = env)
216 | # Add an argument for "formula"
217 | f_formals <- c(alist(formula = ), formals(base_learner, envir = env))
218 | f_formals_predict <- c(formals(base_learner_predict, envir = env))
219 | # Use model.matrix formula *first*
220 | 
221 | # Placeholder to pass CRAN checks
222 | object <- newdata <- X <- newX <- NULL
223 | 
224 | f_learner <- function(formula, ...){
225 | args <- mget(ls())
226 | args$X <- model.frame(as.formula(formula), X)
227 | args$newX <- model.frame(as.formula(formula), newX)
228 | args$formula <- NULL
229 | out <- do.call("base_learner", args)
230 | out$fit$SL_formula <- formula
231 | class(out$fit) <- 'base_learner_f'
232 | return(out)
233 | }
234 | f_learner <- deparse(f_learner)
235 | f_learner <- eval(parse(text = paste(gsub(f_learner, pattern='base_learner', replacement = learner), collapse = '\n')))
236 | formals(f_learner) <- f_formals
237 | 
238 | f_learner_predict <- function(...){
239 | args <- mget(ls())
240 | args$newdata <- model.frame(as.formula(object$SL_formula), newdata)
241 | args$formula <- NULL
242 | out <- do.call("predict.base_learner", args)
243 | return(out)
244 | }
245 | f_learner_predict <- deparse(f_learner_predict)
246 | f_learner_predict <- eval(parse(text = paste(gsub(f_learner_predict, pattern='base_learner', replacement = learner), collapse = '\n')))
247 | formals(f_learner_predict) <- f_formals_predict
248 | 
249 | assign(x = paste0(learner, '_f'), value = f_learner, envir = env)
250 | assign(x = paste0('predict.', learner, '_f'), value = f_learner_predict, envir = env)
251 | return(paste0(learner, '_f'))
252 | }
253 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # vglmer: Variational Generalized Linear Mixed Effects Regression
2 | [![CRAN status](https://www.r-pkg.org/badges/version/vglmer)](https://CRAN.R-project.org/package=vglmer) [![R-CMD-check](https://github.com/mgoplerud/vglmer/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/mgoplerud/vglmer/actions/workflows/R-CMD-check.yaml) [![codecov](https://codecov.io/gh/mgoplerud/vglmer/branch/master/graph/badge.svg?token=L8C4260BUW)](https://app.codecov.io/gh/mgoplerud/vglmer)
3 | 
4 | A package to estimate non-linear hierarchical models using the variational algorithms described in [Goplerud (2022)](https://doi.org/10.1214/21-BA1266) and in [Goplerud (2024)](https://doi.org/10.1017/S0003055423000035). It also provides the option to improve an initial approximation using marginally augmented variational Bayes (MAVB), also described in [Goplerud (2022)](https://doi.org/10.1214/21-BA1266). It can be installed from CRAN, or the most up-to-date version can be installed using `devtools`.
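A minimal end-to-end sketch on simulated data (the `sim` data frame here is purely illustrative; installation options follow below):

```
library(vglmer)
set.seed(123)
sim <- data.frame(x = rnorm(500), g = sample(letters, 500, replace = TRUE))
sim$y <- rbinom(500, 1, plogis(sim$x))
fit <- vglmer(y ~ x + (1 | g), data = sim, family = "binomial")
summary(fit)
```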
5 | 
6 | ```
7 | # CRAN
8 | install.packages("vglmer")
9 | # Up-to-Date GitHub Version
10 | library(devtools)
11 | devtools::install_github("mgoplerud/vglmer", dependencies = TRUE)
12 | ```
13 | 
14 | If you are interested in using partially factorized variational inference [(Goplerud, Papaspiliopoulos, and Zanella 2023)](https://arxiv.org/abs/2312.13148), please switch to the `collapsed` branch and install that version of the package. There are some important differences from this main branch, especially in terms of some `vglmer_control` naming conventions. This branch will eventually be integrated into the main package.
15 | 
16 | At present, `vglmer` can fit logistic, linear, and negative binomial outcomes with an arbitrary number of random effects. Details on negative binomial inference can be found [here](https://github.com/mgoplerud/vglmer/blob/master/.github/model_addendum.pdf); this family is more experimental at the moment.
17 | 
18 | This package accepts "standard" glmer syntax of the form:
19 | 
20 | ```
21 | vglmer(formula = y ~ x + (x | g), data = data, family = 'binomial')
22 | ```
23 | 
24 | Splines can be estimated using `v_s(x)`, similar to the functionality in `mgcv`, although with many fewer options.
25 | 
26 | ```
27 | vglmer(formula = y ~ v_s(x) + (x | g), data = data, family = 'binomial')
28 | ```
29 | 
30 | Many standard methods from `lme4` work, e.g. `fixef`, `coef`, `vcov`, `ranef`, `predict`. Use `format_vglmer` to parse all parameters into a single data.frame. Estimation can be controlled via the numerous arguments to `control` using `vglmer_control`. At the moment, Schemes I, II, and III in Goplerud (2022) correspond to `strong`, `partial`, and `weak`. The default is `strong`, which corresponds to the strongest (i.e., worst) approximation. If the variance of the parameters is of interest, then `weak` will return better results.
31 | 
32 | Please make an issue on GitHub with any concerns you have.
33 | 
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 | 
3 | coverage:
4 |   status:
5 |     project:
6 |       default:
7 |         target: auto
8 |         threshold: 1%
9 |     patch:
10 |       default:
11 |         target: auto
12 |         threshold: 1%
13 | 
14 | codecov:
15 |   token: b21bd71f-359a-4f1a-9aee-02b676659301
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## Resubmission
2 | 
3 | This is an update to vglmer 1.0.5 that adds some additional, minor
4 | functionalities. It considerably improves the speed of
5 | `parameter_expansion="translation"` for `factorization_method="strong"` in large
6 | models by speeding up the model preparation.
7 | 
8 | It also addresses a failure in the tests from updates to `testthat` and `waldo`.
9 | 
10 | ## R CMD check results
11 | 
12 | There were no ERRORs, WARNINGs or NOTES.
13 | 
14 | ## Downstream dependencies
15 | 
16 | I have checked the downstream dependencies using
17 | https://github.com/r-devel/recheck and all pass R CMD CHECK. 
-------------------------------------------------------------------------------- /man/LinRegChol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{LinRegChol} 4 | \alias{LinRegChol} 5 | \title{Linear Regression by Cholesky} 6 | \usage{ 7 | LinRegChol(X, omega, prior_precision, y, save_chol = TRUE) 8 | } 9 | \arguments{ 10 | \item{X}{Design Matrix} 11 | 12 | \item{omega}{Polya-Gamma weights} 13 | 14 | \item{prior_precision}{Prior Precision for Regression} 15 | 16 | \item{y}{Outcome} 17 | 18 | \item{save_chol}{Save cholesky factor} 19 | } 20 | \description{ 21 | Do linear regression of form solve(X^T O X + P, X^T y) where O is omega, P 22 | is precision. 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/MAVB.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MAVB_functions.R 3 | \name{MAVB} 4 | \alias{MAVB} 5 | \title{Perform MAVB after fitting vglmer} 6 | \usage{ 7 | MAVB(object, samples, verbose = FALSE, var_px = Inf) 8 | } 9 | \arguments{ 10 | \item{object}{Model fit using \code{vglmer}.} 11 | 12 | \item{samples}{Number of samples to draw.} 13 | 14 | \item{verbose}{Show progress in drawing samples.} 15 | 16 | \item{var_px}{Variance of working prior for marginal augmentation. Default 17 | (\code{Inf}) is a flat, improper, prior.} 18 | } 19 | \value{ 20 | This function returns a matrix with \code{samples} rows and columns 21 | for each fixed and random effect. 22 | } 23 | \description{ 24 | Given a model estimated using \code{vglmer}, this function 25 | performs marginally augmented variational Bayes (MAVB) to improve the 26 | approximation quality. 27 | } 28 | \details{ 29 | This function returns the improved estimates of the 30 | \emph{parameters}. To use MAVB when generating predictions, one should use 31 | \link{predict_MAVB}. At present, MAVB is only enabled for binomial models. 32 | } 33 | \references{ 34 | Goplerud, Max. 2022. "Fast and Accurate Estimation of Non-Nested Binomial 35 | Hierarchical Models Using Variational Inference." \emph{Bayesian Analysis}. 17(2): 36 | 623-650. 
37 | } 38 | -------------------------------------------------------------------------------- /man/Predict.matrix.randwalk.smooth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mgcv_functions.R 3 | \name{Predict.matrix.randwalk.smooth} 4 | \alias{Predict.matrix.randwalk.smooth} 5 | \title{Predict Methods for random walk smooth} 6 | \usage{ 7 | \method{Predict.matrix}{randwalk.smooth}(object, data) 8 | } 9 | \arguments{ 10 | \item{object}{a smooth object; see documentation for other methods in 11 | \code{mgcv}.} 12 | 13 | \item{data}{a data.frame; see documentation for other methods in 14 | \code{mgcv}.} 15 | } 16 | \description{ 17 | Predict Methods for random walk smooth 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/custom_glmer_samples.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MAVB_functions.R 3 | \name{custom_glmer_samples} 4 | \alias{custom_glmer_samples} 5 | \title{Get samples from GLMER} 6 | \usage{ 7 | custom_glmer_samples(glmer, samples, ordering) 8 | } 9 | \arguments{ 10 | \item{glmer}{object fitted using glmer} 11 | 12 | \item{samples}{number of samples to draw} 13 | 14 | \item{ordering}{order of output} 15 | } 16 | \description{ 17 | Order samples from glmer to match names from vglmer. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/fallback_interpret.gam0.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spline_functions.R 3 | \name{fallback_interpret.gam0} 4 | \alias{fallback_interpret.gam0} 5 | \title{Interpret a vglmer formula for splines} 6 | \usage{ 7 | fallback_interpret.gam0(gf, textra = NULL, extra.special = NULL) 8 | } 9 | \arguments{ 10 | \item{gf}{A vglmer formula} 11 | 12 | \item{textra}{Unused internal argument} 13 | 14 | \item{extra.special}{Allow extra special terms to be passed} 15 | } 16 | \description{ 17 | A modified version of interpret.gam0 from mgcv. Used when mgcv's 18 | interpret.gam fails; usually when some environment object is passed to v_s. 
19 | }
20 | \keyword{internal}
21 | 
--------------------------------------------------------------------------------
/man/fallback_subbars.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spline_functions.R
3 | \name{fallback_subbars}
4 | \alias{fallback_subbars}
5 | \title{To be used if subbars fails, usually when there is an argument to
6 | v_s; adapted from lme4}
7 | \usage{
8 | fallback_subbars(term)
9 | }
10 | \arguments{
11 | \item{term}{a formula with lme4-style syntax; see \link[lme4]{subbars}}
12 | }
13 | \description{
14 | To be used if subbars fails, usually when there is an argument to
15 | v_s; adapted from lme4
16 | }
17 | \keyword{internal}
18 | 
--------------------------------------------------------------------------------
/man/formOmega.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/spline_functions.R
3 | \name{formOmega}
4 | \alias{formOmega}
5 | \title{Code from Wand and Ormerod (2008)
6 | Found here: 10.1111/j.1467-842X.2008.00507.x}
7 | \usage{
8 | formOmega(a, b, intKnots)
9 | }
10 | \arguments{
11 | \item{a}{lower boundary}
12 | 
13 | \item{b}{upper boundary}
14 | 
15 | \item{intKnots}{internal knots}
16 | }
17 | \description{
18 | Code from Wand and Ormerod (2008)
19 | Found here: 10.1111/j.1467-842X.2008.00507.x
20 | }
21 | \keyword{internal}
22 | 
--------------------------------------------------------------------------------
/man/posterior_samples.vglmer.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/MAVB_functions.R
3 | \name{posterior_samples.vglmer}
4 | \alias{posterior_samples.vglmer}
5 | \title{Draw samples from the variational distribution}
6 | \usage{
7 | posterior_samples.vglmer(object, samples, verbose = FALSE)
8 | }
9 | \arguments{
10 | \item{object}{Model fit using \code{vglmer}.}
11 | 
12 | \item{samples}{Number of samples to draw.}
13 | 
14 | \item{verbose}{Show progress in drawing samples.}
15 | }
16 | \value{
17 | This function returns a matrix with \code{samples} rows and columns
18 | for each fixed and random effect.
19 | }
20 | \description{
21 | This function draws samples from the estimated variational
22 | distributions. If using \code{MAVB} to improve the quality of the
23 | approximating distribution, please use \link{MAVB} or \link{predict_MAVB}.
24 | }
25 | 
--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/print_functions.R
3 | \docType{import}
4 | \name{reexports}
5 | \alias{reexports}
6 | \alias{fixef}
7 | \alias{ranef}
8 | \title{Objects exported from other packages}
9 | \keyword{internal}
10 | \description{
11 | These objects are imported from other packages. Follow the links
12 | below to see their documentation. 
13 | 
14 | \describe{
15 | \item{lme4}{\code{\link[lme4]{fixef}}, \code{\link[lme4]{ranef}}}
16 | }}
17 | 
18 | 
--------------------------------------------------------------------------------
/man/simple_EM.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/helper_functions.R
3 | \name{simple_EM}
4 | \alias{simple_EM}
5 | \alias{EM_prelim_logit}
6 | \alias{EM_prelim_nb}
7 | \title{Simple EM algorithm for starting values.}
8 | \usage{
9 | EM_prelim_logit(X, Z, s, pg_b, iter, ridge = 2)
10 | 
11 | EM_prelim_nb(X, Z, y, est_r, iter, ridge = 2)
12 | }
13 | \arguments{
14 | \item{X}{Design matrix}
15 | 
16 | \item{Z}{RE design matrix}
17 | 
18 | \item{s}{(y_i - n_i)/2 for Polya-Gamma input}
19 | 
20 | \item{pg_b}{n_i as vector input}
21 | 
22 | \item{iter}{iterations}
23 | 
24 | \item{ridge}{variance of ridge prior}
25 | 
26 | \item{y}{Raw observed y_i}
27 | 
28 | \item{est_r}{Initial r value (not updated!)}
29 | }
30 | \description{
31 | Uses a ridge penalty to prevent separation. Not to be called by the user!
32 | }
33 | \keyword{internal}
34 | 
--------------------------------------------------------------------------------
/man/sl_vglmer.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/superlearner_functions.R
3 | \name{sl_vglmer}
4 | \alias{sl_vglmer}
5 | \alias{SL.vglmer}
6 | \alias{predict.SL.vglmer}
7 | \alias{SL.glmer}
8 | \alias{predict.SL.glmer}
9 | \alias{add_formula_SL}
10 | \title{SuperLearner with (Variational) Hierarchical Models}
11 | \usage{
12 | SL.vglmer(
13 |   Y,
14 |   X,
15 |   newX,
16 |   formula,
17 |   family,
18 |   id,
19 |   obsWeights,
20 |   control = vglmer_control()
21 | )
22 | 
23 | \method{predict}{SL.vglmer}(object, newdata, allow_missing_levels = TRUE, ...)
24 | 
25 | SL.glmer(Y, X, newX, formula, family, id, obsWeights, control = NULL)
26 | 
27 | \method{predict}{SL.glmer}(object, newdata, allow.new.levels = TRUE, ...)
28 | 
29 | add_formula_SL(learner, env = parent.frame())
30 | }
31 | \arguments{
32 | \item{Y}{From \code{SuperLearner}: The outcome in the training data set.}
33 | 
34 | \item{X}{From \code{SuperLearner}: The predictor variables in the training
35 | data.}
36 | 
37 | \item{newX}{From \code{SuperLearner}: The predictor variables in validation
38 | data.}
39 | 
40 | \item{formula}{The formula used for estimation.}
41 | 
42 | \item{family}{From \code{SuperLearner}: Currently allows \code{gaussian} or
43 | \code{binomial}.}
44 | 
45 | \item{id}{From \code{SuperLearner}: Optional cluster identification variable.
46 | See \code{SuperLearner} for more details.}
47 | 
48 | \item{obsWeights}{From \code{SuperLearner}: Weights for each observation. Not
49 | permitted for \code{SL.vglmer}.}
50 | 
51 | \item{control}{Control object for estimating \code{vglmer} (e.g.,
52 | \link{vglmer_control}) or \code{[g]lmer}.}
53 | 
54 | \item{object}{Used in \code{predict} for \code{SL.glmer} and
55 | \code{SL.vglmer}. A model estimated using either \code{SL.vglmer} or
56 | \code{SL.glmer}.}
57 | 
58 | \item{newdata}{Dataset to use for predictions.}
59 | 
60 | \item{allow_missing_levels}{Default (\code{TRUE}) allows prediction for
61 | levels not observed in the estimation data; the value of \code{0} (with no
62 | uncertainty) is used for the corresponding random effect. 
\bold{Note:} This
63 | default differs from \code{predict.vglmer}.}
64 | 
65 | \item{...}{Not used; included to maintain compatibility with existing
66 | methods.}
67 | 
68 | \item{allow.new.levels}{From \code{lme4}: Allow levels in prediction that are
69 | not in the training data. Default is \code{TRUE} for \code{SuperLearner}.}
70 | 
71 | \item{learner}{Character name of model from \code{SuperLearner}. See
72 | "Details" for how this is used.}
73 | 
74 | \item{env}{Environment to assign model. See "Details" for how this is used.}
75 | }
76 | \value{
77 | The functions here return different types of output. \code{SL.vglmer}
78 | and \code{SL.glmer} return fitted models with the in-sample predictions as
79 | standard for \code{SuperLearner}. The \code{predict} methods return vectors
80 | of predicted values. \code{add_formula_SL} creates two objects in the
81 | environment (one for estimation \code{model_f} and one for prediction
82 | \code{predict.model_f}) used for \code{SuperLearner}.
83 | }
84 | \description{
85 | These functions integrate \code{vglmer} (or \code{glmer}) into
86 | \code{SuperLearner}. Most of the arguments are standard for
87 | \code{SuperLearner} functions.
88 | }
89 | \details{
90 | This documentation describes two types of function.
91 | 
92 | \bold{Estimating Hierarchical Models in SuperLearner}: Two methods for
93 | estimating hierarchical models are provided: one for variational methods
94 | (\code{SL.vglmer}) and one for non-variational methods (\code{SL.glmer}).
95 | The accompanying prediction functions are also provided.
96 | 
97 | \bold{Formula with SuperLearner}: The \code{vglmer} package provides a way
98 | to estimate models that require or use a formula with \code{SuperLearner}.
99 | This allows for a design to be passed that contains variables that are
100 | \emph{not} used in estimation. This can be used as follows (see
101 | "Examples"). One calls the function \code{add_formula_SL} around the quoted
102 | name of a \code{SuperLearner} model, e.g. \code{add_formula_SL(learner =
103 | "SL.knn")}. This creates a new model and predict function with the suffix
104 | \code{"_f"}. This \bold{requires} a formula to be provided for estimation.
105 | 
106 | With this in hand, \code{"SL.knn_f"} can be passed to \code{SuperLearner} with the
107 | accompanying formula argument and thus one can compare models with
108 | different formulas or designs on the same ensemble. The \code{env} argument
109 | may need to be manually specified to ensure the created functions can be
110 | called by \code{SuperLearner}. 
111 | } 112 | \examples{ 113 | 114 | set.seed(456) 115 | 116 | if (requireNamespace('SuperLearner', quietly = TRUE)){ 117 | require(SuperLearner) 118 | sim_data <- data.frame( 119 | x = rnorm(100), 120 | g = sample(letters, 100, replace = TRUE) 121 | ) 122 | sim_data$y <- rbinom(nrow(sim_data), 123 | 1, plogis(runif(26)[match(sim_data$g, letters)])) 124 | sim_data$g <- factor(sim_data$g) 125 | sl_vglmer <- function(...){SL.vglmer(..., formula = y ~ x + (1 | g))} 126 | SL.glm <- SuperLearner::SL.glm 127 | add_formula_SL('SL.glm') 128 | sl_glm_form <- function(...){SL.glm_f(..., formula = ~ x)} 129 | \donttest{ 130 | SuperLearner::SuperLearner( 131 | Y = sim_data$y, family = 'binomial', 132 | X = sim_data[, c('x', 'g')], 133 | cvControl = list(V = 2), 134 | SL.library = c('sl_vglmer', 'sl_glm_form') 135 | ) 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /man/smooth.construct.randwalk.smooth.spec.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mgcv_functions.R 3 | \name{smooth.construct.randwalk.smooth.spec} 4 | \alias{smooth.construct.randwalk.smooth.spec} 5 | \title{Constructor for random walk smooth} 6 | \usage{ 7 | \method{smooth.construct}{randwalk.smooth.spec}(object, data, knots) 8 | } 9 | \arguments{ 10 | \item{object}{a smooth object; see documentation for other methods in 11 | \code{mgcv}.} 12 | 13 | \item{data}{a data.frame; see documentation for other methods in 14 | \code{mgcv}.} 15 | 16 | \item{knots}{not used} 17 | } 18 | \description{ 19 | Constructor for random walk smooth 20 | } 21 | \details{ 22 | See the function \link{v_s} for details. 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/v_s.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spline_functions.R 3 | \name{v_s} 4 | \alias{v_s} 5 | \title{Create splines for use in vglmer} 6 | \usage{ 7 | v_s( 8 | ..., 9 | type = "tpf", 10 | knots = NULL, 11 | by = NA, 12 | xt = NULL, 13 | by_re = TRUE, 14 | force_vector = FALSE, 15 | outer_okay = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{...}{Variable name, e.g. \code{v_s(x)}} 20 | 21 | \item{type}{Default (\code{"tpf"}) uses truncated linear splines for the 22 | basis. \code{"o"} uses O'Sullivan splines (Wand and Ormerod 2008). 23 | Smoothing across multiple covariates, e.g. \code{v_s(x,x2,type="gKRLS")}, 24 | can be done using kernel ridge regression. Chang and Goplerud (2024) 25 | provide a detailed discussion. Note that \code{"gKRLS"} by default uses 26 | random sketching to create the relevant bases and thus a seed would need to 27 | be set to ensure exact replicability.} 28 | 29 | \item{knots}{Default (\code{NULL}) uses \eqn{K=min(N/4,35)} knots evenly 30 | spaced at quantiles of the covariate \code{x}. A single number specifies a 31 | specific number of knots; a vector can set custom locations for knots.} 32 | 33 | \item{by}{A categorical or factor covariate to interact the spline with; for 34 | example, \code{v_s(x, by = g)}.} 35 | 36 | \item{xt}{Arguments passed to \code{xt} from \code{mgcv}; at the moment, this 37 | is only used for \code{type="gKRLS"} to pass the function \code{gKRLS()}. 
38 | Please see the documentation of \code{\link[gKRLS]{gKRLS}} for more 39 | details.} 40 | 41 | \item{by_re}{Default (\code{TRUE}) regularizes the interactions between the 42 | categorical factor and the covariate. See "Details" in \link{vglmer} for 43 | more discussion.} 44 | 45 | \item{force_vector}{Force that argument to \code{knots} is treated as vector. 46 | This is usually not needed unless \code{knots} is a single integer that 47 | should be treated as a single knot (vs. the number of knots).} 48 | 49 | \item{outer_okay}{Default (\code{FALSE}) does not permit values in \code{x} 50 | to exceed the outer knots.} 51 | } 52 | \value{ 53 | This function returns a list of class of \code{vglmer_spline} that is 54 | passed to unexported functions. It contains the arguments noted above where 55 | \code{...} is parsed into an argument called \code{term}. 56 | } 57 | \description{ 58 | This function estimates splines in \code{vglmer}, similar to \code{s(...)} in 59 | \code{mgcv} albeit with many fewer options than \code{mgcv}. It allows for 60 | truncated (linear) splines (\code{type="tpf"}), O'Sullivan splines 61 | (\code{type="o"}), or kernel ridge regression (\code{type="gKRLS"}). Please 62 | see \link{vglmer} for more discussion and examples. For information on kernel 63 | ridge regression, please consult \link[gKRLS]{gKRLS}. 64 | } 65 | \references{ 66 | Chang, Qing, and Max Goplerud. 2024. "Generalized Kernel Regularized Least 67 | Squares." \emph{Political Analysis} 32(2):157-171. 68 | 69 | Wand, Matt P. and Ormerod, John T. 2008. "On Semiparametric Regression with 70 | O'Sullivan Penalized Splines". \emph{Australian & New Zealand Journal of 71 | Statistics}. 50(2): 179-198. 72 | 73 | Wood, Simon N. 2017. \emph{Generalized Additive Models: An Introduction with 74 | R}. Chapman and Hall/CRC. 75 | } 76 | -------------------------------------------------------------------------------- /man/var_mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MAVB_functions.R 3 | \name{var_mat} 4 | \alias{var_mat} 5 | \alias{rowVar} 6 | \alias{colVar} 7 | \title{Variance of Rows or Columns of Matrices} 8 | \usage{ 9 | rowVar(matrix) 10 | 11 | colVar(matrix) 12 | } 13 | \arguments{ 14 | \item{matrix}{Matrix of numeric inputs.} 15 | } 16 | \description{ 17 | Base R implementation for variance. Analogue of rowMeans. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/vglmer-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print_functions.R 3 | \name{vglmer-class} 4 | \alias{vglmer-class} 5 | \alias{fixef.vglmer} 6 | \alias{sigma.vglmer} 7 | \alias{ranef.vglmer} 8 | \alias{coef.vglmer} 9 | \alias{vcov.vglmer} 10 | \alias{fitted.vglmer} 11 | \alias{print.vglmer} 12 | \alias{summary.vglmer} 13 | \alias{formula.vglmer} 14 | \alias{format_vglmer} 15 | \alias{format_glmer} 16 | \alias{ELBO} 17 | \title{Generic Functions after Running vglmer} 18 | \usage{ 19 | \method{fixef}{vglmer}(object, ...) 20 | 21 | \method{sigma}{vglmer}(object, ...) 22 | 23 | \method{ranef}{vglmer}(object, ...) 24 | 25 | \method{coef}{vglmer}(object, ...) 26 | 27 | \method{vcov}{vglmer}(object, ...) 28 | 29 | \method{fitted}{vglmer}(object, ...) 30 | 31 | \method{print}{vglmer}(x, ...) 
32 | 33 | \method{summary}{vglmer}(object, display_re = TRUE, ...) 34 | 35 | \method{formula}{vglmer}(x, form = "original", ...) 36 | 37 | format_vglmer(object) 38 | 39 | format_glmer(object) 40 | 41 | ELBO(object, type = c("final", "trajectory")) 42 | } 43 | \arguments{ 44 | \item{object}{Model fit using \code{vglmer}.} 45 | 46 | \item{...}{Not used; included to maintain compatibility with existing 47 | methods.} 48 | 49 | \item{x}{Model fit using \code{vglmer}.} 50 | 51 | \item{display_re}{Default (\code{TRUE}) prints a summary of the 52 | random effects alongside the fixed effects.} 53 | 54 | \item{form}{Describes the type of formula to report: 55 | \code{"original"} returns the user input, \code{"fe"} returns the fixed 56 | effects only, \code{"re"} returns the random effects only.} 57 | 58 | \item{type}{Default (\code{"final"}) gives the ELBO at convergence. 59 | \code{"trajectory"} gives the ELBO estimated at each iteration. This is 60 | used to assess model convergence.} 61 | } 62 | \value{ 63 | The functions here return a variety of different objects depending on 64 | the specific function. "Details" describes the behavior of each one. Their 65 | output is similar to the typical behavior for the corresponding generic 66 | functions. 67 | } 68 | \description{ 69 | \code{vglmer} uses many standard methods from \code{lm} and \code{lme4} with 70 | limited changes. These provide summaries of the estimated variational 71 | distributions. 72 | } 73 | \details{ 74 | The accompanying functions are briefly described below. 75 | 76 | \code{coef} and \code{vcov} return the mean and variance of the fixed effects 77 | (\eqn{\beta}). \code{fixef} returns the mean of the fixed effects. 78 | 79 | \code{ranef} extracts the random effects (\eqn{\alpha}) in a similar, 80 | although slightly different format, to \code{lme4}. It includes the estimated 81 | posterior mean and variance in a list of data.frames with one entry per 82 | random effect \eqn{j}. 83 | 84 | \code{fitted} extracts the estimated expected \emph{linear predictor}, i.e. 85 | \eqn{E_{q(\theta)}[x_i^T \beta + z_i^T \alpha]} at convergence. 86 | 87 | \code{summary} reports the estimates for all fixed effects as in \code{lm} as 88 | well as some summaries of the random effects (if \code{display_re=TRUE}). 89 | 90 | \code{format_vglmer} collects the mean and variance of the fixed and random 91 | effects into a single data.frame. This is useful for examining all of the 92 | posterior estimates simultaneously. \code{format_glmer} converts an object 93 | estimated with \code{[g]lmer} into a comparable format. 94 | 95 | \code{ELBO} extracts the ELBO from the estimated model. \code{type} can be 96 | set equal to \code{"trajectory"} to get the estimated ELBO at each iteration 97 | and assess convergence. 98 | 99 | \code{sigma} extracts the square root of the posterior mode of 100 | \eqn{q(\sigma^2)} if a linear model is used. 101 | 102 | \code{formula} extracts the formula associated with the \code{vglmer} object. 103 | By default, it returns the formula provided. The fixed and random effects 104 | portions can be extracted separately using the \code{form} argument. 
105 | } 106 | -------------------------------------------------------------------------------- /man/vglmer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vglmer_regression.R 3 | \name{vglmer} 4 | \alias{vglmer} 5 | \title{Variational Inference for Hierarchical Generalized Linear Models} 6 | \usage{ 7 | vglmer(formula, data, family, control = vglmer_control()) 8 | } 9 | \arguments{ 10 | \item{formula}{\code{lme4} style-formula for random effects. Typically, 11 | \code{(1 + z | g)} indicates a random effect for each level of variable 12 | \code{"g"} with a differing slope for the effect of variable \code{"z"} and 13 | an intercept (\code{1}); see "Details" for further discussion and how to 14 | incorporate splines.} 15 | 16 | \item{data}{\code{data.frame} containing the outcome and predictors.} 17 | 18 | \item{family}{Options are "binomial", "linear", or "negbin" (experimental). 19 | If "binomial", outcome must be either binary (\eqn{\{0,1\}}) or 20 | \code{cbind(success, failure)} as per standard \code{glm(er)} syntax. 21 | Non-integer values are permitted for binomial if \code{force_whole} is set 22 | to \code{FALSE} in \code{vglmer_control}.} 23 | 24 | \item{control}{Adjust internal options for estimation. Must use an object 25 | created by \link{vglmer_control}.} 26 | } 27 | \value{ 28 | This returns an object of class \code{vglmer}. The available methods 29 | (e.g. \code{coef}) can be found using \code{methods(class="vglmer")}. 30 | \describe{ 31 | \item{beta}{Contains the estimated distribution of the fixed effects 32 | (\eqn{\beta}). It is multivariate normal. \code{mean} contains the means; 33 | \code{var} contains the variance matrix; \code{decomp_var} contains a matrix 34 | \eqn{L} such that \eqn{L^T L} equals the full variance matrix.} 35 | \item{alpha}{Contains the estimated distribution of the random effects 36 | (\eqn{\alpha}). They are all multivariate normal. \code{mean} contains the 37 | means; \code{dia.var} contains the variance of each random effect. \code{var} 38 | contains the variance matrix of each random effect (j,g). \code{decomp_var} 39 | contains a matrix \eqn{L} such that \eqn{L^T L} equals the full variance of 40 | the entire set of random effects.} 41 | \item{joint}{If \code{factorization_method="weak"}, this is a list with one 42 | element (\code{decomp_var}) that contains a matrix \eqn{L} such that \eqn{L^T 43 | L} equals the full variance matrix between the fixed and random effects 44 | \eqn{q(\beta,\alpha)}. The marginal variances are included in \code{beta} and 45 | \code{alpha}. If the factorization method is not \code{"weak"}, this is 46 | \code{NULL}.} 47 | \item{sigma}{Contains the estimated distribution of each random 48 | effect covariance \eqn{\Sigma_j}; all distributions are Inverse-Wishart. 49 | \code{cov} contains a list of the estimated scale matrices. \code{df} 50 | contains a list of the degrees of freedom.} 51 | \item{hw}{If a Huang-Wand prior is used (see Huang and Wand 2013 or Goplerud 52 | 2024 for more details), then the estimated distribution. Otherwise, it is 53 | \code{NULL}. All distributions are Inverse-Gamma. \code{a} contains a list of 54 | the scale parameters. \code{b} contains a list of the shape parameters.} 55 | \item{sigmasq}{If \code{family="linear"}, this contains a list of the 56 | estimated parameters for \eqn{\sigma^2}; its distribution is Inverse-Gamma. 
57 | \code{a} contains the scale parameter; \code{b} contains the shape 
58 | parameter.} 
59 | \item{ln_r}{If \code{family="negbin"}, this contains the variational 
60 | parameters for the log dispersion parameter \eqn{\ln(r)}. \code{mu} contains 
61 | the mean; \code{sigma} contains the variance.} 
62 | \item{family}{Family of outcome.} 
63 | \item{ELBO}{Contains the ELBO at the termination of the algorithm.} 
64 | \item{ELBO_trajectory}{\code{data.frame} tracking the ELBO per iteration.} 
65 | \item{control}{Contains the control parameters from \code{vglmer_control} 
66 | used in estimation.} 
67 | \item{internal_parameters}{Variety of internal parameters used in 
68 | post-estimation functions.} 
69 | \item{formula}{Contains the formula used for estimation; contains the 
70 | original formula, fixed effects, and random effects parts separately for 
71 | post-estimation functions. See \code{formula.vglmer} for more details.} 
72 | } 
73 | } 
74 | \description{ 
75 | This function estimates hierarchical models using mean-field variational 
76 | inference. \code{vglmer} accepts the standard syntax used for \code{lme4}, e.g., 
77 | \code{y ~ x + (x | g)}. Options are described below. Goplerud (2022; 2024) 
78 | provides details on the variational algorithms. 
79 | } 
80 | \details{ 
81 | \bold{Estimation Syntax:} The \code{formula} argument takes syntax designed 
82 | to be as similar as possible to \code{lme4}. That is, one can specify models 
83 | using \code{y ~ x + (1 | g)} where \code{(1 | g)} indicates a random intercept. While 
84 | not tested extensively, terms of \code{(1 | g / f)} should work as expected. Terms 
85 | of \code{(1 + x || g)} may work, although they will raise a warning about duplicated 
86 | names of random effects. \code{(1 + x || g)} terms may not work with spline 
87 | estimation. To get around this, one might copy the column \code{g} to 
88 | \code{g_copy} and then write \code{(1 | g) + (0 + x | g_copy)}. 
89 | 
90 | \bold{Splines:} Splines can be added using the term \code{v_s(x)} for a 
91 | spline on the variable \code{x}. These are transformed into hierarchical 
92 | terms in a standard fashion (e.g. Ruppert et al. 2003) and then estimated 
93 | using the variational algorithms. At present, only truncated linear 
94 | functions (\code{type = "tpf"}; the default) and O'Sullivan splines (Wand and 
95 | Ormerod 2008) are included. The options are described in more detail at 
96 | \link{v_s}. 
97 | 
98 | It is possible to have the spline vary across some categorical predictor by 
99 | specifying the \code{"by"} argument such as \code{v_s(x, by = g)}. In effect, 
100 | this adds additional hierarchical terms for the group-level deviations from 
101 | the "global" spline. \emph{Note:} In contrast to the typical presentation of 
102 | these splines interacted with categorical variables (e.g., Ruppert et al. 
103 | 2003), the default use of \code{"by"} includes the lower-order interactions 
104 | in regularized form, i.e. \code{(1 + x | g)}, rather than their unregularized 
105 | version (e.g., \code{x * g}); this can be changed using the \code{by_re} 
106 | argument described in \link{v_s}. Further, all group-level deviations from 
107 | the global spline share the same smoothing parameter (same prior 
108 | distribution). 
109 | 
110 | \bold{Default Settings:} By default, the model is estimated using the 
111 | "strong" (i.e. fully factorized) variational assumption.
Setting 112 | \code{vglmer_control(factorization_method = "weak")} will improve the quality 113 | of the variance approximation but may take considerably more time to 114 | estimate. See Goplerud (2022) for discussion. 115 | 116 | By default, the prior on each random effect variance (\eqn{\Sigma_j}) uses a Huang-Wand prior (Huang 117 | and Wand 2013) with hyper-parameters \eqn{\nu_j = 2} and \eqn{A_{j,k} = 5}. 118 | This is designed to be proper but weakly informative. Other options are 119 | discussed in \link{vglmer_control} under the \code{prior_variance} argument. 120 | 121 | By default, estimation is accelerated using SQUAREM (Varadhan and Roland 122 | 2008) and (one-step-late) parameter expansion for variational Bayes. Under 123 | the default \code{"strong"} factorization, a "translation" expansion is used; 124 | under other factorizations a "mean" expansion is used. These can be adjusted 125 | using \link{vglmer_control}. See Goplerud (2024) for more discussion of 126 | these methods. 127 | } 128 | \examples{ 129 | 130 | set.seed(234) 131 | sim_data <- data.frame( 132 | x = rnorm(100), 133 | y = rbinom(100, 1, 0.5), 134 | g = sample(letters, 100, replace = TRUE) 135 | ) 136 | 137 | # Run with defaults 138 | est_vglmer <- vglmer(y ~ x + (x | g), data = sim_data, family = "binomial") 139 | 140 | # Simple prediction 141 | predict(est_vglmer, newdata = sim_data) 142 | 143 | # Summarize results 144 | summary(est_vglmer) 145 | 146 | # Extract parameters 147 | coef(est_vglmer); vcov(est_vglmer) 148 | 149 | # Comparability with lme4, 150 | # although ranef is formatted differently. 151 | ranef(est_vglmer); fixef(est_vglmer) 152 | 153 | \donttest{ 154 | # Run with weaker (i.e. better) approximation 155 | vglmer(y ~ x + (x | g), 156 | data = sim_data, 157 | control = vglmer_control(factorization_method = "weak"), 158 | family = "binomial") 159 | } 160 | 161 | \donttest{ 162 | # Use a spline on x with a linear outcome 163 | vglmer(y ~ v_s(x), 164 | data = sim_data, 165 | family = "linear") 166 | } 167 | 168 | } 169 | \references{ 170 | Goplerud, Max. 2022. "Fast and Accurate Estimation of Non-Nested Binomial 171 | Hierarchical Models Using Variational Inference." \emph{Bayesian Analysis}. 17(2): 172 | 623-650. 173 | 174 | Goplerud, Max. 2024. "Re-Evaluating Machine Learning for MRP Given the 175 | Comparable Performance of (Deep) Hierarchical Models." \emph{American 176 | Political Science Review}. 118(1): 529-536. 177 | 178 | Huang, Alan, and Matthew P. Wand. 2013. "Simple Marginally Noninformative 179 | Prior Distributions for Covariance Matrices." \emph{Bayesian Analysis}. 180 | 8(2):439-452. 181 | 182 | Ruppert, David, Matt P. Wand, and Raymond J. Carroll. 2003. 183 | \emph{Semiparametric Regression}. Cambridge University Press. 184 | 185 | Varadhan, Ravi, and Christophe Roland. 2008. "Simple and Globally Convergent 186 | Methods for Accelerating the Convergence of any EM Algorithm." \emph{Scandinavian 187 | Journal of Statistics}. 35(2): 335-353. 188 | 189 | Wand, Matt P. and Ormerod, John T. 2008. "On Semiparametric Regression with 190 | O'Sullivan Penalized Splines". \emph{Australian & New Zealand Journal of Statistics}. 191 | 50(2): 179-198. 
192 | } 
193 | 
-------------------------------------------------------------------------------- /man/vglmer_control.Rd: -------------------------------------------------------------------------------- 
1 | % Generated by roxygen2: do not edit by hand 
2 | % Please edit documentation in R/control_functions.R 
3 | \name{vglmer_control} 
4 | \alias{vglmer_control} 
5 | \title{Control for vglmer estimation} 
6 | \usage{ 
7 | vglmer_control( 
8 | iterations = 1000, 
9 | prior_variance = "hw", 
10 | factorization_method = c("strong", "partial", "weak"), 
11 | parameter_expansion = "translation", 
12 | do_SQUAREM = TRUE, 
13 | tolerance_elbo = 1e-08, 
14 | tolerance_parameters = 1e-05, 
15 | force_whole = TRUE, 
16 | print_prog = NULL, 
17 | do_timing = FALSE, 
18 | verbose_time = FALSE, 
19 | return_data = FALSE, 
20 | linpred_method = "joint", 
21 | vi_r_method = "VEM", 
22 | verify_columns = FALSE, 
23 | debug_param = FALSE, 
24 | debug_ELBO = FALSE, 
25 | debug_px = FALSE, 
26 | quiet = TRUE, 
27 | quiet_rho = TRUE, 
28 | px_method = "dynamic", 
29 | px_numerical_it = 10, 
30 | hw_inner = 10, 
31 | init = "EM_FE" 
32 | ) 
33 | } 
34 | \arguments{ 
35 | \item{iterations}{Default of 1000; this sets the maximum number of iterations 
36 | used in estimation.} 
37 | 
38 | \item{prior_variance}{Prior distribution on the random effect variance 
39 | \eqn{\Sigma_j}. Options are \code{hw}, \code{jeffreys}, \code{mean_exists}, 
40 | \code{uniform}, and \code{limit}. The default (\code{hw}) is the Huang-Wand 
41 | (2013) prior whose hyper-parameters are \eqn{\nu_j} = 2 and \eqn{A_{j,k}} = 
42 | 5. Otherwise, the prior is an Inverse Wishart with the following parameters, 
43 | where \eqn{d_j} is the dimensionality of the random effect \eqn{j}: 
44 | \itemize{ 
45 | \item mean_exists: \eqn{IW(d_j + 1, I)} 
46 | \item jeffreys: \eqn{IW(0, 0)} 
47 | \item uniform: \eqn{IW(-[d_j+1], 0)} 
48 | \item limit: \eqn{IW(d_j - 1, 0)} 
49 | } 
50 | Estimation may fail if an improper prior (\code{jeffreys}, \code{uniform}, 
51 | \code{limit}) is used.} 
52 | 
53 | \item{factorization_method}{Factorization assumption for the variational 
54 | approximation. Default of \code{"strong"}, i.e. a fully factorized model. 
55 | Described in detail in Goplerud (2022). \code{"strong"}, \code{"partial"}, 
56 | and \code{"weak"} correspond to Schemes I, II, and III respectively in that 
57 | paper.} 
58 | 
59 | \item{parameter_expansion}{Default of \code{"translation"} (see Goplerud 
60 | 2024). Valid options are \code{"translation"}, \code{"mean"}, or 
61 | \code{"none"}. \code{"mean"} should be employed if \code{"translation"} is 
62 | not available or is too computationally expensive. For negative binomial 
63 | estimation or any estimation where \code{factorization_method != "strong"}, 
64 | only \code{"mean"} and \code{"none"} are available.} 
65 | 
66 | \item{do_SQUAREM}{Default (\code{TRUE}) accelerates estimation using SQUAREM 
67 | (Varadhan and Roland 2008).} 
68 | 
69 | \item{tolerance_elbo}{Default (\code{1e-8}) sets a convergence threshold; 
70 | estimation terminates if the change in the ELBO falls below this tolerance.} 
71 | 
72 | \item{tolerance_parameters}{Default (\code{1e-5}) sets a convergence 
73 | threshold that is met when no parameter changes by more than this 
74 | tolerance from its previously estimated value.} 
75 | 
76 | \item{force_whole}{Default (\code{TRUE}) requires integer values for the observed 
77 | outcome for binomial or count models.
\code{FALSE} allows for fractional 
78 | responses.} 
79 | 
80 | \item{print_prog}{Default (\code{NULL}) prints a \code{"."} to indicate once 
81 | 5\% of the total iterations have elapsed. Set to a positive integer 
82 | \code{int} to print a \code{"."} every \code{int} iterations.} 
83 | 
84 | \item{do_timing}{Default (\code{FALSE}) does not estimate timing of each 
85 | variational update; \code{TRUE} requires the package \code{tictoc}.} 
86 | 
87 | \item{verbose_time}{Default (\code{FALSE}) does not print the time elapsed 
88 | for each parameter update. Set to \code{TRUE}, in conjunction with 
89 | \code{do_timing=TRUE}, to see the time taken for each parameter update.} 
90 | 
91 | \item{return_data}{Default (\code{FALSE}) does not return the original 
92 | design. Set to \code{TRUE} to debug convergence issues.} 
93 | 
94 | \item{linpred_method}{Default (\code{"joint"}) updates the mean parameters 
95 | for the fixed and random effects simultaneously. This can improve the speed 
96 | of estimation but may be costly for large datasets; use \code{"cyclical"} 
97 | to update each parameter block separately.} 
98 | 
99 | \item{vi_r_method}{Default (\code{"VEM"}) uses a variational EM algorithm for 
100 | updating \eqn{r} if \code{family="negbin"}. This assumes a point mass 
101 | distribution on \eqn{r}. A number can be provided to fix \eqn{r}. These are 
102 | the only available options.} 
103 | 
104 | \item{verify_columns}{Default (\code{FALSE}) \bold{does not} verify that all 
105 | columns are drawn from the data.frame itself versus the environment. Set to 
106 | \code{TRUE} to debug potential issues.} 
107 | 
108 | \item{debug_param}{Default (\code{FALSE}) does not store parameters before 
109 | the final iteration. Set to \code{TRUE} to debug convergence issues.} 
110 | 
111 | \item{debug_ELBO}{Default (\code{FALSE}) does not store the ELBO after each 
112 | parameter update. Set to \code{TRUE} to debug convergence issues.} 
113 | 
114 | \item{debug_px}{Default (\code{FALSE}) does not store information about 
115 | whether parameter expansion worked. Set to \code{TRUE} to debug convergence 
116 | issues.} 
117 | 
118 | \item{quiet}{Default (\code{TRUE}) does not print intermediate output about 
119 | convergence. Set to \code{FALSE} to debug.} 
120 | 
121 | \item{quiet_rho}{Default (\code{TRUE}) does not print information about 
122 | parameter expansions. Set to \code{FALSE} to debug convergence issues.} 
123 | 
124 | \item{px_method}{When \code{parameter_expansion="translation"}, the default 
125 | (\code{"dynamic"}) tries a one-step-late update and, if this fails, a 
126 | numerical improvement by L-BFGS-B. For an Inverse-Wishart prior on 
127 | \eqn{\Sigma_j}, this is set to \code{"osl"}, which only attempts a 
128 | one-step-late update.} 
129 | 
130 | \item{px_numerical_it}{Default of 10; if L-BFGS-B is needed for a parameter 
131 | expansion, this sets the number of steps used.} 
132 | 
133 | \item{hw_inner}{If \code{prior_variance="hw"}, this sets the number of 
134 | inner iterations that alternate between updating the \eqn{\Sigma_j} and \eqn{a_{j,k}} 
135 | variational distributions within each outer iteration. A larger number approximates 
136 | jointly updating both parameters. Default (10) typically performs well.} 
137 | 
138 | \item{init}{Default (\code{"EM_FE"}) initializes the mean variational 
139 | parameters for \eqn{q(\beta, \alpha)} by setting the random effects to zero 
140 | and estimating the fixed effects using a short-running EM algorithm.
141 | \code{"EM"} initializes the model with a ridge regression with a guess as 142 | to the random effect variance. \code{"random"} initializes the means 143 | randomly. \code{"zero"} initializes them at zero.} 144 | } 145 | \value{ 146 | This function returns a named list with class \code{vglmer_control}. 147 | It is passed to \code{vglmer} in the argument \code{control}. This argument 148 | only accepts objects created using \code{vglmer_control}. 149 | } 150 | \description{ 151 | This function controls various estimation options for \code{vglmer}. 152 | } 153 | \references{ 154 | Goplerud, Max. 2022. "Fast and Accurate Estimation of Non-Nested Binomial 155 | Hierarchical Models Using Variational Inference." \emph{Bayesian Analysis}. 156 | 17(2): 623-650. 157 | 158 | Goplerud, Max. 2024. "Re-Evaluating Machine Learning for MRP Given the 159 | Comparable Performance of (Deep) Hierarchical Models." \emph{American 160 | Political Science Review}. 118(1): 529-536. 161 | 162 | Huang, Alan, and Matthew P. Wand. 2013. "Simple Marginally Noninformative 163 | Prior Distributions for Covariance Matrices." \emph{Bayesian Analysis}. 164 | 8(2):439-452. 165 | 166 | Varadhan, Ravi, and Christophe Roland. 2008. "Simple and Globally Convergent 167 | Methods for Accelerating the Convergence of any EM Algorithm." 168 | \emph{Scandinavian Journal of Statistics}. 35(2): 335-353. 169 | } 170 | -------------------------------------------------------------------------------- /man/vglmer_predict.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict_functions.R 3 | \name{vglmer_predict} 4 | \alias{vglmer_predict} 5 | \alias{predict.vglmer} 6 | \alias{predict_MAVB} 7 | \title{Predict after vglmer} 8 | \usage{ 9 | \method{predict}{vglmer}( 10 | object, 11 | newdata, 12 | type = "link", 13 | samples = 0, 14 | samples_only = FALSE, 15 | summary = TRUE, 16 | allow_missing_levels = FALSE, 17 | ... 18 | ) 19 | 20 | predict_MAVB( 21 | object, 22 | newdata, 23 | samples = 0, 24 | samples_only = FALSE, 25 | var_px = Inf, 26 | summary = TRUE, 27 | allow_missing_levels = FALSE 28 | ) 29 | } 30 | \arguments{ 31 | \item{object}{Model fit using \code{vglmer}.} 32 | 33 | \item{newdata}{Dataset to use for predictions. It cannot be missing.} 34 | 35 | \item{type}{Default (\code{"link"}) returns the linear predictor; 36 | \code{"terms"} returns the predicted value for each random effect (or 37 | spline) separately as well as one that collects all fixed effects. At the 38 | moment, other options are not enabled.} 39 | 40 | \item{samples}{Number of samples to draw. Using \code{0} (default) gives the 41 | expectation of the linear predictor. A positive integer draws 42 | \code{samples} samples from the variational distributions and calculates 43 | the linear predictor.} 44 | 45 | \item{samples_only}{Default (\code{FALSE}) returns the samples from the 46 | variational distributions, \bold{not} the prediction. Each row is a sample and 47 | each column is a parameter.} 48 | 49 | \item{summary}{Default (\code{TRUE}) returns the mean and variance of the 50 | samples for each observation. \code{FALSE} returns a matrix of the sampled 51 | linear predictor for each observation. Each row is a sample and each column 52 | is an observation.} 53 | 54 | \item{allow_missing_levels}{Default (\code{FALSE}) does not allow prediction 55 | for levels not observed in the original data. 
\code{TRUE} allows for 56 | prediction on unseen levels; the value of \code{0} (with no uncertainty) is 57 | used for the corresponding random effect.} 58 | 59 | \item{...}{Not used; included to maintain compatibility with existing 60 | methods.} 61 | 62 | \item{var_px}{Variance of working prior for marginal augmentation. Default 63 | (\code{Inf}) is a flat, improper, prior.} 64 | } 65 | \value{ 66 | This function returns an estimate of the linear predictor. The 67 | default returns the expected mean, i.e. \eqn{E_{q(\alpha,\beta)}[x_i^T 68 | \beta + z_i^T\alpha]}. If \code{samples > 0}, these functions return a 69 | summary of the prediction for each observation, i.e. the estimated mean and 70 | variance. If \code{summary = FALSE}, the sampled values of the linear 71 | predictor are returned as a matrix. \code{predict_MAVB} performs MAVB as 72 | described in Goplerud (2022) before returning the linear predictor. 73 | 74 | If \code{allow_missing_levels = TRUE}, then observations with a new 75 | (unseen) level for the random effect are given a value of zero for that 76 | term of the prediction. 77 | } 78 | \description{ 79 | These functions calculate the estimated linear predictor using 80 | the variational distributions. \code{predict.vglmer} draws predictions 81 | using the estimated variational distributions; \code{predict_MAVB} does so 82 | using the MAVB procedure described in Goplerud (2022). 83 | } 84 | \examples{ 85 | 86 | set.seed(123) 87 | sim_data <- data.frame( 88 | x = rnorm(100), 89 | y = rbinom(100, 1, 0.5), 90 | g = sample(letters, 100, replace = TRUE) 91 | ) 92 | 93 | # Run with defaults 94 | est_vglmer <- vglmer(y ~ x + (x | g), data = sim_data, family = "binomial") 95 | 96 | # Simple prediction 97 | predict(est_vglmer, newdata = sim_data) 98 | # Return 10 posterior draws of the linear predictor for each observation. 
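# (Editor sketch) With summary = TRUE (the default once samples > 0), the
# mean and variance of the sampled linear predictor are returned per
# observation; summary = FALSE below returns the raw draws instead.
predict(est_vglmer, newdata = sim_data, summary = FALSE, samples = 10)
# predict_MAVB returns the analogous draws after marginal augmentation: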
99 | predict_MAVB(est_vglmer, newdata = sim_data, summary = FALSE, samples = 10) 
100 | # Predict with a new level; note this would fail if 
101 | # allow_missing_levels = FALSE (the default) 
102 | predict(est_vglmer, 
103 | newdata = data.frame(g = "AB", x = 0), 
104 | allow_missing_levels = TRUE 
105 | ) 
106 | } 
107 | 
-------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 
1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 
2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 
3 | 
4 | #include <RcppEigen.h> 
5 | #include <Rcpp.h> 
6 | 
7 | using namespace Rcpp; 
8 | 
9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 
10 | Rcpp::Rostream<true>& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 
11 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 
12 | #endif 
13 | 
14 | // LinRegChol 
15 | List LinRegChol(const Eigen::MappedSparseMatrix<double> X, const Eigen::MappedSparseMatrix<double> omega, const Eigen::MappedSparseMatrix<double> prior_precision, const Eigen::Map<Eigen::VectorXd> y, const bool save_chol); 
16 | RcppExport SEXP _vglmer_LinRegChol(SEXP XSEXP, SEXP omegaSEXP, SEXP prior_precisionSEXP, SEXP ySEXP, SEXP save_cholSEXP) { 
17 | BEGIN_RCPP 
18 | Rcpp::RObject rcpp_result_gen; 
19 | Rcpp::RNGScope rcpp_rngScope_gen; 
20 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type X(XSEXP); 
21 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type omega(omegaSEXP); 
22 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type prior_precision(prior_precisionSEXP); 
23 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::VectorXd> >::type y(ySEXP); 
24 | Rcpp::traits::input_parameter< const bool >::type save_chol(save_cholSEXP); 
25 | rcpp_result_gen = Rcpp::wrap(LinRegChol(X, omega, prior_precision, y, save_chol)); 
26 | return rcpp_result_gen; 
27 | END_RCPP 
28 | } 
29 | // calculate_expected_outer_alpha 
30 | List calculate_expected_outer_alpha(const Eigen::MappedSparseMatrix<double> L, const Eigen::Map<Eigen::VectorXd> alpha_mu, const Rcpp::List& re_position_list); 
31 | RcppExport SEXP _vglmer_calculate_expected_outer_alpha(SEXP LSEXP, SEXP alpha_muSEXP, SEXP re_position_listSEXP) { 
32 | BEGIN_RCPP 
33 | Rcpp::RObject rcpp_result_gen; 
34 | Rcpp::RNGScope rcpp_rngScope_gen; 
35 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type L(LSEXP); 
36 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::VectorXd> >::type alpha_mu(alpha_muSEXP); 
37 | Rcpp::traits::input_parameter< const Rcpp::List& >::type re_position_list(re_position_listSEXP); 
38 | rcpp_result_gen = Rcpp::wrap(calculate_expected_outer_alpha(L, alpha_mu, re_position_list)); 
39 | return rcpp_result_gen; 
40 | END_RCPP 
41 | } 
42 | // unique_rows 
43 | IntegerMatrix unique_rows(const IntegerMatrix m); 
44 | RcppExport SEXP _vglmer_unique_rows(SEXP mSEXP) { 
45 | BEGIN_RCPP 
46 | Rcpp::RObject rcpp_result_gen; 
47 | Rcpp::RNGScope rcpp_rngScope_gen; 
48 | Rcpp::traits::input_parameter< const IntegerMatrix >::type m(mSEXP); 
49 | rcpp_result_gen = Rcpp::wrap(unique_rows(m)); 
50 | return rcpp_result_gen; 
51 | END_RCPP 
52 | } 
53 | // prepare_Z_for_px 
54 | Rcpp::List prepare_Z_for_px(Rcpp::IntegerMatrix& Mmap); 
55 | RcppExport SEXP _vglmer_prepare_Z_for_px(SEXP MmapSEXP) { 
56 | BEGIN_RCPP 
57 | Rcpp::RObject rcpp_result_gen; 
58 | Rcpp::RNGScope rcpp_rngScope_gen; 
59 | Rcpp::traits::input_parameter< Rcpp::IntegerMatrix& >::type Mmap(MmapSEXP); 
60 | rcpp_result_gen = Rcpp::wrap(prepare_Z_for_px(Mmap)); 
61 | return rcpp_result_gen; 
62 | END_RCPP 
63 | } 
64 | // chol_sparse 
65 | Rcpp::List chol_sparse(const Eigen::MappedSparseMatrix<double> X, const Eigen::MappedSparseMatrix<double> omega, const Eigen::MappedSparseMatrix<double> precision); 
66 | RcppExport SEXP _vglmer_chol_sparse(SEXP XSEXP, SEXP omegaSEXP, SEXP precisionSEXP) { 
67 | BEGIN_RCPP 
68 | Rcpp::RObject rcpp_result_gen; 
69 | Rcpp::RNGScope rcpp_rngScope_gen; 
70 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type X(XSEXP); 
71 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type omega(omegaSEXP); 
72 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type precision(precisionSEXP); 
73 | rcpp_result_gen = Rcpp::wrap(chol_sparse(X, omega, precision)); 
74 | return rcpp_result_gen; 
75 | END_RCPP 
76 | } 
77 | // cpp_zVz 
78 | Eigen::VectorXd cpp_zVz(const Eigen::MappedSparseMatrix<double> Z, const Eigen::MappedSparseMatrix<double> V); 
79 | RcppExport SEXP _vglmer_cpp_zVz(SEXP ZSEXP, SEXP VSEXP) { 
80 | BEGIN_RCPP 
81 | Rcpp::RObject rcpp_result_gen; 
82 | Rcpp::RNGScope rcpp_rngScope_gen; 
83 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type Z(ZSEXP); 
84 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type V(VSEXP); 
85 | rcpp_result_gen = Rcpp::wrap(cpp_zVz(Z, V)); 
86 | return rcpp_result_gen; 
87 | END_RCPP 
88 | } 
89 | // vecR_ridge_general 
90 | Eigen::MatrixXd vecR_ridge_general(const Eigen::MappedSparseMatrix<double> L, const Rcpp::NumericVector pg_mean, const Eigen::Map<Eigen::MatrixXd> Z, const Eigen::Map<Eigen::MatrixXi> M, const Rcpp::NumericVector mapping_J, const Rcpp::NumericVector d, const Eigen::VectorXi start_z, bool diag_only); 
91 | RcppExport SEXP _vglmer_vecR_ridge_general(SEXP LSEXP, SEXP pg_meanSEXP, SEXP ZSEXP, SEXP MSEXP, SEXP mapping_JSEXP, SEXP dSEXP, SEXP start_zSEXP, SEXP diag_onlySEXP) { 
92 | BEGIN_RCPP 
93 | Rcpp::RObject rcpp_result_gen; 
94 | Rcpp::RNGScope rcpp_rngScope_gen; 
95 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type L(LSEXP); 
96 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type pg_mean(pg_meanSEXP); 
97 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::MatrixXd> >::type Z(ZSEXP); 
98 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::MatrixXi> >::type M(MSEXP); 
99 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type mapping_J(mapping_JSEXP); 
100 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type d(dSEXP); 
101 | Rcpp::traits::input_parameter< const Eigen::VectorXi >::type start_z(start_zSEXP); 
102 | Rcpp::traits::input_parameter< bool >::type diag_only(diag_onlySEXP); 
103 | rcpp_result_gen = Rcpp::wrap(vecR_ridge_general(L, pg_mean, Z, M, mapping_J, d, start_z, diag_only)); 
104 | return rcpp_result_gen; 
105 | END_RCPP 
106 | } 
107 | // vecR_design 
108 | Eigen::MatrixXd vecR_design(const Eigen::Map<Eigen::VectorXd> alpha_mu, const Eigen::Map<Eigen::MatrixXd> Z, const Eigen::Map<Eigen::MatrixXi> M, const Rcpp::NumericVector mapping_J, const Rcpp::NumericVector d, const Eigen::VectorXi start_z); 
109 | RcppExport SEXP _vglmer_vecR_design(SEXP alpha_muSEXP, SEXP ZSEXP, SEXP MSEXP, SEXP mapping_JSEXP, SEXP dSEXP, SEXP start_zSEXP) { 
110 | BEGIN_RCPP 
111 | Rcpp::RObject rcpp_result_gen; 
112 | Rcpp::RNGScope rcpp_rngScope_gen; 
113 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::VectorXd> >::type alpha_mu(alpha_muSEXP); 
114 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::MatrixXd> >::type Z(ZSEXP); 
115 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::MatrixXi> >::type M(MSEXP); 
116 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type mapping_J(mapping_JSEXP); 
117 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type d(dSEXP); 
118 | Rcpp::traits::input_parameter< const Eigen::VectorXi >::type start_z(start_zSEXP); 
119 | rcpp_result_gen = Rcpp::wrap(vecR_design(alpha_mu, Z, M, mapping_J, d, start_z)); 
120 | return rcpp_result_gen; 
121 | END_RCPP 
122 | } 
123 | // vecR_fast_ridge 
124 | Eigen::VectorXd vecR_fast_ridge(const Eigen::MappedSparseMatrix<double> X, const Eigen::MappedSparseMatrix<double> omega, const Eigen::MappedSparseMatrix<double> prior_precision, const Eigen::Map<Eigen::VectorXd> y, const Eigen::Map<Eigen::VectorXd> adjust_y); 
125 | RcppExport SEXP _vglmer_vecR_fast_ridge(SEXP XSEXP, SEXP omegaSEXP, SEXP prior_precisionSEXP, SEXP ySEXP, SEXP adjust_ySEXP) { 
126 | BEGIN_RCPP 
127 | Rcpp::RObject rcpp_result_gen; 
128 | Rcpp::RNGScope rcpp_rngScope_gen; 
129 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type X(XSEXP); 
130 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type omega(omegaSEXP); 
131 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type prior_precision(prior_precisionSEXP); 
132 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::VectorXd> >::type y(ySEXP); 
133 | Rcpp::traits::input_parameter< const Eigen::Map<Eigen::VectorXd> >::type adjust_y(adjust_ySEXP); 
134 | rcpp_result_gen = Rcpp::wrap(vecR_fast_ridge(X, omega, prior_precision, y, adjust_y)); 
135 | return rcpp_result_gen; 
136 | END_RCPP 
137 | } 
138 | // vecR_ridge_new 
139 | Eigen::MatrixXd vecR_ridge_new(const Eigen::MappedSparseMatrix<double> L, const Eigen::ArrayXd pg_mean, const Rcpp::NumericVector mapping_J, const Rcpp::NumericVector d, const Rcpp::List store_id, const Rcpp::List store_re_id, const Rcpp::List store_design, bool diag_only); 
140 | RcppExport SEXP _vglmer_vecR_ridge_new(SEXP LSEXP, SEXP pg_meanSEXP, SEXP mapping_JSEXP, SEXP dSEXP, SEXP store_idSEXP, SEXP store_re_idSEXP, SEXP store_designSEXP, SEXP diag_onlySEXP) { 
141 | BEGIN_RCPP 
142 | Rcpp::RObject rcpp_result_gen; 
143 | Rcpp::RNGScope rcpp_rngScope_gen; 
144 | Rcpp::traits::input_parameter< const Eigen::MappedSparseMatrix<double> >::type L(LSEXP); 
145 | Rcpp::traits::input_parameter< const Eigen::ArrayXd >::type pg_mean(pg_meanSEXP); 
146 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type mapping_J(mapping_JSEXP); 
147 | Rcpp::traits::input_parameter< const Rcpp::NumericVector >::type d(dSEXP); 
148 | Rcpp::traits::input_parameter< const Rcpp::List >::type store_id(store_idSEXP); 
149 | Rcpp::traits::input_parameter< const Rcpp::List >::type store_re_id(store_re_idSEXP); 
150 | Rcpp::traits::input_parameter< const Rcpp::List >::type store_design(store_designSEXP); 
151 | Rcpp::traits::input_parameter< bool >::type diag_only(diag_onlySEXP); 
152 | rcpp_result_gen = Rcpp::wrap(vecR_ridge_new(L, pg_mean, mapping_J, d, store_id, store_re_id, store_design, diag_only)); 
153 | return rcpp_result_gen; 
154 | END_RCPP 
155 | } 
156 | 
157 | static const R_CallMethodDef CallEntries[] = { 
158 | {"_vglmer_LinRegChol", (DL_FUNC) &_vglmer_LinRegChol, 5}, 
159 | {"_vglmer_calculate_expected_outer_alpha", (DL_FUNC) &_vglmer_calculate_expected_outer_alpha, 3}, 
160 | {"_vglmer_unique_rows", (DL_FUNC) &_vglmer_unique_rows, 1}, 
161 | {"_vglmer_prepare_Z_for_px", (DL_FUNC) &_vglmer_prepare_Z_for_px, 1}, 
162 | {"_vglmer_chol_sparse", (DL_FUNC) &_vglmer_chol_sparse, 3}, 
163 | {"_vglmer_cpp_zVz", (DL_FUNC) &_vglmer_cpp_zVz, 2}, 
164 | {"_vglmer_vecR_ridge_general", (DL_FUNC) &_vglmer_vecR_ridge_general, 8}, 
165 | {"_vglmer_vecR_design", (DL_FUNC) &_vglmer_vecR_design, 6}, 
166 | {"_vglmer_vecR_fast_ridge", (DL_FUNC) &_vglmer_vecR_fast_ridge, 5}, 
167 | {"_vglmer_vecR_ridge_new", (DL_FUNC) &_vglmer_vecR_ridge_new, 8}, 
168 | {NULL, NULL, 0} 
169 | }; 
170 | 
171 | RcppExport void R_init_vglmer(DllInfo *dll) { 
172 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 
173 | R_useDynamicSymbols(dll, FALSE); 
174 | } 
175 | 
-------------------------------------------------------------------------------- /src/eigen_alpha.cpp: -------------------------------------------------------------------------------- 
1 | #include "RcppEigen.h" 
2 | 
3 | // [[Rcpp::depends(RcppEigen)]] 
4 | 
5 | using namespace Rcpp; 
6 | 
7 | 
8 | // Q = (X^T X + precision) 
9 | // "Ch" finds L L^T = Q. 
10 | // "Ch.solve" finds "x" such that (X^T X + precision) "x" = X^T Y 
11 | // That is, it finds the ridge solution. 
12 | // If z is i.i.d. standard Gaussian, then solving 
13 | // L^T \alpha = z 
14 | // gives \alpha \sim N(0, Q^{-1}) 
15 | 
16 | //' Linear Regression by Cholesky 
17 | //' 
18 | //' Do linear regression of form solve(X^T O X + P, X^T y) where O is omega, P 
19 | //' is precision. 
20 | //' 
21 | //' @keywords internal 
22 | //' 
23 | //' @param X Design Matrix 
24 | //' @param omega Polya-Gamma weights 
25 | //' @param prior_precision Prior Precision for Regression 
26 | //' @param y Outcome 
27 | //' @param save_chol Save Cholesky factor 
28 | // [[Rcpp::export]] 
29 | List LinRegChol( 
30 | const Eigen::MappedSparseMatrix<double> X, 
31 | const Eigen::MappedSparseMatrix<double> omega, 
32 | const Eigen::MappedSparseMatrix<double> prior_precision, 
33 | const Eigen::Map<Eigen::VectorXd> y, 
34 | const bool save_chol = true 
35 | ){ 
36 | Eigen::SparseMatrix<double> adj_X = X.adjoint(); 
37 | Eigen::SimplicialLLT<Eigen::SparseMatrix<double> > Ch(adj_X * omega * X + prior_precision); 
38 | Eigen::VectorXd mean = Ch.solve(adj_X * y); 
39 | if (save_chol == false){ 
40 | return List::create( 
41 | Rcpp::Named("mean") = mean 
42 | ); 
43 | } 
44 | //Extract L to get the transformation of the std.normal 
45 | //into the correct form of N(0, Q^{-1}) and add the mean. 
46 | Eigen::SparseMatrix<double> lower_l = Ch.matrixL(); 
47 | Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> invP = Ch.permutationPinv(); 
48 | Eigen::SparseMatrix<double> invperm_L = invP * lower_l; 
49 | Eigen::VectorXd diag_L = lower_l.diagonal(); 
50 | 
51 | // double i_sum = 0; 
52 | // int size_L = lower_l.rows(); 
53 | // 
54 | // for (int i = 0; i < (size_L - 1); ++i){ 
55 | // Eigen::VectorXd unit_i = Eigen::VectorXd::Unit(size_L, i); 
56 | // i_sum += Ch.solve(unit_i).sum(); 
57 | // } 
58 | 
59 | // Eigen::SparseMatrix<double> I(lower_l.rows(),lower_l.rows()); 
60 | // I.setIdentity(); 
61 | // Eigen::SparseMatrix<double> test_inv = Ch.solve(I); 
62 | // 
63 | 
64 | // Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> P = Ch.permutationP(); 
65 | // Eigen::SparseMatrix<double> permuted_L = P * lower_l; 
66 | // Eigen::VectorXd gibbs = Ch.solve(invperm_L * stdnorm) + mean_eb; 
67 | return List::create(///Rcpp::Named("gibbs") = gibbs, 
68 | Rcpp::Named("mean") = mean, 
69 | Rcpp::Named("diag_L") = diag_L, 
70 | Rcpp::Named("invPindex") = invP.indices(), 
71 | Rcpp::Named("Pindex") = Ch.permutationP().indices(), 
72 | Rcpp::Named("origL") = lower_l, 
73 | Rcpp::Named("L") = invperm_L 
74 | ); 
75 | } 
76 | 
77 | // [[Rcpp::export]] 
78 | List calculate_expected_outer_alpha( 
79 | const Eigen::MappedSparseMatrix<double> L, // L^T L = Var(alpha) 
80 | const Eigen::Map<Eigen::VectorXd> alpha_mu, // E[alpha] 
81 | const Rcpp::List& re_position_list 
82 | ){ 
83 | //Get the number of REs.
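// Editor note (sketch): because t(L) %*% L = Var(alpha), the covariance of
// any two coordinates (a, b) is the dot product of columns a and b of L.
// The loops below exploit this to accumulate, for each random effect j,
//   sum_g E[alpha_{j,g} alpha_{j,g}^T] = sum_g { Var(alpha_{j,g}) + mu_{j,g} mu_{j,g}^T },
// i.e. the expected outer product needed for the Sigma_j updates.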
84 | int n_REs = re_position_list.size(); 
85 | 
86 | List outer_alpha(n_REs); 
87 | List var_alpha(n_REs); 
88 | List mu_alpha(n_REs); 
89 | 
90 | Rcpp::NumericVector dim_RE = NumericVector(n_REs); 
91 | //For each random effect: 
92 | for (int j = 0; j < n_REs; ++j){ 
93 | List re_positions_j = re_position_list[j]; 
94 | int g_j = re_positions_j.size(); 
95 | 
96 | NumericVector first_position = re_positions_j[0]; 
97 | int size_RE_j = first_position.size(); 
98 | 
99 | Eigen::MatrixXd summed_var_alpha = Eigen::MatrixXd::Zero(size_RE_j,size_RE_j); 
100 | Eigen::MatrixXd summed_outer_alpha = Eigen::MatrixXd::Zero(size_RE_j,size_RE_j); 
101 | 
102 | List var_alpha_j(g_j); 
103 | 
104 | // For each group g in random effect j. 
105 | for(int g = 0; g < g_j; ++g) { 
106 | //For each random effect alpha_{j,g}. 
107 | //Adjust for zero indexing. 
108 | NumericVector g_prime = re_positions_j[g]; 
109 | 
110 | Eigen::MatrixXd var_alpha_j_g = Eigen::MatrixXd::Zero(size_RE_j,size_RE_j); 
111 | for (int i = 0; i < size_RE_j; i++){ 
112 | for (int i_prime = 0; i_prime <= i; i_prime++){ 
113 | int index_i = g_prime[i] - 1; 
114 | int index_i_prime = g_prime[i_prime] - 1; 
115 | 
116 | double sum_i = L.col(index_i).cwiseProduct(L.col(index_i_prime)).sum(); 
117 | 
118 | var_alpha_j_g(i, i_prime) = sum_i; 
119 | 
120 | summed_var_alpha(i,i_prime) = summed_var_alpha(i,i_prime) + sum_i; 
121 | summed_outer_alpha(i, i_prime) = summed_outer_alpha(i, i_prime) + 
122 | alpha_mu(index_i) * alpha_mu(index_i_prime); 
123 | } 
124 | } 
125 | 
126 | var_alpha_j_g.triangularView<Eigen::StrictlyUpper>() = var_alpha_j_g.adjoint(); 
127 | var_alpha_j[g] = var_alpha_j_g; 
128 | } 
129 | Eigen::MatrixXd oa_j = summed_var_alpha + summed_outer_alpha; 
130 | oa_j.triangularView<Eigen::StrictlyUpper>() = oa_j.adjoint(); 
131 | outer_alpha[j] = oa_j; 
132 | var_alpha[j] = var_alpha_j; 
133 | mu_alpha[j] = summed_outer_alpha; 
134 | } 
135 | 
136 | return List::create( 
137 | Rcpp::Named("outer_alpha") = outer_alpha, 
138 | Rcpp::Named("variance_jg") = var_alpha, 
139 | Rcpp::Named("mu_j") = mu_alpha 
140 | ); 
141 | } 
142 | 
143 | 
-------------------------------------------------------------------------------- /src/eigen_helpers.cpp: -------------------------------------------------------------------------------- 
1 | #include "RcppEigen.h" 
2 | 
3 | // [[Rcpp::depends(RcppEigen)]] 
4 | 
5 | using namespace Rcpp; 
6 | 
7 | 
8 | // [[Rcpp::export]] 
9 | IntegerMatrix unique_rows(const IntegerMatrix m) { 
10 | 
11 | Rcpp::Environment base("package:base"); 
12 | Function do_unique = base["unique"]; 
13 | 
14 | return do_unique(m); 
15 | } 
16 | 
17 | // [[Rcpp::export]] 
18 | Rcpp::List prepare_Z_for_px( 
19 | Rcpp::IntegerMatrix& Mmap //Integer matrix of group IDs 
20 | ){ 
21 | Rcout << "A"; 
22 | int J = Mmap.cols(); 
23 | int N = Mmap.rows(); 
24 | 
25 | Rcpp::List store_re_id(J); 
26 | Rcpp::List store_id(J); 
27 | 
28 | IntegerVector out(N); 
29 | 
30 | for (int j = 0; j < J; j++){ 
31 | Rcout << "m"; 
32 | Rcpp::List store_re_id_j(j); 
33 | Rcpp::List store_id_j(j); 
34 | 
35 | for (int jprime = 0; jprime < j; jprime++){ 
36 | 
37 | Rcout << "|"; 
38 | 
39 | IntegerMatrix sub_M(N, 2); 
40 | sub_M.column( 0 ) = Mmap.column(j); 
41 | sub_M.column( 1 ) = Mmap.column(jprime); 
42 | IntegerMatrix umap = unique_rows(sub_M); 
43 | 
44 | int n_unique = umap.rows(); 
45 | 
46 | IntegerVector col_0 = umap.column(0); 
47 | IntegerVector col_1 = umap.column(1); 
48 | 
49 | IntegerVector position_index(N); 
50 | 
51 | for (int i = 0; i < N; i++){ //Loop over each observation 
52 | IntegerVector M_i = Mmap.row(i); 
53 | for(int g=0; g < n_unique; g++) { 
54 | IntegerVector umap_g =
umap.row(g); 
55 | // Compare against columns j and jprime of Mmap, which populate 
56 | // columns 0 and 1 of sub_M (and hence umap). 
55 | if ( (umap_g(0) == M_i(j)) && (umap_g(1) == M_i(jprime)) ){ 
56 | position_index(i) = g; 
57 | break; 
58 | } 
59 | } 
60 | } 
61 | 
62 | store_re_id_j[jprime] = umap; 
63 | store_id_j[jprime] = position_index; 
64 | } 
65 | 
66 | store_re_id[j] = store_re_id_j; 
67 | store_id[j] = store_id_j; 
68 | } 
69 | 
70 | return store_id; 
71 | } 
72 | 
-------------------------------------------------------------------------------- /src/eigen_linalg.cpp: -------------------------------------------------------------------------------- 
1 | #include "RcppEigen.h" 
2 | // [[Rcpp::depends(RcppEigen)]] 
3 | 
4 | using namespace Rcpp; 
5 | 
6 | // [[Rcpp::export]] 
7 | Rcpp::List chol_sparse( 
8 | const Eigen::MappedSparseMatrix<double> X, 
9 | const Eigen::MappedSparseMatrix<double> omega, 
10 | const Eigen::MappedSparseMatrix<double> precision 
11 | ){ 
12 | Eigen::SimplicialLLT<Eigen::SparseMatrix<double> > Ch(X.adjoint() * omega * X + precision); 
13 | 
14 | Eigen::SparseMatrix<double> lower_l = Ch.matrixL(); 
15 | Eigen::PermutationMatrix<Eigen::Dynamic, Eigen::Dynamic> invP = Ch.permutationPinv(); 
16 | Eigen::VectorXd diag_L = lower_l.diagonal(); 
17 | 
18 | return List::create( 
19 | Rcpp::Named("diag_L") = diag_L, 
20 | Rcpp::Named("Pindex") = Ch.permutationP().indices(), 
21 | Rcpp::Named("origL") = lower_l 
22 | ); 
23 | } 
24 | 
25 | // [[Rcpp::export]] 
26 | Eigen::VectorXd cpp_zVz( 
27 | const Eigen::MappedSparseMatrix<double> Z, 
28 | const Eigen::MappedSparseMatrix<double> V 
29 | ){ 
30 | 
31 | Eigen::SparseMatrix<double> VZ_t = V * Z.adjoint(); 
32 | int N = Z.rows(); 
33 | Eigen::VectorXd output(N); 
34 | 
35 | for (int i = 0; i < N; i++){ 
36 | output(i) = VZ_t.col(i).squaredNorm(); 
37 | } 
38 | return output; 
39 | 
40 | // for (int j = 0; j < N; j++){ 
41 | // double norm_j = 0; 
42 | // for (Eigen::SparseMatrix<double>::InnerIterator i_(VZ_t, j); i_; ++i_){ 
43 | // norm_j += std::pow(i_.value(), 2); 
44 | // } 
45 | // output(j) = norm_j; 
46 | // } 
47 | } 
-------------------------------------------------------------------------------- /src/eigen_px.cpp: -------------------------------------------------------------------------------- 
1 | #include "RcppEigen.h" 
2 | // [[Rcpp::depends(RcppEigen)]] 
3 | 
4 | using namespace Rcpp; 
5 | 
6 | // [[Rcpp::export]] 
7 | Eigen::MatrixXd vecR_ridge_general( 
8 | const Eigen::MappedSparseMatrix<double> L, //Decomposition of variance L^T L = VAR(alpha) 
9 | const Rcpp::NumericVector pg_mean, 
10 | const Eigen::Map<Eigen::MatrixXd> Z, 
11 | const Eigen::Map<Eigen::MatrixXi> M, 
12 | const Rcpp::NumericVector mapping_J, // Where to assign the elements to the larger matrix. 
13 | const Rcpp::NumericVector d, 
14 | const Eigen::VectorXi start_z, 
15 | bool diag_only 
16 | ){ 
17 | //const Rcpp::List& mapping_z, // The base data split by observation i and level. 
18 | //const Rcpp::List& mapping_to_re, //The mapping to RE of each individual observation i. 
19 | //Get preliminary variables. 
20 | int nrow_Z = Z.rows(); 
21 | 
22 | Rcpp::NumericVector dsq = d * d; 
23 | int size_vecR = Rcpp::sum(dsq); 
24 | int J = d.size(); 
25 | 
26 | Eigen::MatrixXd ridge_R = Eigen::MatrixXd::Zero(size_vecR, size_vecR); 
27 | 
28 | //Loop over each observation. 
29 | for (int i = 0; i < nrow_Z; ++i){ 
30 | //Matrix to store the output 
31 | Eigen::MatrixXd store_i = Eigen::MatrixXd::Zero(size_vecR, size_vecR); 
32 | 
33 | // List positions_i = mapping_to_re[i]; 
34 | // List l_z_i = mapping_z[i]; 
35 | 
36 | Eigen::VectorXd z_i = Z.row(i); 
37 | Eigen::VectorXi m_i = M.row(i); 
38 | 
39 | //Loop over all pairs of random effects.
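// Editor note (sketch of the structure below): for each pair (j, jprime),
// the contribution of observation i to the ridge matrix is
//   E[omega_i] * ( E[alpha_j alpha_jprime^T] (x) z_ij z_ijprime^T ),
// where (x) denotes the Kronecker product, E[alpha_j alpha_jprime^T] is
// recovered from dot products of columns of L, and the resulting block is
// placed at offset (mapping_J[j], mapping_J[jprime]).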
40 | for (int j = 0; j < J; ++j){ 
41 | for (int jprime = 0; jprime <= j; ++jprime){ 
42 | if (diag_only && (jprime != j)){ 
43 | continue; 
44 | } 
45 | int d_j = d[j]; 
46 | int d_jprime = d[jprime]; 
47 | 
48 | Eigen::MatrixXd outer_alpha = Eigen::MatrixXd::Zero(d_j, d_jprime); 
49 | Eigen::MatrixXd outer_z = z_i.segment(start_z[j], d_j) * z_i.segment(start_z[jprime], d_jprime).transpose(); 
50 | 
51 | 
52 | int m_ij = m_i[j]; 
53 | int m_ijprime = m_i[jprime]; 
54 | 
55 | for (int k = 0; k < d_j; ++k){ 
56 | for (int kprime = 0; kprime < d_jprime; ++kprime){ 
57 | outer_alpha(k,kprime) = L.col(m_ij + k - 1).cwiseProduct(L.col(m_ijprime + kprime - 1)).sum(); 
58 | } 
59 | } 
60 | 
61 | Eigen::MatrixXd kron = Eigen::kroneckerProduct(outer_alpha, outer_z); 
62 | 
63 | store_i.block(mapping_J[j], mapping_J[jprime], dsq[j], dsq[jprime]) = kron; 
64 | if (j != jprime){ 
65 | store_i.block(mapping_J[jprime], mapping_J[j], dsq[jprime], dsq[j]) = kron.transpose(); 
66 | } 
67 | } 
68 | } 
69 | ridge_R = ridge_R + store_i * pg_mean[i]; 
70 | } 
71 | 
72 | 
73 | return(ridge_R); 
74 | } 
75 | 
76 | // The BULK of time is here. 
77 | // Eigen::VectorXd z_ij = l_z_i[j]; 
78 | // Eigen::VectorXd z_ijprime = l_z_i[jprime]; 
79 | // Eigen::MatrixXd outer_z = z_ij * z_ijprime.transpose(); 
80 | //Decent amount of time is here too. 
81 | // Eigen::VectorXi index_j = positions_i[j]; 
82 | // Eigen::VectorXi index_jprime = positions_i[jprime]; 
83 | // 
84 | // for (int k = 0; k < d_j; ++k){ 
85 | // for (int kprime = 0; kprime < d_jprime; ++kprime){ 
86 | // int ik = index_j[k] - 1; 
87 | // int ikprime = index_jprime[kprime] - 1; 
88 | // 
89 | // outer_alpha(k,kprime) = L.col(ik).cwiseProduct(L.col(ikprime)).sum(); 
90 | // 
91 | // } 
92 | // } 
93 | 
94 | 
95 | // [[Rcpp::export]] 
96 | Eigen::MatrixXd vecR_design( 
97 | const Eigen::Map<Eigen::VectorXd> alpha_mu, 
98 | const Eigen::Map<Eigen::MatrixXd> Z, 
99 | const Eigen::Map<Eigen::MatrixXi> M, 
100 | const Rcpp::NumericVector mapping_J, // Where to assign the elements to the larger matrix. 
101 | const Rcpp::NumericVector d, 
102 | const Eigen::VectorXi start_z 
103 | ){ 
104 | int nrow_Z = Z.rows(); 
105 | 
106 | Rcpp::NumericVector dsq = d * d; 
107 | int size_vecR = Rcpp::sum(dsq); 
108 | int J = d.size(); 
109 | 
110 | Eigen::MatrixXd design_R(nrow_Z, size_vecR); 
111 | 
112 | // Loop over each observation 
113 | for (int i = 0; i < nrow_Z; ++i){ 
114 | 
115 | Eigen::VectorXd z_i = Z.row(i); 
116 | Eigen::VectorXi m_i = M.row(i); 
117 | 
118 | for (int j = 0; j < J; ++j){ 
119 | int d_j = d[j]; 
120 | 
121 | design_R.block(i, mapping_J[j], 1, dsq[j]) = Eigen::kroneckerProduct(alpha_mu.segment(m_i[j] - 1, d_j), z_i.segment(start_z[j], d_j)).transpose(); 
122 | } 
123 | } 
124 | 
125 | return design_R; 
126 | } 
127 | 
128 | 
129 | 
130 | // [[Rcpp::export]] 
131 | Eigen::VectorXd vecR_fast_ridge( 
132 | const Eigen::MappedSparseMatrix<double> X, 
133 | const Eigen::MappedSparseMatrix<double> omega, 
134 | const Eigen::MappedSparseMatrix<double> prior_precision, 
135 | const Eigen::Map<Eigen::VectorXd> y, 
136 | const Eigen::Map<Eigen::VectorXd> adjust_y 
137 | ){ 
138 | Eigen::SparseMatrix<double> adj_X = X.adjoint(); 
139 | Eigen::SimplicialLLT<Eigen::SparseMatrix<double> > Ch(adj_X * omega * X + prior_precision); 
140 | Eigen::VectorXd mean = Ch.solve(adj_X * y + adjust_y); 
141 | return mean; 
142 | } 
143 | 
144 | 
145 | // [[Rcpp::export]] 
146 | Eigen::MatrixXd vecR_ridge_new( 
147 | const Eigen::MappedSparseMatrix<double> L, //Decomposition of variance L^T L = VAR(alpha) 
148 | const Eigen::ArrayXd pg_mean, 
149 | const Rcpp::NumericVector mapping_J, // Where to assign the elements to the larger matrix.
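// Editor note (hypothetical documentation of the remaining arguments):
// d gives the dimension of each random effect; store_id / store_re_id hold
// per-pair observation indices and unique group-ID combinations (presumably
// as produced by prepare_Z_for_px); store_design holds the per-RE design
// rows used for the outer products below.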
150 | const Rcpp::NumericVector d, 151 | const Rcpp::List store_id, 152 | const Rcpp::List store_re_id, 153 | const Rcpp::List store_design, 154 | bool diag_only 155 | ){ 156 | 157 | Rcpp::NumericVector dsq = d * d; 158 | int size_vecR = Rcpp::sum(dsq); 159 | int J = d.size(); 160 | 161 | Eigen::MatrixXd ridge_R = Eigen::MatrixXd::Zero(size_vecR, size_vecR); 162 | 163 | for (int j = 0; j < J; ++j){ 164 | 165 | Rcpp::List id_j = store_id[j]; 166 | Rcpp::List re_id_j = store_re_id[j]; 167 | Eigen::MatrixXd design_j = store_design[j]; 168 | int d_j = d[j]; 169 | for (int jprime = 0; jprime <= j; ++jprime){ 170 | 171 | if (diag_only & (jprime != j)){ 172 | continue; 173 | } 174 | 175 | Eigen::MatrixXd design_jprime = store_design[jprime]; 176 | int d_jprime = d[jprime]; 177 | Rcpp::List id_jjprime = id_j[jprime]; 178 | Rcpp::List re_id_jjprime = re_id_j[jprime]; 179 | int length_combo = id_jjprime.size(); 180 | 181 | Eigen::MatrixXd kron = Eigen::MatrixXd::Zero(dsq[j], dsq[jprime]) ; 182 | 183 | for (int combo = 0; combo < length_combo; ++combo){ 184 | Rcpp::IntegerVector col_re = re_id_jjprime[combo]; 185 | Rcpp::IntegerVector u_comb = id_jjprime[combo]; 186 | Eigen::MatrixXd outer_Z = Eigen::MatrixXd::Zero(d_j, d_jprime); 187 | for (int i = 0; i < u_comb.size(); ++i){ 188 | // for (IntegerVector::iterator i = u_comb.begin(); i != u_comb.end(); ++i){ 189 | int pos_i = u_comb[i] - 1; 190 | Eigen::VectorXd row_j = design_j.row(pos_i); 191 | Eigen::VectorXd row_jprime = design_jprime.row(pos_i); 192 | 193 | Eigen::MatrixXd tmp_outer = row_j * row_jprime.transpose(); 194 | outer_Z += tmp_outer * pg_mean[pos_i]; 195 | } 196 | Eigen::MatrixXd outer_alpha = Eigen::MatrixXd::Zero(d_j, d_jprime); 197 | 198 | int m_j = col_re[0]; 199 | int m_jprime = col_re[1]; 200 | 201 | for (int k = 0; k < d_j; ++k){ 202 | for (int kprime = 0; kprime < d_jprime; ++kprime){ 203 | outer_alpha(k,kprime) = L.col(m_j + k - 1).cwiseProduct(L.col(m_jprime + kprime - 1)).sum(); 204 | } 205 | } 206 | kron += Eigen::kroneckerProduct(outer_alpha, outer_Z); 207 | } 208 | 209 | ridge_R.block(mapping_J[j], mapping_J[jprime], dsq[j], dsq[jprime]) = kron; 210 | if (j != jprime){ 211 | ridge_R.block(mapping_J[jprime], mapping_J[j], dsq[jprime], dsq[j]) = kron.transpose(); 212 | } 213 | 214 | } 215 | } 216 | return(ridge_R); 217 | } 218 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(vglmer) 3 | 4 | test_check("vglmer") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-MAVB.R: -------------------------------------------------------------------------------- 1 | context("Test MAVB") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(161) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Short test of MAVB running for CRAN", { 19 | N <- 50 20 | G <- 5 21 | x <- rnorm(N) 22 | g <- sample(1:G, N, replace = T) 23 | g2 <- sample(1:G, N, replace = T) 24 | alpha <- rnorm(G) 25 | alpha2 <- rnorm(G) 26 | 27 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g])) 28 | 29 | example_vglmer <- vglmer( 30 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = 
"binomial", 31 | control = vglmer_control(factorization_method = "weak", 32 | iterations = 5) 33 | ) 34 | 35 | mavb_samples <- tryCatch(MAVB(object = example_vglmer, samples = 10), 36 | error = function(e) { 37 | NULL 38 | } 39 | ) 40 | expect_false(is.null(mavb_samples)) 41 | 42 | 43 | example_vglmer <- vglmer( 44 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial", 45 | control = vglmer_control(factorization_method = "strong", iterations = 5) 46 | ) 47 | 48 | mavb_samples <- tryCatch(MAVB(object = example_vglmer, samples = 10), 49 | error = function(e) { 50 | NULL 51 | } 52 | ) 53 | expect_false(is.null(mavb_samples)) 54 | 55 | }) -------------------------------------------------------------------------------- /tests/testthat/test-alpha_px.R: -------------------------------------------------------------------------------- 1 | context("C++ Verification") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(111) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Calculate E[alpha alpha^T] comparing cpp and base R", { 19 | loop_outer_alpha <- function(vi_alpha_mean, vi_alpha_decomp, outer_alpha_RE_positions) { 20 | # Must be such that t(vi_alpha_decomp) %*% vi_alpha_decomp = VAR 21 | store_oa <- as.list(rep(NA, length(outer_alpha_RE_positions))) 22 | store_alpha_outer <- as.list(rep(NA, length(outer_alpha_RE_positions))) 23 | counter_j <- 1 24 | for (j in outer_alpha_RE_positions) { 25 | # cat('.') 26 | summed_oa <- summed_alpha_outer <- array(0, dim = rep(length(j[[1]]), 2)) 27 | for (g in j) { 28 | summed_oa <- summed_oa + as.matrix(crossprod(vi_alpha_decomp[, g])) 29 | summed_alpha_outer <- summed_alpha_outer + as.matrix(tcrossprod(vi_alpha_mean[g])) 30 | } 31 | store_oa[[counter_j]] <- summed_oa 32 | store_alpha_outer[[counter_j]] <- summed_alpha_outer 33 | counter_j <- counter_j + 1 34 | } 35 | return(list(outer_alpha = as.matrix(store_oa), alpha_mu_outer = as.matrix(store_alpha_outer))) 36 | } 37 | 38 | dta <- data.frame(y = 1, g = letters, g2 = LETTERS, g3 = 1:26, a = rnorm(26), x = rnorm(26), z = rnorm(26)) 39 | formula <- y ~ x + (1 + z | g) + (1 + z | g2) + (0 + a | g3) 40 | 41 | mk_Z <- lme4::mkReTrms(lme4::findbars(formula), model.frame(lme4::subbars(formula), data = dta), reorder.terms = FALSE, reorder.vars = FALSE) 42 | 43 | breaks_for_RE <- c(0, cumsum(diff(mk_Z$Gp))) 44 | d_j <- c(2, 2, 1) 45 | # Number of GROUPs for each random effect. 
46 | g_j <- diff(mk_Z$Gp) / d_j 47 | 48 | 49 | outer_alpha_RE_positions <- mapply(d_j, g_j, breaks_for_RE[-length(breaks_for_RE)], SIMPLIFY = FALSE, FUN = function(a, b, m) { 50 | split(m + seq(1, a * b), rep(1:b, each = a)) 51 | }) 52 | 53 | vi_alpha_var <- drop0(rWishart(n = 1, df = nrow(mk_Z$Zt) + 10, Sigma = diag(nrow(mk_Z$Zt)))[, , 1]) 54 | vi_alpha_chol <- as(drop0((chol(vi_alpha_var))), "generalMatrix") 55 | expect_equal(as.matrix(t(vi_alpha_chol) %*% vi_alpha_chol), as.matrix(vi_alpha_var)) 56 | 57 | vi_alpha_mean <- Matrix(rnorm(nrow(vi_alpha_var))) 58 | 59 | legacy_method <- loop_outer_alpha(vi_alpha_mean, vi_alpha_chol, outer_alpha_RE_positions) 60 | legacy_method <- mapply(legacy_method[[1]], legacy_method[[2]], SIMPLIFY = FALSE, FUN = function(a, b) { 61 | a + b 62 | }) 63 | 64 | cpp_method <- calculate_expected_outer_alpha(L = vi_alpha_chol, alpha_mu = as.vector(vi_alpha_mean), re_position_list = outer_alpha_RE_positions) 65 | cpp_method <- cpp_method$outer_alpha 66 | 67 | 68 | comp <- mapply(legacy_method, cpp_method, FUN = function(i, j) { 69 | all.equal(as.vector(i), as.vector(j)) 70 | }) 71 | 72 | expect_equal(unlist(legacy_method), unlist(cpp_method)) 73 | }) 74 | -------------------------------------------------------------------------------- /tests/testthat/test-errors.R: -------------------------------------------------------------------------------- 1 | context("Test vglmer robustness to certain situations") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(131) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("vglmer can run with objects in environment", { 19 | N <- 100 20 | G <- 5 21 | G_names <- paste(sample(letters, G, replace = T), 1:G) 22 | x <- rnorm(N) 23 | g <- sample(G_names, N, replace = T) 24 | alpha <- rnorm(G) 25 | 26 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[match(g, G_names)])) 27 | 28 | test_nodata <- tryCatch(suppressMessages(vglmer(y ~ x + (1 | g), 29 | data = NULL, 30 | control = vglmer_control( 31 | init = "zero", 32 | iterations = 1, print_prog = 10 33 | ), 34 | family = "binomial" 35 | )), 36 | error = function(e) { 37 | NULL 38 | } 39 | ) 40 | expect_false(is.null(test_nodata)) 41 | 42 | dta <- data.frame(Y = y, X = x, G = g) 43 | # Inject missingness into 44 | dta$Y[38] <- NA 45 | dta$X[39] <- NA 46 | dta$G[84] <- NA 47 | dta[3, ] <- NA 48 | test_missing <- tryCatch(suppressMessages(vglmer(Y ~ X + (1 | G), 49 | data = dta, 50 | control = vglmer_control( 51 | init = "zero", return_data = T, 52 | iterations = 1, print_prog = 10 53 | ), 54 | family = "binomial" 55 | )), 56 | error = function(e) { 57 | NULL 58 | } 59 | ) 60 | # Confirm runs 61 | expect_false(is.null(test_missing)) 62 | # Confirms deletion "works" 63 | expect_equivalent(dta$X[-c(3, 38, 39, 84)], test_missing$data$X[, 2]) 64 | expect_equivalent(dta$Y[-c(3, 38, 39, 84)], test_missing$data$y) 65 | }) 66 | 67 | test_that('vglmer runs with timing and "quiet=F"', { 68 | N <- 25 69 | G <- 2 70 | G_names <- paste(sample(letters, G, replace = T), 1:G) 71 | x <- rnorm(N) 72 | g <- sample(G_names, N, replace = T) 73 | alpha <- rnorm(G) 74 | 75 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[match(g, G_names)])) 76 | 77 | if (all(y == 0)){ 78 | y[1] <- 1 79 | } 80 | if (all(y == 1)){ 81 | y[1] <- 0 82 | } 83 | 84 | est_simple 
<- suppressMessages(vglmer(y ~ x + (1 | g), 85 | data = NULL, 86 | control = vglmer_control(do_timing = T, quiet = F, iteration = 5), 87 | family = "binomial" 88 | )) 89 | expect_true(inherits(est_simple$timing, "data.frame")) 90 | expect_gte(min(diff(est_simple$ELBO_trajectory$ELBO)), 0) 91 | }) 92 | 93 | test_that('vglmer parses environment correctly', { 94 | rm(list=ls()) 95 | N <- 25 96 | G <- 2 97 | G_names <- paste(sample(letters, G, replace = T), 1:G) 98 | 99 | dta <- data.frame(x = rnorm(N), g = sample(G_names, N, replace = T)) 100 | alpha <- rnorm(G) 101 | 102 | dta$y <- rbinom(n = N, size = 1, prob = plogis(-1 + dta$x + alpha[match(dta$g, G_names)])) 103 | dta$size <- rpois(n = N, lambda = 2) + 1 104 | dta$y_b <- rbinom(n = N, size = dta$size, prob = plogis(-1 + dta$x + alpha[match(dta$g, G_names)])) 105 | #runs with clean environment 106 | est_simple <- suppressMessages(vglmer(y ~ x + (1 | g), data = dta, 107 | control = vglmer_control(iterations = 5), 108 | family = 'binomial')) 109 | expect_true(inherits(est_simple, 'vglmer')) 110 | 111 | est_simple <- suppressMessages(vglmer(cbind(y_b, size) ~ x + (1 | g), 112 | control = vglmer_control(iterations = 5), 113 | data = dta, family = 'binomial')) 114 | expect_true(inherits(est_simple, 'vglmer')) 115 | }) 116 | 117 | test_that("vglmer can run with 'debug' settings", { 118 | N <- 20 119 | G <- 5 120 | G_names <- paste(sample(letters, G, replace = T), 1:G) 121 | x <- rnorm(N) 122 | g <- sample(G_names, N, replace = T) 123 | alpha <- rnorm(G) 124 | 125 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[match(g, G_names)])) 126 | 127 | # Avoid perfect separation 128 | if (all(y == 0)){ 129 | y[1] <- 1 130 | } 131 | if (all(y == 1)){ 132 | y[1] <- 0 133 | } 134 | 135 | # Debug to collect parameters 136 | est_vglmer <- vglmer(y ~ x + (1 | g), data = data.frame(y = y, x = x, g = g), 137 | family = 'binomial', 138 | control = vglmer_control(debug_param = TRUE, iterations = 5)) 139 | 140 | expect_true(all(c('beta', 'alpha') %in% names(est_vglmer$parameter_trajectory))) 141 | 142 | est_vglmer <- vglmer(y ~ x + (1 | g), 143 | data = data.frame(y = y, x = x, g = g), 144 | family = 'binomial', 145 | control = vglmer_control(debug_ELBO = TRUE)) 146 | expect_true(!is.null(est_vglmer$ELBO_trajectory$step)) 147 | 148 | }) 149 | 150 | test_that("vglmer can run with exactly balanced classes", { 151 | N <- 50 152 | G <- 5 153 | G_names <- paste(sample(letters, G, replace = T), 1:G) 154 | x <- rnorm(N) 155 | g <- sample(G_names, N, replace = T) 156 | alpha <- rnorm(G) 157 | 158 | y <- c(rep(0, N/2), rep(1, N/2)) 159 | 160 | # Debug to collect parameters 161 | est_vglmer <- vglmer(y ~ x + (1 | g), data = data.frame(y = y, x = x, g = g), 162 | family = 'binomial', 163 | control = vglmer_control(iterations = 1)) 164 | 165 | expect_s3_class(est_vglmer, 'vglmer') 166 | }) 167 | 168 | test_that("Run without FE for corresponding random slope", { 169 | 170 | N <- 25 171 | G <- 2 172 | G_names <- paste(sample(letters, G, replace = T), 1:G) 173 | x <- rnorm(N) 174 | g <- sample(G_names, N, replace = T) 175 | alpha <- rnorm(G) 176 | 177 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[match(g, G_names)])) 178 | # Avoid perfect separation 179 | if (all(y == 0)){ 180 | y[1] <- 1 181 | } 182 | if (all(y == 1)){ 183 | y[1] <- 0 184 | } 185 | fit_noFE_for_RE <- vglmer( 186 | formula = y ~ 1 + (1 + x | g), 187 | family = 'linear', control = vglmer_control(iterations = 4), 188 | data = NULL) 189 | expect_s3_class(fit_noFE_for_RE, 'vglmer') 190 | 191 
| }) 192 | 193 | test_that("predict works with N=1", { 194 | 195 | N <- 25 196 | G <- 2 197 | G_names <- paste(sample(letters, G, replace = T), 1:G) 198 | x <- rnorm(N) 199 | g <- sample(G_names, N, replace = T) 200 | alpha <- rnorm(G) 201 | 202 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[match(g, G_names)])) 203 | 204 | est_simple <- suppressMessages(vglmer(y ~ x + (1 | g), 205 | data = NULL, 206 | control = vglmer_control(iterations = 1), 207 | family = "linear" 208 | )) 209 | pred_single <- predict(est_simple, newdata = data.frame(x = x[1], g = 'NEW'), 210 | allow_missing_levels = TRUE) 211 | term_single <- predict(est_simple, newdata = data.frame(x = x[1], g = 'NEW'), 212 | type = 'terms', allow_missing_levels = TRUE) 213 | expect_equal(pred_single, sum(coef(est_simple) * c(1, x[1]))) 214 | expect_equivalent(c(pred_single, 0), term_single) 215 | 216 | est_spline <- suppressMessages(vglmer(y ~ v_s(x) + (1 | g), 217 | data = NULL, 218 | control = vglmer_control(iterations = 1), 219 | family = "linear" 220 | )) 221 | pred_spline <- predict(est_spline, 222 | newdata = data.frame(x = x[1], g = 'NEW'), 223 | allow_missing_levels = TRUE) 224 | term_spline <- predict(est_spline, type = 'terms', 225 | newdata = data.frame(x = x[1], g = 'NEW'), 226 | allow_missing_levels = TRUE) 227 | expect_equal(pred_spline, rowSums(term_spline)) 228 | expect_equivalent(term_spline[, 'FE'], sum(c(1, x[1]) * coef(est_spline))) 229 | 230 | }) 231 | -------------------------------------------------------------------------------- /tests/testthat/test-format_vglmer.R: -------------------------------------------------------------------------------- 1 | context("format_vglmer") 2 | -------------------------------------------------------------------------------- /tests/testthat/test-gKRLS.R: -------------------------------------------------------------------------------- 1 | context("Test Generic Methods") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(151) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Test gKRLS runs in a few configurations", { 19 | 20 | N <- 1000 21 | G <- 10 22 | x <- rnorm(N) 23 | x2 <- rnorm(N) 24 | x3 <- rnorm(N) 25 | g <- sample(1:G, N, replace = T) 26 | g2 <- sample(1:G, N, replace = T) 27 | alpha <- rnorm(G) 28 | alpha2 <- rnorm(G) 29 | alpha3 <- rnorm(G) 30 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + sin(alpha3[g] * x) + cos(x2 * x) + alpha[g] + alpha2[g])) 31 | 32 | if (requireNamespace("gKRLS", quietly = TRUE)) { 33 | require(gKRLS) 34 | 35 | example_vglmer <- vglmer( 36 | formula = y ~ x + (1 | g) + v_s(x, x2, type = 'gKRLS'), data = NULL, family = "binomial", 37 | control = vglmer_control(iterations = NITER, return_data = TRUE, factorization_method = "strong") 38 | ) 39 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), 0) 40 | expect_false('x2' %in% names(fixef(example_vglmer))) 41 | 42 | id_vglmer <- example_vglmer$internal_parameters$spline$attr[[1]]$knots$subsampling_id 43 | fit_mgcv <- gam(y ~ x + s(x, x2, bs = 'gKRLS', xt = gKRLS(sketch_method = id_vglmer))) 44 | # Check that the *design* aligns from gKRLS and vglmer 45 | expect_equivalent( 46 | as.matrix(example_vglmer$data$Z[,grepl(colnames(example_vglmer$data$Z), pattern='spline')]), 47 | model.matrix(fit_mgcv)[,-1:-2] 48 | ) 49 | 50 | # Check with 
"weak" and many smooth terms but not "proper" REs 51 | example_vglmer <- vglmer( 52 | formula = y ~ v_s(x2) + 53 | v_s(x,type = 'gKRLS') + 54 | v_s(x, x2, type = 'gKRLS', xt = gKRLS(sketch_method = 'none', standardize = 'none')), 55 | data = NULL, family = "binomial", 56 | control = vglmer_control(iterations = NITER, factorization_method = "weak") 57 | ) 58 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), 0) 59 | expect_false('x' %in% names(fixef(example_vglmer))) 60 | expect_true('x2' %in% names(fixef(example_vglmer))) 61 | # Check that prediction works 62 | test_pred <- predict(example_vglmer, newdata = data.frame( 63 | x = rnorm(10), x2 = rnorm(10), g = '2' 64 | )) 65 | 66 | # Check that this works with "by" 67 | f_g <- factor(g) 68 | example_vglmer <- vglmer( 69 | formula = y ~ x + 70 | v_s(x, x2, type = 'gKRLS', by = f_g), 71 | data = NULL, family = "binomial", 72 | control = vglmer_control(iterations = NITER, factorization_method = "strong") 73 | ) 74 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), 0) 75 | expect_true('f_g' %in% names(ranef(example_vglmer))) 76 | expect_true(ncol(ranef(example_vglmer)$f_g) == 2) 77 | expect_false('x2' %in% names(fixef(example_vglmer))) 78 | 79 | test_pred <- predict(example_vglmer, allow_missing_levels = TRUE, 80 | newdata = data.frame(x, x2, f_g = 1590)[1:15,]) 81 | 82 | # Runs fine with "data" provided directly 83 | example_vglmer <- vglmer( 84 | formula = yn ~ a + v_s(a, b, type = 'gKRLS'), 85 | data = data.frame(yn = y, a = x, b = x2), 86 | family = 'linear' 87 | ) 88 | 89 | } 90 | 91 | }) 92 | 93 | test_that("Test random walk runs in a few configurations", { 94 | 95 | N <- 1000 96 | G <- 10 97 | x <- rnorm(N) 98 | x2 <- rnorm(N) 99 | x3 <- rnorm(N) 100 | g <- sample(1:G, N, replace = T) 101 | g2 <- sample(1:G, N, replace = T) 102 | alpha <- sort(rnorm(G)) 103 | alpha2 <- rnorm(G) 104 | alpha3 <- rnorm(G) 105 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + sin(alpha3[g] * x) + cos(x2 * x) + alpha[g] + alpha2[g])) 106 | 107 | fg <- g 108 | example_vglmer <- expect_error( 109 | vglmer( 110 | formula = y ~ x + v_s(fg, type = 'randwalk'), data = NULL, family = "binomial", 111 | control = vglmer_control(iterations = NITER, return_data = TRUE, factorization_method = "strong") 112 | ), regexp='character or factor' 113 | ) 114 | fg <- factor(g) 115 | example_vglmer <- expect_error( 116 | vglmer( 117 | formula = y ~ x + v_s(fg, type = 'randwalk'), data = NULL, family = "binomial", 118 | control = vglmer_control(iterations = NITER, return_data = TRUE, factorization_method = "strong") 119 | ), regexp='ordered' 120 | ) 121 | fg <- factor(g, ordered = TRUE) 122 | example_vglmer <- vglmer( 123 | formula = y ~ x + v_s(fg, type = 'randwalk'), data = NULL, family = "binomial", 124 | control = vglmer_control(iterations = NITER, return_data = TRUE, factorization_method = "strong") 125 | ) 126 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), 0) 127 | 128 | # Check that manual predictions work 129 | 130 | new_fg <- data.frame(fg = sample(1:5, size = 10, replace = T), x = rnorm(10)) 131 | pred_new_fg <- predict(example_vglmer, newdata = new_fg) 132 | manual_pred <- sparseMatrix(i = 1:nrow(new_fg), j = match(new_fg$fg, 133 | example_vglmer$internal_parameters$spline$attr[[1]]$knots$levels), 134 | x = 1, dims = c(nrow(new_fg), 10) 135 | ) 136 | manual_pred <- as.vector(manual_pred %*% 137 | example_vglmer$internal_parameters$spline$attr[[1]]$knots$transf_mat %*% 138 | example_vglmer$alpha$mean + 139 | cbind(1, new_fg$x) %*% 
coef(example_vglmer) 140 | ) 141 | expect_equivalent(manual_pred, pred_new_fg) 142 | 143 | fs <- sample(state.abb, N, replace = T) 144 | fs <- data.frame( 145 | y = y, state = factor(fs, ordered = TRUE) 146 | ) 147 | fs$y <- fs$y + rnorm(nrow(fs)) + cos(2 * pi * ((0:50)/50))[match(fs$state, state.abb)] 148 | 149 | example_randwalk <- vglmer( 150 | formula = y ~ x + v_s(state, type = 'randwalk'), data = fs, family = "linear", 151 | control = vglmer_control(iterations = NITER, factorization_method = "weak") 152 | ) 153 | expect_gte(min(diff(example_randwalk$ELBO_trajectory$ELBO)), -.Machine$double.eps) 154 | 155 | pred_randwalk <- predict(example_randwalk, 156 | newdata = data.frame(x = 0, state = state.abb), 157 | type = 'terms') 158 | 159 | # Check that prediction works with NA and that "by" works 160 | th <- 8 161 | example_vglmer <- vglmer( 162 | yo ~ v_s(a, type = 'randwalk', by = st), 163 | data = data.frame(yo = y, a = cut(x, th, ordered_result = TRUE), st = fg), 164 | family = 'binomial', control = vglmer_control(iterations = NITER) 165 | ) 166 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), -.Machine$double.eps) 167 | # Check that REs are added for the "by" term 168 | expect_equivalent( 169 | names(ranef(example_vglmer)), 170 | c('st', 'spline-(a)-1-base', 'spline-(a)-1-int') 171 | ) 172 | 173 | full_grid <- expand.grid(st = unique(fg), a = unique(cut(x, th, ordered_result = TRUE))) 174 | full_grid[31,]$a <- NA 175 | pred_grid <- predict(example_vglmer, newdata = full_grid) 176 | expect_true(is.na(pred_grid[31])) 177 | 178 | # Check that prediction fails 179 | example_vglmer <- vglmer( 180 | yo ~ v_s(a, type = 'randwalk') + (1 | fg), 181 | data = data.frame(yo = y, a = cut(x, th, ordered_result = TRUE), st = fg), 182 | family = 'binomial', control = vglmer_control(iterations = NITER) 183 | ) 184 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), -.Machine$double.eps) 185 | expect_error( 186 | predict(example_vglmer, newdata = data.frame(fg = 3, a = '(1.00, 0.99]', stringsAsFactors = F)), 187 | regexp = 'cannot work if levels' 188 | ) 189 | 190 | }) 191 | -------------------------------------------------------------------------------- /tests/testthat/test-generic.R: -------------------------------------------------------------------------------- 1 | context("Test Generic Methods") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(141) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Test generic methods (fixed, ranef, coef, vcov)", { 19 | N <- 100 20 | G <- 10 21 | x <- rnorm(N) 22 | g <- sample(1:G, N, replace = T) 23 | g2 <- sample(1:G, N, replace = T) 24 | alpha <- rnorm(G) 25 | alpha2 <- rnorm(G) 26 | 27 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g])) 28 | 29 | example_vglmer <- vglmer( 30 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial", 31 | control = vglmer_control(factorization_method = "strong") 32 | ) 33 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), 0) 34 | 35 | expect_equivalent(coef(example_vglmer), as.vector(example_vglmer$beta$mean)) 36 | expect_equivalent(fixef(example_vglmer), coef(example_vglmer)) 37 | expect_equivalent(vcov(example_vglmer), as.matrix(example_vglmer$beta$var)) 38 | 39 | generic_ranef <- ranef(example_vglmer) 40 | 41 | 
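# ranef() returns one data.frame per grouping factor; the checks below
# unpack alpha$mean, which stacks the (intercept, slope) pairs for 'g'
# first (2 * G entries) and then the intercepts for 'g2'.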
generic_ranef$g 42 | 43 | expect_equivalent(example_vglmer$alpha$mean[-(1:(2 * G))], generic_ranef$g2$`(Intercept)`) 44 | expect_equivalent(example_vglmer$alpha$mean[(1:(2 * G))], as.vector(t(generic_ranef$g[, -1]))) 45 | }) 46 | 47 | test_that("Test that print and summary run", { 48 | N <- 100 49 | G <- 10 50 | x <- rnorm(N) 51 | g <- sample(1:G, N, replace = T) 52 | g2 <- sample(1:G, N, replace = T) 53 | alpha <- rnorm(G) 54 | alpha2 <- rnorm(G) 55 | 56 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g])) 57 | 58 | example_vglmer <- vglmer( 59 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial", 60 | control = vglmer_control(factorization_method = "strong") 61 | ) 62 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), 0) 63 | save_print <- invisible(capture.output(print(example_vglmer))) 64 | save_summary <- invisible(capture.output(summary(example_vglmer))) 65 | }) 66 | 67 | -------------------------------------------------------------------------------- /tests/testthat/test-match_glmer.R: -------------------------------------------------------------------------------- 1 | context("Match glmer") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(12345) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Compare against lmer", { 19 | 20 | skip_on_cran() 21 | 22 | N <- 1000 23 | G <- 100 24 | x <- rnorm(N) 25 | g <- sample(1:G, N, replace = T) 26 | alpha <- rnorm(G) 27 | sigmasq <- abs(rcauchy(1)) 28 | coef_scale <- rexp(1, rate = 1/2) 29 | y <- rnorm(n = N, mean = coef_scale * (-1 + x + alpha[g]) * sqrt(sigmasq), sd = sqrt(sigmasq)) 30 | 31 | est_glmer <- suppressWarnings(lme4::lmer(y ~ x + (1 | g), REML = FALSE)) 32 | fmt_glmer <- format_glmer(est_glmer) 33 | 34 | for (v in c("weak", "partial", "strong")) { 35 | 36 | example_vglmer <- suppressWarnings(vglmer( 37 | formula = y ~ x + (1 | g), data = NULL, 38 | control = vglmer_control(prior_variance = 'mean_exists', 39 | debug_px = TRUE, 40 | factorization_method = v), 41 | family = "linear" 42 | )) 43 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 44 | 45 | fmt_vglmer <- format_vglmer(example_vglmer) 46 | # comp_methods <- merge(fmt_glmer, fmt_vglmer, by = c("name")) 47 | # 48 | # cor_mean <- with(comp_methods, cor(mean.x, mean.y)) 49 | # expect_gt(cor_mean, expected = 0.80) 50 | # expect_lt(with(comp_methods, mean(abs(mean.x - mean.y))), 0.1) 51 | # expect_equal(sigma(example_vglmer), sigma(est_glmer), tol = 1e-1) 52 | } 53 | }) 54 | 55 | 56 | test_that("Compare against glmer", { 57 | 58 | skip_on_cran() 59 | 60 | N <- 1000 61 | G <- 100 62 | x <- rnorm(N) 63 | g <- sample(1:G, N, replace = T) 64 | alpha <- rnorm(G) 65 | 66 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g])) 67 | 68 | est_glmer <- suppressWarnings(lme4::glmer(y ~ x + (1 | g), family = binomial)) 69 | fmt_glmer <- format_glmer(est_glmer) 70 | 71 | for (v in c("weak", "partial", "strong")) { 72 | 73 | example_vglmer <- suppressWarnings(vglmer( 74 | formula = y ~ x + (1 | g), data = NULL, 75 | control = vglmer_control(factorization_method = v, debug_px = TRUE), 76 | family = "binomial" 77 | )) 78 | 79 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 80 | 81 | fmt_vglmer <- 
format_vglmer(example_vglmer) 82 | comp_methods <- merge(fmt_glmer, fmt_vglmer, by = c("name")) 83 | 84 | # cor_mean <- with(comp_methods, cor(mean.x, mean.y)) 85 | # expect_gt(cor_mean, expected = 0.80) 86 | } 87 | }) 88 | 89 | 90 | # test_that("Compare against glmer.nb", { 91 | # 92 | # skip_on_cran() 93 | # 94 | # N <- 1000 95 | # G <- 50 96 | # x <- rnorm(N) 97 | # g <- sample(1:G, N, replace = T) 98 | # alpha <- rnorm(G) 99 | # 100 | # y <- rnbinom(n = N, mu = exp(-1 + x + alpha[g]), size = 5) 101 | # data <- data.frame(y = y, x = x, g = g) 102 | # est_glmer <- suppressWarnings(glmer.nb(y ~ x + (1 | g), data = data, family = binomial)) 103 | # fmt_glmer <- format_glmer(est_glmer) 104 | # 105 | # for (v in c("weak", "partial", "strong")) { 106 | # example_vglmer <- suppressWarnings(vglmer( 107 | # formula = y ~ x + (1 | g), data = data, 108 | # family = "negbin", 109 | # control = vglmer_control(factorization_method = v, parameter_expansion = 'mean', debug_px = TRUE) 110 | # )) 111 | # # Test whether it monotonically increases 112 | # expect_gte(min(diff(ELBO(example_vglmer, 'traj'))), -sqrt(.Machine$double.eps)) 113 | # 114 | # fmt_vglmer <- format_vglmer(example_vglmer) 115 | # comp_methods <- merge(fmt_glmer, fmt_vglmer, by = c("name")) 116 | # cor_mean <- with(comp_methods, cor(mean.x, mean.y)) 117 | # # Test whether it is close to the truth. 118 | # expect_gt(cor_mean, expected = 0.99) 119 | # } 120 | # }) 121 | 122 | test_that("EM_prelim matches glm", { 123 | N <- 100 124 | p <- 5 125 | 126 | Z <- matrix(rnorm(N * p), ncol = p) 127 | beta <- runif(p, -1, 1) 128 | 129 | y <- rbinom(N, 1, plogis(Z %*% beta)) 130 | 131 | est_glm <- glm(y ~ Z, family = binomial) 132 | est_init <- EM_prelim_logit( 133 | X = drop0(matrix(1, nrow = N)), 134 | Z = drop0(Z), s = y - 1 / 2, pg_b = rep(1, N), iter = 200, ridge = Inf 135 | ) 136 | est_init <- c(est_init$beta, est_init$alpha) 137 | expect_equal(as.vector(coef(est_glm)), est_init, tolerance = 1e-4) 138 | }) 139 | 140 | 141 | test_that("EM_prelim matches glm.nb", { 142 | 143 | if (requireNamespace('MASS', quietly = TRUE)){ 144 | quine <- MASS::quine 145 | N <- nrow(quine) 146 | quine.nb1 <- MASS::glm.nb(Days ~ Sex / (Age + Eth * Lrn), data = quine) 147 | X <- model.matrix(quine.nb1) 148 | y <- quine$Days 149 | 150 | est_init <- EM_prelim_nb( 151 | X = drop0(matrix(1, nrow = N)), Z = drop0(X[, -1]), y = y, 152 | est_r = quine.nb1$theta, iter = 100, ridge = Inf 153 | ) 154 | est_init <- c(est_init$beta, est_init$alpha) 155 | expect_equal(as.vector(coef(quine.nb1)), est_init, tolerance = 1e-4) 156 | } 157 | }) 158 | 159 | test_that("Compare against glmer (binomial)", { 160 | 161 | skip_on_cran() 162 | 163 | N <- 1000 164 | G <- 100 165 | x <- rnorm(N) 166 | g <- sample(1:G, N, replace = T) 167 | alpha <- rnorm(G) 168 | 169 | size <- rpois(N, 1) + 1 170 | y <- rbinom(n = N, size = size, prob = plogis(-1 + x + alpha[g])) 171 | 172 | est_glmer <- suppressWarnings(lme4::glmer(cbind(y, size - y) ~ x + (1 | g), family = binomial)) 173 | fmt_glmer <- format_glmer(est_glmer) 174 | 175 | example_vglmer <- suppressWarnings(vglmer( 176 | formula = cbind(y, size - y) ~ x + (1 | g), data = NULL, 177 | family = "binomial", 178 | control = vglmer_control(debug_px = TRUE) 179 | )) 180 | 181 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 182 | 183 | fmt_vglmer <- format_vglmer(example_vglmer) 184 | 185 | 186 | }) 187 | 188 | 189 | test_that("Compare against more unusual lmer syntax", { 190 | N <- 50 191 | G <- 3 192 | x <- 
rnorm(N) 193 | g <- sample(1:G, N, replace = T) 194 | alpha <- rnorm(G) 195 | sigmasq <- abs(rcauchy(1)) 196 | coef_scale <- rexp(1, rate = 1/2) 197 | y <- rnorm(n = N, mean = coef_scale * (-1 + x + alpha[g]) * sqrt(sigmasq), sd = sqrt(sigmasq)) 198 | 199 | df <- data.frame(y = y, x = x, g = g, g_copy = g) 200 | 201 | expect_warning(vglmer(y ~ (1 + x || g), 202 | control = vglmer_control(iterations = 1), 203 | data = df, family = 'linear'), 'are duplicated') 204 | 205 | est_v <- suppressWarnings(vglmer(y ~ (1 + x || g), 206 | control = vglmer_control(iterations = NITER), 207 | data = df, family = 'linear')) 208 | est_v_copy <- vglmer(y ~ (1 | g) + (0 + x | g_copy), 209 | control = vglmer_control(iterations = NITER), 210 | data = df, family = 'linear') 211 | 212 | expect_equivalent(suppressWarnings(predict(est_v, df)), predict(est_v_copy, df)) 213 | expect_equivalent(ELBO(est_v), ELBO(est_v_copy)) 214 | }) 215 | -------------------------------------------------------------------------------- /tests/testthat/test-predict.R: -------------------------------------------------------------------------------- 1 | context("Test Predict") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(171) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Prediction Matches Manual and (nearly) glmer", { 19 | 20 | if (env_test == 'local'){ 21 | N <- 1000 22 | }else{ 23 | N <- 50 24 | } 25 | G <- 3 26 | x <- rnorm(N) 27 | g <- sample(1:G, N, replace = T) 28 | g2 <- sample(1:G, N, replace = T) 29 | alpha <- rnorm(G) 30 | alpha2 <- rnorm(G) 31 | 32 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g])) 33 | 34 | est_glmer <- suppressMessages(suppressWarnings(lme4::glmer(y ~ x + (1 + x | g) + (1 | g2), family = binomial))) 35 | 36 | example_vglmer <- vglmer( 37 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial", 38 | control = vglmer_control(factorization_method = "weak") 39 | ) 40 | draw_samples <- posterior_samples.vglmer(example_vglmer, samples = 10) 41 | draw_MAVB <- MAVB(example_vglmer, samples = 10, var_px = 1) 42 | expect_true(is.matrix(draw_MAVB)) 43 | expect_true(is.matrix(draw_samples)) 44 | 45 | def_predict <- predict(example_vglmer, newdata = data.frame(y = y, x = x, g = g, g2 = g2)) 46 | 47 | if (env_test == 'local'){ 48 | glmer_predict <- predict(est_glmer) 49 | 50 | expect_gt( 51 | cor(def_predict, glmer_predict), 0.95 52 | ) 53 | } 54 | 55 | alpha_names <- rownames(example_vglmer$alpha$mean) 56 | manual_predict <- as.vector( 57 | example_vglmer$alpha$mean[match(paste0("g2 @ (Intercept) @ ", g2), alpha_names)] + 58 | example_vglmer$alpha$mean[match(paste0("g @ x @ ", g), alpha_names)] * x + 59 | example_vglmer$alpha$mean[match(paste0("g @ (Intercept) @ ", g), alpha_names)] + 60 | cbind(1, x) %*% example_vglmer$beta$mean 61 | ) 62 | expect_equal(def_predict, manual_predict) 63 | }) 64 | 65 | test_that("Prediction Matches for New Levels in newdata", { 66 | N <- 50 67 | G <- 10 68 | x <- rnorm(N) 69 | g <- sample(1:G, N, replace = T) 70 | alpha <- rnorm(G) 71 | 72 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g])) 73 | 74 | example_vglmer <- vglmer( 75 | formula = y ~ x + (1 | g), data = NULL, 76 | control = vglmer_control(iterations = 2, print_prog = 10, init = "zero"), 77 | family = "binomial" 78 | ) 79 | 
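# The manual checks below rebuild predictions by hand: random-effect
# means are stored in alpha$mean with rownames of the form
# "group @ term @ level" (e.g. "g @ (Intercept) @ 3"), so match()
# recovers each observation's effect; unmatched (new) levels contribute
# zero when allow_missing_levels = TRUE.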
# New data contains only a level unseen in training 80 | new_data <- data.frame(x = rep(0, 5), g = 11) 81 | expect_error(predict(example_vglmer, newdata = new_data)) # Error on default 82 | expect_equal( # zero when allow_missing_levels = TRUE 83 | predict(example_vglmer, newdata = new_data, allow_missing_levels = TRUE), 84 | rep(example_vglmer$beta$mean[1], nrow(new_data)) 85 | ) 86 | mixed_data <- data.frame(x = rnorm(10), g = sample(1:25, 10, replace = T)) 87 | 88 | man_beta <- as.vector(cbind(1, mixed_data$x) %*% example_vglmer$beta$mean) 89 | man_alpha <- as.vector(example_vglmer$alpha$mean)[match(paste0("g @ (Intercept) @ ", mixed_data$g), rownames(example_vglmer$alpha$mean))] 90 | man_alpha[is.na(man_alpha)] <- 0 91 | expect_equivalent(man_beta + man_alpha, predict(example_vglmer, newdata = mixed_data, allow_missing_levels = T)) 92 | }) 93 | 94 | test_that("Prediction Matches for Missing Values in newdata", { 95 | N <- 50 96 | G <- 10 97 | x <- rnorm(N + G) 98 | g <- c(sample(1:G, N, replace = T), 1:G) 99 | alpha <- rnorm(G) 100 | 101 | y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g])) 102 | 103 | example_vglmer <- vglmer( 104 | formula = y ~ x + (1 | g), data = NULL, 105 | control = vglmer_control(iterations = 2), family = "binomial" 106 | ) 107 | 108 | mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2)) 109 | rownames(mixed_data) <- letters[1:20] 110 | 111 | mixed_data$x[8] <- NA 112 | mixed_data$g[7] <- NA 113 | mixed_data$x[2] <- mixed_data$g[2] <- NA 114 | 115 | man_beta <- as.vector(cbind(1, mixed_data$x) %*% example_vglmer$beta$mean) 116 | man_alpha <- example_vglmer$alpha$mean[match(paste0("g @ (Intercept) @ ", mixed_data$g), rownames(example_vglmer$alpha$mean))] 117 | expect_equivalent( 118 | man_beta + man_alpha, 119 | predict(example_vglmer, newdata = mixed_data) 120 | ) 121 | }) 122 | 123 | test_that("Prediction Matches for Simulation", { 124 | 125 | if (env_test == "local"){ 126 | N <- 1000 127 | G <- 10 128 | }else{ 129 | N <- 50 130 | G <- 3 131 | } 132 | x <- rnorm(N) 133 | g <- sample(1:G, N, replace = T) 134 | g2 <- sample(1:G, N, replace = T) 135 | alpha <- rnorm(G) 136 | alpha2 <- rnorm(G) 137 | 138 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g])) 139 | 140 | est_glmer <- suppressMessages(suppressWarnings(lme4::glmer(y ~ x + (1 + x | g) + (1 | g2), family = binomial))) 141 | 142 | example_vglmer <- vglmer( 143 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial", 144 | control = vglmer_control(factorization_method = "weak", iterations = NITER) 145 | ) 146 | 147 | mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2), g2 = sample(1:25, 20, replace = T)) 148 | rownames(mixed_data) <- letters[1:20] 149 | 150 | mixed_data$x[8] <- NA 151 | mixed_data$g[7] <- NA 152 | mixed_data$x[2] <- mixed_data$g[2] <- NA 153 | 154 | test_data <- rbind(mixed_data, data.frame(x = x, g = g, g2 = g2)[sample(1:length(y), 100, replace = T), ]) 155 | 156 | point_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = T) 157 | terms_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = TRUE, type = 'terms') 158 | 159 | manual_fe <- test_data$x * coef(example_vglmer)['x'] + coef(example_vglmer)[1] 160 | expect_equal( 161 | terms_predict[,'FE'], 162 | ifelse(is.na(point_predict), NA, manual_fe) 163 | ) 164 | 165 | manual_g <- rowSums(ranef(example_vglmer)$g[test_data$g,2:3] * cbind(1, test_data$x)) 166 | manual_g[is.na(manual_g) & !is.na(test_data$g)] <- 0 167 | expect_equal( 168 |
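# group-g contribution rebuilt by hand (level intercept plus level
# slope times x); rows with NA inputs must come back as NA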
terms_predict[,'g'], 169 | ifelse(is.na(point_predict), NA, manual_g) 170 | ) 171 | 172 | manual_g2 <- ranef(example_vglmer)$g2[test_data$g2,2] 173 | manual_g2[is.na(manual_g2) & !is.na(test_data$g2)] <- 0 174 | expect_equal( 175 | terms_predict[,'g2'], 176 | ifelse(is.na(point_predict), NA, manual_g2) 177 | ) 178 | 179 | expect_equivalent( 180 | manual_fe + manual_g + manual_g2, point_predict 181 | ) 182 | 183 | if (env_test == "local"){ 184 | n_samples <- 2 * 10^4 185 | }else{ 186 | n_samples <- 2 187 | } 188 | mean_predict <- predict(example_vglmer, 189 | newdata = test_data, 190 | samples = n_samples, allow_missing_levels = T 191 | ) 192 | # should have "clean" rownames 193 | expect_equal(rownames(mean_predict), as.character(1:nrow(mean_predict))) 194 | 195 | if (env_test == "local"){ 196 | # Should be very close 197 | expect_equal(mean_predict$mean, point_predict, 0.01) 198 | } 199 | 200 | if (env_test == "local"){ 201 | matrix_predict <- predict(example_vglmer, 202 | newdata = test_data, 203 | samples = 2 * 10^4, allow_missing_levels = T, samples_only = TRUE 204 | ) 205 | matrix_predict <- colMeans(matrix_predict) 206 | expect_equivalent( 207 | c(coef(example_vglmer), as.vector(example_vglmer$alpha$mean)), 208 | matrix_predict, 0.01 209 | ) 210 | } 211 | }) 212 | 213 | 214 | test_that("Prediction Matches for vglmer after MAVB", { 215 | 216 | skip_on_cran() 217 | 218 | N <- 50 219 | G <- 4 220 | x <- rnorm(N) 221 | g <- sample(1:G, N, replace = T) 222 | g2 <- sample(1:G, N, replace = T) 223 | alpha <- rnorm(G) 224 | alpha2 <- rnorm(G) 225 | 226 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g])) 227 | 228 | example_vglmer <- vglmer( 229 | formula = y ~ x + (1 + x | g) + (1 | g2), data = NULL, family = "binomial", 230 | control = vglmer_control(factorization_method = "weak") 231 | ) 232 | 233 | 234 | mixed_data <- data.frame(x = rnorm(20), g = rep(1:10, 2), g2 = sample(1:25, 20, replace = T)) 235 | rownames(mixed_data) <- letters[1:20] 236 | 237 | mixed_data$x[8] <- NA 238 | mixed_data$g[7] <- NA 239 | mixed_data$x[2] <- mixed_data$g[2] <- NA 240 | 241 | test_data <- rbind(mixed_data, data.frame(x = x, g = g, g2 = g2)[sample(1:length(y), 100, replace = T), ]) 242 | 243 | pred.MAVB <- predict_MAVB(example_vglmer, 244 | newdata = test_data, 245 | samples = 1000, allow_missing_levels = T 246 | ) 247 | base_predict <- predict(example_vglmer, newdata = test_data, allow_missing_levels = T) 248 | 249 | expect_equal(pred.MAVB$mean, base_predict, tolerance = 0.1) 250 | }) 251 | 252 | test_that("Prediction with Samples", { 253 | 254 | skip_on_cran() 255 | 256 | if (env_test == "local"){ 257 | n_samples <- 2 * 10^4 258 | }else{ 259 | n_samples <- 5 260 | } 261 | N <- 50 262 | G <- 10 263 | x <- rnorm(N + G) 264 | g <- c(sample(1:G, N, replace = T), 1:G) 265 | alpha <- rnorm(G) 266 | 267 | y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g])) 268 | 269 | example_vglmer <- vglmer( 270 | formula = y ~ x + (1 | g), data = NULL, 271 | control = vglmer_control(iterations = 2, factorization_method = 'strong'), 272 | family = "binomial" 273 | ) 274 | 275 | pred_samples <- predict(example_vglmer, newdata = data.frame(x = x, g = g), samples = 10, summary = F) 276 | expect_equivalent(dim(pred_samples), c(10, N + G)) 277 | 278 | draw_coef <- predict(example_vglmer, 279 | newdata = data.frame(x = x, g = g), 280 | samples = n_samples, samples_only = T 281 | ) 282 | expect_equivalent(dim(draw_coef), c(n_samples, G + 2)) 283 | 284 | if (env_test == "local"){ 285 | 
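# with ~20,000 draws, the Monte Carlo mean of the sampled coefficients
# should be close to the variational posterior means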
expect_equivalent( 286 | colMeans(draw_coef), format_vglmer(example_vglmer)$mean, 287 | tolerance = 0.02 288 | ) 289 | } 290 | #Confirms that it works with "weak" 291 | example_vglmer <- vglmer( 292 | formula = y ~ x + (1 | g), data = NULL, 293 | control = vglmer_control(iterations = 2, factorization_method = 'weak'), 294 | family = "binomial" 295 | ) 296 | 297 | pred_samples <- predict(example_vglmer, newdata = data.frame(x = x, g = g), samples = 10, summary = F) 298 | expect_equivalent(dim(pred_samples), c(10, N + G)) 299 | 300 | draw_coef <- predict(example_vglmer, 301 | newdata = data.frame(x = x, g = g), 302 | samples = n_samples, samples_only = T 303 | ) 304 | expect_equivalent(dim(draw_coef), c(n_samples, G + 2)) 305 | 306 | if (env_test == "local"){ 307 | expect_equivalent( 308 | colMeans(draw_coef), format_vglmer(example_vglmer)$mean, 309 | tolerance = 0.02 310 | ) 311 | } 312 | }) 313 | 314 | test_that("Prediction with factor/categorical", { 315 | 316 | N <- 50 317 | G <- 10 318 | x <- rnorm(N + G) 319 | g <- c(sample(1:G, N, replace = T), 1:G) 320 | alpha <- rnorm(G) 321 | 322 | y <- rbinom(n = N + G, size = 1, prob = plogis(-1 + x + alpha[g])) 323 | l <- sample(letters[1:3], length(x), replace = T) 324 | 325 | example_vglmer <- vglmer( 326 | formula = y ~ x + l + (1 | g), data = NULL, 327 | control = vglmer_control(iterations = 2, factorization_method = 'strong'), 328 | family = "binomial" 329 | ) 330 | 331 | pred1a <- predict(example_vglmer, newdata = data.frame(x = 3, g = 2, l = 'a', stringsAsFactors = TRUE)) 332 | expect_equivalent(pred1a, sum(coef(example_vglmer) * c(1, 3, 0, 0)) + ranef(example_vglmer)$g[2,2]) 333 | 334 | pred1b <- predict(example_vglmer, newdata = data.frame(x = 3, g = 2, l = 'a', stringsAsFactors = FALSE)) 335 | expect_equivalent(pred1b, sum(coef(example_vglmer) * c(1, 3, 0, 0)) + ranef(example_vglmer)$g[2,2]) 336 | 337 | pred2a <- predict(example_vglmer, newdata = data.frame(x = -1, g = 100, l = 'c', stringsAsFactors = TRUE), allow_missing_levels = TRUE) 338 | expect_equivalent(pred2a, sum(coef(example_vglmer) * c(1, -1, 0, 1)) ) 339 | pred2b <- predict(example_vglmer, newdata = data.frame(x = -1, g = 100, l = 'c', stringsAsFactors = FALSE), allow_missing_levels = TRUE) 340 | expect_equivalent(pred2b, sum(coef(example_vglmer) * c(1, -1, 0, 1)) ) 341 | 342 | expect_error(predict(example_vglmer, 343 | newdata = data.frame(x = -1, g = 100, l = 'd'), 344 | allow_missing_levels = TRUE), regexp = 'has new level d') 345 | }) 346 | -------------------------------------------------------------------------------- /tests/testthat/test-superlearner.R: -------------------------------------------------------------------------------- 1 | context("SuperLearner tests") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(181) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Test SuperLearner", { 19 | 20 | N <- 100 21 | x1 <- rnorm(N) 22 | x2 <- rbinom(N, size = 1, prob = .2) 23 | f <- sample(letters[1:3], N, replace = T) 24 | y <- rbinom(N, 1, plogis(x1 + runif(3)[match(f, letters)])) 25 | 26 | 27 | if (requireNamespace("SuperLearner", quietly = TRUE)) { 28 | require(SuperLearner) 29 | 30 | sl_k <- add_formula_SL('SL.glm', env = globalenv()) 31 | sl_k <- function(...){SL.glm_f(formula = ~ x1 + x2, ...)} 32 | sl_m <- function(...) 
{ 33 | suppressMessages(SL.vglmer(formula = ~ x1 + (1 | f), 34 | control = vglmer_control(iterations = 2), ...)) 35 | } 36 | sl_g <- function(...) { 37 | suppressMessages(SL.glmer(formula = ~ x1 + (1 | f), ...)) 38 | } 39 | fit_SL <- SuperLearner::SuperLearner( 40 | Y = y, cvControl = list(V = 2), 41 | X = data.frame(x1 = x1, x2 = x2, f = f), 42 | SL.library = c("sl_m", "sl_g", "sl_k") 43 | ) 44 | expect_s3_class(fit_SL, "SuperLearner") 45 | 46 | pred <- predict(fit_SL, newdata = data.frame(x1 = x1, x2 = x2, f = f)) 47 | expect_length(pred, n = 2) 48 | } 49 | }) 50 | -------------------------------------------------------------------------------- /tests/testthat/test-translation_px.R: -------------------------------------------------------------------------------- 1 | context("Translation Expansion Tests") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(54321) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | print(paste0(NITER, ' for tests because ', env_test)) 19 | 20 | test_that('Translation Data Test', { 21 | 22 | N <- 100 23 | G <- 5 24 | x <- rnorm(N) 25 | x2 <- rnorm(N) 26 | g <- sample(1:G, N, replace = T) 27 | g2 <- sample(1:G, N, replace = T) 28 | g3 <- sample(1:8, N, replace = T) 29 | alpha <- rnorm(G) 30 | 31 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + rnorm(8)[g3])) 32 | 33 | for (v in c('dynamic', 'numerical', 'OSL', 'profiled')){ 34 | 35 | est_vglmer <- vglmer(y ~ x + x2 + (1 + x | g) + (1 + x2 | g2) + (1 | g3), data = NULL, 36 | family = 'binomial', 37 | control = vglmer_control(iterations = NITER, px_method = v, debug_px = TRUE)) 38 | 39 | expect_gt(min(diff(ELBO(est_vglmer, 'traj'))), -sqrt(.Machine$double.eps)) 40 | } 41 | 42 | }) 43 | 44 | test_that("Check that B_j has correct shape", { 45 | N <- 100 46 | G <- 5 47 | x <- rnorm(N) 48 | x2 <- rnorm(N) 49 | g <- sample(1:G, N, replace = T) 50 | g2 <- sample(1:G, N, replace = T) 51 | alpha <- rnorm(G) 52 | 53 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g])) 54 | 55 | est_vglmer <- vglmer(y ~ v_s(x) + x2 + (1 + x | g) + (1 + x + x2 | g2), data = NULL, 56 | family = 'linear', 57 | control = vglmer_control(iterations = NITER)) 58 | expect_gt(min(diff(ELBO(est_vglmer, 'traj'))), -sqrt(.Machine$double.eps)) 59 | est_vglmer$MAVB_xx 60 | 61 | est_vglmer <- vglmer(y ~ v_s(x) + (1 + x | g) + (1 + x2 | g2), data = NULL, 62 | family = 'linear', 63 | control = vglmer_control(iterations = NITER)) 64 | expect_gt(min(diff(ELBO(est_vglmer, 'traj'))), -sqrt(.Machine$double.eps)) 65 | est_vglmer$MAVB_xx 66 | 67 | }) 68 | 69 | 70 | 71 | test_that('Translation Data Test (non-strong factorization)', { 72 | 73 | N <- 100 74 | G <- 5 75 | x <- rnorm(N) 76 | x2 <- rnorm(N) 77 | g <- sample(1:G, N, replace = T) 78 | g2 <- sample(1:G, N, replace = T) 79 | g3 <- sample(1:8, N, replace = T) 80 | alpha <- rnorm(G) 81 | 82 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + rnorm(8)[g3])) 83 | 84 | for (v in c('dynamic', 'numerical', 'OSL', 'profiled')){ 85 | 86 | est_vglmer <- vglmer(y ~ x + x2 + (1 + x | g) + (1 + x2 | g2) + (1 | g3), data = NULL, 87 | family = 'binomial', 88 | control = vglmer_control(iterations = NITER, px_method = v, debug_px = TRUE)) 89 | 90 | expect_gt(min(diff(ELBO(est_vglmer, 'traj'))), -sqrt(.Machine$double.eps)) 91 | 92 | } 93 | 94 | # Check that parameter_expansion =
"mean" for non-strong factorization 95 | est_vglmer <- expect_message( 96 | vglmer(y ~ x + x2 + (1 + x | g) + (1 + x2 | g2) + (1 | g3), data = NULL, 97 | family = 'binomial', 98 | control = vglmer_control(iterations = NITER, factorization_method = 'weak', 99 | px_method = v, debug_px = TRUE)), 100 | regexp = 'to mean for non-strong' 101 | ) 102 | expect_true(est_vglmer$control$parameter_expansion == 'mean') 103 | 104 | }) 105 | -------------------------------------------------------------------------------- /tests/testthat/test-update_method.R: -------------------------------------------------------------------------------- 1 | context("Test various update methods") 2 | 3 | if (isTRUE(as.logical(Sys.getenv("CI")))){ 4 | # If on CI 5 | NITER <- 2 6 | env_test <- "CI" 7 | }else if (!identical(Sys.getenv("NOT_CRAN"), "true")){ 8 | # If on CRAN 9 | NITER <- 2 10 | env_test <- "CRAN" 11 | set.seed(191) 12 | }else{ 13 | # If on local machine 14 | NITER <- 2000 15 | env_test <- 'local' 16 | } 17 | 18 | test_that("Joint vs Cyclical Update", { 19 | 20 | skip_on_cran() 21 | 22 | N <- 1000 23 | G <- 20 24 | x <- rnorm(N) 25 | g <- sample(1:G, N, replace = T) 26 | g2 <- sample(1:10, N, replace = T) 27 | alpha <- rnorm(G) 28 | alpha2 <- rnorm(10) 29 | 30 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g2])) 31 | 32 | 33 | for (v in c("weak", "partial", "strong")) { 34 | 35 | ex_vglmer_cyclic <- vglmer( 36 | formula = y ~ x + (1 | g) + (1 | g2), family = "binomial", 37 | data = NULL, control = vglmer_control(factorization_method = v, linpred_method = "cyclical", init = "zero") 38 | ) 39 | 40 | ex_vglmer_joint <- vglmer( 41 | formula = y ~ x + (1 | g) + (1 | g2), family = "binomial", 42 | data = NULL, control = vglmer_control(factorization_method = v, linpred_method = "joint", init = "zero") 43 | ) 44 | 45 | fmt_vglmer_cyclic <- format_vglmer(ex_vglmer_cyclic) 46 | fmt_vglmer_joint <- format_vglmer(ex_vglmer_joint) 47 | 48 | expect_equivalent(fmt_vglmer_cyclic, fmt_vglmer_joint, tolerance = 1e-4, scale = 1) 49 | 50 | if (v == "strong") { 51 | 52 | ex_vglmer_normal <- vglmer( 53 | formula = y ~ x + (1 | g) + (1 | g2), family = "binomial", 54 | data = NULL, 55 | control = vglmer_control(factorization_method = v, 56 | do_SQUAREM = FALSE, 57 | linpred_method = "solve_normal", init = "zero") 58 | ) 59 | 60 | fmt_vglmer_normal <- format_vglmer(ex_vglmer_normal) 61 | expect_equivalent(fmt_vglmer_normal, fmt_vglmer_joint, tolerance = 1e-4, scale = 1) 62 | } 63 | } 64 | }) 65 | 66 | 67 | test_that("Joint vs Cyclical Update (Nested)", { 68 | 69 | skip_on_cran() 70 | 71 | N <- 1000 72 | G <- 20 73 | x <- rnorm(N) 74 | g <- sample(1:G, N, replace = T) 75 | g2 <- floor(g/3) + 1 76 | alpha <- rnorm(G) 77 | alpha2 <- rnorm(max(g2)) 78 | 79 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g] + alpha2[g2])) 80 | 81 | 82 | fmla <- y ~ x + (1 | g2) + (1 | g) 83 | fmla_perm <- y ~ x + (1 | g) + (1 | g2) 84 | 85 | warning('This should work with *default* initialization...') 86 | 87 | ex_vglmer_cyclic_perm <- suppressMessages(vglmer( 88 | formula = fmla_perm, family = "binomial", 89 | data = NULL, control = vglmer_control( 90 | iterations = 100, print_prog = 500, 91 | init = 'EM', linpred_method = "cyclical") 92 | )) 93 | 94 | ex_vglmer_joint_perm <- vglmer( 95 | formula = fmla_perm, family = "binomial", 96 | control = vglmer_control(iterations = 100, print_prog = 500), 97 | data = NULL 98 | ) 99 | 100 | ex_vglmer_cyclic <- suppressMessages(vglmer( 101 | formula = fmla, family = "binomial", 
102 | data = NULL, control = vglmer_control( 103 | iterations = 100, print_prog = 500, 104 | init = 'EM', linpred_method = "cyclical") 105 | )) 106 | 107 | expect_equal(names(ranef(ex_vglmer_cyclic)), c('g2', 'g')) 108 | expect_equal(names(ranef(ex_vglmer_cyclic_perm)), c('g', 'g2')) 109 | 110 | ex_vglmer_joint <- vglmer( 111 | formula = fmla, family = "binomial", 112 | control = vglmer_control(iterations = 100, 113 | print_prog = 500), 114 | data = NULL 115 | ) 116 | 117 | fmt_vglmer_cyclic_perm <- format_vglmer(ex_vglmer_cyclic_perm) 118 | fmt_vglmer_cyclic <- format_vglmer(ex_vglmer_cyclic) 119 | fmt_vglmer_joint <- format_vglmer(ex_vglmer_joint) 120 | 121 | expect_equivalent(fmt_vglmer_cyclic, fmt_vglmer_joint, tolerance = 1e-3, scale = 1) 122 | expect_equivalent(fmt_vglmer_cyclic, 123 | fmt_vglmer_cyclic_perm[ 124 | match(fmt_vglmer_cyclic$name, fmt_vglmer_cyclic_perm$name), 125 | ], tolerance = 1e-2, scale = 1) 126 | 127 | expect_equivalent(ex_vglmer_joint$ELBO, ex_vglmer_joint_perm$ELBO, 128 | tolerance = 1e-5, scale = 1) 129 | ex_vglmer_cyclic$ELBO$it <- NULL 130 | ex_vglmer_cyclic_perm$ELBO$it <- NULL 131 | expect_equivalent(ex_vglmer_cyclic$ELBO[,1], ex_vglmer_cyclic_perm$ELBO[,1], 132 | tolerance = 1e-2, scale = 1) 133 | 134 | }) 135 | 136 | test_that("Compare PX vs Non-PX", { 137 | 138 | skip_on_cran() 139 | 140 | N <- 1000 141 | G <- 20 142 | x <- rnorm(N) 143 | g <- sample(1:G, N, replace = T) 144 | alpha <- rnorm(G) 145 | 146 | y <- rbinom(n = N, size = 1, prob = plogis(-1 + x + alpha[g])) 147 | 148 | 149 | ex_vglmer_px_t <- vglmer( 150 | formula = y ~ x + (1 | g), family = "binomial", 151 | data = NULL, control = vglmer_control( 152 | factorization_method = "strong", debug_px = TRUE, 153 | parameter_expansion = "translation", debug_ELBO = T, init = "zero" 154 | ) 155 | ) 156 | 157 | ex_vglmer_px <- vglmer( 158 | formula = y ~ x + (1 | g), family = "binomial", 159 | data = NULL, control = vglmer_control( 160 | factorization_method = "strong", debug_px = TRUE, 161 | parameter_expansion = "mean", debug_ELBO = T, init = "zero" 162 | ) 163 | ) 164 | 165 | ex_vglmer_no <- vglmer( 166 | formula = y ~ x + (1 | g), family = "binomial", 167 | data = NULL, control = vglmer_control( 168 | factorization_method = "strong", 169 | parameter_expansion = "none", debug_ELBO = T, init = "zero" 170 | ) 171 | ) 172 | 173 | expect_gte(min(diff(ex_vglmer_no$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 174 | expect_gte(min(diff(ex_vglmer_px$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 175 | expect_gte(min(diff(ex_vglmer_px_t$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 176 | 177 | fmt_vglmer_px <- format_vglmer(ex_vglmer_px) 178 | fmt_vglmer_no <- format_vglmer(ex_vglmer_no) 179 | fmt_vglmer_px_t <- format_vglmer(ex_vglmer_px_t) 180 | 181 | expect_equivalent(fmt_vglmer_px, fmt_vglmer_no, tolerance = 1e-4, scale = 1) 182 | expect_equivalent(fmt_vglmer_px, fmt_vglmer_px_t, tolerance = 1e-4, scale = 1) 183 | }) 184 | 185 | test_that("Compare VI r methods", { 186 | 187 | skip_on_cran() 188 | 189 | N <- 1000 190 | G <- 50 191 | x <- rnorm(N) 192 | g <- sample(1:G, N, replace = T) 193 | alpha <- rnorm(G) 194 | 195 | y <- rnbinom(n = N, mu = exp(-1 + x + alpha[g]), size = 5) 196 | data <- data.frame(y = y, x = x, g = g) 197 | 198 | list_output <- list() 199 | list_r <- list() 200 | 201 | for (v in c("VEM", "fixed")) { 202 | 203 | if (v == 'fixed'){ 204 | v <- 1 205 | } 206 | example_vglmer <- suppressWarnings(vglmer( 207 | formula = y ~ x + (1 | g), data = data, 208 | family = 
"negbin", 209 | control = vglmer_control(parameter_expansion = 'mean', 210 | factorization_method = "strong", 211 | iterations = ifelse(v != 'VEM', 2, 1000), 212 | vi_r_method = v, init = "random" 213 | ) 214 | )) 215 | # Test whether it monotonically increases 216 | if (v == "VEM") { 217 | expect_gte(min(diff(example_vglmer$ELBO_trajectory$ELBO)), -sqrt(.Machine$double.eps)) 218 | } 219 | 220 | fmt_vglmer <- format_vglmer(example_vglmer) 221 | names(fmt_vglmer)[-1] <- paste0(v, "_", names(fmt_vglmer)[-1]) 222 | } 223 | 224 | # Disable negative binomial tests for now 225 | # list_output <- Reduce(function(a, b) { 226 | # merge(a, b, by = "name") 227 | # }, list_output) 228 | # expect_gte(min(as.vector(cor(list_output[, c("Laplace_mean", "delta_mean", "VEM_mean")]))), 0.95) 229 | # expect_gte(min(as.vector(cor(list_output[, c("Laplace_var", "delta_var", "VEM_var")]))), 0.95) 230 | # 231 | # all_r <- sapply(list_r, FUN = function(i) { 232 | # i$mu 233 | # }) 234 | # # Check that mu are quite close 235 | # expect_lte(diff(range(all_r)), 0.02) 236 | # # Check that the mu standard errors are close for Laplace/delta 237 | # expect_lte(diff(sqrt(sapply(list_r, FUN = function(i) { 238 | # i$sigma 239 | # }))[-1]), 0.02) 240 | }) 241 | --------------------------------------------------------------------------------