├── .github
    ├── .gitignore
    └── workflows
    │   └── R-CMD-check.yaml
├── src
    ├── .gitignore
    ├── utils.h
    ├── Makevars.win
    ├── locus_types.h
    ├── utils.cpp
    ├── Makevars
    ├── coreStructLoop.cpp
    ├── coreBatch.cpp
    ├── coreLoop.cpp
    └── RcppExports.cpp
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test_dimensions.R
    │   └── main.R
├── man
    ├── figures
    │   └── locus_logo.png
    ├── locus-package.Rd
    ├── set_blocks.Rd
    ├── set_groups.Rd
    ├── set_struct.Rd
    ├── set_cv.Rd
    ├── set_hyper.Rd
    ├── set_init.Rd
    └── locus.Rd
├── .travis.yml
├── .Rbuildignore
├── .gitignore
├── locus.Rproj
├── NAMESPACE
├── R
    ├── locus_package.R
    ├── RcppExports.R
    ├── locus_struct_core.R
    ├── elbo.R
    ├── locus_core.R
    ├── locus_group_core.R
    ├── utils.R
    ├── locus_probit_core.R
    ├── locus_logistic_core.R
    ├── update_vb.R
    ├── locus_z_core.R
    ├── locus_mix_core.R
    └── cross_validate.R
├── DESCRIPTION
├── inst
    └── sticker
    │   └── locus_logo.R
├── README.md
└── README.Rmd


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(locus)
3 | 
4 | test_check("locus")
5 | 


--------------------------------------------------------------------------------
/man/figures/locus_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hruffieux/locus/HEAD/man/figures/locus_logo.png


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 | 
3 | language: R
4 | sudo: false
5 | cache: packages
6 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^\.travis\.yml$
4 | ^README\.Rmd$
5 | ^\.github$
6 | README.Rmd
7 | ^README\.Rmd$
8 | figures$
9 | ^inst/sticker$


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | *.pdf
 6 | .DS_Store
 7 | 
 8 | # temporary files emacs
 9 | *~
10 | [#]*[#]
11 | .\#*
12 | 


--------------------------------------------------------------------------------
/src/utils.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOCUS_UTILS_H_
 2 | #define LOCUS_UTILS_H_
 3 | 
 4 | #include <RcppEigen.h>
 5 | #include "locus_types.h"
 6 | 
 7 | // [[Rcpp::depends(RcppEigen)]]
 8 | 
 9 | using namespace Rcpp;
10 | 
11 | Arr1D logOnePlusExp(const  Arr1D& x);
12 | 
13 | Arr2D logOnePlusExpMat(const  Arr2D& x);
14 | 
15 | #endif // LOCUS_UTILS_H_
16 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | ## With Rcpp 0.11.0 and later, we no longer need to set PKG_LIBS as there is
2 | ## no user-facing library. The include path to headers is already set by R.
3 | #PKG_LIBS = 
4 | 
5 | ## With R 3.1.0 or later, you can uncomment the following line to tell R to 
6 | ## enable compilation with C++11 (or even C++14) where available
7 | #CXX_STD = CXX11
8 | 


--------------------------------------------------------------------------------
/locus.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/src/locus_types.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOCUS_TYPES_H_
 2 | #define LOCUS_TYPES_H_
 3 | 
 4 | #include <RcppEigen.h>
 5 | 
 6 | // These typedefs have to be in a separate header file to be properly copied to RcppExports
 7 | 
 8 | typedef Eigen::ArrayXd Arr1D;
 9 | typedef Eigen::ArrayXXd Arr2D;
10 | typedef Eigen::Map<Eigen::ArrayXd> MapArr1D;
11 | typedef Eigen::Map<Eigen::ArrayXXd> MapArr2D;
12 | typedef Eigen::Map<Eigen::VectorXd> MapVec;
13 | typedef Eigen::Map<Eigen::MatrixXd> MapMat;
14 | 
15 | 
16 | #endif // LOCUS_TYPES_H_
17 | 


--------------------------------------------------------------------------------
/src/utils.cpp:
--------------------------------------------------------------------------------
 1 | #include "utils.h"
 2 | 
 3 | Arr1D logOnePlusExp(const Arr1D& x){
 4 | 
 5 |   Arr1D m = x;
 6 |   for (int i = 0; i < x.size(); ++i) {
 7 |     if (x[i] < 0) m[i] = 0;
 8 |   }
 9 | 
10 |   return log(exp(x - m) + exp(-m)) + m;
11 | 
12 | }
13 | 
14 | 
15 | Arr2D logOnePlusExpMat(const Arr2D& x){
16 | 
17 |   Arr2D m = x;
18 |   for (int i = 0; i < x.rows(); ++i) {
19 | 
20 |     for (int j = 0; j < x.cols(); ++j) {
21 | 
22 |      if (x(i, j) < 0) m(i, j) = 0;
23 | 
24 |     }
25 | 
26 |   }
27 | 
28 |   return log(exp(x - m) + exp(-m)) + m;
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test_dimensions.R:
--------------------------------------------------------------------------------
 1 | source("main.R")
 2 | 
 3 | context("Checking dimension of output objects.")
 4 | 
 5 | test_that("Dimension of gam_vb is compatible with the data", {
 6 |   expect_equal(dim(vb$gam_vb), c(p, d))
 7 |   expect_equal(dim(vb_z$gam_vb), c(p, d))
 8 |   expect_equal(dim(vb_logit$gam_vb), c(p, d))
 9 |   expect_equal(dim(vb_logit_z$gam_vb), c(p, d))
10 | })
11 | 
12 | 
13 | test_that("Length of om_vb is compatible with the data", {
14 |   expect_equal(length(vb$om_vb), p)
15 |   expect_equal(length(vb_z$om_vb), p)
16 |   expect_equal(length(vb_logit$om_vb), p)
17 |   expect_equal(length(vb_logit_z$om_vb), p)
18 | })
19 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
 1 | ## With Rcpp 0.11.0 and later, we no longer need to set PKG_LIBS as there is
 2 | ## no user-facing library. The include path to headers is already set by R.
 3 | 
 4 | ## strips debug symbols (smaller .so size)
 5 | PKG_LIBS = -Wl,-S
 6 | 
 7 | ## With R 3.1.0 or later, you can uncomment the following line to tell R to
 8 | ## enable compilation with C++11 (or even C++14) where available
 9 | #CXX_STD = CXX11
10 | 
11 | #CXXFLAGS +=     -march=native
12 | #CXXFLAGS +=    -mtune=native
13 | 
14 | #CFLAGS +=       -O0 -Wall
15 | #CXXFLAGS +=     -O0 -Wall
16 | #CPPFLAGS +=     -O0 -Wall
17 | 
18 | #PKG_CFLAGS +=       -O0 -Wall
19 | #PKG_CXXFLAGS +=     -O0 -Wall
20 | #PKG_CPPFLAGS +=     -O0 -Wall
21 | 
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(locus)
 4 | export(set_blocks)
 5 | export(set_cv)
 6 | export(set_groups)
 7 | export(set_hyper)
 8 | export(set_init)
 9 | export(set_struct)
10 | import(RcppEigen)
11 | importFrom(Rcpp,sourceCpp)
12 | importFrom(grDevices,dev.off)
13 | importFrom(grDevices,png)
14 | importFrom(graphics,abline)
15 | importFrom(graphics,legend)
16 | importFrom(graphics,matplot)
17 | importFrom(graphics,points)
18 | importFrom(stats,cor)
19 | importFrom(stats,dnorm)
20 | importFrom(stats,median)
21 | importFrom(stats,pnorm)
22 | importFrom(stats,qnorm)
23 | importFrom(stats,rbeta)
24 | importFrom(stats,rbinom)
25 | importFrom(stats,rgamma)
26 | importFrom(stats,rnorm)
27 | importFrom(stats,setNames)
28 | importFrom(stats,uniroot)
29 | importFrom(stats,var)
30 | useDynLib(locus, .registration = TRUE)
31 | 


--------------------------------------------------------------------------------
/man/locus-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/locus_package.R
 3 | \docType{package}
 4 | \name{locus-package}
 5 | \alias{locus-package}
 6 | \title{locus: a package for combined predictor and outcome selection in
 7 | high-dimensional set-ups using variational inference}
 8 | \description{
 9 | The locus package provides an efficient variational algorithm for
10 | simultaneous variable selection of predictors and associated outcomes based
11 | on a sparse multivariate regression model (H. Ruffieux, A. C. Davison,
12 | J. Hager, I. Irincheeva, Efficient inference for genetic association studies
13 | with multiple outcomes, Biostatistics, 2017). The methods from this package
14 | have been used on large genetic datasets from molecular quantitative trait
15 | locus (QTL) problems with over 200K single nucleotide polymorphisms (SNPs),
16 | hundreds of molecular expression levels and hundreds of samples.
17 | }
18 | \section{locus functions}{
19 |  set_hyper, set_init, locus, set_blocks, set_cv, 
20 |    set_groups, set_struct.
21 | }
22 | 
23 | 


--------------------------------------------------------------------------------
/R/locus_package.R:
--------------------------------------------------------------------------------
 1 | #' locus: a package for combined predictor and outcome selection in
 2 | #' high-dimensional set-ups using variational inference
 3 | #'
 4 | #' The locus package provides an efficient variational algorithm for
 5 | #' simultaneous variable selection of predictors and associated outcomes based
 6 | #' on a sparse multivariate regression model (H. Ruffieux, A. C. Davison,
 7 | #' J. Hager, I. Irincheeva, Efficient inference for genetic association studies
 8 | #' with multiple outcomes, Biostatistics, 2017). The methods from this package
 9 | #' have been used on large genetic datasets from molecular quantitative trait
10 | #' locus (QTL) problems with over 200K single nucleotide polymorphisms (SNPs),
11 | #' hundreds of molecular expression levels and hundreds of samples.
12 | #'
13 | #' @section locus functions: set_hyper, set_init, locus, set_blocks, set_cv, 
14 | #'    set_groups, set_struct.
15 | #'
16 | #' @docType package
17 | #' @name locus-package
18 | #' @useDynLib locus, .registration = TRUE
19 | #' @import RcppEigen
20 | #' @importFrom Rcpp sourceCpp
21 | #' @importFrom stats cor dnorm median pnorm qnorm rbeta rbinom rgamma rnorm setNames uniroot var
22 | #' @importFrom grDevices dev.off png
23 | #' @importFrom graphics abline legend matplot points
24 | NULL
25 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: locus
 2 | Type: Package
 3 | Title: Large-Scale Variational Inference for Combined Selection of Covariate and
 4 |     Response Variables in Regression Models
 5 | Version: 1.0.0
 6 | Date: 2021-01-16
 7 | Authors@R: c(
 8 |     person("Helene", "Ruffieux", email = "helene.ruffieux@mrc-bsu.cam.ac.uk",
 9 |             role = c("aut", "cre"))
10 |             )
11 | Description: Variational inference for sparse multivariate regression models
12 |     that allow combined selection of responses and associated predictos in
13 |     high-dimensional set-ups (H. Ruffieux, A. C. Davison, J. Hager and 
14 |     I. Irincheeva, Efficient inference for genetic association studies with 
15 |     multiple outcomes, *Biostatistics*, 18:618:636, 2017, 
16 |     doi: 10.1093/biostatistics/kxx007). This software has been used with
17 |     genetic data from molecular quantitative trait locus (QTL) problems with over
18 |     500K single nucleotide polymorphisms (SNPs), thousands of molecular expression
19 |     levels and thousands of individuals.
20 | Depends:
21 |     R (>= 3.2.0)
22 | License: GPL (>= 2) | file LICENSE
23 | LazyData: TRUE
24 | Imports:
25 |     Matrix (>= 1.2-2),
26 |     parallel,
27 |     plyr (>= 1.8.4),
28 |     Rcpp,
29 |     RcppEigen (>= 0.3.2.9.0)
30 | RoxygenNote: 7.1.1
31 | Suggests: testthat
32 | LinkingTo: Rcpp, RcppEigen
33 | 


--------------------------------------------------------------------------------
/src/coreStructLoop.cpp:
--------------------------------------------------------------------------------
 1 | /*
 2 |  *
 3 |  * This file is part of the `locus` R package:
 4 |  *     https://github.com/hruffieux/locus
 5 |  *
 6 |  * Functions for computationally expensive updates for structured regression.
 7 |  *
 8 |  * These functions use Eigen::Map to pass large matrices by reference from R.
 9 |  * Given dimensionalities involved in some applications, copying such matrices
10 |  * would imply a prohibitive RAM overconsumption.
11 |  *
12 |  */
13 | 
14 | #include "utils.h"
15 | 
16 | // for locus_struct_core function
17 | // [[Rcpp::export]]
18 | void coreStructLoop(const MapMat X,
19 |                     const MapMat Y,
20 |                     MapArr2D gam_vb,
21 |                     const MapArr1D log_Phi_mu_theta_vb,
22 |                     const MapArr1D log_1_min_Phi_mu_theta_vb,
23 |                     const double log_sig2_inv_vb,
24 |                     const MapArr1D log_tau_vb,
25 |                     MapMat beta_vb,
26 |                     MapMat mat_x_m1,
27 |                     MapArr2D mu_beta_vb,
28 |                     const MapArr1D sig2_beta_vb,
29 |                     const MapArr1D tau_vb,
30 |                     const MapArr1D shuffled_ind) {
31 | 
32 |   const Arr1D cst = -(log_tau_vb + log_sig2_inv_vb + log(sig2_beta_vb) )/ 2;
33 | 
34 |   for (int i = 0; i < X.cols(); ++i) {
35 | 
36 |     int j = shuffled_ind[i];
37 | 
38 |     mat_x_m1.noalias() -= X.col(j) * beta_vb.row(j);
39 | 
40 |     mu_beta_vb.row(j) = sig2_beta_vb * tau_vb *
41 |       ((Y - mat_x_m1).transpose() * X.col(j)).array();
42 | 
43 |     gam_vb.row(j) = exp(-logOnePlusExp(log_1_min_Phi_mu_theta_vb(j) - log_Phi_mu_theta_vb(j) -
44 |       mu_beta_vb.row(j).square() / (2 * sig2_beta_vb.transpose()) + cst.transpose()));
45 | 
46 |     beta_vb.row(j) = mu_beta_vb.row(j) * gam_vb.row(j);
47 | 
48 |     mat_x_m1.noalias() += X.col(j) * beta_vb.row(j);
49 | 
50 |   }
51 | 
52 | }
53 | 


--------------------------------------------------------------------------------
/tests/testthat/main.R:
--------------------------------------------------------------------------------
 1 | rm(list = ls())
 2 | 
 3 | set.seed(123)
 4 | 
 5 | ############################
 6 | ## simulate basic dataset ##
 7 | ############################
 8 | 
 9 | n <- 100; p <- 75; d <- 20; q <- 3; p0 <- 10
10 | 
11 | # covariates (not subject to selection)
12 | Z <- matrix(rnorm(n * q), nrow = n)
13 | 
14 | alpha <-  matrix(rnorm(q * d), nrow = q)
15 | 
16 | # candidate predictors (subject to selection)
17 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.2), nrow = n)
18 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.2), nrow = n)
19 | X <- cbind(X_act, X_inact)[, sample(p)]
20 | 
21 | beta <-  matrix(rnorm(p0 * d), nrow = p0)
22 | 
23 | # Gaussian outcomes
24 | Y <- matrix(rnorm(n * d, mean = Z %*% alpha + X_act %*% beta, sd = 1), nrow = n)
25 | 
26 | # Binary outcomes
27 | Y_bin <- ifelse(Y > 0, 1, 0)
28 | 
29 | # remove constant variables (needed for checking dimension consistency)
30 | X <- scale(X); Z <- scale(Z)
31 | rm_cst <- function(mat_sc) mat_sc[, !is.nan(colSums(mat_sc))]
32 | rm_coll <- function(mat_sc) mat_sc[, !duplicated(mat_sc, MARGIN = 2)]
33 | 
34 | X <- rm_cst(X); Z <- rm_cst(Z)
35 | X <- rm_coll(X); Z <- rm_coll(Z)
36 | 
37 | p <- ncol(X); q <- ncol(Z)
38 | 
39 | ####################
40 | ## locus settings ##
41 | ####################
42 | 
43 | # hyperparameter (prior number of active predictors)
44 | p0_av <- p0
45 | 
46 | 
47 | #####################
48 | ## locus inference ##
49 | #####################
50 | 
51 | # Continuous outcomes, no covariates
52 | #
53 | vb <- locus(Y = Y, X = X, p0_av = p0_av, link = "identity")
54 | 
55 | # Continuous outcomes, with covariates
56 | #
57 | vb_z <- locus(Y = Y, X = X, p0_av = p0_av, Z = Z, link = "identity")
58 | 
59 | # Binary outcomes, no covariates
60 | vb_logit <- locus(Y = Y_bin, X = X, p0_av = p0_av, link = "logit")
61 | 
62 | # Binary outcomes, with covariates
63 | vb_logit_z <- locus(Y = Y_bin, X = X, p0_av = p0_av, Z = Z, link = "logit")
64 | 
65 | 


--------------------------------------------------------------------------------
/man/set_blocks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prepare_locus.R
 3 | \name{set_blocks}
 4 | \alias{set_blocks}
 5 | \title{Gather settings for parallel inference on partitioned predictor space.}
 6 | \usage{
 7 | set_blocks(p, pos_bl, n_cpus, verbose = TRUE)
 8 | }
 9 | \arguments{
10 | \item{p}{Number of candidate predictors.}
11 | 
12 | \item{pos_bl}{Vector gathering the predictor block positions (first index of
13 | each block).}
14 | 
15 | \item{n_cpus}{Number of CPUs to be used. If large, one should ensure that
16 | enough RAM will be available for parallel execution. Set to 1 for serial
17 | execution.}
18 | 
19 | \item{verbose}{If \code{TRUE}, messages are displayed when calling
20 | \code{set_blocks}.}
21 | }
22 | \value{
23 | An object of class "\code{blocks}" preparing the settings for parallel
24 |   inference in a form that can be passed to the \code{\link{locus}}
25 |   function.
26 | }
27 | \description{
28 | Parallel applications of the method on blocks of candidate predictors for
29 | large datasets allows faster and less RAM-greedy executions.
30 | }
31 | \examples{
32 | seed <- 123; set.seed(seed)
33 | 
34 | ###################
35 | ## Simulate data ##
36 | ###################
37 | 
38 | ## Example using small problem sizes:
39 | ##
40 | n <- 200; p <- 1200; p0 <- 200; d <- 50; d0 <- 40
41 | 
42 | ## Candidate predictors (subject to selection)
43 | ##
44 | # Here we simulate common genetic variants (but any type of candidate
45 | # predictors can be supplied).
46 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
47 | #
48 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
49 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
50 | 
51 | shuff_x_ind <- sample(p)
52 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
53 | 
54 | bool_x_act <- shuff_x_ind <= p0
55 | 
56 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
57 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
58 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
59 | 
60 | ## Gaussian responses
61 | ##
62 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
63 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
64 | shuff_y_ind <- sample(d)
65 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind]
66 | 
67 | ########################
68 | ## Infer associations ##
69 | ########################
70 | 
71 | n_bl <- 6
72 | pos_bl <- seq(1, p, by = ceiling(p/n_bl))
73 | list_blocks <- set_blocks(p, pos_bl, n_cpus = 1)
74 | 
75 | vb <- locus(Y = Y, X = X, p0_av = p0, link = "identity",
76 |             list_blocks = list_blocks, user_seed = seed)
77 | 
78 | }
79 | \seealso{
80 | \code{\link{locus}}
81 | }
82 | 


--------------------------------------------------------------------------------
/inst/sticker/locus_logo.R:
--------------------------------------------------------------------------------
 1 | require(hexSticker)
 2 | require(dplyr)
 3 | require(ggpubr)
 4 | require(gridExtra)
 5 | require(GenomicRanges)
 6 | require(ggbio)
 7 | require(RColorBrewer)
 8 | 
 9 | 
10 | seed <- 110
11 | set.seed(seed)
12 | 
13 | p <- 11
14 | q <- 6
15 | 
16 | 
17 | # hotspot_propensity <- rbeta(p, shape1 = 1.5, shape2 = 1.5)
18 | # m <- sweep(matrix(rnorm(p*q, mean = 1, sd = 0.1), nrow = p), 1, hotspot_propensity, "*")
19 | # m[m<0] <- 0
20 | # m[m>1] <- 1
21 | # m[m < 0.8] <- 0 
22 | 
23 | m <- matrix(rbeta(p*q, shape1 = 1, shape2 = 1), nrow = p)
24 | colnames(m) <- paste("Col", 1:q)
25 | rownames(m) <- paste("Row", 1:p)
26 | m[m < 0.80] <- 0
27 | m[sample(c(T,F), p, prob = c(0.5, 0.5), replace = T),] <- 0
28 | 
29 | # Transform the matrix in long format
30 | df <- reshape::melt(m)
31 | colnames(df) <- c("x", "y", "value")
32 | 
33 | logo_top <- ggplot(df, aes(x = x, y = y, fill = value)) +  
34 |   scale_fill_gradientn(colours=c("white", brewer.pal(7, "Dark2"))) +
35 |   geom_tile(color = "grey80",
36 |             lwd = 0.2,
37 |             linetype = 1) +
38 |   coord_fixed()  +theme(axis.title.x=element_blank(),
39 |                         axis.text.x=element_blank(),
40 |                         axis.ticks.x=element_blank(),
41 |                         axis.title.y=element_blank(),
42 |                         axis.text.y=element_blank(),
43 |                         axis.ticks.y=element_blank(),
44 |                         legend.position="none")
45 | 
46 | 
47 | seed <- 321
48 | set.seed(seed)
49 | 
50 | N <- 5
51 | 
52 | gr <- GRanges(seqnames = rep("chr1", N),
53 |               IRanges(
54 |                 start = sample(1:(20*N), size = N, replace = TRUE),
55 |                 width = sample(25:50, size = N, replace = TRUE)),
56 |               strand = sample(c("+", "-", "*"), size = N,
57 |                               replace = TRUE),
58 |               value = rnorm(N, 2, 20), score = rnorm(N, 100, 30),
59 |               sample = sample(c("Normal", "Tumor"),
60 |                               size = N, replace = TRUE),
61 |               pair = sample(letters, size = N,
62 |                             replace = TRUE)) 
63 | 
64 | logo_bottom <- ggplot() + geom_alignment(gr,fill = "grey80") + theme_void() + scale_y_reverse()
65 | 
66 | dir.create("man/figures/", showWarnings = FALSE)
67 | 
68 | sticker(grid.arrange(logo_top, logo_bottom, heights=c(11,2.4)), 
69 |         package="locus", 
70 |         p_size=6.3, 
71 |         p_color = "grey25",
72 |         s_x=0.97, 
73 |         s_y=0.985, 
74 |         s_width=1.5, 
75 |         s_height=1.35,
76 |         p_x = 1.355, 
77 |         p_y = 0.65, 
78 |         h_size = 1,
79 |         h_fill="white", 
80 |         h_color= "grey25", #"grey80",
81 |         filename="man/figures/locus_logo.png",
82 |         dpi = 1200)
83 | 
84 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 |     branches:
 4 |       - main
 5 |       - master
 6 |   pull_request:
 7 |     branches:
 8 |       - main
 9 |       - master
10 | 
11 | name: R-CMD-check
12 | 
13 | jobs:
14 |   R-CMD-check:
15 |     runs-on: ${{ matrix.config.os }}
16 | 
17 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
18 | 
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         config:
23 |           - {os: windows-latest, r: 'release'}
24 |           - {os: macOS-latest, r: 'release'}
25 |           - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
26 |           - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
27 | 
28 |     env:
29 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
30 |       RSPM: ${{ matrix.config.rspm }}
31 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
32 | 
33 |     steps:
34 |       - uses: actions/checkout@v2
35 | 
36 |       - uses: r-lib/actions/setup-r@v1
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 | 
40 |       - uses: r-lib/actions/setup-pandoc@v1
41 | 
42 |       - name: Query dependencies
43 |         run: |
44 |           install.packages('remotes')
45 |           saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
46 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
47 |         shell: Rscript {0}
48 | 
49 |       - name: Cache R packages
50 |         if: runner.os != 'Windows'
51 |         uses: actions/cache@v2
52 |         with:
53 |           path: ${{ env.R_LIBS_USER }}
54 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
55 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
56 | 
57 |       - name: Install system dependencies
58 |         if: runner.os == 'Linux'
59 |         run: |
60 |           while read -r cmd
61 |           do
62 |             eval sudo $cmd
63 |           done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))')
64 |       - name: Install dependencies
65 |         run: |
66 |           remotes::install_deps(dependencies = TRUE)
67 |           remotes::install_cran("rcmdcheck")
68 |         shell: Rscript {0}
69 | 
70 |       - name: Check
71 |         env:
72 |           _R_CHECK_CRAN_INCOMING_REMOTE_: false
73 |         run: rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
74 |         shell: Rscript {0}
75 | 
76 |       - name: Upload check results
77 |         if: failure()
78 |         uses: actions/upload-artifact@main
79 |         with:
80 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
81 |           path: check
82 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | coreBatch <- function(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb) {
 5 |     invisible(.Call(`_locus_coreBatch`, X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb))
 6 | }
 7 | 
 8 | coreZBatch <- function(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb) {
 9 |     invisible(.Call(`_locus_coreZBatch`, X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb))
10 | }
11 | 
12 | coreProbitBatch <- function(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb) {
13 |     invisible(.Call(`_locus_coreProbitBatch`, X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb))
14 | }
15 | 
16 | coreLoop <- function(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind, c = 1) {
17 |     invisible(.Call(`_locus_coreLoop`, X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind, c))
18 | }
19 | 
20 | coreZLoop <- function(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind, c = 1) {
21 |     invisible(.Call(`_locus_coreZLoop`, X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind, c))
22 | }
23 | 
24 | coreLogitLoop <- function(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, psi_vb, sig2_beta_vb, shuffled_ind) {
25 |     invisible(.Call(`_locus_coreLogitLoop`, X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, psi_vb, sig2_beta_vb, shuffled_ind))
26 | }
27 | 
28 | coreProbitLoop <- function(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, shuffled_ind) {
29 |     invisible(.Call(`_locus_coreProbitLoop`, X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, shuffled_ind))
30 | }
31 | 
32 | coreStructLoop <- function(X, Y, gam_vb, log_Phi_mu_theta_vb, log_1_min_Phi_mu_theta_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind) {
33 |     invisible(.Call(`_locus_coreStructLoop`, X, Y, gam_vb, log_Phi_mu_theta_vb, log_1_min_Phi_mu_theta_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind))
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/man/set_groups.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prepare_locus.R
 3 | \name{set_groups}
 4 | \alias{set_groups}
 5 | \title{Gather settings for application of the `locus` function with group selection.}
 6 | \usage{
 7 | set_groups(n, p, pos_gr, verbose = TRUE)
 8 | }
 9 | \arguments{
10 | \item{n}{Number of samples.}
11 | 
12 | \item{p}{Number of candidate predictors.}
13 | 
14 | \item{pos_gr}{Vector gathering the predictor group positions (first index of
15 | each group). The predictors must be ordered by groups.}
16 | 
17 | \item{verbose}{If \code{TRUE}, messages are displayed when calling
18 | \code{set_blocks}.}
19 | }
20 | \value{
21 | An object of class "\code{groups}" preparing the settings for group
22 |   selection in a form that can be passed to the \code{\link{locus}}
23 |   function.
24 | }
25 | \description{
26 | [FUNCTIONALITY UNDER ACTIVE DEVELOPMENT, PERFORMANCE (CPU TIME) NOT OPTIMIZED].
27 | Posterior probabilities of associations are computed for predefined groups of
28 | candidate predictors. Within each group, the mean-field inference procedure
29 | makes no independence assumptions for the regression coefficients; variables
30 | in each group are approximated by a multivariate normal distribution, and
31 | they share a single binary latent selection variable.
32 | }
33 | \examples{
34 | seed <- 123; set.seed(seed)
35 | 
36 | ###################
37 | ## Simulate data ##
38 | ###################
39 | 
40 | ## Example using small problem sizes:
41 | ##
42 | n <- 200; p <- 250; p0 <- 75; d <- 50; d0 <- 40
43 | 
44 | ## Candidate predictors (subject to selection)
45 | ##
46 | # Here we simulate common genetic variants (but any type of candidate
47 | # predictors can be supplied).
48 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
49 | #
50 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
51 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
52 | 
53 | shuff_x_ind <- sample(p)
54 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
55 | 
56 | bool_x_act <- shuff_x_ind <= p0
57 | 
58 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
59 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
60 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
61 | 
62 | ## Gaussian responses
63 | ##
64 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
65 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
66 | shuff_y_ind <- sample(d)
67 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind]
68 | 
69 | ########################
70 | ## Infer associations ##
71 | ########################
72 | 
73 | n_gr <- 100
74 | pos_gr <- seq(1, p, by = ceiling(p/n_gr))
75 | list_groups <- set_groups(n, p, pos_gr)
76 | 
77 | g0_av <- 50 # Number of active groups. /!\ Often best to set it large, as a
78 |             # too small value may (wrong) result in no group being selected.
79 | 
80 | vb <- locus(Y = Y, X = X, p0_av = g0_av, link = "identity",
81 |   list_groups = list_groups, user_seed = seed)
82 | 
83 | }
84 | \seealso{
85 | \code{\link{locus}}
86 | }
87 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 2 | <!-- First time: run usethis::use_readme_rmd() to create a pre-commit hook that 
 3 | prevents from committing if the README.Rmd has changed, but has not been 
 4 | re-knitted to generate an updated README.md -->
 5 | 
 6 | ## LOCUS – Large-scale variational inference for Bayesian variable selection in multiple-response regression <img src="man/figures/locus_logo.png" align="right" height="150"/>
 7 | 
 8 | <!-- Run for the R CMD checks, run usethis::use_github_actions() to set up the pipeline, possibly modify the .yaml file and then: -->
 9 | <!-- [![](https://travis-ci.org/hruffieux/locus.svg?branch=master)](https://travis-ci.org/hruffieux/locus) -->
10 | <!-- [![R build status](https://github.com/hruffieux/locus/workflows/R-CMD-check/badge.svg)](https://github.com/hruffieux/locus/actions)  -->
11 | 
12 | [![License: GPL
13 | v2](https://img.shields.io/badge/license-GPL%20v2-blue.svg)](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html)
14 | [![](https://img.shields.io/badge/devel%20version-1.0.0-blue.svg)](https://github.com/hruffieux/locus)
15 | [![](https://img.shields.io/github/languages/code-size/hruffieux/locus.svg)](https://github.com/hruffieux/locus)
16 | [![](https://img.shields.io/badge/doi-10.1093/biostatistics/kxx007-yellow.svg)](https://doi.org/10.1093/biostatistics/kxx007)
17 | 
18 | ## Overview
19 | 
20 | **locus** is an R package providing efficient variational algorithms for
21 | simultaneous variable selection of covariates and associated responses
22 | based on multivariate regression models. Dependence across responses
23 | linked to the same covariates is captured through the model hierarchical
24 | structure (H. Ruffieux, A. C. Davison, J. Hager, I. Irincheeva,
25 | Efficient inference for genetic association studies with multiple
26 | outcomes, *Biostatistics*, 18:618–636, 2017).
27 | 
28 | ## Installation
29 | 
30 | To install, run the following command in R:
31 | 
32 | ``` r
33 | if(!require(remotes)) install.packages("remotes")
34 | remotes::install_github("hruffieux/locus")
35 | ```
36 | 
37 | ## Algorithms
38 | 
39 | The algorithms for joint covariate and response selection provided in
40 | **locus** implement inference for regression models with
41 | 
42 | - identity link;
43 | - logistic link;
44 | - probit link;
45 | - identity-probit link.
46 | 
47 | Inference on models for group selection and based on a spatial Gaussian
48 | process to encode the dependence structure of the candidate predictors
49 | are also implemented. Moreover, covariate-level external information
50 | variables can be incorporated to inform the selection.
51 | 
52 | ## License and authors
53 | 
54 | This software uses the GPL v2 license, see [LICENSE](LICENSE). Authors
55 | and copyright are provided in [DESCRIPTION](DESCRIPTION). Loris Michel
56 | has also contributed to the development of this project.
57 | 
58 | Please cite the software using the following reference: H. Ruffieux, A.
59 | C. Davison, J. Hager, I. Irincheeva, Efficient inference for genetic
60 | association studies with multiple outcomes, *Biostatistics*, 18:618–636,
61 | 2017.
62 | 
63 | ## Issues
64 | 
65 | To report an issue, please use the [locus issue
66 | tracker](https://github.com/hruffieux/locus/issues) at github.com.
67 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output:
 3 |   md_document:
 4 |     variant: gfm
 5 | html_preview: false
 6 | ---
 7 | 
 8 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 9 | <!-- First time: run usethis::use_readme_rmd() to create a pre-commit hook that 
10 | prevents from committing if the README.Rmd has changed, but has not been 
11 | re-knitted to generate an updated README.md -->
12 | 
13 | ```{r, include = FALSE}
14 | knitr::opts_chunk$set(
15 |   collapse = TRUE,
16 |   comment = "#>",
17 |   fig.path = "man/figures/README-",
18 |   out.width = "100%"
19 | )
20 | ```
21 | 
22 | 
23 | ```{r echo=FALSE, results="hide", message=FALSE}
24 | require(badger)
25 | ```
26 | 
27 | 
28 | ## LOCUS – Large-scale variational inference for Bayesian variable selection in multiple-response regression  <img src="man/figures/locus_logo.png" align="right" height="150"/>
29 | 
30 | <!-- Run for the R CMD checks, run usethis::use_github_actions() to set up the pipeline, possibly modify the .yaml file and then: -->
31 | <!-- `r badge_travis("hruffieux/locus")` -->
32 | <!-- `r badge_github_actions("hruffieux/locus")`  -->
33 | `r badge_license("GPL v2", url = "https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html")`
34 | `r badge_devel("hruffieux/locus", "blue")`
35 | `r badge_code_size("hruffieux/locus")`
36 | `r badge_doi("10.1093/biostatistics/kxx007", "yellow")`
37 | 
38 | ## Overview
39 | 
40 | **locus** is an R package providing efficient variational algorithms for
41 | simultaneous variable selection of covariates and associated responses based
42 | on multivariate regression models. Dependence across responses linked to the 
43 | same covariates is captured through the model hierarchical structure 
44 | (H. Ruffieux, A. C. Davison, J. Hager, I. Irincheeva, Efficient inference 
45 | for genetic association studies with multiple outcomes, *Biostatistics*, 
46 | 18:618–636, 2017). 
47 | 
48 | ## Installation
49 | 
50 | To install, run the following command in R:
51 | 
52 | ``` r
53 | if(!require(remotes)) install.packages("remotes")
54 | remotes::install_github("hruffieux/locus")
55 | ```
56 | 
57 | ## Algorithms
58 | 
59 | The algorithms for joint covariate and response selection provided in **locus**
60 | implement inference for regression models with 
61 | 
62 | * identity link;
63 | * logistic link;
64 | * probit link;
65 | * identity-probit link.
66 | 
67 | Inference on models for group selection and based on a spatial Gaussian process to 
68 | encode the dependence structure of the candidate predictors are also implemented. 
69 | Moreover, covariate-level external information variables can be incorporated to 
70 | inform the selection.
71 | 
72 | ## License and authors
73 | 
74 | This software uses the GPL v2 license, see [LICENSE](LICENSE).
75 | Authors and copyright are provided in [DESCRIPTION](DESCRIPTION). Loris Michel
76 | has also contributed to the development of this project.
77 | 
78 | Please cite the software using the following reference: 
79 | H. Ruffieux, A. C. Davison, J. Hager, I. Irincheeva, Efficient inference 
80 | for genetic association studies with multiple outcomes, *Biostatistics*, 
81 | 18:618–636, 2017. 
82 | 
83 | ## Issues
84 | 
85 | To report an issue, please use the [locus issue tracker](https://github.com/hruffieux/locus/issues) at github.com.
86 | 
87 | 


--------------------------------------------------------------------------------
/man/set_struct.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prepare_locus.R
 3 | \name{set_struct}
 4 | \alias{set_struct}
 5 | \title{Gather settings for application of the `locus` function with structured
 6 | sparse prior.}
 7 | \usage{
 8 | set_struct(n, p, pos_st, n_cpus, verbose = TRUE)
 9 | }
10 | \arguments{
11 | \item{n}{Number of samples.}
12 | 
13 | \item{p}{Number of candidate predictors.}
14 | 
15 | \item{pos_st}{Vector gathering the predictor block positions (first index of
16 | each block). The predictors must be ordered by blocks.}
17 | 
18 | \item{n_cpus}{Number of CPUs to be used. Only used if \code{hyper} is 
19 | \code{FALSE}, otherwise set it to 1. If large, one should ensure that 
20 | enough RAM will be available for parallel execution. Set to 1 for serial 
21 | execution.}
22 | 
23 | \item{verbose}{If \code{TRUE}, messages are displayed when calling
24 | \code{set_struct}.}
25 | }
26 | \value{
27 | An object of class "\code{struct}" preparing the settings for group
28 |   selection in a form that can be passed to the \code{\link{locus}}
29 |   function.
30 | }
31 | \description{
32 | [FUNCTIONALITY UNDER ACTIVE DEVELOPMENT, PERFORMANCE (CPU TIME) NOT OPTIMIZED].
33 | Posterior probabilities of associations are computed using an empirical
34 | covariance estimate of the candidate predictors. This estimate has a block
35 | structure (which could reflect linkage disequilibrium patterns when
36 | considering genome-wide associations). Such a structure is necessary in
37 | large problems for tractability both time- and memory-wise. The posterior
38 | probablity of inclusion corresponding to a given block are approximated by a
39 | multivariate distribution through a Bernoulli-probit link function.
40 | }
41 | \examples{
42 | seed <- 123; set.seed(seed)
43 | 
44 | ###################
45 | ## Simulate data ##
46 | ###################
47 | 
48 | ## Example using small problem sizes:
49 | ##
50 | n <- 200; p <- 300; p0 <- 100; d <- 50; d0 <- 40
51 | 
52 | ## Candidate predictors (subject to selection)
53 | ##
54 | # Here we simulate common genetic variants (but any type of candidate
55 | # predictors can be supplied).
56 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
57 | #
58 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
59 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
60 | 
61 | shuff_x_ind <- sample(p)
62 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
63 | 
64 | bool_x_act <- shuff_x_ind <= p0
65 | 
66 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
67 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
68 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
69 | 
70 | ## Gaussian responses
71 | ##
72 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
73 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
74 | shuff_y_ind <- sample(d)
75 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind]
76 | 
77 | ########################
78 | ## Infer associations ##
79 | ########################
80 | 
81 | n_st <- 100
82 | pos_st <- seq(1, p, by = ceiling(p/n_st))
83 | list_struct <- set_struct(n, p, pos_st, n_cpus = 1)
84 | 
85 | vb <- locus(Y = Y, X = X, p0_av = p0, link = "identity",
86 |    list_struct = list_struct, user_seed = seed)
87 | 
88 | }
89 | \seealso{
90 | \code{\link{locus}}
91 | }
92 | 


--------------------------------------------------------------------------------
/man/set_cv.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/cross_validate.R
  3 | \name{set_cv}
  4 | \alias{set_cv}
  5 | \title{Gather settings for the cross-validation procedure used in \code{locus}.}
  6 | \usage{
  7 | set_cv(
  8 |   n,
  9 |   p,
 10 |   n_folds,
 11 |   size_p0_av_grid,
 12 |   n_cpus,
 13 |   tol_cv = 0.1,
 14 |   maxit_cv = 1000,
 15 |   verbose = TRUE
 16 | )
 17 | }
 18 | \arguments{
 19 | \item{n}{Number of samples.}
 20 | 
 21 | \item{p}{Number of candidate predictors.}
 22 | 
 23 | \item{n_folds}{Number of number of folds. Large folds are not recommended for
 24 | large datasets as the procedure may become computationally expensive. Must
 25 | be greater than 2 and smaller than the number of samples.}
 26 | 
 27 | \item{size_p0_av_grid}{Number of possible values of p0_av to be compared.
 28 | Large numbers are not recommended for large datasets as the procedure may
 29 | become computationally expensive.}
 30 | 
 31 | \item{n_cpus}{Number of CPUs to be used for the cross-validation procedure.
 32 | If large, one should ensure that enough RAM will be available for parallel
 33 | execution. Set to 1 for serial execution.}
 34 | 
 35 | \item{tol_cv}{Tolerance for the variational algorithm stopping criterion used
 36 | within the cross-validation procedure.}
 37 | 
 38 | \item{maxit_cv}{Maximum number of iterations allowed for the variational
 39 | algorithm used within the cross-validation procedure.}
 40 | 
 41 | \item{verbose}{If \code{TRUE}, messages are displayed when calling
 42 | \code{set_cv}.}
 43 | }
 44 | \value{
 45 | An object of class "\code{cv}" preparing the settings for the
 46 |   cross-validation settings in a form that can be passed to the
 47 |   \code{\link{locus}} function.
 48 | }
 49 | \description{
 50 | The cross-validation procedure uses the variational lower bound as objective
 51 | function and is used to select the prior average number of predictors
 52 | \code{p0_av} expected to be included in the model. \code{p0_av} is used to
 53 | set the model hyperparameters and ensure sparse predictor selections.
 54 | }
 55 | \details{
 56 | This cross-validation procedure is available only for
 57 | \code{link = "identity"}.
 58 | }
 59 | \examples{
 60 | seed <- 123; set.seed(seed)
 61 | 
 62 | ###################
 63 | ## Simulate data ##
 64 | ###################
 65 | 
 66 | ## Example using small problem sizes:
 67 | ##
 68 | n <- 150; p <- 200; p0 <- 50; d <- 25; d0 <- 20
 69 | 
 70 | ## Candidate predictors (subject to selection)
 71 | ##
 72 | # Here we simulate common genetic variants (but any type of candidate
 73 | # predictors can be supplied).
 74 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
 75 | #
 76 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
 77 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
 78 | 
 79 | shuff_x_ind <- sample(p)
 80 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
 81 | 
 82 | bool_x_act <- shuff_x_ind <= p0
 83 | 
 84 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
 85 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
 86 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
 87 | 
 88 | ## Gaussian responses
 89 | ##
 90 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
 91 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
 92 | shuff_y_ind <- sample(d)
 93 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind]
 94 | 
 95 | ########################
 96 | ## Infer associations ##
 97 | ########################
 98 | 
 99 | list_cv <- set_cv(n, p, n_folds = 3, size_p0_av_grid = 3, n_cpus = 1)
100 | 
101 | vb <- locus(Y = Y, X = X, p0_av = NULL, link = "identity", list_cv = list_cv,
102 |             user_seed = seed)
103 | 
104 | }
105 | \seealso{
106 | \code{\link{locus}}
107 | }
108 | 


--------------------------------------------------------------------------------
/src/coreBatch.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 | *
  3 | * This file is part of the `locus` R package:
  4 | *     https://github.com/hruffieux/locus
  5 | *
  6 | * Functions for computationally expensive multiconcave block coordinate ascent updates.
  7 | *
  8 | * These functions use Eigen::Map to pass large matrices by reference from R.
  9 | * Given dimensionalities involved in some applications, copying such matrices
 10 | * would imply a prohibitive RAM overconsumption.
 11 | *
 12 | *
 13 | */
 14 | 
 15 | #include "utils.h"
 16 | 
 17 | // for locus_core function
 18 | // [[Rcpp::export]]
 19 | void coreBatch(const MapMat X,
 20 |                    const MapMat Y,
 21 |                    MapArr2D gam_vb,
 22 |                    const MapArr1D log_om_vb,
 23 |                    const MapArr1D log_1_min_om_vb,
 24 |                    const double log_sig2_inv_vb,
 25 |                    const MapArr1D log_tau_vb,
 26 |                    MapMat beta_vb,
 27 |                    MapMat mat_x_m1,
 28 |                    MapArr2D mu_beta_vb,
 29 |                    const MapArr1D sig2_beta_vb,
 30 |                    const MapArr1D tau_vb) {
 31 | 
 32 |   mu_beta_vb = (X.transpose() * (Y - mat_x_m1) +
 33 |     (Y.rows() - 1) * beta_vb).array().rowwise() * (sig2_beta_vb * tau_vb).transpose();
 34 | 
 35 |   gam_vb = exp(-logOnePlusExpMat(((mu_beta_vb.square().array().rowwise() / (-2 * sig2_beta_vb.transpose())).rowwise() -
 36 |     (log_tau_vb.transpose() / 2 + log(sig2_beta_vb).transpose() / 2)).colwise()
 37 |                                    + (log_1_min_om_vb - log_om_vb) - log_sig2_inv_vb / 2));
 38 | 
 39 |   beta_vb = mu_beta_vb * gam_vb;
 40 |   mat_x_m1 = X * beta_vb;
 41 | 
 42 | }
 43 | 
 44 | 
 45 | 
 46 | // for locus_z_core and locus_mix_core function
 47 | // [[Rcpp::export]]
 48 | void coreZBatch(const MapMat X,
 49 |                     const MapMat Y,
 50 |                     MapArr2D gam_vb,
 51 |                     const MapArr1D log_om_vb,
 52 |                     const MapArr1D log_1_min_om_vb,
 53 |                     const double log_sig2_inv_vb,
 54 |                     const MapArr1D log_tau_vb,
 55 |                     MapMat beta_vb,
 56 |                     MapMat mat_x_m1,
 57 |                     MapMat mat_z_mu,
 58 |                     MapArr2D mu_beta_vb,
 59 |                     const MapArr1D sig2_beta_vb,
 60 |                     const MapArr1D tau_vb) {
 61 | 
 62 | 
 63 |   mu_beta_vb = (X.transpose() * (Y - mat_x_m1 - mat_z_mu) +
 64 |     (Y.rows() - 1) * beta_vb).array().rowwise() * (sig2_beta_vb * tau_vb).transpose();
 65 | 
 66 |   gam_vb = exp(-logOnePlusExpMat(((mu_beta_vb.square().array().rowwise() / (-2 * sig2_beta_vb.transpose())).rowwise() -
 67 |     (log_tau_vb.transpose() / 2 + log(sig2_beta_vb).transpose() / 2)).colwise()
 68 |                                    + (log_1_min_om_vb - log_om_vb) - log_sig2_inv_vb / 2));
 69 | 
 70 |   beta_vb = mu_beta_vb * gam_vb;
 71 |   mat_x_m1 = X * beta_vb;
 72 | 
 73 | }
 74 | 
 75 | 
 76 | // for locus_probit_core function
 77 | // [[Rcpp::export]]
 78 | void coreProbitBatch(const MapMat X,
 79 |                      const MapMat W,
 80 |                      MapArr2D gam_vb,
 81 |                      const MapArr1D log_om_vb,
 82 |                      const MapArr1D log_1_min_om_vb,
 83 |                      const double log_sig2_inv_vb,
 84 |                      MapMat beta_vb,
 85 |                      MapMat mat_x_m1,
 86 |                      MapMat mat_z_mu,
 87 |                      MapArr2D mu_beta_vb,
 88 |                      const double sig2_beta_vb) {
 89 | 
 90 | 
 91 |   mu_beta_vb = sig2_beta_vb * (X.transpose() * (W - mat_x_m1 - mat_z_mu) +
 92 |     (W.rows() - 1) * beta_vb).array();
 93 | 
 94 |   gam_vb = exp(-logOnePlusExpMat((mu_beta_vb.square().array() / (-2 * sig2_beta_vb)).colwise() +
 95 |     (log_1_min_om_vb - log_om_vb) - log_sig2_inv_vb / 2 - log(sig2_beta_vb) / 2));
 96 | 
 97 |   beta_vb = mu_beta_vb * gam_vb;
 98 |   mat_x_m1 = X * beta_vb;
 99 | 
100 | }
101 | 
102 | 


--------------------------------------------------------------------------------
/src/coreLoop.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  *
  3 |  * This file is part of the `locus` R package:
  4 |  *     https://github.com/hruffieux/locus
  5 |  *
  6 |  * Functions for computationally expensive updates
  7 |  *
  8 |  * These functions use Eigen::Map to pass large matrices by reference from R.
  9 |  * Given dimensionalities involved in some applications, copying such matrices
 10 |  * would imply a prohibitive RAM overconsumption.
 11 |  *
 12 |  */
 13 | 
 14 | #include "utils.h"
 15 | 
 16 | 
 17 | // for locus_core function
 18 | // [[Rcpp::export]]
 19 | void coreLoop(const MapMat X,
 20 |               const MapMat Y,
 21 |               MapArr2D gam_vb,
 22 |               const MapArr1D log_om_vb,
 23 |               const MapArr1D log_1_min_om_vb,
 24 |               const double log_sig2_inv_vb,
 25 |               const MapArr1D log_tau_vb,
 26 |               MapMat beta_vb,
 27 |               MapMat mat_x_m1,
 28 |               MapArr2D mu_beta_vb,
 29 |               const MapArr1D sig2_beta_vb,
 30 |               const MapArr1D tau_vb,
 31 |               const MapArr1D shuffled_ind,
 32 |               const double c = 1) {
 33 | 
 34 |   const Arr1D cst = - (log_tau_vb + log_sig2_inv_vb + log(sig2_beta_vb)) / 2;
 35 | 
 36 |   for (int i = 0; i < X.cols(); ++i) {
 37 | 
 38 |     int j = shuffled_ind[i];
 39 | 
 40 |     mat_x_m1.noalias() -= X.col(j) * beta_vb.row(j);
 41 | 
 42 |     mu_beta_vb.row(j) = c * sig2_beta_vb * tau_vb *
 43 |       ((Y - mat_x_m1).transpose() * X.col(j)).array();
 44 | 
 45 |     gam_vb.row(j) = exp(-logOnePlusExp(c * (log_1_min_om_vb(j) - log_om_vb(j) -
 46 |       mu_beta_vb.row(j).transpose().square() / (2 * sig2_beta_vb) + cst)));
 47 | 
 48 |     beta_vb.row(j) = mu_beta_vb.row(j) * gam_vb.row(j);
 49 | 
 50 |     mat_x_m1.noalias() += X.col(j) * beta_vb.row(j);
 51 | 
 52 |   }
 53 | 
 54 | }
 55 | 
 56 | 
 57 | 
 58 | // for locus_z_core and locus_mix_core function
 59 | // [[Rcpp::export]]
 60 | void coreZLoop(const MapMat X,
 61 |                const MapMat Y,
 62 |                MapArr2D gam_vb,
 63 |                const MapArr1D log_om_vb,
 64 |                const MapArr1D log_1_min_om_vb,
 65 |                const double log_sig2_inv_vb,
 66 |                const MapArr1D log_tau_vb,
 67 |                MapMat beta_vb,
 68 |                MapMat mat_x_m1,
 69 |                MapMat mat_z_mu,
 70 |                MapArr2D mu_beta_vb,
 71 |                const MapArr1D sig2_beta_vb,
 72 |                const MapArr1D tau_vb,
 73 |                const MapArr1D shuffled_ind,
 74 |                const double c = 1) {
 75 | 
 76 |   const Arr1D cst = -(log_tau_vb + log_sig2_inv_vb + log(sig2_beta_vb) )/ 2;
 77 | 
 78 |   for (int i = 0; i < X.cols(); ++i) {
 79 | 
 80 |     int j = shuffled_ind[i];
 81 | 
 82 |     mat_x_m1.noalias() -= X.col(j) * beta_vb.row(j);
 83 | 
 84 |     mu_beta_vb.row(j) = c * sig2_beta_vb * tau_vb *
 85 |       ((Y - mat_x_m1 - mat_z_mu).transpose() * X.col(j)).array();
 86 | 
 87 |     gam_vb.row(j) = exp(-logOnePlusExp(c * (log_1_min_om_vb(j) - log_om_vb(j) -
 88 |       mu_beta_vb.row(j).transpose().square() / (2 * sig2_beta_vb) + cst)));
 89 | 
 90 |     beta_vb.row(j) = mu_beta_vb.row(j) * gam_vb.row(j);
 91 | 
 92 |     mat_x_m1.noalias() += X.col(j) * beta_vb.row(j);
 93 | 
 94 |   }
 95 | 
 96 | }
 97 | 
 98 | 
 99 | // for locus_logit_core function
100 | // [[Rcpp::export]]
101 | void coreLogitLoop(const MapMat X,
102 |                    const MapArr2D Y,
103 |                    MapArr2D gam_vb,
104 |                    const MapArr1D log_om_vb,
105 |                    const MapArr1D log_1_min_om_vb,
106 |                    const double log_sig2_inv_vb,
107 |                    MapMat beta_vb,
108 |                    MapArr2D mat_x_m1,
109 |                    MapArr2D mat_z_mu,
110 |                    MapArr2D mu_beta_vb,
111 |                    const MapArr2D psi_vb,
112 |                    const MapArr2D sig2_beta_vb,
113 |                    const MapArr1D shuffled_ind) {
114 | 
115 |   for (int i = 0; i < X.cols(); ++i) {
116 | 
117 |     int j = shuffled_ind[i];
118 | 
119 |     mat_x_m1.matrix().noalias() -= X.col(j) * beta_vb.row(j);
120 | 
121 |     mu_beta_vb.row(j) = sig2_beta_vb.row(j) * (X.col(j).transpose() * (Y - 2 * psi_vb * (mat_x_m1 + mat_z_mu)).matrix()).array();
122 | 
123 |     gam_vb.row(j) = exp(-logOnePlusExp(log_1_min_om_vb(j) - log_om_vb(j) -
124 |       log_sig2_inv_vb / 2 - mu_beta_vb.row(j).square() / (2 * sig2_beta_vb.row(j)) -
125 |       log(sig2_beta_vb.row(j)) / 2));
126 | 
127 |     beta_vb.row(j) = mu_beta_vb.row(j) * gam_vb.row(j);
128 | 
129 |     mat_x_m1.matrix().noalias() += X.col(j) * beta_vb.row(j);
130 | 
131 |   }
132 | 
133 | }
134 | 
135 | 
136 | 
137 | // for locus_probit_core function
138 | // [[Rcpp::export]]
139 | void coreProbitLoop(const MapMat X,
140 |                     const MapMat W,
141 |                     MapArr2D gam_vb,
142 |                     const MapArr1D log_om_vb,
143 |                     const MapArr1D log_1_min_om_vb,
144 |                     const double log_sig2_inv_vb,
145 |                     MapMat beta_vb,
146 |                     MapMat mat_x_m1,
147 |                     MapMat mat_z_mu,
148 |                     MapArr2D mu_beta_vb,
149 |                     const double sig2_beta_vb,
150 |                     const MapArr1D shuffled_ind) {
151 | 
152 |   const double cst = -(log_sig2_inv_vb + log(sig2_beta_vb) )/ 2;
153 | 
154 |   for (int i = 0; i < X.cols(); ++i) {
155 | 
156 |     int j = shuffled_ind[i];
157 | 
158 |     mat_x_m1.noalias() -= X.col(j) * beta_vb.row(j);
159 | 
160 |     mu_beta_vb.row(j) = sig2_beta_vb * ((W - mat_x_m1 - mat_z_mu).transpose() * X.col(j)).array();
161 | 
162 |     gam_vb.row(j) = exp(-logOnePlusExp(log_1_min_om_vb(j) - log_om_vb(j) -
163 |       mu_beta_vb.row(j).square() / (2 * sig2_beta_vb) + cst));
164 | 
165 |     beta_vb.row(j) = mu_beta_vb.row(j) * gam_vb.row(j);
166 | 
167 |     mat_x_m1.noalias() += X.col(j) * beta_vb.row(j);
168 | 
169 |   }
170 | 
171 | }
172 | 
173 | 


--------------------------------------------------------------------------------
/R/locus_struct_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for structured
  5 | # sparse regression with identity link, no fixed covariates.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_struct_core_ <- function(Y, X, list_hyper, gam_vb, mu_beta_vb, sig2_beta_vb,
  9 |                                tau_vb, list_struct, tol, maxit, verbose, batch = "y",
 10 |                                full_output = FALSE, debug = FALSE) {
 11 | 
 12 |   # Y must have been centered, and X standardized.
 13 | 
 14 |   d <- ncol(Y)
 15 |   n <- nrow(Y)
 16 |   p <- ncol(X)
 17 |   vec_fac_st <- list_struct$vec_fac_st
 18 |   n_cpus <- list_struct$n_cpus
 19 | 
 20 |   with(list_hyper, { # list_init not used with the with() function to avoid
 21 |                      # copy-on-write for large objects
 22 | 
 23 |     # Parameter initialization here for the top level only
 24 |     #
 25 |     theta_vb <- m0
 26 | 
 27 | 
 28 |     # Covariate-specific parameters: objects derived from s02, list_struct (possible block-wise in parallel)
 29 |     #
 30 |     obj_theta_vb <- update_sig2_theta_vb_(d, p, list_struct, s02, X)
 31 | 
 32 |     list_S0_inv <- obj_theta_vb$S0_inv
 33 |     list_sig2_theta_vb <- obj_theta_vb$sig2_theta_vb
 34 |     vec_sum_log_det <- obj_theta_vb$vec_sum_log_det_theta
 35 | 
 36 |     vec_fac_st <- obj_theta_vb$vec_fac_st
 37 | 
 38 | 
 39 |     # Stored/precomputed objects
 40 |     #
 41 |     beta_vb <- update_beta_vb_(gam_vb, mu_beta_vb)
 42 |     m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
 43 | 
 44 |     mat_x_m1 <- update_mat_x_m1_(X, beta_vb)
 45 | 
 46 | 
 47 |     converged <- FALSE
 48 |     lb_new <- -Inf
 49 |     it <- 0
 50 | 
 51 |     while ((!converged) & (it < maxit)) {
 52 | 
 53 |       lb_old <- lb_new
 54 |       it <- it + 1
 55 | 
 56 |       if (verbose & (it == 1 | it %% 5 == 0))
 57 |         cat(paste0("Iteration ", format(it), "... \n"))
 58 | 
 59 |       # % #
 60 |       lambda_vb <- update_lambda_vb_(lambda, sum(gam_vb))
 61 |       nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb)
 62 | 
 63 |       sig2_inv_vb <- lambda_vb / nu_vb
 64 |       # % #
 65 | 
 66 |       # % #
 67 |       eta_vb <- update_eta_vb_(n, eta, gam_vb)
 68 |       kappa_vb <- update_kappa_vb_(Y, kappa, mat_x_m1, beta_vb, m2_beta, sig2_inv_vb)
 69 | 
 70 |       tau_vb <- eta_vb / kappa_vb
 71 |       # % #
 72 | 
 73 |       sig2_beta_vb <- update_sig2_beta_vb_(n, sig2_inv_vb, tau_vb)
 74 | 
 75 |       log_tau_vb <- update_log_tau_vb_(eta_vb, kappa_vb)
 76 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 77 | 
 78 | 
 79 |       # different possible batch-coordinate ascent schemes:
 80 | 
 81 |       if (batch == "y") { # optimal scheme
 82 | 
 83 |         log_Phi_theta_vb <- pnorm(theta_vb, log.p = TRUE)
 84 |         log_1_min_Phi_theta_vb <- pnorm(theta_vb, lower.tail = FALSE, log.p = TRUE)
 85 | 
 86 |         # C++ Eigen call for expensive updates
 87 |         shuffled_ind <- as.numeric(sample(0:(p-1))) # Zero-based index in C++
 88 | 
 89 |         coreStructLoop(X, Y, gam_vb, log_Phi_theta_vb, log_1_min_Phi_theta_vb,
 90 |                        log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb,
 91 |                        sig2_beta_vb, tau_vb, shuffled_ind)
 92 | 
 93 |       } else if (batch == "0"){ # no batch, used only internally
 94 |                                 # schemes "x" of "x-y" are not batch concave
 95 |                                 # hence not implemented as they may diverge
 96 | 
 97 |         for (k in sample(1:d)) {
 98 | 
 99 |           for (j in sample(1:p)) {
100 | 
101 |             mat_x_m1[, k] <- mat_x_m1[, k] - X[, j] * beta_vb[j, k]
102 | 
103 |             mu_beta_vb[j, k] <- sig2_beta_vb[k] * tau_vb[k] * crossprod(Y[, k] - mat_x_m1[, k], X[, j])
104 | 
105 |             gam_vb[j, k] <- exp(-log_one_plus_exp_(pnorm(theta_vb[j], lower.tail = FALSE, log.p = TRUE) -
106 |                                                      pnorm(theta_vb[j], log.p = TRUE) -
107 |                                                      log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
108 |                                                      mu_beta_vb[j, k] ^ 2 / (2 * sig2_beta_vb[k]) -
109 |                                                      log(sig2_beta_vb[k]) / 2))
110 | 
111 |             beta_vb[j, k] <- gam_vb[j, k] * mu_beta_vb[j, k]
112 | 
113 |             mat_x_m1[, k] <- mat_x_m1[, k] + X[, j] * beta_vb[j, k]
114 | 
115 |           }
116 |         }
117 | 
118 |       } else {
119 | 
120 |         stop ("Batch scheme not defined. Exit.")
121 | 
122 |       }
123 | 
124 |       m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
125 | 
126 | 
127 |       W <- update_W_struct_(gam_vb, theta_vb)
128 | 
129 |       theta_vb <- update_theta_vb_(W, m0, list_S0_inv, list_sig2_theta_vb, vec_fac_st)
130 | 
131 | 
132 |       lb_new <- elbo_struct_(Y, beta_vb, eta, eta_vb, gam_vb, kappa, kappa_vb, lambda,
133 |                              lambda_vb, m0, theta_vb, nu, nu_vb, sig2_beta_vb,
134 |                              list_S0_inv, list_sig2_theta_vb, sig2_inv_vb, tau_vb,
135 |                              m2_beta, mat_x_m1, vec_fac_st, vec_sum_log_det)
136 | 
137 |       if (verbose & (it == 1 | it %% 5 == 0))
138 |         cat(paste0("ELBO = ", format(lb_new), "\n\n"))
139 | 
140 |       if (debug && lb_new < lb_old)
141 |         stop("ELBO not increasing monotonically. Exit. ")
142 | 
143 |       converged <- (abs(lb_new - lb_old) < tol)
144 | 
145 |     }
146 | 
147 | 
148 |     if (verbose) {
149 |       if (converged) {
150 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
151 |                   "Optimal marginal log-likelihood variational lower bound ",
152 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
153 |       } else {
154 |         warning("Maximal number of iterations reached before convergence. Exit.")
155 |       }
156 |     }
157 | 
158 |     lb_opt <- lb_new
159 | 
160 |     names_x <- colnames(X)
161 |     names_y <- colnames(Y)
162 |     
163 |     rownames(gam_vb) <- rownames(beta_vb) <- names_x
164 |     colnames(gam_vb) <- colnames(beta_vb) <- names_y
165 |     names(theta_vb) <- names_x
166 |     
167 |     diff_lb <- abs(lb_opt - lb_old)
168 |     
169 |     if (full_output) { # for internal use only
170 | 
171 |       create_named_list_(beta_vb, eta, eta_vb, gam_vb, kappa, kappa_vb, lambda,
172 |                          lambda_vb, m0, mu_beta_vb, theta_vb, nu, nu_vb, sig2_beta_vb,
173 |                          list_S0_inv, list_sig2_theta_vb, sig2_inv_vb, tau_vb,
174 |                          m2_beta, vec_fac_st, vec_sum_log_det, converged, it, 
175 |                          lb_opt, diff_lb)
176 | 
177 |     } else {
178 | 
179 |       create_named_list_(beta_vb, gam_vb, theta_vb, converged, it, lb_opt, diff_lb)
180 | 
181 |     }
182 |   })
183 | 
184 | }
185 | 
186 | 
187 | 
188 | # Internal function which implements the marginal log-likelihood variational
189 | # lower bound (ELBO) corresponding to the `locus_struct_core` algorithm.
190 | #
191 | elbo_struct_ <- function(Y, beta_vb, eta, eta_vb, gam_vb, kappa, kappa_vb, lambda,
192 |                          lambda_vb, m0, theta_vb, nu, nu_vb, sig2_beta_vb,
193 |                          list_S0_inv, list_sig2_theta_vb, sig2_inv_vb, tau_vb,
194 |                          m2_beta, mat_x_m1, vec_fac_st, vec_sum_log_det) {
195 | 
196 | 
197 |   n <- nrow(Y)
198 | 
199 |   # needed for monotonically increasing elbo.
200 |   #
201 |   eta_vb <- update_eta_vb_(n, eta, gam_vb)
202 |   kappa_vb <- update_kappa_vb_(Y, kappa, mat_x_m1, beta_vb, m2_beta, sig2_inv_vb)
203 | 
204 |   lambda_vb <- update_lambda_vb_(lambda, sum(gam_vb))
205 |   nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb)
206 | 
207 |   log_tau_vb <- update_log_tau_vb_(eta_vb, kappa_vb)
208 |   log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
209 | 
210 | 
211 |   elbo_A <- e_y_(n, kappa, kappa_vb, log_tau_vb, m2_beta, sig2_inv_vb, tau_vb)
212 | 
213 |   elbo_B <- e_beta_gamma_struct_(gam_vb, log_sig2_inv_vb, log_tau_vb,
214 |                                  theta_vb, m2_beta, sig2_beta_vb,
215 |                                  list_sig2_theta_vb, sig2_inv_vb, tau_vb)
216 | 
217 |   elbo_C <- e_theta_(m0, theta_vb, list_S0_inv, list_sig2_theta_vb, vec_fac_st, vec_sum_log_det)
218 | 
219 |   elbo_D <- e_tau_(eta, eta_vb, kappa, kappa_vb, log_tau_vb, tau_vb)
220 | 
221 |   elbo_E <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
222 | 
223 |   elbo_A + elbo_B + elbo_C + elbo_D + elbo_E
224 | 
225 | }
226 | 
227 | 


--------------------------------------------------------------------------------
/man/set_hyper.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/set_hyper_init.R
  3 | \name{set_hyper}
  4 | \alias{set_hyper}
  5 | \title{Gather model hyperparameters provided by the user.}
  6 | \usage{
  7 | set_hyper(
  8 |   d,
  9 |   p,
 10 |   lambda,
 11 |   nu,
 12 |   a,
 13 |   b,
 14 |   eta,
 15 |   kappa,
 16 |   link = "identity",
 17 |   ind_bin = NULL,
 18 |   q = NULL,
 19 |   phi = NULL,
 20 |   xi = NULL,
 21 |   m0 = NULL,
 22 |   s02 = NULL,
 23 |   G = NULL,
 24 |   struct = FALSE
 25 | )
 26 | }
 27 | \arguments{
 28 | \item{d}{Number of responses.}
 29 | 
 30 | \item{p}{Number of candidate predictors.}
 31 | 
 32 | \item{lambda}{Vector of length 1 providing the values of hyperparameter
 33 | \eqn{\lambda} for the prior distribution of \eqn{\sigma^{-2}}. \eqn{\sigma}
 34 | represents the typical size of nonzero effects.}
 35 | 
 36 | \item{nu}{Vector of length 1 providing the values of hyperparameter \eqn{\nu}
 37 | for the prior distribution of \eqn{\sigma^{-2}}. \eqn{\sigma} represents
 38 | the typical size of nonzero effects.}
 39 | 
 40 | \item{a}{Vector of length 1 or p providing the values of hyperparameter
 41 | \eqn{a} for the prior distributions for the proportion of responses
 42 | associated with each candidate predictor, \eqn{\omega} (vector of length p).
 43 | If of length 1, the provided value is repeated p times.}
 44 | 
 45 | \item{b}{Vector of length 1 or p providing the values of hyperparameter
 46 | \eqn{b} for the prior distributions for the proportion of responses
 47 | associated with each candidate predictor, \eqn{\omega} (vector of length p).
 48 | If of length 1, the provided value is repeated p times.}
 49 | 
 50 | \item{eta}{Vector of length 1 or d for \code{link = "identity"}, and of
 51 | length 1 or d_cont = d - length(ind_bin) (the number of continuous response
 52 | variables) for \code{link = "mix"}. Provides the values of
 53 | hyperparameter \eqn{\eta} for the prior distributions of the continuous
 54 | response residual precisions, \eqn{\tau}. If of length 1, the provided
 55 | value is repeated d, resp. d_cont, times. Must be \code{NULL} for
 56 | \code{link = "logit"} and \code{link = "probit"}.}
 57 | 
 58 | \item{kappa}{Vector of length 1 or d for \code{link = "identity"}, and of
 59 | length 1 or d_cont = d - length(ind_bin) (the number of continuous response
 60 | variables) for \code{link = "mix"}. Provides the values of hyperparameter
 61 | \eqn{\kappa} for the prior distributions of the response residual
 62 | precisions, \eqn{\tau}. If of length 1, the provided value is repeated d,
 63 | resp. d_cont, times. Must be \code{NULL} for \code{link = "logit"} and
 64 | \code{link = "probit"}.}
 65 | 
 66 | \item{link}{Response link. Must be "\code{identity}" for linear regression,
 67 | "\code{logit}" for logistic regression, "\code{probit}"
 68 | for probit regression, or "\code{mix}" for a mix of identity and probit
 69 | link functions (in this case, the indices of the binary responses must be
 70 | gathered in argument \code{ind_bin}, see below).}
 71 | 
 72 | \item{ind_bin}{If \code{link = "mix"}, vector of indices corresponding to
 73 | the binary variables in \code{Y}. Must be \code{NULL} if
 74 | \code{link != "mix"}.}
 75 | 
 76 | \item{q}{Number of covariates. Default is \code{NULL}, for \code{Z}
 77 | \code{NULL}.}
 78 | 
 79 | \item{phi}{Vector of length 1 or q providing the values of hyperparameter
 80 | \eqn{\phi} for the prior distributions for the sizes of the nonzero
 81 | covariate effects, \eqn{\zeta}. If of length 1, the provided value is
 82 | repeated q times. Default is \code{NULL}, for \code{Z} \code{NULL}.}
 83 | 
 84 | \item{xi}{Vector of length 1 or q providing the values of hyperparameter
 85 | \eqn{\xi} for the prior distributions for the sizes of the nonzero
 86 | covariate effects, \eqn{\zeta}. If of length 1, the provided value is
 87 | repeated q times. Default is \code{NULL}, for \code{Z} \code{NULL}.}
 88 | 
 89 | \item{m0}{Vector of length 1 or p. Hyperparameter when \code{list_struct} 
 90 | non-\code{NULL}. Default is \code{NULL}.}
 91 | 
 92 | \item{s02}{Variance hyperparameter when \code{list_struct} is 
 93 | non-\code{NULL}. Default is \code{NULL}.}
 94 | 
 95 | \item{G}{Number of candidate predictor groups when using the group selection
 96 | model from the \code{\link{locus}} function. Default is \code{NULL},
 97 | for no group selection.}
 98 | 
 99 | \item{struct}{Boolean indicating the use of structured sparse priors
100 | set through the \code{\link{set_struct}} function. Default is \code{FALSE},
101 | for no structured selection.}
102 | }
103 | \value{
104 | An object of class "\code{hyper}" preparing user hyperparameter in a
105 |   form that can be passed to the \code{\link{locus}} function.
106 | }
107 | \description{
108 | This function must be used to provide hyperparameter values for the model
109 | used in \code{\link{locus}}.
110 | }
111 | \details{
112 | The \code{\link{locus}} function can also be used with default hyperparameter
113 | choices (without using \code{\link{set_hyper}}) by setting its argument
114 | \code{list_hyper} to \code{NULL}.
115 | }
116 | \examples{
117 | seed <- 123; set.seed(seed)
118 | 
119 | ###################
120 | ## Simulate data ##
121 | ###################
122 | 
123 | ## Examples using small problem sizes:
124 | ##
125 | n <- 200; p <- 200; p0 <- 20; d <- 20; d0 <- 15; q <- 2
126 | 
127 | ## Candidate predictors (subject to selection)
128 | ##
129 | # Here we simulate common genetic variants (but any type of candidate
130 | # predictors can be supplied).
131 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
132 | 
133 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
134 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
135 | 
136 | shuff_x_ind <- sample(p)
137 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
138 | 
139 | bool_x_act <- shuff_x_ind <= p0
140 | 
141 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
142 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
143 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
144 | 
145 | ## Covariates (not subject to selection)
146 | ##
147 | Z <- matrix(rnorm(n * q), nrow = n)
148 | 
149 | alpha <-  matrix(rnorm(q * d), nrow = q)
150 | 
151 | ## Gaussian responses
152 | ##
153 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
154 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
155 | shuff_y_ind <- sample(d)
156 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind] + Z \%*\% alpha
157 | 
158 | ## Binary responses
159 | ##
160 | Y_bin <- ifelse(Y > 0, 1, 0)
161 | 
162 | ########################
163 | ## Infer associations ##
164 | ########################
165 | 
166 | ## Continuous responses
167 | ##
168 | 
169 | # No covariate
170 | #
171 | # a and b chosen so that the prior mean number of responses associated with
172 | # each candidate predictor is 1/4.
173 | list_hyper_g <- set_hyper(d, p, lambda = 1, nu = 1, a = 1, b = 4*d-1,
174 |                           eta = 1, kappa = apply(Y, 2, var),
175 |                           link = "identity")
176 | 
177 | # We take p0_av = p0 (known here); this choice may result in variable
178 | # selections that are (too) conservative in some cases. In practice, it is
179 | # advised to set p0_av as a slightly overestimated guess of p0, or perform
180 | # cross-validation using function `set_cv'.
181 | 
182 | vb_g <- locus(Y = Y, X = X, p0_av = p0, link = "identity",
183 |               list_hyper = list_hyper_g, user_seed = seed)
184 | 
185 | # With covariates
186 | #
187 | list_hyper_g_z <- set_hyper(d, p, lambda = 1, nu = 1, a = 1, b = 4*d-1,
188 |                             eta = 1, kappa = apply(Y, 2, var),
189 |                             link = "identity", q = q, phi = 1, xi = 1)
190 | 
191 | vb_g_z <- locus(Y = Y, X = X, p0_av = p0, Z = Z, link = "identity",
192 |                 list_hyper = list_hyper_g_z, user_seed = seed)
193 | 
194 | ## Binary responses
195 | ##
196 | list_hyper_logit <- set_hyper(d, p, lambda = 1, nu = 1, a = 1, b = 4*d-1,
197 |                               eta = NULL, kappa = NULL, link = "logit",
198 |                               q = q, phi = 1, xi = 1)
199 | 
200 | vb_logit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "logit",
201 |                   list_hyper = list_hyper_logit, user_seed = seed)
202 | 
203 | list_hyper_probit <- set_hyper(d, p, lambda = 1, nu = 1, a = 1, b = 4*d-1,
204 |                                eta = NULL, kappa = NULL, link = "probit",
205 |                                q = q, phi = 1, xi = 1)
206 | 
207 | vb_probit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "probit",
208 |                    list_hyper = list_hyper_probit, user_seed = seed)
209 | 
210 | 
211 | ## Mix of continuous and binary responses
212 | ##
213 | Y_mix <- cbind(Y, Y_bin)
214 | ind_bin <- (d+1):(2*d)
215 | 
216 | list_hyper_mix <- set_hyper(2*d, p, lambda = 1, nu = 1, a = 1, b = 8*d-1,
217 |                             eta = 1, kappa = apply(Y, 2, var), link = "mix",
218 |                             ind_bin = ind_bin, q = q, phi = 1, xi = 1)
219 | 
220 | vb_mix <- locus(Y = Y_mix, X = X, p0_av = p0, Z = Z, link = "mix",
221 |                 ind_bin = ind_bin, list_hyper = list_hyper_mix,
222 |                 user_seed = seed)
223 | 
224 | }
225 | \seealso{
226 | \code{\link{set_init}}, \code{\link{locus}}
227 | }
228 | 


--------------------------------------------------------------------------------
/R/elbo.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal functions gathering the ELBO terms common to core algorithms.
  5 | #
  6 | 
  7 | ############################################
  8 | ## E log p(alpha | rest) - E log q(alpha) ##
  9 | ############################################
 10 | 
 11 | e_alpha_ <- function(m2_alpha, log_tau_vb, log_zeta2_inv_vb, sig2_alpha_vb,
 12 |                      tau_vb, zeta2_inv_vb) {
 13 |   
 14 |   1 / 2 * sum(sweep( sweep( sweep( sweep(m2_alpha, 2, tau_vb, `*`), 1,
 15 |                                    -zeta2_inv_vb, `*`), 2, log_tau_vb, `+`), 1,
 16 |                      log_zeta2_inv_vb , `+`) + log(sig2_alpha_vb) + 1)
 17 |   
 18 |   
 19 | }
 20 | 
 21 | 
 22 | e_alpha_logit_ <- function(m2_alpha, log_zeta2_inv_vb, sig2_alpha_vb, zeta2_inv_vb) {
 23 |   
 24 |   1 / 2 * sum( sweep(-sweep(m2_alpha, 1, zeta2_inv_vb, `*`), 1,
 25 |                      log_zeta2_inv_vb, `+`) + log(sig2_alpha_vb) + 1)
 26 |   
 27 | }
 28 | 
 29 | 
 30 | e_alpha_probit_ <- function(m2_alpha, log_zeta2_inv_vb, sig2_alpha_vb, zeta2_inv_vb) {
 31 |   
 32 |   1 / 2 * sum(sweep(-sweep(m2_alpha, 1, zeta2_inv_vb, `*`), 1,
 33 |                     log_zeta2_inv_vb + log(sig2_alpha_vb), `+`) + 1)
 34 |   
 35 | }
 36 | 
 37 | 
 38 | ########################################################
 39 | ## E log p(beta, gamma | rest) - E log q(beta, gamma) ##
 40 | ########################################################
 41 | 
 42 | 
 43 | e_beta_gamma_ <- function(gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
 44 |                           log_tau_vb, m2_beta, sig2_beta_vb, sig2_inv_vb, tau_vb) {
 45 |   
 46 |   eps <- .Machine$double.eps # to control the argument of the log when gamma is very small
 47 |   sum(log_sig2_inv_vb * gam_vb / 2 + sweep(gam_vb, 2, log_tau_vb, `*`) / 2 -
 48 |         sweep(m2_beta, 2, tau_vb, `*`) * sig2_inv_vb / 2 +
 49 |         sweep(gam_vb, 1, log_om_vb, `*`) +
 50 |         sweep(1 - gam_vb, 1, log_1_min_om_vb, `*`) +
 51 |         1 / 2 * sweep(gam_vb, 2, log(sig2_beta_vb) + 1, `*`) -
 52 |         gam_vb * log(gam_vb + eps) - (1 - gam_vb) * log(1 - gam_vb + eps))
 53 |   
 54 | }
 55 | 
 56 | 
 57 | e_g_beta_gamma_ <- function(gam_vb, g_sizes, log_om_vb, log_1_min_om_vb,
 58 |                             log_sig2_inv_vb, log_tau_vb, list_m1_btb,
 59 |                             list_sig2_beta_star, sig2_inv_vb, tau_vb, vec_log_det) {
 60 |   
 61 |   eps <- .Machine$double.eps # to control the argument of the log when gamma is very small
 62 |   
 63 |   G <- length(list_m1_btb)
 64 |   
 65 |   sum(unlist(lapply(1:G, function(g) {
 66 |     
 67 |     sum(g_sizes[g] / 2 * gam_vb[g, ] * (log_sig2_inv_vb + log_tau_vb) -
 68 |           list_m1_btb[[g]] * tau_vb * sig2_inv_vb / 2 +
 69 |           gam_vb[g, ] * log_om_vb[g] + (1 - gam_vb[g, ]) * log_1_min_om_vb[g] +
 70 |           1 / 2 * gam_vb[g, ] * (vec_log_det[g] - g_sizes[g] * (log(tau_vb) - 1)) -
 71 |           gam_vb[g, ] * log(gam_vb[g, ] + eps) - (1 - gam_vb[g, ]) * log(1 - gam_vb[g, ] + eps))
 72 |   })))
 73 |   
 74 | }
 75 | 
 76 | 
 77 | e_beta_gamma_bin_ <- function(gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
 78 |                               m2_beta, sig2_beta_vb, sig2_inv_vb) {
 79 |   
 80 |   eps <- .Machine$double.eps # to control the argument of the log when gamma is very small
 81 |   sum(gam_vb * log_sig2_inv_vb / 2 - m2_beta * sig2_inv_vb / 2 +
 82 |         sweep(gam_vb, 1, log_om_vb, `*`) +
 83 |         sweep(1 - gam_vb, 1, log_1_min_om_vb, `*`) +
 84 |         gam_vb * (log(sig2_beta_vb) + 1) / 2 -
 85 |         gam_vb * log(gam_vb + eps) - (1 - gam_vb) * log(1 - gam_vb + eps))
 86 |   
 87 | }
 88 | 
 89 | 
 90 | e_beta_gamma_struct_ <- function(gam_vb, log_sig2_inv_vb, log_tau_vb, theta_vb,
 91 |                                  m2_beta, sig2_beta_vb, list_sig2_theta_vb,
 92 |                                  sig2_inv_vb, tau_vb) {
 93 |   
 94 |   eps <- .Machine$double.eps^0.75 # to control the argument of the log when gamma is very small
 95 |   
 96 |   diag_sig2_theta_vb <- unlist(lapply(list_sig2_theta_vb, diag))
 97 |   
 98 |   sum(log_sig2_inv_vb * gam_vb / 2 +
 99 |         sweep(gam_vb, 2, log_tau_vb, `*`) / 2 -
100 |         sweep(m2_beta, 2, tau_vb, `*`) * sig2_inv_vb / 2 +
101 |         sweep(gam_vb, 1, pnorm(theta_vb, log.p = TRUE), `*`) +
102 |         sweep(sweep((1 - gam_vb), 1, pnorm(theta_vb, lower.tail = FALSE, log.p = TRUE), `*`), 1, diag_sig2_theta_vb / 2, `-`) +
103 |         1 / 2 * sweep(gam_vb, 2, log(sig2_beta_vb) + 1, `*`) -
104 |         gam_vb * log(gam_vb + eps) - (1 - gam_vb) * log(1 - gam_vb + eps))
105 |   
106 | }
107 | 
108 | 
109 | 
110 | ############################################
111 | ## E log p(omega | rest) - E log q(omega) ##
112 | ############################################
113 | 
114 | e_omega_ <- function(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb) {
115 |   
116 |   sum((a - a_vb) * log_om_vb + (b - b_vb) * log_1_min_om_vb - lbeta(a, b) + lbeta(a_vb, b_vb))
117 |   
118 | }
119 | 
120 | 
121 | ##################################################
122 | ## E log p(sig2_inv | rest) - E log q(sig2_inv) ##
123 | ##################################################
124 | 
125 | e_sig2_inv_ <- function(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb) {
126 |   
127 |   (lambda - lambda_vb) * log_sig2_inv_vb - (nu - nu_vb) * sig2_inv_vb +
128 |     lambda * log(nu) - lambda_vb * log(nu_vb) - lgamma(lambda) + lgamma(lambda_vb)
129 |   
130 | }
131 | 
132 | 
133 | 
134 | ########################################
135 | ## E log p(tau | rest) - E log q(tau) ##
136 | ########################################
137 | 
138 | e_tau_ <- function(eta, eta_vb, kappa, kappa_vb, log_tau_vb, tau_vb) {
139 |   
140 |   sum((eta - eta_vb) * log_tau_vb - (kappa - kappa_vb) * tau_vb +
141 |         eta * log(kappa) - eta_vb * log(kappa_vb) - lgamma(eta) + lgamma(eta_vb))
142 |   
143 | }
144 | 
145 | 
146 | ############################################
147 | ## E log p(theta | rest) - E log q(theta) ##
148 | ############################################
149 | 
150 | # S0_inv is assumed to be block-diagonal
151 | e_theta_ <- function(m0, theta_vb, list_S0_inv, list_sig2_theta_vb, vec_fac_st, vec_sum_log_det) {
152 |   
153 |   if (is.null(vec_fac_st)) {
154 |     
155 |     p <- length(theta_vb)
156 |     
157 |     arg <- (vec_sum_log_det - # vec_sum_log_det[bl] = log(det(S0_inv_bl)) + log(det(sig2_theta_vb_bl))
158 |               list_S0_inv * crossprod(theta_vb - m0) -
159 |               p * list_S0_inv * list_sig2_theta_vb + p) / 2 # trace of a product
160 |     
161 |   } else {
162 |     
163 |     bl_ids <- unique(vec_fac_st)
164 |     n_bl <- length(list_S0_inv)
165 |     
166 |     arg <- unlist(lapply(1:n_bl, function(bl) {
167 |       
168 |       theta_vb_bl <- theta_vb[vec_fac_st == bl_ids[bl]]
169 |       m0_bl <- m0[vec_fac_st == bl_ids[bl]]
170 |       S0_inv_bl <- list_S0_inv[[bl]]
171 |       sig2_theta_vb_bl <- list_sig2_theta_vb[[bl]]
172 |       
173 |       (vec_sum_log_det[bl] - # vec_sum_log_det[bl] = log(det(S0_inv_bl)) + log(det(sig2_theta_vb_bl))
174 |         crossprod((theta_vb_bl - m0_bl),
175 |                   S0_inv_bl %*% (theta_vb_bl - m0_bl)) -
176 |         sum(S0_inv_bl * sig2_theta_vb_bl) + ncol(S0_inv_bl)) / 2 # trace of a product
177 |     }))
178 |   }
179 |   
180 |   sum(arg)
181 |   
182 | }
183 | 
184 | 
185 | #######################
186 | ## E log p(y | rest) ##
187 | #######################
188 | 
189 | e_y_ <- function(n, kappa, kappa_vb, log_tau_vb, m2_beta, sig2_inv_vb, tau_vb,
190 |                  m2_alpha = NULL, zeta2_inv_vb = NULL) {
191 |   
192 |   arg <- -n / 2 * log(2 * pi) + n / 2 * log_tau_vb - tau_vb *
193 |     (kappa_vb - colSums(m2_beta) * sig2_inv_vb / 2 - kappa)
194 |   
195 |   if (!is.null(m2_alpha))
196 |     arg <- arg + tau_vb * crossprod(m2_alpha, zeta2_inv_vb) / 2
197 |   
198 |   sum(arg)
199 |   
200 | }
201 | 
202 | e_g_y_ <- function(n, kappa, kappa_vb, list_m1_btb, log_tau_vb, sig2_inv_vb, tau_vb) {
203 |   
204 |   sum(-n / 2 * log(2 * pi) + n / 2 * log_tau_vb - tau_vb *
205 |         (kappa_vb - Reduce(`+`, list_m1_btb) * sig2_inv_vb / 2 - kappa))
206 |   
207 | }
208 | 
209 | 
210 | e_y_logit_ <- function(X, Y, Z, chi_vb, beta_vb, m2_alpha, m2_beta, mat_x_m1,
211 |                        mat_z_mu, alpha_vb, psi_vb) {
212 |   
213 |   sum(log_sigmoid_(chi_vb) + Y * (mat_x_m1 + mat_z_mu)  -  chi_vb / 2 -
214 |         psi_vb * (X^2 %*% m2_beta + mat_x_m1^2 - X^2 %*% beta_vb^2 +
215 |                     Z^2 %*% m2_alpha + mat_z_mu^2 - Z^2 %*% alpha_vb^2 +
216 |                     2 * mat_x_m1 * mat_z_mu - chi_vb^2))
217 |   
218 | }
219 | 
220 | 
221 | e_y_probit_ <- function(X, Y, Z, beta_vb, m2_beta, mat_x_m1, mat_z_mu,
222 |                         sig2_alpha_vb, sweep = TRUE) {
223 |   
224 |   U <- mat_x_m1 + mat_z_mu
225 |   
226 |   arg <- Y * pnorm(U, log.p = TRUE) +
227 |     (1 - Y) * pnorm(U, lower.tail = FALSE, log.p = TRUE) -
228 |     X^2 %*% (m2_beta - beta_vb^2) / 2
229 |   
230 |   if (sweep)
231 |     arg <- sweep(arg, 1, Z^2 %*% sig2_alpha_vb / 2, `-`)
232 |   else
233 |     arg <- arg - Z^2 %*% sig2_alpha_vb / 2
234 |   
235 |   sum(arg)
236 |   
237 | }
238 | 
239 | 
240 | ####################################################
241 | ## E log p(zeta2_inv | rest) - E log q(zeta2_inv) ##
242 | ####################################################
243 | 
244 | e_zeta2_inv_ <- function(log_zeta2_inv_vb, phi, phi_vb, xi, xi_vb, zeta2_inv_vb) {
245 |   
246 |   sum((phi - phi_vb) * log_zeta2_inv_vb - (xi - xi_vb) * zeta2_inv_vb +
247 |         phi * log(xi) - phi_vb * log(xi_vb) - lgamma(phi) + lgamma(phi_vb))
248 |   
249 | }
250 | 


--------------------------------------------------------------------------------
/man/set_init.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/set_hyper_init.R
  3 | \name{set_init}
  4 | \alias{set_init}
  5 | \title{Gather initial variational parameters provided by the user.}
  6 | \usage{
  7 | set_init(
  8 |   d,
  9 |   p,
 10 |   gam_vb,
 11 |   mu_beta_vb,
 12 |   sig2_beta_vb,
 13 |   tau_vb,
 14 |   link = "identity",
 15 |   ind_bin = NULL,
 16 |   q = NULL,
 17 |   alpha_vb = NULL,
 18 |   sig2_alpha_vb = NULL,
 19 |   sig2_inv_vb = NULL,
 20 |   G = NULL
 21 | )
 22 | }
 23 | \arguments{
 24 | \item{d}{Number of responses.}
 25 | 
 26 | \item{p}{Number of candidate predictors.}
 27 | 
 28 | \item{gam_vb}{Matrix of size p x d with initial values for the variational
 29 | parameter yielding posterior probabilities of inclusion.}
 30 | 
 31 | \item{mu_beta_vb}{Matrix of size p x d with initial values for the
 32 | variational parameter yielding regression coefficient estimates for
 33 | predictor-response pairs included in the model.}
 34 | 
 35 | \item{sig2_beta_vb}{Vector of length d, for \code{link = "identity"} and
 36 | for \code{link = "mix"}, of length 1 for \code{link = "probit"}, and a
 37 | matrix of size p x d, for \code{link = "logit"}, with initial values for
 38 | the variational parameter yielding estimates of effect variances for
 39 | predictor-response pairs included in the model. For
 40 | \code{link = "identity"} and \code{link = "mix"}, these values are the same
 41 | for all the predictors (as a result of the predictor variables being
 42 | standardized before the variational algorithm). For \code{link = "probit"},
 43 | they are the same for all the predictors and responses.}
 44 | 
 45 | \item{tau_vb}{Vector of length d, for \code{link = "identity"}, and of
 46 | length d_cont = d - length(ind_bin) (number of continuous responses), for
 47 | \code{link = "mix"}, with initial values for the variational parameter
 48 | yielding estimates for the continuous response residual precisions. Must be
 49 | \code{NULL} for \code{link = "logit"} and \code{link = "probit"}.}
 50 | 
 51 | \item{link}{Response link. Must be "\code{identity}" for linear regression,
 52 | "\code{logit}" for logistic regression, "\code{probit}" for probit
 53 | regression, or "\code{mix}" for a mix of identity and probit link functions
 54 | (in this case, the indices of the binary responses must be gathered in
 55 | argument \code{ind_bin}, see below).}
 56 | 
 57 | \item{ind_bin}{If \code{link = "mix"}, vector of indices corresponding to the
 58 | binary variables in \code{Y}. Must be \code{NULL} if \code{link != "mix"}.}
 59 | 
 60 | \item{q}{Number of covariates. Default is \code{NULL}, for \code{Z}
 61 | \code{NULL}.}
 62 | 
 63 | \item{alpha_vb}{Matrix of size q x d with initial values for the
 64 | variational parameter yielding regression coefficient estimates for
 65 | covariate-response pairs. Default is \code{NULL}, for \code{Z} \code{NULL}.}
 66 | 
 67 | \item{sig2_alpha_vb}{Matrix of size q x d for \code{link = "identity"},
 68 | for \code{link = "logit"} and for \code{link = "mix"} with initial values
 69 | for the variational parameter yielding estimates of effect variances for
 70 | covariate-response pairs. Vector of length q for \code{link = "probit"}.
 71 | Default is \code{NULL}, for \code{Z} \code{NULL}.}
 72 | 
 73 | \item{sig2_inv_vb}{Initial parameters necessary when \code{G} is
 74 | non-\code{NULL}. Its inverse square root corresponds to the typical size of
 75 | non-zero effects. Must be \code{NULL} if \code{G} is \code{NULL}.}
 76 | 
 77 | \item{G}{Number of candidate predictor groups when using the group selection
 78 | model from the \code{\link{locus}} function. Default is \code{NULL},
 79 | for no group selection.}
 80 | }
 81 | \value{
 82 | An object of class "\code{init}" preparing user initial values for
 83 |   the variational parameters in a form that can be passed to the
 84 |   \code{\link{locus}} function.
 85 | }
 86 | \description{
 87 | This function must be used to provide initial values for the variational
 88 | parameters used in \code{\link{locus}}.
 89 | }
 90 | \details{
 91 | The \code{\link{locus}} function can also be used with default initial
 92 | parameter choices (without using \code{\link{set_init}}) by setting
 93 | its argument \code{list_init} to \code{NULL}.
 94 | }
 95 | \examples{
 96 | seed <- 123; set.seed(seed)
 97 | 
 98 | ###################
 99 | ## Simulate data ##
100 | ###################
101 | 
102 | ## Examples using small problem sizes:
103 | ##
104 | n <- 200; p <- 200; p0 <- 20; d <- 20; d0 <- 15; q <- 2
105 | 
106 | ## Candidate predictors (subject to selection)
107 | ##
108 | # Here we simulate common genetic variants (but any type of candidate
109 | # predictors can be supplied).
110 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
111 | 
112 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
113 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
114 | 
115 | shuff_x_ind <- sample(p)
116 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
117 | 
118 | bool_x_act <- shuff_x_ind <= p0
119 | 
120 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
121 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
122 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
123 | 
124 | ## Covariates (not subject to selection)
125 | ##
126 | Z <- matrix(rnorm(n * q), nrow = n)
127 | 
128 | alpha <-  matrix(rnorm(q * d), nrow = q)
129 | 
130 | ## Gaussian responses
131 | ##
132 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
133 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
134 | shuff_y_ind <- sample(d)
135 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind] + Z \%*\% alpha
136 | 
137 | ## Binary responses
138 | ##
139 | Y_bin <- ifelse(Y > 0, 1, 0)
140 | 
141 | ########################
142 | ## Infer associations ##
143 | ########################
144 | 
145 | ## Continuous responses
146 | ##
147 | 
148 | # No covariate
149 | #
150 | # gam_vb chosen so that the prior mean number of responses associated with
151 | # each candidate predictor is 1/4.
152 | gam_vb <- matrix(rbeta(p * d, shape1 = 1, shape2 = 4*d-1), nrow = p)
153 | mu_beta_vb <- matrix(rnorm(p * d), nrow = p)
154 | tau_vb <- 1 / apply(Y, 2, var)
155 | sig2_beta_vb <- 1 / rgamma(d, shape = 2, rate = 1 / tau_vb)
156 | 
157 | list_init_g <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb, tau_vb,
158 |                         link = "identity")
159 | 
160 | # We take p0_av = p0 (known here); this choice may result in variable
161 | # selections that are (too) conservative in some cases. In practice, it is
162 | # advised to set p0_av as a slightly overestimated guess of p0, or perform
163 | # cross-validation using function `set_cv'.
164 | 
165 | vb_g <- locus(Y = Y, X = X, p0_av = p0, link = "identity",
166 |               list_init = list_init_g)
167 | 
168 | # With covariates
169 | #
170 | alpha_vb <- matrix(rnorm(q * d), nrow = q)
171 | sig2_alpha_vb <- 1 / matrix(rgamma(q * d, shape = 2, rate = 1), nrow = q)
172 | 
173 | list_init_g_z <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb, tau_vb,
174 |                           link = "identity", q = q,
175 |                           alpha_vb = alpha_vb,
176 |                           sig2_alpha_vb = sig2_alpha_vb)
177 | 
178 | vb_g_z <- locus(Y = Y, X = X, p0_av = p0, Z = Z, link = "identity",
179 |                 list_init = list_init_g_z)
180 | 
181 | ## Binary responses
182 | ##
183 | # gam_vb chosen so that the prior mean number of responses associated with
184 | # each candidate predictor is 1/4.
185 | sig2_beta_vb_logit <- 1 / t(replicate(p, rgamma(d, shape = 2, rate = 1)))
186 | 
187 | list_init_logit <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb_logit,
188 |                             tau_vb = NULL, link = "logit", q = q,
189 |                             alpha_vb = alpha_vb,
190 |                             sig2_alpha_vb = sig2_alpha_vb)
191 | 
192 | vb_logit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "logit",
193 |                   list_init = list_init_logit)
194 | 
195 | sig2_alpha_vb_probit <- sig2_alpha_vb[, 1]
196 | sig2_beta_vb_probit <- sig2_beta_vb[1]
197 | list_init_probit <- set_init(d, p, gam_vb, mu_beta_vb, sig2_beta_vb_probit,
198 |                              tau_vb = NULL, link = "probit", q = q,
199 |                              alpha_vb = alpha_vb,
200 |                              sig2_alpha_vb = sig2_alpha_vb_probit)
201 | 
202 | vb_probit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "probit",
203 |                    list_init = list_init_probit)
204 | 
205 | ## Mix of continuous and binary responses
206 | ##
207 | Y_mix <- cbind(Y, Y_bin)
208 | ind_bin <- (d+1):(2*d)
209 | 
210 | # gam_vb chosen so that the prior mean number of responses associated with
211 | # each candidate predictor is 1/4.
212 | gam_vb_mix <- matrix(rbeta(p * 2*d, shape1 = 1, shape2 = 8*d-1), nrow = p)
213 | mu_beta_vb_mix <- matrix(rnorm(p * 2*d), nrow = p)
214 | sig2_beta_vb_mix <- 1 / c(rgamma(d, shape = 2, rate = 1 / tau_vb),
215 |                           rgamma(d, shape = 2, rate = 1))
216 | alpha_vb_mix <- matrix(rnorm(q * 2*d), nrow = q)
217 | sig2_alpha_vb_mix <- 1 / matrix(rgamma(q * 2*d, shape = 2, rate = 1), nrow = q)
218 | 
219 | list_init_mix <- set_init(2*d, p, gam_vb_mix, mu_beta_vb_mix,
220 |                           sig2_beta_vb_mix, tau_vb, link = "mix",
221 |                           ind_bin = ind_bin, q = q,
222 |                           alpha_vb = alpha_vb_mix,
223 |                           sig2_alpha_vb = sig2_alpha_vb_mix)
224 | 
225 | vb_mix <- locus(Y = Y_mix, X = X, p0_av = p0, Z = Z, link = "mix",
226 |                 ind_bin = ind_bin, list_init = list_init_mix)
227 | 
228 | }
229 | \seealso{
230 | \code{\link{set_hyper}}, \code{\link{locus}}
231 | }
232 | 


--------------------------------------------------------------------------------
/R/locus_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for identity link, no
  5 | # fixed covariates and no external annotation variables.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_core_ <- function(Y, X, list_hyper, gam_vb, mu_beta_vb, sig2_beta_vb,
  9 |                         tau_vb, tol, maxit, anneal, verbose, batch = "y",
 10 |                         full_output = FALSE, debug = TRUE, checkpoint_path = NULL) {
 11 | 
 12 | 
 13 |   # Y must have been centered, and X, standardized.
 14 | 
 15 |   d <- ncol(Y)
 16 |   n <- nrow(Y)
 17 |   p <- ncol(X)
 18 | 
 19 | 
 20 |   # Preparing annealing if any
 21 |   #
 22 |   if (is.null(anneal)) {
 23 |     annealing <- FALSE
 24 |     c <- 1
 25 |   } else {
 26 |     annealing <- TRUE
 27 |     ladder <- get_annealing_ladder_(anneal, verbose)
 28 |     c <- ladder[1]
 29 |   }
 30 | 
 31 |   eps <- .Machine$double.eps^0.5
 32 | 
 33 |   with(list_hyper, { # list_init not used with the with() function to avoid
 34 |                      # copy-on-write for large objects
 35 | 
 36 |     beta_vb <- update_beta_vb_(gam_vb, mu_beta_vb)
 37 |     m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
 38 | 
 39 |     mat_x_m1 <- update_mat_x_m1_(X, beta_vb)
 40 | 
 41 |     rs_gam <- rowSums(gam_vb)
 42 |     sum_gam <- sum(rs_gam)
 43 | 
 44 |     converged <- FALSE
 45 |     lb_new <- -Inf
 46 |     it <- 0
 47 | 
 48 | 
 49 |     while ((!converged) & (it < maxit)) {
 50 | 
 51 |       lb_old <- lb_new
 52 |       it <- it + 1
 53 | 
 54 |       if (verbose & (it == 1 | it %% 5 == 0))
 55 |         cat(paste0("Iteration ", format(it), "... \n"))
 56 | 
 57 |       digam_sum <- digamma(c * (a + b + d) - 2 * c + 2)
 58 | 
 59 |       # % #
 60 |       lambda_vb <- update_lambda_vb_(lambda, sum_gam, c = c)
 61 |       nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb, c = c)
 62 | 
 63 |       sig2_inv_vb <- lambda_vb / nu_vb
 64 |       # % #
 65 | 
 66 |       # % #
 67 |       eta_vb <- update_eta_vb_(n, eta, gam_vb, c = c)
 68 |       kappa_vb <- update_kappa_vb_(Y, kappa, mat_x_m1, beta_vb, m2_beta, sig2_inv_vb, c = c)
 69 | 
 70 |       tau_vb <- eta_vb / kappa_vb
 71 |       # % #
 72 | 
 73 |       sig2_beta_vb <- update_sig2_beta_vb_(n, sig2_inv_vb, tau_vb, c = c)
 74 | 
 75 |       log_tau_vb <- update_log_tau_vb_(eta_vb, kappa_vb)
 76 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 77 | 
 78 | 
 79 |       # different possible batch-coordinate ascent schemes:
 80 | 
 81 |       if (batch == "y") { # optimal scheme
 82 | 
 83 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
 84 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
 85 | 
 86 | 
 87 |         # C++ Eigen call for expensive updates
 88 |         shuffled_ind <- as.numeric(sample(0:(p-1))) # Zero-based index in C++
 89 | 
 90 |         coreLoop(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
 91 |                  log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb,
 92 |                  tau_vb, shuffled_ind, c = c)
 93 | 
 94 | 
 95 |         rs_gam <- rowSums(gam_vb)
 96 | 
 97 |       } else if (batch == "x") { # used only internally, convergence not ensured
 98 | 
 99 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
100 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
101 | 
102 |         for (k in sample(1:d)) {
103 | 
104 |           mu_beta_vb[, k] <- c * sig2_beta_vb[k] * tau_vb[k] *
105 |             (crossprod(Y[, k] - mat_x_m1[, k], X) + (n - 1) * beta_vb[, k])
106 | 
107 | 
108 |           gam_vb[, k] <- exp(-log_one_plus_exp_(c * (log_1_min_om_vb - log_om_vb -
109 |                                                        log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
110 |                                                        mu_beta_vb[, k] ^ 2 / (2 * sig2_beta_vb[k]) -
111 |                                                   log(sig2_beta_vb[k]) / 2)))
112 | 
113 |           beta_vb[, k] <- mu_beta_vb[, k] * gam_vb[, k]
114 | 
115 |           mat_x_m1[, k] <- X %*% beta_vb[, k]
116 | 
117 |         }
118 | 
119 |         rs_gam <- rowSums(gam_vb)
120 | 
121 |       } else if (batch == "x-y") { # used only internally, convergence not ensured
122 | 
123 |         if (annealing)
124 |           stop("Annealing not implemented for this scheme. Exit.")
125 | 
126 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
127 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
128 | 
129 |         # C++ Eigen call for expensive updates
130 |         coreBatch(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
131 |                   log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb)
132 | 
133 |         rs_gam <- rowSums(gam_vb)
134 | 
135 |       } else if (batch == "0") { # no batch, used only internally
136 | 
137 |         for (k in sample(1:d)) {
138 | 
139 |           log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
140 |           log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
141 | 
142 |           for (j in sample(1:p)) {
143 | 
144 |             mat_x_m1[, k] <- mat_x_m1[, k] - X[, j] * beta_vb[j, k]
145 | 
146 |             mu_beta_vb[j, k] <- c * sig2_beta_vb[k] * tau_vb[k] * crossprod(Y[, k] - mat_x_m1[, k], X[, j])
147 | 
148 |             gam_vb[j, k] <- exp(-log_one_plus_exp_(c * (log_1_min_om_vb[j] - log_om_vb[j] -
149 |                                                           log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
150 |                                                           mu_beta_vb[j, k] ^ 2 / (2 * sig2_beta_vb[k]) -
151 |                                                      log(sig2_beta_vb[k]) / 2)))
152 | 
153 |             beta_vb[j, k] <- mu_beta_vb[j, k] * gam_vb[j, k]
154 | 
155 |             mat_x_m1[, k] <- mat_x_m1[, k] + X[, j] * beta_vb[j, k]
156 | 
157 |           }
158 | 
159 |           rs_gam <- rowSums(gam_vb)
160 | 
161 |         }
162 | 
163 |       } else {
164 | 
165 |         stop ("Batch scheme not defined. Exit.")
166 | 
167 |       }
168 | 
169 |       m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
170 | 
171 |       a_vb <- update_a_vb(a, rs_gam, c = c)
172 |       b_vb <- update_b_vb(b, d, rs_gam, c = c)
173 |       om_vb <- a_vb / (a_vb + b_vb)
174 | 
175 |       sum_gam <- sum(rs_gam)
176 | 
177 |       if (annealing) {
178 | 
179 |         if (verbose & (it == 1 | it %% 5 == 0))
180 |           cat(paste0("Temperature = ", format(1 / c, digits = 4), "\n\n"))
181 | 
182 |         c <- ifelse(it < length(ladder), ladder[it + 1], 1)
183 | 
184 |         if (isTRUE(all.equal(c, 1))) {
185 | 
186 |           annealing <- FALSE
187 | 
188 |           if (verbose)
189 |             cat("** Exiting annealing mode. **\n\n")
190 |         }
191 | 
192 |       } else {
193 | 
194 | 
195 |         lb_new <- elbo_(Y, a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, 
196 |                         lambda, nu, sig2_beta_vb, sig2_inv_vb, tau_vb, m2_beta,
197 |                         mat_x_m1, sum_gam)
198 | 
199 |         if (verbose & (it == 1 | it %% 5 == 0))
200 |           cat(paste0("ELBO = ", format(lb_new), "\n\n"))
201 | 
202 |         if (debug && lb_new + eps < lb_old)
203 |           stop("ELBO not increasing monotonically. Exit. ")
204 | 
205 |         converged <- (abs(lb_new - lb_old) < tol)
206 |         
207 |         checkpoint_(it, checkpoint_path, gam_vb, converged, lb_new, lb_old, 
208 |                     om_vb = om_vb)
209 | 
210 |       }
211 | 
212 | 
213 |     }
214 |     
215 |     checkpoint_clean_up_(checkpoint_path)
216 | 
217 | 
218 |     if (verbose) {
219 |       if (converged) {
220 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
221 |                   "Optimal marginal log-likelihood variational lower bound ",
222 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
223 |       } else {
224 |         warning("Maximal number of iterations reached before convergence. Exit.")
225 |       }
226 |     }
227 | 
228 |     lb_opt <- lb_new
229 | 
230 |     
231 |     names_x <- colnames(X)
232 |     names_y <- colnames(Y)
233 |     
234 |     rownames(gam_vb) <- rownames(beta_vb) <- names_x
235 |     colnames(gam_vb) <- colnames(beta_vb) <- names_y
236 |     names(om_vb) <- names_x
237 |     
238 |     diff_lb <- abs(lb_opt - lb_old)
239 |     
240 |     annealing <- ifelse(is.null(anneal), FALSE, anneal[1])
241 |     
242 |     if (full_output) { # for internal use only
243 |       
244 |       create_named_list_(a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, lambda,
245 |                          mu_beta_vb, nu, om_vb, sig2_beta_vb, sig2_inv_vb, tau_vb, 
246 |                          m2_beta, mat_x_m1, sum_gam, converged, it, lb_opt, 
247 |                          diff_lb, annealing)
248 |     } else {
249 |      
250 |       create_named_list_(beta_vb, gam_vb, om_vb, converged, it, lb_opt, diff_lb, 
251 |                          annealing)
252 |     }
253 |   })
254 | 
255 | }
256 | 
257 | 
258 | # Internal function which implements the marginal log-likelihood variational
259 | # lower bound (ELBO) corresponding to the `locus_core` algorithm.
260 | #
261 | elbo_ <- function(Y, a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, lambda, nu,
262 |                   sig2_beta_vb, sig2_inv_vb, tau_vb, m2_beta, mat_x_m1, sum_gam) {
263 | 
264 |   n <- nrow(Y)
265 | 
266 |   eta_vb <- update_eta_vb_(n, eta, gam_vb)
267 |   kappa_vb <- update_kappa_vb_(Y, kappa, mat_x_m1, beta_vb, m2_beta, sig2_inv_vb)
268 | 
269 |   lambda_vb <- update_lambda_vb_(lambda, sum_gam)
270 |   nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb)
271 | 
272 |   log_tau_vb <- digamma(eta_vb) - log(kappa_vb)
273 |   log_sig2_inv_vb <- digamma(lambda_vb) - log(nu_vb)
274 |   log_om_vb <- digamma(a_vb) - digamma(a_vb + b_vb)
275 |   log_1_min_om_vb <- digamma(b_vb) - digamma(a_vb + b_vb)
276 | 
277 | 
278 |   elbo_A <- e_y_(n, kappa, kappa_vb, log_tau_vb, m2_beta, sig2_inv_vb, tau_vb)
279 | 
280 |   elbo_B <- e_beta_gamma_(gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
281 |                           log_tau_vb, m2_beta, sig2_beta_vb, sig2_inv_vb, tau_vb)
282 | 
283 |   elbo_C <- e_tau_(eta, eta_vb, kappa, kappa_vb, log_tau_vb, tau_vb)
284 | 
285 |   elbo_D <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
286 | 
287 |   elbo_E <- e_omega_(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb)
288 | 
289 | 
290 |   elbo_A + elbo_B + elbo_C + elbo_D + elbo_E
291 | 
292 | }
293 | 
294 | 


--------------------------------------------------------------------------------
/R/locus_group_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for group selection
  5 | # with identity link, no fixed covariates and no external annotation variables.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_group_core_ <- function(Y, list_X, list_hyper, gam_vb, list_mu_beta_vb,
  9 |                                sig2_inv_vb, tau_vb, tol, maxit, verbose,
 10 |                                batch = "y", full_output = FALSE, debug = TRUE) {
 11 | 
 12 | 
 13 |   # Y must have been centered, and X, standardized.
 14 | 
 15 |   d <- ncol(Y)
 16 |   n <- nrow(Y)
 17 |   G <- length(list_X)
 18 | 
 19 |   g_sizes <- sapply(list_X, ncol)
 20 | 
 21 |   with(list_hyper, { # list_init not used with the with() function to avoid
 22 |                      # copy-on-write for large objects
 23 |     
 24 |     eps <- .Machine$double.eps^0.5
 25 | 
 26 |     list_sig2_beta_star_inv <- lapply(list_X, function(X_g) crossprod(X_g) + diag(sig2_inv_vb, nrow = ncol(X_g)))
 27 | 
 28 |     list_sig2_beta_star <- lapply(list_sig2_beta_star_inv, solve)
 29 | 
 30 |     list_beta_vb <- update_g_beta_vb_(list_mu_beta_vb, gam_vb)
 31 | 
 32 |     list_m1_btb <- update_g_m1_btb_(gam_vb, list_mu_beta_vb, list_sig2_beta_star, tau_vb)
 33 | 
 34 |     list_m1_btXtXb <- update_g_m1_btXtXb_(list_X, gam_vb, list_mu_beta_vb,
 35 |                                           list_sig2_beta_star, tau_vb)
 36 | 
 37 |     mat_x_m1 <- update_g_mat_x_m1_(list_X, list_beta_vb)
 38 | 
 39 | 
 40 |     log_tau_vb <- update_log_tau_vb_(eta, kappa)  # do not update tau_vb here as
 41 |                                                   # its current form was already used
 42 |                                                   # in list_m1_btb as part of the vb
 43 |                                                   # parameter sig2_beta = sig2_beta_star / tau_vb
 44 |     rs_gam <- rowSums(gam_vb)
 45 |     digam_sum <- digamma(a + b + d)
 46 | 
 47 |     converged <- FALSE
 48 |     lb_new <- -Inf
 49 |     it <- 0
 50 | 
 51 |     while ((!converged) & (it < maxit)) {
 52 | 
 53 |       lb_old <- lb_new
 54 |       it <- it + 1
 55 | 
 56 |       if (verbose & (it == 1 | it %% 5 == 0))
 57 |         cat(paste0("Iteration ", format(it), "... \n"))
 58 | 
 59 |       # % #
 60 |       lambda_vb <- update_g_lambda_vb_(lambda, g_sizes, rs_gam)
 61 |       nu_vb <- update_g_nu_vb_(nu, list_m1_btb, tau_vb)
 62 | 
 63 |       list_sig2_beta_star_inv <- lapply(list_sig2_beta_star_inv, function(sig2_beta_star_inv)
 64 |         sig2_beta_star_inv - diag(sig2_inv_vb, nrow = nrow(sig2_beta_star_inv))) # to avoid recomputing X_g^TX_g each time
 65 | 
 66 |       sig2_inv_vb <- lambda_vb / nu_vb
 67 | 
 68 |       list_sig2_beta_star_inv <- lapply(list_sig2_beta_star_inv, function(sig2_beta_star_inv)
 69 |         sig2_beta_star_inv + diag(sig2_inv_vb, nrow = nrow(sig2_beta_star_inv)))
 70 | 
 71 |       list_sig2_beta_star <- lapply(list_sig2_beta_star_inv, solve)
 72 | 
 73 |       vec_log_det <- log_det(list_sig2_beta_star)
 74 |       # % #
 75 | 
 76 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 77 | 
 78 | 
 79 |       # different possible batch-coordinate ascent schemes:
 80 | 
 81 |       if (batch == "y") { # optimal scheme
 82 | 
 83 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
 84 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
 85 | 
 86 |         for (g in sample(1:G)) {
 87 |           mat_x_m1 <- mat_x_m1 - list_X[[g]] %*% list_beta_vb[[g]]
 88 | 
 89 |           list_mu_beta_vb[[g]] <- list_sig2_beta_star[[g]] %*% crossprod(list_X[[g]], Y - mat_x_m1)
 90 | 
 91 |           gam_vb[g, ] <- exp(-log_one_plus_exp_(log_1_min_om_vb[g] - log_om_vb[g] -
 92 |                                                   g_sizes[g] * (log_sig2_inv_vb + log_tau_vb - log(tau_vb)) / 2 - # |g| * log(tau_vb) /2 came out of the determinant
 93 |                                                   colSums(list_mu_beta_vb[[g]] *
 94 |                                                             (list_sig2_beta_star_inv[[g]] %*% list_mu_beta_vb[[g]])) * tau_vb / 2 -
 95 |                                                   vec_log_det[g] / 2))
 96 | 
 97 |           list_beta_vb[[g]] <- sweep(list_mu_beta_vb[[g]], 2, gam_vb[g, ], `*`)
 98 | 
 99 |           mat_x_m1 <- mat_x_m1 + list_X[[g]] %*% list_beta_vb[[g]]
100 |         }
101 | 
102 |         rs_gam <- rowSums(gam_vb)
103 | 
104 |       } else if (batch == "x") { # used only internally, convergence not ensured
105 | 
106 |         stop("Not implemented")
107 | 
108 |       } else if (batch == "x-y") { # used only internally, convergence not ensured
109 | 
110 |         stop("Not implemented")
111 | 
112 |       } else if (batch == "0") { # no batch, used only internally
113 | 
114 |         for (k in sample(1:d)) {
115 | 
116 |           log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
117 |           log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
118 | 
119 |           for (g in sample(1:G)) {
120 | 
121 |             mat_x_m1[, k] <- mat_x_m1[, k] - list_X[[g]] %*% list_beta_vb[[g]][, k]
122 | 
123 |             list_mu_beta_vb[[g]][, k] <- list_sig2_beta_star[[g]] %*% crossprod(list_X[[g]], Y[, k] - mat_x_m1[, k])
124 | 
125 |             gam_vb[g, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb[g] - log_om_vb[g] -
126 |                                                      g_sizes[g] * (log_sig2_inv_vb + log_tau_vb[k] - log(tau_vb[k])) / 2 -
127 |                                                      sum(list_mu_beta_vb[[g]][, k] * (list_sig2_beta_star_inv[[g]] %*% list_mu_beta_vb[[g]][, k])) * tau_vb[k] / 2 -
128 |                                                      vec_log_det[g] / 2))
129 | 
130 |             list_beta_vb[[g]][, k] <- list_mu_beta_vb[[g]][, k] * gam_vb[g, k]
131 | 
132 |             mat_x_m1[, k] <- mat_x_m1[, k] + list_X[[g]] %*% list_beta_vb[[g]][, k]
133 | 
134 |           }
135 | 
136 |           rs_gam <- rowSums(gam_vb)
137 | 
138 |         }
139 | 
140 |       } else {
141 | 
142 |         stop ("Batch scheme not defined. Exit.")
143 | 
144 |       }
145 | 
146 | 
147 |       list_m1_btb <- update_g_m1_btb_(gam_vb, list_mu_beta_vb, list_sig2_beta_star, tau_vb)
148 | 
149 |       list_m1_btXtXb <- update_g_m1_btXtXb_(list_X, gam_vb, list_mu_beta_vb,
150 |                                             list_sig2_beta_star, tau_vb)
151 | 
152 | 
153 |       a_vb <- update_a_vb(a, rs_gam)
154 |       b_vb <- update_b_vb(b, d, rs_gam)
155 |       om_vb <- a_vb / (a_vb + b_vb)
156 | 
157 | 
158 |       # % #
159 |       eta_vb <- update_g_eta_vb_(n, eta, g_sizes, gam_vb)
160 |       kappa_vb <- update_g_kappa_vb_(Y, list_X, kappa, list_beta_vb, list_m1_btb,
161 |                                      list_m1_btXtXb, mat_x_m1, sig2_inv_vb)
162 | 
163 | 
164 |       lb_new <- elbo_group_(Y, list_X, a, a_vb, b, b_vb, eta, eta_vb, g_sizes,
165 |                             gam_vb, kappa, kappa_vb, lambda, lambda_vb, nu, nu_vb,
166 |                             rs_gam, list_sig2_beta_star, sig2_inv_vb, tau_vb,
167 |                             vec_log_det, list_beta_vb, list_m1_btb,
168 |                             list_m1_btXtXb, mat_x_m1)
169 | 
170 |       tau_vb <- eta_vb / kappa_vb # has to be updated after the elbo, as list_sig2_beta_star depends on it.
171 |       # % #
172 | 
173 |       log_tau_vb <- update_log_tau_vb_(eta_vb, kappa_vb)
174 | 
175 |       if (debug && lb_new + eps < lb_old)
176 |         cat(paste0("ELBO = ", format(lb_new), "\n\n"))
177 | 
178 |       if (debug && lb_new < lb_old)
179 |         stop("ELBO not increasing monotonically. Exit. ")
180 | 
181 |       converged <- (abs(lb_new - lb_old) < tol)
182 | 
183 |     }
184 | 
185 | 
186 |     if (verbose) {
187 |       if (converged) {
188 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
189 |                   "Optimal marginal log-likelihood variational lower bound ",
190 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
191 |       } else {
192 |         warning("Maximal number of iterations reached before convergence. Exit.")
193 |       }
194 |     }
195 | 
196 |     lb_opt <- lb_new
197 | 
198 |     names_y <- colnames(Y)
199 |     
200 |     names_G <- unlist(lapply(list_X,
201 |                              function(X_g) paste0(as.character(colnames(X_g)), collapse = "-")))
202 |     
203 |     rownames(gam_vb) <- names_G
204 |     colnames(gam_vb) <- names_y
205 |     names(om_vb) <- names_G
206 |     
207 |     names(list_beta_vb) <- names_G
208 |     
209 |     diff_lb <- abs(lb_opt - lb_old)
210 |     
211 |     if (full_output) { # for internal use only
212 |       
213 |       create_named_list_(a, a_vb, b, b_vb, eta, eta_vb, g_sizes,
214 |                          gam_vb, kappa, kappa_vb, lambda, lambda_vb, nu, nu_vb,
215 |                          om_vb, rs_gam, list_sig2_beta_star, sig2_inv_vb, tau_vb,
216 |                          vec_log_det, list_beta_vb, list_mu_beta_vb, 
217 |                          list_m1_btb, list_m1_btXtXb, converged, it, lb_opt, diff_lb)
218 |     } else {
219 | 
220 |       create_named_list_(list_beta_vb, gam_vb, om_vb, converged, it, lb_opt, diff_lb)
221 |     }
222 |   })
223 | 
224 | }
225 | 
226 | 
227 | # Internal function which implements the marginal log-likelihood variational
228 | # lower bound (ELBO) corresponding to the `locus_group_core` algorithm.
229 | #
230 | elbo_group_ <- function(Y, list_X, a, a_vb, b, b_vb, eta, eta_vb, g_sizes,
231 |                         gam_vb, kappa, kappa_vb, lambda, lambda_vb, nu, nu_vb,
232 |                         rs_gam, list_sig2_beta_star, sig2_inv_vb, tau_vb,
233 |                         vec_log_det, list_beta_vb, list_m1_btb, list_m1_btXtXb,
234 |                         mat_x_m1) {
235 | 
236 |   n <- nrow(Y)
237 | 
238 |   log_tau_vb <- digamma(eta_vb) - log(kappa_vb)
239 |   log_sig2_inv_vb <- digamma(lambda_vb) - log(nu_vb)
240 |   log_om_vb <- digamma(a_vb) - digamma(a_vb + b_vb)
241 |   log_1_min_om_vb <- digamma(b_vb) - digamma(a_vb + b_vb)
242 | 
243 | 
244 |   elbo_A <- e_g_y_(n, kappa, kappa_vb, list_m1_btb, log_tau_vb, sig2_inv_vb, tau_vb)
245 | 
246 |   elbo_B <- e_g_beta_gamma_(gam_vb, g_sizes, log_om_vb, log_1_min_om_vb,
247 |                             log_sig2_inv_vb, log_tau_vb, list_m1_btb,
248 |                             list_sig2_beta_star, sig2_inv_vb, tau_vb, vec_log_det)
249 | 
250 |   elbo_C <- e_tau_(eta, eta_vb, kappa, kappa_vb, log_tau_vb, tau_vb)
251 | 
252 |   elbo_D <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
253 | 
254 |   elbo_E <- e_omega_(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb)
255 | 
256 | 
257 |   elbo_A + elbo_B + elbo_C + elbo_D + elbo_E
258 | 
259 | }
260 | 
261 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | 
  5 | # Diverse utility functions implementing sanity checks, basic preprocessing,
  6 | # and ticks to prevent overflow/underflow.
  7 | #
  8 | 
  9 | check_natural_ <- function(x, eps = .Machine$double.eps^0.75){
 10 |   if (any(x < eps | abs(x - round(x)) > eps)) {
 11 |     stop(paste0(deparse(substitute(x)),
 12 |                " must be natural."))
 13 |   }
 14 | }
 15 | 
 16 | check_positive_ <- function(x, eps = .Machine$double.eps^0.75){
 17 |   if (any(x < eps)) {
 18 |     err_mess <- paste0(deparse(substitute(x)), " must be positive, greater than ",
 19 |                       format(eps, digits = 3), ".")
 20 |     if (length(x) > 1) err_mess <- paste0("All entries of ", err_mess)
 21 |     stop(err_mess)
 22 |   }
 23 | }
 24 | 
 25 | check_zero_one_ <- function(x){
 26 |   if (any(x < 0) | any(x > 1)) {
 27 |     err_mess <- paste0(deparse(substitute(x)), " must lie between 0 and 1.")
 28 |     if (length(x) > 1) err_mess <- paste0("All entries of ", err_mess)
 29 |     stop(err_mess)
 30 |   }
 31 | }
 32 | 
 33 | check_structure_ <- function(x, struct, type, size = NULL,
 34 |                              null_ok = FALSE,  inf_ok = FALSE, na_ok = FALSE) {
 35 |   if (type == "double") {
 36 |     bool_type <-  is.double(x)
 37 |     type_mess <- "a double-precision "
 38 |   } else if (type == "integer") {
 39 |     bool_type <- is.integer(x)
 40 |     type_mess <- "an integer "
 41 |   } else if (type == "numeric") {
 42 |     bool_type <- is.numeric(x)
 43 |     type_mess <- "a numeric "
 44 |   } else if (type == "logical") {
 45 |     bool_type <- is.logical(x)
 46 |     type_mess <- "a boolean "
 47 |   } else if (type == "string") {
 48 |     bool_type <- is.character(x)
 49 |     type_mess <- "string "
 50 |   }
 51 | 
 52 |   bool_size <- TRUE # for case size = NULL (no assertion on the size/dimension)
 53 |   size_mess <- ""
 54 |   if (struct == "vector") {
 55 |     bool_struct <- is.vector(x) & (length(x) > 0) # not an empty vector
 56 |     if (!is.null(size)) {
 57 |       bool_size <- length(x) %in% size
 58 |       size_mess <- paste0(" of length ", paste0(size, collapse=" or "))
 59 |     }
 60 |   } else if (struct == "matrix") {
 61 |     bool_struct <- is.matrix(x) & (length(x) > 0) # not an empty matrix
 62 |     if (!is.null(size)) {
 63 |       bool_size <- all(dim(x) == size)
 64 |       size_mess <- paste0(" of dimension ", size[1], " x ", size[2])
 65 |     }
 66 |   }
 67 | 
 68 |   correct_obj <- bool_struct & bool_type & bool_size
 69 | 
 70 |   bool_null <- is.null(x)
 71 | 
 72 |   if (!is.list(x) & type != "string") {
 73 |     na_mess <- ""
 74 |     if (!na_ok) {
 75 |       if (!bool_null) correct_obj <- correct_obj & !any(is.na(x))
 76 |       na_mess <- " without missing value"
 77 |     }
 78 | 
 79 |     inf_mess <- ""
 80 |     if (!inf_ok) {
 81 |       if (!bool_null) correct_obj <- correct_obj & all(is.finite(x[!is.na(x)]))
 82 |       inf_mess <- ", finite"
 83 |     }
 84 |   } else {
 85 |     na_mess <- ""
 86 |     inf_mess <- ""
 87 |   }
 88 | 
 89 |   null_mess <- ""
 90 |   if (null_ok) {
 91 |     correct_obj <- correct_obj | bool_null
 92 |     null_mess <- " or must be NULL"
 93 |   }
 94 | 
 95 |   if(!(correct_obj)) {
 96 |     stop(paste0(deparse(substitute(x)), " must be a non-empty ", type_mess, struct,
 97 |                size_mess, inf_mess, na_mess, null_mess, "."))
 98 |   }
 99 | }
100 | 
101 | 
102 | create_named_list_ <- function(...) {
103 |   setNames(list(...), as.character(match.call()[-1]))
104 | }
105 | 
106 | 
107 | 
108 | get_annealing_ladder_ <- function(anneal, verbose) {
109 | 
110 |   # ladder set following:
111 |   # Importance Tempering, Robert B. Gramacy & Richard J. Samworth, pp.9-10, arxiv v4
112 | 
113 |   k_m <- 1 / anneal[2]
114 |   m <- anneal[3]
115 | 
116 |   if(anneal[1] == 1) {
117 | 
118 |     type <- "geometric"
119 | 
120 |     delta_k <- k_m^(1 / (1 - m)) - 1
121 | 
122 |     ladder <- (1 + delta_k)^(1 - m:1)
123 | 
124 |   } else if (anneal[1] == 2) { # harmonic spacing
125 | 
126 |     type <- "harmonic"
127 | 
128 |     delta_k <- ( 1 / k_m - 1) / (m - 1)
129 | 
130 |     ladder <- 1 / (1 + delta_k * (m:1 - 1))
131 | 
132 |   } else { # linear spacing
133 | 
134 |     type <- "linear"
135 | 
136 |     delta_k <- (1 - k_m) / (m - 1)
137 | 
138 |     ladder <- k_m + delta_k * (1:m - 1)
139 |   }
140 | 
141 |   if (verbose)
142 |     cat(paste0("** Annealing with ", type," spacing ** \n\n"))
143 | 
144 |   ladder
145 | 
146 | }
147 | 
148 | 
149 | log_one_plus_exp_ <- function(x) { # computes log(1 + exp(x)) avoiding
150 |                                    # numerical overflow
151 |   m <- x
152 |   m[x < 0] <- 0
153 | 
154 |   log(exp(x - m) + exp(- m)) + m
155 | }
156 | 
157 | 
158 | log_sigmoid_ <- function(chi) {
159 | 
160 |   - log(1 + exp(- chi)) # chi is always positive so no overflow possible (underflow neither, thanks to the "+1")
161 | 
162 | }
163 | 
164 | log_det <- function(list_mat) {
165 | 
166 |   if (is.list(list_mat)) {
167 |     sapply(list_mat, function(mat) {
168 |       log_det <- determinant(mat, logarithm = TRUE)
169 |       log_det$modulus * log_det$sign
170 |     })
171 |   } else {
172 |     log_det <- determinant(list_mat, logarithm = TRUE)
173 |     log_det$modulus * log_det$sign
174 |   }
175 | 
176 | }
177 | 
178 | inv_mills_ratio_matrix_ <- function(Y, U) {
179 | 
180 |   if (is.matrix(U)) m <- matrix(NA, nrow = nrow(U), ncol = ncol(U))
181 |   else m <- rep(NA, length(U))
182 | 
183 |   U_1 <- U[Y==1]
184 |   m_1 <- exp(dnorm(U_1, log = TRUE) - pnorm(U_1, log.p = TRUE))
185 |   m_1[m_1 < -U_1] <- -U_1
186 | 
187 |   m[Y==1] <- m_1
188 | 
189 | 
190 |   U_0 <- U[Y==0]
191 |   m_0 <- - exp(dnorm(U_0, log = TRUE) - pnorm(U_0, lower.tail = FALSE, log.p = TRUE))
192 |   m_0[m_0 > -U_0] <- -U_0
193 | 
194 |   m[Y==0] <- m_0
195 | 
196 |   m
197 | 
198 | }
199 | 
200 | 
201 | inv_mills_ratio_ <- function(y, U, log_1_pnorm_U, log_pnorm_U) {
202 |   
203 |   stopifnot(y %in% c(0, 1))
204 |   
205 |   # writing explicitely the formula for pnorm(, log = TRUE) is faster...
206 |   if (y == 1) {
207 |     
208 |     m <- exp(-U^2/2 - log(sqrt(2*pi)) - log_pnorm_U)
209 |     m[m < -U] <- -U
210 |     
211 |   } else {
212 |     
213 |     m <- - exp(-U^2/2 - log(sqrt(2*pi)) - log_1_pnorm_U)
214 |     m[m > -U] <- -U
215 |     
216 |   }
217 |   
218 |   m
219 |   
220 | }
221 | 
222 | 
223 | log_sum_exp_ <- function(x) {
224 |   # Computes log(sum(exp(x))
225 | 
226 |   if ( max(abs(x)) > max(x) )
227 |     offset <- min(x)
228 |   else
229 |     offset <- max(x)
230 |   log(sum(exp(x - offset))) + offset
231 | 
232 | }
233 | 
234 | 
235 | # entropy_ <- function(Y, U) {
236 | #
237 | #   log((2 * pi * exp(1))^(1/2) *
238 | #            exp(Y * pnorm(U, log.p = TRUE) +
239 | #                  (1-Y) * pnorm(U, lower.tail = FALSE, log.p = TRUE))) -
240 | #   U * inv_mills_ratio_matrix_(Y, U) / 2
241 | #
242 | # }
243 | 
244 | rm_constant_ <- function(mat, verbose) {
245 | 
246 |   bool_cst <- is.nan(colSums(mat))
247 | 
248 |   if (any(bool_cst)) {
249 | 
250 |     rmvd_cst <- colnames(mat)[bool_cst]
251 | 
252 |     if (verbose) {
253 |       if (sum(bool_cst) < 50) {
254 |         cat(paste0("Variable(s) ", paste0(rmvd_cst, collapse=", "),
255 |                   " constant across subjects. \n",
256 |                   "Removing corresponding column(s) and saving its/their id(s) ",
257 |                   "in the function output ... \n\n"))
258 |       } else {
259 |         cat(paste0(sum(bool_cst), " variables constant across subjects. \n",
260 |                   "Removing corresponding column(s) and saving their ids ",
261 |                   "in the function output ... \n\n"))
262 |       }
263 |     }
264 | 
265 |     mat <- mat[, !bool_cst, drop = FALSE]
266 |   } else {
267 |     rmvd_cst <- NULL
268 |   }
269 | 
270 |   create_named_list_(mat, bool_cst, rmvd_cst)
271 | }
272 | 
273 | rm_collinear_ <- function(mat, verbose) {
274 | 
275 |   bool_coll <- duplicated(mat, MARGIN = 2)
276 | 
277 |   if (any(bool_coll)) {
278 | 
279 |     mat_coll <- mat[, bool_coll, drop = FALSE]
280 |     rmvd_coll <- colnames(mat_coll)
281 | 
282 |     if (verbose) {
283 |       if (length(rmvd_coll) < 50) {
284 |         cat(paste0("Presence of collinear variable(s). ",
285 |                   paste0(rmvd_coll, collapse=", "), " redundant. \n",
286 |                   "Removing corresponding column(s) and saving its/their id(s) ",
287 |                   "in the function output ... \n"))
288 |       } else {
289 |         cat(paste0("Presence of collinear variables. ", length(rmvd_coll),
290 |                   " redundant.\n", "Removing corresponding columns and saving ",
291 |                   "their ids in the function output ... \n"))
292 |       }
293 |     }
294 | 
295 |     # associate to each removed replicate the name of the covariate with which
296 |     # it is duplicated and that is kept in the dataset
297 |     bool_with_coll <- duplicated(mat, MARGIN = 2, fromLast = TRUE) & !bool_coll
298 |     mat_with_coll <- mat[, bool_with_coll, drop = FALSE]
299 | 
300 |     assoc_coll <- colnames(mat_with_coll)[match(data.frame(mat_coll),
301 |                                                 data.frame(mat_with_coll))]
302 |     names(rmvd_coll) <- assoc_coll
303 | 
304 |     mat <- mat[, !bool_coll, drop = FALSE]
305 | 
306 |   } else {
307 |     rmvd_coll <- NULL
308 |   }
309 | 
310 |   create_named_list_(mat, bool_coll, rmvd_coll)
311 | }
312 | 
313 | make_chunks_ <- function(x, n_g) split(x, factor(sort(rank(x) %% n_g)))
314 | 
315 | 
316 | checkpoint_ <- function(it, checkpoint_path, gam_vb, converged, lb_new, lb_old, 
317 |                         om_vb = NULL, rate = 100) {
318 |   
319 |   if (!is.null(checkpoint_path) && it %% rate == 0) {
320 |     
321 |     diff_lb <- abs(lb_new - lb_old)
322 |     
323 |     tmp_vb <- create_named_list_(gam_vb, converged, it, lb_new, diff_lb, om_vb)
324 |     
325 |     file_save <- paste0(checkpoint_path, "tmp_output_it_", it, ".RData")
326 |     
327 |     save(tmp_vb, file = file_save)
328 |     
329 |     old_file_clean_up <- paste0(checkpoint_path, "tmp_output_it_", it - 2 * rate, ".RData") # keep only the last two for comparison
330 |     
331 |     if (file.exists(old_file_clean_up)) 
332 |       file.remove(old_file_clean_up)
333 | 
334 |   }
335 |     
336 | }
337 | 
338 | 
339 | checkpoint_clean_up_ <- function(checkpoint_path) {
340 |   
341 |   if (!is.null(checkpoint_path)) {
342 |     
343 |     old_files_clean_up <- list.files(path = checkpoint_path, pattern = "tmp_output_it_")
344 |     
345 |     sapply(old_files_clean_up, function(ff) {
346 |       if (file.exists(file.path(checkpoint_path, ff))) 
347 |         file.remove(file.path(checkpoint_path, ff))
348 |     })
349 |     
350 |   } 
351 |   
352 | }
353 |  


--------------------------------------------------------------------------------
/R/locus_probit_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for probit link, optional
  5 | # fixed covariates and no external annotation variables.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_probit_core_ <- function(Y, X, Z, list_hyper, gam_vb, alpha_vb,
  9 |                                mu_beta_vb, sig2_alpha_vb, sig2_beta_vb, tol,
 10 |                                maxit, verbose, batch = "y", full_output = FALSE,
 11 |                                debug = FALSE) {
 12 | 
 13 |   # an intercept must be present in Z (column of ones), and X and Z must be standardized (except intercept in Z)
 14 | 
 15 |   d <- ncol(Y)
 16 |   n <- nrow(Y)
 17 |   p <- ncol(X)
 18 |   q <- ncol(Z)
 19 | 
 20 |   with(list_hyper, { # list_init not used with the with() function to avoid
 21 |     # copy-on-write for large objects
 22 | 
 23 |     m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb, sweep = TRUE)
 24 |     beta_vb <- update_beta_vb_(gam_vb, mu_beta_vb)
 25 |     m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb)
 26 | 
 27 |     mat_x_m1 <- update_mat_x_m1_(X, beta_vb)
 28 |     mat_z_mu <- update_mat_z_mu_(Z, alpha_vb)
 29 | 
 30 |     W <- update_W_probit_(Y, mat_z_mu, mat_x_m1)
 31 | 
 32 |     rs_gam <- rowSums(gam_vb)
 33 |     sum_gam <- sum(rs_gam)
 34 |     digam_sum <- digamma(a + b + d)
 35 | 
 36 |     phi_vb <- update_phi_z_vb_(phi, d)
 37 | 
 38 |     converged <- FALSE
 39 |     lb_new <- -Inf
 40 |     it <- 0
 41 | 
 42 |     while ((!converged) & (it < maxit)) {
 43 | 
 44 |       lb_old <- lb_new
 45 |       it <- it + 1
 46 | 
 47 |       if (verbose & (it == 1 | it %% 5 == 0))
 48 |         cat(paste0("Iteration ", format(it), "... \n"))
 49 | 
 50 |       # % #
 51 |       xi_vb <- update_xi_bin_vb_(xi, m2_alpha)
 52 | 
 53 |       zeta2_inv_vb <- phi_vb / xi_vb
 54 |       # % #
 55 | 
 56 |       # % #
 57 |       lambda_vb <- update_lambda_vb_(lambda, sum_gam)
 58 |       nu_vb <- update_nu_bin_vb_(nu, m2_beta)
 59 | 
 60 |       sig2_inv_vb <- lambda_vb / nu_vb
 61 |       # % #
 62 | 
 63 |       sig2_alpha_vb <- update_sig2_alpha_vb_(n, zeta2_inv_vb, intercept = TRUE)
 64 |       sig2_beta_vb <- update_sig2_beta_vb_(n, sig2_inv_vb)
 65 | 
 66 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 67 | 
 68 | 
 69 |       # different possible batch-coordinate ascent schemes:
 70 | 
 71 |       if (batch == "y") { # optimal scheme
 72 | 
 73 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
 74 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
 75 | 
 76 |         for (i in sample(1:q)) {
 77 | 
 78 |           mat_z_mu <- mat_z_mu - tcrossprod(Z[, i], alpha_vb[i, ])
 79 | 
 80 |           alpha_vb[i, ] <- sig2_alpha_vb[i] * crossprod(W  - mat_z_mu - mat_x_m1, Z[, i])
 81 | 
 82 |           mat_z_mu <- mat_z_mu + tcrossprod(Z[, i], alpha_vb[i, ])
 83 | 
 84 |         }
 85 | 
 86 |         # C++ Eigen call for expensive updates
 87 |         shuffled_ind <- as.numeric(sample(0:(p-1))) # Zero-based index in C++
 88 | 
 89 |         coreProbitLoop(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
 90 |                        beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb,
 91 |                        shuffled_ind)
 92 | 
 93 |         rs_gam <- rowSums(gam_vb)
 94 | 
 95 |       } else if (batch == "x") {  # used internally for testing purposes,
 96 |         # convergence not ensured as ELBO not batch-concave
 97 | 
 98 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
 99 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
100 | 
101 |         for (k in sample(1:d)) {
102 | 
103 |           alpha_vb[, k] <- sig2_alpha_vb * (crossprod(W[, k]  - mat_z_mu[, k] - mat_x_m1[, k], Z) +  (n - 1) * alpha_vb[, k])
104 |           alpha_vb[1, k] <- alpha_vb[1, k] + sig2_alpha_vb[1] * alpha_vb[1, k] # correction for the intercept (sums to 1)
105 | 
106 |           mat_z_mu[, k] <- Z %*% alpha_vb[, k]
107 | 
108 |           mu_beta_vb[, k] <- sig2_beta_vb * (crossprod(W[, k] -  mat_z_mu[, k] - mat_x_m1[, k], X) + (n - 1) * beta_vb[, k])
109 | 
110 |           gam_vb[, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb - log_om_vb -
111 |                                                   log_sig2_inv_vb / 2 -
112 |                                                   mu_beta_vb[, k] ^ 2 / (2 * sig2_beta_vb) -
113 |                                                   log(sig2_beta_vb) / 2))
114 | 
115 |           beta_vb[, k] <- mu_beta_vb[, k] * gam_vb[, k]
116 | 
117 |           mat_x_m1[, k] <- X %*% beta_vb[, k]
118 | 
119 |         }
120 | 
121 |         rs_gam <- rowSums(gam_vb)
122 | 
123 |       } else if (batch == "x-y") { # used internally for testing purposes,
124 |         # convergence not ensured as ELBO not batch-concave
125 | 
126 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
127 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
128 | 
129 |         alpha_vb <- sweep(crossprod(Z, W  - mat_z_mu - mat_x_m1) +  (n - 1) * alpha_vb, 1, sig2_alpha_vb, `*`)
130 |         alpha_vb[1, ] <- alpha_vb[1, ] + sig2_alpha_vb[1] * alpha_vb[1, ] # correction for the intercept (sums to 1)
131 | 
132 |         mat_z_mu <- Z %*% alpha_vb
133 | 
134 |         # C++ Eigen call for expensive updates
135 |         coreProbitBatch(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
136 |                         beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb)
137 | 
138 |         rs_gam <- rowSums(gam_vb)
139 | 
140 |       } else if (batch == "0") { # no batch, used only internally
141 | 
142 |         for (k in sample(1:d)) {
143 | 
144 |           log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
145 |           log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
146 | 
147 |           for (i in sample(1:q)) {
148 | 
149 |             mat_z_mu[, k] <- mat_z_mu[, k] - Z[, i] * alpha_vb[i, k]
150 | 
151 |             alpha_vb[i, k] <- sig2_alpha_vb[i] * crossprod(Z[, i], W[, k] - mat_z_mu[, k] - mat_x_m1[, k])
152 | 
153 |             mat_z_mu[, k] <- mat_z_mu[, k] + Z[, i] * alpha_vb[i, k]
154 | 
155 |           }
156 | 
157 |           for (j in sample(1:p)) {
158 | 
159 |             mat_x_m1[, k] <- mat_x_m1[, k] - X[, j] * beta_vb[j, k]
160 | 
161 |             mu_beta_vb[j, k] <- sig2_beta_vb * crossprod(W[, k] - mat_x_m1[, k] - mat_z_mu[, k], X[, j])
162 | 
163 |             gam_vb[j, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb[j] - log_om_vb[j] -
164 |                                                      log_sig2_inv_vb / 2 -
165 |                                                      mu_beta_vb[j, k] ^ 2 / (2 * sig2_beta_vb) -
166 |                                                      log(sig2_beta_vb) / 2))
167 | 
168 | 
169 |             beta_vb[j, k] <- mu_beta_vb[j, k] * gam_vb[j, k]
170 | 
171 |             mat_x_m1[, k] <- mat_x_m1[, k] + X[, j] * beta_vb[j, k]
172 | 
173 |           }
174 | 
175 |           rs_gam <- rowSums(gam_vb)
176 | 
177 |         }
178 | 
179 |       } else {
180 | 
181 |         stop ("Batch scheme not defined. Exit.")
182 | 
183 |       }
184 | 
185 |       sum_gam <- sum(rs_gam)
186 | 
187 |       m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb, sweep = TRUE)
188 |       m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb)
189 | 
190 |       W <- update_W_probit_(Y, mat_z_mu, mat_x_m1)
191 | 
192 |       a_vb <- update_a_vb(a, rs_gam)
193 |       b_vb <- update_b_vb(b, d, rs_gam)
194 |       om_vb <- a_vb / (a_vb + b_vb)
195 | 
196 | 
197 |       lb_new <- elbo_probit_(Y, X, Z, a, a_vb, b, b_vb, beta_vb, gam_vb, lambda, 
198 |                              nu, phi, phi_vb, sig2_alpha_vb, sig2_beta_vb, 
199 |                              sig2_inv_vb, xi, zeta2_inv_vb, alpha_vb,  m2_alpha,
200 |                              m2_beta, mat_x_m1, mat_z_mu, sum_gam)
201 | 
202 |       if (verbose & (it == 1 | it %% 5 == 0))
203 |         cat(paste0("ELBO = ", format(lb_new), "\n\n"))
204 | 
205 |       if (debug && lb_new < lb_old)
206 |         stop("ELBO not increasing monotonically. Exit. ")
207 | 
208 |       converged <- (abs(lb_new - lb_old) < tol)
209 | 
210 |     }
211 | 
212 |     if (verbose) {
213 |       if (converged) {
214 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
215 |                   "Optimal marginal log-likelihood variational lower bound ",
216 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
217 |       } else {
218 |         warning("Maximal number of iterations reached before convergence. Exit.")
219 |       }
220 |     }
221 | 
222 |     lb_opt <- lb_new
223 | 
224 |     
225 |     names_x <- colnames(X)
226 |     names_y <- colnames(Y)
227 |     names_z <- colnames(Z)
228 |     
229 |     rownames(gam_vb) <- rownames(beta_vb) <- names_x
230 |     colnames(gam_vb) <- colnames(beta_vb) <- names_y
231 |     names(om_vb) <- names_x
232 |     rownames(alpha_vb) <- names_z
233 |     colnames(alpha_vb) <- names_y
234 |     
235 |     diff_lb <- abs(lb_opt - lb_old)
236 |     
237 |     if (full_output) { # for internal use only
238 |       
239 |       create_named_list_(a, a_vb, b, b_vb, beta_vb, gam_vb, lambda, mu_beta_vb, 
240 |                          nu, om_vb, phi, phi_vb, sig2_alpha_vb, sig2_beta_vb, 
241 |                          sig2_inv_vb, xi, zeta2_inv_vb, alpha_vb, m2_alpha, 
242 |                          m2_beta, sum_gam, converged, it, lb_opt, diff_lb)
243 |     } else {
244 | 
245 |       create_named_list_(beta_vb, gam_vb, om_vb, alpha_vb, converged, it, 
246 |                          lb_opt, diff_lb)
247 |     }
248 |   })
249 | 
250 | }
251 | 
252 | 
253 | 
254 | # Internal function which implements the marginal log-likelihood variational
255 | # lower bound (ELBO) corresponding to the `locus_probit_core` algorithm.
256 | #
257 | elbo_probit_ <- function(Y, X, Z, a, a_vb, b, b_vb, beta_vb, gam_vb, lambda, nu, 
258 |                          phi, phi_vb, sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb, 
259 |                          xi, zeta2_inv_vb, alpha_vb, m2_alpha, m2_beta,
260 |                          mat_x_m1, mat_z_mu, sum_gam) {
261 | 
262 |   lambda_vb <- update_lambda_vb_(lambda, sum_gam)
263 |   nu_vb <- update_nu_bin_vb_(nu, m2_beta)
264 | 
265 |   xi_vb <- update_xi_bin_vb_(xi, m2_alpha)
266 | 
267 |   log_sig2_inv_vb <- digamma(lambda_vb) - log(nu_vb)
268 |   log_zeta2_inv_vb <- digamma(phi_vb) - log(xi_vb)
269 |   log_om_vb <- digamma(a_vb) - digamma(a_vb + b_vb)
270 |   log_1_min_om_vb <- digamma(b_vb) - digamma(a_vb + b_vb)
271 | 
272 | 
273 |   elbo_A <- e_y_probit_(X, Y, Z, beta_vb, m2_beta, mat_x_m1, mat_z_mu, sig2_alpha_vb)
274 | 
275 |   elbo_B <- e_beta_gamma_bin_(gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
276 |                               m2_beta, sig2_beta_vb, sig2_inv_vb)
277 | 
278 |   elbo_C <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
279 | 
280 |   elbo_D <- e_omega_(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb)
281 | 
282 |   elbo_E <- e_alpha_probit_(m2_alpha, log_zeta2_inv_vb, sig2_alpha_vb, zeta2_inv_vb)
283 | 
284 |   elbo_F <- e_zeta2_inv_(log_zeta2_inv_vb, phi, phi_vb, xi, xi_vb, zeta2_inv_vb)
285 | 
286 |   elbo_A + elbo_B + elbo_C + elbo_D + elbo_E + elbo_F
287 | 
288 | }
289 | 
290 | 


--------------------------------------------------------------------------------
/R/locus_logistic_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for logit link,
  5 | # optional fixed covariates and no external annotation variables.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_logit_core_ <- function(Y, X, Z, list_hyper, chi_vb, gam_vb, alpha_vb,
  9 |                               mu_beta_vb, sig2_alpha_vb, sig2_beta_vb, tol,
 10 |                               maxit, verbose, batch = "y", full_output = FALSE,
 11 |                               debug = FALSE) {
 12 | 
 13 |   # 1/2 must have been substracted from Y and X must have been standardized (except intercept in Z)
 14 | 
 15 |   d <- ncol(Y)
 16 |   n <- nrow(Y)
 17 |   p <- ncol(X)
 18 |   q <- ncol(Z)
 19 | 
 20 |   with(list_hyper, { # list_init not used with the with() function to avoid
 21 |     # copy-on-write for large objects
 22 | 
 23 |     m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb)
 24 |     beta_vb <- update_beta_vb_(gam_vb, mu_beta_vb)
 25 |     m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb)
 26 | 
 27 |     mat_x_m1 <- update_mat_x_m1_(X, beta_vb)
 28 |     mat_z_mu <- update_mat_z_mu_(Z, alpha_vb)
 29 | 
 30 |     phi_vb <- update_phi_z_vb_(phi, d)
 31 | 
 32 |     rs_gam <- rowSums(gam_vb)
 33 |     sum_gam <- sum(rs_gam)
 34 |     digam_sum <- digamma(a + b + d)
 35 | 
 36 |     converged <- FALSE
 37 |     lb_new <- -Inf
 38 |     it <- 0
 39 | 
 40 |     while ((!converged) & (it < maxit)) {
 41 | 
 42 |       lb_old <- lb_new
 43 |       it <- it + 1
 44 | 
 45 |       if (verbose & (it == 1 | it %% 5 == 0))
 46 |         cat(paste0("Iteration ", format(it), "... \n"))
 47 | 
 48 |       # % #
 49 |       chi_vb <- update_chi_vb_(X, Z, beta_vb, m2_beta, mat_x_m1, mat_z_mu, sig2_alpha_vb)
 50 | 
 51 |       psi_vb <- update_psi_logit_vb_(chi_vb)
 52 |       # % #
 53 | 
 54 |       # % #
 55 |       xi_vb <- update_xi_bin_vb_(xi, m2_alpha)
 56 | 
 57 |       zeta2_inv_vb <- phi_vb / xi_vb
 58 |       # % #
 59 | 
 60 |       # % #
 61 |       lambda_vb <- update_lambda_vb_(lambda, sum_gam)
 62 |       nu_vb <- update_nu_bin_vb_(nu, m2_beta)
 63 | 
 64 |       sig2_inv_vb <- lambda_vb / nu_vb
 65 |       # % #
 66 | 
 67 |       sig2_alpha_vb <- update_sig2_alpha_logit_vb_(Z, psi_vb, zeta2_inv_vb)
 68 |       sig2_beta_vb <- update_sig2_beta_logit_vb_(X, psi_vb, sig2_inv_vb)
 69 | 
 70 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 71 | 
 72 |       # different possible batch-coordinate ascent schemes:
 73 | 
 74 |       if (batch == "y") { # optimal scheme
 75 | 
 76 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
 77 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
 78 | 
 79 |         for (i in sample(1:q)) {
 80 |           mat_z_mu <- mat_z_mu - tcrossprod(Z[, i], alpha_vb[i, ])
 81 | 
 82 |           alpha_vb[i, ] <- sig2_alpha_vb[i, ] * crossprod(Y - 2 * psi_vb * (mat_z_mu + mat_x_m1), Z[, i])
 83 | 
 84 |           mat_z_mu <- mat_z_mu + tcrossprod(Z[, i], alpha_vb[i, ])
 85 |         }
 86 | 
 87 |         # C++ Eigen call for expensive updates
 88 |         shuffled_ind <- as.numeric(sample(0:(p-1))) # Zero-based index in C++
 89 | 
 90 |         coreLogitLoop(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
 91 |                       beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, psi_vb,
 92 |                       sig2_beta_vb, shuffled_ind)
 93 | 
 94 |         rs_gam <- rowSums(gam_vb)
 95 | 
 96 |       } else if (batch == "x") {  # used internally for testing purposes,
 97 |         # convergence not ensured as ELBO not batch-concave
 98 | 
 99 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
100 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
101 | 
102 |         for (k in sample(1:d)) {
103 | 
104 |           alpha_vb[, k] <- sig2_alpha_vb[, k] * (crossprod(Y[, k] - 2 * psi_vb[, k] * (mat_z_mu[, k] + mat_x_m1[, k]), Z) +
105 |                                                       colSums(sweep(sweep(Z, 2, alpha_vb[, k], `*`), 1, 2 * psi_vb[, k], `*`) * Z))
106 | 
107 |           mat_z_mu[, k] <- Z %*% alpha_vb[, k]
108 | 
109 |           mu_beta_vb[, k] <- sig2_beta_vb[, k] * (crossprod(Y[, k] -  2 * psi_vb[, k] * (mat_z_mu[, k] + mat_x_m1[, k]), X) +
110 |                                                     colSums(sweep(sweep(X, 2, beta_vb[, k], `*`), 1, 2 * psi_vb[, k], `*`) * X))
111 | 
112 | 
113 |           gam_vb[, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb - log_om_vb -
114 |                                                   log_sig2_inv_vb / 2 -
115 |                                                   mu_beta_vb[, k] ^ 2 / (2 * sig2_beta_vb[, k]) -
116 |                                                   log(sig2_beta_vb[, k]) / 2))
117 | 
118 |           beta_vb[, k] <- mu_beta_vb[, k] * gam_vb[, k]
119 | 
120 |           mat_x_m1[, k] <- X %*% beta_vb[, k]
121 | 
122 |         }
123 | 
124 |         rs_gam <- rowSums(gam_vb)
125 | 
126 |       } else if (batch == "x-y") {  # used internally for testing purposes,
127 |         # convergence not ensured as ELBO not batch-concave
128 | 
129 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
130 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
131 | 
132 |         alpha_vb <- sig2_alpha_vb * (crossprod(Z, Y - 2 * psi_vb * (mat_z_mu + mat_x_m1)) +
133 |                                           sapply(1:d, function(k)
134 |                                             colSums(sweep(sweep(Z, 2, alpha_vb[, k], `*`), 1, 2 * psi_vb[, k], `*`) * Z)))
135 | 
136 |         mat_z_mu <- Z %*% alpha_vb
137 | 
138 |         mu_beta_vb <- sig2_beta_vb * (crossprod(X, Y -  2 * psi_vb * (mat_z_mu + mat_x_m1)) +
139 |                                         sapply(1:d, function(k)
140 |                                           colSums(sweep(sweep(X, 2, beta_vb[, k], `*`), 1, 2 * psi_vb[, k], `*`) * X)))
141 | 
142 | 
143 |         gam_vb <- exp(-log_one_plus_exp_(log_1_min_om_vb - log_om_vb -
144 |                                            log_sig2_inv_vb / 2 -
145 |                                            mu_beta_vb ^ 2 / (2 * sig2_beta_vb) -
146 |                                            log(sig2_beta_vb) / 2))
147 | 
148 |         beta_vb<- mu_beta_vb * gam_vb
149 | 
150 |         mat_x_m1 <- X %*% beta_vb
151 | 
152 | 
153 |         rs_gam <- rowSums(gam_vb)
154 | 
155 |       } else if (batch == "0"){ # no batch, used only internally
156 | 
157 | 
158 |         for (k in sample(1:d)) {
159 | 
160 |           log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
161 |           log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
162 | 
163 |           for (i in sample(1:q)) {
164 | 
165 |             mat_z_mu[, k] <- mat_z_mu[, k] - Z[, i] * alpha_vb[i, k]
166 | 
167 |             alpha_vb[i, k] <- sig2_alpha_vb[i, k] *
168 |               crossprod(Y[, k] - 2 * psi_vb[, k] * (mat_z_mu[, k] + mat_x_m1[, k]), Z[, i])
169 | 
170 |             mat_z_mu[, k] <- mat_z_mu[, k] + Z[, i] * alpha_vb[i, k]
171 | 
172 |           }
173 | 
174 |           for (j in sample(1:p)) {
175 | 
176 |             mat_x_m1[, k] <- mat_x_m1[, k] - X[, j] * beta_vb[j, k]
177 | 
178 |             mu_beta_vb[j, k] <- sig2_beta_vb[j, k] *
179 |               crossprod(Y[, k] - 2 * psi_vb[, k] * (mat_z_mu[, k] + mat_x_m1[, k]), X[, j])
180 | 
181 |             gam_vb[j, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb[j] - log_om_vb[j] -
182 |                                                      log_sig2_inv_vb / 2 -
183 |                                                      mu_beta_vb[j, k] ^ 2 / (2 * sig2_beta_vb[j, k]) -
184 |                                                      log(sig2_beta_vb[j, k]) / 2))
185 | 
186 |             beta_vb[j, k] <- mu_beta_vb[j, k] * gam_vb[j, k]
187 | 
188 |             mat_x_m1[, k] <- mat_x_m1[, k] + X[, j] * beta_vb[j, k]
189 | 
190 |           }
191 | 
192 |           rs_gam <- rowSums(gam_vb)
193 | 
194 |         }
195 | 
196 |       } else {
197 | 
198 |         stop ("Batch scheme not defined. Exit.")
199 | 
200 |       }
201 | 
202 |       sum_gam <- sum(rs_gam)
203 | 
204 |       m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb)
205 |       m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb)
206 | 
207 |       a_vb <- update_a_vb(a, rs_gam)
208 |       b_vb <- update_b_vb(b, d, rs_gam)
209 |       om_vb <- a_vb / (a_vb + b_vb)
210 | 
211 |       lb_new <- elbo_logit_(Y, X, Z, a, a_vb, b, b_vb, beta_vb, chi_vb, gam_vb,
212 |                             lambda, nu, phi, phi_vb, psi_vb, sig2_alpha_vb,
213 |                             sig2_beta_vb, sig2_inv_vb, xi, zeta2_inv_vb,
214 |                             alpha_vb, m2_alpha, m2_beta, mat_x_m1,
215 |                             mat_z_mu, sum_gam)
216 | 
217 |       if (verbose & (it == 1 | it %% 5 == 0))
218 |         cat(paste0("ELBO = ", format(lb_new), "\n\n"))
219 | 
220 |       if (debug && lb_new < lb_old)
221 |         stop("ELBO not increasing monotonically. Exit. ")
222 | 
223 |       converged <- (abs(lb_new - lb_old) < tol)
224 | 
225 |     }
226 | 
227 |     if (verbose) {
228 |       if (converged) {
229 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
230 |                   "Optimal marginal log-likelihood variational lower bound ",
231 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
232 |       } else {
233 |         warning("Maximal number of iterations reached before convergence. Exit.")
234 |       }
235 |     }
236 | 
237 |     lb_opt <- lb_new
238 | 
239 |     names_x <- colnames(X)
240 |     names_y <- colnames(Y)
241 |     names_z <- colnames(Z)
242 |     
243 |     rownames(gam_vb) <- rownames(beta_vb) <- names_x
244 |     colnames(gam_vb) <- colnames(beta_vb) <- names_y
245 |     
246 |     names(om_vb) <- names_x
247 |     rownames(alpha_vb) <- names_z
248 |     colnames(alpha_vb) <- names_y
249 |     
250 |     diff_lb <- abs(lb_opt - lb_old)
251 |     
252 |     if (full_output) { # for internal use only
253 |       
254 |       create_named_list_(a, a_vb, b, b_vb, beta_vb, chi_vb, gam_vb, lambda, 
255 |                          mu_beta_vb, nu, om_vb, phi, phi_vb, psi_vb, sig2_alpha_vb, 
256 |                          sig2_beta_vb, sig2_inv_vb, xi, zeta2_inv_vb, alpha_vb, 
257 |                          m2_alpha, m2_beta, mat_x_m1, mat_z_mu, sum_gam, 
258 |                          converged, it, lb_opt, diff_lb)
259 |     } else {
260 | 
261 |       create_named_list_(beta_vb, gam_vb, om_vb, alpha_vb, converged, it, lb_opt, diff_lb)
262 |     }
263 |   })
264 | 
265 | }
266 | 
267 | 
268 | # Internal function which implements the marginal log-likelihood variational
269 | # lower bound (ELBO) corresponding to the `locus_logit_core` algorithm.
270 | #
271 | elbo_logit_ <- function(Y, X, Z, a, a_vb, b, b_vb, beta_vb, chi_vb, gam_vb, 
272 |                         lambda, nu, phi, phi_vb, psi_vb, sig2_alpha_vb, 
273 |                         sig2_beta_vb, sig2_inv_vb, xi, zeta2_inv_vb, alpha_vb,
274 |                         m2_alpha, m2_beta, mat_x_m1, mat_z_mu, sum_gam) {
275 | 
276 |   lambda_vb <- update_lambda_vb_(lambda, sum_gam)
277 |   nu_vb <- update_nu_bin_vb_(nu, m2_beta)
278 | 
279 |   xi_vb <- update_xi_bin_vb_(xi, m2_alpha)
280 | 
281 |   log_sig2_inv_vb <- digamma(lambda_vb) - log(nu_vb)
282 |   log_zeta2_inv_vb <- digamma(phi_vb) - log(xi_vb)
283 |   log_om_vb <- digamma(a_vb) - digamma(a_vb + b_vb)
284 |   log_1_min_om_vb <- digamma(b_vb) - digamma(a_vb + b_vb)
285 | 
286 |   elbo_A <- e_y_logit_(X, Y, Z, chi_vb, beta_vb, m2_alpha, m2_beta, mat_x_m1,
287 |                        mat_z_mu, alpha_vb, psi_vb)
288 | 
289 |   elbo_B <- e_beta_gamma_bin_(gam_vb, log_om_vb, log_1_min_om_vb,
290 |                               log_sig2_inv_vb, m2_beta, sig2_beta_vb, sig2_inv_vb)
291 | 
292 |   elbo_C <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
293 | 
294 |   elbo_D <- e_omega_(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb)
295 | 
296 |   elbo_E <- e_alpha_logit_(m2_alpha, log_zeta2_inv_vb, sig2_alpha_vb, zeta2_inv_vb)
297 | 
298 |   elbo_F <- e_zeta2_inv_(log_zeta2_inv_vb, phi, phi_vb, xi, xi_vb, zeta2_inv_vb)
299 | 
300 | 
301 |   elbo_A + elbo_B + elbo_C + elbo_D + elbo_E + elbo_F
302 | 
303 | }
304 | 
305 | 


--------------------------------------------------------------------------------
/man/locus.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/locus.R
  3 | \name{locus}
  4 | \alias{locus}
  5 | \title{Fit sparse multivariate regression models using variational inference.}
  6 | \usage{
  7 | locus(
  8 |   Y,
  9 |   X,
 10 |   p0_av,
 11 |   Z = NULL,
 12 |   link = "identity",
 13 |   ind_bin = NULL,
 14 |   list_hyper = NULL,
 15 |   list_init = NULL,
 16 |   list_cv = NULL,
 17 |   list_blocks = NULL,
 18 |   list_groups = NULL,
 19 |   list_struct = NULL,
 20 |   user_seed = NULL,
 21 |   tol = 0.1,
 22 |   maxit = 1000,
 23 |   anneal = NULL,
 24 |   save_hyper = FALSE,
 25 |   save_init = FALSE,
 26 |   full_output = FALSE,
 27 |   verbose = TRUE,
 28 |   checkpoint_path = NULL
 29 | )
 30 | }
 31 | \arguments{
 32 | \item{Y}{Response data matrix of dimension n x d, where n is the number of
 33 | samples and d is the number of response variables.}
 34 | 
 35 | \item{X}{Input matrix of dimension n x p, where p is the number of candidate
 36 | predictors. \code{X} cannot contain NAs. No intercept must be supplied.}
 37 | 
 38 | \item{p0_av}{Prior average number of predictors (or groups of predictors if 
 39 | \code{list_groups} is non-\code{NULL}) expected to be included in the 
 40 | model.  Can also be a vector of length p (resp. of length the number of 
 41 | groups) with entry s corresponding to the prior probability that candidate 
 42 | predictor s (resp. group s) is associated with at least one response. Must 
 43 | be \code{NULL} if \code{list_init} and \code{list_hyper} are both 
 44 | non-\code{NULL} or if \code{list_cv} is non-\code{NULL}.}
 45 | 
 46 | \item{Z}{Covariate matrix of dimension n x q, where q is the number of
 47 | covariates. Variables in \code{Z} are not subject to selection. \code{NULL}
 48 | if no covariate. Factor covariates must be supplied after transformation to
 49 | dummy coding. No intercept must be supplied.}
 50 | 
 51 | \item{link}{Response link. Must be "\code{identity}" for linear regression,
 52 | "\code{logit}" for logistic regression, "\code{probit}" for probit
 53 | regression, or "\code{mix}" for a mix of identity and probit link functions
 54 | (in this case, the indices of the binary responses must be gathered in
 55 | argument \code{ind_bin}, see below).}
 56 | 
 57 | \item{ind_bin}{If \code{link = "mix"}, vector of indices corresponding to
 58 | the binary variables in \code{Y}. Must be \code{NULL} if
 59 | \code{link != "mix"}.}
 60 | 
 61 | \item{list_hyper}{An object of class "\code{hyper}" containing the model
 62 | hyperparameters. Must be filled using the \code{\link{set_hyper}}
 63 | function or must be \code{NULL} for default hyperparameters.}
 64 | 
 65 | \item{list_init}{An object of class "\code{init}" containing the initial
 66 | variational parameters. Must be filled using the \code{\link{set_init}}
 67 | function or be \code{NULL} for a default initialization.}
 68 | 
 69 | \item{list_cv}{An object of class "\code{cv}" containing settings for
 70 | choosing the prior average number of predictors expected to be included in
 71 | the model, \code{p0_av}, by cross-validation. Must be filled using the
 72 | \code{\link{set_cv}} function or must be \code{NULL} for no
 73 | cross-validation. If non-\code{NULL}, \code{p0_av}, \code{list_init} and
 74 | \code{list_hyper} must all be \code{NULL}. Cross-validation only available
 75 | for \code{link = "identity"}.}
 76 | 
 77 | \item{list_blocks}{An object of class "\code{blocks}" containing settings for
 78 | parallel inference on a partitioned predictor space. Must be filled using
 79 | the \code{\link{set_blocks}} function or must be \code{NULL} for no
 80 | partitioning.}
 81 | 
 82 | \item{list_groups}{An object of class "\code{groups}" containing settings for
 83 | group selection of candidate predictors. Must be filled using the
 84 | \code{\link{set_groups}} function or must be \code{NULL} for group
 85 | selection.}
 86 | 
 87 | \item{list_struct}{An object of class "\code{struct}" containing settings for
 88 | structure sparsity priors. Must be filled using the
 89 | \code{\link{set_struct}} function or must be \code{NULL} for structured
 90 | selection.}
 91 | 
 92 | \item{user_seed}{Seed set for reproducible default choices of hyperparameters
 93 | (if \code{list_hyper} is \code{NULL}) and initial variational parameters
 94 | (if \code{list_init} is \code{NULL}). Also used at the cross-validation
 95 | stage (if \code{list_cv} is non-\code{NULL}). Default is \code{NULL}, no
 96 | seed set.}
 97 | 
 98 | \item{tol}{Tolerance for the stopping criterion.}
 99 | 
100 | \item{maxit}{Maximum number of iterations allowed.}
101 | 
102 | \item{anneal}{Parameters for annealing scheme. Must be a vector whose first
103 | entry is sets the type of ladder: 1 = geometric spacing, 2 = harmonic
104 | spacing or 3 = linear spacing, the second entry is the initial temperature,
105 | and the third entry is the ladder size. If \code{NULL} (default), no
106 | annealing is performed.}
107 | 
108 | \item{save_hyper}{If \code{TRUE}, the hyperparameters used for the model are
109 | returned.}
110 | 
111 | \item{save_init}{If \code{TRUE}, the initial variational parameters used for
112 | the inference are returned (note that the size of the resulting objects is
113 | likely to be large). Default is \code{FALSE}.}
114 | 
115 | \item{full_output}{If \code{TRUE}, the inferred variational parameters for 
116 | all parameters are returned.}
117 | 
118 | \item{verbose}{If \code{TRUE}, messages are displayed during execution.}
119 | 
120 | \item{checkpoint_path}{Path where to save temporary checkpoint outputs. 
121 | Default is \code{NULL}, for no checkpointing.}
122 | }
123 | \value{
124 | An object of class "\code{vb}" containing the following variational
125 |   estimates and settings:
126 |  \item{gam_vb}{Posterior inclusion probability matrix of dimension p x d.
127 |                Entry (s, t) corresponds to the posterior probability of
128 |                association between candidate predictor s and response t.}
129 |  \item{alpha_vb}{Matrix of dimension q x d whose entries are the posterior
130 |                     mean regression coefficients for the covariates provided
131 |                     in \code{Z} (if \code{link = "logit"},
132 |                     \code{link = "logit"} or
133 |                     \code{link = "mix"} also for the intercept).
134 |                     \code{NULL} if \code{Z} is \code{NULL}.}
135 |  \item{om_vb}{Vector of length p containing the posterior mean of omega.
136 |               Entry s controls the proportion of responses associated with
137 |               candidate predictor s.}
138 |  \item{converged}{A boolean indicating whether the algorithm has converged
139 |                   before reaching \code{maxit} iterations.}
140 |  \item{it}{Final number of iterations.}
141 |  \item{lb_opt}{Optimized variational lower bound for the marginal
142 |                log-likelihood (ELBO).}
143 |  \item{diff_lb}{Difference in ELBO between the last and penultimate
144 |                 iterations. This may be a useful diagnostic information when
145 |                 convergence has not been reached before \code{maxit}.}
146 |  \item{p_star}{Vector of length 1 or p defining the applied sparsity control.}
147 |  \item{rmvd_cst_x, rmvd_cst_z}{Vectors containing the indices of constant
148 |                                variables in \code{X} (resp. \code{Z}) removed
149 |                                prior to the analysis.}
150 |  \item{rmvd_coll_x, rmvd_coll_z}{Vectors containing the indices of variables
151 |                                  in \code{X} (resp. \code{Z}) removed prior
152 |                                  to the analysis because collinear to other
153 |                                  variables. The entry name indicates the
154 |                                  corresponding variable kept in the analysis
155 |                                  (i.e., that causing the collinearity for the
156 |                                  entry in question).}
157 |  \item{list_hyper, list_init}{If \code{save_hyper}, resp. \code{save_init},
158 |                               \code{TRUE}, hyperparameters, resp. initial
159 |                               variational parameters, used for inference are
160 |                               saved as output.}
161 |  \item{group_labels}{If \code{list_groups} is non-\code{NULL}, labels of the
162 |                      groups to which the candidate predictor belong (these
163 |                      labels are gathered after removal of constant and
164 |                      collinear predictors, whose indices are stored in
165 |                      \code{rmvd_cst_x} and \code{rmvd_coll_x}).}
166 |  \item{...}{Other specific outputs are possible depending on the model used.}
167 | }
168 | \description{
169 | Variational approximation procedure fitting sparse multivariate regression
170 | models for combined selection of predictors and associated responses in
171 | high-dimensional set-ups. Dependence across responses linked to the same
172 | predictors is modelled through the model hierarchical structure.
173 | The responses can be purely continuous, purely binary (logit or probit link
174 | fits), or a mix of continuous and binary variables.
175 | }
176 | \details{
177 | The optimization uses efficient block coordinate ascent schemes, for
178 | which convergence is ensured as the objective (elbo) is multiconcave
179 | for the selected blocks, i.e., it is concave in each block of parameters
180 | whose updates are made simultaneously, see Wu et al. (reference Section
181 | below).
182 | 
183 | The continuous response variables in \code{Y} (if any) will be centered
184 | before application of the variational algorithm, and the candidate predictors
185 | and covariates resp. in \code{X} and \code{Z} will be standardized. An
186 | intercept will be added if \code{link} is \code{"logit"}, \code{"probit"} or
187 | \code{"mix"} (do not supply it in \code{X} or \code{Z}).
188 | }
189 | \examples{
190 | seed <- 123; set.seed(seed)
191 | 
192 | ###################
193 | ## Simulate data ##
194 | ###################
195 | 
196 | ## Examples using small problem sizes:
197 | ##
198 | n <- 200; p <- 250; p0 <- 25; d <- 30; d0 <- 25; q <- 3
199 | 
200 | ## Candidate predictors (subject to selection)
201 | ##
202 | # Here we simulate common genetic variants (but any type of candidate
203 | # predictors can be supplied).
204 | # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
205 | #
206 | X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
207 | X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
208 | 
209 | shuff_x_ind <- sample(p)
210 | X <- cbind(X_act, X_inact)[, shuff_x_ind]
211 | 
212 | bool_x_act <- shuff_x_ind <= p0
213 | 
214 | pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
215 | pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
216 | beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
217 | 
218 | ## Covariates (not subject to selection)
219 | ##
220 | Z <- matrix(rnorm(n * q), nrow = n)
221 | 
222 | alpha <-  matrix(rnorm(q * d), nrow = q)
223 | 
224 | ## Gaussian responses
225 | ##
226 | Y_act <- matrix(rnorm(n * d0, mean = X_act \%*\% beta, sd = 0.5), nrow = n)
227 | Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
228 | shuff_y_ind <- sample(d)
229 | Y <- cbind(Y_act, Y_inact)[, shuff_y_ind] + Z \%*\% alpha
230 | 
231 | ## Binary responses
232 | ##
233 | Y_bin <- ifelse(Y > 0, 1, 0)
234 | 
235 | ########################
236 | ## Infer associations ##
237 | ########################
238 | 
239 | ## Continuous responses
240 | ##
241 | # We take p0_av = p0 (known here); this choice may, in some cases, result in
242 | # (too) conservative variable selections. In practice, it is advised to set
243 | # p0_av as a slightly overestimated guess of p0, or perform cross-validation
244 | # using function `set_cv'.
245 | 
246 | # No covariate
247 | #
248 | vb_g <- locus(Y = Y, X = X, p0_av = p0, link = "identity", user_seed = seed)
249 | 
250 | # With covariates
251 | #
252 | vb_g_z <- locus(Y = Y, X = X, p0_av = p0,  Z = Z, link = "identity",
253 |                 user_seed = seed)
254 | 
255 | ## Binary responses
256 | ##
257 | vb_logit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "logit",
258 |                   user_seed = seed)
259 | 
260 | vb_probit <- locus(Y = Y_bin, X = X, p0_av = p0, Z = Z, link = "probit",
261 |                    user_seed = seed)
262 | 
263 | ## Mix of continuous and binary responses
264 | ##
265 | Y_mix <- cbind(Y, Y_bin)
266 | ind_bin <- (d+1):(2*d)
267 | 
268 | vb_mix <- locus(Y = Y_mix, X = X, p0_av = p0, Z = Z, link = "mix",
269 |                 ind_bin = ind_bin, user_seed = seed)
270 | 
271 | }
272 | \references{
273 | H. Ruffieux, A. C. Davison, J. Hager, I. Irincheeva. Efficient inference for
274 |   genetic association studies with multiple outcomes. Biostatistics, 2017.
275 | 
276 | Y. Xu, and W. Yin. A block coordinate descent method for
277 |   regularized multiconvex optimization with applications to nonnegative
278 |   tensor factorization and completion. SIAM Journal on imaging sciences, 6,
279 |   pp.1758-1789, 2013.
280 | }
281 | \seealso{
282 | \code{\link{set_hyper}}, \code{\link{set_init}},
283 |   \code{\link{set_cv}}, \code{\link{set_blocks}}, \code{\link{set_groups}}
284 |   and \code{\link{set_struct}}.
285 | }
286 | 


--------------------------------------------------------------------------------
/R/update_vb.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal functions gathering the variational updates for the core algorithms.
  5 | # Besides improving code readability via modular programming, the main purpose
  6 | # is to avoid copy-and-paste programming, as most of these updates (or slightly
  7 | # modified versions) are used more than once in the different core algorithms.
  8 | # For this reason, we choose to create functions for most variational updates,
  9 | # even for those consisting in very basic operations.
 10 | # Note that we don't modularize the body of the core for loops for performance
 11 | # reasons.
 12 | 
 13 | 
 14 | #####################
 15 | ## alpha's updates ##
 16 | #####################
 17 | 
 18 | update_m2_alpha_ <- function(alpha_vb, sig2_alpha_vb, sweep = FALSE) {
 19 | 
 20 |   if(sweep) {
 21 | 
 22 |     sweep(alpha_vb ^ 2, 1, sig2_alpha_vb, `+`)
 23 | 
 24 |   } else {
 25 | 
 26 |     sig2_alpha_vb + alpha_vb ^ 2
 27 |   }
 28 | 
 29 | }
 30 | 
 31 | 
 32 | update_sig2_alpha_vb_ <- function(n, zeta2_inv_vb, tau_vb = NULL, intercept = FALSE, c = 1) {
 33 | 
 34 |   den <- n - 1 + zeta2_inv_vb
 35 | 
 36 |   if (intercept)
 37 |     den[1] <- den[1] + 1 # the first column of Z was not scaled, it is the intercept.
 38 | 
 39 |   if (is.null(tau_vb)) {
 40 | 
 41 |     1 / (c * den)
 42 | 
 43 |   } else {
 44 | 
 45 |     1 / (c * tcrossprod(den, as.matrix(tau_vb)))
 46 | 
 47 |   }
 48 | 
 49 | }
 50 | 
 51 | 
 52 | update_sig2_alpha_logit_vb_ <- function(Z, psi_vb, zeta2_inv_vb) {
 53 | 
 54 |   1 / sweep(2 * crossprod(Z ^ 2, psi_vb), 1, zeta2_inv_vb, `+`)
 55 | 
 56 | }
 57 | 
 58 | 
 59 | update_mat_z_mu_ <- function(Z, alpha_vb) Z %*% alpha_vb
 60 | 
 61 | 
 62 | ####################
 63 | ## beta's updates ##
 64 | ####################
 65 | 
 66 | update_beta_vb_ <- function(gam_vb, mu_beta_vb) gam_vb * mu_beta_vb
 67 | 
 68 | 
 69 | update_g_beta_vb_ <- function(list_mu_beta_vb, gam_vb) {
 70 | 
 71 |   G <- length(list_mu_beta_vb)
 72 | 
 73 |   lapply(1:G, function(g) sweep(list_mu_beta_vb[[g]], 2, gam_vb[g, ], `*`))
 74 | 
 75 | }
 76 | 
 77 | 
 78 | update_m2_beta_ <- function(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = FALSE) {
 79 | 
 80 |   if(sweep) {
 81 | 
 82 |     sweep(mu_beta_vb ^ 2, 2, sig2_beta_vb, `+`) * gam_vb
 83 | 
 84 |   } else {
 85 | 
 86 |     (mu_beta_vb ^ 2 + sig2_beta_vb) * gam_vb
 87 | 
 88 |   }
 89 | 
 90 | }
 91 | 
 92 | 
 93 | update_sig2_beta_vb_ <- function(n, sig2_inv_vb, tau_vb = NULL, c = 1) {
 94 | 
 95 |   if(is.null(tau_vb)) {
 96 | 
 97 |     1 / (c * (n - 1 + sig2_inv_vb))
 98 | 
 99 |   } else {
100 | 
101 |     1 / (c * (n - 1 + sig2_inv_vb) * tau_vb)
102 | 
103 |   }
104 | }
105 | 
106 | 
107 | update_sig2_beta_logit_vb_ <- function(X, psi_vb, sig2_inv_vb) {
108 | 
109 |   1 / (2 * crossprod(X ^ 2, psi_vb) + sig2_inv_vb)
110 | 
111 | }
112 | 
113 | 
114 | update_mat_x_m1_ <- function(X, beta_vb) X %*% beta_vb
115 | 
116 | 
117 | update_g_mat_x_m1_ <- function(list_X, list_beta_vb) {
118 | 
119 |   G <- length(list_X)
120 | 
121 |   Reduce(`+`, lapply(1:G, function(g) list_X[[g]] %*% list_beta_vb[[g]]))
122 | }
123 | 
124 | 
125 | update_g_m1_btb_ <- function(gam_vb, list_mu_beta_vb, list_sig2_beta_star, tau_vb) { ## not list_sig2_beta_star_inv!
126 | 
127 |   d <- length(tau_vb)
128 |   G <- length(list_mu_beta_vb)
129 | 
130 |   lapply(1:G, function(g) {
131 |     gam_vb[g, ]^2 * colSums(list_mu_beta_vb[[g]]^2) + # colSums(A^2) = diag(crossprod(A))
132 |       gam_vb[g, ] * (sum(diag(as.matrix(list_sig2_beta_star[[g]]))) / tau_vb +
133 |                        sapply(1:d, function(k) (1-gam_vb[g, k]) * sum(list_mu_beta_vb[[g]][, k]^2))) # tr(mu_gt mu_gt^T) = sum(mu_gt^2)
134 |   })
135 | 
136 | }
137 | 
138 | 
139 | update_g_m1_btXtXb_ <- function(list_X, gam_vb, list_mu_beta_vb, list_sig2_beta_star, tau_vb) {
140 | 
141 |   d <- length(tau_vb)
142 |   G <- length(list_mu_beta_vb)
143 | 
144 |   lapply(1:G, function(g) {
145 |     gam_vb[g, ]^2 * colSums((list_X[[g]] %*% list_mu_beta_vb[[g]])^2) +
146 |       gam_vb[g, ] * (sum(crossprod(list_X[[g]]) * list_sig2_beta_star[[g]]) / tau_vb +
147 |                        sapply(1:d, function(k) (1-gam_vb[g, k]) * sum(crossprod(list_X[[g]]) * tcrossprod(list_mu_beta_vb[[g]][, k])))) # tr(AB^T) = sum_ij A_ij B_ij
148 |   })
149 | }
150 | 
151 | 
152 | ########################
153 | ## c0 and c's updates ##
154 | ########################
155 | 
156 | update_sig2_c0_vb_ <- function(d, s02, c = 1) 1 / (c * (d + (1/s02)))
157 | 
158 | 
159 | ###################
160 | ## chi's updates ##
161 | ###################
162 | 
163 | update_chi_vb_ <- function(X, Z, beta_vb, m2_beta, mat_x_m1, mat_z_mu, sig2_alpha_vb) {
164 | 
165 |   sqrt(X^2 %*% m2_beta + mat_x_m1^2 - X^2 %*% beta_vb^2 + Z^2 %*% sig2_alpha_vb +
166 |          mat_z_mu^2 + 2 * mat_x_m1 * mat_z_mu)
167 | }
168 | 
169 | 
170 | update_psi_logit_vb_ <- function(chi_vb) {
171 | 
172 |   exp(log(exp(log_sigmoid_(chi_vb)) - 1 / 2) - log(2 * chi_vb))
173 | 
174 | }
175 | 
176 | 
177 | #####################
178 | ## omega's updates ##
179 | #####################
180 | 
181 | a_vb <- update_a_vb <- function(a, rs_gam, c = 1) c * (a + rs_gam) - c + 1
182 | 
183 | 
184 | b_vb <- update_b_vb <- function(b, d, rs_gam, c = 1) c * (b - rs_gam + d) - c + 1
185 | 
186 | 
187 | update_log_om_vb <- function(a, digam_sum, rs_gam, c = 1) digamma(c * (a + rs_gam) - c + 1) - digam_sum
188 | 
189 | 
190 | update_log_1_min_om_vb <- function(b, d, digam_sum, rs_gam, c = 1) digamma(c * (b - rs_gam + d) - c + 1) - digam_sum
191 | 
192 | 
193 | 
194 | #####################
195 | ## sigma's updates ##
196 | #####################
197 | 
198 | update_lambda_vb_ <- function(lambda, sum_gam, c = 1) c * (lambda + sum_gam / 2) - c + 1
199 | 
200 | 
201 | update_g_lambda_vb_ <- function(lambda, g_sizes, rs_gam) lambda + sum(g_sizes * rs_gam) / 2
202 | 
203 | 
204 | update_nu_vb_ <- function(nu, m2_beta, tau_vb, c = 1) c * as.numeric(nu + crossprod(tau_vb, colSums(m2_beta)) / 2)
205 | 
206 | 
207 | update_g_nu_vb_ <- function(nu, list_m1_btb, tau_vb) nu + sum(tau_vb * Reduce(`+`, list_m1_btb))/2
208 | 
209 | 
210 | update_nu_bin_vb_ <- function(nu, m2_beta) nu + sum(m2_beta) / 2
211 | 
212 | 
213 | update_log_sig2_inv_vb_ <- function(lambda_vb, nu_vb) digamma(lambda_vb) - log(nu_vb)
214 | 
215 | 
216 | ###################
217 | ## tau's updates ##
218 | ###################
219 | 
220 | update_eta_vb_ <- function(n, eta, gam_vb, c = 1) c * (eta + n / 2 + colSums(gam_vb) / 2) - c + 1
221 | 
222 | 
223 | update_g_eta_vb_ <- function(n, eta, g_sizes, gam_vb) eta + n / 2 + as.numeric(crossprod(gam_vb, g_sizes)) / 2
224 | 
225 | 
226 | update_eta_z_vb_ <- function(n, q, eta, gam_vb, c = 1) c * (eta + n / 2 + colSums(gam_vb) / 2 + q / 2) - c + 1
227 | 
228 | 
229 | update_kappa_vb_ <- function(Y, kappa, mat_x_m1, beta_vb, m2_beta, sig2_inv_vb, c = 1) {
230 | 
231 |   n <- nrow(Y)
232 | 
233 |   c * (kappa + (colSums(Y^2) - 2 * colSums(Y * mat_x_m1)  +
234 |                   (n - 1 + sig2_inv_vb) * colSums(m2_beta) +
235 |                   colSums(mat_x_m1^2) - (n - 1) * colSums(beta_vb^2))/ 2)
236 | 
237 | }
238 | 
239 | 
240 | update_g_kappa_vb_ <- function(Y, list_X, kappa, list_beta_vb, list_m1_btb,
241 |                                list_m1_btXtXb, mat_x_m1, sig2_inv_vb) {
242 | 
243 |   n <- nrow(Y)
244 |   G <- length(list_beta_vb)
245 | 
246 |   # avoid using do.call() as can trigger node stack overflow
247 |   kappa + (colSums(Y^2) - 2 * colSums(Y * mat_x_m1)  +
248 |              Reduce(`+`, list_m1_btXtXb) +
249 |              sig2_inv_vb * Reduce(`+`, list_m1_btb) +
250 |              colSums(mat_x_m1^2) -
251 |              Reduce(`+`, lapply(1:G, function(g) colSums((list_X[[g]] %*% list_beta_vb[[g]])^2) ))) / 2
252 | }
253 | 
254 | 
255 | update_kappa_z_vb_ <- function(Y, Z, kappa, alpha_vb, beta_vb, m2_alpha,
256 |                                m2_beta, mat_x_m1, mat_z_mu, sig2_inv_vb,
257 |                                zeta2_inv_vb, intercept = FALSE, c = 1) {
258 |   n <- nrow(Y)
259 | 
260 |   kappa_vb <- c * (kappa + (colSums(Y^2) - 2 * colSums(Y * (mat_x_m1 + mat_z_mu))  +
261 |                          (n - 1 + sig2_inv_vb) * colSums(m2_beta) +
262 |                          colSums(mat_x_m1^2) - (n - 1) * colSums(beta_vb^2) +
263 |                          (n - 1) * colSums(m2_alpha) +
264 |                          crossprod(m2_alpha, zeta2_inv_vb) +
265 |                          colSums(mat_z_mu^2) - (n - 1) * colSums(alpha_vb^2) +
266 |                          2 * colSums(mat_x_m1 * mat_z_mu))/ 2)
267 | 
268 |   if (intercept)
269 |     kappa_vb <- kappa_vb + c * (m2_alpha[1, ] - (alpha_vb[1, ])^2) / 2
270 | 
271 |   kappa_vb
272 | }
273 | 
274 | 
275 | update_log_tau_vb_ <- function(eta_vb, kappa_vb) digamma(eta_vb) - log(kappa_vb)
276 | 
277 | 
278 | #####################
279 | ## theta's updates ##
280 | #####################
281 | 
282 | update_theta_vb_ <- function(W, m0, S0_inv, sig2_theta_vb, vec_fac_st,
283 |                                 mat_add = 0, is_mat = FALSE, c = 1) {
284 | 
285 |   if (is.null(vec_fac_st)) {
286 | 
287 |     if (is_mat) {
288 | 
289 |       theta_vb <- c * sig2_theta_vb * (rowSums(W) + S0_inv * m0 - rowSums(mat_add)) 
290 | 
291 |     } else {
292 | 
293 |       theta_vb <- c * sig2_theta_vb * (rowSums(W) + S0_inv * m0 - sum(mat_add)) 
294 | 
295 |     }
296 | 
297 | 
298 |   } else {
299 | 
300 |     if (c != 1)
301 |       stop("Annealing not implemented when Sigma_0 is not the identity matrix.")
302 | 
303 |     bl_ids <- unique(vec_fac_st)
304 |     n_bl <- length(bl_ids)
305 | 
306 |     if (is_mat) {
307 | 
308 |       theta_vb <- unlist(lapply(1:n_bl, function(bl) {
309 |         sig2_theta_vb[[bl]] %*% (rowSums(W[vec_fac_st == bl_ids[bl], , drop = FALSE]) +
310 |                                    S0_inv[[bl]] %*% m0[vec_fac_st == bl_ids[bl]] -
311 |                                    rowSums(mat_add[vec_fac_st == bl_ids[bl], , drop = FALSE]))  # mat_add = sweep(mat_v_mu, 1, theta_vb, `-`)
312 |       }))
313 |     } else {
314 | 
315 |       theta_vb <- unlist(lapply(1:n_bl, function(bl) {
316 |         sig2_theta_vb[[bl]] %*% (rowSums(W[vec_fac_st == bl_ids[bl], , drop = FALSE]) +
317 |                                    S0_inv[[bl]] %*% m0[vec_fac_st == bl_ids[bl]] -
318 |                                    sum(mat_add)) 
319 |       }))
320 |     }
321 | 
322 |   }
323 | 
324 | }
325 | 
326 | 
327 | update_sig2_theta_vb_ <- function(d, p, list_struct, s02, X = NULL, c = 1) {
328 | 
329 |   if (is.null(list_struct)) {
330 | 
331 |     vec_fac_st <- NULL
332 | 
333 |     S0_inv <- 1 / s02 # stands for a diagonal matrix of size p with this value on the (constant) diagonal
334 |     sig2_theta_vb <- as.numeric(update_sig2_c0_vb_(d, s02, c = c)) # idem
335 | 
336 |     vec_sum_log_det_theta <- - p * (log(s02) + log(d + S0_inv))
337 | 
338 |   } else {
339 | 
340 |     if (c != 1)
341 |       stop("Annealing not implemented when Sigma_0 is not the identity matrix.")
342 | 
343 |     if (is.null(X))
344 |       stop("X must be passed to the update_sig2_theta_function.")
345 | 
346 |     vec_fac_st <- list_struct$vec_fac_st
347 |     n_cpus <- list_struct$n_cpus
348 | 
349 |     S0_inv <- parallel::mclapply(unique(vec_fac_st), function(bl) {
350 | 
351 |       corX <- cor(X[, vec_fac_st == bl, drop = FALSE])
352 |       corX <- as.matrix(Matrix::nearPD(corX, corr = TRUE, do2eigen = TRUE)$mat) # regularization in case of non-positive definiteness.
353 | 
354 |       as.matrix(solve(corX) / s02)
355 |     }, mc.cores = n_cpus)
356 | 
357 |     if (is.list(S0_inv)) {
358 | 
359 |       sig2_theta_vb <- parallel::mclapply(S0_inv, function(mat) {
360 |         as.matrix(solve(mat + diag(d, nrow(mat))))
361 |       }, mc.cores = n_cpus)
362 | 
363 |     } else {
364 | 
365 |       sig2_theta_vb <- 1 / (S0_inv + d)
366 | 
367 |     }
368 | 
369 |     vec_sum_log_det_theta <- log_det(S0_inv) + log_det(sig2_theta_vb) # vec_sum_log_det_theta[bl] = log(det(S0_inv_bl)) + log(det(sig2_theta_vb_bl))
370 | 
371 |   }
372 | 
373 |   create_named_list_(S0_inv, sig2_theta_vb, vec_sum_log_det_theta, vec_fac_st)
374 | }
375 | 
376 | 
377 | 
378 | #################
379 | ## W's updates ##
380 | #################
381 | 
382 | update_W_probit_ <- function(Y, mat_z_mu, mat_x_m1) {
383 |   
384 |   mat_z_mu + mat_x_m1 + inv_mills_ratio_matrix_(Y, mat_z_mu + mat_x_m1)
385 | 
386 | }
387 | 
388 | 
389 | update_W_struct_ <- function(gam_vb, theta_vb) {
390 | 
391 |   log_pnorm <- pnorm(theta_vb, log.p = TRUE)
392 |   log_1_pnorm <- pnorm(theta_vb, log.p = TRUE, lower.tail = FALSE)
393 |   
394 |   imr0 <- inv_mills_ratio_(0, theta_vb, log_1_pnorm, log_pnorm)
395 |   
396 |   sweep(sweep(gam_vb, 1, (inv_mills_ratio_(1, theta_vb, log_1_pnorm, log_pnorm) - imr0), `*`),
397 |         1,  theta_vb + imr0, `+`)
398 | 
399 | }
400 | 
401 | 
402 | ####################
403 | ## zeta's updates ##
404 | ####################
405 | 
406 | 
407 | update_phi_z_vb_ <- function(phi, d, c = 1) c * (phi + d / 2) - c + 1
408 | 
409 | 
410 | update_xi_z_vb_ <- function(xi, tau_vb, m2_alpha, c = 1) c * (xi + m2_alpha %*% tau_vb / 2)
411 | 
412 | 
413 | update_xi_bin_vb_ <- function(xi, m2_alpha) xi + rowSums(m2_alpha) / 2
414 | 
415 | 
416 | update_log_zeta2_inv_vb_ <- function(phi_vb, xi_vb) digamma(phi_vb) - log(xi_vb)
417 | 


--------------------------------------------------------------------------------
/R/locus_z_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for identity link,
  5 | # fixed covariates and no external annotation variables.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_z_core_ <- function(Y, X, Z, list_hyper, gam_vb, alpha_vb, mu_beta_vb,
  9 |                           sig2_alpha_vb, sig2_beta_vb, tau_vb, tol, maxit, anneal,
 10 |                           verbose, batch = "y", full_output = FALSE, debug = TRUE) {
 11 | 
 12 |   # Y must have been centered, and X and Z, standardized (except the intercept in Z).
 13 | 
 14 |   d <- ncol(Y)
 15 |   n <- nrow(Y)
 16 |   p <- ncol(X)
 17 |   q <- ncol(Z)
 18 |   
 19 |   # Preparing annealing if any
 20 |   #
 21 |   if (is.null(anneal)) {
 22 |     annealing <- FALSE
 23 |     c <- 1
 24 |   } else {
 25 |     annealing <- TRUE
 26 |     ladder <- get_annealing_ladder_(anneal, verbose)
 27 |     c <- ladder[1]
 28 |   }
 29 |   
 30 |   eps <- .Machine$double.eps^0.5
 31 | 
 32 |   with(list_hyper, {  # list_init not used with the with() function to avoid
 33 |                       # copy-on-write for large objects
 34 | 
 35 |     m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb)
 36 | 
 37 |     beta_vb <- update_beta_vb_(gam_vb, mu_beta_vb)
 38 |     m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
 39 | 
 40 |     mat_x_m1 <- update_mat_x_m1_(X, beta_vb)
 41 |     mat_z_mu <- update_mat_z_mu_(Z, alpha_vb)
 42 | 
 43 |     rs_gam <- rowSums(gam_vb)
 44 |     sum_gam <- sum(rs_gam)
 45 | 
 46 |     converged <- FALSE
 47 |     lb_new <- -Inf
 48 |     it <- 0
 49 | 
 50 |     while ((!converged) & (it < maxit)) {
 51 | 
 52 |       lb_old <- lb_new
 53 |       it <- it + 1
 54 | 
 55 |       if (verbose & (it == 1 | it %% 5 == 0))
 56 |         cat(paste0("Iteration ", format(it), "... \n"))
 57 | 
 58 |       digam_sum <- digamma(c * (a + b + d) - 2 * c + 2) 
 59 |       
 60 |       # % #
 61 |       phi_vb <- update_phi_z_vb_(phi, d, c = c)
 62 |       xi_vb <- update_xi_z_vb_(xi, tau_vb, m2_alpha, c = c) ###
 63 | 
 64 |       zeta2_inv_vb <- phi_vb / xi_vb
 65 |       # % #
 66 | 
 67 |       # % #
 68 |       lambda_vb <- update_lambda_vb_(lambda, sum_gam, c = c)
 69 |       nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb, c = c)
 70 | 
 71 |       sig2_inv_vb <- lambda_vb / nu_vb
 72 |       # % #
 73 | 
 74 |       # % #
 75 |       eta_vb <- update_eta_z_vb_(n, q, eta, gam_vb, c = c)
 76 |       kappa_vb <- update_kappa_z_vb_(Y, Z, kappa, alpha_vb, beta_vb,
 77 |                                      m2_alpha, m2_beta, mat_x_m1, mat_z_mu,
 78 |                                      sig2_inv_vb, zeta2_inv_vb, c = c)
 79 | 
 80 |       tau_vb <- eta_vb / kappa_vb
 81 |       # % #
 82 | 
 83 |       sig2_alpha_vb <- update_sig2_alpha_vb_(n, zeta2_inv_vb, tau_vb, c = c)
 84 |       sig2_beta_vb <- update_sig2_beta_vb_(n, sig2_inv_vb, tau_vb, c = c)
 85 | 
 86 |       log_tau_vb <- update_log_tau_vb_(eta_vb, kappa_vb)
 87 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 88 | 
 89 | 
 90 |       # different possible batch-coordinate ascent schemes:
 91 | 
 92 |       if (batch == "y") { # optimal scheme
 93 | 
 94 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
 95 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
 96 | 
 97 |         for (i in sample(1:q)) {
 98 |           mat_z_mu <- mat_z_mu - tcrossprod(Z[, i], alpha_vb[i, ])
 99 | 
100 |           alpha_vb[i, ] <- c * sig2_alpha_vb[i, ] * (tau_vb *
101 |                                                       crossprod(Y  - mat_z_mu - mat_x_m1, Z[, i]))
102 | 
103 |           mat_z_mu <- mat_z_mu + tcrossprod(Z[, i], alpha_vb[i, ])
104 |         }
105 | 
106 |         # C++ Eigen call for expensive updates
107 |         shuffled_ind <- as.numeric(sample(0:(p-1))) # Zero-based index in C++
108 | 
109 |         coreZLoop(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
110 |                   log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb,
111 |                   sig2_beta_vb, tau_vb, shuffled_ind, c = c)
112 | 
113 |         rs_gam <- rowSums(gam_vb)
114 | 
115 |       } else if (batch == "x") { # used only internally, convergence not ensured
116 | 
117 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
118 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
119 | 
120 |         for (k in sample(1:d)) {
121 | 
122 |           alpha_vb[, k] <- c * sig2_alpha_vb[, k] * tau_vb[k] *
123 |             (crossprod(Y[, k]  - mat_z_mu[, k] - mat_x_m1[, k], Z) +  (n - 1) * alpha_vb[, k])
124 | 
125 |           mat_z_mu[, k] <- Z %*% alpha_vb[, k]
126 | 
127 |           mu_beta_vb[, k] <- c * sig2_beta_vb[k] * tau_vb[k] *
128 |             (crossprod(Y[, k] -  mat_z_mu[, k] - mat_x_m1[, k], X) + (n - 1) * beta_vb[, k])
129 | 
130 | 
131 |           gam_vb[, k] <- exp(-log_one_plus_exp_(c * (log_1_min_om_vb - log_om_vb -
132 |                                                   log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
133 |                                                   mu_beta_vb[, k] ^ 2 / (2 * sig2_beta_vb[k]) -
134 |                                                   log(sig2_beta_vb[k]) / 2)))
135 | 
136 |           beta_vb[, k] <- mu_beta_vb[, k] * gam_vb[, k]
137 | 
138 |           mat_x_m1[, k] <- X %*% beta_vb[, k]
139 | 
140 |         }
141 | 
142 |         rs_gam <- rowSums(gam_vb)
143 | 
144 |       } else if (batch == "x-y") { # used only internally, convergence not ensured
145 | 
146 |         if (annealing)
147 |           stop("Annealing not implemented for this scheme. Exit.")
148 |         
149 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
150 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
151 | 
152 |         alpha_vb <- sweep(sig2_alpha_vb * (crossprod(Z, Y - mat_z_mu - mat_x_m1) + (n - 1) * alpha_vb), 2, tau_vb, `*`)
153 | 
154 |         mat_z_mu <- Z %*% alpha_vb
155 | 
156 |         # C++ Eigen call for expensive updates
157 |         coreZBatch(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
158 |                    log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb)
159 | 
160 |         rs_gam <- rowSums(gam_vb)
161 | 
162 |       } else if (batch == "0"){ # no batch, used only internally
163 | 
164 |         for (k in sample(1:d)) {
165 | 
166 |           log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam, c = c)
167 |           log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam, c = c)
168 | 
169 |           for (i in sample(1:q)) {
170 | 
171 |             mat_z_mu[, k] <- mat_z_mu[, k] - Z[, i] * alpha_vb[i, k]
172 | 
173 |             alpha_vb[i, k] <- c * sig2_alpha_vb[i, k] * tau_vb[k] *
174 |               crossprod(Z[, i], Y[,k]  - mat_z_mu[, k] - mat_x_m1[, k])
175 | 
176 |             mat_z_mu[, k] <- mat_z_mu[, k] + Z[, i] * alpha_vb[i, k]
177 |           }
178 | 
179 |           for (j in sample(1:p)) {
180 | 
181 |             mat_x_m1[, k] <- mat_x_m1[, k] - X[, j] * beta_vb[j, k]
182 | 
183 |             mu_beta_vb[j, k] <- c * sig2_beta_vb[k] * tau_vb[k] *
184 |               crossprod(Y[,k] - mat_x_m1[, k] - mat_z_mu[, k], X[, j])
185 | 
186 |             gam_vb[j, k] <- exp(-log_one_plus_exp_(c * (log_1_min_om_vb[j] - log_om_vb[j] -
187 |                                                      log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
188 |                                                      mu_beta_vb[j, k] ^ 2 / (2 * sig2_beta_vb[k]) -
189 |                                                      log(sig2_beta_vb[k]) / 2)))
190 | 
191 |             beta_vb[j, k] <- mu_beta_vb[j, k] * gam_vb[j, k]
192 | 
193 |             mat_x_m1[, k] <- mat_x_m1[, k] + X[, j] * beta_vb[j, k]
194 | 
195 |           }
196 | 
197 |           rs_gam <- rowSums(gam_vb)
198 | 
199 |         }
200 | 
201 |       } else {
202 | 
203 |         stop ("Batch scheme not defined. Exit.")
204 | 
205 |       }
206 | 
207 |       m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb)
208 |       m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
209 | 
210 |       a_vb <- update_a_vb(a, rs_gam, c = c)
211 |       b_vb <- update_b_vb(b, d, rs_gam, c = c)
212 |       om_vb <- a_vb / (a_vb + b_vb)
213 | 
214 |       sum_gam <- sum(rs_gam)
215 | 
216 |       
217 |       if (annealing) {
218 |         
219 |         if (verbose & (it == 1 | it %% 5 == 0))
220 |           cat(paste0("Temperature = ", format(1 / c, digits = 4), "\n\n"))
221 |         
222 |         c <- ifelse(it < length(ladder), ladder[it + 1], 1)
223 |         
224 |         if (isTRUE(all.equal(c, 1))) {
225 |           
226 |           annealing <- FALSE
227 |           
228 |           if (verbose)
229 |             cat("** Exiting annealing mode. **\n\n")
230 |         }
231 |         
232 |       } else {
233 |         
234 |         lb_new <- elbo_z_(Y, Z, a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, lambda,
235 |                           alpha_vb, nu, phi, phi_vb, sig2_alpha_vb,
236 |                           sig2_beta_vb, sig2_inv_vb, tau_vb, xi, zeta2_inv_vb,
237 |                           m2_alpha, m2_beta, mat_x_m1, mat_z_mu, sum_gam)
238 |   
239 |         if (verbose & (it == 1 | it %% 5 == 0))
240 |           cat(paste0("ELBO = ", format(lb_new), "\n\n"))
241 |         
242 |         if (debug && lb_new + eps < lb_old)
243 |           stop("ELBO not increasing monotonically. Exit. ")
244 |         
245 |         converged <- (abs(lb_new - lb_old) < tol)
246 | 
247 |       }
248 |       
249 |     }
250 | 
251 |     if (verbose) {
252 |       if (converged) {
253 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
254 |                   "Optimal marginal log-likelihood variational lower bound ",
255 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
256 |       } else {
257 |         warning("Maximal number of iterations reached before convergence. Exit.")
258 |       }
259 |     }
260 | 
261 |     lb_opt <- lb_new
262 | 
263 |     names_x <- colnames(X)
264 |     names_y <- colnames(Y)
265 |     names_z <- colnames(Z)
266 |     
267 |     rownames(gam_vb) <- rownames(beta_vb) <- names_x
268 |     colnames(gam_vb) <- colnames(beta_vb) <- names_y
269 |     
270 |     names(om_vb) <- names_x
271 |     rownames(alpha_vb) <- names_z
272 |     colnames(alpha_vb) <- names_y
273 |     
274 |     diff_lb <- abs(lb_opt - lb_old)
275 |     
276 |     if (full_output) { # for internal use only
277 |       
278 |       create_named_list_(a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, lambda,
279 |                          alpha_vb, mu_beta_vb, nu, om_vb, phi, phi_vb, 
280 |                          sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb, tau_vb, xi, 
281 |                          zeta2_inv_vb, m2_alpha, m2_beta, sum_gam, converged, 
282 |                          it, lb_opt, diff_lb, annealing)
283 |     } else {
284 | 
285 |       create_named_list_(beta_vb, gam_vb, om_vb, alpha_vb, converged, it, lb_opt, diff_lb, annealing)
286 |     }
287 |   })
288 | }
289 | 
290 | 
291 | 
292 | # Internal function which implements the marginal log-likelihood variational
293 | # lower bound (ELBO) corresponding to the `locus_z_core` algorithm.
294 | #
295 | elbo_z_ <- function(Y, Z, a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, lambda,
296 |                     alpha_vb, nu, phi, phi_vb, sig2_alpha_vb, sig2_beta_vb,
297 |                     sig2_inv_vb, tau_vb, xi, zeta2_inv_vb, m2_alpha, m2_beta, 
298 |                     mat_x_m1, mat_z_mu, sum_gam) {
299 | 
300 | 
301 |   n <- nrow(Y)
302 |   q <- ncol(Z)
303 | 
304 |   xi_vb <- update_xi_z_vb_(xi, tau_vb, m2_alpha)
305 | 
306 |   eta_vb <- update_eta_z_vb_(n, q, eta, gam_vb)
307 | 
308 |   kappa_vb <- update_kappa_z_vb_(Y, Z, kappa, alpha_vb, beta_vb, m2_alpha,
309 |                                  m2_beta, mat_x_m1, mat_z_mu, sig2_inv_vb,
310 |                                  zeta2_inv_vb)
311 | 
312 |   lambda_vb <- update_lambda_vb_(lambda, sum_gam)
313 |   nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb)
314 | 
315 |   log_tau_vb <- digamma(eta_vb) - log(kappa_vb)
316 |   log_zeta2_inv_vb <- digamma(phi_vb) - log(xi_vb)
317 |   log_sig2_inv_vb <- digamma(lambda_vb) - log(nu_vb)
318 |   log_om_vb <- digamma(a_vb) - digamma(a_vb + b_vb)
319 |   log_1_min_om_vb <- digamma(b_vb) - digamma(a_vb + b_vb)
320 | 
321 | 
322 |   elbo_A <- e_y_(n, kappa, kappa_vb, log_tau_vb, m2_beta, sig2_inv_vb, tau_vb,
323 |                 m2_alpha, zeta2_inv_vb)
324 | 
325 |   elbo_B <- e_beta_gamma_(gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
326 |                          log_tau_vb, m2_beta, sig2_beta_vb, sig2_inv_vb, tau_vb)
327 | 
328 |   elbo_C <- e_tau_(eta, eta_vb, kappa, kappa_vb, log_tau_vb, tau_vb)
329 | 
330 |   elbo_D <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
331 | 
332 |   elbo_E <- e_omega_(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb)
333 | 
334 |   elbo_F <- e_alpha_(m2_alpha, log_tau_vb, log_zeta2_inv_vb, sig2_alpha_vb,
335 |                        tau_vb, zeta2_inv_vb)
336 | 
337 |   elbo_G <- e_zeta2_inv_(log_zeta2_inv_vb, phi, phi_vb, xi, xi_vb, zeta2_inv_vb)
338 | 
339 | 
340 |   elbo_A + elbo_B + elbo_C + elbo_D + elbo_E + elbo_F + elbo_G
341 | }
342 | 
343 | 


--------------------------------------------------------------------------------
/R/locus_mix_core.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | #
  4 | # Internal core function to call the variational algorithm for identity-probit link,
  5 | # optional fixed covariates and no external annotation variables.
  6 | # See help of `locus` function for details.
  7 | #
  8 | locus_mix_core_ <- function(Y, X, Z, ind_bin, list_hyper, gam_vb, alpha_vb,
  9 |                             mu_beta_vb, sig2_alpha_vb, sig2_beta_vb, tau_vb,
 10 |                             tol, maxit, verbose, batch = "y",
 11 |                             full_output = FALSE, debug = FALSE) {
 12 | 
 13 |   # Y must have its continuous variables centered,
 14 |   # and X and Z must have been standardized (except intercept in Z).
 15 | 
 16 |   d <- ncol(Y)
 17 |   n <- nrow(Y)
 18 |   p <- ncol(X)
 19 |   q <- ncol(Z)
 20 | 
 21 |   W <- Y
 22 |   Y_bin <- Y[, ind_bin, drop = FALSE]
 23 |   Y_cont <- Y[, -ind_bin, drop = FALSE]
 24 |   rm(Y)
 25 | 
 26 |   with(list_hyper, {  # list_init not used with the with() function to avoid
 27 |     # copy-on-write for large objects
 28 |     m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb)
 29 | 
 30 |     beta_vb <- update_beta_vb_(gam_vb, mu_beta_vb)
 31 |     m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
 32 | 
 33 |     mat_x_m1 <- update_mat_x_m1_(X, beta_vb)
 34 |     mat_z_mu <- update_mat_z_mu_(Z, alpha_vb)
 35 | 
 36 |     # no drop = FALSE for W, as replacement not allowed in this case
 37 |     W[, ind_bin] <- update_W_probit_(Y_bin,
 38 |                                      mat_z_mu[, ind_bin, drop = FALSE],
 39 |                                      mat_x_m1[, ind_bin, drop = FALSE])
 40 | 
 41 |     phi_vb <- update_phi_z_vb_(phi, d)
 42 | 
 43 |     rs_gam <- rowSums(gam_vb)
 44 |     sum_gam <- sum(rs_gam)
 45 |     digam_sum <- digamma(a + b + d)
 46 | 
 47 |     log_tau_vb <- rep(0, d)
 48 | 
 49 |     converged <- FALSE
 50 |     lb_new <- -Inf
 51 |     it <- 0
 52 | 
 53 |     while ((!converged) & (it < maxit)) {
 54 | 
 55 |       lb_old <- lb_new
 56 |       it <- it + 1
 57 | 
 58 |       if (verbose & (it == 1 | it %% 5 == 0))
 59 |         cat(paste0("Iteration ", format(it), "... \n"))
 60 | 
 61 |       # % #
 62 |       xi_vb <- update_xi_z_vb_(xi, tau_vb, m2_alpha)
 63 | 
 64 |       zeta2_inv_vb <- phi_vb / xi_vb
 65 |       # % #
 66 | 
 67 |       # % #
 68 |       lambda_vb <- update_lambda_vb_(lambda, sum_gam)
 69 |       nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb)
 70 | 
 71 |       sig2_inv_vb <- lambda_vb / nu_vb
 72 |       # % #
 73 | 
 74 |       # % #
 75 |       eta_vb <- update_eta_z_vb_(n, q, eta, gam_vb[, -ind_bin, drop = FALSE])
 76 | 
 77 |       kappa_vb <- update_kappa_z_vb_(Y_cont, Z, kappa,
 78 |                                      alpha_vb[, -ind_bin, drop = FALSE],
 79 |                                      beta_vb[, -ind_bin, drop = FALSE],
 80 |                                      m2_alpha[, -ind_bin, drop = FALSE],
 81 |                                      m2_beta[, -ind_bin, drop = FALSE],
 82 |                                      mat_x_m1[, -ind_bin, drop = FALSE],
 83 |                                      mat_z_mu[, -ind_bin, drop = FALSE],
 84 |                                      sig2_inv_vb, zeta2_inv_vb, intercept = TRUE)
 85 | 
 86 |       tau_vb[-ind_bin] <- eta_vb / kappa_vb
 87 |       # % #
 88 | 
 89 |       sig2_alpha_vb <- update_sig2_alpha_vb_(n, zeta2_inv_vb, tau_vb, intercept = TRUE)
 90 |       sig2_beta_vb <- update_sig2_beta_vb_(n, sig2_inv_vb, tau_vb)
 91 | 
 92 |       log_tau_vb[-ind_bin] <- update_log_tau_vb_(eta_vb, kappa_vb)
 93 |       log_sig2_inv_vb <- update_log_sig2_inv_vb_(lambda_vb, nu_vb)
 94 | 
 95 | 
 96 |       # different possible batch-coordinate ascent schemes:
 97 | 
 98 |       if (batch == "y") { # optimal scheme
 99 | 
100 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
101 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
102 | 
103 |         for (i in sample(1:q)) {
104 |           mat_z_mu <- mat_z_mu - tcrossprod(Z[, i], alpha_vb[i, ])
105 | 
106 |           alpha_vb[i, ] <- sig2_alpha_vb[i, ] * (tau_vb *
107 |                                                       crossprod(W  - mat_z_mu - mat_x_m1, Z[, i]))
108 | 
109 |           mat_z_mu <- mat_z_mu + tcrossprod(Z[, i], alpha_vb[i, ])
110 |         }
111 | 
112 |         # C++ Eigen call for expensive updates
113 |         shuffled_ind <- as.numeric(sample(0:(p-1))) # Zero-based index in C++
114 | 
115 |         coreZLoop(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
116 |                   log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb,
117 |                   sig2_beta_vb, tau_vb, shuffled_ind)
118 | 
119 |         rs_gam <- rowSums(gam_vb)
120 | 
121 |       } else if (batch == "x") { # used internally for testing purposes,
122 |         # convergence not ensured as ELBO not batch-concave
123 | 
124 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
125 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
126 | 
127 |         for (k in sample(1:d)) {
128 | 
129 |           alpha_vb[, k] <- sig2_alpha_vb[, k] * tau_vb[k] *
130 |             (crossprod(W[, k]  - mat_z_mu[, k] - mat_x_m1[, k], Z) +  (n - 1) * alpha_vb[, k])
131 | 
132 |           alpha_vb[1, k] <- alpha_vb[1, k] + sig2_alpha_vb[1, k] * tau_vb[k] * alpha_vb[1, k]
133 | 
134 |           mat_z_mu[, k] <- Z %*% alpha_vb[, k]
135 | 
136 |           mu_beta_vb[, k] <- sig2_beta_vb[k] * tau_vb[k] * (crossprod(W[, k] -  mat_z_mu[, k] - mat_x_m1[, k], X) +
137 |                                                               (n - 1) * beta_vb[, k])
138 | 
139 | 
140 |           gam_vb[, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb - log_om_vb -
141 |                                                   log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
142 |                                                   mu_beta_vb[, k] ^ 2 / (2 * sig2_beta_vb[k]) -
143 |                                                   log(sig2_beta_vb[k]) / 2))
144 | 
145 |           beta_vb[, k] <- mu_beta_vb[, k] * gam_vb[, k]
146 | 
147 |           mat_x_m1[, k] <- X %*% beta_vb[, k]
148 | 
149 |         }
150 | 
151 |         rs_gam <- rowSums(gam_vb)
152 | 
153 |       } else if (batch == "x-y") { # used only internally, convergence not ensured
154 | 
155 |         log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
156 |         log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
157 | 
158 |         alpha_vb <- sweep(sig2_alpha_vb * (crossprod(Z, W - mat_z_mu - mat_x_m1) + (n - 1) * alpha_vb), 2, tau_vb, `*`)
159 | 
160 |         alpha_vb[1, ] <- alpha_vb[1, ] + sig2_alpha_vb[1, ] * tau_vb * alpha_vb[1, ]
161 | 
162 |         mat_z_mu <- Z %*% alpha_vb
163 | 
164 |         # C++ Eigen call for expensive updates
165 |         coreZBatch(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
166 |                    log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb,
167 |                    sig2_beta_vb, tau_vb)
168 | 
169 |         rs_gam <- rowSums(gam_vb)
170 | 
171 |       } else if (batch == "0"){ # no batch, used only internally
172 | 
173 |         for (k in sample(1:d)) {
174 | 
175 |           log_om_vb <- update_log_om_vb(a, digam_sum, rs_gam)
176 |           log_1_min_om_vb <- update_log_1_min_om_vb(b, d, digam_sum, rs_gam)
177 | 
178 |           for (i in sample(1:q)) {
179 | 
180 |             mat_z_mu[, k] <- mat_z_mu[, k] - Z[, i] * alpha_vb[i, k]
181 | 
182 |             alpha_vb[i, k] <- sig2_alpha_vb[i, k] * tau_vb[k] *
183 |               crossprod(W[, k] - mat_z_mu[, k] - mat_x_m1[, k], Z[, i])
184 | 
185 |             mat_z_mu[, k] <- mat_z_mu[, k] + Z[, i] * alpha_vb[i, k]
186 |           }
187 | 
188 |           for (j in sample(1:p)) {
189 | 
190 |             mat_x_m1[, k] <- mat_x_m1[, k] - X[, j] * beta_vb[j, k]
191 | 
192 |             mu_beta_vb[j, k] <- sig2_beta_vb[k] * tau_vb[k] *
193 |               crossprod(W[, k] - mat_x_m1[, k] - mat_z_mu[, k], X[, j])
194 | 
195 |             gam_vb[j, k] <- exp(-log_one_plus_exp_(log_1_min_om_vb[j] - log_om_vb[j] -
196 |                                                      log_tau_vb[k] / 2 - log_sig2_inv_vb / 2 -
197 |                                                      mu_beta_vb[j, k] ^ 2 / (2 * sig2_beta_vb[k]) -
198 |                                                      log(sig2_beta_vb[k]) / 2))
199 | 
200 |             beta_vb[j, k] <- mu_beta_vb[j, k] * gam_vb[j, k]
201 | 
202 |             mat_x_m1[, k] <- mat_x_m1[, k] + X[, j] * beta_vb[j, k]
203 | 
204 |           }
205 | 
206 |           rs_gam <- rowSums(gam_vb)
207 | 
208 |         }
209 | 
210 |       } else {
211 | 
212 |         stop ("Batch scheme not defined. Exit.")
213 | 
214 |       }
215 | 
216 |       m2_alpha <- update_m2_alpha_(alpha_vb, sig2_alpha_vb)
217 |       m2_beta <- update_m2_beta_(gam_vb, mu_beta_vb, sig2_beta_vb, sweep = TRUE)
218 | 
219 |       W[, ind_bin] <- update_W_probit_(Y_bin,
220 |                                        mat_z_mu[, ind_bin, drop = FALSE],
221 |                                        mat_x_m1[, ind_bin, drop = FALSE])
222 | 
223 |       a_vb <- update_a_vb(a, rs_gam)
224 |       b_vb <- update_b_vb(b, d, rs_gam)
225 |       om_vb <- a_vb / (a_vb + b_vb)
226 | 
227 |       sum_gam <- sum(rs_gam)
228 | 
229 |       lb_new <- elbo_mix_(Y_bin, Y_cont, ind_bin, X, Z, a, a_vb, b, b_vb, 
230 |                           beta_vb, eta, gam_vb, kappa, lambda, alpha_vb, nu, 
231 |                           phi, phi_vb, sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb, 
232 |                           tau_vb, log_tau_vb, xi, zeta2_inv_vb, m2_alpha, 
233 |                           m2_beta, mat_x_m1, mat_z_mu, sum_gam)
234 | 
235 |       if (verbose & (it == 1 | it %% 5 == 0))
236 |         cat(paste0("ELBO = ", format(lb_new), "\n\n"))
237 | 
238 |       if (debug && lb_new < lb_old)
239 |         stop("ELBO not increasing monotonically. Exit. ")
240 | 
241 |       converged <- (abs(lb_new-lb_old) < tol)
242 | 
243 |     }
244 | 
245 |     if (verbose) {
246 |       if (converged) {
247 |         cat(paste0("Convergence obtained after ", format(it), " iterations. \n",
248 |                   "Optimal marginal log-likelihood variational lower bound ",
249 |                   "(ELBO) = ", format(lb_new), ". \n\n"))
250 |       } else {
251 |         warning("Maximal number of iterations reached before convergence. Exit.")
252 |       }
253 |     }
254 | 
255 |     lb_opt <- lb_new
256 | 
257 |     names_x <- colnames(X)
258 |     names_y <- colnames(W)
259 |     names_z <- colnames(Z)
260 |     
261 |     rownames(gam_vb) <- rownames(beta_vb) <- names_x
262 |     colnames(gam_vb) <- colnames(beta_vb) <- names_y
263 |     names(om_vb) <- names_x
264 |     rownames(alpha_vb) <- names_z
265 |     colnames(alpha_vb) <- names_y
266 |     
267 |     diff_lb <- abs(lb_opt - lb_old)
268 |     
269 |     if (full_output) { # for internal use only
270 |       
271 |       create_named_list_(ind_bin, a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, 
272 |                          lambda, alpha_vb, mu_beta_vb, nu, om_vb, phi, phi_vb, 
273 |                          sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb, tau_vb, 
274 |                          log_tau_vb, xi, zeta2_inv_vb, m2_alpha, m2_beta, 
275 |                          sum_gam, converged, it, lb_opt, diff_lb)
276 |     } else {
277 | 
278 |       create_named_list_(beta_vb, gam_vb, om_vb, alpha_vb, converged, it, lb_opt, diff_lb)
279 |     }
280 |   })
281 | }
282 | 
283 | 
284 | # Internal function which implements the marginal log-likelihood variational
285 | # lower bound (ELBO) corresponding to the `locus_mix_core` algorithm.
286 | #
287 | elbo_mix_ <- function(Y_bin, Y_cont, ind_bin, X, Z, a, a_vb, b, b_vb, beta_vb, 
288 |                       eta, gam_vb, kappa, lambda, alpha_vb, nu, phi, phi_vb,
289 |                       sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb, tau_vb,
290 |                       log_tau_vb, xi, zeta2_inv_vb, m2_alpha, m2_beta,
291 |                       mat_x_m1, mat_z_mu, sum_gam) {
292 | 
293 |   n <- nrow(Z)
294 |   q <- ncol(Z)
295 | 
296 |   xi_vb <- update_xi_z_vb_(xi, tau_vb, m2_alpha)
297 | 
298 |   eta_vb <- update_eta_z_vb_(n, q, eta, gam_vb[, -ind_bin, drop = FALSE])
299 | 
300 | 
301 |   kappa_vb <- update_kappa_z_vb_(Y_cont, Z, kappa,
302 |                                  alpha_vb[, -ind_bin, drop = FALSE],
303 |                                  beta_vb[, -ind_bin, drop = FALSE],
304 |                                  m2_alpha[, -ind_bin, drop = FALSE],
305 |                                  m2_beta[, -ind_bin, drop = FALSE],
306 |                                  mat_x_m1[, -ind_bin, drop = FALSE],
307 |                                  mat_z_mu[, -ind_bin, drop = FALSE],
308 |                                  sig2_inv_vb, zeta2_inv_vb, intercept = TRUE)
309 | 
310 |   lambda_vb <- update_lambda_vb_(lambda, sum_gam)
311 |   nu_vb <- update_nu_vb_(nu, m2_beta, tau_vb)
312 | 
313 |   log_tau_vb[-ind_bin] <- digamma(eta_vb) - log(kappa_vb)
314 |   log_zeta2_inv_vb <- digamma(phi_vb) - log(xi_vb)
315 |   log_sig2_inv_vb <- digamma(lambda_vb) - log(nu_vb)
316 |   log_om_vb <- digamma(a_vb) - digamma(a_vb + b_vb)
317 |   log_1_min_om_vb <- digamma(b_vb) - digamma(a_vb + b_vb)
318 | 
319 | 
320 |   elbo_A_cont <- e_y_(n, kappa, kappa_vb, log_tau_vb[-ind_bin],
321 |                       m2_beta[, -ind_bin, drop = FALSE], sig2_inv_vb,
322 |                       tau_vb[-ind_bin], m2_alpha[, -ind_bin, drop = FALSE],
323 |                       zeta2_inv_vb)
324 | 
325 |   elbo_A_bin <- e_y_probit_(X, Y_bin, Z, beta_vb[, ind_bin, drop = FALSE],
326 |                             m2_beta[, ind_bin, drop = FALSE],
327 |                             mat_x_m1[, ind_bin, drop = FALSE],
328 |                             mat_z_mu[, ind_bin, drop = FALSE],
329 |                             sig2_alpha_vb[, ind_bin, drop = FALSE], sweep = FALSE)
330 | 
331 | 
332 |   elbo_B <- e_beta_gamma_(gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb,
333 |                           log_tau_vb, m2_beta, sig2_beta_vb, sig2_inv_vb, tau_vb)
334 | 
335 |   elbo_C <- e_tau_(eta, eta_vb, kappa, kappa_vb, log_tau_vb[-ind_bin], tau_vb[-ind_bin])
336 | 
337 |   elbo_D <- e_sig2_inv_(lambda, lambda_vb, log_sig2_inv_vb, nu, nu_vb, sig2_inv_vb)
338 | 
339 |   elbo_E <- e_omega_(a, a_vb, b, b_vb, log_om_vb, log_1_min_om_vb)
340 | 
341 |   elbo_F <- e_alpha_(m2_alpha, log_tau_vb, log_zeta2_inv_vb, sig2_alpha_vb,
342 |                      tau_vb, zeta2_inv_vb)
343 | 
344 |   elbo_G <- e_zeta2_inv_(log_zeta2_inv_vb, phi, phi_vb, xi, xi_vb, zeta2_inv_vb)
345 | 
346 |   elbo_A_cont + elbo_A_bin + elbo_B + elbo_C + elbo_D + elbo_E + elbo_F + elbo_G
347 | 
348 | }
349 | 
350 | 


--------------------------------------------------------------------------------
/R/cross_validate.R:
--------------------------------------------------------------------------------
  1 | # This file is part of the `locus` R package:
  2 | #     https://github.com/hruffieux/locus
  3 | 
  4 | #' Gather settings for the cross-validation procedure used in \code{locus}.
  5 | #'
  6 | #' The cross-validation procedure uses the variational lower bound as objective
  7 | #' function and is used to select the prior average number of predictors
  8 | #' \code{p0_av} expected to be included in the model. \code{p0_av} is used to
  9 | #' set the model hyperparameters and ensure sparse predictor selections.
 10 | #'
 11 | #' This cross-validation procedure is available only for
 12 | #' \code{link = "identity"}.
 13 | #'
 14 | #' @param n Number of samples.
 15 | #' @param p Number of candidate predictors.
 16 | #' @param n_folds Number of number of folds. Large folds are not recommended for
 17 | #'   large datasets as the procedure may become computationally expensive. Must
 18 | #'   be greater than 2 and smaller than the number of samples.
 19 | #' @param size_p0_av_grid Number of possible values of p0_av to be compared.
 20 | #'   Large numbers are not recommended for large datasets as the procedure may
 21 | #'   become computationally expensive.
 22 | #' @param n_cpus Number of CPUs to be used for the cross-validation procedure.
 23 | #'   If large, one should ensure that enough RAM will be available for parallel
 24 | #'   execution. Set to 1 for serial execution.
 25 | #' @param tol_cv Tolerance for the variational algorithm stopping criterion used
 26 | #'   within the cross-validation procedure.
 27 | #' @param maxit_cv Maximum number of iterations allowed for the variational
 28 | #'   algorithm used within the cross-validation procedure.
 29 | #' @param verbose If \code{TRUE}, messages are displayed when calling
 30 | #'   \code{set_cv}.
 31 | #'
 32 | #' @return An object of class "\code{cv}" preparing the settings for the
 33 | #'   cross-validation settings in a form that can be passed to the
 34 | #'   \code{\link{locus}} function.
 35 | #'
 36 | #' @examples
 37 | #' seed <- 123; set.seed(seed)
 38 | #'
 39 | #' ###################
 40 | #' ## Simulate data ##
 41 | #' ###################
 42 | #'
 43 | #' ## Example using small problem sizes:
 44 | #' ##
 45 | #' n <- 150; p <- 200; p0 <- 50; d <- 25; d0 <- 20
 46 | #'
 47 | #' ## Candidate predictors (subject to selection)
 48 | #' ##
 49 | #' # Here we simulate common genetic variants (but any type of candidate
 50 | #' # predictors can be supplied).
 51 | #' # 0 = homozygous, major allele, 1 = heterozygous, 2 = homozygous, minor allele
 52 | #' #
 53 | #' X_act <- matrix(rbinom(n * p0, size = 2, p = 0.25), nrow = n)
 54 | #' X_inact <- matrix(rbinom(n * (p - p0), size = 2, p = 0.25), nrow = n)
 55 | #'
 56 | #' shuff_x_ind <- sample(p)
 57 | #' X <- cbind(X_act, X_inact)[, shuff_x_ind]
 58 | #'
 59 | #' bool_x_act <- shuff_x_ind <= p0
 60 | #'
 61 | #' pat_act <- beta <- matrix(0, nrow = p0, ncol = d0)
 62 | #' pat_act[sample(p0*d0, floor(p0*d0/5))] <- 1
 63 | #' beta[as.logical(pat_act)] <-  rnorm(sum(pat_act))
 64 | #'
 65 | #' ## Gaussian responses
 66 | #' ##
 67 | #' Y_act <- matrix(rnorm(n * d0, mean = X_act %*% beta, sd = 0.5), nrow = n)
 68 | #' Y_inact <- matrix(rnorm(n * (d - d0), sd = 0.5), nrow = n)
 69 | #' shuff_y_ind <- sample(d)
 70 | #' Y <- cbind(Y_act, Y_inact)[, shuff_y_ind]
 71 | #'
 72 | #' ########################
 73 | #' ## Infer associations ##
 74 | #' ########################
 75 | #'
 76 | #' list_cv <- set_cv(n, p, n_folds = 3, size_p0_av_grid = 3, n_cpus = 1)
 77 | #'
 78 | #' vb <- locus(Y = Y, X = X, p0_av = NULL, link = "identity", list_cv = list_cv,
 79 | #'             user_seed = seed)
 80 | #'
 81 | #' @seealso \code{\link{locus}}
 82 | #'
 83 | #' @export
 84 | #'
 85 | set_cv <- function(n, p, n_folds, size_p0_av_grid, n_cpus, tol_cv = 0.1,
 86 |                    maxit_cv = 1000, verbose = TRUE) {
 87 | 
 88 |   check_structure_(n_folds, "vector", "numeric", 1)
 89 |   check_natural_(n_folds)
 90 | 
 91 |   if (!(n_folds %in% 2:n))
 92 |     stop("n_folds must be a natural number greater than 2 and smaller than the number of samples.")
 93 | 
 94 |   # 16 may correspond to (a multiple of) the number of cores available
 95 |   if (n_folds > 16) warning("n_folds is large and may induce expensive computations.")
 96 | 
 97 | 
 98 |   check_structure_(size_p0_av_grid, "vector", "numeric", 1)
 99 |   check_natural_(size_p0_av_grid)
100 |   if (size_p0_av_grid < 2) stop(paste0("size_p0_av_grid must be at greater 1 ",
101 |                                       "to allow for comparisons."))
102 |   if (size_p0_av_grid > 10) stop(paste0("size_p0_av_grid is large and may ",
103 |                                        "induce expensive computations. Choose ",
104 |                                        "size_p0_av_grid in {2, 3, ..., 10}."))
105 | 
106 |   p0_av_grid <- create_grid_(p, size_p0_av_grid)
107 | 
108 |   new_size <- length(p0_av_grid)
109 |   if (size_p0_av_grid > new_size) {
110 |     if (verbose) cat(paste0("Cross-validation p0_av_grid reduced to ", new_size,
111 |                            " elements as p is small.\n"))
112 |     size_p0_av_grid <- new_size
113 |   }
114 | 
115 |   check_structure_(tol_cv, "vector", "numeric", 1)
116 |   check_positive_(tol_cv, eps=.Machine$double.eps)
117 | 
118 |   check_structure_(maxit_cv, "vector", "numeric", 1)
119 |   check_natural_(maxit_cv)
120 | 
121 |   check_structure_(n_cpus, "vector", "numeric", 1)
122 |   check_natural_(n_cpus)
123 |   if (n_cpus > n_folds){
124 |     message <- paste0("The number of cpus in use will be at most equal to n_folds.",
125 |                      "n_cpus is therefore set to n_folds = ", n_folds, ". \n")
126 |     if(verbose) cat(message)
127 |     else warning(message)
128 |     n_cpus <- n_folds
129 |   }
130 | 
131 |   if (n_cpus > 1) {
132 | 
133 |     n_cpus_avail <- parallel::detectCores()
134 |     if (n_cpus > n_cpus_avail) {
135 |       n_cpus <- n_cpus_avail
136 |       warning(paste0("The number of CPUs specified exceeds the number of CPUs ",
137 |                     "available on the machine. The latter has been used instead."))
138 |     }
139 |     if (verbose) cat(paste0("Cross-validation with ", n_cpus, " CPUs.\n",
140 |                            "Please make sure that enough RAM is available. \n"))
141 |   }
142 | 
143 |   n_cv <- n
144 |   p_cv <- p
145 | 
146 |   list_cv <- create_named_list_(n_cv, p_cv, n_folds, p0_av_grid, size_p0_av_grid,
147 |                                 tol_cv, n_cpus, maxit_cv)
148 |   class(list_cv) <- "cv"
149 | 
150 |   list_cv
151 | 
152 | }
153 | 
154 | # Internal function creating input grid values for the cross-validation procedure.
155 | #
156 | create_grid_ <- function(p, size_p0_av_grid) {
157 | 
158 |   if (p < 75) { # a different treatment to avoid having a single element in the grid
159 |     p0_av_grid <- unique(round(seq(max(floor(p/4), 1), max(ceiling(p/3), 2),
160 |                                    length.out = size_p0_av_grid), 0))
161 |   } else {
162 | 
163 |     p0_av_grid <- seq(max(min(1000, p/4), 1),
164 |                       max(min(1500, p/2), 1),
165 |                       length.out = size_p0_av_grid)
166 | 
167 |     base_round_ <- function(x, base){
168 |       sapply( round(x / base) * base, function(el) max(el, 1) )
169 |     }
170 | 
171 |     p0_av_grid <- unique(base_round_(p0_av_grid, 25))
172 |   }
173 | 
174 |   p0_av_grid
175 | 
176 | }
177 | 
178 | # Internal core function for the cross-validation procedure.
179 | #
180 | cross_validate_ <- function(Y, X, Z, link, ind_bin, list_cv, user_seed, verbose) {
181 | 
182 | 
183 |   d <- ncol(Y)
184 |   n <- nrow(Y)
185 |   p <- ncol(X)
186 | 
187 |   if (!is.null(Z)) q <- ncol(Z)
188 |   else q <- NULL
189 | 
190 |   with(list_cv, {
191 | 
192 |     folds <- rep_len(1:n_folds, n)
193 | 
194 |     evaluate_fold_ <- function(k) {
195 |       if (verbose) { cat(paste0("Evaluating fold k = ", k, "... \n"))
196 |         cat("-------------------------\n")
197 |       }
198 | 
199 |       current <- which(folds == k)
200 | 
201 |       Y_tr <- Y[-current,, drop = FALSE]
202 |       Y_test <- Y[current,, drop = FALSE] # drop = FALSE for the case where n_folds = n
203 | 
204 |       X_tr <- X[-current,, drop = FALSE]
205 |       X_test <- X[current,, drop = FALSE]
206 | 
207 |       # rescale X_tr as the algorithm then uses sample variance 1
208 |       X_tr <- scale(X_tr)
209 |       bool_cst_x <- is.nan(colSums(X_tr))
210 |       if (any(bool_cst_x)) {
211 |         X_tr <- X_tr[, !bool_cst_x, drop = FALSE]
212 |         # remove the corresponding columns in X_test too so that the lower bound
213 |         # obtained with X_tr can be evaluated on X_test
214 |         X_test <- X_test[, !bool_cst_x, drop = FALSE]
215 |         p <- ncol(X_tr)
216 |       }
217 | 
218 | 
219 |       if (link == "identity") {
220 | 
221 |         Y_tr <- scale(Y_tr, center = TRUE, scale = FALSE)
222 |         Y_test <- scale(Y_test, center = TRUE, scale = FALSE)
223 | 
224 |       } else if (link == "mix") {
225 | 
226 |         Y_tr[, -ind_bin] <- scale(Y_tr[, -ind_bin], center = TRUE, scale = FALSE)
227 |         Y_test[, -ind_bin] <- scale(Y_test[, -ind_bin], center = TRUE, scale = FALSE)
228 | 
229 |       }
230 | 
231 |       if (!is.null(Z)) {
232 |         Z_tr <- Z[-current,, drop = FALSE]
233 |         Z_test <- Z[current,, drop = FALSE]
234 |         Z_tr <- scale(Z_tr)
235 |         bool_cst_z <- is.nan(colSums(Z_tr))
236 |         if (any(bool_cst_z)) {
237 |           Z_tr <- Z_tr[, !bool_cst_z, drop = FALSE]
238 |           Z_test <- Z_test[, !bool_cst_z, drop = FALSE]
239 |           q <- ncol(Z_tr)
240 |         }
241 |       } else {
242 |         Z_tr <- Z_test <- NULL
243 |       }
244 | 
245 |       lb_vec <- vector(length = size_p0_av_grid)
246 | 
247 |       for(ind_pg in 1:size_p0_av_grid) {
248 | 
249 |         pg <-  p0_av_grid[ind_pg]
250 | 
251 |         if (verbose) cat(paste0("Evaluating p0_av = ", pg, "... \n"))
252 | 
253 |         list_hyper_pg <- auto_set_hyper_(Y_tr, p, pg, q, link = link, 
254 |                                          ind_bin = ind_bin, struct = FALSE, 
255 |                                          vec_fac_gr = NULL)
256 |         list_init_pg <- auto_set_init_(Y_tr, G = NULL, p, pg, q, user_seed,
257 |                                        link = link, ind_bin = ind_bin)
258 | 
259 |         nq <- is.null(q)
260 | 
261 |         if (link != "identity") { # adds an intercept for probit/mix regression
262 | 
263 |           if (nq) {
264 | 
265 |             Z_tr <- matrix(1, nrow = nrow(X_tr), ncol = 1)
266 |             Z_test <- matrix(1, nrow = nrow(X_test), ncol = 1)
267 | 
268 |             # uninformative prior
269 |             list_hyper_pg$phi <- list_hyper_pg$xi <- 1e-3
270 | 
271 |             list_init_pg$alpha_vb <- matrix(0, nrow = 1, ncol = d)
272 | 
273 |             if (link == "probit") {
274 | 
275 |               list_init_pg$sig2_alpha_vb <- 1
276 | 
277 |             } else {
278 | 
279 |               list_init_pg$sig2_alpha_vb <- matrix(1, nrow = 1, ncol = d)
280 | 
281 |             }
282 | 
283 |           } else{
284 | 
285 |             Z_tr <- cbind(rep(1, nrow(Z_tr)), Z_tr)
286 |             Z_test <- cbind(rep(1, nrow(Z_test)), Z_test)
287 | 
288 |             # uninformative prior
289 |             list_hyper_pg$phi <- c(1e-3, list_hyper_pg$phi)
290 |             list_hyper_pg$xi <- c(1e-3, list_hyper_pg$xi)
291 | 
292 |             list_init_pg$alpha_vb <- rbind(rep(0, d), list_init_pg$alpha_vb)
293 | 
294 |             if (link == "probit") {
295 | 
296 |               list_init_pg$sig2_alpha_vb <- c(1, list_init_pg$sig2_alpha_vb)
297 | 
298 |             } else {
299 | 
300 |               list_init_pg$sig2_alpha_vb <- rbind(rep(1, d), list_init_pg$sig2_alpha_vb)
301 | 
302 |             }
303 | 
304 | 
305 |           }
306 |           colnames(Z_tr)[1] <- colnames(Z_test)[1] <- "Intercept"
307 |         }
308 | 
309 | 
310 |         if (link == "identity") {
311 | 
312 |           if (nq) {
313 |             vb_tr <- locus_core_(Y_tr, X_tr, list_hyper_pg,
314 |                                  list_init_pg$gam_vb, list_init_pg$mu_beta_vb,
315 |                                  list_init_pg$sig2_beta_vb, list_init_pg$tau_vb,
316 |                                  tol_cv, maxit_cv, anneal = NULL,
317 |                                  verbose = FALSE, full_output = TRUE)
318 | 
319 |             lb_vec[ind_pg] <- with(vb_tr, {
320 | 
321 |               mat_x_m1 <-  X_test %*% beta_vb
322 | 
323 |               elbo_(Y_test, a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa,
324 |                     lambda, nu, sig2_beta_vb, sig2_inv_vb, tau_vb, 
325 |                     m2_beta, mat_x_m1, sum_gam)
326 | 
327 |             })
328 |           } else {
329 |             vb_tr <- locus_z_core_(Y_tr, X_tr, Z_tr, list_hyper_pg,
330 |                                    list_init_pg$gam_vb, list_init_pg$alpha_vb,
331 |                                    list_init_pg$mu_beta_vb, list_init_pg$sig2_alpha_vb,
332 |                                    list_init_pg$sig2_beta_vb, list_init_pg$tau_vb,
333 |                                    tol_cv, maxit_cv, verbose = FALSE, full_output = TRUE)
334 | 
335 |             lb_vec[ind_pg] <- with(vb_tr, {
336 | 
337 |               mat_z_mu <-  Z_test %*% alpha_vb
338 |               mat_x_m1 <-  X_test %*% beta_vb
339 | 
340 |               elbo_z_(Y_test, Z_test, a, a_vb, b, b_vb, beta_vb, eta, gam_vb,
341 |                       kappa, lambda, alpha_vb, nu, phi, phi_vb,
342 |                       sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb, tau_vb, xi,
343 |                       zeta2_inv_vb, m2_alpha, m2_beta, mat_x_m1,
344 |                       mat_z_mu, sum_gam)
345 |             })
346 |           }
347 | 
348 |         } else if (link == "probit") {
349 | 
350 |           vb_tr <- locus_probit_core_(Y_tr, X_tr, Z_tr, list_hyper_pg,
351 |                                       list_init_pg$gam_vb, list_init_pg$alpha_vb,
352 |                                       list_init_pg$mu_beta_vb, list_init_pg$sig2_alpha_vb,
353 |                                       list_init_pg$sig2_beta_vb, tol_cv, maxit_cv,
354 |                                       verbose = FALSE, full_output = TRUE)
355 | 
356 |           lb_vec[ind_pg] <- with(vb_tr, {
357 | 
358 |             mat_z_mu <-  Z_test %*% alpha_vb
359 |             mat_x_m1 <-  X_test %*% beta_vb
360 | 
361 |             elbo_probit_(Y_test, X_test, Z_test, a, a_vb, b, b_vb, beta_vb, gam_vb,
362 |                          lambda, nu, phi, phi_vb, sig2_alpha_vb, sig2_beta_vb,
363 |                          sig2_inv_vb, xi, zeta2_inv_vb, alpha_vb, 
364 |                          m2_alpha, m2_beta, mat_x_m1, mat_z_mu, sum_gam)
365 |           })
366 | 
367 |         } else if (link == "mix") {
368 | 
369 |           vb_tr <- locus_mix_core_(Y_tr, X_tr, Z_tr, ind_bin, list_hyper_pg,
370 |                                    list_init_pg$gam_vb, list_init_pg$alpha_vb,
371 |                                    list_init_pg$mu_beta_vb, list_init_pg$sig2_alpha_vb,
372 |                                    list_init_pg$sig2_beta_vb, list_init_pg$tau_vb,
373 |                                    tol_cv, maxit_cv, verbose = FALSE, full_output = TRUE)
374 | 
375 |           lb_vec[ind_pg] <- with(vb_tr, {
376 | 
377 |             mat_z_mu <-  Z_test %*% alpha_vb
378 |             mat_x_m1 <-  X_test %*% beta_vb
379 | 
380 |             elbo_mix_(Y_test[, ind_bin, drop = FALSE],
381 |                       Y_test[, -ind_bin, drop = FALSE], ind_bin, X_test, Z_test,
382 |                       a, a_vb, b, b_vb, beta_vb, eta, gam_vb, kappa, lambda, alpha_vb,
383 |                       nu, phi, phi_vb, sig2_alpha_vb, sig2_beta_vb, sig2_inv_vb,
384 |                       tau_vb, log_tau_vb, xi, zeta2_inv_vb, m2_alpha, 
385 |                       m2_beta, mat_x_m1, mat_z_mu, sum_gam)
386 | 
387 |           })
388 | 
389 |         }
390 | 
391 |         if (verbose) { cat(paste0("Lower bound on test set, fold ", k, ", p0_av ",
392 |                                  pg, ": ", format(lb_vec[ind_pg]), ". \n"))
393 |           cat("-------------------------\n") }
394 |       }
395 |       lb_vec
396 |     }
397 | 
398 |     RNGkind("L'Ecuyer-CMRG") # ensure reproducibility when using mclapply
399 | 
400 |     lb_mat <- parallel::mclapply(1:n_folds, function(k) evaluate_fold_(k),
401 |                                  mc.cores = n_cpus)
402 |     lb_mat <- do.call(rbind, lb_mat)
403 | 
404 |     rownames(lb_mat) <- paste0("fold_", 1:n_folds)
405 |     colnames(lb_mat) <- paste0("p0_av_", p0_av_grid)
406 | 
407 |     p0_av_opt <- p0_av_grid[which.max(colMeans(lb_mat))]
408 | 
409 |     if (verbose) {
410 |       cat("Lower bounds on test sets for each fold and each grid element: \n")
411 |       print(lb_mat)
412 |       cat(paste0("===== ...end of cross-validation with selected p0_av = ",
413 |                 p0_av_opt, " ===== \n"))
414 |     }
415 | 
416 |     p0_av_opt
417 |   })
418 | }
419 | 
420 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
  1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | #include "locus_types.h"
  5 | #include <RcppEigen.h>
  6 | #include <Rcpp.h>
  7 | 
  8 | using namespace Rcpp;
  9 | 
 10 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
 11 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 12 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 13 | #endif
 14 | 
 15 | // coreBatch
 16 | void coreBatch(const MapMat X, const MapMat Y, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, const MapArr1D log_tau_vb, MapMat beta_vb, MapMat mat_x_m1, MapArr2D mu_beta_vb, const MapArr1D sig2_beta_vb, const MapArr1D tau_vb);
 17 | RcppExport SEXP _locus_coreBatch(SEXP XSEXP, SEXP YSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP log_tau_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP tau_vbSEXP) {
 18 | BEGIN_RCPP
 19 |     Rcpp::RNGScope rcpp_rngScope_gen;
 20 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
 21 |     Rcpp::traits::input_parameter< const MapMat >::type Y(YSEXP);
 22 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
 23 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
 24 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
 25 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
 26 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_tau_vb(log_tau_vbSEXP);
 27 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
 28 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
 29 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
 30 |     Rcpp::traits::input_parameter< const MapArr1D >::type sig2_beta_vb(sig2_beta_vbSEXP);
 31 |     Rcpp::traits::input_parameter< const MapArr1D >::type tau_vb(tau_vbSEXP);
 32 |     coreBatch(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb);
 33 |     return R_NilValue;
 34 | END_RCPP
 35 | }
 36 | // coreZBatch
 37 | void coreZBatch(const MapMat X, const MapMat Y, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, const MapArr1D log_tau_vb, MapMat beta_vb, MapMat mat_x_m1, MapMat mat_z_mu, MapArr2D mu_beta_vb, const MapArr1D sig2_beta_vb, const MapArr1D tau_vb);
 38 | RcppExport SEXP _locus_coreZBatch(SEXP XSEXP, SEXP YSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP log_tau_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mat_z_muSEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP tau_vbSEXP) {
 39 | BEGIN_RCPP
 40 |     Rcpp::RNGScope rcpp_rngScope_gen;
 41 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
 42 |     Rcpp::traits::input_parameter< const MapMat >::type Y(YSEXP);
 43 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
 44 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
 45 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
 46 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
 47 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_tau_vb(log_tau_vbSEXP);
 48 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
 49 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
 50 |     Rcpp::traits::input_parameter< MapMat >::type mat_z_mu(mat_z_muSEXP);
 51 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
 52 |     Rcpp::traits::input_parameter< const MapArr1D >::type sig2_beta_vb(sig2_beta_vbSEXP);
 53 |     Rcpp::traits::input_parameter< const MapArr1D >::type tau_vb(tau_vbSEXP);
 54 |     coreZBatch(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb);
 55 |     return R_NilValue;
 56 | END_RCPP
 57 | }
 58 | // coreProbitBatch
 59 | void coreProbitBatch(const MapMat X, const MapMat W, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, MapMat beta_vb, MapMat mat_x_m1, MapMat mat_z_mu, MapArr2D mu_beta_vb, const double sig2_beta_vb);
 60 | RcppExport SEXP _locus_coreProbitBatch(SEXP XSEXP, SEXP WSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mat_z_muSEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP) {
 61 | BEGIN_RCPP
 62 |     Rcpp::RNGScope rcpp_rngScope_gen;
 63 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
 64 |     Rcpp::traits::input_parameter< const MapMat >::type W(WSEXP);
 65 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
 66 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
 67 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
 68 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
 69 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
 70 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
 71 |     Rcpp::traits::input_parameter< MapMat >::type mat_z_mu(mat_z_muSEXP);
 72 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
 73 |     Rcpp::traits::input_parameter< const double >::type sig2_beta_vb(sig2_beta_vbSEXP);
 74 |     coreProbitBatch(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb);
 75 |     return R_NilValue;
 76 | END_RCPP
 77 | }
 78 | // coreLoop
 79 | void coreLoop(const MapMat X, const MapMat Y, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, const MapArr1D log_tau_vb, MapMat beta_vb, MapMat mat_x_m1, MapArr2D mu_beta_vb, const MapArr1D sig2_beta_vb, const MapArr1D tau_vb, const MapArr1D shuffled_ind, const double c);
 80 | RcppExport SEXP _locus_coreLoop(SEXP XSEXP, SEXP YSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP log_tau_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP tau_vbSEXP, SEXP shuffled_indSEXP, SEXP cSEXP) {
 81 | BEGIN_RCPP
 82 |     Rcpp::RNGScope rcpp_rngScope_gen;
 83 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
 84 |     Rcpp::traits::input_parameter< const MapMat >::type Y(YSEXP);
 85 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
 86 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
 87 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
 88 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
 89 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_tau_vb(log_tau_vbSEXP);
 90 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
 91 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
 92 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
 93 |     Rcpp::traits::input_parameter< const MapArr1D >::type sig2_beta_vb(sig2_beta_vbSEXP);
 94 |     Rcpp::traits::input_parameter< const MapArr1D >::type tau_vb(tau_vbSEXP);
 95 |     Rcpp::traits::input_parameter< const MapArr1D >::type shuffled_ind(shuffled_indSEXP);
 96 |     Rcpp::traits::input_parameter< const double >::type c(cSEXP);
 97 |     coreLoop(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind, c);
 98 |     return R_NilValue;
 99 | END_RCPP
100 | }
101 | // coreZLoop
102 | void coreZLoop(const MapMat X, const MapMat Y, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, const MapArr1D log_tau_vb, MapMat beta_vb, MapMat mat_x_m1, MapMat mat_z_mu, MapArr2D mu_beta_vb, const MapArr1D sig2_beta_vb, const MapArr1D tau_vb, const MapArr1D shuffled_ind, const double c);
103 | RcppExport SEXP _locus_coreZLoop(SEXP XSEXP, SEXP YSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP log_tau_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mat_z_muSEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP tau_vbSEXP, SEXP shuffled_indSEXP, SEXP cSEXP) {
104 | BEGIN_RCPP
105 |     Rcpp::RNGScope rcpp_rngScope_gen;
106 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
107 |     Rcpp::traits::input_parameter< const MapMat >::type Y(YSEXP);
108 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
109 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
110 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
111 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
112 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_tau_vb(log_tau_vbSEXP);
113 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
114 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
115 |     Rcpp::traits::input_parameter< MapMat >::type mat_z_mu(mat_z_muSEXP);
116 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
117 |     Rcpp::traits::input_parameter< const MapArr1D >::type sig2_beta_vb(sig2_beta_vbSEXP);
118 |     Rcpp::traits::input_parameter< const MapArr1D >::type tau_vb(tau_vbSEXP);
119 |     Rcpp::traits::input_parameter< const MapArr1D >::type shuffled_ind(shuffled_indSEXP);
120 |     Rcpp::traits::input_parameter< const double >::type c(cSEXP);
121 |     coreZLoop(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind, c);
122 |     return R_NilValue;
123 | END_RCPP
124 | }
125 | // coreLogitLoop
126 | void coreLogitLoop(const MapMat X, const MapArr2D Y, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, MapMat beta_vb, MapArr2D mat_x_m1, MapArr2D mat_z_mu, MapArr2D mu_beta_vb, const MapArr2D psi_vb, const MapArr2D sig2_beta_vb, const MapArr1D shuffled_ind);
127 | RcppExport SEXP _locus_coreLogitLoop(SEXP XSEXP, SEXP YSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mat_z_muSEXP, SEXP mu_beta_vbSEXP, SEXP psi_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP shuffled_indSEXP) {
128 | BEGIN_RCPP
129 |     Rcpp::RNGScope rcpp_rngScope_gen;
130 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
131 |     Rcpp::traits::input_parameter< const MapArr2D >::type Y(YSEXP);
132 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
133 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
134 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
135 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
136 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
137 |     Rcpp::traits::input_parameter< MapArr2D >::type mat_x_m1(mat_x_m1SEXP);
138 |     Rcpp::traits::input_parameter< MapArr2D >::type mat_z_mu(mat_z_muSEXP);
139 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
140 |     Rcpp::traits::input_parameter< const MapArr2D >::type psi_vb(psi_vbSEXP);
141 |     Rcpp::traits::input_parameter< const MapArr2D >::type sig2_beta_vb(sig2_beta_vbSEXP);
142 |     Rcpp::traits::input_parameter< const MapArr1D >::type shuffled_ind(shuffled_indSEXP);
143 |     coreLogitLoop(X, Y, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, psi_vb, sig2_beta_vb, shuffled_ind);
144 |     return R_NilValue;
145 | END_RCPP
146 | }
147 | // coreProbitLoop
148 | void coreProbitLoop(const MapMat X, const MapMat W, MapArr2D gam_vb, const MapArr1D log_om_vb, const MapArr1D log_1_min_om_vb, const double log_sig2_inv_vb, MapMat beta_vb, MapMat mat_x_m1, MapMat mat_z_mu, MapArr2D mu_beta_vb, const double sig2_beta_vb, const MapArr1D shuffled_ind);
149 | RcppExport SEXP _locus_coreProbitLoop(SEXP XSEXP, SEXP WSEXP, SEXP gam_vbSEXP, SEXP log_om_vbSEXP, SEXP log_1_min_om_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mat_z_muSEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP shuffled_indSEXP) {
150 | BEGIN_RCPP
151 |     Rcpp::RNGScope rcpp_rngScope_gen;
152 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
153 |     Rcpp::traits::input_parameter< const MapMat >::type W(WSEXP);
154 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
155 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_om_vb(log_om_vbSEXP);
156 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_om_vb(log_1_min_om_vbSEXP);
157 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
158 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
159 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
160 |     Rcpp::traits::input_parameter< MapMat >::type mat_z_mu(mat_z_muSEXP);
161 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
162 |     Rcpp::traits::input_parameter< const double >::type sig2_beta_vb(sig2_beta_vbSEXP);
163 |     Rcpp::traits::input_parameter< const MapArr1D >::type shuffled_ind(shuffled_indSEXP);
164 |     coreProbitLoop(X, W, gam_vb, log_om_vb, log_1_min_om_vb, log_sig2_inv_vb, beta_vb, mat_x_m1, mat_z_mu, mu_beta_vb, sig2_beta_vb, shuffled_ind);
165 |     return R_NilValue;
166 | END_RCPP
167 | }
168 | // coreStructLoop
169 | void coreStructLoop(const MapMat X, const MapMat Y, MapArr2D gam_vb, const MapArr1D log_Phi_mu_theta_vb, const MapArr1D log_1_min_Phi_mu_theta_vb, const double log_sig2_inv_vb, const MapArr1D log_tau_vb, MapMat beta_vb, MapMat mat_x_m1, MapArr2D mu_beta_vb, const MapArr1D sig2_beta_vb, const MapArr1D tau_vb, const MapArr1D shuffled_ind);
170 | RcppExport SEXP _locus_coreStructLoop(SEXP XSEXP, SEXP YSEXP, SEXP gam_vbSEXP, SEXP log_Phi_mu_theta_vbSEXP, SEXP log_1_min_Phi_mu_theta_vbSEXP, SEXP log_sig2_inv_vbSEXP, SEXP log_tau_vbSEXP, SEXP beta_vbSEXP, SEXP mat_x_m1SEXP, SEXP mu_beta_vbSEXP, SEXP sig2_beta_vbSEXP, SEXP tau_vbSEXP, SEXP shuffled_indSEXP) {
171 | BEGIN_RCPP
172 |     Rcpp::RNGScope rcpp_rngScope_gen;
173 |     Rcpp::traits::input_parameter< const MapMat >::type X(XSEXP);
174 |     Rcpp::traits::input_parameter< const MapMat >::type Y(YSEXP);
175 |     Rcpp::traits::input_parameter< MapArr2D >::type gam_vb(gam_vbSEXP);
176 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_Phi_mu_theta_vb(log_Phi_mu_theta_vbSEXP);
177 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_1_min_Phi_mu_theta_vb(log_1_min_Phi_mu_theta_vbSEXP);
178 |     Rcpp::traits::input_parameter< const double >::type log_sig2_inv_vb(log_sig2_inv_vbSEXP);
179 |     Rcpp::traits::input_parameter< const MapArr1D >::type log_tau_vb(log_tau_vbSEXP);
180 |     Rcpp::traits::input_parameter< MapMat >::type beta_vb(beta_vbSEXP);
181 |     Rcpp::traits::input_parameter< MapMat >::type mat_x_m1(mat_x_m1SEXP);
182 |     Rcpp::traits::input_parameter< MapArr2D >::type mu_beta_vb(mu_beta_vbSEXP);
183 |     Rcpp::traits::input_parameter< const MapArr1D >::type sig2_beta_vb(sig2_beta_vbSEXP);
184 |     Rcpp::traits::input_parameter< const MapArr1D >::type tau_vb(tau_vbSEXP);
185 |     Rcpp::traits::input_parameter< const MapArr1D >::type shuffled_ind(shuffled_indSEXP);
186 |     coreStructLoop(X, Y, gam_vb, log_Phi_mu_theta_vb, log_1_min_Phi_mu_theta_vb, log_sig2_inv_vb, log_tau_vb, beta_vb, mat_x_m1, mu_beta_vb, sig2_beta_vb, tau_vb, shuffled_ind);
187 |     return R_NilValue;
188 | END_RCPP
189 | }
190 | 
191 | static const R_CallMethodDef CallEntries[] = {
192 |     {"_locus_coreBatch", (DL_FUNC) &_locus_coreBatch, 12},
193 |     {"_locus_coreZBatch", (DL_FUNC) &_locus_coreZBatch, 13},
194 |     {"_locus_coreProbitBatch", (DL_FUNC) &_locus_coreProbitBatch, 11},
195 |     {"_locus_coreLoop", (DL_FUNC) &_locus_coreLoop, 14},
196 |     {"_locus_coreZLoop", (DL_FUNC) &_locus_coreZLoop, 15},
197 |     {"_locus_coreLogitLoop", (DL_FUNC) &_locus_coreLogitLoop, 13},
198 |     {"_locus_coreProbitLoop", (DL_FUNC) &_locus_coreProbitLoop, 12},
199 |     {"_locus_coreStructLoop", (DL_FUNC) &_locus_coreStructLoop, 13},
200 |     {NULL, NULL, 0}
201 | };
202 | 
203 | RcppExport void R_init_locus(DllInfo *dll) {
204 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
205 |     R_useDynamicSymbols(dll, FALSE);
206 | }
207 | 


--------------------------------------------------------------------------------