├── my-doc
│   ├── tests
│   │   ├── .Rapp.history
│   │   ├── tests_knots.R
│   │   ├── tests_noX.R
│   │   ├── tests_Znew.R
│   │   └── tests_randint.R
│   ├── scratch_hnew_pred_methods.R
│   ├── scratch.R
│   └── probit_reg.Rmd
├── man
│   ├── .gitignore
│   ├── print.bkmrfit.Rd
│   ├── ExtractSamps.Rd
│   ├── ExtractEsts.Rd
│   ├── summary.bkmrfit.Rd
│   ├── ExtractPIPs.Rd
│   ├── TracePlot.Rd
│   ├── PlotPriorFits.Rd
│   ├── SimData.Rd
│   ├── InvestigatePrior.Rd
│   ├── PredictorResponseBivarLevels.Rd
│   ├── SamplePred.Rd
│   ├── OverallRiskSummaries.Rd
│   ├── ComputePostmeanHnew.Rd
│   ├── PredictorResponseUnivar.Rd
│   ├── SingVarIntSummaries.Rd
│   ├── SingVarRiskSummaries.Rd
│   ├── PredictorResponseBivar.Rd
│   ├── PredictorResponseBivarPair.Rd
│   └── kmbayes.Rd
├── CRAN-SUBMISSION
├── R
│   ├── base.r
│   ├── zzz.R
│   ├── CalcR2.R
│   ├── TracePlot.R
│   ├── argval_controlParams.R
│   ├── argval_ParamInput.R
│   ├── print_verbose.R
│   ├── argval_ListInput.R
│   ├── SamplePred.R
│   ├── CalcPIPs.R
│   ├── SimData.R
│   ├── bkmr_r_parameter_helper_functions.R
│   ├── ExtractEsts.R
│   ├── InvestigatePrior_rm.R
│   ├── ComputePostmeanHnew.R
│   ├── ComputeMixtureSummaries.R
│   ├── PredictorResponseFunctions.R
│   ├── bkmr_parameter_update_functions.R
│   └── bkmr_main_functions.R
├── .Rbuildignore
├── .gitignore
├── data-raw
│   └── VarRealistic.R
├── bkmr.Rproj
├── NAMESPACE
├── DESCRIPTION
├── README.md
├── cran-comments.md
└── NEWS.md

--------------------------------------------------------------------------------
/my-doc/tests/.Rapp.history:
--------------------------------------------------------------------------------
 1 | 
--------------------------------------------------------------------------------
/man/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rapp.history
 2 | 
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
 1 | Version: 0.2.2
 2 | Date: 2022-03-22 17:41:18 UTC
 3 | SHA: 201280422de50255cb3587caac718d694b4d5c72
--------------------------------------------------------------------------------
/R/base.r:
--------------------------------------------------------------------------------
 1 | #' @importFrom magrittr %>%
 2 | NULL
 3 | 
 4 | #' @importFrom magrittr %<>%
 5 | NULL
 6 | 
 7 | #' @import stats
 8 | NULL
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^cran-comments\.md$
 4 | ^my-doc$
 5 | ^data-raw$
 6 | ^.*\.DS_Store
 7 | ^revdep$
 8 | ^CRAN-SUBMISSION$
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | inst/doc
 5 | data
 6 | my-doc/overview.html
 7 | my-doc/fitted_objects.RData
 8 | my-doc/probit_reg.RData
 9 | my-doc/probit_reg.html
10 | *.DS_Store
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | .onAttach <- function(libname, pkgname) {
 2 |   packageStartupMessage("For guided examples, go to 'https://jenfb.github.io/bkmr/overview.html'")
 3 | }
 4 | 
 5 | release_questions <- function() {
 6 |   c(
 7 |     "Have you updated the vignette and posted to GitHub?"
 8 |   )
 9 | }
--------------------------------------------------------------------------------
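A practical aside on `zzz.R` above: because the message is emitted via `packageStartupMessage()` inside `.onAttach`, callers can silence it with standard base R machinery. A minimal sketch (mine, not part of the repository):
```R
# Illustrative: suppress the packageStartupMessage() emitted by .onAttach
suppressPackageStartupMessages(library(bkmr))
```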
/data-raw/VarRealistic.R:
--------------------------------------------------------------------------------
 1 | load("H:/Research/Completed Projects/2014 Bayesian kernel machine regression (Biostatistics)/Code/simulation/run_on_cluster_h1_h2_h3_scenarios/data/xrfdat.RData")
 2 | 
 3 | VarRealistic <- round(cov(xrfdat), 2)
 4 | dimnames(VarRealistic) <- NULL
 5 | 
 6 | dput(VarRealistic)
 7 | 
--------------------------------------------------------------------------------
/R/CalcR2.R:
--------------------------------------------------------------------------------
 1 | CalcR2 <- function(fit, sel = NULL) {
 2 |   ests <- ExtractEsts(fit, sel = sel)
 3 |   y <- fit$y
 4 | 
 5 |   ## http://en.wikipedia.org/wiki/Coefficient_of_determination
 6 |   preds <- with(ests, h[, "mean"] + fit$X %*% beta[, "mean"])
 7 |   SStot <- sum((y - mean(y))^2)
 8 |   SSres <- sum((y - preds)^2)
 9 |   R2 <- 1 - SSres/SStot
10 |   R2
11 | }
--------------------------------------------------------------------------------
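`CalcR2()` above computes R2 = 1 - SSres/SStot from the posterior-mean fitted values. It is not exported (it does not appear in the NAMESPACE below), so a usage sketch needs `:::`; this is my illustration, assuming `fitkm` is a `kmbayes()` fit as in the Rd examples later in this dump:
```R
# Illustrative sketch, not from the package docs: unexported helper via :::
r2 <- bkmr:::CalcR2(fitkm)
r2  # coefficient of determination of the posterior-mean fit
```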
/bkmr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | PackageRoxygenize: rd,collate,namespace,vignette
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(print,bkmrfit)
 4 | S3method(summary,bkmrfit)
 5 | export(ComputePostmeanHnew)
 6 | export(ExtractEsts)
 7 | export(ExtractPIPs)
 8 | export(ExtractSamps)
 9 | export(InvestigatePrior)
10 | export(OverallRiskSummaries)
11 | export(PlotPriorFits)
12 | export(PredictorResponseBivar)
13 | export(PredictorResponseBivarLevels)
14 | export(PredictorResponseBivarPair)
15 | export(PredictorResponseUnivar)
16 | export(SamplePred)
17 | export(SimData)
18 | export(SingVarIntSummaries)
19 | export(SingVarRiskSummaries)
20 | export(TracePlot)
21 | export(kmbayes)
22 | import(graphics)
23 | import(nlme)
24 | import(stats)
25 | import(utils)
26 | importFrom(magrittr,"%<>%")
27 | importFrom(magrittr,"%>%")
28 | importFrom(tmvtnorm,rtmvnorm)
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bkmr
 2 | Title: Bayesian Kernel Machine Regression
 3 | Version: 0.2.2.9000
 4 | Authors@R: c(person("Jennifer F.", "Bobb",
 5 |                     email = "jenniferfederbobb@gmail.com",
 6 |                     role = c("aut", "cre")),
 7 |              person('Luke', 'Duttweiler',
 8 |                     email = 'lduttweiler@hsph.harvard.edu',
 9 |                     role = c('ctb'),
10 |                     comment = c(ORCID = "0000-0002-0467-995X")))
11 | Description: Implementation of a statistical approach
12 |     for estimating the joint health effects of multiple
13 |     concurrent exposures, as described in Bobb et al (2015)
14 |     <doi:10.1093/biostatistics/kxu058>.
15 | URL: https://github.com/jenfb/bkmr
16 | BugReports: https://github.com/jenfb/bkmr/issues
17 | Depends:
18 |     R (>= 3.1.2)
19 | License: GPL-2
20 | Imports:
21 |     dplyr,
22 |     magrittr,
23 |     nlme,
24 |     fields,
25 |     truncnorm,
26 |     tidyr,
27 |     MASS,
28 |     tmvtnorm,
29 |     tibble
30 | RoxygenNote: 7.3.1
31 | Encoding: UTF-8
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | The R package `bkmr` implements Bayesian kernel machine regression, a statistical approach for estimating the joint health effects of multiple concurrent exposures. Additional information on the statistical methodology and on the computational details is provided in [Bobb et al. 2015](https://academic.oup.com/biostatistics/article/16/3/493/269719). More recent extensions, details on the software, and worked-through examples are provided in [Bobb et al. 2018](https://ehjournal.biomedcentral.com/articles/10.1186/s12940-018-0413-y).
 2 | 
 3 | You can install the latest released version of `bkmr` from CRAN with:
 4 | ```R
 5 | install.packages("bkmr")
 6 | ```
 7 | Or the latest development version from GitHub with:
 8 | ```R
 9 | install.packages("devtools")
10 | devtools::install_github("jenfb/bkmr")
11 | ```
12 | 
13 | For a general overview and guided examples, go to https://jenfb.github.io/bkmr/overview.html.
14 | 
15 | For examples from the software paper, please see
16 | 
17 | * https://jenfb.github.io/bkmr/SimData1
18 | * https://jenfb.github.io/bkmr/ProbitEx
--------------------------------------------------------------------------------
/man/print.bkmrfit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bkmr_main_functions.R
 3 | \name{print.bkmrfit}
 4 | \alias{print.bkmrfit}
 5 | \title{Print basic summary of BKMR model fit}
 6 | \usage{
 7 | \method{print}{bkmrfit}(x, digits = 5, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{an object of class "bkmrfit"}
11 | 
12 | \item{digits}{the number of digits to show when printing}
13 | 
14 | \item{...}{further arguments passed to or from other methods.}
15 | }
16 | \value{
17 | No return value, prints basic summary of fit to console
18 | }
19 | \description{
20 | \code{print} method for class "bkmrfit"
21 | }
22 | \examples{
23 | ## First generate dataset
24 | set.seed(111)
25 | dat <- SimData(n = 50, M = 4)
26 | y <- dat$y
27 | Z <- dat$Z
28 | X <- dat$X
29 | 
30 | ## Fit model with component-wise variable selection
31 | ## Using only 100 iterations to make example run quickly
32 | ## Typically should use a large number of iterations for inference
33 | set.seed(111)
34 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
35 | fitkm
36 | }
--------------------------------------------------------------------------------
/man/ExtractSamps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ExtractEsts.R
 3 | \name{ExtractSamps}
 4 | \alias{ExtractSamps}
 5 | \title{Extract samples}
 6 | \usage{
 7 | ExtractSamps(fit, sel = NULL)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
11 | 
12 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
13 | }
14 | \value{
15 | a list where each component contains the posterior samples of one of the parameters (or vector of parameters) being estimated
16 | }
17 | \description{
18 | Extract samples of each parameter from the BKMR fit
19 | }
20 | \examples{
21 | ## First generate dataset
22 | set.seed(111)
23 | dat <- SimData(n = 50, M = 4)
24 | y <- dat$y
25 | Z <- dat$Z
26 | X <- dat$X
27 | 
28 | ## Fit model with component-wise variable selection
29 | ## Using only 100 iterations to make example run quickly
30 | ## Typically should use a large number of iterations for inference
31 | set.seed(111)
32 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
33 | 
34 | samps <- ExtractSamps(fitkm)
35 | }
--------------------------------------------------------------------------------
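A sketch (mine, not from the package docs) of summarizing the list returned by `ExtractSamps()`; judging from `R/TracePlot.R` later in this dump, vector-valued components such as `beta` are matrices with one row per retained iteration:
```R
samps <- ExtractSamps(fitkm)
# posterior median and 95% credible interval for the first covariate effect
quantile(samps$beta[, 1], probs = c(0.025, 0.5, 0.975))
```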
/man/ExtractEsts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ExtractEsts.R
 3 | \name{ExtractEsts}
 4 | \alias{ExtractEsts}
 5 | \title{Extract summary statistics}
 6 | \usage{
 7 | ExtractEsts(fit, q = c(0.025, 0.25, 0.5, 0.75, 0.975), sel = NULL)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
11 | 
12 | \item{q}{vector of quantiles}
13 | 
14 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
15 | }
16 | \value{
17 | a list where each component is a data frame containing the summary statistics of the posterior distribution of one of the parameters (or vector of parameters) being estimated
18 | }
19 | \description{
20 | Obtain summary statistics of each parameter from the BKMR fit
21 | }
22 | \examples{
23 | ## First generate dataset
24 | set.seed(111)
25 | dat <- SimData(n = 50, M = 4)
26 | y <- dat$y
27 | Z <- dat$Z
28 | X <- dat$X
29 | 
30 | ## Fit model with component-wise variable selection
31 | ## Using only 100 iterations to make example run quickly
32 | ## Typically should use a large number of iterations for inference
33 | set.seed(111)
34 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
35 | 
36 | ests <- ExtractEsts(fitkm)
37 | names(ests)
38 | ests$beta
39 | }
--------------------------------------------------------------------------------
/man/summary.bkmrfit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bkmr_main_functions.R
 3 | \name{summary.bkmrfit}
 4 | \alias{summary.bkmrfit}
 5 | \title{Summarizing BKMR model fits}
 6 | \usage{
 7 | \method{summary}{bkmrfit}(
 8 |   object,
 9 |   q = c(0.025, 0.975),
10 |   digits = 5,
11 |   show_ests = TRUE,
12 |   show_MH = TRUE,
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{object}{an object of class "bkmrfit"}
18 | 
19 | \item{q}{quantiles of posterior distribution to show}
20 | 
21 | \item{digits}{the number of digits to show when printing}
22 | 
23 | \item{show_ests}{logical; if \code{TRUE}, prints summary statistics of posterior distribution}
24 | 
25 | \item{show_MH}{logical; if \code{TRUE}, prints acceptance rates from the Metropolis-Hastings algorithm}
26 | 
27 | \item{...}{further arguments passed to or from other methods.}
28 | }
29 | \value{
30 | No return value, prints more detailed summary of fit to console
31 | }
32 | \description{
33 | \code{summary} method for class "bkmrfit"
34 | }
35 | \examples{
36 | ## First generate dataset
37 | set.seed(111)
38 | dat <- SimData(n = 50, M = 4)
39 | y <- dat$y
40 | Z <- dat$Z
41 | X <- dat$X
42 | 
43 | ## Fit model with component-wise variable selection
44 | ## Using only 100 iterations to make example run quickly
45 | ## Typically should use a large number of iterations for inference
46 | set.seed(111)
47 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
48 | summary(fitkm)
49 | }
--------------------------------------------------------------------------------
/man/ExtractPIPs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CalcPIPs.R
 3 | \name{ExtractPIPs}
 4 | \alias{ExtractPIPs}
 5 | \title{Extract posterior inclusion probabilities (PIPs) from BKMR model fit}
 6 | \usage{
 7 | ExtractPIPs(fit, sel = NULL, z.names = NULL)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
11 | 
12 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
13 | 
14 | \item{z.names}{optional argument providing the names of the variables included in the \code{h} function.}
15 | }
16 | \value{
17 | a data frame with the variable-specific PIPs for BKMR fit with component-wise variable selection, and with the group-specific and conditional (within-group) PIPs for BKMR fit with hierarchical variable selection.
18 | }
19 | \description{
20 | Extract posterior inclusion probabilities (PIPs) from Bayesian Kernel Machine Regression (BKMR) model fit
21 | }
22 | \details{
23 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
24 | }
25 | \examples{
26 | ## First generate dataset
27 | set.seed(111)
28 | dat <- SimData(n = 50, M = 4)
29 | y <- dat$y
30 | Z <- dat$Z
31 | X <- dat$X
32 | 
33 | ## Fit model with component-wise variable selection
34 | ## Using only 100 iterations to make example run quickly
35 | ## Typically should use a large number of iterations for inference
36 | set.seed(111)
37 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
38 | 
39 | ExtractPIPs(fitkm)
40 | }
--------------------------------------------------------------------------------
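An illustrative follow-up (mine): flag exposures by their posterior inclusion probability. The 0.5 cutoff is a common convention rather than a package rule, and the `PIP` column name is assumed from the component-wise output:
```R
pips <- ExtractPIPs(fitkm)
# exposures with posterior inclusion probability above 0.5 (illustrative cutoff)
subset(pips, PIP > 0.5)
```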
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Resubmission
 2 | This is a resubmission. In this version I have:
 3 | 
 4 | * added the paper reference to the description field of the DESCRIPTION file
 5 | * added a \value field to the documentation for exported methods
 6 | * added small executable examples in Rd-files for exported functions
 7 | 
 8 | ## Test environments
 9 | * local OS X install, R-devel
10 | * win-builder (devel)
11 | * Windows Server 2008 R2 SP1, R-release, 32/64 bit
12 | 
13 | ## R CMD check results
14 | There were no ERRORs or WARNINGs.
15 | 
16 | There was one NOTE:
17 | 
18 | * New submission
19 | 
20 |   Package was archived on CRAN
21 | 
22 |   Possibly misspelled words in DESCRIPTION:
23 |     Bobb (9:41)
24 |     al (9:49)
25 |     et (9:46)
26 | 
27 | * A reference for the methods in the package is included in the
28 |   description as requested by CRAN; these words are the author names
29 |   included as part of the reference.
30 | 
31 |   CRAN repository db overrides:
32 |     X-CRAN-Comment: Archived on 2022-03-20 as check problems were not
33 |     corrected in time.
34 | 
35 |     'length > 1 in coercion to logical' error in check of 'bkmrhat'.
36 | 
37 | * Check problems have now been corrected.
38 | 
39 |   Found the following (possibly) invalid DOIs:
40 |     DOI: 10.1093/biostatistics/kxu058
41 |       From: DESCRIPTION
42 |       Status: Forbidden
43 |       Message: 403
44 | 
45 | * This is the DOI provided on the article website <https://academic.oup.com/biostatistics/article/16/3/493/269719>.
46 | 
47 | ## Downstream dependencies
48 | I have also run R CMD check on downstream dependencies with no issues.
--------------------------------------------------------------------------------
/man/TracePlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TracePlot.R
 3 | \name{TracePlot}
 4 | \alias{TracePlot}
 5 | \title{Trace plot}
 6 | \usage{
 7 | TracePlot(
 8 |   fit,
 9 |   par,
10 |   comp = 1,
11 |   sel = NULL,
12 |   main = "",
13 |   xlab = "iteration",
14 |   ylab = "parameter value",
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
20 | 
21 | \item{par}{which parameter to plot}
22 | 
23 | \item{comp}{which component of the parameter vector to plot}
24 | 
25 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
26 | 
27 | \item{main}{title}
28 | 
29 | \item{xlab}{x axis label}
30 | 
31 | \item{ylab}{y axis label}
32 | 
33 | \item{...}{other arguments to pass onto the plotting function}
34 | }
35 | \value{
36 | No return value, generates plot
37 | }
38 | \description{
39 | Trace plot
40 | }
41 | \details{
42 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
43 | }
44 | \examples{
45 | ## First generate dataset
46 | set.seed(111)
47 | dat <- SimData(n = 50, M = 4)
48 | y <- dat$y
49 | Z <- dat$Z
50 | X <- dat$X
51 | 
52 | ## Fit model with component-wise variable selection
53 | ## Using only 100 iterations to make example run quickly
54 | ## Typically should use a large number of iterations for inference
55 | set.seed(111)
56 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
57 | 
58 | TracePlot(fit = fitkm, par = "beta")
59 | TracePlot(fit = fitkm, par = "sigsq.eps")
60 | TracePlot(fit = fitkm, par = "r", comp = 1)
61 | }
--------------------------------------------------------------------------------
/R/TracePlot.R:
--------------------------------------------------------------------------------
 1 | #' Trace plot
 2 | #'
 3 | #' @inheritParams ExtractEsts
 4 | #' @param par which parameter to plot
 5 | #' @param comp which component of the parameter vector to plot
 6 | #' @param main title
 7 | #' @param xlab x axis label
 8 | #' @param ylab y axis label
 9 | #' @param ... other arguments to pass onto the plotting function
10 | #' @export
11 | #' @import graphics
12 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
13 | #'
14 | #' @return No return value, generates plot
15 | #'
16 | #' @examples
17 | #' ## First generate dataset
18 | #' set.seed(111)
19 | #' dat <- SimData(n = 50, M = 4)
20 | #' y <- dat$y
21 | #' Z <- dat$Z
22 | #' X <- dat$X
23 | #'
24 | #' ## Fit model with component-wise variable selection
25 | #' ## Using only 100 iterations to make example run quickly
26 | #' ## Typically should use a large number of iterations for inference
27 | #' set.seed(111)
28 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
29 | #'
30 | #' TracePlot(fit = fitkm, par = "beta")
31 | #' TracePlot(fit = fitkm, par = "sigsq.eps")
32 | #' TracePlot(fit = fitkm, par = "r", comp = 1)
33 | TracePlot <- function(fit, par, comp = 1, sel = NULL, main = "", xlab = "iteration", ylab = "parameter value", ...) {
34 |   samps <- ExtractSamps(fit, sel = sel)[[par]]
35 |   if (!is.null(ncol(samps))) {
36 |     nm <- colnames(samps)[comp]
37 |     samps <- samps[, comp]
38 |   } else {
39 |     nm <- par
40 |   }
41 |   main <- paste0(main, "\n(", nm, " = ", format(mean(samps), digits = 2), ")")
42 |   plot(samps, type = "l", main = main, xlab = xlab, ylab = ylab, ...)
43 |   abline(h = mean(samps), col = "blue", lwd = 2)
44 | }
--------------------------------------------------------------------------------
/man/PlotPriorFits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/InvestigatePrior_rm.R
 3 | \name{PlotPriorFits}
 4 | \alias{PlotPriorFits}
 5 | \title{Plot of exposure-response function from univariate KMR fit}
 6 | \usage{
 7 | PlotPriorFits(
 8 |   y,
 9 |   X,
10 |   Z,
11 |   fits,
12 |   which.z = NULL,
13 |   which.q = NULL,
14 |   plot.resid = TRUE,
15 |   ylim = NULL,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{y}{a vector of outcome data of length \code{n}.}
21 | 
22 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
23 | 
24 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
25 | 
26 | \item{fits}{output from \code{\link{InvestigatePrior}}}
27 | 
28 | \item{which.z}{which predictors (columns in \code{Z}) to plot}
29 | 
30 | \item{which.q}{which q.values to plot; defaults to all possible}
31 | 
32 | \item{plot.resid}{whether to plot the data points}
33 | 
34 | \item{ylim}{plotting limits for the y-axis}
35 | 
36 | \item{...}{other plotting arguments}
37 | }
38 | \value{
39 | No return value, generates plot
40 | }
41 | \description{
42 | Plot the \code{h(z[m])} estimated from frequentist KMR for \code{r[m]} fixed to specific values
43 | }
44 | \examples{
45 | ## First generate dataset
46 | set.seed(111)
47 | dat <- SimData(n = 50, M = 4)
48 | y <- dat$y
49 | Z <- dat$Z
50 | X <- dat$X
51 | 
52 | priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16))
53 | PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits)
54 | }
--------------------------------------------------------------------------------
/man/SimData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SimData.R
 3 | \name{SimData}
 4 | \alias{SimData}
 5 | \title{Simulate dataset}
 6 | \usage{
 7 | SimData(
 8 |   n = 100,
 9 |   M = 5,
10 |   sigsq.true = 0.5,
11 |   beta.true = 2,
12 |   hfun = 3,
13 |   Zgen = "norm",
14 |   ind = 1:2,
15 |   family = "gaussian"
16 | )
17 | }
18 | \arguments{
19 | \item{n}{Number of observations}
20 | 
21 | \item{M}{Number of predictor variables to generate}
22 | 
23 | \item{sigsq.true}{Variance of normally distributed residual error}
24 | 
25 | \item{beta.true}{Coefficient on the covariate}
26 | 
27 | \item{hfun}{An integer from 1 to 3 identifying which predictor-response function to generate}
28 | 
29 | \item{Zgen}{Method for generating the matrix Z of exposure variables, taking one of the values c("unif", "norm", "corr", "realistic")}
30 | 
31 | \item{ind}{select which predictor(s) will be included in the \code{h} function; how many predictors can be included depends on which \code{h} function is being used.}
32 | 
33 | \item{family}{a description of the error distribution and link function to be used in the model. Currently implemented for \code{gaussian} and \code{binomial} families.}
34 | }
35 | \value{
36 | a list containing the parameter values and generated variables of the simulated datasets
37 | }
38 | \description{
39 | Simulate predictor, covariate, and continuous outcome data
40 | }
41 | \details{
42 | \itemize{
43 |   \item \code{hfun = 1}: A nonlinear function of the first predictor
44 |   \item \code{hfun = 2}: A linear function of the first two predictors and their product term
45 |   \item \code{hfun = 3}: A nonlinear and nonadditive function of the first two predictor variables
46 | }
47 | }
48 | \examples{
49 | set.seed(5)
50 | dat <- SimData()
51 | }
--------------------------------------------------------------------------------
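A sketch combining the generator options documented above: `hfun = 3` (nonlinear and nonadditive in the first two exposures) with correlated exposures via `Zgen = "corr"`. All argument values come from this help page; the seed is arbitrary:
```R
set.seed(5)
# correlated exposures, nonlinear/nonadditive h of the first two columns of Z
dat <- SimData(n = 100, M = 5, hfun = 3, Zgen = "corr", ind = 1:2)
str(dat$Z)  # 100 x 5 exposure matrix
```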
/my-doc/scratch_hnew_pred_methods.R:
--------------------------------------------------------------------------------
 1 | set.seed(111)
 2 | dat <- SimData(n = 500, M = 4)
 3 | y <- dat$y
 4 | Z <- dat$Z
 5 | X <- dat$X
 6 | 
 7 | Xnew <- matrix(0, nrow(Z), ncol(X))
 8 | 
 9 | set.seed(111)
10 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 2000, verbose = FALSE, varsel = TRUE)
11 | 
12 | ests_full <- ExtractEsts(fit0)$h[, c("mean", "sd")]
13 | 
14 | ## add Vcomps
15 | sel <- with(fit0, seq(floor(iter/2) + 1, iter, 10))
16 | sel <- unique(floor(sel))
17 | s0 <- system.time(
18 |   Vinv <- with(fit0, lapply(sel, function(s) makeVcomps(r = r[s, ], lambda = lambda[s, ], Z, data.comps)$Vinv))
19 | )
20 | attr(Vinv, "sel") <- sel
21 | fit0V <- fit0
22 | fit0V$Vinv <- Vinv
23 | print(object.size(fit0), units = "Mb")
24 | print(object.size(fit0V), units = "Mb")
25 | 
26 | s1 <- system.time(
27 |   samps <- SamplePred(fit0, Xnew = Xnew)
28 | )
29 | 
30 | ests_samp <- samps %>%
31 |   {cbind(mean = colMeans(.), sd = apply(., 2, sd))}
32 | 
33 | s2 <- system.time(
34 |   ests_approx <- ComputePostmeanHnew(fit0) %$%
35 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
36 | )
37 | 
38 | s3 <- system.time(
39 |   ests_approx2 <- ComputePostmeanHnew2(fit0) %$%
40 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
41 | )
42 | 
43 | s4 <- system.time(
44 |   ests_approx2 <- ComputePostmeanHnew2(fit0V) %$%
45 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
46 | )
47 | 
48 | s1
49 | s2
50 | s3
51 | s4
52 | 
53 | head(ests_full)
54 | head(ests_samp)
55 | head(ests_approx)
56 | head(ests_approx2)
57 | 
58 | par(mfrow = c(1, 2))
59 | plot(ests_samp[, "mean"], ests_approx2[, "mean"])
60 | abline(0, 1, col = "red")
61 | plot(ests_samp[, "sd"], ests_approx2[, "sd"])
62 | abline(0, 1, col = "red")
63 | 
64 | par(mfrow = c(1, 2))
65 | plot(ests_samp[, "mean"], ests_approx[, "mean"])
66 | abline(0, 1, col = "red")
67 | plot(ests_samp[, "sd"], ests_approx[, "sd"])
68 | abline(0, 1, col = "red")
69 | 
70 | par(mfrow = c(1, 2))
71 | plot(ests_approx[, "mean"], ests_approx2[, "mean"])
72 | abline(0, 1, col = "red")
73 | plot(ests_approx[, "sd"], ests_approx2[, "sd"])
74 | abline(0, 1, col = "red")
75 | 
--------------------------------------------------------------------------------
/my-doc/tests/tests_knots.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | library(ggplot2)
 3 | 
 4 | family <- "gaussian"
 5 | #family <- "binomial"
 6 | 
 7 | ## example using a matrix of knots ####
 8 | 
 9 | n <- 500
10 | M <- 5
11 | sigsq.true <- ifelse(family == "gaussian", 0.05, 1)
12 | beta.true <- 0.5
13 | Z <- matrix(rnorm(n * M), n, M)
14 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n))
15 | eps <- rnorm(n, sd = sqrt(sigsq.true))
16 | h <- apply(Z, 1, function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3))
17 | eps <- rnorm(n, sd = sqrt(sigsq.true))
18 | y <- drop(X*beta.true + h + eps)
19 | if (family == "binomial") {
20 |   ystar <- y
21 |   y <- ifelse(ystar > 0, 1, 0)
22 | }
23 | 
24 | des <- fields::cover.design(Z, nd = 100)
25 | knots <- des$design
26 | 
27 | ## compare timing to not using knots
28 | set.seed(111)
29 | fit_full <- kmbayes(y = y, Z = Z, X = X, iter = 300, varsel = TRUE, family = family, verbose = FALSE)
30 | with(fit_full, difftime(time2, time1))
31 | fit_knots <- kmbayes(y = y, Z = Z, X = X, iter = 300, varsel = TRUE, family = family, verbose = FALSE, knots = knots)
32 | with(fit_knots, difftime(time2, time1))
33 | 
34 | pred.resp.univar <- PredictorResponseUnivar(fit = fit_full)
35 | s1 <- system.time(pred.resp.univar <- pred.resp.univar.full <- PredictorResponseUnivar(fit = fit_full, method = "exact"))
36 | s1
37 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
38 |   geom_smooth(stat = "identity") +
39 |   facet_wrap(~ variable) +
40 |   ylab("h(z)")
41 | 
42 | pred.resp.univar <- PredictorResponseUnivar(fit = fit_knots)
43 | s2 <- system.time(pred.resp.univar <- pred.resp.univar.knots <- PredictorResponseUnivar(fit = fit_knots, method = "exact"))
44 | s2
45 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
46 |   geom_smooth(stat = "identity") +
47 |   facet_wrap(~ variable) +
48 |   ylab("h(z)")
49 | 
50 | 
51 | ## with a larger number of iterations
52 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 5000, varsel = TRUE, family = family, knots = knots, control.params = list(verbose_show_ests = TRUE))
53 | 
54 | summary(fit0)
55 | 
56 | TracePlot(fit = fit0, par = "beta")
57 | ExtractPIPs(fit0)
--------------------------------------------------------------------------------
/R/argval_controlParams.R:
--------------------------------------------------------------------------------
 1 | # Validate control params list
 2 | ##components of list
 3 | ##lambda.jump / default=10
 4 | ##mu.lambda, sigma.lambda / default=10
 5 | ##a.p0, b.p0 / default=1
 6 | ##r.prior / default = "gamma", alt=invunif, unif
 7 | ##a.sigsq, b.sigsq / default=0.001
 8 | ##mu.r, sigma.r / default=5
 9 | ##r.muprop / default=1
10 | ##r.jump / default=0.2
11 | ##r.jump1, r.jump2 / default=2, 0.2
12 | ##r.a, r.b / default=0, 100
13 | 
14 | #
15 | validateControlParams <- function(varsel, family, id, control.params) {
16 |   message("Validating control.params...")
17 |   ##print(control.params)
18 |   if (family == "gaussian"){
19 |     stopifnot(control.params$a.sigsq > 0, control.params$b.sigsq > 0)
20 |   }
21 |   if (varsel == TRUE) {
22 |     stopifnot(control.params$a.p0 > 0, control.params$b.p0 > 0, control.params$r.jump1 > 0, control.params$r.jump2 > 0, control.params$r.muprop > 0)
23 |   }
24 |   else {
25 |     stopifnot(control.params$r.jump > 0)
26 |   } ##end varsel-specific stuff
27 |   ##if id, need two elements in mu.lambda, sigma.lambda and lambda.jump
28 |   if (!is.null(id)) {
29 |     stopifnot(length(control.params$mu.lambda) == 2, length(control.params$sigma.lambda) == 2, length(control.params$lambda.jump) == 2)
30 |   }
31 |   ##regardless of id, validate each element of these params
32 |   for (i in 1:length(control.params$mu.lambda)) {
33 |     stopifnot(control.params$mu.lambda > 0)
34 |   }
35 |   for (i in 1:length(control.params$sigma.lambda)) {
36 |     stopifnot(control.params$sigma.lambda > 0)
37 |   }
38 |   for (i in 1:length(control.params$lambda.jump)) {
39 |     stopifnot(control.params$lambda.jump > 0)
40 |   }
41 |   rprior <- control.params$r.prior
42 |   stopifnot(rprior == "gamma" | rprior == "unif" | rprior == "invunif")
43 |   ##stopifnot(length(intersect (control.params$r.prior, c("gamma","unif","invunif")))>0)
44 |   if (control.params$r.prior == "gamma") {
45 |     stopifnot(control.params$mu.r > 0, control.params$sigma.r > 0)
46 |   }
47 |   else {
48 |     stopifnot(control.params$r.a >= 0, control.params$r.b > control.params$r.a)
49 |   }
50 | }
--------------------------------------------------------------------------------
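The defaults listed in the header comments of `argval_controlParams.R` are tunable through the `control.params` argument of `kmbayes()`. A sketch (mine), with the `r.jump2` value borrowed from `tests_Znew.R` later in this dump and `y`, `Z`, `X` assumed to exist as in the test scripts:
```R
# Illustrative: override selected MCMC proposal parameters
# (defaults per the header comments: lambda.jump = 10, r.jump2 = 0.2)
fit <- kmbayes(y = y, Z = Z, X = X, iter = 1000, varsel = TRUE,
               control.params = list(r.jump2 = 0.5, lambda.jump = 10))
```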
/my-doc/tests/tests_noX.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | library(ggplot2)
 3 | 
 4 | family <- "gaussian"
 5 | family <- "binomial"
 6 | 
 7 | ## example where there is no X matrix ####
 8 | 
 9 | n <- 100
10 | M <- 5
11 | sigsq.true <- ifelse(family == "gaussian", 0.05, 1)
12 | Z <- matrix(rnorm(n * M), n, M)
13 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n))
14 | eps <- rnorm(n, sd = sqrt(sigsq.true))
15 | h <- apply(Z, 1, function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3))
16 | eps <- rnorm(n)
17 | y <- drop(h + eps)
18 | if (family == "binomial") {
19 |   ystar <- y
20 |   y <- ifelse(ystar > 0, 1, 0)
21 | }
22 | 
23 | set.seed(111)
24 | fit0 <- kmbayes(y = y, Z = Z, iter = 5000, varsel = TRUE, family = family)
25 | 
26 | fit0
27 | 
28 | summary(fit0)
29 | 
30 | TracePlot(fit = fit0, par = "beta")
31 | ExtractPIPs(fit0)
32 | 
33 | pred.resp.univar <- PredictorResponseUnivar(fit = fit0)
34 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
35 |   geom_smooth(stat = "identity") +
36 |   facet_wrap(~ variable) +
37 |   ylab("h(z)")
38 | 
39 | pred.resp.bivar <- PredictorResponseBivar(fit = fit0,
40 |                                           min.plot.dist = 1)
41 | 
42 | pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar,
43 |                                                        Z = Z, qs = c(0.25, 0.5, 0.75))
44 | 
45 | ggplot(pred.resp.bivar.levels, aes(z1, est)) +
46 |   geom_smooth(aes(col = quantile), stat = "identity") +
47 |   facet_grid(variable2 ~ variable1) +
48 |   ggtitle("h(expos1 | quantiles of expos2)") +
49 |   xlab("expos1")
50 | 
51 | risks.overall.approx <- OverallRiskSummaries(fit = fit0,
52 |                                              qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5)
53 | risks.overall.approx
54 | 
55 | risks.overall.exact <- OverallRiskSummaries(fit = fit0,
56 |                                             qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5, method = "exact")
57 | risks.overall.exact
58 | 
59 | risks.singvar <- SingVarRiskSummaries(fit = fit0,
60 |                                       qs.diff = c(0.25, 0.75), q.fixed = c(0.25, 0.50, 0.75))
61 | risks.singvar
62 | 
63 | risks.int <- SingVarIntSummaries(fit = fit0,
64 |                                  qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75))
65 | risks.int
66 | 
--------------------------------------------------------------------------------
/man/InvestigatePrior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/InvestigatePrior_rm.R
 3 | \name{InvestigatePrior}
 4 | \alias{InvestigatePrior}
 5 | \title{Investigate prior}
 6 | \usage{
 7 | InvestigatePrior(
 8 |   y,
 9 |   Z,
10 |   X,
11 |   ngrid = 50,
12 |   q.seq = c(2, 1, 1/2, 1/4, 1/8, 1/16),
13 |   r.seq = NULL,
14 |   Drange = NULL,
15 |   verbose = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{y}{a vector of outcome data of length \code{n}.}
20 | 
21 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
22 | 
23 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
24 | 
25 | \item{ngrid}{Number of grid points over which to plot the exposure-response function}
26 | 
27 | \item{q.seq}{Sequence of values corresponding to different degrees of smoothness in the estimated exposure-response function. A value of q corresponds to fractions of the range of the data over which there is a decay in the correlation \code{cor(h[i],h[j])} between two subjects by 50\code{\%}.}
28 | 
29 | \item{r.seq}{sequence of values at which to fix \code{r} for estimating the exposure-response function}
30 | 
31 | \item{Drange}{the range of the \code{z_m} data over which to apply the values of \code{q.seq}. If not specified, will be calculated as the maximum of the ranges of \code{z_1} through \code{z_M}.}
32 | 
33 | \item{verbose}{TRUE or FALSE: flag indicating whether to print to the screen which exposure variable and q value has been completed}
34 | }
35 | \value{
36 | a list containing the predicted values, residuals, and estimated predictor-response function for each degree of smoothness being considered
37 | }
38 | \description{
39 | Investigate the impact of the \code{r[m]} parameters on the smoothness of the exposure-response function \code{h(z[m])}.
40 | }
41 | \details{
42 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
43 | }
44 | \examples{
45 | ## First generate dataset
46 | set.seed(111)
47 | dat <- SimData(n = 50, M = 4)
48 | y <- dat$y
49 | Z <- dat$Z
50 | X <- dat$X
51 | 
52 | priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16))
53 | PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits)
54 | }
--------------------------------------------------------------------------------
/my-doc/tests/tests_Znew.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | library(magrittr)
 3 | 
 4 | ## Gaussian outcome
 5 | 
 6 | set.seed(111)
 7 | dat <- SimData(n = 50, M = 4)
 8 | y <- dat$y
 9 | Z <- dat$Z
10 | X <- dat$X
11 | 
12 | Znew <- rbind(c(0,0,0,0), c(1,1,1,1))
13 | 
14 | set.seed(111)
15 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 10000, verbose = FALSE, varsel = TRUE, Znew = Znew)
16 | 
17 | ests_samp <- ExtractEsts(fit0)$hnew
18 | ests_samp2 <- SamplePred(fit0, Znew = Znew, Xnew = matrix(0, nrow(Znew), ncol(X))) %>%
19 |   {cbind(colMeans(.), apply(., 2, sd))}
20 | system.time(
21 |   ests_approx <- ComputePostmeanHnew(fit0, Znew = Znew) %$%
22 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
23 | )
24 | system.time(
25 |   ests_approx2 <- ComputePostmeanHnew2(fit0, Znew = Znew) %$%
26 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
27 | )
28 | 
29 | ests_samp
30 | ests_samp2
31 | ests_approx
32 | ests_approx2
33 | 
34 | fit0
35 | 
36 | summary(fit0)
37 | 
38 | ## Binomial outcome
39 | 
40 | set.seed(123)
41 | n <- 200 ## number of observations
42 | M <- 4 ## number of exposure variables
43 | beta.true <- 0.1
44 | Z <- matrix(runif(n * M, -1, 1), n, M)
45 | x <- 3*cos(Z[, 1]) + 2*rnorm(n)
46 | hfun <- function(z) (2*z + 0.5) ^ 2
47 | h <- hfun(Z[, 1]) ## only depends on z1
48 | ## generate using latent normal representation
49 | eps <- rnorm(n)
50 | ystar <- x * beta.true + h + eps
51 | y <- ifelse(ystar > 0, 1, 0)
52 | datp <- list(n = n, M = M, beta.true = beta.true, Z = Z, h = h, X = cbind(x), y = y, eps = eps, ystar = ystar)
53 | rm(n, M, beta.true, Z, x, h, eps, y, ystar)
54 | 
55 | Znew <- rbind(c(0,0,0,0), c(1,1,1,1))
56 | 
57 | set.seed(123)
58 | fit0 <- kmbayes(y = datp$y, Z = datp$Z, X = datp$X, iter = 5000, verbose = TRUE, varsel = TRUE, Znew = Znew, family = "binomial", control.params = list(r.jump2 = 0.5))
59 | 
60 | ests_samp <- ExtractEsts(fit0)$hnew
61 | system.time(
62 |   ests_samp2 <- SamplePred(fit0, Znew = Znew, Xnew = matrix(0, nrow(Znew), ncol(datp$X))) %>%
63 |     {cbind(colMeans(.), apply(., 2, sd))}
64 | )
65 | system.time(
66 |   ests_approx <- ComputePostmeanHnew(fit0, Znew = Znew) %$%
67 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
68 | )
69 | system.time(
70 |   ests_approx2 <- ComputePostmeanHnew2(fit0, Znew = Znew) %$%
71 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
72 | )
73 | 
74 | ests_samp
75 | ests_samp2
76 | ests_approx
77 | ests_approx2
78 | 
79 | fit0
80 | 
81 | summary(fit0)
--------------------------------------------------------------------------------
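`tests_Znew.R` above benchmarks an experimental `ComputePostmeanHnew2()` that is not part of the exported API. The same approx-versus-exact comparison can be sketched with exported functions only (my illustration, reusing `fit0` and `Znew` from the script):
```R
# Illustrative: compare the fast approximation against the exact method
h_approx <- ComputePostmeanHnew(fit0, Znew = Znew, method = "approx")
h_exact  <- ComputePostmeanHnew(fit0, Znew = Znew, method = "exact")
cbind(approx = h_approx$postmean, exact = h_exact$postmean)
```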
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # bkmr 0.2.2.9000
 2 | 
 3 | ## Bug fixes
 4 | 
 5 | * Updated MCMC step for parameter lambda to improve MCMC convergence for probit model
 6 | 
 7 | ## Minor changes
 8 | 
 9 | * Modifications to speed up running time
10 | 
11 |     * Optimized the `makeKpart` function
12 | 
13 |     * Print diagnostics options moved outside of main loops
14 | 
15 | # bkmr 0.2.2
16 | 
17 | ## Bug fixes
18 | 
19 | * Corrected code that produced warning `length > 1 in coercion to logical`
20 | 
21 | * Updated functions that use deprecated functions from the `dplyr` package
22 | 
23 | ## Minor changes
24 | 
25 | * No longer export the following functions:
26 | 
27 |     * `CalcGroupPIPs`, `CalcWithinGroupPIPs`, and `CalcPIPs` as these should typically be calculated using the function `ExtractPIPs`
28 | 
29 |     * `ComputePostmeanHnew.approx` and `ComputePostmeanHnew.exact` as these should typically be calculated using the function `ComputePostmeanHnew`
30 | 
31 |     * `set_verbose_opts` as this is only called internally
32 | 
33 | * Expanded function documentation by adding example code
34 | 
35 | # bkmr 0.2.1
36 | 
37 | ## Bug fixes
38 | 
39 | * Allowable starting values for the `r[m]` parameters updated as follows
40 | 
41 |     * no longer truncated to a single value (when `varsel = FALSE` and `rmethod = "varying"`)
42 | 
43 |     * can be equal to 0 (when `varsel = TRUE`)
44 | 
45 | * Error no longer generated if starting values for h.hat are not positive
46 | 
47 | * When checking class of an object, use `inherits()` instead of `class()`
48 | 
49 | # bkmr 0.2.0
50 | 
51 | ## Major changes
52 | 
53 | * Added ability to have binomial outcome `family` by implementing probit regression within `kmbayes()`
54 | 
55 | * Removed computation of the subject-specific effects `h[i]` within `kmbayes()`, as this is not always desired, and greatly slows down model fitting
56 | 
57 |     * This could still be done by setting the option `est.h = TRUE` in the `kmbayes` function
58 | 
59 |     * posterior samples of `h[i]` can now be obtained via the post-processing `SamplePred` function; alternatively, posterior summaries (mean, variance) can be obtained via the post-processing `ComputePostmeanHnew` function
60 | 
61 | * Added ability to use exact estimates of the posterior mean and variance by specifying the argument `method = 'exact'` within the post-processing functions (e.g., `OverallRiskSummaries()`, `PredictorResponseUnivar()`)
62 | 
63 | ## Bug fixes
64 | 
65 | * Fixed `PredictorResponseBivarLevels()` when argument `both_pairs = TRUE` (#4)
--------------------------------------------------------------------------------
/man/PredictorResponseBivarLevels.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/PredictorResponseFunctions.R
 3 | \name{PredictorResponseBivarLevels}
 4 | \alias{PredictorResponseBivarLevels}
 5 | \title{Plot cross-sections of the bivariate predictor-response function}
 6 | \usage{
 7 | PredictorResponseBivarLevels(
 8 |   pred.resp.df,
 9 |   Z = NULL,
10 |   qs = c(0.25, 0.5, 0.75),
11 |   both_pairs = TRUE,
12 |   z.names = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{pred.resp.df}{object obtained from running the function \code{\link{PredictorResponseBivar}}}
17 | 
18 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
19 | 
20 | \item{qs}{vector of quantiles at which to fix the second variable}
21 | 
22 | \item{both_pairs}{flag indicating whether, if \code{h(z1)} is being plotted for z2 fixed at different levels, that they should be plotted in the reverse order as well (for \code{h(z2)} at different levels of z1)}
23 | 
24 | \item{z.names}{optional vector of names for the columns of \code{z}}
25 | }
26 | \value{
27 | a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the quantile at which the second predictor is fixed, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function
28 | }
29 | \description{
30 | Function to plot the \code{h} function of a particular variable at different levels (quantiles) of a second variable
31 | }
32 | \details{
33 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
34 | }
35 | \examples{
36 | ## First generate dataset
37 | set.seed(111)
38 | dat <- SimData(n = 50, M = 4)
39 | y <- dat$y
40 | Z <- dat$Z
41 | X <- dat$X
42 | 
43 | ## Fit model with component-wise variable selection
44 | ## Using only 100 iterations to make example run quickly
45 | ## Typically should use a large number of iterations for inference
46 | set.seed(111)
47 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
48 | 
49 | ## Obtain predicted value on new grid of points for each pair of predictors
50 | ## Using only a 10-by-10 point grid to make example run quickly
51 | pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10)
52 | pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar,
53 |                                                        Z = Z, qs = c(0.1, 0.5, 0.9))
54 | }
--------------------------------------------------------------------------------
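A plotting sketch for the long data frame above, adapted from `tests_noX.R` earlier in this dump:
```R
library(ggplot2)
# cross-sections of h(z1) at different quantiles of z2, faceted by exposure pair
ggplot(pred.resp.bivar.levels, aes(z1, est)) +
  geom_smooth(aes(col = quantile), stat = "identity") +
  facet_grid(variable2 ~ variable1) +
  xlab("expos1")
```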
/man/SamplePred.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SamplePred.R
 3 | \name{SamplePred}
 4 | \alias{SamplePred}
 5 | \title{Obtain posterior samples of predictions at new points}
 6 | \usage{
 7 | SamplePred(
 8 |   fit,
 9 |   Znew = NULL,
10 |   Xnew = NULL,
11 |   Z = NULL,
12 |   X = NULL,
13 |   y = NULL,
14 |   sel = NULL,
15 |   type = c("link", "response"),
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
21 | 
22 | \item{Znew}{optional matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If not specified, defaults to using observed Z values}
23 | 
24 | \item{Xnew}{optional matrix of new covariate values at which to obtain predictions. If not specified, defaults to using observed X values}
25 | 
26 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
27 | 
28 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
29 | 
30 | \item{y}{a vector of outcome data of length \code{n}.}
31 | 
32 | \item{sel}{A vector selecting which iterations of the BKMR fit should be retained for inference. If not specified, will default to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, then 100 iterations are kept}
33 | 
34 | \item{type}{whether to make predictions on the scale of the link or of the response; only relevant for the binomial outcome family}
35 | 
36 | \item{...}{other arguments; not currently used}
37 | }
38 | \value{
39 | a matrix with the posterior samples at the new points
40 | }
41 | \description{
42 | Obtains posterior samples of \code{E(Y) = h(Znew) + beta*Xnew} or of \code{g^{-1}[E(y)]}
43 | }
44 | \details{
45 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
46 | }
47 | \examples{
48 | set.seed(111)
49 | dat <- SimData(n = 50, M = 4)
50 | y <- dat$y
51 | Z <- dat$Z
52 | X <- dat$X
53 | 
54 | ## Fit model with component-wise variable selection
55 | ## Using only 100 iterations to make example run quickly
56 | ## Typically should use a large number of iterations for inference
57 | set.seed(111)
58 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
59 | 
60 | med_vals <- apply(Z, 2, median)
61 | Znew <- matrix(med_vals, nrow = 1)
62 | h_true <- dat$HFun(Znew)
63 | set.seed(111)
64 | samps3 <- SamplePred(fitkm, Znew = Znew, Xnew = cbind(0))
65 | head(samps3)
66 | }
--------------------------------------------------------------------------------
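Since `SamplePred()` returns a matrix of posterior draws (rows = iterations, columns = new points), a common reduction is a per-point mean and standard deviation; this sketch follows the idiom used in `scratch_hnew_pred_methods.R` earlier in this dump:
```R
samps <- SamplePred(fitkm, Znew = Znew, Xnew = cbind(0))
# posterior mean and sd of the prediction at each new point
cbind(mean = colMeans(samps), sd = apply(samps, 2, sd))
```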
/man/OverallRiskSummaries.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ComputeMixtureSummaries.R
 3 | \name{OverallRiskSummaries}
 4 | \alias{OverallRiskSummaries}
 5 | \title{Calculate overall risk summaries}
 6 | \usage{
 7 | OverallRiskSummaries(
 8 |   fit,
 9 |   y = NULL,
10 |   Z = NULL,
11 |   X = NULL,
12 |   qs = seq(0.25, 0.75, by = 0.05),
13 |   q.fixed = 0.5,
14 |   method = "approx",
15 |   sel = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
20 | 
21 | \item{y}{a vector of outcome data of length \code{n}.}
22 | 
23 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
24 | 
25 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
26 | 
27 | \item{qs}{vector of quantiles at which to calculate the overall risk summary}
28 | 
29 | \item{q.fixed}{a second quantile at which to compare the estimated \code{h} function}
30 | 
31 | \item{method}{method for obtaining posterior summaries at a vector of new points. Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details}
32 | 
33 | \item{sel}{selects which iterations of the MCMC sampler to use for inference; see details}
34 | }
35 | \value{
36 | a data frame containing the (posterior mean) estimate and posterior standard deviation of the overall risk measures
37 | }
38 | \description{
39 | Compare estimated \code{h} function when all predictors are at a particular quantile to when all are at a second fixed quantile
40 | }
41 | \details{
42 | \itemize{
43 |   \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations.
44 |   \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, then 100 iterations are kept
45 | }
46 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html}
47 | }
48 | \examples{
49 | ## First generate dataset
50 | set.seed(111)
51 | dat <- SimData(n = 50, M = 4)
52 | y <- dat$y
53 | Z <- dat$Z
54 | X <- dat$X
55 | 
56 | ## Fit model with component-wise variable selection
57 | ## Using only 100 iterations to make example run quickly
58 | ## Typically should use a large number of iterations for inference
59 | set.seed(111)
60 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
61 | 
62 | risks.overall <- OverallRiskSummaries(fit = fitkm, qs = seq(0.25, 0.75, by = 0.05),
63 |                                       q.fixed = 0.5, method = "exact")
64 | }
--------------------------------------------------------------------------------
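An illustrative plot of the overall risk summaries (mine; the column names `quantile`, `est`, and `sd` are assumptions about the returned data frame, consistent with the \value description above):
```R
library(ggplot2)
# estimate with approximate 95% pointwise bands across exposure quantiles
ggplot(risks.overall, aes(quantile, est, ymin = est - 1.96*sd, ymax = est + 1.96*sd)) +
  geom_pointrange()
```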
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 31 | } 32 | \value{ 33 | a list of length two containing the posterior mean vector and posterior variance matrix 34 | } 35 | \description{ 36 | Compute the posterior mean and variance of \code{h} at a new predictor values 37 | } 38 | \details{ 39 | \itemize{ 40 | \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations. 41 | \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, than 100 iterations are kept 42 | } 43 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html} 44 | } 45 | \examples{ 46 | set.seed(111) 47 | dat <- SimData(n = 50, M = 4) 48 | y <- dat$y 49 | Z <- dat$Z 50 | X <- dat$X 51 | 52 | ## Fit model with component-wise variable selection 53 | ## Using only 100 iterations to make example run quickly 54 | ## Typically should use a large number of iterations for inference 55 | set.seed(111) 56 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 57 | 58 | med_vals <- apply(Z, 2, median) 59 | Znew <- matrix(med_vals, nrow = 1) 60 | h_true <- dat$HFun(Znew) 61 | h_est1 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "approx") 62 | h_est2 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "exact") 63 | } 64 | -------------------------------------------------------------------------------- /man/PredictorResponseUnivar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseUnivar} 4 | \alias{PredictorResponseUnivar} 5 | \title{Plot univariate predictor-response function on a new grid of points} 6 | \usage{ 7 | PredictorResponseUnivar( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | which.z = 1:ncol(Z), 13 | method = "approx", 14 | ngrid = 50, 15 | q.fixed = 0.5, 16 | sel = NULL, 17 | min.plot.dist = Inf, 18 | center = TRUE, 19 | z.names = colnames(Z), 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 25 | 26 | \item{y}{a vector of outcome data of length \code{n}.} 27 | 28 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 29 | 30 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 31 | 32 | \item{which.z}{vector identifying which predictors (columns of \code{Z}) should be plotted} 33 | 34 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 35 | 36 | \item{ngrid}{number of grid points to cover the range of each predictor (column in \code{Z})} 37 | 38 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 39 | 40 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 41 | 42 | \item{min.plot.dist}{specifies a minimum distance that a new grid point needs to be from an observed data point in order to compute the prediction; points further than this will not be computed} 43 | 44 | \item{center}{flag for whether to scale the exposure-response function to have mean zero} 45 | 46 | \item{z.names}{optional vector of names for the columns of \code{z}} 47 | 48 | \item{...}{other arguments to pass on to the prediction function} 49 | } 50 | \value{ 51 | a long data frame with the predictor name, predictor value, posterior mean estimate, and posterior standard deviation 52 | } 53 | \description{ 54 | Plot univariate predictor-response function on a new grid of points 55 | } 56 | \details{ 57 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 58 | } 59 | \examples{ 60 | ## First generate dataset 61 | set.seed(111) 62 | dat <- SimData(n = 50, M = 4) 63 | y <- dat$y 64 | Z <- dat$Z 65 | X <- dat$X 66 | 67 | ## Fit model with component-wise variable selection 68 | ## Using only 100 iterations to make example run quickly 69 | ## Typically should use a large number of iterations for inference 70 | set.seed(111) 71 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 72 | pred.resp.univar <- PredictorResponseUnivar(fit = fitkm) 73 | } 74 | -------------------------------------------------------------------------------- /man/SingVarIntSummaries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ComputeMixtureSummaries.R 3 | \name{SingVarIntSummaries} 4 | \alias{SingVarIntSummaries} 5 | \title{Single Variable Interaction Summaries} 6 | \usage{ 7 | SingVarIntSummaries( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | which.z = 1:ncol(Z), 13 | qs.diff = c(0.25, 0.75), 14 | qs.fixed = c(0.25, 0.75), 15 | method = "approx", 16 | sel = NULL, 17 | z.names = colnames(Z), 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 23 | 24 | \item{y}{a vector of outcome data of length \code{n}.} 25 | 26 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 27 | 28 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 29 | 30 | \item{which.z}{vector indicating which variables (columns of \code{Z}) for which the summary should be computed} 31 | 32 | \item{qs.diff}{vector indicating the two quantiles at which to compute the single-predictor risk summary} 33 | 34 | \item{qs.fixed}{vector indicating the two quantiles at which to fix all of the remaining exposures in \code{Z}} 35 | 36 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 37 | 38 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 39 | 40 | \item{z.names}{optional vector of names for the columns of \code{z}} 41 | 42 | \item{...}{other arguments to pass on to the prediction function} 43 | } 44 | \value{ 45 | a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures 46 | } 47 | \description{ 48 | Compare the single-predictor health risks when all of the other predictors in Z are fixed to their a specific quantile to when all of the other predictors in Z are fixed to their a second specific quantile. 49 | } 50 | \details{ 51 | \itemize{ 52 | \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations. 53 | \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, than 100 iterations are kept 54 | } 55 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html} 56 | } 57 | \examples{ 58 | ## First generate dataset 59 | set.seed(111) 60 | dat <- SimData(n = 50, M = 4) 61 | y <- dat$y 62 | Z <- dat$Z 63 | X <- dat$X 64 | 65 | ## Fit model with component-wise variable selection 66 | ## Using only 100 iterations to make example run quickly 67 | ## Typically should use a large number of iterations for inference 68 | set.seed(111) 69 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 70 | 71 | risks.int <- SingVarIntSummaries(fit = fitkm, method = "exact") 72 | } 73 | -------------------------------------------------------------------------------- /man/SingVarRiskSummaries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ComputeMixtureSummaries.R 3 | \name{SingVarRiskSummaries} 4 | \alias{SingVarRiskSummaries} 5 | \title{Single Variable Risk Summaries} 6 | \usage{ 7 | SingVarRiskSummaries( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | which.z = 1:ncol(Z), 13 | qs.diff = c(0.25, 0.75), 14 | q.fixed = c(0.25, 0.5, 0.75), 15 | method = "approx", 16 | sel = NULL, 17 | z.names = colnames(Z), 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 23 | 24 | \item{y}{a vector of outcome data of length \code{n}.} 25 | 26 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 27 | 28 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 29 | 30 | \item{which.z}{vector indicating which variables (columns of \code{Z}) for which the summary should be computed} 31 | 32 | \item{qs.diff}{vector indicating the two quantiles \code{q_1} and \code{q_2} at which to compute \code{h(z_{q2}) - h(z_{q1})}} 33 | 34 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 35 | 36 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 37 | 38 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 39 | 40 | \item{z.names}{optional vector of names for the columns of \code{z}} 41 | 42 | \item{...}{other arguments to pass on to the prediction function} 43 | } 44 | \value{ 45 | a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures 46 | } 47 | \description{ 48 | Compute summaries of the risks associated with a change in a single variable in \code{Z} from a single level (quantile) to a second level (quantile), for the other variables in \code{Z} fixed to a specific level (quantile) 49 | } 50 | \details{ 51 | \itemize{ 52 | \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations. 53 | \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, than 100 iterations are kept 54 | } 55 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html} 56 | } 57 | \examples{ 58 | ## First generate dataset 59 | set.seed(111) 60 | dat <- SimData(n = 50, M = 4) 61 | y <- dat$y 62 | Z <- dat$Z 63 | X <- dat$X 64 | 65 | ## Fit model with component-wise variable selection 66 | ## Using only 100 iterations to make example run quickly 67 | ## Typically should use a large number of iterations for inference 68 | set.seed(111) 69 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 70 | 71 | risks.singvar <- SingVarRiskSummaries(fit = fitkm, method = "exact") 72 | } 73 | -------------------------------------------------------------------------------- /man/PredictorResponseBivar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseBivar} 4 | \alias{PredictorResponseBivar} 5 | \title{Predict the exposure-response function at a new grid of points} 6 | \usage{ 7 | PredictorResponseBivar( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | z.pairs = NULL, 13 | method = "approx", 14 | ngrid = 50, 15 | q.fixed = 0.5, 16 | sel = NULL, 17 | min.plot.dist = 0.5, 18 | center = TRUE, 19 | z.names = colnames(Z), 20 | verbose = TRUE, 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 26 | 27 | \item{y}{a vector of outcome data of length \code{n}.} 28 | 29 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 30 | 31 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 32 | 33 | \item{z.pairs}{data frame showing which pairs of predictors to plot} 34 | 35 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 36 | 37 | \item{ngrid}{number of grid points in each dimension} 38 | 39 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 40 | 41 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 42 | 43 | \item{min.plot.dist}{specifies a minimum distance that a new grid point needs to be from an observed data point in order to compute the prediction; points further than this will not be computed} 44 | 45 | \item{center}{flag for whether to scale the exposure-response function to have mean zero} 46 | 47 | \item{z.names}{optional vector of names for the columns of \code{z}} 48 | 49 | \item{verbose}{TRUE or FALSE: flag of whether to print intermediate output to the screen} 50 | 51 | \item{...}{other arguments to pass on to the prediction function} 52 | } 53 | \value{ 54 | a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function 55 | } 56 | \description{ 57 | Predict the exposure-response function at a new grid of points 58 | } 59 | \details{ 60 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 61 | } 62 | \examples{ 63 | ## First generate dataset 64 | set.seed(111) 65 | dat <- SimData(n = 50, M = 4) 66 | y <- dat$y 67 | Z <- dat$Z 68 | X <- dat$X 69 | 70 | ## Fit model with component-wise variable selection 71 | ## Using only 100 iterations to make example run quickly 72 | ## Typically should use a large number of iterations for inference 73 | set.seed(111) 74 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 75 | 76 | ## Obtain predicted value on new grid of points for each pair of predictors 77 | ## Using only a 10-by-10 point grid to make example run quickly 78 | pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10) 79 | 80 | } 81 | -------------------------------------------------------------------------------- /man/PredictorResponseBivarPair.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseBivarPair} 4 | \alias{PredictorResponseBivarPair} 5 | \title{Plot bivariate predictor-response function on a new grid of points} 6 | \usage{ 7 | PredictorResponseBivarPair( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | whichz1 = 1, 13 | whichz2 = 2, 14 | whichz3 = NULL, 15 | method = "approx", 16 | prob = 0.5, 17 | q.fixed = 0.5, 18 | sel = NULL, 19 | ngrid = 50, 20 | min.plot.dist = 0.5, 21 | center = TRUE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 27 | 28 | \item{y}{a vector of outcome data of length \code{n}.} 29 | 30 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 31 | 32 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. 
81 | -------------------------------------------------------------------------------- /man/PredictorResponseBivarPair.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseBivarPair} 4 | \alias{PredictorResponseBivarPair} 5 | \title{Plot bivariate predictor-response function on a new grid of points} 6 | \usage{ 7 | PredictorResponseBivarPair( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | whichz1 = 1, 13 | whichz2 = 2, 14 | whichz3 = NULL, 15 | method = "approx", 16 | prob = 0.5, 17 | q.fixed = 0.5, 18 | sel = NULL, 19 | ngrid = 50, 20 | min.plot.dist = 0.5, 21 | center = TRUE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{fit}{An object containing the results returned by the \code{kmbayes} function} 27 | 28 | \item{y}{a vector of outcome data of length \code{n}.} 29 | 30 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.} 31 | 32 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 33 | 34 | \item{whichz1}{vector identifying the first predictor (column of \code{Z}) to be plotted} 35 | 36 | \item{whichz2}{vector identifying the second predictor (column of \code{Z}) to be plotted} 37 | 38 | \item{whichz3}{vector identifying the third predictor, which will be set to a pre-specified fixed quantile (determined by \code{prob})} 39 | 40 | \item{method}{method for obtaining posterior summaries at a vector of new points. Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 41 | 42 | \item{prob}{pre-specified quantile at which to set the third predictor (determined by \code{whichz3}); defaults to 0.5 (50th percentile)} 43 | 44 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 45 | 46 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 47 | 48 | \item{ngrid}{number of grid points to cover the range of each predictor (column in \code{Z})} 49 | 50 | \item{min.plot.dist}{predictions are only computed at new grid points that are within this distance of an observed data point; points farther than this will not be computed} 51 | 52 | \item{center}{flag for whether to scale the exposure-response function to have mean zero} 53 | 54 | \item{...}{other arguments to pass on to the prediction function} 55 | } 56 | \value{ 57 | a data frame with the value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation 58 | } 59 | \description{ 60 | Plot bivariate predictor-response function on a new grid of points 61 | } 62 | \examples{ 63 | ## First generate dataset 64 | set.seed(111) 65 | dat <- SimData(n = 50, M = 4) 66 | y <- dat$y 67 | Z <- dat$Z 68 | X <- dat$X 69 | 70 | ## Fit model with component-wise variable selection 71 | ## Using only 100 iterations to make example run quickly 72 | ## Typically should use a large number of iterations for inference 73 | set.seed(111) 74 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 75 | 76 | ## Obtain predicted value on new grid of points 77 | ## Using only a 10-by-10 point grid to make example run quickly 78 | pred.resp.bivar12 <- PredictorResponseBivarPair(fit = fitkm, min.plot.dist = 1, ngrid = 10) 79 | }
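Continuing the example, a sketch of selecting a different pair: the surface for predictors 1 and 3 with predictor 2 held at its 75th percentile (all arguments as documented above):

pred.resp.bivar13 <- PredictorResponseBivarPair(fit = fitkm, whichz1 = 1, whichz2 = 3,
  whichz3 = 2, prob = 0.75, min.plot.dist = 1, ngrid = 10)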
80 | -------------------------------------------------------------------------------- /R/argval_ParamInput.R: -------------------------------------------------------------------------------- 1 | # Validate control params list 2 | ##components of list 3 | ##lambda.jump / default=10 4 | ##mu.lambda, sigma.lambda / default=10 5 | ##a.p0, b.p0 / default=1 6 | ##r.prior / default = "gamma", alt=invunif, unif 7 | ##a.sigsq, b.sigsq / default=0.001 8 | ##mu.r, sigma.r / default=5 9 | ##r.muprop / default=1 10 | ##r.jump / default=0.2 11 | ##r.jump1, r.jump2 / default=2, 0.2 12 | ##r.a, r.b / default=0, 100 13 | 14 | # 15 | validateStartingParams <- function(varsel, Ylength, Xwidth, Zwidth, starting.params) { 16 | message("Validating starting.params...") 17 | print(starting.params) 18 | stopifnot(starting.params$b.sigsq.eps > 0, starting.params$lambda > 0) 19 | ##messages only for scalar where vector required, expansion happens in main function 20 | ##beta length, any values 21 | if (length(starting.params$beta) != Xwidth) { 22 | message("beta should be a vector of length equal to the number of columns of X. A vector will be created by repeating the input value.") 23 | } 24 | ##h.hat length and values 25 | if (length(starting.params$h.hat) != Ylength) { 26 | message("h.hat should be a vector of length equal to number of rows in Y. A vector will be created by repeating the input value.") 27 | } 28 | for (i in 1:length(starting.params$h.hat)) { 29 | stopifnot(starting.params$h.hat[i] > 0) 30 | } 31 | ##delta length, 0 or 1 are valid values 32 | if (length(starting.params$delta) != Zwidth) { 33 | message("delta should be a vector of length equal to the number of columns of Z. A vector will be created by repeating the input value.") 34 | } 35 | for (i in 1:length(starting.params$delta)) { 36 | stopifnot(starting.params$delta[i] == 1 || starting.params$delta[i] == 0) 37 | } 38 | ## r length depends on varsel, truncate here but expand if necessary in main function 39 | if (varsel == TRUE) { 40 | if (length(starting.params$r) != Zwidth) { 41 | message("r should be a vector of length equal to the number of columns of Z. A vector will be created by repeating the input value.") 42 | } 43 | } 44 | else { ##without variable selection a single r is used, so truncate vector input 45 | if (length(starting.params$r) > 1) { 46 | message("r should be a scalar. Vector input will be truncated.") 47 | starting.params$r=starting.params$r[1] 48 | } 49 | } 50 | for (i in 1:length(starting.params$r)) { 51 | stopifnot(starting.params$r[i] > 0) ##all r starting values must be positive 52 | } 53 | } 54 | 55 | 56 | validateControlParams <- function(varsel, family, id, control.params) { 57 | message("Validating control.params...") 58 | ##print(control.params) 59 | if (family == "gaussian"){ 60 | stopifnot(control.params$a.sigsq > 0, control.params$b.sigsq > 0) 61 | } 62 | if (varsel == TRUE) { 63 | stopifnot(control.params$a.p0 > 0, control.params$b.p0 > 0, control.params$r.jump1 > 0, control.params$r.jump2 > 0, control.params$r.muprop > 0) 64 | } 65 | else { 66 | stopifnot(control.params$r.jump > 0) 67 | } ##end varsel-specific stuff 68 | ##if id, need two elements in mu.lambda, sigma.lambda and lambda.jump 69 | if (!is.null(id)) { 70 | stopifnot(length(control.params$mu.lambda) == 2, length(control.params$sigma.lambda) == 2, length(control.params$lambda.jump) == 2) 71 | } 72 | ##regardless of id, validate each element of these params 73 | for (i in 1:length(control.params$mu.lambda)) { 74 | stopifnot(control.params$mu.lambda[i] > 0) 75 | } 76 | for (i in 1:length(control.params$sigma.lambda)) { 77 | stopifnot(control.params$sigma.lambda[i] > 0) 78 | } 79 | for (i in 1:length(control.params$lambda.jump)) { 80 | stopifnot(control.params$lambda.jump[i] > 0) 81 | } 82 | rprior=control.params$r.prior 83 | stopifnot(rprior == "gamma" | rprior == "unif" | rprior == "invunif") 84 | ##stopifnot(length(intersect (control.params$r.prior, c("gamma","unif","invunif")))>0) 85 | if (control.params$r.prior == "gamma") { 86 | stopifnot(control.params$mu.r > 0, control.params$sigma.r > 0) 87 | } 88 | else { 89 | stopifnot(control.params$r.a >= 0, control.params$r.b > control.params$r.a) 90 | } 91 | } -------------------------------------------------------------------------------- /R/print_verbose.R: -------------------------------------------------------------------------------- 1 | #' Options for printing summary of model fit to the console 2 | #' 3 | #' Set options for what will be printed to the console when verbose = TRUE in the main kmbayes function 4 | #' 5 | #' @param verbose_freq After this percentage of iterations has been completed, a summary of the model fit so far will be printed to the console 6 | #' @param verbose_show_ests TRUE or FALSE: flag indicating 
whether to print out summary statistics of all posterior samples obtained up until this point, for select parameters 7 | #' @param verbose_digits Number of digits to be printed to the console 8 | #' 9 | #' @export 10 | #' 11 | 12 | set_verbose_opts <- function(verbose_freq = NULL, verbose_show_ests = NULL, verbose_digits = NULL, 13 | tot_iter) { 14 | if (is.null(verbose_freq)) verbose_freq <- 10 15 | if (is.null(verbose_digits)) verbose_digits <- 5 16 | if (is.null(verbose_show_ests)) verbose_show_ests <- FALSE 17 | 18 | all_iter <- 100*(1:tot_iter)/tot_iter 19 | sel_iter <- seq(verbose_freq, 100, by = verbose_freq) 20 | print_iter <- sapply(sel_iter, function(x) min(which(all_iter >= x))) 21 | 22 | opts <- list( 23 | verbose_freq = verbose_freq, 24 | verbose_digits = verbose_digits, 25 | verbose_show_ests = verbose_show_ests, 26 | print_iter = print_iter 27 | ) 28 | opts 29 | } 30 | 31 | 32 | 33 | print_diagnostics <- function(verbose, opts, curr_iter, tot_iter, chain, varsel, hier_varsel, ztest, Z, groups) { 34 | verbose_freq <- opts$verbose_freq 35 | verbose_digits <- opts$verbose_digits 36 | verbose_show_ests <- opts$verbose_show_ests 37 | print_iter <- opts$print_iter 38 | 39 | s <- curr_iter 40 | nsamp <- tot_iter 41 | perc_iter_completed <- round(100*curr_iter/tot_iter, 1) 42 | 43 | elapsed_time <- difftime(Sys.time(), chain$time1) 44 | 45 | if (s %in% print_iter) { 46 | #if (verbose) message("------------------------------------------") 47 | if (verbose) cat("\n") 48 | message("Iteration: ", s, " (", perc_iter_completed, "% completed; ", round(elapsed_time, verbose_digits), " ", attr(elapsed_time, "units"), " elapsed)") 49 | 50 | if (verbose) { 51 | cat("Acceptance rates for Metropolis-Hastings algorithm:\n") 52 | accep_rates <- data.frame() 53 | ## lambda 54 | nm <- "lambda" 55 | rate <- colMeans(chain$acc.lambda[2:s, ,drop = FALSE]) 56 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 57 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 58 | ## r_m 59 | if (!varsel) { 60 | nm <- "r" 61 | rate <- colMeans(chain$acc.r[2:s, , drop = FALSE]) 62 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 63 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 64 | } else { 65 | nm <- "r/delta (overall)" 66 | rate <- mean(chain$acc.rdelta[2:s]) 67 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 68 | ## 69 | nm <- "r/delta (move 1)" 70 | rate <- mean(chain$acc.rdelta[2:s][chain$move.type[2:s] == 1]) 71 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 72 | ## 73 | nm <- "r/delta (move 2)" 74 | rate <- mean(chain$acc.rdelta[2:s][chain$move.type[2:s] == 2]) 75 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 76 | if (hier_varsel) { 77 | nm <- "r/delta (move 3)" 78 | rate <- mean(chain$acc.rdelta[2:s][chain$move.type[2:s] == 3]) 79 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 80 | } 81 | } 82 | print(accep_rates) 83 | 84 | ## extra information 85 | if (verbose_show_ests) { 86 | cat("\nCurrent parameter estimates:\n") 87 | chain$varsel <- varsel 88 | class(chain) <- c("bkmrfit", class(chain)) 89 | chain$Z <- Z 90 | if (hier_varsel) chain$groups <- groups 91 | ests <- ExtractEsts(chain, q = c(0.025, 0.975), sel = 2:s) 92 | #ests$h <- ests$h[c(1,2,nrow(ests$h)), ] 93 | summ <- with(ests, rbind(beta, sigsq.eps, r, lambda)) 94 | summ <- data.frame(param = rownames(summ), round(summ, verbose_digits)) 95 | rownames(summ) <- NULL 96 | print(summ) 97 | if (varsel) { 98 | cat("\nCurrent posterior inclusion 
probabilities:\n") 99 | pips <- ExtractPIPs(chain, sel = 2:s) 100 | pips[, -1] <- round(pips[, -1], verbose_digits) 101 | print(pips) 102 | } 103 | } 104 | } 105 | } 106 | 107 | } -------------------------------------------------------------------------------- /R/argval_ListInput.R: -------------------------------------------------------------------------------- 1 | # Validate control params list 2 | ##components of list 3 | ##lambda.jump / default=10 4 | ##mu.lambda, sigma.lambda / default=10 5 | ##a.p0, b.p0 / default=1 6 | ##r.prior / default = "gamma", alt=invunif, unif 7 | ##a.sigsq, b.sigsq / default=0.001 8 | ##mu.r, sigma.r / default=5 9 | ##r.muprop / default=1 10 | ##r.jump / default=0.2 11 | ##r.jump1, r.jump2 / default=2, 0.2 12 | ##r.a, r.b / default=0, 100 13 | 14 | # 15 | validateStartingValues <- function(varsel, y, X, Z, starting.values, rmethod) { 16 | Ylength <- length(y) 17 | Xwidth <- ncol(X) 18 | Zwidth <- ncol(Z) 19 | message ("Validating starting.values...") 20 | ##print(starting.values) 21 | stopifnot(starting.values$sigsq.eps > 0, all(starting.values$lambda > 0)) 22 | ##messages only for scalar where vector required, expansion happens in main function 23 | if (length(starting.values$beta) != Xwidth) { 24 | message("beta should be a vector of length equal to the number of columns of X. Input will be repeated or truncated as necessary.") 25 | } 26 | ##h.hat length and values 27 | if (length(starting.values$h.hat) != Ylength) { 28 | message("h.hat should be a vector of length equal to number of rows in Y. Input will be repeated or truncated as necessary.") 29 | } 30 | # for (i in 1:length(starting.values$h.hat)) { 31 | # stopifnot(starting.values$h.hat > 0) 32 | # } 33 | ##delta length, 0 or 1 are valid values 34 | if (length(starting.values$delta) != Zwidth) { 35 | message("delta should be a vector of length equal to the number of columns of Z. Input will be repeated or truncated as necessary.") 36 | } 37 | #for (i in 1:length(starting.values$delta)) { 38 | #stopifnot(starting.values$delta == 1 || starting.values$delta == 0) 39 | stopifnot(all(starting.values$delta %in% c(1, 0))) 40 | #} 41 | ## r length depends on varsel, truncate here but expand if necessary in main function 42 | if (varsel == TRUE) { 43 | if (length(starting.values$r) != Zwidth) { 44 | message("r should be a vector of length equal to the number of columns of Z. Input will be repeated or truncated as necessary.") 45 | } 46 | stopifnot(starting.values$r >= 0) ## note: still need to add check that delta starting values and r starting values do not conflict (e.g., delta = 1 but r_m = 0) 47 | } 48 | else { 49 | if (rmethod == "equal" & length(starting.values$r) > 1) { 50 | message("r should a scalar. Vector input will be truncated.") 51 | starting.values$r=starting.values$r[1] 52 | } else if (length(starting.values$r) != Zwidth) { 53 | message("r should be a vector of length equal to the number of columns of Z. 
Input will be repeated or truncated as necessary.") 54 | } 55 | stopifnot(all(starting.values$r > 0)) 56 | } 57 | 58 | # for (i in 1:length(starting.values$r)) { 59 | # stopifnot(starting.values$r > 0) 60 | # } 61 | } 62 | 63 | 64 | validateControlParams <- function(varsel, family, id, control.params) { 65 | message("Validating control.params...") 66 | ##print(control.params) 67 | if (family == "gaussian"){ 68 | stopifnot(control.params$a.sigsq > 0, control.params$b.sigsq > 0) 69 | } 70 | if (varsel == TRUE) { 71 | stopifnot(control.params$a.p0 > 0, control.params$b.p0 > 0, control.params$r.jump1 > 0, control.params$r.jump2 > 0, control.params$r.muprop > 0) 72 | } 73 | else { 74 | stopifnot(control.params$r.jump > 0) 75 | } ##end varsel-specific stuff 76 | ##if id, need two elements in mu.lambda, sigma.lambda and lambda.jump 77 | if (!is.null(id)) { 78 | stopifnot(length(control.params$mu.lambda) == 2, length(control.params$sigma.lambda) == 2, length(control.params$lambda.jump) == 2) 79 | } 80 | ##regardless of id, validate each element of these params 81 | for (i in 1:length(control.params$mu.lambda)) { 82 | stopifnot(control.params$mu.lambda[i] > 0) 83 | } 84 | for (i in 1:length(control.params$sigma.lambda)) { 85 | stopifnot(control.params$sigma.lambda[i] > 0) 86 | } 87 | for (i in 1:length(control.params$lambda.jump)) { 88 | stopifnot(control.params$lambda.jump[i] > 0) 89 | } 90 | rprior=control.params$r.prior 91 | stopifnot(rprior == "gamma" | rprior == "unif" | rprior == "invunif") 92 | ##stopifnot(length(intersect (control.params$r.prior, c("gamma","unif","invunif")))>0) 93 | if (control.params$r.prior == "gamma") { 94 | stopifnot(control.params$mu.r > 0, control.params$sigma.r > 0) 95 | } 96 | else { 97 | stopifnot(control.params$r.a >= 0, control.params$r.b > control.params$r.a) 98 | } 99 | } -------------------------------------------------------------------------------- /R/SamplePred.R: -------------------------------------------------------------------------------- 1 | #' Obtain posterior samples of predictions at new points 2 | #' 3 | #' Obtains posterior samples of \code{E(Y) = h(Znew) + beta*Xnew} or of \code{g^{-1}[E(Y)]} 4 | #' 5 | #' @param sel A vector selecting which iterations of the BKMR fit should be retained for inference. If not specified, will default to keeping every 10 iterations after dropping the first 50\% of samples; if this results in fewer than 100 iterations, then 100 iterations are kept 6 | #' @param Znew optional matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If not specified, defaults to using observed Z values 7 | #' @param Xnew optional matrix of new covariate values at which to obtain predictions. If not specified, defaults to using observed X values 8 | #' @param type whether to make predictions on the scale of the link or of the response; only relevant for the binomial outcome family 9 | #' @param ... 
other arguments; not currently used 10 | #' @inheritParams kmbayes 11 | #' @inheritParams ExtractEsts 12 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 13 | #' @export 14 | #' 15 | #' @return a matrix with the posterior samples at the new points 16 | #' 17 | #' @examples 18 | #' set.seed(111) 19 | #' dat <- SimData(n = 50, M = 4) 20 | #' y <- dat$y 21 | #' Z <- dat$Z 22 | #' X <- dat$X 23 | #' 24 | #' ## Fit model with component-wise variable selection 25 | #' ## Using only 100 iterations to make example run quickly 26 | #' ## Typically should use a large number of iterations for inference 27 | #' set.seed(111) 28 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 29 | #' 30 | #' med_vals <- apply(Z, 2, median) 31 | #' Znew <- matrix(med_vals, nrow = 1) 32 | #' h_true <- dat$HFun(Znew) 33 | #' set.seed(111) 34 | #' samps3 <- SamplePred(fitkm, Znew = Znew, Xnew = cbind(0)) 35 | #' head(samps3) 36 | SamplePred <- function(fit, Znew = NULL, Xnew = NULL, Z = NULL, X = NULL, y = NULL, sel = NULL, type = c("link", "response"), ...) { 37 | 38 | if (inherits(fit, "bkmrfit")) { 39 | if (is.null(y)) y <- fit$y 40 | if (is.null(Z)) Z <- fit$Z 41 | if (is.null(X)) X <- fit$X 42 | } 43 | if (length(type) > 1) type <- type[1] 44 | 45 | if (!is.null(Znew)) { 46 | if (is.null(dim(Znew))) Znew <- matrix(Znew, nrow = 1) 47 | if (inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 48 | if (ncol(Z) != ncol(Znew)) { 49 | stop("Znew must have the same number of columns as Z") 50 | } 51 | } 52 | 53 | if (is.null(Xnew)) Xnew <- X 54 | if (!inherits(Xnew, "matrix")) Xnew <- matrix(Xnew, nrow = 1) 55 | if (ncol(X) != ncol(Xnew)) { 56 | stop("Xnew must have the same number of columns as X") 57 | } 58 | 59 | if (is.null(sel)) { 60 | sel <- with(fit, seq(floor(iter/2) + 1, iter, 10)) 61 | if (length(sel) < 100) { 62 | sel <- with(fit, seq(floor(iter/2) + 1, iter, length.out = 100)) 63 | } 64 | sel <- unique(floor(sel)) 65 | } 66 | 67 | family <- fit$family 68 | data.comps <- fit$data.comps 69 | lambda <- fit$lambda 70 | sigsq.eps <- fit$sigsq.eps 71 | beta <- fit$beta 72 | r <- fit$r 73 | 74 | if (!is.null(Znew)) { 75 | preds <- matrix(NA, length(sel), nrow(Znew)) 76 | colnames(preds) <- paste0("znew", 1:nrow(Znew)) 77 | } else { 78 | preds <- matrix(NA, length(sel), nrow(Z)) 79 | colnames(preds) <- paste0("z", 1:nrow(Z)) 80 | } 81 | rownames(preds) <- paste0("iter", sel) 82 | for (s in sel) { 83 | beta.samp <- beta[s, ] 84 | 85 | if (family == "gaussian") { 86 | ycont <- y 87 | } else if (family == "binomial") { 88 | ycont <- fit$ystar[s, ] 89 | } 90 | if (!is.null(Znew)) { 91 | hsamp <- newh.update(Z = Z, Znew = Znew, Vcomps = NULL, lambda = lambda[s, ], sigsq.eps = sigsq.eps[s], r = r[s, ], y = ycont, X = X, beta = beta.samp, data.comps = data.comps) 92 | } else { 93 | hsamp <- h.update(lambda = lambda[s, ], Vcomps = NULL, sigsq.eps = sigsq.eps[s], y = ycont, X = X, beta = beta.samp, r = r[s, ], Z = Z, data.comps = data.comps)$hsamp 94 | } 95 | 96 | Xbeta <- drop(Xnew %*% beta.samp) 97 | linpred <- hsamp + Xbeta 98 | 99 | if (type == "link") { 100 | pred <- linpred 101 | } else if (type == "response") { 102 | if (family == "gaussian") { 103 | pred <- linpred 104 | } else if (family == "binomial") { 105 | pred <- pnorm(linpred) 106 | } 107 | } 108 | preds[paste0("iter", s), ] <- pred 109 | } 110 | attr(preds, "type") <- type 111 | attr(preds, "family") <- family 112 | preds 113 | 114 | } 
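The matrix returned by SamplePred has one row per retained MCMC iteration and one column per prediction point, so posterior summaries of a prediction are one-liners. A sketch continuing the example from the roxygen block above:

## Posterior mean and approximate 95% credible interval at Znew
c(mean = mean(samps3[, 1]), quantile(samps3[, 1], probs = c(0.025, 0.975)))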
-------------------------------------------------------------------------------- /man/kmbayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bkmr_main_functions.R 3 | \name{kmbayes} 4 | \alias{kmbayes} 5 | \title{Fit Bayesian kernel machine regression} 6 | \usage{ 7 | kmbayes( 8 | y, 9 | Z, 10 | X = NULL, 11 | iter = 1000, 12 | family = "gaussian", 13 | id = NULL, 14 | verbose = TRUE, 15 | Znew = NULL, 16 | starting.values = NULL, 17 | control.params = NULL, 18 | varsel = FALSE, 19 | groups = NULL, 20 | knots = NULL, 21 | ztest = NULL, 22 | rmethod = "varying", 23 | est.h = FALSE 24 | ) 25 | } 26 | \arguments{ 27 | \item{y}{a vector of outcome data of length \code{n}.} 28 | 29 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 30 | 31 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 32 | 33 | \item{iter}{number of iterations to run the sampler} 34 | 35 | \item{family}{a description of the error distribution and link function to be used in the model. Currently implemented for \code{gaussian} and \code{binomial} families.} 36 | 37 | \item{id}{optional vector (of length \code{n}) of grouping factors for fitting a model with a random intercept. If NULL then no random intercept will be included.} 38 | 39 | \item{verbose}{TRUE or FALSE: flag indicating whether to print intermediate diagnostic information during the model fitting.} 40 | 41 | \item{Znew}{optional matrix of new predictor values at which to predict \code{h}, where each row represents a new observation. This will slow down the model fitting, and can be done as a post-processing step using \code{\link{SamplePred}}} 42 | 43 | \item{starting.values}{list of starting values for each parameter. If not specified default values will be chosen.} 44 | 45 | \item{control.params}{list of parameters specifying the prior distributions and tuning parameters for the MCMC algorithm. If not specified default values will be chosen.} 46 | 47 | \item{varsel}{TRUE or FALSE: indicator for whether to conduct variable selection on the Z variables in \code{h}} 48 | 49 | \item{groups}{optional vector (of length \code{M}) of group indicators for fitting hierarchical variable selection if varsel=TRUE. If varsel=TRUE without group specification, component-wise variable selections will be performed.} 50 | 51 | \item{knots}{optional matrix of knot locations for implementing the Gaussian predictive process of Banerjee et al. (2008). Currently only implemented for models without a random intercept.} 52 | 53 | \item{ztest}{optional vector indicating on which variables in Z to conduct variable selection (the remaining variables will be forced into the model).} 54 | 55 | \item{rmethod}{for those predictors being forced into the \code{h} function, the method for sampling the \code{r[m]} values. Takes the value of 'varying' to allow separate \code{r[m]} for each predictor; 'equal' to force the same \code{r[m]} for each predictor; or 'fixed' to fix the \code{r[m]} to their starting values} 56 | 57 | \item{est.h}{TRUE or FALSE: indicator for whether to sample from the posterior distribution of the subject-specific effects h_i within the main sampler. 
This will slow down the model fitting.} 58 | } 59 | \value{ 60 | an object of class "bkmrfit" (containing the posterior samples from the model fit), which has the associated methods: 61 | \itemize{ 62 | \item \code{\link{print}} (i.e., \code{\link{print.bkmrfit}}) 63 | \item \code{\link{summary}} (i.e., \code{\link{summary.bkmrfit}}) 64 | } 65 | } 66 | \description{ 67 | Fits the Bayesian kernel machine regression (BKMR) model using Markov chain Monte Carlo (MCMC) methods. 68 | } 69 | \examples{ 70 | ## First generate dataset 71 | set.seed(111) 72 | dat <- SimData(n = 50, M = 4) 73 | y <- dat$y 74 | Z <- dat$Z 75 | X <- dat$X 76 | 77 | ## Fit model with component-wise variable selection 78 | ## Using only 100 iterations to make example run quickly 79 | ## Typically should use a large number of iterations for inference 80 | set.seed(111) 81 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 82 | } 83 | \references{ 84 | Bobb, JF, Valeri L, Claus Henn B, Christiani DC, Wright RO, Mazumdar M, Godleski JJ, Coull BA (2015). Bayesian Kernel Machine Regression for Estimating the Health Effects of Multi-Pollutant Mixtures. Biostatistics 16, no. 3: 493-508. 85 | 86 | Banerjee S, Gelfand AE, Finley AO, Sang H (2008). Gaussian predictive process models for large spatial data sets. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 70(4), 825-848. 87 | } 88 | \seealso{ 89 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 90 | } 91 | -------------------------------------------------------------------------------- /R/CalcPIPs.R: -------------------------------------------------------------------------------- 1 | #' Calculate group-specific posterior inclusion probabilities 2 | #' 3 | #' Calculate posterior inclusion probabilities for each group of variables 4 | #' 5 | #' @inheritParams ExtractEsts 6 | #' 7 | #' @noRd 8 | CalcGroupPIPs <- function(fit, sel = NULL) { 9 | groups <- fit$groups 10 | if (is.null(groups)) { 11 | stop("Cannot compute group-specific posterior inclusion probabilities; BKMR was not run with variable groups") 12 | } 13 | if (is.null(sel)) { 14 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 15 | } 16 | grps <- unique(groups) 17 | groupincl.probs <- sapply(grps, function(x) mean(rowSums(fit$delta[sel, groups == x, drop = FALSE]) > 0)) 18 | groupincl.probs 19 | } 20 | 21 | #' Calculate conditional predictor specific posterior inclusion probabilities 22 | #' 23 | #' For those predictors within a multi-predictor group, as defined using the \code{groups} argument, the posterior inclusion probabilities for the predictor conditional on the group being selected into the model. 
24 | #' 25 | #' @inheritParams ExtractEsts 26 | #' 27 | #' @noRd 28 | CalcWithinGroupPIPs <- function(fit, sel = NULL) { 29 | groups <- fit$groups 30 | if (is.null(groups)) { 31 | stop("Cannot compute group-specific posterior inclusion probabilities; BKMR was not run with variable groups") 32 | } 33 | if (is.null(sel)) { 34 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 35 | } 36 | 37 | # grps <- unique(groups) 38 | # ngrps <- sapply(grps, function(x) sum(groups == x)) 39 | condprobs.group <- rep(NA, length(groups)) 40 | for (i in unique(groups)) { 41 | condprobs.group[groups == i] <- colMeans(fit$delta[sel, ][rowSums(fit$delta[sel, groups == i, drop = FALSE]) > 0, groups == i, drop = FALSE]) 42 | } 43 | 44 | condprobs.group 45 | } 46 | 47 | 48 | #' Calculate variable-specific posterior inclusion probabilities 49 | #' 50 | #' Calculate variable-specific posterior inclusion probabilities from BKMR model fit 51 | #' 52 | #' @inheritParams ExtractEsts 53 | #' 54 | #' @noRd 55 | CalcPIPs <- function(fit, sel = NULL) { 56 | if (inherits(fit, "bkmrfit")) { 57 | if (is.null(sel)) { 58 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 59 | } 60 | groups <- fit$groups 61 | if (is.null(groups)) { 62 | ret <- colMeans(fit$delta[sel, , drop = FALSE]) 63 | } 64 | 65 | } 66 | ret 67 | } 68 | 69 | #' Extract posterior inclusion probabilities (PIPs) from BKMR model fit 70 | #' 71 | #' Extract posterior inclusion probabilities (PIPs) from Bayesian Kernel Machine Regression (BKMR) model fit 72 | #' 73 | #' @inheritParams ExtractEsts 74 | #' @param z.names optional argument providing the names of the variables included in the \code{h} function. 75 | #' 76 | #' @return a data frame with the variable-specific PIPs for BKMR fit with component-wise variable selection, and with the group-specific and conditional (within-group) PIPs for BKMR fit with hierarchical variable selection. 
77 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 78 | #' @export 79 | #' 80 | #' @examples 81 | #' ## First generate dataset 82 | #' set.seed(111) 83 | #' dat <- SimData(n = 50, M = 4) 84 | #' y <- dat$y 85 | #' Z <- dat$Z 86 | #' X <- dat$X 87 | #' 88 | #' ## Fit model with component-wise variable selection 89 | #' ## Using only 100 iterations to make example run quickly 90 | #' ## Typically should use a large number of iterations for inference 91 | #' set.seed(111) 92 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 93 | #' 94 | #' ExtractPIPs(fitkm) 95 | ExtractPIPs <- function(fit, sel = NULL, z.names = NULL) { 96 | if (inherits(fit, "bkmrfit")) { 97 | if (!fit$varsel) { 98 | stop("This model was not fit with variable selection.") 99 | } 100 | if (is.null(sel)) { 101 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 102 | } 103 | if (is.null(z.names)) { 104 | z.names <- colnames(fit$Z) 105 | } 106 | if (is.null(z.names)) { 107 | z.names <- paste0("z", 1:ncol(fit$Z)) 108 | } 109 | df <- data.frame(variable = z.names, stringsAsFactors = FALSE) 110 | groups <- fit$groups 111 | if (is.null(groups)) { 112 | df$PIP <- colMeans(fit$delta[sel, , drop = FALSE]) 113 | } else { 114 | 115 | ## group-specific posterior inclusion probability 116 | df$group <- groups 117 | grps <- unique(groups) 118 | groupincl.probs <- sapply(grps, function(x) mean(rowSums(fit$delta[sel, groups == x, drop = FALSE]) > 0)) 119 | df.group <- dplyr::tibble(group = grps, 120 | groupPIP = groupincl.probs) 121 | df <- dplyr::inner_join(df, df.group, by = "group") 122 | 123 | ## within-group conditional PIP 124 | condprobs.group <- rep(NA, length(groups)) 125 | for (i in unique(groups)) { 126 | condprobs.group[groups == i] <- colMeans(fit$delta[sel, ][rowSums(fit$delta[sel, groups == i, drop = FALSE]) > 0, groups == i, drop = FALSE]) 127 | } 128 | df$condPIP <- condprobs.group 129 | } 130 | } 131 | df 132 | } 133 | 134 | -------------------------------------------------------------------------------- /R/SimData.R: -------------------------------------------------------------------------------- 1 | HFun1 <- function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3) 2 | HFun2 <- function(z, ind = 1:2) 1/4*(z[ind[1]] + z[ind[2]] + 1/2*z[ind[1]]*z[ind[2]]) 3 | HFun3 <- function(z, ind = 1:2) 4*plogis(1/4*(z[ind[1]] + z[ind[2]] + 1/2*z[ind[1]]*z[ind[2]]), 0, 0.3) 4 | 5 | #' Simulate dataset 6 | #' 7 | #' Simulate predictor, covariate, and continuous outcome data 8 | #' 9 | #' @export 10 | #' 11 | #' @inheritParams kmbayes 12 | #' @param n Number of observations 13 | #' @param M Number of predictor variables to generate 14 | #' @param sigsq.true Variance of normally distributed residual error 15 | #' @param beta.true Coefficient on the covariate 16 | #' @param hfun An integer from 1 to 3 identifying which predictor-response function to generate 17 | #' @param Zgen Method for generating the matrix Z of exposure variables, taking one of the values c("unif", "norm", "corr", "realistic") 18 | #' @param ind select which predictor(s) will be included in the \code{h} function; how many predictors that can be included will depend on which \code{h} function is being used. 
19 | #' @examples 20 | #' set.seed(5) 21 | #' dat <- SimData() 22 | #' @details 23 | #' \itemize{ 24 | #' \item \code{hfun = 1}: A nonlinear function of the first predictor 25 | #' \item \code{hfun = 2}: A linear function of the first two predictors and their product term 26 | #' \item \code{hfun = 3}: A nonlinear and nonadditive function of the first two predictor variables 27 | #' } 28 | #' 29 | #' @return a list containing the parameter values and generated variables of the simulated datasets 30 | #' 31 | SimData <- function(n = 100, M = 5, sigsq.true = 0.5, 32 | beta.true = 2, hfun = 3, Zgen = "norm", ind = 1:2, family = "gaussian") { 33 | 34 | stopifnot(n > 0, M > 0, sigsq.true >= 0, family %in% c("gaussian", "binomial")) 35 | 36 | if (family == "binomial") { 37 | sigsq.true <- 1 38 | } 39 | 40 | if (hfun == 1) { 41 | HFun <- HFun1 42 | } else if (hfun == 2) { 43 | HFun <- HFun2 44 | } else if (hfun == 3) { 45 | HFun <- HFun3 46 | } else { 47 | stop("hfun must be an integer from 1 to 3") 48 | } 49 | 50 | if (Zgen == "unif") { 51 | Z <- matrix(runif(n * M, -2, 2), n, M) 52 | } else if (Zgen == "norm") { 53 | Z <- matrix(rnorm(n * M), n, M) 54 | } else if (Zgen == "corr") { 55 | if (M < 3) { 56 | stop("M must be an integer > 2 for Zgen = 'corr'") 57 | } 58 | Sigma <- diag(1, M, M) 59 | Sigma[1,3] <- Sigma[3,1] <- 0.95 60 | Sigma[2,3] <- Sigma[3,2] <- 0.3 61 | Sigma[1,2] <- Sigma[2,1] <- 0.1 62 | Z <- MASS::mvrnorm(n = n, mu = rep(0, M), Sigma = Sigma) 63 | } else if (Zgen == "realistic") { 64 | VarRealistic <- structure(c(0.72, 0.65, 0.45, 0.48, 0.08, 0.14, 0.16, 0.42, 0.2, 65 | 0.11, 0.35, 0.1, 0.11, 0.65, 0.78, 0.48, 0.55, 0.06, 0.09, 0.17, 66 | 0.2, 0.16, 0.11, 0.32, 0.12, 0.12, 0.45, 0.48, 0.56, 0.43, 0.11, 67 | 0.15, 0.23, 0.25, 0.28, 0.16, 0.31, 0.15, 0.14, 0.48, 0.55, 0.43, 68 | 0.71, 0.2, 0.23, 0.32, 0.22, 0.29, 0.14, 0.3, 0.22, 0.18, 0.08, 69 | 0.06, 0.11, 0.2, 0.95, 0.7, 0.45, 0.22, 0.29, 0.16, 0.24, 0.2, 70 | 0.13, 0.14, 0.09, 0.15, 0.23, 0.7, 0.8, 0.36, 0.3, 0.35, 0.13, 71 | 0.23, 0.17, 0.1, 0.16, 0.17, 0.23, 0.32, 0.45, 0.36, 0.83, 0.24, 72 | 0.37, 0.2, 0.36, 0.34, 0.25, 0.42, 0.2, 0.25, 0.22, 0.22, 0.3, 73 | 0.24, 1.03, 0.41, 0.13, 0.39, 0.1, 0.1, 0.2, 0.16, 0.28, 0.29, 74 | 0.29, 0.35, 0.37, 0.41, 0.65, 0.18, 0.3, 0.18, 0.16, 0.11, 0.11, 75 | 0.16, 0.14, 0.16, 0.13, 0.2, 0.13, 0.18, 0.6, 0.18, 0.13, 0.08, 76 | 0.35, 0.32, 0.31, 0.3, 0.24, 0.23, 0.36, 0.39, 0.3, 0.18, 0.79, 77 | 0.42, 0.12, 0.1, 0.12, 0.15, 0.22, 0.2, 0.17, 0.34, 0.1, 0.18, 78 | 0.13, 0.42, 1.27, 0.1, 0.11, 0.12, 0.14, 0.18, 0.13, 0.1, 0.25, 79 | 0.1, 0.16, 0.08, 0.12, 0.1, 0.67), .Dim = c(13L, 13L)) 80 | if (M > ncol(VarRealistic)) { 81 | stop("Currently can only generate exposure data based on a realistic correlation structure with M = 13 or fewer. 
Please set M = 13 or use Zgen = c('unif','norm'") 82 | } else if (M <= 13) { 83 | Sigma <- VarRealistic[1:M, 1:M] 84 | } 85 | Z <- MASS::mvrnorm(n = n, mu = rep(0, M), Sigma = Sigma) 86 | } 87 | colnames(Z) <- paste0("z", 1:M) 88 | 89 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n)) 90 | eps <- rnorm(n, sd = sqrt(sigsq.true)) 91 | h <- apply(Z, 1, HFun) 92 | mu <- X * beta.true + h 93 | y <- drop(mu + eps) 94 | 95 | if (family == "binomial") { 96 | ystar <- y 97 | y <- ifelse(ystar > 0, 1, 0) 98 | } 99 | 100 | dat <- list(n = n, M = M, sigsq.true = sigsq.true, beta.true = beta.true, Z = Z, h = h, X = X, y = y, hfun = hfun, HFun = HFun, family = family) 101 | if (family == "binomial") { 102 | dat$ystar <- ystar 103 | } 104 | dat 105 | } 106 | -------------------------------------------------------------------------------- /R/bkmr_r_parameter_helper_functions.R: -------------------------------------------------------------------------------- 1 | make_r_params_comp <- function(r.params, whichcomp) { 2 | for(i in seq_along(r.params)) { 3 | if(length(r.params[[i]]) > 1) { 4 | r.params[[i]] <- r.params[[i]][whichcomp] 5 | } 6 | } 7 | r.params 8 | } 9 | 10 | set.r.params <- function(r.prior, comp, r.params) { 11 | if(r.prior == "gamma") { 12 | if(length(r.params$mu.r) > 1) r.params$mu.r <- r.params$mu.r[comp] 13 | if(length(r.params$sigma.r) > 1) r.params$sigma.r <- r.params$sigma.r[comp] 14 | if(length(r.params$r.jump1) > 1) r.params$r.jump1 <- r.params$r.jump1[comp] 15 | if(length(r.params$r.jump2) > 1) r.params$r.jump2 <- r.params$r.jump2[comp] 16 | } 17 | if(r.prior %in% c("unif", "invunif")) { 18 | if(length(r.params$r.a) > 1) r.params$r.a <- r.params$r.a[comp] 19 | if(length(r.params$r.b) > 1) r.params$r.b <- r.params$r.b[comp] 20 | if(length(r.params$r.jump2) > 1) r.params$r.jump2 <- r.params$r.jump2[comp] 21 | } 22 | r.params 23 | } 24 | 25 | set.r.MH.functions <- function(r.prior) { 26 | if(r.prior == "gamma") { 27 | # r.params <- list(mu.r, sigma.r, r.muprop, r.jump1, r.jump2) 28 | rprior.logdens <- function(x, r.params) { 29 | mu.r <- r.params$mu.r 30 | sigma.r <- r.params$sigma.r 31 | dgamma(x, shape=mu.r^2/sigma.r^2, rate=mu.r/sigma.r^2, log=TRUE) 32 | } 33 | rprop.gen1 <- function(r.params) { 34 | r.muprop <- r.params$r.muprop 35 | r.jump <- r.params$r.jump1 36 | rgamma(1, shape=r.muprop^2/r.jump^2, rate=r.muprop/r.jump^2) 37 | } 38 | rprop.logdens1 <- function(x, r.params) { 39 | r.muprop <- r.params$r.muprop 40 | r.jump <- r.params$r.jump1 41 | dgamma(x, shape=r.muprop^2/r.jump^2, rate=r.muprop/r.jump^2, log=TRUE) 42 | } 43 | rprop.gen2 <- function(current, r.params) { 44 | r.jump <- r.params$r.jump2 45 | rgamma(1, shape=current^2/r.jump^2, rate=current/r.jump^2) 46 | } 47 | rprop.logdens2 <- function(prop, current, r.params) { 48 | r.jump <- r.params$r.jump2 49 | dgamma(prop, shape=current^2/r.jump^2, rate=current/r.jump^2, log=TRUE) 50 | } 51 | rprop.gen <- function(current, r.params) { 52 | r.jump <- r.params$r.jump 53 | rgamma(1, shape=current^2/r.jump^2, rate=current/r.jump^2) 54 | } 55 | rprop.logdens <- function(prop, current, r.params) { 56 | r.jump <- r.params$r.jump 57 | dgamma(prop, shape=current^2/r.jump^2, rate=current/r.jump^2, log=TRUE) 58 | } 59 | } 60 | 61 | if(r.prior == "invunif") { 62 | # r.params <- list(r.a, r.b, r.jump2) 63 | rprior.logdens <- function(x, r.params) { 64 | r.a <- r.params$r.a 65 | r.b <- r.params$r.b 66 | ifelse(1/r.b <= x & x <= 1/r.a, -2*log(x) - log(r.b - r.a), log(0)) 67 | } 68 | rprop.gen1 <- function(r.params) { 69 | r.a <- r.params$r.a 70 | 
r.b <- r.params$r.b 71 | 1/runif(1, r.a, r.b) 72 | } 73 | rprop.logdens1 <- function(x, r.params) { 74 | r.a <- r.params$r.a 75 | r.b <- r.params$r.b 76 | ifelse(1/r.b <= x & x <= 1/r.a, -2*log(x) - log(r.b - r.a), log(0)) 77 | } 78 | rprop.gen2 <- function(current, r.params) { 79 | r.a <- r.params$r.a 80 | r.b <- r.params$r.b 81 | r.jump <- r.params$r.jump2 82 | truncnorm::rtruncnorm(1, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump) 83 | } 84 | rprop.logdens2 <- function(prop, current, r.params) { 85 | r.a <- r.params$r.a 86 | r.b <- r.params$r.b 87 | r.jump <- r.params$r.jump2 88 | log(truncnorm::dtruncnorm(prop, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump)) 89 | } 90 | rprop.gen <- function(current, r.params) { 91 | r.a <- r.params$r.a 92 | r.b <- r.params$r.b 93 | r.jump <- r.params$r.jump 94 | truncnorm::rtruncnorm(1, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump) 95 | } 96 | rprop.logdens <- function(prop, current, r.params) { 97 | r.a <- r.params$r.a 98 | r.b <- r.params$r.b 99 | r.jump <- r.params$r.jump 100 | log(truncnorm::dtruncnorm(prop, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump)) 101 | } 102 | } 103 | 104 | if(r.prior == "unif") { 105 | # r.params <- list(r.a, r.b, r.jump2) 106 | rprior.logdens <- function(x, r.params) { 107 | r.a <- r.params$r.a 108 | r.b <- r.params$r.b 109 | dunif(x, r.a, r.b, log=TRUE) 110 | } 111 | rprop.gen1 <- function(r.params) { 112 | r.a <- r.params$r.a 113 | r.b <- r.params$r.b 114 | runif(1, r.a, r.b) 115 | } 116 | rprop.logdens1 <- function(x, r.params) { 117 | r.a <- r.params$r.a 118 | r.b <- r.params$r.b 119 | dunif(x, r.a, r.b, log=TRUE) 120 | } 121 | rprop.gen2 <- function(current, r.params) { 122 | r.a <- r.params$r.a 123 | r.b <- r.params$r.b 124 | r.jump <- r.params$r.jump2 125 | truncnorm::rtruncnorm(1, a = r.a, b = r.b, mean = current, sd = r.jump) 126 | } 127 | rprop.logdens2 <- function(prop, current, r.params) { 128 | r.a <- r.params$r.a 129 | r.b <- r.params$r.b 130 | r.jump <- r.params$r.jump2 131 | log(truncnorm::dtruncnorm(prop, a = r.a, b = r.b, mean = current, sd = r.jump)) 132 | } 133 | rprop.gen <- function(current, r.params) { 134 | r.a <- r.params$r.a 135 | r.b <- r.params$r.b 136 | r.jump <- r.params$r.jump 137 | truncnorm::rtruncnorm(1, a = r.a, b = r.b, mean = current, sd = r.jump) 138 | } 139 | rprop.logdens <- function(prop, current, r.params) { 140 | r.a <- r.params$r.a 141 | r.b <- r.params$r.b 142 | r.jump <- r.params$r.jump 143 | log(truncnorm::dtruncnorm(prop, a = r.a, b = r.b, mean = current, sd = r.jump)) 144 | } 145 | } 146 | 147 | list(rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, rprop.logdens = rprop.logdens) 148 | } 149 | -------------------------------------------------------------------------------- /R/ExtractEsts.R: -------------------------------------------------------------------------------- 1 | #' Compute summary statistics 2 | #' 3 | #' @param q vector of quantiles 4 | #' @param s vector of posterior samples 5 | #' 6 | #' @noRd 7 | SummarySamps <- function(s, q = c(0.025, 0.25, 0.5, 0.75, 0.975)) { 8 | qs <- quantile(s, q) 9 | names(qs) <- paste0("q_", 100*q) 10 | summ <- c(mean = mean(s), sd = sd(s), qs) 11 | summ <- matrix(summ, nrow = 1, dimnames = list(NULL, names(summ))) 12 | } 13 | 14 | #' Extract summary statistics 15 | #' 16 | #' Obtain summary statistics of each parameter from the BKMR fit 17 | #' 18 | #' @param fit An object containing the 
results returned by a the \code{kmbayes} function 19 | #' @param q vector of quantiles 20 | #' @param sel logical expression indicating samples to keep; defaults to keeping the second half of all samples 21 | #' 22 | #' @export 23 | #' 24 | #' @return a list where each component is a data frame containing the summary statistics of the posterior distribution of one of the parameters (or vector of parameters) being estimated 25 | #' 26 | #' @examples 27 | #' ## First generate dataset 28 | #' set.seed(111) 29 | #' dat <- SimData(n = 50, M = 4) 30 | #' y <- dat$y 31 | #' Z <- dat$Z 32 | #' X <- dat$X 33 | #' 34 | #' ## Fit model with component-wise variable selection 35 | #' ## Using only 100 iterations to make example run quickly 36 | #' ## Typically should use a large number of iterations for inference 37 | #' set.seed(111) 38 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 39 | #' 40 | #' ests <- ExtractEsts(fitkm) 41 | #' names(ests) 42 | #' ests$beta 43 | ExtractEsts <- function(fit, q = c(0.025, 0.25, 0.5, 0.75, 0.975), sel = NULL) { 44 | if (inherits(fit, "bkmrfit")) { 45 | if (is.null(sel)) { 46 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 47 | } 48 | sigsq.eps <- SummarySamps(fit$sigsq.eps[sel], q = q) 49 | rownames(sigsq.eps) <- "sigsq.eps" 50 | 51 | r <- t(apply(fit$r[sel, , drop = FALSE], 2, SummarySamps, q = q)) 52 | rownames(r) <- paste0("r", 1:nrow(r)) 53 | 54 | beta <- t(apply(fit$beta[sel, , drop = FALSE], 2, SummarySamps, q = q)) 55 | 56 | lambda <- t(apply(fit$lambda[sel, ,drop = FALSE], 2, SummarySamps, q = q)) 57 | if (nrow(lambda) > 1) { 58 | rownames(lambda) <- paste0("lambda", 1:nrow(lambda)) 59 | } else { 60 | rownames(lambda) <- "lambda" 61 | } 62 | 63 | if (fit$est.h) { 64 | h <- t(apply(fit$h.hat[sel, ], 2, SummarySamps, q = q)) 65 | rownames(h) <- paste0("h", 1:nrow(h)) 66 | } 67 | 68 | if (!is.null(fit$hnew)) { 69 | hnew <- t(apply(fit$hnew[sel, ], 2, SummarySamps, q = q)) 70 | rownames(hnew) <- paste0("hnew", 1:nrow(hnew)) 71 | } 72 | 73 | if (!is.null(fit$ystar)) { 74 | ystar <- t(apply(fit$ystar[sel, ], 2, SummarySamps, q = q)) 75 | rownames(ystar) <- paste0("ystar", 1:nrow(ystar)) 76 | } 77 | } 78 | 79 | if (nrow(beta) > 1) { 80 | rownames(beta) <- paste0("beta", 1:nrow(beta)) 81 | } else { 82 | rownames(beta) <- "beta" 83 | } 84 | 85 | colnames(beta) <- colnames(sigsq.eps) 86 | colnames(r) <- colnames(sigsq.eps) 87 | colnames(lambda) <- colnames(sigsq.eps) 88 | if (fit$est.h) { 89 | colnames(h) <- colnames(sigsq.eps) 90 | } 91 | if (!is.null(fit$hnew)) { 92 | colnames(hnew) <- colnames(sigsq.eps) 93 | } 94 | if (!is.null(fit$ystar)) { 95 | colnames(ystar) <- colnames(sigsq.eps) 96 | } 97 | 98 | ret <- list(sigsq.eps = data.frame(sigsq.eps), beta = beta, lambda = lambda, r = r) 99 | if (fit$est.h) ret$h <- h 100 | if (!is.null(fit$hnew)) ret$hnew <- hnew 101 | if (!is.null(fit$ystar)) ret$ystar <- ystar 102 | 103 | ret 104 | } 105 | 106 | #' Extract samples 107 | #' 108 | #' Extract samples of each parameter from the BKMR fit 109 | #' 110 | #' @inheritParams ExtractEsts 111 | #' 112 | #' @export 113 | #' @return a list where each component contains the posterior samples of one of the parameters (or vector of parameters) being estimated 114 | #' 115 | #' @examples 116 | #' ## First generate dataset 117 | #' set.seed(111) 118 | #' dat <- SimData(n = 50, M = 4) 119 | #' y <- dat$y 120 | #' Z <- dat$Z 121 | #' X <- dat$X 122 | #' 123 | #' ## Fit model with component-wise variable selection 124 | #' ## Using only 100 
iterations to make example run quickly 125 | #' ## Typically should use a large number of iterations for inference 126 | #' set.seed(111) 127 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 128 | #' 129 | #' samps <- ExtractSamps(fitkm) 130 | ExtractSamps <- function(fit, sel = NULL) { 131 | if (inherits(fit, "bkmrfit")) { 132 | if (is.null(sel)) { 133 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 134 | } 135 | 136 | sigsq.eps <- fit$sigsq.eps[sel] 137 | sig.eps <- sqrt(sigsq.eps) 138 | r <- fit$r[sel, , drop = FALSE] 139 | beta <- fit$beta[sel, ] 140 | lambda <- fit$lambda[sel, ] 141 | tau <- lambda*sigsq.eps 142 | h <- fit$h.hat[sel, ] 143 | if (!is.null(fit$hnew)) hnew <- fit$hnew[sel, ] 144 | if (!is.null(fit$ystar)) ystar <- fit$ystar[sel, ] 145 | } 146 | 147 | if (!is.null(ncol(beta))) colnames(beta) <- paste0("beta", 1:ncol(beta)) 148 | colnames(r) <- paste0("r", 1:ncol(r)) 149 | colnames(h) <- paste0("h", 1:ncol(h)) 150 | if (!is.null(fit$hnew)) colnames(hnew) <- paste0("hnew", 1:ncol(hnew)) 151 | if (!is.null(fit$ystar)) colnames(ystar) <- paste0("ystar", 1:ncol(ystar)) 152 | 153 | res <- list(sigsq.eps = sigsq.eps, sig.eps = sig.eps, r = r, beta = beta, lambda = lambda, tau = tau, h = h) 154 | if (!is.null(fit$hnew)) res$hnew <- hnew 155 | if (!is.null(fit$ystar)) res$ystar <- ystar 156 | res 157 | } 158 | -------------------------------------------------------------------------------- /my-doc/tests/tests_randint.R: -------------------------------------------------------------------------------- 1 | devtools::load_all() 2 | library(ggplot2) 3 | 4 | family <- "gaussian" 5 | family <- "binomial" 6 | 7 | ## example where there is a random intercept #### 8 | 9 | n <- 100 10 | M <- 5 11 | sigsq.true <- ifelse(family == "gaussian", 0.05, 1) 12 | beta.true <- 0.5 13 | Z <- matrix(rnorm(n * M), n, M) 14 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n)) 15 | id <- rep(1:(n/2), each = 2) 16 | h <- apply(Z, 1, function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3)) 17 | u <- rep(rnorm(n/2), each = 2) 18 | eps <- rnorm(n, 0, sqrt(sigsq.true)) 19 | y <- drop(X*beta.true + h + u + eps) 20 | if (family == "binomial") { 21 | ystar <- y 22 | y <- ifelse(ystar > 0, 1, 0) 23 | } 24 | 25 | set.seed(111) 26 | if (family == "gaussian") { 27 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 5000, family = family, id = id, varsel = TRUE, control.params = list(verbose_show_ests = TRUE)) 28 | } else if (family == "binomial") { 29 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 5000, family = family, id = id, varsel = TRUE, control.params = list(verbose_show_ests = TRUE, lambda.jump = c(10, 0.3))) 30 | } 31 | 32 | fit0 33 | 34 | summary(fit0) 35 | 36 | sigsq_u_chain <- fit0$lambda[, 2]*fit0$sigsq.eps 37 | plot(sigsq_u_chain, type = "l") 38 | plot(sigsq_u_chain[2501:5000], type = "l") 39 | 40 | sigsq_u_est <- mean(sigsq_u_chain) 41 | sigsq_u_est 42 | 43 | TracePlot(fit = fit0, par = "beta") 44 | ExtractPIPs(fit0) 45 | 46 | pred.resp.univar <- PredictorResponseUnivar(fit = fit0) 47 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) + 48 | geom_smooth(stat = "identity") + 49 | facet_wrap(~ variable) + 50 | ylab("h(z)") 51 | 52 | pred.resp.bivar <- PredictorResponseBivar(fit = fit0, 53 | min.plot.dist = 1) 54 | 55 | pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar, 56 | Z = Z, qs = c(0.25, 0.5, 0.75)) 57 | 58 | ggplot(pred.resp.bivar.levels, aes(z1, est)) + 59 | geom_smooth(aes(col = quantile), stat = "identity") + 60 | 
facet_grid(variable2 ~ variable1) + 61 | ggtitle("h(expos1 | quantiles of expos2)") + 62 | xlab("expos1") 63 | 64 | risks.overall.approx <- OverallRiskSummaries(fit = fit0, 65 | qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5) 66 | risks.overall.approx 67 | 68 | risks.overall.exact <- OverallRiskSummaries(fit = fit0, 69 | qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5, method = "exact") 70 | risks.overall.exact 71 | 72 | risks.singvar <- SingVarRiskSummaries(fit = fit0, 73 | qs.diff = c(0.25, 0.75), q.fixed = c(0.25, 0.50, 0.75)) 74 | risks.singvar 75 | 76 | risks.int <- SingVarIntSummaries(fit = fit0, 77 | qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75)) 78 | risks.int 79 | 80 | ## tox application from BKMR paper #### 81 | 82 | ## load & prep data 83 | ## from Brent's HEI report 84 | 85 | DIR <- "H:/Research/Completed Projects/2014 Bayesian kernel machine regression (Biostatistics)/Code/tox application/" 86 | 87 | meanvals.dat <- read.table(paste0(DIR, "data/bp-xrf.csv"),sep=",",header=T) 88 | exposure.dat <- read.table(paste0(DIR, "data/BC.csv"),sep=",",header=T) 89 | 90 | caps.dat <- meanvals.dat[meanvals.dat$Exposure=="CAPs",] 91 | sham.dat <- meanvals.dat[meanvals.dat$Exposure=="Sham",] 92 | 93 | bccaps.dat <- merge(exposure.dat,caps.dat,by="DATE") 94 | bcsham.dat <- sham.dat 95 | bcsham.dat$BC <- 0 96 | 97 | bcall.dat <- rbind(bccaps.dat,bcsham.dat) 98 | bcall.dat <- bcall.dat[order(bcall.dat$seq),] 99 | bcall.dat <- bcall.dat[bcall.dat$seq!=112,] 100 | bcall.dat <- bcall.dat[bcall.dat$seq!=11,] 101 | 102 | bcall.dat$Dog <- as.numeric(bcall.dat$Dog) 103 | bcall.dat$exp <- rep(0,length(bcall.dat$Exposure)) 104 | bcall.dat$exp[bcall.dat$Exposure=="CAPs"] <- 1 105 | bcall.dat$stat <- rep(0,length(bcall.dat$Status2)) 106 | bcall.dat$stat[bcall.dat$Status2=="Post-Occlusio"] <- 1 107 | bcall.dat$stat2 <- rep(0,length(bcall.dat$Status2)) 108 | bcall.dat$stat2[bcall.dat$Status2=="Prazosin"] <- 1 109 | 110 | 111 | n.times <- tapply(rep(1,length(bcall.dat$Dog)),bcall.dat$Dog,sum) 112 | time.var <- NULL 113 | for (i in 1:length(n.times)) 114 | { 115 | time.var <- c(time.var,1:n.times[i]) 116 | } 117 | bcall.dat$time <- time.var 118 | 119 | #bcall.dat <- bcall.dat[bcall.dat$stat2==0,] 120 | 121 | dat<-NULL 122 | #corresponds to new simulation study (except doesn't include bc because variable wasn't in bcall.dat and added Mn because that was the observed effect in the original study) 123 | varnames <- c("Al","Si","Ti","Ca","K","Cu","Mn", "Ni","V","Zn", "S", "Cl", "BC") 124 | groups <- c(1,1,1,1,1,1,1, 2,2,2, 3, 4, 5) 125 | dat$Z <- as.matrix(bcall.dat[,varnames]) 126 | mean.z <- apply(dat$Z,2,mean) 127 | dat$Z <- sweep(dat$Z,2,mean.z) 128 | sd.z <- apply(dat$Z,2,sd) 129 | dat$Z <- sweep(dat$Z,2,sd.z,FUN="/") 130 | 131 | dat$X <- cbind(bcall.dat$stat,bcall.dat$stat2,bcall.dat$exp) 132 | dat$y <- matrix(bcall.dat$mmrate,length(bcall.dat$mmrate),1) 133 | dat$id <- bcall.dat$Dog 134 | 135 | ## take out outliers 136 | inds <- seq(1,dim(dat$X)[1]) 137 | noout.inds <- inds[dat$Z[,1] < 6 & dat$Z[,2] < 6 & dat$Z[,3] < 6 & dat$Z[,4] < 6 & dat$Z[,5] < 6 & dat$Z[,6] < 6 & dat$Z[,7] < 6 & dat$Z[,8] < 6 & dat$Z[,9] < 6 & dat$Z[,10] < 6 & dat$Z[,11] < 6 & dat$Z[,12] < 6 & dat$Z[,13] < 6] 138 | 139 | dat$y <- as.matrix(dat$y[noout.inds]) 140 | dat$X <- as.matrix(dat$X[noout.inds,]) 141 | dat$Z <- dat$Z[noout.inds,] 142 | # dat$Z <- scale(dat$Z) 143 | dat$id <- dat$id[noout.inds] 144 | 145 | ## set up and fit 146 | 147 | set.seed(111) 148 | fitkm <- kmbayes(y = dat$y, X = dat$X, Z = dat$Z, id = dat$id, iter = 
1000, verbose = TRUE, varsel = TRUE) 149 | -------------------------------------------------------------------------------- /R/InvestigatePrior_rm.R: -------------------------------------------------------------------------------- 1 | #' Investigate prior 2 | #' 3 | #' Investigate the impact of the \code{r[m]} parameters on the smoothness of the exposure-response function \code{h(z[m])}. 4 | #' 5 | #' @inheritParams kmbayes 6 | #' @param ngrid Number of grid points over which to plot the exposure-response function 7 | #' @param q.seq Sequence of values corresponding to different degrees of smoothness in the estimated exposure-response function. A value of q corresponds to fractions of the range of the data over which there is a decay in the correlation \code{cor(h[i],h[j])} between two subjects by 50\code{\%}. 8 | #' @param r.seq sequence of values at which to fix \code{r} for estimating the exposure-response function 9 | #' @param verbose TRUE or FALSE: flag indicating whether to print to the screen which exposure variable and q value has been completed 10 | #' @param Drange the range of the \code{z_m} data over which to apply the values of \code{q.seq}. If not specified, will be calculated as the maximum of the ranges of \code{z_1} through \code{z_M}. 11 | #' @details 12 | #' For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 13 | #' @export 14 | #' 15 | #' @return a list containing the predicted values, residuals, and estimated predictor-response function for each degree of smoothness being considered 16 | #' 17 | #' @import nlme 18 | #' 19 | #' @examples 20 | #' ## First generate dataset 21 | #' set.seed(111) 22 | #' dat <- SimData(n = 50, M = 4) 23 | #' y <- dat$y 24 | #' Z <- dat$Z 25 | #' X <- dat$X 26 | #' 27 | #' priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16)) 28 | #' PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits) 29 | InvestigatePrior <- function(y, Z, X, ngrid = 50, q.seq = c(2, 1, 1/2, 1/4, 1/8, 1/16), r.seq = NULL, Drange = NULL, verbose = FALSE) { 30 | 31 | if (is.null(r.seq)) { 32 | if (is.null(Drange)) { 33 | zranges <- diff(apply(Z, 2, range)) 34 | Drange <- max(zranges) 35 | } 36 | r.seq <- -log(1 - 0.50)/(q.seq * Drange)^2 37 | } else { 38 | if (!is.null(q.seq)) { 39 | warning("Both 'q.seq' and 'r.seq' are specified; values of 'q.seq' will be ignored.") 40 | } 41 | } 42 | 43 | Znew.mat <- matrix(NA, ngrid, ncol(Z)) 44 | preds <- vector("list", ncol(Z)) 45 | resids <- vector("list", ncol(Z)) 46 | h.hat.ests <- vector("list", ncol(Z)) 47 | for(i in 1:ncol(Z)) { 48 | preds[[i]] <- matrix(NA, ngrid, length(q.seq)) 49 | resids[[i]] <- matrix(NA, nrow(Z), length(q.seq)) 50 | h.hat.ests[[i]] <- matrix(NA, nrow(Z), length(q.seq)) 51 | } 52 | for(i in 1:ncol(Z)) { 53 | Zi <- cbind(Z[, i]) 54 | Znew <- cbind(seq(min(Zi), max(Zi), length.out = ngrid)) 55 | Znew.mat[, i] <- Znew 56 | n0 <- nrow(Zi) 57 | In <- diag(1,n0,n0) 58 | n1 <- nrow(Znew) 59 | Zall <- rbind(Zi, Znew) 60 | nall <- n0+n1 61 | for(j in seq_along(r.seq)) { 62 | r <- r.seq[j] 63 | Kpart <- as.matrix(stats::dist(Zall))^2 64 | Kmat <- exp(-r*Kpart) 65 | K <- Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 66 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 67 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 68 | U <- try(t(chol(K)), silent=TRUE) 69 | # all.equal(K, U %*% t(U)) 70 | if(inherits(U, "try-error")) { 71 | sigsvd <- svd(K) 72 | U <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) 73 | # all.equal(K, U %*% t(U), check.attributes=FALSE) 74 | } 75 | 76 | 
group <- rep(1, n0) 77 | fit <- lme(y ~ -1+X, random = list(group = pdIdent(~ -1+U))) 78 | #data.frame(sig=(fit$sigma)^2, tau=as.numeric(VarCorr(fit)[1,"Variance"]), rho=1/r, bet=fixef(fit)) 79 | h.hat <- U %*% drop(fit$coef$random[[1]]) 80 | Vinv <- chol2inv(chol(In + as.numeric(VarCorr(fit)[1,"Variance"])/(fit$sigma)^2*Kmat0)) 81 | hnew <- drop(as.numeric(VarCorr(fit)[1,"Variance"])/(fit$sigma)^2*Kmat10%*%Vinv%*%(y - X%*%fixef(fit))) 82 | 83 | preds[[i]][, j] <- hnew 84 | resids[[i]][, j] <- stats::resid(fit) 85 | h.hat.ests[[i]][, j] <- h.hat 86 | 87 | if(verbose) message("Completed: variable", i, ", r value ", j) 88 | } 89 | } 90 | 91 | res <- list(q.seq = q.seq, r.seq = r.seq, Drange = Drange, Znew = Znew.mat, resids = resids, preds = preds, h.hat = h.hat.ests) 92 | } 93 | 94 | #' Plot of exposure-response function from univariate KMR fit 95 | #' 96 | #' Plot the estimated \code{h(z[m])} estimated from frequentist KMR for \code{r[m]} fixed to specific values 97 | #' 98 | #' @inheritParams kmbayes 99 | #' @param fits output from \code{\link{InvestigatePrior}} 100 | #' @param which.z which predictors (columns in \code{Z}) to plot 101 | #' @param which.q which q.values to plot; defaults to all possible 102 | #' @param plot.resid whether to plot the data points 103 | #' @param ylim plotting limits for the y-axis 104 | #' @param ... other plotting arguments 105 | #' @export 106 | #' @import graphics 107 | #' 108 | #' @return No return value, generates plot 109 | #' 110 | #' @examples 111 | #' ## First generate dataset 112 | #' set.seed(111) 113 | #' dat <- SimData(n = 50, M = 4) 114 | #' y <- dat$y 115 | #' Z <- dat$Z 116 | #' X <- dat$X 117 | #' 118 | #' priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16)) 119 | #' PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits) 120 | PlotPriorFits <- function(y, X, Z, fits, which.z = NULL, which.q = NULL, plot.resid = TRUE, ylim = NULL, ...) { 121 | q.seq <- fits$q.seq 122 | r.seq <- fits$r.seq 123 | Znew <- fits$Znew 124 | preds <- fits$preds 125 | 126 | if (is.null(which.z)) which.z <- 1:ncol(Z) 127 | if (is.null(which.q)) which.q <- 1:length(q.seq) 128 | 129 | q.seq <- q.seq[which.q] 130 | r.seq <- r.seq[which.q] 131 | Znew <- Znew[, which.z] 132 | preds <- preds[which.z] 133 | Z <- Z[, which.z] 134 | 135 | if (plot.resid) { 136 | lm0 <- lm(y ~ X) 137 | res <- resid(lm0) + coef(lm0)["(Intercept)"] 138 | if (is.null(ylim)) ylim <- range(res) 139 | } 140 | 141 | opar <- par(mfrow=c(1 + length(which.z), length(which.q)), mar=c(4.1, 4.1, 1.6, 1.1)) 142 | on.exit(par(opar), add = TRUE) 143 | 144 | for(r in r.seq) { 145 | fun_plot <- function(x) exp(-r*x^2) 146 | curve(fun_plot, main=paste0("r = ", format(round(r,2), digits = 2, nsmall = 2)), ylab="correlation", cex.lab=1.5, cex.main=2, ylim=c(0,1), xlim=c(0, max(Z)), xlab=expression(d[ij]), xname = 'x') 147 | } 148 | for(i in 1:ncol(Z)) { 149 | for(j in seq_along(r.seq)) { 150 | est <- preds[[i]][, j] 151 | if (is.null(ylim)) ylim <- range(est, na.rm = TRUE) 152 | plot(0, type = "n", ylim = ylim, ylab = expression(hat(h)), xlab = colnames(Z)[i], cex.lab = 1.5, xlim = range(Znew), ...) 
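      ## overlay the observed data as covariate-adjusted outcomes (residuals
      ## from lm(y ~ X) plus the intercept, drawn as red points), then add the
      ## fitted curve for this predictor / r-value combination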
153 |       if (plot.resid) points(Z[, i], res, col="red", pch=19, cex=0.5)
154 |       lines(Znew[, i], est)
155 |     }
156 |   }
157 | }
158 | 
--------------------------------------------------------------------------------
/my-doc/scratch.R:
--------------------------------------------------------------------------------
1 | devtools::load_all()
2 | library(magrittr)
3 | 
4 | ## generate data ####
5 | 
6 | #seed <- 1234
7 | seed <- 123
8 | set.seed(seed)
9 | n = 200
10 | M = 5
11 | sigsq.true = 1
12 | beta.true = 0.1
13 | Z <- matrix(runif(n * M, -1, 1), n, M)
14 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n))
15 | eps <- rnorm(n, sd = sqrt(sigsq.true))
16 | #h <- 4*plogis(Z[, 1], 0, 0.3)
17 | h <- (2*Z[, 1] + 0.5) ^ 2
18 | plot(Z[, 1], h)
19 | 
20 | mu <- X * beta.true + h
21 | y <- drop(mu + eps)
22 | ystar <- y
23 | y <- ifelse(ystar > 0, 1, 0)
24 | dat <- list(n = n, M = M, sigsq.true = sigsq.true, beta.true = beta.true, Z = Z, h = h, X = X, y = y)
25 | dat$ystar <- ystar
26 | datp <- dat
27 | #datp <- SimData(hfun = 1, family = "binomial", beta.true = 0.1)
28 | y <- datp$y
29 | Z <- datp$Z
30 | X <- datp$X
31 | mu <- datp %$% drop(h + X*beta.true)
32 | #str(datp)
33 | table(y)
34 | z1ord <- order(Z[, 1])
35 | 
36 | plot(Z[, 1], datp$ystar)
37 | abline(lm(datp$h ~ Z[, 1]), col = "blue")
38 | lines(Z[z1ord, 1], predict(lm(datp$h ~ splines::ns(Z[, 1], 3)))[z1ord], col = "blue")
39 | lines(Z[z1ord, 1], datp$h[z1ord], col = "red", lwd = 2)
40 | 
41 | ## linear probit model ####
42 | 
43 | probitfit0 <- glm(y ~ Z + X, family = binomial(link = "probit"))
44 | 
45 | tmp <- predict(probitfit0)
46 | tmp2 <- tmp
47 | tmp2 <- ifelse(y == 1, abs(tmp), -abs(tmp))
48 | tmp2 <- ifelse(y == 1, tmp - min(tmp[y == 1]), tmp - max(tmp[y == 0]))
49 | plot(tmp2, datp$ystar, col = ifelse(y == 1, "green", "blue"))
50 | abline(0, 1, col = "red")
51 | 
52 | hpred <- drop(coef(probitfit0)["(Intercept)"] + Z %*% coef(probitfit0)[grep("Z", names(coef(probitfit0)))])
53 | plot(hpred, datp$h)
54 | abline(0, 1, col = "red")
55 | 
56 | ## Oracle probit model ####
57 | 
58 | oracle <- glm(y ~ Z[, 1] + I(Z[, 1]^2) + X, family = binomial(link = "probit"))
59 | 
60 | tmp <- predict(oracle)
61 | tmp2 <- tmp
62 | tmp2 <- ifelse(y == 1, abs(tmp), -abs(tmp))
63 | tmp2 <- ifelse(y == 1, tmp - min(tmp[y == 1]), tmp - max(tmp[y == 0]))
64 | plot(tmp2, datp$ystar, col = ifelse(y == 1, "green", "blue"))
65 | abline(0, 1, col = "red")
66 | 
67 | hpred <- drop(coef(oracle)["(Intercept)"] + cbind(Z[, 1], Z[, 1]^2) %*% coef(oracle)[c("Z[, 1]", "I(Z[, 1]^2)")])
68 | plot(hpred, datp$h)
69 | abline(0, 1, col = "red")
70 | 
71 | ## BKM probit model ####
72 | 
73 | # fitpr_gam0 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "gamma"))
74 | # fitpr <- fitpr_gam0
75 | # 
76 | # fitpr_gam1 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "gamma", r.jump2 = 1))
77 | # fitpr <- fitpr_gam1
78 | 
79 | fitpr_gam2 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "gamma", r.jump2 = 2))
80 | fitpr <- fitpr_gam2
81 | 
82 | fitpr_iu <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "invunif"))
83 | fitpr <- fitpr_iu
84 | 
85 | fitpr_iu2 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = 
list(verbose_show_ests = TRUE, r.prior = "invunif", r.jump1 = 3)) 86 | fitpr <- fitpr_iu2 87 | 88 | ## with knots matrix 89 | 90 | sel <- with(fitpr, seq(floor(iter/2) + 1, iter)) 91 | ests <- ExtractEsts(fitpr) 92 | #summary(fitpr) 93 | 94 | TracePlot(fit = fitpr, par = "beta") 95 | TracePlot(fit = fitpr, par = "sigsq.eps") 96 | TracePlot(fit = fitpr, par = "r", comp = 1) 97 | TracePlot(fit = fitpr, par = "r", comp = 2) 98 | TracePlot(fit = fitpr, par = "r", comp = 3) 99 | TracePlot(fit = fitpr, par = "r", comp = 4) 100 | TracePlot(fit = fitpr, par = "r", comp = 5) 101 | TracePlot(fit = fitpr, par = "h", comp = 20) 102 | TracePlot(fit = fitpr, par = "ystar", comp = 1) 103 | 104 | ## beta 105 | cbind(truth = datp$beta.true, ests$beta) %>% t() 106 | 107 | ## h 108 | hhat <- ests$h[, "mean"] 109 | plot(hhat, datp$h) 110 | abline(0, 1, col = "red") 111 | #plot(Z[z1ord, 1], (datp$ystar - datp$X*0.1)[z1ord], col = "blue") 112 | plot(Z[z1ord, 1], hhat[z1ord], ylim = range(c(hhat, datp$h)), col = ifelse(y[z1ord] == 1, "green", "blue")) 113 | lines(Z[z1ord, 1], datp$h[z1ord], col = "red") 114 | 115 | ## ystar 116 | ystar_hat <- ests$ystar[, "mean"] 117 | plot(ystar_hat, datp$ystar, col = ifelse(y == 1, "green", "blue")) 118 | abline(0, 1, col = "red") 119 | par(mfrow = c(1,2)) 120 | plot(datp$y, datp$ystar) 121 | plot(datp$y, ystar_hat) 122 | par(mfrow = c(1,1)) 123 | par(mfrow = c(1,2)) 124 | hist(datp$ystar) 125 | hist(ystar_hat) 126 | par(mfrow = c(1,1)) 127 | par(mfrow = c(1,2)) 128 | plot(Z[z1ord, 1], datp$ystar[z1ord], col = ifelse(y[z1ord] == 1, "green", "blue")) 129 | lines(Z[z1ord, 1], datp$h[z1ord], col = "red") 130 | plot(Z[z1ord, 1], ests$ystar[z1ord], col = ifelse(y[z1ord] == 1, "green", "blue")) 131 | lines(Z[z1ord, 1], hhat[z1ord], col = "red") 132 | par(mfrow = c(1,1)) 133 | 134 | ## phat 135 | phat0 <- pnorm(datp$h + t(X %*% datp$beta.true)) 136 | phat1 <- colMeans(pnorm(fitpr$h.hat[sel, ] + t(X %*% fitpr$beta[sel, ]))) 137 | plot(phat0, phat1) 138 | abline(0, 1, col = "red") 139 | 140 | if (FALSE) { 141 | 142 | iter = 1000; family = "binomial"; id = NULL; verbose = TRUE; Znew = NULL; starting.values = list(); control.params = list(); varsel = FALSE; groups = NULL; knots = NULL; ztest = NULL; rmethod = "varying" 143 | 144 | s <- 2 145 | beta = chain$beta[s-1,] 146 | Vinv = Vcomps$Vinv 147 | Xbeta <- drop(X %*% beta) 148 | lower <- ifelse(y == 1, 0, -Inf) 149 | upper <- ifelse(y == 0, 0, Inf) 150 | time <- system.time(samp <- tmvtnorm::rtmvnorm(1, mean = Xbeta, H = Vinv, lower = lower, upper = upper, algorithm = "gibbs", start.value = chain$ystar[s - 1, ])) 151 | time 152 | head(drop(samp)) 153 | 154 | } 155 | 156 | ## regular probit regression #### 157 | 158 | truth <- c(0.5, beta.true, 1, 2) 159 | tmpmu <- cbind(1, X, Z[, 1], Z[, 2]) %*% truth 160 | dattmp <- dplyr::data_frame(X = drop(X), Z1 = Z[, 1], Z2 = Z[, 2], ystar = drop(tmpmu + eps)) 161 | dattmp$y <- ifelse(dattmp$ystar > 0, 1, 0) 162 | modtmp <- glm(y ~ X + Z1 + Z2, family = binomial(link = "probit"), data = dattmp) 163 | coef(modtmp) 164 | 165 | niter <- 1000 166 | 167 | samps <- list() 168 | samps$coef <- matrix(NA, niter, 4, dimnames = list(NULL, c("int", "beta", "Z1", "Z2"))) 169 | samps$coef[1, ] <- rep(1, ncol(samps$coef)) 170 | samps$ystar <- matrix(NA, niter, n) 171 | samps$ystar[1, ] <- ifelse(dattmp$y == 1, 1/2, -1/2) 172 | 173 | for (i in 2:niter) { 174 | m0 <- lm(samps$ystar[i-1, ] ~ X + Z1 + Z2, data = dattmp) 175 | samps$coef[i, ] <- coef(m0) 176 | samps$ystar[i, ] <- truncnorm::rtruncnorm(1, a = 
ifelse(dattmp$y == 1, 0, -Inf), b = ifelse(dattmp$y == 1, Inf, 0), mean = predict(m0), sd = 1)
177 | }
178 | 
179 | colMeans(samps$coef)
180 | coef(modtmp)
181 | 
182 | apply(samps$coef, 2, sd)
183 | summary(modtmp)$coef[, "Std. Error"]
184 | 
185 | plot(dattmp$ystar, colMeans(samps$ystar), col = ifelse(dattmp$y == 1, "green", "blue"))
186 | abline(0, 1, col = "red")
187 | 
188 | par(mfrow = c(1,2))
189 | plot(dattmp$y, dattmp$ystar)
190 | plot(dattmp$y, colMeans(samps$ystar))
191 | par(mfrow = c(1,1))
192 | par(mfrow = c(1,2))
193 | hist(dattmp$ystar)
194 | hist(colMeans(samps$ystar))
195 | par(mfrow = c(1,1))
196 | 
--------------------------------------------------------------------------------
/R/ComputePostmeanHnew.R:
--------------------------------------------------------------------------------
1 | #' Compute the posterior mean and variance of \code{h} at new predictor values
2 | #'
3 | #' @inheritParams kmbayes
4 | #' @param fit An object containing the results returned by the \code{kmbayes} function
5 | #' @param Znew matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If set to NULL then will default to using the observed exposures Z.
6 | #' @param method method for obtaining posterior summaries at a vector of new points. Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details
7 | #' @param sel selects which iterations of the MCMC sampler to use for inference; see details
8 | #' @details
9 | #' \itemize{
10 | #' \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations.
11 | #' \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, then 100 iterations are kept
12 | #' }
13 | #' For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html}
14 | #' @export
15 | #'
16 | #' @return a list of length two containing the posterior mean vector and posterior variance matrix
17 | #'
18 | #' @examples
19 | #' set.seed(111)
20 | #' dat <- SimData(n = 50, M = 4)
21 | #' y <- dat$y
22 | #' Z <- dat$Z
23 | #' X <- dat$X
24 | #'
25 | #' ## Fit model with component-wise variable selection
26 | #' ## Using only 100 iterations to make example run quickly
27 | #' ## Typically should use a large number of iterations for inference
28 | #' set.seed(111)
29 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
30 | #'
31 | #' med_vals <- apply(Z, 2, median)
32 | #' Znew <- matrix(med_vals, nrow = 1)
33 | #' h_true <- dat$HFun(Znew)
34 | #' h_est1 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "approx")
35 | #' h_est2 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "exact")
36 | ComputePostmeanHnew <- function(fit, y = NULL, Z = NULL, X = NULL, Znew = NULL, sel = NULL, method = "approx") {
37 |   if (method == "approx") {
38 |     res <- ComputePostmeanHnew.approx(fit = fit, y = y, Z = Z, X = X, Znew = Znew, sel = sel)
39 |   } else if (method == "exact") {
40 |     res <- ComputePostmeanHnew.exact(fit = fit, y = y, Z = Z, X = X, Znew = Znew, sel = sel)
41 |   }
42 |   res
43 | }
44 | 
45 | #' Compute the posterior mean and variance of \code{h} at new predictor values
46 | #'
47 | #' Function to approximate the posterior mean and variance as a function of the estimated model parameters (e.g., tau, lambda, beta, and sigsq.eps)
48 | #' 
@param Znew matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If set to NULL then will default to using the observed exposures Z. 49 | #' @inheritParams kmbayes 50 | #' @inheritParams ExtractEsts 51 | #' @noRd 52 | ComputePostmeanHnew.approx <- function(fit, y = NULL, Z = NULL, X = NULL, Znew = NULL, sel = NULL) { 53 | 54 | if (inherits(fit, "bkmrfit")) { 55 | if (is.null(y)) y <- fit$y 56 | if (is.null(Z)) Z <- fit$Z 57 | if (is.null(X)) X <- fit$X 58 | } 59 | 60 | if (!is.null(Znew)) { 61 | if(is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 62 | if(inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 63 | } 64 | if(is.null(dim(X))) X <- matrix(X, ncol=1) 65 | 66 | ests <- ExtractEsts(fit, sel = sel) 67 | sigsq.eps <- ests$sigsq.eps[, "mean"] 68 | r <- ests$r[, "mean"] 69 | beta <- ests$beta[, "mean"] 70 | lambda <- ests$lambda[, "mean"] 71 | if (fit$family == "gaussian") { 72 | ycont <- y 73 | } else if (fit$family == "binomial") { 74 | ycont <- ests$ystar[, "mean"] 75 | } 76 | 77 | Kpart <- makeKpart(r, Z) 78 | K <- exp(-Kpart) 79 | V <- diag(1, nrow(Z), nrow(Z)) + lambda[1]*K 80 | cholV <- chol(V) 81 | Vinv <- chol2inv(cholV) 82 | 83 | if (!is.null(Znew)) { 84 | # if(is.null(data.comps$knots)) { 85 | n0 <- nrow(Z) 86 | n1 <- nrow(Znew) 87 | nall <- n0 + n1 88 | Kpartall <- makeKpart(r, rbind(Z, Znew)) 89 | Kmat <- exp(-Kpartall) 90 | Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 91 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 92 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 93 | 94 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 95 | postvar <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 96 | postmean <- lamK10Vinv %*% (ycont - X%*%beta) 97 | # } else { 98 | # stop("GPP not yet implemented") 99 | # } 100 | } else { 101 | lamKVinv <- lambda[1]*K%*%Vinv 102 | postvar <- lambda[1]*sigsq.eps*(K - lamKVinv%*%K) 103 | postmean <- lamKVinv %*% (ycont - X%*%beta) 104 | } 105 | ret <- list(postmean = drop(postmean), postvar = postvar) 106 | ret 107 | } 108 | 109 | #' Compute the posterior mean and variance of \code{h} at a new predictor values 110 | #' 111 | #' Function to estimate the posterior mean and variance by obtaining the posterior mean and variance at particular iterations and then using the iterated mean and variance formulas 112 | #' 113 | #' @inheritParams kmbayes 114 | #' @inheritParams SamplePred 115 | #' @inheritParams ExtractEsts 116 | #' 117 | #' @noRd 118 | ComputePostmeanHnew.exact <- function(fit, y = NULL, Z = NULL, X = NULL, Znew = NULL, sel = NULL) { 119 | 120 | if (inherits(fit, "bkmrfit")) { 121 | if (is.null(y)) y <- fit$y 122 | if (is.null(Z)) Z <- fit$Z 123 | if (is.null(X)) X <- fit$X 124 | } 125 | 126 | if (!is.null(Znew)) { 127 | if (is.null(dim(Znew))) Znew <- matrix(Znew, nrow = 1) 128 | if (inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 129 | if (ncol(Z) != ncol(Znew)) { 130 | stop("Znew must have the same number of columns as Z") 131 | } 132 | } 133 | 134 | if (is.null(dim(X))) X <- matrix(X, ncol=1) 135 | 136 | # if (!is.null(fit$Vinv)) { 137 | # sel <- attr(fit$Vinv, "sel") 138 | # } 139 | 140 | if (is.null(sel)) { 141 | sel <- with(fit, seq(floor(iter/2) + 1, iter, 10)) 142 | if (length(sel) < 100) { 143 | sel <- with(fit, seq(floor(iter/2) + 1, iter, length.out = 100)) 144 | } 145 | sel <- unique(floor(sel)) 146 | } 147 | 148 | family <- fit$family 149 | data.comps <- fit$data.comps 150 | post.comps.store <- list(postmean = vector("list", length(sel)), 151 | postvar = 
vector("list", length(sel)) 152 | ) 153 | 154 | for (i in seq_along(sel)) { 155 | s <- sel[i] 156 | beta <- fit$beta[s, ] 157 | lambda <- fit$lambda[s, ] 158 | sigsq.eps <- fit$sigsq.eps[s] 159 | r <- fit$r[s, ] 160 | 161 | if (family == "gaussian") { 162 | ycont <- y 163 | } else if (family == "binomial") { 164 | ycont <- fit$ystar[s, ] 165 | } 166 | 167 | Kpart <- makeKpart(r, Z) 168 | K <- exp(-Kpart) 169 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 170 | Vinv <- Vcomps$Vinv 171 | # if (is.null(fit$Vinv)) { 172 | # V <- diag(1, nrow(Z), nrow(Z)) + lambda[1]*K 173 | # cholV <- chol(V) 174 | # Vinv <- chol2inv(cholV) 175 | # } else { 176 | # Vinv <- fit$Vinv[[i]] 177 | # } 178 | 179 | if (!is.null(Znew)) { 180 | # if(is.null(data.comps$knots)) { 181 | n0 <- nrow(Z) 182 | n1 <- nrow(Znew) 183 | nall <- n0 + n1 184 | Kpartall <- makeKpart(r, rbind(Z, Znew)) 185 | Kmat <- exp(-Kpartall) 186 | Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 187 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 188 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 189 | 190 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 191 | postvar <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 192 | postmean <- lamK10Vinv %*% (ycont - X%*%beta) 193 | # } else { 194 | # stop("GPP not yet implemented") 195 | # } 196 | } else { 197 | lamKVinv <- lambda[1]*K%*%Vinv 198 | postvar <- lambda[1]*sigsq.eps*(K - lamKVinv%*%K) 199 | postmean <- lamKVinv %*% (ycont - X%*%beta) 200 | } 201 | 202 | post.comps.store$postmean[[i]] <- postmean 203 | post.comps.store$postvar[[i]] <- postvar 204 | 205 | } 206 | 207 | postmean_mat <- t(do.call("cbind", post.comps.store$postmean)) 208 | m <- colMeans(postmean_mat) 209 | postvar_arr <- with(post.comps.store, 210 | array(unlist(postvar), 211 | dim = c(nrow(postvar[[1]]), ncol(postvar[[1]]), length(postvar))) 212 | ) 213 | ve <- var(postmean_mat) 214 | ev <- apply(postvar_arr, c(1, 2), mean) 215 | v <- ve + ev 216 | ret <- list(postmean = m, postvar = v) 217 | 218 | ret 219 | } 220 | 221 | -------------------------------------------------------------------------------- /R/ComputeMixtureSummaries.R: -------------------------------------------------------------------------------- 1 | riskSummary.approx <- function(point1, point2, preds.fun, ...) { 2 | cc <- c(-1, 1) 3 | newz <- rbind(point1, point2) 4 | preds <- preds.fun(newz, ...) 5 | diff <- drop(cc %*% preds$postmean) 6 | diff.sd <- drop(sqrt(cc %*% preds$postvar %*% cc)) 7 | c(est = diff, sd = diff.sd) 8 | } 9 | riskSummary.samp <- function(point1, point2, preds.fun, ...) { 10 | cc <- c(-1, 1) 11 | newz <- rbind(point1, point2) 12 | preds <- preds.fun(newz, ...) 13 | diff.preds <- drop(preds %*% cc) 14 | c(est = mean(diff.preds), sd = sd(diff.preds)) 15 | } 16 | interactionSummary.approx <- function(newz.q1, newz.q2, preds.fun, ...) { 17 | cc <- c(-1*c(-1, 1), c(-1, 1)) 18 | newz <- rbind(newz.q1, newz.q2) 19 | preds <- preds.fun(newz, ...) 20 | int <- drop(cc %*% preds$postmean) 21 | int.se <- drop(sqrt(cc %*% preds$postvar %*% cc)) 22 | c(est = int, sd = int.se) 23 | } 24 | interactionSummary.samp <- function(newz.q1, newz.q2, preds.fun, ...) { 25 | cc <- c(-1*c(-1, 1), c(-1, 1)) 26 | newz <- rbind(newz.q1, newz.q2) 27 | preds <- preds.fun(newz, ...) 
28 | int.preds <- drop(preds %*% cc) 29 | c(est = mean(int.preds), sd = sd(int.preds)) 30 | } 31 | 32 | 33 | 34 | #' Calculate overall risk summaries 35 | #' 36 | #' Compare estimated \code{h} function when all predictors are at a particular quantile to when all are at a second fixed quantile 37 | #' @inheritParams kmbayes 38 | #' @inheritParams ComputePostmeanHnew 39 | #' @inherit ComputePostmeanHnew details 40 | #' @param qs vector of quantiles at which to calculate the overall risk summary 41 | #' @param q.fixed a second quantile at which to compare the estimated \code{h} function 42 | #' @export 43 | #' @return a data frame containing the (posterior mean) estimate and posterior standard deviation of the overall risk measures 44 | #' @examples 45 | #' ## First generate dataset 46 | #' set.seed(111) 47 | #' dat <- SimData(n = 50, M = 4) 48 | #' y <- dat$y 49 | #' Z <- dat$Z 50 | #' X <- dat$X 51 | #' 52 | #' ## Fit model with component-wise variable selection 53 | #' ## Using only 100 iterations to make example run quickly 54 | #' ## Typically should use a large number of iterations for inference 55 | #' set.seed(111) 56 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 57 | #' 58 | #' risks.overall <- OverallRiskSummaries(fit = fitkm, qs = seq(0.25, 0.75, by = 0.05), 59 | #' q.fixed = 0.5, method = "exact") 60 | OverallRiskSummaries <- function(fit, y = NULL, Z = NULL, X = NULL, qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5, method = "approx", sel = NULL) { 61 | 62 | if (inherits(fit, "bkmrfit")) { 63 | if (is.null(y)) y <- fit$y 64 | if (is.null(Z)) Z <- fit$Z 65 | if (is.null(X)) X <- fit$X 66 | } 67 | 68 | point1 <- apply(Z, 2, quantile, q.fixed) 69 | if (method %in% c("approx", "exact")) { 70 | preds.fun <- function(znew) ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = znew, sel = sel, method = method) 71 | riskSummary <- riskSummary.approx 72 | } else { 73 | stop("method must be one of c('approx', 'exact')") 74 | } 75 | risks.overall <- t(sapply(qs, function(quant) riskSummary(point1 = point1, point2 = apply(Z, 2, quantile, quant), preds.fun = preds.fun))) 76 | risks.overall <- data.frame(quantile = qs, risks.overall) 77 | } 78 | 79 | #Compare estimated \code{h} function when a single variable (or a set of variables) is at the 75th versus 25th percentile, when all of the other variables are fixed at a particular percentile 80 | VarRiskSummary <- function(whichz = 1, fit, y = NULL, Z = NULL, X = NULL, qs.diff = c(0.25, 0.75), q.fixed = 0.5, method = "approx", sel = NULL, ...) { 81 | 82 | if (inherits(fit, "bkmrfit")) { 83 | if (is.null(y)) y <- fit$y 84 | if (is.null(Z)) Z <- fit$Z 85 | if (is.null(X)) X <- fit$X 86 | } 87 | 88 | point2 <- point1 <- apply(Z, 2, quantile, q.fixed) 89 | point2[whichz] <- apply(Z[, whichz, drop = FALSE], 2, quantile, qs.diff[2]) 90 | point1[whichz] <- apply(Z[, whichz, drop = FALSE], 2, quantile, qs.diff[1]) 91 | # point1 <- makePoint(whichz, Z, qs.diff[1], q.fixed) 92 | # point2 <- makePoint(whichz, Z, qs.diff[2], q.fixed) 93 | if (method %in% c("approx", "exact")) { 94 | preds.fun <- function(znew) ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = znew, sel = sel, method = method) 95 | riskSummary <- riskSummary.approx 96 | } else { 97 | stop("method must be one of c('approx', 'exact')") 98 | } 99 | riskSummary(point1 = point1, point2 = point2, preds.fun = preds.fun, ...) 
100 | } 101 | 102 | #' Single Variable Risk Summaries 103 | #' 104 | #' Compute summaries of the risks associated with a change in a single variable in \code{Z} from a single level (quantile) to a second level (quantile), for the other variables in \code{Z} fixed to a specific level (quantile) 105 | #' 106 | #' @inheritParams kmbayes 107 | #' @inheritParams ExtractEsts 108 | #' @inheritParams OverallRiskSummaries 109 | #' @inherit ComputePostmeanHnew details 110 | #' @param qs.diff vector indicating the two quantiles \code{q_1} and \code{q_2} at which to compute \code{h(z_{q2}) - h(z_{q1})} 111 | #' @param q.fixed vector of quantiles at which to fix the remaining predictors in \code{Z} 112 | #' @param z.names optional vector of names for the columns of \code{z} 113 | #' @param ... other arguments to pass on to the prediction function 114 | #' @param which.z vector indicating which variables (columns of \code{Z}) for which the summary should be computed 115 | #' @export 116 | #' 117 | #' @return a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures 118 | #' 119 | #' @examples 120 | #' ## First generate dataset 121 | #' set.seed(111) 122 | #' dat <- SimData(n = 50, M = 4) 123 | #' y <- dat$y 124 | #' Z <- dat$Z 125 | #' X <- dat$X 126 | #' 127 | #' ## Fit model with component-wise variable selection 128 | #' ## Using only 100 iterations to make example run quickly 129 | #' ## Typically should use a large number of iterations for inference 130 | #' set.seed(111) 131 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 132 | #' 133 | #' risks.singvar <- SingVarRiskSummaries(fit = fitkm, method = "exact") 134 | SingVarRiskSummaries <- function(fit, y = NULL, Z = NULL, X = NULL, which.z = 1:ncol(Z), qs.diff = c(0.25, 0.75), q.fixed = c(0.25, 0.50, 0.75), method = "approx", sel = NULL, z.names = colnames(Z), ...) { 135 | 136 | if (inherits(fit, "bkmrfit")) { 137 | if (is.null(y)) y <- fit$y 138 | if (is.null(Z)) Z <- fit$Z 139 | if (is.null(X)) X <- fit$X 140 | } 141 | 142 | if(is.null(z.names)) z.names <- paste0("z", 1:ncol(Z)) 143 | 144 | df <- dplyr::tibble() 145 | for(i in seq_along(q.fixed)) { 146 | for(j in seq_along(which.z)) { 147 | risk <- VarRiskSummary(whichz = which.z[j], fit = fit, y = y, Z = Z, X = X, qs.diff = qs.diff, q.fixed = q.fixed[i], method = method, sel = sel, ...) 148 | df0 <- dplyr::tibble(q.fixed = q.fixed[i], variable = z.names[j], est = risk["est"], sd = risk["sd"]) 149 | df <- dplyr::bind_rows(df, df0) 150 | } 151 | } 152 | #df <- dplyr::mutate_(df, variable = ~factor(variable, levels = z.names[which.z]), q.fixed = ~as.factor(q.fixed)) 153 | df <- dplyr::mutate_at(df, "variable", function(x) factor(x, levels = z.names[which.z])) 154 | df <- dplyr::mutate_at(df, "q.fixed", function(x) as.factor(x)) 155 | attr(df, "qs.diff") <- qs.diff 156 | df 157 | } 158 | 159 | SingVarIntSummary <- function(whichz = 1, fit, y = NULL, Z = NULL, X = NULL, qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75), method = "approx", sel = NULL, ...) 
{
160 | 
161 |   if (inherits(fit, "bkmrfit")) {
162 |     if (is.null(y)) y <- fit$y
163 |     if (is.null(Z)) Z <- fit$Z
164 |     if (is.null(X)) X <- fit$X
165 |   }
166 | 
167 |   q.fixed <- qs.fixed[1]
168 |   point2 <- point1 <- apply(Z, 2, quantile, q.fixed)
169 |   point2[whichz] <- quantile(Z[, whichz], qs.diff[2])
170 |   point1[whichz] <- quantile(Z[, whichz], qs.diff[1])
171 |   newz.q1 <- rbind(point1, point2)
172 | 
173 |   q.fixed <- qs.fixed[2]
174 |   point2 <- point1 <- apply(Z, 2, quantile, q.fixed)
175 |   point2[whichz] <- quantile(Z[, whichz], qs.diff[2])
176 |   point1[whichz] <- quantile(Z[, whichz], qs.diff[1])
177 |   newz.q2 <- rbind(point1, point2)
178 | 
179 |   if (method %in% c("approx", "exact")) {
180 |     preds.fun <- function(znew) ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = znew, sel = sel, method = method)
181 |     interactionSummary <- interactionSummary.approx
182 |   } else {
183 |     stop("method must be one of c('approx', 'exact')")
184 |   }
185 |   interactionSummary(newz.q1, newz.q2, preds.fun, ...)
186 | }
187 | 
188 | #' Single Variable Interaction Summaries
189 | #'
190 | #' Compare the single-predictor health risks when all of the other predictors in Z are fixed to a specific quantile to when all of the other predictors in Z are fixed to a second specific quantile.
191 | #' @inheritParams kmbayes
192 | #' @inheritParams ExtractEsts
193 | #' @inheritParams SingVarRiskSummaries
194 | #' @inherit ComputePostmeanHnew details
195 | #' @param qs.diff vector indicating the two quantiles at which to compute the single-predictor risk summary
196 | #' @param qs.fixed vector indicating the two quantiles at which to fix all of the remaining exposures in \code{Z}
197 | #' @export
198 | #'
199 | #' @return a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures
200 | #'
201 | #' @examples
202 | #' ## First generate dataset
203 | #' set.seed(111)
204 | #' dat <- SimData(n = 50, M = 4)
205 | #' y <- dat$y
206 | #' Z <- dat$Z
207 | #' X <- dat$X
208 | #'
209 | #' ## Fit model with component-wise variable selection
210 | #' ## Using only 100 iterations to make example run quickly
211 | #' ## Typically should use a large number of iterations for inference
212 | #' set.seed(111)
213 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
214 | #'
215 | #' risks.int <- SingVarIntSummaries(fit = fitkm, method = "exact")
216 | SingVarIntSummaries <- function(fit, y = NULL, Z = NULL, X = NULL, which.z = 1:ncol(Z), qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75), method = "approx", sel = NULL, z.names = colnames(Z), ...) {
217 | 
218 |   if (inherits(fit, "bkmrfit")) {
219 |     if (is.null(y)) y <- fit$y
220 |     if (is.null(Z)) Z <- fit$Z
221 |     if (is.null(X)) X <- fit$X
222 |   }
223 | 
224 |   if(is.null(z.names)) z.names <- paste0("z", 1:ncol(Z))
225 | 
226 |   ints <- sapply(which.z, function(whichz)
227 |     SingVarIntSummary(whichz = whichz, fit = fit, Z = Z, X = X, y = y, qs.diff = qs.diff, qs.fixed = qs.fixed, method = method, sel = sel, ...)
228 |   )
229 | 
230 |   df <- dplyr::tibble(variable = factor(z.names[which.z], levels = z.names), est = ints["est", ], sd = ints["sd", ])
231 | }
232 | 
233 | 
234 | 
235 | 
--------------------------------------------------------------------------------
/my-doc/probit_reg.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Probit Bayesian kernel machine regression for binary outcomes"
3 | author: "Jennifer F. 
Bobb" 4 | date: "`r Sys.Date()`" 5 | output: 6 | html_document: 7 | theme: cosmo 8 | toc: true 9 | toc_float: true 10 | --- 11 | 12 | ```{r options, echo=FALSE, message=FALSE, warning=FALSE} 13 | ## if the current working directory is the directory where this file is located rather than the project directory, set the working directory to the project directory 14 | knitr::opts_chunk$set(fig.width = 5, fig.height = 3, message = FALSE) 15 | ``` 16 | 17 | In this document, we demonstrate how to apply Bayesian kernel machine regression (BKMR) for binary outcomes using the `bkmr` R package. See the [overview guide](https://jenfb.github.io/bkmr/overview.html) for additional information about BKMR, including guided examples for continuous outcomes. 18 | 19 | ## Probit kernel machine regression 20 | 21 | We implement kernel machine regression (KMR) for binary outcomes, 22 | 23 | $$ 24 | \Phi^{-1}(P(Y_i = 1)) = h(z_{i1}, \ldots, z_{iM}) + \beta{\bf x}_i, \quad i = 1, \ldots, n 25 | $$ 26 | where $\Phi$ is the cummulative distribution function (CDF) for the standard normal distribution ($\Phi^{-1}$ is the probit link function), the outcome $Y_i$ is a binary (0/1) variable, $h$ is a flexible function of the predictor variables $z_{i1}, \ldots, z_{iM}$, and ${\bf x}$ is a vector of covariates ($\beta$ is the corresponding vector of coefficients). We will refer to the predictors ${\bf z}$ as exposure variables and to $h(\cdot)$ as the exposure-response function. The function $h$ is modeled using a kernel machine representation that can capture complex, non-linear and non-additive, exposure-response relationships. 27 | 28 | We implement probit, rather than logistic regression, primarily for reasons of computational convenience and efficiency for Bayesien inference using Gibbs sampling. In particular, for this we note that the probit model above can be reexpressed by incorporating a latent normal random variable ($Y^*$), as 29 | $$ 30 | Y_i^* = h(z_{i1}, \ldots, z_{iM}) + \beta{\bf x}_i + \epsilon_i, \quad i = 1, \ldots, n 31 | $$ 32 | where $\epsilon_i ~ \mbox{N}(0,1)$ and $Y_i = I(Y_i^* > 0)$ is equal to 1 if $Y_i^* > 0$ and is equal to 0 otherwise. In our example below we will demonstrate how the exposure-response function $h$ can be interpreted under the probit regression model. 33 | 34 | ## Example 35 | 36 | First, load the R package. 37 | ```{r load package 0, echo=FALSE, eval=TRUE, message=FALSE, warning=FALSE, results='hide'} 38 | suppressMessages(library(bkmr)) 39 | ##suppressMessages(devtools::document()) 40 | ##devtools::load_all() 41 | ``` 42 | ```{r load package 1, eval=FALSE, message=FALSE, warning=FALSE} 43 | library("bkmr") 44 | ``` 45 | 46 | ### Generate data 47 | 48 | Let's consider a simple example with outcome data are generating under the probit model above, where the true exposure-response function only depends on a single exposure variable. 
49 | 
50 | ```{r simdata}
51 | set.seed(123)
52 | n <- 200 ## number of observations
53 | M <- 4 ## number of exposure variables
54 | beta.true <- 0.1
55 | Z <- matrix(runif(n * M, -1, 1), n, M)
56 | x <- 3*cos(Z[, 1]) + 2*rnorm(n)
57 | hfun <- function(z) (2*z + 0.5) ^ 2
58 | h <- hfun(Z[, 1]) ## only depends on z1
59 | 
60 | ## generate using latent normal representation
61 | eps <- rnorm(n)
62 | ystar <- x * beta.true + h + eps
63 | y <- ifelse(ystar > 0, 1, 0)
64 | 
65 | datp <- list(n = n, M = M, beta.true = beta.true, Z = Z, h = h, X = cbind(x), y = y, eps = eps, ystar = ystar)
66 | rm(n, M, beta.true, Z, x, h, eps, y, ystar)
67 | ```
68 | 
69 | Let's view the true exposure-response function used to generate the data.
70 | ```{r plot, fig.height=3.5, fig.width=3.5}
71 | curve(hfun, from = min(datp$Z[, 1]), to = max(datp$Z[, 1]),
72 |       xlab = expression(z[1]), ylab = expression(h(z[1])))
73 | ```
74 | 
75 | ### Fit BKMR
76 | 
77 | To fit the BKMR model, we use the `kmbayes` function.
78 | ```{r fit orig, eval=FALSE}
79 | set.seed(123)
80 | fitpr <- kmbayes(y = datp$y, Z = datp$Z, X = datp$X, 
81 |                  iter = 10000, verbose = FALSE, 
82 |                  varsel = TRUE, family = "binomial", 
83 |                  control.params = list(r.jump2 = 0.5))
84 | ```
85 | ```{r fit orig load, echo=FALSE}
86 | DIR <- ifelse(grepl("my-doc", getwd()), getwd(), paste(getwd(), "my-doc", sep = "/"))
87 | load(paste(DIR, "probit_reg.RData", sep = "/"))
88 | rm(DIR)
89 | ```
90 | The argument `family` indicates the outcome distribution; 'gaussian' and 'binomial' are currently implemented. Note that here we changed the tuning parameter `r.jump2` of the Metropolis-Hastings algorithm for updating the $r_m$ parameters under variable selection, to get an improved acceptance rate of `r 100*with(fitpr, round(mean(acc.rdelta[move.type == 2]), 2))`% versus ~65% under the default tuning parameters (details of the tuning parameters are in the [overview guide](https://jenfb.github.io/bkmr/overview.html)).
91 | 
92 | ```{r summary}
93 | summary(fitpr)
94 | ```
95 | 
96 | ## Interpreting output
97 | 
98 | ### On probit scale
99 | 
100 | We may wish to interpret the estimated $h$ function directly. We note that $h$ quantifies the relationship between the exposures and the (probit of the) probability of an event ($Y = 1$), holding the covariates ${\bf x}$ fixed. By considering the latent normal formulation above, $h$ may alternatively be interpreted as the relationship between the exposures and an underlying, continuous latent variable $Y^*$. For example, if $Y$ is an indicator variable for whether an individual has a particular health outcome, $Y^*$ could be interpreted as a latent marker of health status.
101 | 
102 | Let's investigate the estimated exposure-response function $h$. Here we plot the univariate relationship $h(z_m)$, where all of the other exposures are fixed to their median values.
103 | 
104 | ```{r pred-resp}
105 | pred.resp.univar <- PredictorResponseUnivar(fit = fitpr, method = "exact")
106 | ```
107 | We use the `ggplot2` package to plot the resulting cross section of $h$. 
108 | ```{r plot pred-resp, fig.height=4, fig.width=6}
109 | library(ggplot2)
110 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) + 
111 |     geom_smooth(stat = "identity") + 
112 |     facet_wrap(~ variable) + 
113 |     ylab("h(z)")
114 | ```
115 | 
116 | As expected given the small posterior inclusion probabilities for $z_2$, $z_3$, and $z_4$, the estimated exposure-response functions for these exposures are flat, indicating no association with the outcome; this matches the true data-generating distribution. We next compare the exposure-response function for $z_1$ estimated under BKMR with that estimated by a probit model assuming linear terms for each of the exposure variables, as well as with an 'oracle' probit model that knows the true form of the exposure-response function, fitted using maximum likelihood:
117 | 
118 | ```{r probit}
119 | z1 <- datp$Z[, 1]
120 | x <- drop(datp$X)
121 | oracle <- glm(y ~ z1 + I(z1^2) + x, family = binomial(link = "probit"), data = datp)
122 | lin <- glm(y ~ Z + x, family = binomial(link = "probit"), data = datp)
123 | 
124 | ## predictions under the oracle model
125 | z1_grid <- seq(min(datp$Z[, 1]), max(datp$Z[, 1]), length.out = 50)
126 | hpred_oracle <- predict(oracle, newdata = data.frame(z1 = z1_grid, x = 0), se.fit = TRUE)
127 | 
128 | ## predictions under BKMR
129 | Znew <- cbind(z1 = z1_grid, z2 = median(datp$Z[, 2]), 
130 |               z3 = median(datp$Z[, 3]), z4 = median(datp$Z[, 4]))
131 | hpred_bkmr <- ComputePostmeanHnew(fit = fitpr, Znew = Znew, method = "exact")
132 | 
133 | ## predictions under the model with linear terms
134 | hpred_lin <- predict(lin, newdata = list(Z = Znew, x = rep(0, nrow(Znew))), se.fit = TRUE)
135 | ```
136 | 
137 | Now let's compare the estimated exposure-response functions $h(z_1)$.
138 | ```{r compare, fig.height=4, fig.width=6}
139 | plot(z1_grid, hpred_bkmr$postmean, type = "l", 
140 |      ylim = c(0.95*min(datp$h), max(datp$h)), 
141 |      xlab = expression(z[1]), ylab = expression(h(z[1])))
142 | lines(z1_grid, hpred_oracle$fit, col = "red", lty = 2, lwd = 2)
143 | lines(z1_grid, hfun(z1_grid), col = "blue", lty = 3, lwd = 2)
144 | lines(z1_grid, hpred_lin$fit, col = "orange", lty = 4, lwd = 2)
145 | legend(-1, 6, c("BKMR", "oracle", "truth", "linear"), lwd = 2, 
146 |        col = c("black", "red", "blue", "orange"), lty = 1:4, 
147 |        y.intersp = 0.8)
148 | ```
149 | 
150 | As expected, we see that the BKMR fit performs better than the model assuming a linear exposure-response relationship, but not as well as the oracle model.
151 | 
152 | ### On probability scale
153 | 
154 | Alternatively, we may wish to interpret the association between the exposures and the (untransformed) probability of the outcome. For this, we observe
155 | $$
156 | P(Y = 1 \mid {\bf z}, {\bf x}) = \Phi\{h(z_{1}, \ldots, z_{M}) + \beta{\bf x}\},
157 | $$
158 | so that the probability of the event depends not just on $h$, but also on the particular values of the covariates ${\bf x}$. Thus, to estimate the association between $h$ and the probability of the outcome, we must either fix the covariates or integrate over them. Posterior samples of the predicted probabilities may be obtained using the `SamplePred` function, in which the user specifies the new Z matrix at which to obtain predictions, as well as a particular value of the vector ${\bf x}$. Here we plot the posterior mean of the predicted probabilities as a function of $z_1$ for particular, fixed values of the covariates and with the other exposures set to their median values. 
159 | 160 | ```{r pred, fig.height=3.5, fig.width=7} 161 | Xnew1 <- quantile(datp$X, 0.1) 162 | Xnew2 <- quantile(datp$X, 0.9) 163 | 164 | ptrue1 <- with(datp, pnorm(hfun(z1_grid) + Xnew1*beta.true)) 165 | ptrue2 <- with(datp, pnorm(hfun(z1_grid) + Xnew2*beta.true)) 166 | 167 | pred_samps1 <- SamplePred(fit = fitpr, Znew = Znew, Xnew = Xnew1, type = "response") 168 | pred_samps2 <- SamplePred(fit = fitpr, Znew = Znew, Xnew = Xnew2, type = "response") 169 | pred_ests1 <- colMeans(pred_samps1) 170 | pred_ests2 <- colMeans(pred_samps2) 171 | 172 | par(mfrow = c(1, 2)) 173 | plot(z1_grid, pred_ests1, type = "l", ylim = range(ptrue1), 174 | xlab = expression(z[1]), ylab = expression(P(Y == 1))) 175 | lines(z1_grid, ptrue1, col = "blue", lty = 3, lwd = 2) 176 | legend("bottomright", c("BKMR", "truth"), lwd = 2, 177 | col = c("black", "blue"), lty = c(1,3), 178 | y.intersp = 0.8, cex = 0.8) 179 | plot(z1_grid, pred_ests2, type = "l", ylim = range(ptrue1), 180 | xlab = expression(z[1]), ylab = expression(P(Y == 1))) 181 | lines(z1_grid, ptrue2, col = "blue", lty = 3, lwd = 2) 182 | ``` 183 | 184 | 185 | To integrate over the covariates, we use the fact that, under the assumed probit model, the probability of the outcome given the exposure variables ${\bf z}$ may be expressed as 186 | $$ 187 | P(Y = 1 \mid {\bf z}) 188 | = \mbox{E}[P(Y = 1 \mid {\bf z}, {\bf x}) \mid {\bf z}] 189 | = \mbox{E}[\Phi\{h(z_{1}, \ldots, z_{M}) + \beta{\bf x}\} \mid z_{1}, \ldots, z_{M}]. 190 | $$ 191 | We estimate this quantity as 192 | $$ 193 | \frac{1}{n}\sum_{i = 1}^n\Phi\{h(z_{1}, \ldots, z_{M}) + \beta{\bf x_i}\}. 194 | $$ 195 | Currently, built-in functions to do this have not yet been implemented. 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /R/PredictorResponseFunctions.R: -------------------------------------------------------------------------------- 1 | PredictorResponseUnivarVar <- function(whichz = 1, fit, y, Z, X, method = "approx", ngrid = 50, q.fixed = 0.5, sel = NULL, min.plot.dist = Inf, center = TRUE, z.names = colnames(Z), ...) 
{ 2 | 3 | if (ncol(Z) < 2) stop("requires there to be at least 2 predictor variables") 4 | 5 | if (is.null(z.names)) { 6 | colnames(Z) <- paste0("z", 1:ncol(Z)) 7 | } else { 8 | colnames(Z) <- z.names 9 | } 10 | 11 | ord <- c(whichz, setdiff(1:ncol(Z), whichz)) 12 | z1 <- seq(min(Z[,ord[1]]), max(Z[,ord[1]]), length = ngrid) 13 | z.others <- lapply(2:ncol(Z), function(x) quantile(Z[,ord[x]], q.fixed)) 14 | z.all <- c(list(z1), z.others) 15 | newz.grid <- expand.grid(z.all) 16 | colnames(newz.grid) <- colnames(Z)[ord] 17 | newz.grid <- newz.grid[,colnames(Z)] 18 | 19 | if (!is.null(min.plot.dist)) { 20 | mindists <- rep(NA,nrow(newz.grid)) 21 | for (i in seq_along(mindists)) { 22 | pt <- as.numeric(newz.grid[i, colnames(Z)[ord[1]]]) 23 | dists <- fields::rdist(matrix(pt, nrow = 1), Z[, colnames(Z)[ord[1]]]) 24 | mindists[i] <- min(dists) 25 | } 26 | } 27 | 28 | if (method %in% c("approx", "exact")) { 29 | preds <- ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = newz.grid, sel = sel, method = method) 30 | preds.plot <- preds$postmean 31 | se.plot <- sqrt(diag(preds$postvar)) 32 | } else { 33 | stop("method must be one of c('approx', 'exact')") 34 | } 35 | if(center) preds.plot <- preds.plot - mean(preds.plot) 36 | if(!is.null(min.plot.dist)) { 37 | preds.plot[mindists > min.plot.dist] <- NA 38 | se.plot[mindists > min.plot.dist] <- NA 39 | } 40 | 41 | res <- dplyr::tibble(z = z1, est = preds.plot, se = se.plot) 42 | } 43 | 44 | #' Plot univariate predictor-response function on a new grid of points 45 | #' 46 | #' Plot univariate predictor-response function on a new grid of points 47 | #' 48 | #' @inheritParams kmbayes 49 | #' @inheritParams ExtractEsts 50 | #' @inheritParams SingVarRiskSummaries 51 | #' 52 | #' @param which.z vector identifying which predictors (columns of \code{Z}) should be plotted 53 | #' @param ngrid number of grid points to cover the range of each predictor (column in \code{Z}) 54 | #' @param min.plot.dist specifies a minimum distance that a new grid point needs to be from an observed data point in order to compute the prediction; points further than this will not be computed 55 | #' @param center flag for whether to scale the exposure-response function to have mean zero 56 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 57 | #' 58 | #' @export 59 | #' 60 | #' @return a long data frame with the predictor name, predictor value, posterior mean estimate, and posterior standard deviation 61 | #' 62 | #' @examples 63 | #' ## First generate dataset 64 | #' set.seed(111) 65 | #' dat <- SimData(n = 50, M = 4) 66 | #' y <- dat$y 67 | #' Z <- dat$Z 68 | #' X <- dat$X 69 | #' 70 | #' ## Fit model with component-wise variable selection 71 | #' ## Using only 100 iterations to make example run quickly 72 | #' ## Typically should use a large number of iterations for inference 73 | #' set.seed(111) 74 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 75 | #' pred.resp.univar <- PredictorResponseUnivar(fit = fitkm) 76 | PredictorResponseUnivar <- function(fit, y = NULL, Z = NULL, X = NULL, which.z = 1:ncol(Z), method = "approx", ngrid = 50, q.fixed = 0.5, sel = NULL, min.plot.dist = Inf, center = TRUE, z.names = colnames(Z), ...) 
{ 77 | 78 | if (inherits(fit, "bkmrfit")) { 79 | y <- fit$y 80 | Z <- fit$Z 81 | X <- fit$X 82 | } 83 | 84 | if (is.null(z.names)) { 85 | z.names <- paste0("z", 1:ncol(Z)) 86 | } 87 | 88 | df <- dplyr::tibble() 89 | for(i in which.z) { 90 | res <- PredictorResponseUnivarVar(whichz = i, fit = fit, y = y, Z = Z, X = X, method = method, ngrid = ngrid, q.fixed = q.fixed, sel = sel, min.plot.dist = min.plot.dist, center = center, z.names = z.names, ...) 91 | #df0 <- dplyr::mutate(res, variable = z.names[i]) %>% 92 | # dplyr::select_(~variable, ~z, ~est, ~se) 93 | df0 <- dplyr::mutate(res, variable = z.names[i]) %>% 94 | dplyr::select_at(c("variable", "z", "est", "se")) 95 | df <- dplyr::bind_rows(df, df0) 96 | } 97 | df$variable <- factor(df$variable, levels = z.names[which.z]) 98 | df 99 | } 100 | 101 | 102 | 103 | #' Plot bivariate predictor-response function on a new grid of points 104 | #' 105 | #' @inheritParams kmbayes 106 | #' @inheritParams ExtractEsts 107 | #' @inheritParams SingVarRiskSummaries 108 | #' @inheritParams PredictorResponseUnivar 109 | #' @param whichz1 vector identifying the first predictor that (column of \code{Z}) should be plotted 110 | #' @param whichz2 vector identifying the second predictor that (column of \code{Z}) should be plotted 111 | #' @param whichz3 vector identifying the third predictor that will be set to a pre-specified fixed quantile (determined by \code{prob}) 112 | #' @param prob pre-specified quantile to set the third predictor (determined by \code{whichz3}); defaults to 0.5 (50th percentile) 113 | #' 114 | #' @export 115 | #' 116 | #' @return a data frame with value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation 117 | #' 118 | #' @examples 119 | #' ## First generate dataset 120 | #' set.seed(111) 121 | #' dat <- SimData(n = 50, M = 4) 122 | #' y <- dat$y 123 | #' Z <- dat$Z 124 | #' X <- dat$X 125 | #' 126 | #' ## Fit model with component-wise variable selection 127 | #' ## Using only 100 iterations to make example run quickly 128 | #' ## Typically should use a large number of iterations for inference 129 | #' set.seed(111) 130 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 131 | #' 132 | #' ## Obtain predicted value on new grid of points 133 | #' ## Using only a 10-by-10 point grid to make example run quickly 134 | #' pred.resp.bivar12 <- PredictorResponseBivarPair(fit = fitkm, min.plot.dist = 1, ngrid = 10) 135 | PredictorResponseBivarPair <- function(fit, y = NULL, Z = NULL, X = NULL, whichz1 = 1, whichz2 = 2, whichz3 = NULL, method = "approx", prob = 0.5, q.fixed = 0.5, sel = NULL, ngrid = 50, min.plot.dist = 0.5, center = TRUE, ...) 
{ 136 | 137 | if (inherits(fit, "bkmrfit")) { 138 | if (is.null(y)) y <- fit$y 139 | if (is.null(Z)) Z <- fit$Z 140 | if (is.null(X)) X <- fit$X 141 | } 142 | 143 | if(ncol(Z) < 3) stop("requires there to be at least 3 Z variables") 144 | 145 | if(is.null(colnames(Z))) colnames(Z) <- paste0("z", 1:ncol(Z)) 146 | 147 | if(is.null(whichz3)) { 148 | ord <- c(whichz1, whichz2, setdiff(1:ncol(Z), c(whichz1, whichz2))) 149 | } else { 150 | ord <- c(whichz1, whichz2, whichz3, setdiff(1:ncol(Z), c(whichz1, whichz2, whichz3))) 151 | } 152 | z1 <- seq(min(Z[,ord[1]]), max(Z[,ord[1]]), length=ngrid) 153 | z2 <- seq(min(Z[,ord[2]]), max(Z[,ord[2]]), length=ngrid) 154 | z3 <- quantile(Z[, ord[3]], probs = prob) 155 | z.all <- c(list(z1), list(z2), list(z3)) 156 | if(ncol(Z) > 3) { 157 | z.others <- lapply(4:ncol(Z), function(x) quantile(Z[,ord[x]], q.fixed)) 158 | z.all <- c(z.all, z.others) 159 | } 160 | newz.grid <- expand.grid(z.all) 161 | z1save <- newz.grid[, 1] 162 | z2save <- newz.grid[, 2] 163 | colnames(newz.grid) <- colnames(Z)[ord] 164 | newz.grid <- newz.grid[,colnames(Z)] 165 | 166 | if(!is.null(min.plot.dist)) { 167 | mindists <- rep(NA, nrow(newz.grid)) 168 | for(k in seq_along(mindists)) { 169 | pt <- as.numeric(newz.grid[k,c(colnames(Z)[ord[1]],colnames(Z)[ord[2]])]) 170 | dists <- fields::rdist(matrix(pt, nrow = 1), Z[, c(colnames(Z)[ord[1]],colnames(Z)[ord[2]])]) 171 | mindists[k] <- min(dists) 172 | } 173 | } 174 | 175 | if (method %in% c("approx", "exact")) { 176 | preds <- ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = newz.grid, sel = sel, method = method) 177 | preds.plot <- preds$postmean 178 | se.plot <- sqrt(diag(preds$postvar)) 179 | } else { 180 | stop("method must be one of c('approx', 'exact')") 181 | } 182 | if(center) preds.plot <- preds.plot - mean(preds.plot) 183 | if(!is.null(min.plot.dist)) { 184 | preds.plot[mindists > min.plot.dist] <- NA 185 | se.plot[mindists > min.plot.dist] <- NA 186 | } 187 | # hgrid <- matrix(preds.plot, ngrid, ngrid, dimnames=list(z1=round(z1,2), z2=round(z2,2))) 188 | # se.grid <- matrix(se.plot, ngrid, ngrid, dimnames=list(z1=round(z1,2), z2=round(z2,2))) 189 | 190 | res <- dplyr::tibble(z1 = z1save, z2 = z2save, est = preds.plot, se = se.plot) 191 | } 192 | 193 | #' Predict the exposure-response function at a new grid of points 194 | #' 195 | #' Predict the exposure-response function at a new grid of points 196 | #' 197 | #' @inheritParams kmbayes 198 | #' @inheritParams ExtractEsts 199 | #' @inheritParams SingVarRiskSummaries 200 | #' @inheritParams PredictorResponseUnivar 201 | #' @param z.pairs data frame showing which pairs of predictors to plot 202 | #' @param ngrid number of grid points in each dimension 203 | #' @param verbose TRUE or FALSE: flag of whether to print intermediate output to the screen 204 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 205 | #' @export 206 | #' 207 | #' @return a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function 208 | #' 209 | #' @examples 210 | #' ## First generate dataset 211 | #' set.seed(111) 212 | #' dat <- SimData(n = 50, M = 4) 213 | #' y <- dat$y 214 | #' Z <- dat$Z 215 | #' X <- dat$X 216 | #' 217 | #' ## Fit model with component-wise variable selection 218 | #' ## Using only 100 iterations to make example run quickly 219 | 
#' ## Typically should use a large number of iterations for inference 220 | #' set.seed(111) 221 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 222 | #' 223 | #' ## Obtain predicted value on new grid of points for each pair of predictors 224 | #' ## Using only a 10-by-10 point grid to make example run quickly 225 | #' pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10) 226 | #' 227 | PredictorResponseBivar <- function(fit, y = NULL, Z = NULL, X = NULL, z.pairs = NULL, method = "approx", ngrid = 50, q.fixed = 0.5, sel = NULL, min.plot.dist = 0.5, center = TRUE, z.names = colnames(Z), verbose = TRUE, ...) { 228 | 229 | if (inherits(fit, "bkmrfit")) { 230 | if (is.null(y)) y <- fit$y 231 | if (is.null(Z)) Z <- fit$Z 232 | if (is.null(X)) X <- fit$X 233 | } 234 | 235 | if (is.null(z.names)) { 236 | z.names <- colnames(Z) 237 | if (is.null(z.names)) { 238 | z.names <- paste0("z", 1:ncol(Z)) 239 | } 240 | } 241 | 242 | if (is.null(z.pairs)) { 243 | z.pairs <- expand.grid(z1 = 1:ncol(Z), z2 = 1:ncol(Z)) 244 | z.pairs <- z.pairs[z.pairs$z1 < z.pairs$z2, ] 245 | } 246 | 247 | df <- dplyr::tibble() 248 | for(i in 1:nrow(z.pairs)) { 249 | compute <- TRUE 250 | whichz1 <- z.pairs[i, 1] %>% unlist %>% unname 251 | whichz2 <- z.pairs[i, 2] %>% unlist %>% unname 252 | if(whichz1 == whichz2) compute <- FALSE 253 | z.name1 <- z.names[whichz1] 254 | z.name2 <- z.names[whichz2] 255 | names.pair <- c(z.name1, z.name2) 256 | if(nrow(df) > 0) { ## determine whether the current pair of variables has already been done 257 | completed.pairs <- df %>% 258 | #dplyr::select_('variable1', 'variable2') %>% 259 | dplyr::select_at(c('variable1', 'variable2')) %>% 260 | dplyr::distinct() %>% 261 | dplyr::transmute(z.pair = paste(variable1, variable2, sep = ":")) %>% 262 | unlist %>% unname 263 | if(paste(names.pair, collapse = ":") %in% completed.pairs | paste(rev(names.pair), collapse = ":") %in% completed.pairs) compute <- FALSE 264 | } 265 | if(compute) { 266 | if(verbose) message("Pair ", i, " out of ", nrow(z.pairs)) 267 | res <- PredictorResponseBivarPair(fit = fit, y = y, Z = Z, X = X, whichz1 = whichz1, whichz2 = whichz2, method = method, ngrid = ngrid, q.fixed = q.fixed, sel = sel, min.plot.dist = min.plot.dist, center = center, z.names = z.names, ...)
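## tag this pair's cross-section with both variable names before stacking it onto the long-format result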
268 | df0 <- res 269 | df0$variable1 <- z.name1 270 | df0$variable2 <- z.name2 271 | df0 %<>% 272 | #dplyr::select_(~variable1, ~variable2, ~z1, ~z2, ~est, ~se) 273 | dplyr::select_at(c("variable1", "variable2", "z1", "z2", "est", "se")) 274 | df <- dplyr::bind_rows(df, df0) 275 | } 276 | } 277 | df$variable1 <- factor(df$variable1, levels = z.names) 278 | df$variable2 <- factor(df$variable2, levels = z.names) 279 | df 280 | } 281 | 282 | #' Plot cross-sections of the bivariate predictor-response function 283 | #' 284 | #' Function to plot the \code{h} function of a particular variable at different levels (quantiles) of a second variable 285 | #' 286 | #' @export 287 | #' @inheritParams kmbayes 288 | #' @inheritParams PredictorResponseBivar 289 | #' @param pred.resp.df object obtained from running the function \code{\link{PredictorResponseBivar}} 290 | #' @param qs vector of quantiles at which to fix the second variable 291 | #' @param both_pairs flag indicating whether, when \code{h(z1)} is computed for z2 fixed at different levels, the cross-sections should also be computed in the reverse order (\code{h(z2)} at different levels of z1) 292 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 293 | #' 294 | #' @return a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the quantile at which the second predictor is fixed, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function 295 | #' 296 | #' @examples 297 | #' ## First generate dataset 298 | #' set.seed(111) 299 | #' dat <- SimData(n = 50, M = 4) 300 | #' y <- dat$y 301 | #' Z <- dat$Z 302 | #' X <- dat$X 303 | #' 304 | #' ## Fit model with component-wise variable selection 305 | #' ## Using only 100 iterations to make example run quickly 306 | #' ## Typically should use a large number of iterations for inference 307 | #' set.seed(111) 308 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 309 | #' 310 | #' ## Obtain predicted value on new grid of points for each pair of predictors 311 | #' ## Using only a 10-by-10 point grid to make example run quickly 312 | #' pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10) 313 | #' pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar, 314 | #' Z = Z, qs = c(0.1, 0.5, 0.9)) 315 | PredictorResponseBivarLevels <- function(pred.resp.df, Z = NULL, qs = c(0.25, 0.5, 0.75), both_pairs = TRUE, z.names = NULL) { 316 | #var.pairs <- dplyr::distinct(dplyr::select_(pred.resp.df, ~variable1, ~variable2)) 317 | var.pairs <- dplyr::distinct(dplyr::select_at(pred.resp.df, c("variable1", "variable2"))) 318 | if (both_pairs) { 319 | var.pairs.rev <- dplyr::tibble( 320 | variable1 = var.pairs$variable2, 321 | 322 | variable2 = var.pairs$variable1 323 | ) 324 | var.pairs <- rbind(var.pairs, var.pairs.rev) 325 | } 326 | 327 | if (is.null(z.names)) { 328 | z.names <- colnames(Z) 329 | if (is.null(z.names)) { 330 | z.names <- paste0("z", 1:ncol(Z)) 331 | colnames(Z) <- z.names 332 | } 333 | } 334 | 335 | df <- data.frame() 336 | for (i in 1:nrow(var.pairs)) { 337 | var1 <- as.character(unlist(var.pairs[i, "variable1"])) 338 | var2 <- as.character(unlist(var.pairs[i, "variable2"])) 339 | preds <- pred.resp.df[pred.resp.df$variable1 == var1 & pred.resp.df$variable2 == var2, ] 340 | if (nrow(preds) == 0) { 341 | preds <- pred.resp.df[pred.resp.df$variable1 ==
var2 & pred.resp.df$variable2 == var1, ] 342 | preds.rev <- dplyr::tibble( 343 | variable1 = preds$variable2, 344 | variable2 = preds$variable1, 345 | z1 = preds$z2, 346 | z2 = preds$z1, 347 | est = preds$est, 348 | se = preds$se 349 | ) 350 | preds <- preds.rev 351 | #preds <- dplyr::arrange_(preds, ~z2, ~z1) 352 | preds <- dplyr::arrange_at(preds, c("z2", "z1")) 353 | } 354 | 355 | ngrid <- sqrt(nrow(preds)) 356 | preds.plot <- preds$est 357 | se.plot <- preds$se 358 | 359 | hgrid <- matrix(preds.plot, ngrid, ngrid) 360 | se.grid <- matrix(se.plot, ngrid, ngrid) 361 | z1 <- preds$z1[1:ngrid] 362 | z2 <- preds$z2[seq(1, by = ngrid, length.out = ngrid)] 363 | 364 | quants <- quantile(Z[, var2], qs) 365 | 366 | ## relation of z1 with outcome at different levels of z2 367 | se.grid.sub <- hgrid.sub <- matrix(NA, ngrid, length(qs)) 368 | for (k in seq_along(quants)) { 369 | sub.sel <- which.min(abs(z2 - quants[k])) 370 | hgrid.sub[, k] <- hgrid[, sub.sel] 371 | se.grid.sub[, k] <- se.grid[, sub.sel] 372 | } 373 | colnames(hgrid.sub) <- colnames(se.grid.sub) <- paste0("q", seq_along(qs)) 374 | hgrid.df <- tidyr::gather(data.frame(hgrid.sub), quantile, 'est', convert = TRUE) 375 | se.grid.df <- tidyr::gather(data.frame(se.grid.sub), quantile, 'se') 376 | 377 | df.curr <- data.frame(variable1 = var1, variable2 = var2, z1 = z1, quantile = factor(hgrid.df$quantile, labels = qs), est = hgrid.df$est, se = se.grid.df$se, stringsAsFactors = FALSE) 378 | df <- rbind(df, df.curr) 379 | } 380 | df <- tibble::as_tibble(df) %>% #dplyr::tbl_df(df) %>% 381 | #dplyr::arrange_(~variable1, ~variable2) 382 | dplyr::arrange_at(c("variable1", "variable2")) 383 | df 384 | } 385 | -------------------------------------------------------------------------------- /R/bkmr_parameter_update_functions.R: -------------------------------------------------------------------------------- 1 | beta.update <- function(X, Vinv, y, sigsq.eps) { 2 | XVinv <- crossprod(X, Vinv) 3 | Vbeta <- chol2inv(chol(XVinv %*% X)) 4 | cholVbeta <- chol(Vbeta) 5 | betahat <- Vbeta %*% XVinv %*% y 6 | n01 <- rnorm(ncol(X)) 7 | betahat + crossprod(sqrt(sigsq.eps)*cholVbeta, n01) 8 | } 9 | 10 | sigsq.eps.update <- function(y, X, beta, Vinv, a.eps=1e-3, b.eps=1e-3) { 11 | mu <- y - X%*%beta 12 | prec.y <- rgamma(1, shape=a.eps + nrow(X)/2, rate=b.eps + 1/2*crossprod(mu, Vinv)%*%mu) 13 | 1/prec.y 14 | } 15 | 16 | ystar.update <- function(y, X, beta, h) { 17 | mu <- drop(h + X %*% beta) 18 | lower <- ifelse(y == 1, 0, -Inf) 19 | upper <- ifelse(y == 0, 0, Inf) 20 | samp <- truncnorm::rtruncnorm(1, a = lower, b = upper, mean = mu, sd = 1) 21 | drop(samp) 22 | } 23 | #' @importFrom tmvtnorm rtmvnorm 24 | ystar.update.noh <- function(y, X, beta, Vinv, ystar) { 25 | mu <- drop(X %*% beta) 26 | lower <- ifelse(y == 1, 0, -Inf) 27 | upper <- ifelse(y == 0, 0, Inf) 28 | samp <- tmvtnorm::rtmvnorm(1, mean = mu, H = Vinv, lower = lower, upper = upper, algorithm = "gibbs", start.value = ystar) 29 | #samp <- truncnorm::rtruncnorm(1, a = lower, b = upper, mean = mu, sd = 1) 30 | drop(samp) 31 | } 32 | 33 | r.update <- function(r, whichcomp, delta, lambda, y, X, beta, sigsq.eps, Vcomps, Z, data.comps, control.params, rprop.gen, rprop.logdens, rprior.logdens, ...) 
{ 34 | # r.params <- set.r.params(r.prior = control.params$r.prior, comp = whichcomp, r.params = control.params$r.params) 35 | r.params <- make_r_params_comp(control.params$r.params, whichcomp) 36 | rcomp <- unique(r[whichcomp]) 37 | if(length(rcomp) > 1) stop("rcomp should only be 1-dimensional") 38 | 39 | ## generate a proposal 40 | rcomp.star <- rprop.gen(current = rcomp, r.params = r.params) 41 | lambda.star <- lambda 42 | delta.star <- delta 43 | move.type <- NA 44 | 45 | ## part of M-H ratio that depends on the proposal distribution 46 | negdifflogproposal <- -rprop.logdens(rcomp.star, rcomp, r.params = r.params) + rprop.logdens(rcomp, rcomp.star, r.params = r.params) 47 | 48 | ## prior distribution 49 | diffpriors <- rprior.logdens(rcomp.star, r.params = r.params) - rprior.logdens(rcomp, r.params = r.params) 50 | 51 | r.star <- r 52 | r.star[whichcomp] <- rcomp.star 53 | 54 | ## M-H step 55 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 56 | } 57 | 58 | rdelta.comp.update <- function(r, delta, lambda, y, X, beta, sigsq.eps, Vcomps, Z, ztest, data.comps, control.params, rprop.gen2, rprop.logdens1, rprior.logdens, rprior.logdens2, rprop.logdens2, rprop.gen1, ...) { ## individual variable selection 59 | r.params <- control.params$r.params 60 | a.p0 <- control.params$a.p0 61 | b.p0 <- control.params$b.p0 62 | delta.star <- delta 63 | r.star <- r 64 | 65 | move.type <- ifelse(all(delta[ztest] == 0), 1, sample(c(1,2),1)) 66 | move.prob <- ifelse(all(delta[ztest] == 0), 1, 1/2) 67 | if(move.type == 1) { 68 | comp <- ifelse(length(ztest) == 1, ztest, sample(ztest, 1)) 69 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 70 | 71 | delta.star[comp] <- 1 - delta[comp] 72 | move.prob.star <- ifelse(all(delta.star[ztest] == 0), 1, 1/2) 73 | r.star[comp] <- ifelse(delta.star[comp] == 0, 0, rprop.gen1(r.params = r.params)) 74 | 75 | diffpriors <- (lgamma(sum(delta.star[ztest]) + a.p0) + lgamma(length(ztest) - sum(delta.star[ztest]) + b.p0) - lgamma(sum(delta[ztest]) + a.p0) - lgamma(length(ztest) - sum(delta[ztest]) + b.p0)) + ifelse(delta[comp] == 1, -1, 1)*with(list(r.sel = ifelse(delta[comp] == 1, r[comp], r.star[comp])), rprior.logdens(x = r.sel, r.params = r.params)) 76 | 77 | negdifflogproposal <- -log(move.prob.star) + log(move.prob) - ifelse(delta[comp] == 1, -1, 1)*with(list(r.sel = ifelse(delta[comp] == 1, r[comp], r.star[comp])), rprop.logdens1(x = r.sel, r.params = r.params)) 78 | 79 | } else if(move.type == 2) { 80 | comp <- ifelse(length(which(delta == 1)) == 1, which(delta == 1), sample(which(delta == 1), 1)) 81 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 82 | 83 | r.star[comp] <- rprop.gen2(current = r[comp], r.params = r.params) 84 | 85 | diffpriors <- rprior.logdens(r.star[comp], r.params = r.params) - rprior.logdens(r[comp], r.params = r.params) 86 | 87 | negdifflogproposal <- -rprop.logdens2(r.star[comp], r[comp], r.params = r.params) + rprop.logdens2(r[comp], r.star[comp], r.params = r.params) 88 | } 89 | 90 | lambda.star <- lambda 91 | 92 | ## M-H step 93 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, 
negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 94 | } 95 | 96 | rdelta.group.update <- function(r, delta, lambda, y, X, beta, sigsq.eps, Vcomps, Z, ztest, data.comps, control.params, rprop.gen1, rprior.logdens, rprop.logdens1, rprop.gen2, rprop.logdens2, ...) { ## grouped variable selection 97 | r.params <- control.params$r.params 98 | a.p0 <- control.params$a.p0 99 | b.p0 <- control.params$b.p0 100 | groups <- control.params$group.params$groups 101 | sel.groups <- control.params$group.params$sel.groups 102 | neach.group <- control.params$group.params$neach.group 103 | delta.star <- delta 104 | r.star <- r 105 | 106 | # if(length(mu.r) == 1) mu.r <- rep(mu.r, nz) 107 | # if(length(sigma.r) == 1) sigma.r <- rep(sigma.r, nz) 108 | 109 | delta.source <- sapply(sel.groups, function(x) ifelse(any(delta[which(groups == groups[x])] == 1), 1, 0)) 110 | delta.source.star <- delta.source 111 | 112 | ## randomly select move type 113 | if(all(delta.source == 0)) { 114 | move.type <- 1 115 | move.prob <- 1 116 | } else if(length(which(neach.group > 1 & delta.source == 1)) == 0) { 117 | move.type <- sample(c(1, 3), 1) 118 | move.prob <- 1/2 119 | } else { 120 | move.type <- sample(1:3, 1) 121 | move.prob <- 1/3 122 | } 123 | # move.type <- ifelse(all(delta.source == 0), 1, ifelse(length(which(neach.group > 1 & delta.source == 1)) == 0, sample(c(1, 3), 1), sample(1:3, 1))) 124 | 125 | # print(move.type) 126 | 127 | if(move.type == 1) { ## randomly select a source and change its state (e.g., from being in the model to not being in the model) 128 | 129 | source <- sample(seq_along(delta.source), 1) 130 | source.comps <- which(groups == source) 131 | 132 | # r.params <- set.r.params(r.prior = control.params$r.prior, comp = source.comps, r.params = r.params) 133 | 134 | delta.source.star[source] <- 1 - delta.source[source] 135 | delta.star[source.comps] <- rmultinom(1, delta.source.star[source], rep(1/length(source.comps), length(source.comps))) 136 | move.prob.star <- ifelse(all(delta.source.star == 0), 1, ifelse(length(which(neach.group > 1 & delta.source.star == 1)) == 0, 1/2, 1/3)) 137 | 138 | ## which component got switched 139 | comp <- ifelse(delta.source[source] == 1, source.comps[which(delta[source.comps] == 1)], source.comps[which(delta.star[source.comps] == 1)]) 140 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 141 | 142 | r.star[comp] <- ifelse(delta.star[comp] == 0, 0, rprop.gen1(r.params = r.params)) 143 | 144 | # diffpriors <- ifelse(delta.source[source] == 1, log(length(sel.groups) - sum(delta.source) + b.p0) - log(sum(delta.source.star) + a.p0), log(sum(delta.source) + a.p0) - log(length(sel.groups) - sum(delta.source.star) + b.p0)) + ifelse(delta.source[source] == 1, 1, -1)*log(length(source.comps)) + ifelse(delta.source[source] == 1, -1, 1)*with(list(r.sel = ifelse(delta.source[source] == 1, r[source.comps][which(delta[source.comps] == 1)], r.star[source.comps][which(delta.star[source.comps] == 1)])), rprior.logdens(x = r.sel, r.params = r.params)) 145 | diffpriors <- ifelse(delta.source[source] == 1, log(length(sel.groups) - sum(delta.source) + b.p0) - log(sum(delta.source.star) + a.p0), log(sum(delta.source) + a.p0) - log(length(sel.groups) - sum(delta.source.star) + b.p0)) + ifelse(delta.source[source] == 1, 1, -1)*log(length(source.comps)) + ifelse(delta.source[source] == 1, -1, 1)*with(list(r.sel = ifelse(delta.source[source] == 1, r[comp], r.star[comp])), rprior.logdens(x = r.sel, 
r.params = r.params)) 146 | 147 | # negdifflogproposal <- -log(move.prob.star) + log(move.prob) -ifelse(delta.source[source] == 1, 1, -1)*(log(length(source.comps)) - with(list(r.sel = ifelse(delta.source[source] == 1, r[source.comps][which(delta[source.comps] == 1)], r.star[source.comps][which(delta.star[source.comps] == 1)])), rprop.logdens1(x = r.sel, r.params = r.params))) 148 | negdifflogproposal <- -log(move.prob.star) + log(move.prob) -ifelse(delta.source[source] == 1, 1, -1)*(log(length(source.comps)) - with(list(r.sel = ifelse(delta.source[source] == 1, r[comp], r.star[comp])), rprop.logdens1(x = r.sel, r.params = r.params))) 149 | 150 | } else if(move.type == 2) { ## randomly select a multi-component source that is in the model and change which component is included 151 | 152 | tmp <- which(neach.group > 1 & delta.source == 1) 153 | source <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1)) 154 | source.comps <- which(groups == source) 155 | 156 | oldcomp <- source.comps[delta[source.comps] == 1] 157 | tmp <- source.comps[delta[source.comps] == 0] 158 | comp <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1)) 159 | 160 | r.params.oldcomp <- set.r.params(r.prior = control.params$r.prior, comp = oldcomp, r.params = r.params) 161 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 162 | 163 | delta.star[oldcomp] <- 0 164 | delta.star[comp] <- 1 165 | 166 | r.star[oldcomp] <- 0 167 | r.star[comp] <- rprop.gen1(r.params = r.params) 168 | 169 | diffpriors <- rprior.logdens(r.star[comp], r.params = r.params) - rprior.logdens(r[oldcomp], r.params = r.params.oldcomp) 170 | 171 | negdifflogproposal <- -rprop.logdens1(r.star[comp], r.params = r.params) + rprop.logdens1(r[oldcomp], r.params = r.params.oldcomp) 172 | 173 | } else if(move.type == 3) { ## randomly select a component that is in the model and update it 174 | tmp <- which(delta == 1) 175 | comp <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1)) 176 | 177 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 178 | 179 | r.star[comp] <- rprop.gen2(current = r[comp], r.params = r.params) 180 | 181 | diffpriors <- rprior.logdens(r.star[comp], r.params = r.params) - rprior.logdens(r[comp], r.params = r.params) 182 | 183 | negdifflogproposal <- -rprop.logdens2(r.star[comp], r[comp], r.params = r.params) + rprop.logdens2(r[comp], r.star[comp], r.params = r.params) 184 | } 185 | 186 | lambda.star <- lambda 187 | 188 | ## M-H step 189 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 190 | } 191 | 192 | lambda.update <- function(r, delta, lambda, whichcomp=1, y, X, Z, beta, sigsq.eps, Vcomps, data.comps, control.params) { 193 | lambda.jump <- control.params$lambda.jump[whichcomp] 194 | mu.lambda <- control.params$mu.lambda[whichcomp] 195 | sigma.lambda <- control.params$sigma.lambda[whichcomp] 196 | lambdacomp <- lambda[whichcomp] 197 | 198 | #Get lambdacomp adjusted by lamAdj function (to correct for very small lambdas) 199 | adjLambdaComp <- lamAdj(lambdacomp) 200 | 201 | ## generate a proposal for lambda 202 | #lambdacomp.star <- rgamma(1, shape=lambdacomp^2/lambda.jump^2, rate=lambdacomp/lambda.jump^2) 203 | lambdacomp.star <- rgamma(1, shape = adjLambdaComp^2/lambda.jump^2, 204 | rate = adjLambdaComp/lambda.jump^2) 205 | r.star
<- r 206 | delta.star <- delta 207 | move.type <- NA 208 | 209 | ## part of M-H ratio that depends on the proposal distribution 210 | #negdifflogproposal <- -dgamma(lambdacomp.star, shape=lambdacomp^2/lambda.jump^2, rate=lambdacomp/lambda.jump^2, log=TRUE) + dgamma(lambdacomp, shape=lambdacomp.star^2/lambda.jump^2, rate=lambdacomp.star/lambda.jump^2, log=TRUE) 211 | negdifflogproposal <- -dgamma(lambdacomp.star, shape= adjLambdaComp^2/lambda.jump^2, rate=adjLambdaComp/lambda.jump^2, log=TRUE) + dgamma(lambdacomp, shape=lamAdj(lambdacomp.star)^2/lambda.jump^2, rate=lamAdj(lambdacomp.star)/lambda.jump^2, log=TRUE) 212 | 213 | 214 | ## prior distribution 215 | diffpriors <- dgamma(lambdacomp.star, shape=mu.lambda^2/sigma.lambda^2, rate=mu.lambda/sigma.lambda^2, log=TRUE) - dgamma(lambdacomp, shape=mu.lambda^2/sigma.lambda^2, rate=mu.lambda/sigma.lambda^2, log=TRUE) 216 | 217 | lambda.star <- lambda 218 | lambda.star[whichcomp] <- lambdacomp.star 219 | 220 | ## M-H step 221 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 222 | } 223 | 224 | MHstep <- function(r, lambda, lambda.star, r.star, delta, delta.star, y, X, Z, beta, sigsq.eps, diffpriors, negdifflogproposal, Vcomps, move.type, data.comps) { 225 | ## compute log M-H ratio 226 | Vcomps.star <- makeVcomps(r.star, lambda.star, Z, data.comps) 227 | mu <- y - X%*%beta 228 | diffliks <- 1/2*Vcomps.star$logdetVinv - 1/2*Vcomps$logdetVinv - 1/2/sigsq.eps*crossprod(mu, Vcomps.star$Vinv - Vcomps$Vinv)%*%mu 229 | logMHratio <- diffliks + diffpriors + negdifflogproposal 230 | logalpha <- min(0,logMHratio) 231 | 232 | ## return value 233 | acc <- FALSE 234 | if( log(runif(1)) <= logalpha ) { 235 | r <- r.star 236 | delta <- delta.star 237 | lambda <- lambda.star 238 | Vcomps <- Vcomps.star 239 | acc <- TRUE 240 | } 241 | return(list(r=r, lambda=lambda, delta=delta, acc=acc, Vcomps=Vcomps, move.type=move.type)) 242 | } 243 | 244 | h.update <- function(lambda, Vcomps, sigsq.eps, y, X, beta, r, Z, data.comps) { 245 | if (is.null(Vcomps)) { 246 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 247 | } 248 | if(is.null(Vcomps$Q)) { 249 | Kpart <- makeKpart(r, Z) 250 | K <- exp(-Kpart) 251 | Vinv <- Vcomps$Vinv 252 | lambda <- lambda[1] ## in case with random intercept (randint==TRUE), where lambda is 2-dimensional 253 | lamKVinv <- lambda*K%*%Vinv 254 | h.postmean <- lamKVinv%*%(y-X%*%beta) 255 | ##h.postvar <- sigsq.eps*lamKVinv 256 | h.postvar <- sigsq.eps*lambda*(K - lamKVinv%*%K) 257 | h.postvar.sqrt <- try(chol(h.postvar), silent=TRUE) 258 | if(inherits(h.postvar.sqrt, "try-error")) { 259 | sigsvd <- svd(h.postvar) 260 | h.postvar.sqrt <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) 261 | } 262 | hsamp <- h.postmean + crossprod(h.postvar.sqrt, rnorm(length(h.postmean))) 263 | hcomps <- list(hsamp = hsamp) 264 | } else { 265 | h.star.postvar.sqrt <- sqrt(sigsq.eps*lambda)*forwardsolve(t(Vcomps$cholR), Vcomps$Q) 266 | h.star.postmean <- lambda[1]*Vcomps$Q %*% Vcomps$Rinv %*% Vcomps$K10 %*% (y - X %*% beta) 267 | hsamp.star <- h.star.postmean + crossprod(h.star.postvar.sqrt, rnorm(length(h.star.postmean))) 268 | hsamp <- t(Vcomps$K10) %*% Vcomps$Qinv %*% hsamp.star 269 | hcomps <- list(hsamp = hsamp, hsamp.star = hsamp.star) 270 | } 271 | hcomps 272 | } 273 | 274 | newh.update <- function(Z, 
Znew, Vcomps, lambda, sigsq.eps, r, y, X, beta, data.comps) { 275 | 276 | if(is.null(data.comps$knots)) { 277 | n0 <- nrow(Z) 278 | n1 <- nrow(Znew) 279 | nall <- n0 + n1 280 | # Kpartall <- makeKpart(r, rbind(Z, Znew)) 281 | # Kmat <- exp(-Kpartall) 282 | # Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 283 | # Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 284 | # Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 285 | Kmat1 <- exp(-makeKpart(r, Znew)) 286 | Kmat10 <- exp(-makeKpart(r, Znew, Z)) 287 | 288 | if(is.null(Vcomps)) { 289 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 290 | } 291 | Vinv <- Vcomps$Vinv 292 | 293 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 294 | Sigma.hnew <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 295 | mu.hnew <- lamK10Vinv %*% (y - X%*%beta) 296 | root.Sigma.hnew <- try(chol(Sigma.hnew), silent=TRUE) 297 | if(inherits(root.Sigma.hnew, "try-error")) { 298 | sigsvd <- svd(Sigma.hnew) 299 | root.Sigma.hnew <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) 300 | } 301 | hsamp <- mu.hnew + crossprod(root.Sigma.hnew, rnorm(n1)) 302 | } else { 303 | n0 <- nrow(data.comps$knots) 304 | n1 <- nrow(Znew) 305 | nall <- n0 + n1 306 | # Kpartall <- makeKpart(r, rbind(data.comps$knots, Znew)) 307 | # Kmat <- exp(-Kpartall) 308 | # Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 309 | # Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 310 | # Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 311 | Kmat10 <- exp(-makeKpart(r, Znew, data.comps$knots)) 312 | 313 | if(is.null(Vcomps)) { 314 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 315 | h.star.postvar.sqrt <- sqrt(sigsq.eps*lambda[1])*forwardsolve(t(Vcomps$cholR), Vcomps$Q) 316 | h.star.postmean <- lambda[1]*Vcomps$Q %*% Vcomps$Rinv %*% Vcomps$K10 %*% (y - X %*% beta) 317 | Vcomps$hsamp.star <- h.star.postmean + crossprod(h.star.postvar.sqrt, rnorm(length(h.star.postmean))) 318 | } 319 | hsamp <- Kmat10 %*% Vcomps$Qinv %*% Vcomps$hsamp.star 320 | } 321 | 322 | hsamp 323 | } 324 | 325 | ## function to obtain posterior samples of h(znew) from fit of Bayesian kernel machine regression 326 | predz.samps <- function(fit, Znew, verbose = TRUE) { 327 | if(is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 328 | if(inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 329 | Z <- fit$Z 330 | if(ncol(Z) != ncol(Znew)) { 331 | stop("Znew must have the same number of columns as Z") 332 | } 333 | 334 | hnew.samps <- sapply(1:fit$iter, function(s) { 335 | if(s %% (fit$iter/10) == 0 & verbose) print(s) 336 | newh.update(Z = Z, Znew = Znew, Vcomps = NULL, lambda = fit$lambda[s, ], sigsq.eps = fit$sigsq.eps[s], r = fit$r[s,], y = fit$y, X = fit$X, beta = fit$beta[s,], data.comps = fit$data.comps) 337 | }) 338 | rownames(hnew.samps) <- rownames(Znew) 339 | t(hnew.samps) 340 | } 341 | 342 | ## function to approximate the posterior mean and variance as a function of the estimated tau, lambda, beta, and sigsq.eps 343 | newh.postmean <- function(fit, Znew, sel) { 344 | if(is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 345 | if(inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 346 | 347 | Z <- fit$Z 348 | X <- fit$X 349 | y <- fit$y 350 | data.comps <- fit$data.comps 351 | lambda <- colMeans(fit$lambda[sel, ,drop = FALSE]) 352 | sigsq.eps <- mean(fit$sigsq.eps[sel]) 353 | r <- colMeans(fit$r[sel, ,drop = FALSE]) 354 | beta <- colMeans(fit$beta[sel, ,drop=FALSE]) 355 | 356 | if(is.null(data.comps$knots)) { 357 | n0 <- nrow(Z) 358 | n1 <- nrow(Znew) 359 | nall <- n0 + n1 360 | Kpartall <- makeKpart(r,
rbind(Z, Znew)) 361 | Kmat <- exp(-Kpartall) 362 | Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 363 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 364 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 365 | 366 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 367 | Vinv <- Vcomps$Vinv 368 | 369 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 370 | Sigma.hnew <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 371 | mu.hnew <- lamK10Vinv %*% (y - X%*%beta) 372 | } else { 373 | n0 <- nrow(data.comps$knots) 374 | n1 <- nrow(Znew) 375 | nall <- n0 + n1 376 | Kpartall <- makeKpart(r, rbind(data.comps$knots, Znew)) 377 | # Kmat <- exp(-Kpartall) 378 | # Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 379 | # Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 380 | # Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 381 | Kmat1 <- exp(-makeKpart(r, Znew)) 382 | Kmat10 <- exp(-makeKpart(r, Znew, data.comps$knots)) 383 | 384 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 385 | 386 | Sigma.hnew <- lambda[1]*sigsq.eps*Kmat10 %*% Vcomps$Rinv %*% t(Kmat10) 387 | mu.hnew <- lambda[1]*Kmat10 %*% Vcomps$Rinv %*% Vcomps$K10 %*% (y - X%*%beta) 388 | } 389 | 390 | ret <- list(postmean = drop(mu.hnew), postvar = Sigma.hnew) 391 | ret 392 | } 393 | 394 | ##################### 395 | #LAMBDA UPDATE HELPER 396 | ##################### 397 | #This function chooses the mean of the proposal function for the lambda MH step based on 398 | #the most recent iteration of lambda 399 | #If this is the identity then E[lambda_star] = lambda_t 400 | #Otherwise we have E[lambda_star] = lamAdj(lambda_t). 401 | lamAdj <- function(lam){ 402 | if(lam <=2 ){ 403 | return(3) 404 | }else{ 405 | return((lam^2)/(lam-1)-1) 406 | } 407 | } 408 | -------------------------------------------------------------------------------- /R/bkmr_main_functions.R: -------------------------------------------------------------------------------- 1 | # makeKpart <- function(r, Z) { 2 | # Kpart <- as.matrix(dist(sqrt(matrix(r, byrow=TRUE, nrow(Z), ncol(Z)))*Z))^2 3 | # Kpart 4 | # } 5 | makeKpart <- function(r, Z1, Z2 = NULL) { 6 | Z1r <- t(t(Z1) * c(sqrt(r))) 7 | if (is.null(Z2)) { 8 | Kpart <- fields::rdist(Z1r)^2 9 | } else { 10 | Z2r <- t(t(Z2) * c(sqrt(r))) 11 | Kpart <- fields::rdist(Z1r, Z2r)^2 12 | } 13 | Kpart 14 | } 15 | makeVcomps <- function(r, lambda, Z, data.comps) { 16 | if (is.null(data.comps$knots)) { 17 | Kpart <- makeKpart(r, Z) 18 | V <- diag(1, nrow(Z), nrow(Z)) + lambda[1]*exp(-Kpart) 19 | if (data.comps$nlambda == 2) { 20 | V <- V + lambda[2]*data.comps$crossTT 21 | } 22 | cholV <- chol(V) 23 | Vinv <- chol2inv(cholV) 24 | logdetVinv <- -2*sum(log(diag(cholV))) 25 | Vcomps <- list(Vinv = Vinv, logdetVinv = logdetVinv) 26 | } else {## predictive process approach 27 | ## note: currently does not work with random intercept model 28 | nugget <- 0.001 29 | n0 <- nrow(Z) 30 | n1 <- nrow(data.comps$knots) 31 | nall <- n0 + n1 32 | # Kpartall <- makeKpart(r, rbind(Z, data.comps$knots)) 33 | # Kall <- exp(-Kpartall) 34 | # K0 <- Kall[1:n0, 1:n0 ,drop=FALSE] 35 | # K1 <- Kall[(n0+1):nall, (n0+1):nall ,drop=FALSE] 36 | # K10 <- Kall[(n0+1):nall, 1:n0 ,drop=FALSE] 37 | K1 <- exp(-makeKpart(r, data.comps$knots)) 38 | K10 <- exp(-makeKpart(r, data.comps$knots, Z)) 39 | Q <- K1 + diag(nugget, n1, n1) 40 | R <- Q + lambda[1]*tcrossprod(K10) 41 | cholQ <- chol(Q) 42 | cholR <- chol(R) 43 | Qinv <- chol2inv(cholQ) 44 | Rinv <- chol2inv(cholR) 45 | Vinv <- diag(1, n0, n0) - lambda[1]*t(K10) %*% Rinv %*% K10 46 | 
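## matrix determinant lemma: det(V) = det(Q + lambda*K10 t(K10))/det(Q) = det(R)/det(Q), so log det(Vinv) comes from the two Cholesky factors already computed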
logdetVinv <- 2*sum(log(diag(cholQ))) - 2*sum(log(diag(cholR))) 47 | Vcomps <- list(Vinv = Vinv, logdetVinv = logdetVinv, cholR = cholR, Q = Q, K10 = K10, Qinv = Qinv, Rinv = Rinv) 48 | } 49 | Vcomps 50 | } 51 | 52 | #' Fit Bayesian kernel machine regression 53 | #' 54 | #' Fits the Bayesian kernel machine regression (BKMR) model using Markov chain Monte Carlo (MCMC) methods. 55 | #' 56 | #' @export 57 | #' 58 | #' @param y a vector of outcome data of length \code{n}. 59 | #' @param Z an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor. 60 | #' @param X an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column. 61 | #' @param iter number of iterations to run the sampler 62 | #' @param family a description of the error distribution and link function to be used in the model. Currently implemented for \code{gaussian} and \code{binomial} families. 63 | #' @param id optional vector (of length \code{n}) of grouping factors for fitting a model with a random intercept. If NULL then no random intercept will be included. 64 | #' @param verbose TRUE or FALSE: flag indicating whether to print intermediate diagnostic information during the model fitting. 65 | #' @param Znew optional matrix of new predictor values at which to predict \code{h}, where each row represents a new observation. Supplying \code{Znew} will slow down the model fitting; these predictions can instead be obtained as a post-processing step using \code{\link{SamplePred}} 66 | #' @param starting.values list of starting values for each parameter. If not specified default values will be chosen. 67 | #' @param control.params list of parameters specifying the prior distributions and tuning parameters for the MCMC algorithm. If not specified default values will be chosen. 68 | #' @param varsel TRUE or FALSE: indicator for whether to conduct variable selection on the Z variables in \code{h} 69 | #' @param groups optional vector (of length \code{M}) of group indicators for fitting hierarchical variable selection if varsel=TRUE. If varsel=TRUE without group specification, component-wise variable selection will be performed. 70 | #' @param knots optional matrix of knot locations for implementing the Gaussian predictive process of Banerjee et al. (2008). Currently only implemented for models without a random intercept. 71 | #' @param ztest optional vector indicating on which variables in Z to conduct variable selection (the remaining variables will be forced into the model). 72 | #' @param rmethod for those predictors being forced into the \code{h} function, the method for sampling the \code{r[m]} values. Takes the value of 'varying' to allow separate \code{r[m]} for each predictor; 'equal' to force the same \code{r[m]} for each predictor; or 'fixed' to fix the \code{r[m]} to their starting values 73 | #' @param est.h TRUE or FALSE: indicator for whether to sample from the posterior distribution of the subject-specific effects h_i within the main sampler. This will slow down the model fitting.
74 | #' @return an object of class "bkmrfit" (containing the posterior samples from the model fit), which has the associated methods: 75 | #' \itemize{ 76 | #' \item \code{\link{print}} (i.e., \code{\link{print.bkmrfit}}) 77 | #' \item \code{\link{summary}} (i.e., \code{\link{summary.bkmrfit}}) 78 | #' } 79 | #' 80 | #' @seealso For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 81 | #' @references Bobb, JF, Valeri L, Claus Henn B, Christiani DC, Wright RO, Mazumdar M, Godleski JJ, Coull BA (2015). Bayesian Kernel Machine Regression for Estimating the Health Effects of Multi-Pollutant Mixtures. Biostatistics 16, no. 3: 493-508. 82 | #' @references Banerjee S, Gelfand AE, Finley AO, Sang H (2008). Gaussian predictive process models for large spatial data sets. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 70(4), 825-848. 83 | #' @import utils 84 | #' 85 | #' @examples 86 | #' ## First generate dataset 87 | #' set.seed(111) 88 | #' dat <- SimData(n = 50, M = 4) 89 | #' y <- dat$y 90 | #' Z <- dat$Z 91 | #' X <- dat$X 92 | #' 93 | #' ## Fit model with component-wise variable selection 94 | #' ## Using only 100 iterations to make example run quickly 95 | #' ## Typically should use a large number of iterations for inference 96 | #' set.seed(111) 97 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 98 | kmbayes <- function(y, Z, X = NULL, iter = 1000, family = "gaussian", id = NULL, verbose = TRUE, Znew = NULL, starting.values = NULL, control.params = NULL, varsel = FALSE, groups = NULL, knots = NULL, ztest = NULL, rmethod = "varying", est.h = FALSE) { 99 | 100 | missingX <- is.null(X) 101 | if (missingX) X <- matrix(0, length(y), 1) 102 | hier_varsel <- !is.null(groups) 103 | 104 | ##Argument check 1, required arguments without defaults 105 | ##check vector/matrix sizes 106 | stopifnot(length(y) > 0, is.numeric(y), anyNA(y) == FALSE) 107 | if (!inherits(Z, "matrix")) Z <- as.matrix(Z) 108 | stopifnot(is.numeric(Z), nrow(Z) == length(y), anyNA(Z) == FALSE) 109 | if (!inherits(X, "matrix")) X <- as.matrix(X) 110 | stopifnot(is.numeric(X), nrow(X) == length(y), anyNA(X) == FALSE) 111 | 112 | ##Argument check 2: for those with defaults, write message and reset to default if invalid 113 | if (iter < 1) { 114 | message("invalid input for iter, resetting to default value 1000") 115 | nsamp <- 1000 116 | } else { 117 | nsamp <- iter 118 | } 119 | if (!family %in% c("gaussian", "binomial")) { 120 | stop("family '", family, "' not yet implemented; must specify either 'gaussian' or 'binomial'") 121 | } 122 | if (family == "binomial") { 123 | message("Fitting probit regression model") 124 | if (!all(y %in% c(0, 1))) { 125 | stop("When family == 'binomial', y must be a vector containing only zeros and ones") 126 | } 127 | } 128 | if (rmethod != "varying" & rmethod != "equal" & rmethod != "fixed") { 129 | message("invalid value for rmethod, resetting to default varying") 130 | rmethod <- "varying" 131 | } 132 | if (verbose != FALSE & verbose != TRUE) { 133 | message("invalid value for verbose, resetting to default FALSE") 134 | verbose <- FALSE 135 | } 136 | if (varsel != FALSE & varsel != TRUE) { 137 | message("invalid value for varsel, resetting to default FALSE") 138 | varsel <- FALSE 139 | } 140 | 141 | ##Argument check 3: the rest id (below) znew, knots, groups, ztest 142 | if (!is.null(id)) { 143 | stopifnot(length(id) == length(y), anyNA(id) == FALSE) 144 | if (!is.null(knots)) { 145 | message("knots
cannot be specified with id, resetting knots to NULL") 146 | knots <- NULL 147 | } 148 | } 149 | if (!is.null(Znew)) { 150 | if (!inherits(Znew, "matrix")) Znew <- as.matrix(Znew) 151 | stopifnot(is.numeric(Znew), ncol(Znew) == ncol(Z), anyNA(Znew) == FALSE) 152 | } 153 | if (!is.null(knots)) { 154 | if (!inherits(knots, "matrix")) knots <- as.matrix(knots) 155 | stopifnot(is.numeric(knots), ncol(knots) == ncol(Z), anyNA(knots) == FALSE) 156 | } 157 | if (!is.null(groups)) { 158 | if (varsel == FALSE) { 159 | message("groups should only be specified if varsel=TRUE, resetting varsel to TRUE") 160 | varsel <- TRUE 161 | } else { 162 | stopifnot(is.numeric(groups), length(groups) == ncol(Z), anyNA(groups) == FALSE) 163 | } 164 | } 165 | if (!is.null(ztest)) { 166 | if (varsel == FALSE) { 167 | message("ztest should only be specified if varsel=TRUE, resetting varsel to TRUE") 168 | varsel <- TRUE 169 | } else { 170 | stopifnot(is.numeric(ztest), length(ztest) <= ncol(Z), anyNA(ztest) == FALSE, max(ztest) <= ncol(Z)) 171 | } 172 | } 173 | 174 | ## start JB code 175 | if (!is.null(id)) { ## for random intercept model 176 | randint <- TRUE 177 | id <- as.numeric(as.factor(id)) 178 | nid <- length(unique(id)) 179 | nlambda <- 2 180 | 181 | ## matrix that multiplies the random intercept vector 182 | TT <- matrix(0, length(id), nid) 183 | for (i in 1:nid) { 184 | TT[which(id == i), i] <- 1 185 | } 186 | crossTT <- tcrossprod(TT) 187 | rm(TT, nid) 188 | } else { 189 | randint <- FALSE 190 | nlambda <- 1 191 | crossTT <- 0 192 | } 193 | data.comps <- list(randint = randint, nlambda = nlambda, crossTT = crossTT) 194 | if (!is.null(knots)) data.comps$knots <- knots 195 | rm(randint, nlambda, crossTT) 196 | 197 | ## create empty matrices to store the posterior draws in 198 | chain <- list(h.hat = matrix(0, nsamp, nrow(Z)), 199 | beta = matrix(0, nsamp, ncol(X)), 200 | lambda = matrix(NA, nsamp, data.comps$nlambda), 201 | sigsq.eps = rep(NA, nsamp), 202 | r = matrix(NA, nsamp, ncol(Z)), 203 | acc.r = matrix(0, nsamp, ncol(Z)), 204 | acc.lambda = matrix(0, nsamp, data.comps$nlambda), 205 | delta = matrix(1, nsamp, ncol(Z)) 206 | ) 207 | if (varsel) { 208 | chain$acc.rdelta <- rep(0, nsamp) 209 | chain$move.type <- rep(0, nsamp) 210 | } 211 | if (family == "binomial") { 212 | chain$ystar <- matrix(0, nsamp, length(y)) 213 | } 214 | 215 | ## components to predict h(Znew) 216 | if (!is.null(Znew)) { 217 | if (is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 218 | if (inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 219 | if (ncol(Z) != ncol(Znew)) { 220 | stop("Znew must have the same number of columns as Z") 221 | } 222 | ##Kpartall <- as.matrix(dist(rbind(Z,Znew)))^2 223 | chain$hnew <- matrix(0,nsamp,nrow(Znew)) 224 | colnames(chain$hnew) <- rownames(Znew) 225 | } 226 | 227 | ## components if model selection is being done 228 | if (varsel) { 229 | if (is.null(ztest)) { 230 | ztest <- 1:ncol(Z) 231 | } 232 | rdelta.update <- rdelta.comp.update 233 | } else { 234 | ztest <- NULL 235 | } 236 | 237 | ## control parameters (lambda.jump default lower for probit model to improve mixing) 238 | control.params.default <- list(lambda.jump = rep(ifelse(family == 'binomial', sqrt(10), 10), data.comps$nlambda), 239 | mu.lambda = rep(10, data.comps$nlambda), 240 | sigma.lambda = rep(10, data.comps$nlambda), 241 | a.p0 = 1, b.p0 = 1, r.prior = "invunif", a.sigsq = 1e-3, 242 | b.sigsq = 1e-3, mu.r = 5, sigma.r = 5, r.muprop = 1, 243 | r.jump = 0.1, r.jump1 = 2, r.jump2 = 0.1, r.a = 0, r.b = 100) 244 | if
(!is.null(control.params)){ 245 | control.params <- modifyList(control.params.default, as.list(control.params)) 246 | validateControlParams(varsel, family, id, control.params) 247 | } else { 248 | control.params <- control.params.default 249 | } 250 | 251 | control.params$r.params <- with(control.params, list(mu.r = mu.r, sigma.r = sigma.r, r.muprop = r.muprop, r.jump = r.jump, r.jump1 = r.jump1, r.jump2 = r.jump2, r.a = r.a, r.b = r.b)) 252 | 253 | ## components if grouped model selection is being done 254 | if (!is.null(groups)) { 255 | if (!varsel) { 256 | stop("if doing grouped variable selection, must set varsel = TRUE") 257 | } 258 | rdelta.update <- rdelta.group.update 259 | control.params$group.params <- list(groups = groups, sel.groups = sapply(unique(groups), function(x) min(seq_along(groups)[groups == x])), neach.group = sapply(unique(groups), function(x) sum(groups %in% x))) 260 | } 261 | 262 | ## specify functions for doing the Metropolis-Hastings steps to update r 263 | e <- environment() 264 | rfn <- set.r.MH.functions(r.prior = control.params$r.prior) 265 | rprior.logdens <- rfn$rprior.logdens 266 | environment(rprior.logdens) <- e 267 | rprop.gen1 <- rfn$rprop.gen1 268 | environment(rprop.gen1) <- e 269 | rprop.logdens1 <- rfn$rprop.logdens1 270 | environment(rprop.logdens1) <- e 271 | rprop.gen2 <- rfn$rprop.gen2 272 | environment(rprop.gen2) <- e 273 | rprop.logdens2 <- rfn$rprop.logdens2 274 | environment(rprop.logdens2) <- e 275 | rprop.gen <- rfn$rprop.gen 276 | environment(rprop.gen) <- e 277 | rprop.logdens <- rfn$rprop.logdens 278 | environment(rprop.logdens) <- e 279 | rm(e, rfn) 280 | 281 | ## initial values 282 | starting.values0 <- list(h.hat = 1, beta = NULL, sigsq.eps = NULL, r = 1, lambda = 10, delta = 1) 283 | if (is.null(starting.values)) { 284 | starting.values <- starting.values0 285 | } else { 286 | starting.values <- modifyList(starting.values0, starting.values) 287 | validateStartingValues(varsel, y, X, Z, starting.values, rmethod) 288 | } 289 | if (family == "gaussian") { 290 | if (is.null(starting.values$beta) | is.null(starting.values$sigsq.eps)) { 291 | lmfit0 <- lm(y ~ Z + X) 292 | if (is.null(starting.values$beta)) { 293 | coefX <- coef(lmfit0)[grep("X", names(coef(lmfit0)))] 294 | starting.values$beta <- unname(ifelse(is.na(coefX), 0, coefX)) 295 | } 296 | if (is.null(starting.values$sigsq.eps)) { 297 | starting.values$sigsq.eps <- summary(lmfit0)$sigma^2 298 | } 299 | } 300 | } else if (family == "binomial") { 301 | starting.values$sigsq.eps <- 1 ## always equal to 1 302 | if (is.null(starting.values$beta) | is.null(starting.values$ystar)) { 303 | probitfit0 <- try(glm(y ~ Z + X, family = binomial(link = "probit"))) 304 | if (!inherits(probitfit0, "try-error")) { 305 | if (is.null(starting.values$beta)) { 306 | coefX <- coef(probitfit0)[grep("X", names(coef(probitfit0)))] 307 | starting.values$beta <- unname(ifelse(is.na(coefX), 0, coefX)) 308 | } 309 | if (is.null(starting.values$ystar)) { 310 | #prd <- predict(probitfit0) 311 | #starting.values$ystar <- ifelse(y == 1, abs(prd), -abs(prd)) 312 | starting.values$ystar <- ifelse(y == 1, 1/2, -1/2) 313 | } 314 | } else { 315 | starting.values$beta <- 0 316 | starting.values$ystar <- ifelse(y == 1, 1/2, -1/2) 317 | } 318 | } 319 | } 320 | 321 | ##print (starting.values) 322 | ##truncate vectors that are too long 323 | if (length(starting.values$h.hat) > length(y)) { 324 | starting.values$h.hat <- starting.values$h.hat[1:length(y)] 325 | } 326 | if (length(starting.values$beta) > ncol(X)) { 327 | 
starting.values$beta <- starting.values$beta[1:ncol(X)] 328 | } 329 | if (length(starting.values$delta) > ncol(Z)) { 330 | starting.values$delta <- starting.values$delta[1:ncol(Z)] 331 | } 332 | if (varsel==FALSE & rmethod == "equal" & length(starting.values$r) > 1) { 333 | starting.values$r <- starting.values$r[1] ## this should only happen if rmethod == "equal" 334 | } else if (length(starting.values$r) > ncol(Z)) { 335 | starting.values$r <- starting.values$r[1:ncol(Z)] 336 | } 337 | 338 | chain$h.hat[1, ] <- starting.values$h.hat 339 | chain$beta[1, ] <- starting.values$beta 340 | chain$lambda[1, ] <- starting.values$lambda 341 | chain$sigsq.eps[1] <- starting.values$sigsq.eps 342 | chain$r[1, ] <- starting.values$r 343 | if (varsel) { 344 | chain$delta[1,ztest] <- starting.values$delta 345 | } 346 | if (family == "binomial") { 347 | chain$ystar[1, ] <- starting.values$ystar 348 | chain$sigsq.eps[] <- starting.values$sigsq.eps ## does not get updated 349 | } 350 | if (!is.null(groups)) { 351 | ## make sure starting values are consistent with structure of model 352 | if (!all(sapply(unique(groups), function(x) sum(chain$delta[1, ztest][groups == x])) == 1)) { 353 | # warning("Specified starting values for delta not consistent with model; using default") 354 | starting.values$delta <- rep(0, length(groups)) 355 | starting.values$delta[sapply(unique(groups), function(x) min(which(groups == x)))] <- 1 356 | } 357 | chain$delta[1,ztest] <- starting.values$delta 358 | chain$r[1,ztest] <- ifelse(chain$delta[1,ztest] == 1, chain$r[1,ztest], 0) 359 | } 360 | chain$est.h <- est.h 361 | 362 | ## components 363 | Vcomps <- makeVcomps(r = chain$r[1, ], lambda = chain$lambda[1, ], Z = Z, data.comps = data.comps) 364 | 365 | # set print progress options 366 | opts <- set_verbose_opts( 367 | verbose_freq = control.params$verbose_freq, 368 | verbose_digits = control.params$verbose_digits, 369 | verbose_show_ests = control.params$verbose_show_ests, 370 | tot_iter=nsamp 371 | ) 372 | 373 | ## start sampling #### 374 | chain$time1 <- Sys.time() 375 | for (s in 2:nsamp) { 376 | 377 | ## continuous version of outcome (latent outcome under binomial probit model) 378 | if (family == "gaussian") { 379 | ycont <- y 380 | } else if (family == "binomial") { 381 | if (est.h) { 382 | chain$ystar[s,] <- ystar.update(y = y, X = X, beta = chain$beta[s - 1,], h = chain$h.hat[s - 1, ]) 383 | } else { 384 | chain$ystar[s,] <- ystar.update.noh(y = y, X = X, beta = chain$beta[s - 1,], Vinv = Vcomps$Vinv, ystar = chain$ystar[s - 1, ]) 385 | } 386 | ycont <- chain$ystar[s, ] 387 | } 388 | 389 | ## generate posterior samples from marginalized distribution P(beta, sigsq.eps, lambda, r | y) 390 | 391 | ## beta 392 | if (!missingX) { 393 | chain$beta[s,] <- beta.update(X = X, Vinv = Vcomps$Vinv, y = ycont, sigsq.eps = chain$sigsq.eps[s - 1]) 394 | } 395 | 396 | ## \sigma_\epsilon^2 397 | if (family == "gaussian") { 398 | chain$sigsq.eps[s] <- sigsq.eps.update(y = ycont, X = X, beta = chain$beta[s,], Vinv = Vcomps$Vinv, a.eps = control.params$a.sigsq, b.eps = control.params$b.sigsq) 399 | } 400 | 401 | ## lambda 402 | lambdaSim <- chain$lambda[s - 1,] 403 | for (comp in 1:data.comps$nlambda) { 404 | varcomps <- lambda.update(r = chain$r[s - 1,], delta = chain$delta[s - 1,], lambda = lambdaSim, whichcomp = comp, y = ycont, X = X, Z = Z, beta = chain$beta[s,], sigsq.eps = chain$sigsq.eps[s], Vcomps = Vcomps, data.comps = data.comps, control.params = control.params) 405 | lambdaSim <- varcomps$lambda 406 | if (varcomps$acc) { 407 |
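## proposal accepted: keep the updated variance components and record the acceptance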
Vcomps <- varcomps$Vcomps 408 | chain$acc.lambda[s,comp] <- varcomps$acc 409 | } 410 | } 411 | chain$lambda[s,] <- lambdaSim 412 | 413 | ## r 414 | rSim <- chain$r[s - 1,] 415 | comp <- which(!1:ncol(Z) %in% ztest) 416 | if (length(comp) != 0) { 417 | if (rmethod == "equal") { ## common r for those variables not being selected 418 | varcomps <- r.update(r = rSim, whichcomp = comp, delta = chain$delta[s - 1,], 419 | lambda = chain$lambda[s,], y = ycont, X = X, beta = chain$beta[s,], 420 | sigsq.eps = chain$sigsq.eps[s], Vcomps = Vcomps, Z = Z, 421 | data.comps = data.comps, control.params = control.params, 422 | rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, 423 | rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, 424 | rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, 425 | rprop.logdens = rprop.logdens) 426 | rSim <- varcomps$r 427 | if (varcomps$acc) { 428 | Vcomps <- varcomps$Vcomps 429 | chain$acc.r[s, comp] <- varcomps$acc 430 | } 431 | } else if (rmethod == "varying") { ## allow a different r_m 432 | for (whichr in comp) { 433 | varcomps <- r.update(r = rSim, whichcomp = whichr, delta = chain$delta[s - 1,], 434 | lambda = chain$lambda[s,], y = ycont, X = X, beta = chain$beta[s,], 435 | sigsq.eps = chain$sigsq.eps[s], Vcomps = Vcomps, Z = Z, 436 | data.comps = data.comps, control.params = control.params, 437 | rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, 438 | rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, 439 | rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, 440 | rprop.logdens = rprop.logdens) 441 | rSim <- varcomps$r 442 | if (varcomps$acc) { 443 | Vcomps <- varcomps$Vcomps 444 | chain$acc.r[s, whichr] <- varcomps$acc 445 | } 446 | } 447 | } 448 | } 449 | ## for those variables being selected: joint posterior of (r,delta) 450 | if (varsel) { 451 | varcomps <- rdelta.update(r = rSim, delta = chain$delta[s - 1,], 452 | lambda = chain$lambda[s,], y = ycont, X = X, 453 | beta = chain$beta[s,], sigsq.eps = chain$sigsq.eps[s], 454 | Vcomps = Vcomps, Z = Z, ztest = ztest, 455 | data.comps = data.comps, control.params = control.params, 456 | rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, 457 | rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, 458 | rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, 459 | rprop.logdens = rprop.logdens) 460 | chain$delta[s,] <- varcomps$delta 461 | rSim <- varcomps$r 462 | chain$move.type[s] <- varcomps$move.type 463 | if (varcomps$acc) { 464 | Vcomps <- varcomps$Vcomps 465 | chain$acc.rdelta[s] <- varcomps$acc 466 | } 467 | } 468 | chain$r[s,] <- rSim 469 | 470 | ################################################### 471 | ## generate posterior sample of h(z) from its posterior P(h | beta, sigsq.eps, lambda, r, y) 472 | 473 | if (est.h) { 474 | hcomps <- h.update(lambda = chain$lambda[s,], Vcomps = Vcomps, sigsq.eps = chain$sigsq.eps[s], y = ycont, X = X, beta = chain$beta[s,], r = chain$r[s,], Z = Z, data.comps = data.comps) 475 | chain$h.hat[s,] <- hcomps$hsamp 476 | if (!is.null(hcomps$hsamp.star)) { ## GPP 477 | Vcomps$hsamp.star <- hcomps$hsamp.star 478 | } 479 | rm(hcomps) 480 | } 481 | 482 | ################################################### 483 | ## generate posterior samples of h(Znew) from its posterior P(hnew | beta, sigsq.eps, lambda, r, y) 484 | 485 | if (!is.null(Znew)) { 486 | chain$hnew[s,] <- newh.update(Z = Z, Znew = Znew, Vcomps = Vcomps, lambda = chain$lambda[s,], sigsq.eps = chain$sigsq.eps[s], r = chain$r[s,], y = ycont, X = X, beta = chain$beta[s,], 
data.comps = data.comps) 487 | } 488 | 489 | ################################################### 490 | ## print details of the model fit so far 491 | print_diagnostics(verbose = verbose, opts = opts, curr_iter = s, tot_iter = nsamp, chain = chain, varsel = varsel, hier_varsel = hier_varsel, ztest = ztest, Z = Z, groups = groups) 492 | 493 | } 494 | control.params$r.params <- NULL 495 | chain$time2 <- Sys.time() 496 | chain$iter <- nsamp 497 | chain$family <- family 498 | chain$starting.values <- starting.values 499 | chain$control.params <- control.params 500 | chain$X <- X 501 | chain$Z <- Z 502 | chain$y <- y 503 | chain$ztest <- ztest 504 | chain$data.comps <- data.comps 505 | if (!is.null(Znew)) chain$Znew <- Znew 506 | if (!is.null(groups)) chain$groups <- groups 507 | chain$varsel <- varsel 508 | class(chain) <- c("bkmrfit", class(chain)) 509 | chain 510 | } 511 | 512 | #' Print basic summary of BKMR model fit 513 | #' 514 | #' \code{print} method for class "bkmrfit" 515 | #' 516 | #' @param x an object of class "bkmrfit" 517 | #' @param digits the number of digits to show when printing 518 | #' @param ... further arguments passed to or from other methods. 519 | #' 520 | #' @export 521 | #' 522 | #' @return No return value, prints basic summary of fit to console 523 | #' 524 | #' @examples 525 | #' ## First generate dataset 526 | #' set.seed(111) 527 | #' dat <- SimData(n = 50, M = 4) 528 | #' y <- dat$y 529 | #' Z <- dat$Z 530 | #' X <- dat$X 531 | #' 532 | #' ## Fit model with component-wise variable selection 533 | #' ## Using only 100 iterations to make example run quickly 534 | #' ## Typically should use a large number of iterations for inference 535 | #' set.seed(111) 536 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 537 | #' fitkm 538 | print.bkmrfit <- function(x, digits = 5, ...) { 539 | cat("Fitted object of class 'bkmrfit'\n") 540 | cat("Iterations:", x$iter, "\n") 541 | cat("Outcome family:", x$family, ifelse(x$family == "binomial", "(probit link)", ""), "\n") 542 | cat("Model fit on:", as.character(x$time2), "\n") 543 | } 544 | 545 | #' Summarizing BKMR model fits 546 | #' 547 | #' \code{summary} method for class "bkmrfit" 548 | #' 549 | #' @param object an object of class "bkmrfit" 550 | #' @param q quantiles of posterior distribution to show 551 | #' @param digits the number of digits to show when printing 552 | #' @param show_ests logical; if \code{TRUE}, prints summary statistics of posterior distribution 553 | #' @param show_MH logical; if \code{TRUE}, prints acceptance rates from the Metropolis-Hastings algorithm 554 | #' @param ... further arguments passed to or from other methods. 555 | #' 556 | #' @export 557 | #' 558 | #' @return No return value, prints more detailed summary of fit to console 559 | #' 560 | #' @examples 561 | #' ## First generate dataset 562 | #' set.seed(111) 563 | #' dat <- SimData(n = 50, M = 4) 564 | #' y <- dat$y 565 | #' Z <- dat$Z 566 | #' X <- dat$X 567 | #' 568 | #' ## Fit model with component-wise variable selection 569 | #' ## Using only 100 iterations to make example run quickly 570 | #' ## Typically should use a large number of iterations for inference 571 | #' set.seed(111) 572 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 573 | #' summary(fitkm) 574 | summary.bkmrfit <- function(object, q = c(0.025, 0.975), digits = 5, show_ests = TRUE, show_MH = TRUE, ...) 
{ 575 | x <- object 576 | elapsed_time <- difftime(x$time2, x$time1) 577 | 578 | print(x, digits = digits) 579 | cat("Running time: ", round(elapsed_time, digits), attr(elapsed_time, "units"), "\n") 580 | cat("\n") 581 | 582 | if (show_MH) { 583 | cat("Acceptance rates for Metropolis-Hastings algorithm:\n") 584 | accep_rates <- data.frame() 585 | ## lambda 586 | nm <- "lambda" 587 | rate <- colMeans(x$acc.lambda[2:x$iter, ,drop = FALSE]) 588 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 589 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 590 | ## r_m 591 | if (!x$varsel) { 592 | nm <- "r" 593 | rate <- colMeans(x$acc.r[2:x$iter, , drop = FALSE]) 594 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 595 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 596 | } else { 597 | nm <- "r/delta (overall)" 598 | rate <- mean(x$acc.rdelta[2:x$iter]) 599 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 600 | ## 601 | nm <- "r/delta (move 1)" 602 | rate <- mean(x$acc.rdelta[2:x$iter][x$move.type[2:x$iter] == 1]) 603 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 604 | ## 605 | nm <- "r/delta (move 2)" 606 | rate <- mean(x$acc.rdelta[2:x$iter][x$move.type[2:x$iter] == 2]) 607 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 608 | if (!is.null(x$groups)) { 609 | nm <- "r/delta (move 3)" 610 | rate <- mean(x$acc.rdelta[2:x$iter][x$move.type[2:x$iter] == 3]) 611 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 612 | } 613 | } 614 | print(accep_rates) 615 | } 616 | if (show_ests) { 617 | sel <- with(x, seq(floor(iter/2) + 1, iter)) 618 | cat("\nParameter estimates (based on iterations ", min(sel), "-", max(sel), "):\n", sep = "") 619 | ests <- ExtractEsts(x, q = q, sel = sel) 620 | if (!is.null(ests$h)) { 621 | ests$h <- ests$h[c(1,2,nrow(ests$h)), ] 622 | } 623 | if (!is.null(ests$ystar)) { 624 | ests$ystar <- ests$ystar[c(1,2,nrow(ests$ystar)), ] 625 | } 626 | summ <- with(ests, rbind(beta, sigsq.eps, r, lambda)) 627 | if (!is.null(ests$h)) { 628 | summ <- rbind(summ, ests$h) 629 | } 630 | if (!is.null(ests$ystar)) { 631 | summ <- rbind(summ, ests$ystar) 632 | } 633 | summ <- data.frame(param = rownames(summ), round(summ, digits)) 634 | rownames(summ) <- NULL 635 | print(summ) 636 | if (x$varsel) { 637 | cat("\nPosterior inclusion probabilities:\n") 638 | pips <- ExtractPIPs(x) 639 | pips[, -1] <- round(pips[, -1], digits) 640 | print(pips) 641 | } 642 | } 643 | return() 644 | } 645 | --------------------------------------------------------------------------------
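## A minimal end-to-end sketch (not part of the package sources), assembled from
## the roxygen examples above; it uses only functions exported in NAMESPACE, and
## the object names are illustrative.
library(bkmr)
set.seed(111)
dat <- SimData(n = 50, M = 4)                 # simulated outcome y, exposures Z, covariates X
fitkm <- kmbayes(y = dat$y, Z = dat$Z, X = dat$X,
                 iter = 100, verbose = FALSE, varsel = TRUE)
summary(fitkm)                                # MH acceptance rates and posterior summaries
ExtractPIPs(fitkm)                            # posterior inclusion probabilities
pred.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10)
pred.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.bivar,
                                            Z = dat$Z, qs = c(0.1, 0.5, 0.9))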