├── my-doc
│   ├── tests
│   │   ├── .Rapp.history
│   │   ├── tests_knots.R
│   │   ├── tests_noX.R
│   │   ├── tests_Znew.R
│   │   └── tests_randint.R
│   ├── scratch_hnew_pred_methods.R
│   ├── scratch.R
│   └── probit_reg.Rmd
├── man
│   ├── .gitignore
│   ├── print.bkmrfit.Rd
│   ├── ExtractSamps.Rd
│   ├── ExtractEsts.Rd
│   ├── summary.bkmrfit.Rd
│   ├── ExtractPIPs.Rd
│   ├── TracePlot.Rd
│   ├── PlotPriorFits.Rd
│   ├── SimData.Rd
│   ├── InvestigatePrior.Rd
│   ├── PredictorResponseBivarLevels.Rd
│   ├── SamplePred.Rd
│   ├── OverallRiskSummaries.Rd
│   ├── ComputePostmeanHnew.Rd
│   ├── PredictorResponseUnivar.Rd
│   ├── SingVarIntSummaries.Rd
│   ├── SingVarRiskSummaries.Rd
│   ├── PredictorResponseBivar.Rd
│   ├── PredictorResponseBivarPair.Rd
│   └── kmbayes.Rd
├── CRAN-SUBMISSION
├── R
│   ├── base.r
│   ├── zzz.R
│   ├── CalcR2.R
│   ├── TracePlot.R
│   ├── argval_controlParams.R
│   ├── argval_ParamInput.R
│   ├── print_verbose.R
│   ├── argval_ListInput.R
│   ├── SamplePred.R
│   ├── CalcPIPs.R
│   ├── SimData.R
│   ├── bkmr_r_parameter_helper_functions.R
│   ├── ExtractEsts.R
│   ├── InvestigatePrior_rm.R
│   ├── ComputePostmeanHnew.R
│   ├── ComputeMixtureSummaries.R
│   ├── PredictorResponseFunctions.R
│   ├── bkmr_parameter_update_functions.R
│   └── bkmr_main_functions.R
├── .Rbuildignore
├── .gitignore
├── data-raw
│   └── VarRealistic.R
├── bkmr.Rproj
├── NAMESPACE
├── DESCRIPTION
├── README.md
├── cran-comments.md
└── NEWS.md

--------------------------------------------------------------------------------
/my-doc/tests/.Rapp.history:
--------------------------------------------------------------------------------
 1 | 
--------------------------------------------------------------------------------
/man/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rapp.history
 2 | 
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
 1 | Version: 0.2.2
 2 | Date: 2022-03-22 17:41:18 UTC
 3 | SHA: 201280422de50255cb3587caac718d694b4d5c72
--------------------------------------------------------------------------------
/R/base.r:
--------------------------------------------------------------------------------
 1 | #' @importFrom magrittr %>%
 2 | NULL
 3 | 
 4 | #' @importFrom magrittr %<>%
 5 | NULL
 6 | 
 7 | #' @import stats
 8 | NULL
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^cran-comments\.md$
 4 | ^my-doc$
 5 | ^data-raw$
 6 | ^.*\.DS_Store
 7 | ^revdep$
 8 | ^CRAN-SUBMISSION$
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | inst/doc
 5 | data
 6 | my-doc/overview.html
 7 | my-doc/fitted_objects.RData
 8 | my-doc/probit_reg.RData
 9 | my-doc/probit_reg.html
10 | *.DS_Store
--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | .onAttach <- function(libname, pkgname) {
 2 |   packageStartupMessage("For guided examples, go to 'https://jenfb.github.io/bkmr/overview.html'")
 3 | }
 4 | 
 5 | release_questions <- function() {
 6 |   c(
 7 |     "Have you updated the vignette and posted to GitHub?"
 8 |   )
 9 | }
--------------------------------------------------------------------------------
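A practical aside on `zzz.R` above: because the message is emitted via `packageStartupMessage()` inside `.onAttach`, callers can silence it with standard base R machinery. A minimal sketch (mine, not part of the repository):
```R
# Illustrative: suppress the packageStartupMessage() emitted by .onAttach
suppressPackageStartupMessages(library(bkmr))
```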
/data-raw/VarRealistic.R:
--------------------------------------------------------------------------------
 1 | load("H:/Research/Completed Projects/2014 Bayesian kernel machine regression (Biostatistics)/Code/simulation/run_on_cluster_h1_h2_h3_scenarios/data/xrfdat.RData")
 2 | 
 3 | VarRealistic <- round(cov(xrfdat), 2)
 4 | dimnames(VarRealistic) <- NULL
 5 | 
 6 | dput(VarRealistic)
 7 | 
--------------------------------------------------------------------------------
/R/CalcR2.R:
--------------------------------------------------------------------------------
 1 | CalcR2 <- function(fit, sel = NULL) {
 2 |   ests <- ExtractEsts(fit, sel = sel)
 3 |   y <- fit$y
 4 | 
 5 |   ## http://en.wikipedia.org/wiki/Coefficient_of_determination
 6 |   preds <- with(ests, h[, "mean"] + fit$X %*% beta[, "mean"])
 7 |   SStot <- sum((y - mean(y))^2)
 8 |   SSres <- sum((y - preds)^2)
 9 |   R2 <- 1 - SSres/SStot
10 |   R2
11 | }
--------------------------------------------------------------------------------
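`CalcR2()` above computes R2 = 1 - SSres/SStot from the posterior-mean fitted values. It is not exported (it does not appear in the NAMESPACE below), so a usage sketch needs `:::`; this is my illustration, assuming `fitkm` is a `kmbayes()` fit as in the Rd examples later in this dump:
```R
# Illustrative sketch, not from the package docs: unexported helper via :::
r2 <- bkmr:::CalcR2(fitkm)
r2  # coefficient of determination of the posterior-mean fit
```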
/bkmr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | PackageRoxygenize: rd,collate,namespace,vignette
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(print,bkmrfit)
 4 | S3method(summary,bkmrfit)
 5 | export(ComputePostmeanHnew)
 6 | export(ExtractEsts)
 7 | export(ExtractPIPs)
 8 | export(ExtractSamps)
 9 | export(InvestigatePrior)
10 | export(OverallRiskSummaries)
11 | export(PlotPriorFits)
12 | export(PredictorResponseBivar)
13 | export(PredictorResponseBivarLevels)
14 | export(PredictorResponseBivarPair)
15 | export(PredictorResponseUnivar)
16 | export(SamplePred)
17 | export(SimData)
18 | export(SingVarIntSummaries)
19 | export(SingVarRiskSummaries)
20 | export(TracePlot)
21 | export(kmbayes)
22 | import(graphics)
23 | import(nlme)
24 | import(stats)
25 | import(utils)
26 | importFrom(magrittr,"%<>%")
27 | importFrom(magrittr,"%>%")
28 | importFrom(tmvtnorm,rtmvnorm)
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: bkmr
 2 | Title: Bayesian Kernel Machine Regression
 3 | Version: 0.2.2.9000
 4 | Authors@R: c(person("Jennifer F.", "Bobb",
 5 |                     email = "jenniferfederbobb@gmail.com",
 6 |                     role = c("aut", "cre")),
 7 |              person('Luke', 'Duttweiler',
 8 |                     email = 'lduttweiler@hsph.harvard.edu',
 9 |                     role = c('ctb'),
10 |                     comment = c(ORCID = "0000-0002-0467-995X")))
11 | Description: Implementation of a statistical approach
12 |     for estimating the joint health effects of multiple
13 |     concurrent exposures, as described in Bobb et al (2015)
14 |     <doi:10.1093/biostatistics/kxu058>.
15 | URL: https://github.com/jenfb/bkmr
16 | BugReports: https://github.com/jenfb/bkmr/issues
17 | Depends:
18 |     R (>= 3.1.2)
19 | License: GPL-2
20 | Imports:
21 |     dplyr,
22 |     magrittr,
23 |     nlme,
24 |     fields,
25 |     truncnorm,
26 |     tidyr,
27 |     MASS,
28 |     tmvtnorm,
29 |     tibble
30 | RoxygenNote: 7.3.1
31 | Encoding: UTF-8
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | The R package `bkmr` implements Bayesian kernel machine regression, a statistical approach for estimating the joint health effects of multiple concurrent exposures. Additional information on the statistical methodology and on the computational details is provided in [Bobb et al. 2015](https://academic.oup.com/biostatistics/article/16/3/493/269719). More recent extensions, details on the software, and worked-through examples are provided in [Bobb et al. 2018](https://ehjournal.biomedcentral.com/articles/10.1186/s12940-018-0413-y).
 2 | 
 3 | You can install the latest released version of `bkmr` from CRAN with:
 4 | ```R
 5 | install.packages("bkmr")
 6 | ```
 7 | Or the latest development version from GitHub with:
 8 | ```R
 9 | install.packages("devtools")
10 | devtools::install_github("jenfb/bkmr")
11 | ```
12 | 
13 | For a general overview and guided examples, go to https://jenfb.github.io/bkmr/overview.html.
14 | 
15 | For examples from the software paper, please see
16 | 
17 | * https://jenfb.github.io/bkmr/SimData1
18 | * https://jenfb.github.io/bkmr/ProbitEx
--------------------------------------------------------------------------------
/man/print.bkmrfit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bkmr_main_functions.R
 3 | \name{print.bkmrfit}
 4 | \alias{print.bkmrfit}
 5 | \title{Print basic summary of BKMR model fit}
 6 | \usage{
 7 | \method{print}{bkmrfit}(x, digits = 5, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{an object of class "bkmrfit"}
11 | 
12 | \item{digits}{the number of digits to show when printing}
13 | 
14 | \item{...}{further arguments passed to or from other methods.}
15 | }
16 | \value{
17 | No return value, prints basic summary of fit to console
18 | }
19 | \description{
20 | \code{print} method for class "bkmrfit"
21 | }
22 | \examples{
23 | ## First generate dataset
24 | set.seed(111)
25 | dat <- SimData(n = 50, M = 4)
26 | y <- dat$y
27 | Z <- dat$Z
28 | X <- dat$X
29 | 
30 | ## Fit model with component-wise variable selection
31 | ## Using only 100 iterations to make example run quickly
32 | ## Typically should use a large number of iterations for inference
33 | set.seed(111)
34 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
35 | fitkm
36 | }
--------------------------------------------------------------------------------
/man/ExtractSamps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ExtractEsts.R
 3 | \name{ExtractSamps}
 4 | \alias{ExtractSamps}
 5 | \title{Extract samples}
 6 | \usage{
 7 | ExtractSamps(fit, sel = NULL)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
11 | 
12 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
13 | }
14 | \value{
15 | a list where each component contains the posterior samples of one of the parameters (or vector of parameters) being estimated
16 | }
17 | \description{
18 | Extract samples of each parameter from the BKMR fit
19 | }
20 | \examples{
21 | ## First generate dataset
22 | set.seed(111)
23 | dat <- SimData(n = 50, M = 4)
24 | y <- dat$y
25 | Z <- dat$Z
26 | X <- dat$X
27 | 
28 | ## Fit model with component-wise variable selection
29 | ## Using only 100 iterations to make example run quickly
30 | ## Typically should use a large number of iterations for inference
31 | set.seed(111)
32 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
33 | 
34 | samps <- ExtractSamps(fitkm)
35 | }
--------------------------------------------------------------------------------
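A sketch (mine, not from the package docs) of summarizing the list returned by `ExtractSamps()`; judging from `R/TracePlot.R` later in this dump, vector-valued components such as `beta` are matrices with one row per retained iteration:
```R
samps <- ExtractSamps(fitkm)
# posterior median and 95% credible interval for the first covariate effect
quantile(samps$beta[, 1], probs = c(0.025, 0.5, 0.975))
```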
/man/ExtractEsts.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ExtractEsts.R
 3 | \name{ExtractEsts}
 4 | \alias{ExtractEsts}
 5 | \title{Extract summary statistics}
 6 | \usage{
 7 | ExtractEsts(fit, q = c(0.025, 0.25, 0.5, 0.75, 0.975), sel = NULL)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
11 | 
12 | \item{q}{vector of quantiles}
13 | 
14 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
15 | }
16 | \value{
17 | a list where each component is a data frame containing the summary statistics of the posterior distribution of one of the parameters (or vector of parameters) being estimated
18 | }
19 | \description{
20 | Obtain summary statistics of each parameter from the BKMR fit
21 | }
22 | \examples{
23 | ## First generate dataset
24 | set.seed(111)
25 | dat <- SimData(n = 50, M = 4)
26 | y <- dat$y
27 | Z <- dat$Z
28 | X <- dat$X
29 | 
30 | ## Fit model with component-wise variable selection
31 | ## Using only 100 iterations to make example run quickly
32 | ## Typically should use a large number of iterations for inference
33 | set.seed(111)
34 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
35 | 
36 | ests <- ExtractEsts(fitkm)
37 | names(ests)
38 | ests$beta
39 | }
--------------------------------------------------------------------------------
/man/summary.bkmrfit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bkmr_main_functions.R
 3 | \name{summary.bkmrfit}
 4 | \alias{summary.bkmrfit}
 5 | \title{Summarizing BKMR model fits}
 6 | \usage{
 7 | \method{summary}{bkmrfit}(
 8 |   object,
 9 |   q = c(0.025, 0.975),
10 |   digits = 5,
11 |   show_ests = TRUE,
12 |   show_MH = TRUE,
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{object}{an object of class "bkmrfit"}
18 | 
19 | \item{q}{quantiles of posterior distribution to show}
20 | 
21 | \item{digits}{the number of digits to show when printing}
22 | 
23 | \item{show_ests}{logical; if \code{TRUE}, prints summary statistics of posterior distribution}
24 | 
25 | \item{show_MH}{logical; if \code{TRUE}, prints acceptance rates from the Metropolis-Hastings algorithm}
26 | 
27 | \item{...}{further arguments passed to or from other methods.}
28 | }
29 | \value{
30 | No return value, prints more detailed summary of fit to console
31 | }
32 | \description{
33 | \code{summary} method for class "bkmrfit"
34 | }
35 | \examples{
36 | ## First generate dataset
37 | set.seed(111)
38 | dat <- SimData(n = 50, M = 4)
39 | y <- dat$y
40 | Z <- dat$Z
41 | X <- dat$X
42 | 
43 | ## Fit model with component-wise variable selection
44 | ## Using only 100 iterations to make example run quickly
45 | ## Typically should use a large number of iterations for inference
46 | set.seed(111)
47 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
48 | summary(fitkm)
49 | }
--------------------------------------------------------------------------------
/man/ExtractPIPs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CalcPIPs.R
 3 | \name{ExtractPIPs}
 4 | \alias{ExtractPIPs}
 5 | \title{Extract posterior inclusion probabilities (PIPs) from BKMR model fit}
 6 | \usage{
 7 | ExtractPIPs(fit, sel = NULL, z.names = NULL)
 8 | }
 9 | \arguments{
10 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
11 | 
12 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
13 | 
14 | \item{z.names}{optional argument providing the names of the variables included in the \code{h} function.}
15 | }
16 | \value{
17 | a data frame with the variable-specific PIPs for BKMR fit with component-wise variable selection, and with the group-specific and conditional (within-group) PIPs for BKMR fit with hierarchical variable selection.
18 | }
19 | \description{
20 | Extract posterior inclusion probabilities (PIPs) from Bayesian Kernel Machine Regression (BKMR) model fit
21 | }
22 | \details{
23 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
24 | }
25 | \examples{
26 | ## First generate dataset
27 | set.seed(111)
28 | dat <- SimData(n = 50, M = 4)
29 | y <- dat$y
30 | Z <- dat$Z
31 | X <- dat$X
32 | 
33 | ## Fit model with component-wise variable selection
34 | ## Using only 100 iterations to make example run quickly
35 | ## Typically should use a large number of iterations for inference
36 | set.seed(111)
37 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
38 | 
39 | ExtractPIPs(fitkm)
40 | }
--------------------------------------------------------------------------------
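An illustrative follow-up (mine): flag exposures by their posterior inclusion probability. The 0.5 cutoff is a common convention rather than a package rule, and the `PIP` column name is assumed from the component-wise output:
```R
pips <- ExtractPIPs(fitkm)
# exposures with posterior inclusion probability above 0.5 (illustrative cutoff)
subset(pips, PIP > 0.5)
```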
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Resubmission
 2 | This is a resubmission. In this version I have:
 3 | 
 4 | * added the paper reference to the description field of the DESCRIPTION file
 5 | * added a \value field to the documentation for exported methods
 6 | * added small executable examples in Rd-files for exported functions
 7 | 
 8 | ## Test environments
 9 | * local OS X install, R-devel
10 | * win-builder (devel)
11 | * Windows Server 2008 R2 SP1, R-release, 32/64 bit
12 | 
13 | ## R CMD check results
14 | There were no ERRORs or WARNINGs.
15 | 
16 | There was one NOTE:
17 | 
18 | * New submission
19 | 
20 |   Package was archived on CRAN
21 | 
22 |   Possibly misspelled words in DESCRIPTION:
23 |     Bobb (9:41)
24 |     al (9:49)
25 |     et (9:46)
26 | 
27 | * A reference for the methods in the package is included in the
28 |   description as requested by CRAN; these words are the author names
29 |   included as part of the reference.
30 | 
31 |   CRAN repository db overrides:
32 |     X-CRAN-Comment: Archived on 2022-03-20 as check problems were not
33 |     corrected in time.
34 | 
35 |     'length > 1 in coercion to logical' error in check of 'bkmrhat'.
36 | 
37 | * Check problems have now been corrected.
38 | 
39 |   Found the following (possibly) invalid DOIs:
40 |     DOI: 10.1093/biostatistics/kxu058
41 |       From: DESCRIPTION
42 |       Status: Forbidden
43 |       Message: 403
44 | 
45 | * This is the DOI provided on the article website <https://academic.oup.com/biostatistics/article/16/3/493/269719>.
46 | 
47 | ## Downstream dependencies
48 | I have also run R CMD check on downstream dependencies with no issues.
--------------------------------------------------------------------------------
/man/TracePlot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TracePlot.R
 3 | \name{TracePlot}
 4 | \alias{TracePlot}
 5 | \title{Trace plot}
 6 | \usage{
 7 | TracePlot(
 8 |   fit,
 9 |   par,
10 |   comp = 1,
11 |   sel = NULL,
12 |   main = "",
13 |   xlab = "iteration",
14 |   ylab = "parameter value",
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
20 | 
21 | \item{par}{which parameter to plot}
22 | 
23 | \item{comp}{which component of the parameter vector to plot}
24 | 
25 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples}
26 | 
27 | \item{main}{title}
28 | 
29 | \item{xlab}{x axis label}
30 | 
31 | \item{ylab}{y axis label}
32 | 
33 | \item{...}{other arguments to pass onto the plotting function}
34 | }
35 | \value{
36 | No return value, generates plot
37 | }
38 | \description{
39 | Trace plot
40 | }
41 | \details{
42 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
43 | }
44 | \examples{
45 | ## First generate dataset
46 | set.seed(111)
47 | dat <- SimData(n = 50, M = 4)
48 | y <- dat$y
49 | Z <- dat$Z
50 | X <- dat$X
51 | 
52 | ## Fit model with component-wise variable selection
53 | ## Using only 100 iterations to make example run quickly
54 | ## Typically should use a large number of iterations for inference
55 | set.seed(111)
56 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
57 | 
58 | TracePlot(fit = fitkm, par = "beta")
59 | TracePlot(fit = fitkm, par = "sigsq.eps")
60 | TracePlot(fit = fitkm, par = "r", comp = 1)
61 | }
--------------------------------------------------------------------------------
/R/TracePlot.R:
--------------------------------------------------------------------------------
 1 | #' Trace plot
 2 | #'
 3 | #' @inheritParams ExtractEsts
 4 | #' @param par which parameter to plot
 5 | #' @param comp which component of the parameter vector to plot
 6 | #' @param main title
 7 | #' @param xlab x axis label
 8 | #' @param ylab y axis label
 9 | #' @param ... other arguments to pass onto the plotting function
10 | #' @export
11 | #' @import graphics
12 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
13 | #'
14 | #' @return No return value, generates plot
15 | #'
16 | #' @examples
17 | #' ## First generate dataset
18 | #' set.seed(111)
19 | #' dat <- SimData(n = 50, M = 4)
20 | #' y <- dat$y
21 | #' Z <- dat$Z
22 | #' X <- dat$X
23 | #'
24 | #' ## Fit model with component-wise variable selection
25 | #' ## Using only 100 iterations to make example run quickly
26 | #' ## Typically should use a large number of iterations for inference
27 | #' set.seed(111)
28 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
29 | #'
30 | #' TracePlot(fit = fitkm, par = "beta")
31 | #' TracePlot(fit = fitkm, par = "sigsq.eps")
32 | #' TracePlot(fit = fitkm, par = "r", comp = 1)
33 | TracePlot <- function(fit, par, comp = 1, sel = NULL, main = "", xlab = "iteration", ylab = "parameter value", ...) {
34 |   samps <- ExtractSamps(fit, sel = sel)[[par]]
35 |   if (!is.null(ncol(samps))) {
36 |     nm <- colnames(samps)[comp]
37 |     samps <- samps[, comp]
38 |   } else {
39 |     nm <- par
40 |   }
41 |   main <- paste0(main, "\n(", nm, " = ", format(mean(samps), digits = 2), ")")
42 |   plot(samps, type = "l", main = main, xlab = xlab, ylab = ylab, ...)
43 |   abline(h = mean(samps), col = "blue", lwd = 2)
44 | }
--------------------------------------------------------------------------------
/man/PlotPriorFits.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/InvestigatePrior_rm.R
 3 | \name{PlotPriorFits}
 4 | \alias{PlotPriorFits}
 5 | \title{Plot of exposure-response function from univariate KMR fit}
 6 | \usage{
 7 | PlotPriorFits(
 8 |   y,
 9 |   X,
10 |   Z,
11 |   fits,
12 |   which.z = NULL,
13 |   which.q = NULL,
14 |   plot.resid = TRUE,
15 |   ylim = NULL,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{y}{a vector of outcome data of length \code{n}.}
21 | 
22 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
23 | 
24 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
25 | 
26 | \item{fits}{output from \code{\link{InvestigatePrior}}}
27 | 
28 | \item{which.z}{which predictors (columns in \code{Z}) to plot}
29 | 
30 | \item{which.q}{which q.values to plot; defaults to all possible}
31 | 
32 | \item{plot.resid}{whether to plot the data points}
33 | 
34 | \item{ylim}{plotting limits for the y-axis}
35 | 
36 | \item{...}{other plotting arguments}
37 | }
38 | \value{
39 | No return value, generates plot
40 | }
41 | \description{
42 | Plot the \code{h(z[m])} estimated from frequentist KMR for \code{r[m]} fixed to specific values
43 | }
44 | \examples{
45 | ## First generate dataset
46 | set.seed(111)
47 | dat <- SimData(n = 50, M = 4)
48 | y <- dat$y
49 | Z <- dat$Z
50 | X <- dat$X
51 | 
52 | priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16))
53 | PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits)
54 | }
--------------------------------------------------------------------------------
/man/SimData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SimData.R
 3 | \name{SimData}
 4 | \alias{SimData}
 5 | \title{Simulate dataset}
 6 | \usage{
 7 | SimData(
 8 |   n = 100,
 9 |   M = 5,
10 |   sigsq.true = 0.5,
11 |   beta.true = 2,
12 |   hfun = 3,
13 |   Zgen = "norm",
14 |   ind = 1:2,
15 |   family = "gaussian"
16 | )
17 | }
18 | \arguments{
19 | \item{n}{Number of observations}
20 | 
21 | \item{M}{Number of predictor variables to generate}
22 | 
23 | \item{sigsq.true}{Variance of normally distributed residual error}
24 | 
25 | \item{beta.true}{Coefficient on the covariate}
26 | 
27 | \item{hfun}{An integer from 1 to 3 identifying which predictor-response function to generate}
28 | 
29 | \item{Zgen}{Method for generating the matrix Z of exposure variables, taking one of the values c("unif", "norm", "corr", "realistic")}
30 | 
31 | \item{ind}{select which predictor(s) will be included in the \code{h} function; how many predictors can be included depends on which \code{h} function is being used.}
32 | 
33 | \item{family}{a description of the error distribution and link function to be used in the model. Currently implemented for \code{gaussian} and \code{binomial} families.}
34 | }
35 | \value{
36 | a list containing the parameter values and generated variables of the simulated datasets
37 | }
38 | \description{
39 | Simulate predictor, covariate, and continuous outcome data
40 | }
41 | \details{
42 | \itemize{
43 |   \item \code{hfun = 1}: A nonlinear function of the first predictor
44 |   \item \code{hfun = 2}: A linear function of the first two predictors and their product term
45 |   \item \code{hfun = 3}: A nonlinear and nonadditive function of the first two predictor variables
46 | }
47 | }
48 | \examples{
49 | set.seed(5)
50 | dat <- SimData()
51 | }
--------------------------------------------------------------------------------
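A sketch combining the generator options documented above: `hfun = 3` (nonlinear and nonadditive in the first two exposures) with correlated exposures via `Zgen = "corr"`. All argument values come from this help page; the seed is arbitrary:
```R
set.seed(5)
# correlated exposures, nonlinear/nonadditive h of the first two columns of Z
dat <- SimData(n = 100, M = 5, hfun = 3, Zgen = "corr", ind = 1:2)
str(dat$Z)  # 100 x 5 exposure matrix
```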
/my-doc/scratch_hnew_pred_methods.R:
--------------------------------------------------------------------------------
 1 | set.seed(111)
 2 | dat <- SimData(n = 500, M = 4)
 3 | y <- dat$y
 4 | Z <- dat$Z
 5 | X <- dat$X
 6 | 
 7 | Xnew <- matrix(0, nrow(Z), ncol(X))
 8 | 
 9 | set.seed(111)
10 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 2000, verbose = FALSE, varsel = TRUE)
11 | 
12 | ests_full <- ExtractEsts(fit0)$h[, c("mean", "sd")]
13 | 
14 | ## add Vcomps
15 | sel <- with(fit0, seq(floor(iter/2) + 1, iter, 10))
16 | sel <- unique(floor(sel))
17 | s0 <- system.time(
18 |   Vinv <- with(fit0, lapply(sel, function(s) makeVcomps(r = r[s, ], lambda = lambda[s, ], Z, data.comps)$Vinv))
19 | )
20 | attr(Vinv, "sel") <- sel
21 | fit0V <- fit0
22 | fit0V$Vinv <- Vinv
23 | print(object.size(fit0), units = "Mb")
24 | print(object.size(fit0V), units = "Mb")
25 | 
26 | s1 <- system.time(
27 |   samps <- SamplePred(fit0, Xnew = Xnew)
28 | )
29 | 
30 | ests_samp <- samps %>%
31 |   {cbind(mean = colMeans(.), sd = apply(., 2, sd))}
32 | 
33 | s2 <- system.time(
34 |   ests_approx <- ComputePostmeanHnew(fit0) %$%
35 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
36 | )
37 | 
38 | s3 <- system.time(
39 |   ests_approx2 <- ComputePostmeanHnew2(fit0) %$%
40 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
41 | )
42 | 
43 | s4 <- system.time(
44 |   ests_approx2 <- ComputePostmeanHnew2(fit0V) %$%
45 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
46 | )
47 | 
48 | s1
49 | s2
50 | s3
51 | s4
52 | 
53 | head(ests_full)
54 | head(ests_samp)
55 | head(ests_approx)
56 | head(ests_approx2)
57 | 
58 | par(mfrow = c(1, 2))
59 | plot(ests_samp[, "mean"], ests_approx2[, "mean"])
60 | abline(0, 1, col = "red")
61 | plot(ests_samp[, "sd"], ests_approx2[, "sd"])
62 | abline(0, 1, col = "red")
63 | 
64 | par(mfrow = c(1, 2))
65 | plot(ests_samp[, "mean"], ests_approx[, "mean"])
66 | abline(0, 1, col = "red")
67 | plot(ests_samp[, "sd"], ests_approx[, "sd"])
68 | abline(0, 1, col = "red")
69 | 
70 | par(mfrow = c(1, 2))
71 | plot(ests_approx[, "mean"], ests_approx2[, "mean"])
72 | abline(0, 1, col = "red")
73 | plot(ests_approx[, "sd"], ests_approx2[, "sd"])
74 | abline(0, 1, col = "red")
75 | 
--------------------------------------------------------------------------------
/my-doc/tests/tests_knots.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | library(ggplot2)
 3 | 
 4 | family <- "gaussian"
 5 | #family <- "binomial"
 6 | 
 7 | ## example using a matrix of knots ####
 8 | 
 9 | n <- 500
10 | M <- 5
11 | sigsq.true <- ifelse(family == "gaussian", 0.05, 1)
12 | beta.true <- 0.5
13 | Z <- matrix(rnorm(n * M), n, M)
14 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n))
15 | eps <- rnorm(n, sd = sqrt(sigsq.true))
16 | h <- apply(Z, 1, function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3))
17 | eps <- rnorm(n, sd = sqrt(sigsq.true))
18 | y <- drop(X*beta.true + h + eps)
19 | if (family == "binomial") {
20 |   ystar <- y
21 |   y <- ifelse(ystar > 0, 1, 0)
22 | }
23 | 
24 | des <- fields::cover.design(Z, nd = 100)
25 | knots <- des$design
26 | 
27 | ## compare timing to not using knots
28 | set.seed(111)
29 | fit_full <- kmbayes(y = y, Z = Z, X = X, iter = 300, varsel = TRUE, family = family, verbose = FALSE)
30 | with(fit_full, difftime(time2, time1))
31 | fit_knots <- kmbayes(y = y, Z = Z, X = X, iter = 300, varsel = TRUE, family = family, verbose = FALSE, knots = knots)
32 | with(fit_knots, difftime(time2, time1))
33 | 
34 | pred.resp.univar <- PredictorResponseUnivar(fit = fit_full)
35 | s1 <- system.time(pred.resp.univar <- pred.resp.univar.full <- PredictorResponseUnivar(fit = fit_full, method = "exact"))
36 | s1
37 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
38 |   geom_smooth(stat = "identity") +
39 |   facet_wrap(~ variable) +
40 |   ylab("h(z)")
41 | 
42 | pred.resp.univar <- PredictorResponseUnivar(fit = fit_knots)
43 | s2 <- system.time(pred.resp.univar <- pred.resp.univar.knots <- PredictorResponseUnivar(fit = fit_knots, method = "exact"))
44 | s2
45 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
46 |   geom_smooth(stat = "identity") +
47 |   facet_wrap(~ variable) +
48 |   ylab("h(z)")
49 | 
50 | 
51 | ## with a larger number of iterations
52 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 5000, varsel = TRUE, family = family, knots = knots, control.params = list(verbose_show_ests = TRUE))
53 | 
54 | summary(fit0)
55 | 
56 | TracePlot(fit = fit0, par = "beta")
57 | ExtractPIPs(fit0)
--------------------------------------------------------------------------------
/R/argval_controlParams.R:
--------------------------------------------------------------------------------
 1 | # Validate control params list
 2 | ##components of list
 3 | ##lambda.jump / default=10
 4 | ##mu.lambda, sigma.lambda / default=10
 5 | ##a.p0, b.p0 / default=1
 6 | ##r.prior / default = "gamma", alt=invunif, unif
 7 | ##a.sigsq, b.sigsq / default=0.001
 8 | ##mu.r, sigma.r / default=5
 9 | ##r.muprop / default=1
10 | ##r.jump / default=0.2
11 | ##r.jump1, r.jump2 / default=2, 0.2
12 | ##r.a, r.b / default=0, 100
13 | 
14 | #
15 | validateControlParams <- function(varsel, family, id, control.params) {
16 |   message("Validating control.params...")
17 |   ##print(control.params)
18 |   if (family == "gaussian"){
19 |     stopifnot(control.params$a.sigsq > 0, control.params$b.sigsq > 0)
20 |   }
21 |   if (varsel == TRUE) {
22 |     stopifnot(control.params$a.p0 > 0, control.params$b.p0 > 0, control.params$r.jump1 > 0, control.params$r.jump2 > 0, control.params$r.muprop > 0)
23 |   }
24 |   else {
25 |     stopifnot(control.params$r.jump > 0)
26 |   } ##end varsel-specific stuff
27 |   ##if id, need two elements in mu.lambda, sigma.lambda and lambda.jump
28 |   if (!is.null(id)) {
29 |     stopifnot(length(control.params$mu.lambda) == 2, length(control.params$sigma.lambda) == 2, length(control.params$lambda.jump) == 2)
30 |   }
31 |   ##regardless of id, validate each element of these params
32 |   for (i in 1:length(control.params$mu.lambda)) {
33 |     stopifnot(control.params$mu.lambda > 0)
34 |   }
35 |   for (i in 1:length(control.params$sigma.lambda)) {
36 |     stopifnot(control.params$sigma.lambda > 0)
37 |   }
38 |   for (i in 1:length(control.params$lambda.jump)) {
39 |     stopifnot(control.params$lambda.jump > 0)
40 |   }
41 |   rprior <- control.params$r.prior
42 |   stopifnot(rprior == "gamma" | rprior == "unif" | rprior == "invunif")
43 |   ##stopifnot(length(intersect (control.params$r.prior, c("gamma","unif","invunif")))>0)
44 |   if (control.params$r.prior == "gamma") {
45 |     stopifnot(control.params$mu.r > 0, control.params$sigma.r > 0)
46 |   }
47 |   else {
48 |     stopifnot(control.params$r.a >= 0, control.params$r.b > control.params$r.a)
49 |   }
50 | }
--------------------------------------------------------------------------------
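The defaults listed in the header comments of `argval_controlParams.R` are tunable through the `control.params` argument of `kmbayes()`. A sketch (mine), with the `r.jump2` value borrowed from `tests_Znew.R` later in this dump and `y`, `Z`, `X` assumed to exist as in the test scripts:
```R
# Illustrative: override selected MCMC proposal parameters
# (defaults per the header comments: lambda.jump = 10, r.jump2 = 0.2)
fit <- kmbayes(y = y, Z = Z, X = X, iter = 1000, varsel = TRUE,
               control.params = list(r.jump2 = 0.5, lambda.jump = 10))
```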
/my-doc/tests/tests_noX.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | library(ggplot2)
 3 | 
 4 | family <- "gaussian"
 5 | family <- "binomial"
 6 | 
 7 | ## example where there is no X matrix ####
 8 | 
 9 | n <- 100
10 | M <- 5
11 | sigsq.true <- ifelse(family == "gaussian", 0.05, 1)
12 | Z <- matrix(rnorm(n * M), n, M)
13 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n))
14 | eps <- rnorm(n, sd = sqrt(sigsq.true))
15 | h <- apply(Z, 1, function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3))
16 | eps <- rnorm(n)
17 | y <- drop(h + eps)
18 | if (family == "binomial") {
19 |   ystar <- y
20 |   y <- ifelse(ystar > 0, 1, 0)
21 | }
22 | 
23 | set.seed(111)
24 | fit0 <- kmbayes(y = y, Z = Z, iter = 5000, varsel = TRUE, family = family)
25 | 
26 | fit0
27 | 
28 | summary(fit0)
29 | 
30 | TracePlot(fit = fit0, par = "beta")
31 | ExtractPIPs(fit0)
32 | 
33 | pred.resp.univar <- PredictorResponseUnivar(fit = fit0)
34 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) +
35 |   geom_smooth(stat = "identity") +
36 |   facet_wrap(~ variable) +
37 |   ylab("h(z)")
38 | 
39 | pred.resp.bivar <- PredictorResponseBivar(fit = fit0,
40 |                                           min.plot.dist = 1)
41 | 
42 | pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar,
43 |                                                        Z = Z, qs = c(0.25, 0.5, 0.75))
44 | 
45 | ggplot(pred.resp.bivar.levels, aes(z1, est)) +
46 |   geom_smooth(aes(col = quantile), stat = "identity") +
47 |   facet_grid(variable2 ~ variable1) +
48 |   ggtitle("h(expos1 | quantiles of expos2)") +
49 |   xlab("expos1")
50 | 
51 | risks.overall.approx <- OverallRiskSummaries(fit = fit0,
52 |                                              qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5)
53 | risks.overall.approx
54 | 
55 | risks.overall.exact <- OverallRiskSummaries(fit = fit0,
56 |                                             qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5, method = "exact")
57 | risks.overall.exact
58 | 
59 | risks.singvar <- SingVarRiskSummaries(fit = fit0,
60 |                                       qs.diff = c(0.25, 0.75), q.fixed = c(0.25, 0.50, 0.75))
61 | risks.singvar
62 | 
63 | risks.int <- SingVarIntSummaries(fit = fit0,
64 |                                  qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75))
65 | risks.int
66 | 
--------------------------------------------------------------------------------
/man/InvestigatePrior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/InvestigatePrior_rm.R
 3 | \name{InvestigatePrior}
 4 | \alias{InvestigatePrior}
 5 | \title{Investigate prior}
 6 | \usage{
 7 | InvestigatePrior(
 8 |   y,
 9 |   Z,
10 |   X,
11 |   ngrid = 50,
12 |   q.seq = c(2, 1, 1/2, 1/4, 1/8, 1/16),
13 |   r.seq = NULL,
14 |   Drange = NULL,
15 |   verbose = FALSE
16 | )
17 | }
18 | \arguments{
19 | \item{y}{a vector of outcome data of length \code{n}.}
20 | 
21 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
22 | 
23 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
24 | 
25 | \item{ngrid}{Number of grid points over which to plot the exposure-response function}
26 | 
27 | \item{q.seq}{Sequence of values corresponding to different degrees of smoothness in the estimated exposure-response function. A value of q corresponds to fractions of the range of the data over which there is a decay in the correlation \code{cor(h[i],h[j])} between two subjects by 50\code{\%}.}
28 | 
29 | \item{r.seq}{sequence of values at which to fix \code{r} for estimating the exposure-response function}
30 | 
31 | \item{Drange}{the range of the \code{z_m} data over which to apply the values of \code{q.seq}. If not specified, will be calculated as the maximum of the ranges of \code{z_1} through \code{z_M}.}
32 | 
33 | \item{verbose}{TRUE or FALSE: flag indicating whether to print to the screen which exposure variable and q value has been completed}
34 | }
35 | \value{
36 | a list containing the predicted values, residuals, and estimated predictor-response function for each degree of smoothness being considered
37 | }
38 | \description{
39 | Investigate the impact of the \code{r[m]} parameters on the smoothness of the exposure-response function \code{h(z[m])}.
40 | }
41 | \details{
42 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
43 | }
44 | \examples{
45 | ## First generate dataset
46 | set.seed(111)
47 | dat <- SimData(n = 50, M = 4)
48 | y <- dat$y
49 | Z <- dat$Z
50 | X <- dat$X
51 | 
52 | priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16))
53 | PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits)
54 | }
--------------------------------------------------------------------------------
/my-doc/tests/tests_Znew.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | library(magrittr)
 3 | 
 4 | ## Gaussian outcome
 5 | 
 6 | set.seed(111)
 7 | dat <- SimData(n = 50, M = 4)
 8 | y <- dat$y
 9 | Z <- dat$Z
10 | X <- dat$X
11 | 
12 | Znew <- rbind(c(0,0,0,0), c(1,1,1,1))
13 | 
14 | set.seed(111)
15 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 10000, verbose = FALSE, varsel = TRUE, Znew = Znew)
16 | 
17 | ests_samp <- ExtractEsts(fit0)$hnew
18 | ests_samp2 <- SamplePred(fit0, Znew = Znew, Xnew = matrix(0, nrow(Znew), ncol(X))) %>%
19 |   {cbind(colMeans(.), apply(., 2, sd))}
20 | system.time(
21 |   ests_approx <- ComputePostmeanHnew(fit0, Znew = Znew) %$%
22 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
23 | )
24 | system.time(
25 |   ests_approx2 <- ComputePostmeanHnew2(fit0, Znew = Znew) %$%
26 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
27 | )
28 | 
29 | ests_samp
30 | ests_samp2
31 | ests_approx
32 | ests_approx2
33 | 
34 | fit0
35 | 
36 | summary(fit0)
37 | 
38 | ## Binomial outcome
39 | 
40 | set.seed(123)
41 | n <- 200 ## number of observations
42 | M <- 4 ## number of exposure variables
43 | beta.true <- 0.1
44 | Z <- matrix(runif(n * M, -1, 1), n, M)
45 | x <- 3*cos(Z[, 1]) + 2*rnorm(n)
46 | hfun <- function(z) (2*z + 0.5) ^ 2
47 | h <- hfun(Z[, 1]) ## only depends on z1
48 | ## generate using latent normal representation
49 | eps <- rnorm(n)
50 | ystar <- x * beta.true + h + eps
51 | y <- ifelse(ystar > 0, 1, 0)
52 | datp <- list(n = n, M = M, beta.true = beta.true, Z = Z, h = h, X = cbind(x), y = y, eps = eps, ystar = ystar)
53 | rm(n, M, beta.true, Z, x, h, eps, y, ystar)
54 | 
55 | Znew <- rbind(c(0,0,0,0), c(1,1,1,1))
56 | 
57 | set.seed(123)
58 | fit0 <- kmbayes(y = datp$y, Z = datp$Z, X = datp$X, iter = 5000, verbose = TRUE, varsel = TRUE, Znew = Znew, family = "binomial", control.params = list(r.jump2 = 0.5))
59 | 
60 | ests_samp <- ExtractEsts(fit0)$hnew
61 | system.time(
62 |   ests_samp2 <- SamplePred(fit0, Znew = Znew, Xnew = matrix(0, nrow(Znew), ncol(datp$X))) %>%
63 |     {cbind(colMeans(.), apply(., 2, sd))}
64 | )
65 | system.time(
66 |   ests_approx <- ComputePostmeanHnew(fit0, Znew = Znew) %$%
67 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
68 | )
69 | system.time(
70 |   ests_approx2 <- ComputePostmeanHnew2(fit0, Znew = Znew) %$%
71 |     cbind(mean = postmean, sd = sqrt(diag(postvar)))
72 | )
73 | 
74 | ests_samp
75 | ests_samp2
76 | ests_approx
77 | ests_approx2
78 | 
79 | fit0
80 | 
81 | summary(fit0)
--------------------------------------------------------------------------------
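`tests_Znew.R` above benchmarks an experimental `ComputePostmeanHnew2()` that is not part of the exported API. The same approx-versus-exact comparison can be sketched with exported functions only (my illustration, reusing `fit0` and `Znew` from the script):
```R
# Illustrative: compare the fast approximation against the exact method
h_approx <- ComputePostmeanHnew(fit0, Znew = Znew, method = "approx")
h_exact  <- ComputePostmeanHnew(fit0, Znew = Znew, method = "exact")
cbind(approx = h_approx$postmean, exact = h_exact$postmean)
```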
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # bkmr 0.2.2.9000
 2 | 
 3 | ## Bug fixes
 4 | 
 5 | * Updated MCMC step for parameter lambda to improve MCMC convergence for probit model
 6 | 
 7 | ## Minor changes
 8 | 
 9 | * Modifications to speed up running time
10 | 
11 |     * Optimized the `makeKpart` function
12 | 
13 |     * Print diagnostics options moved outside of main loops
14 | 
15 | # bkmr 0.2.2
16 | 
17 | ## Bug fixes
18 | 
19 | * Corrected code that produced warning `length > 1 in coercion to logical`
20 | 
21 | * Updated functions that use deprecated functions from the `dplyr` package
22 | 
23 | ## Minor changes
24 | 
25 | * No longer export the following functions:
26 | 
27 |     * `CalcGroupPIPs`, `CalcWithinGroupPIPs`, and `CalcPIPs` as these should typically be calculated using the function `ExtractPIPs`
28 | 
29 |     * `ComputePostmeanHnew.approx` and `ComputePostmeanHnew.exact` as these should typically be calculated using the function `ComputePostmeanHnew`
30 | 
31 |     * `set_verbose_opts` as this is only called internally
32 | 
33 | * Expanded function documentation by adding example code
34 | 
35 | # bkmr 0.2.1
36 | 
37 | ## Bug fixes
38 | 
39 | * Allowable starting values for the `r[m]` parameters updated as follows
40 | 
41 |     * no longer truncated to a single value (when `varsel = FALSE` and `rmethod = "varying"`)
42 | 
43 |     * can be equal to 0 (when `varsel = TRUE`)
44 | 
45 | * Error no longer generated if starting values for h.hat are not positive
46 | 
47 | * When checking class of an object, use `inherits()` instead of `class()`
48 | 
49 | # bkmr 0.2.0
50 | 
51 | ## Major changes
52 | 
53 | * Added ability to have binomial outcome `family` by implementing probit regression within `kmbayes()`
54 | 
55 | * Removed computation of the subject-specific effects `h[i]` within `kmbayes()`, as this is not always desired, and greatly slows down model fitting
56 | 
57 |     * This could still be done by setting the option `est.h = TRUE` in the `kmbayes` function
58 | 
59 |     * posterior samples of `h[i]` can now be obtained via the post-processing `SamplePred` function; alternatively, posterior summaries (mean, variance) can be obtained via the post-processing `ComputePostmeanHnew` function
60 | 
61 | * Added ability to use exact estimates of the posterior mean and variance by specifying the argument `method = 'exact'` within the post-processing functions (e.g., `OverallRiskSummaries()`, `PredictorResponseUnivar()`)
62 | 
63 | ## Bug fixes
64 | 
65 | * Fixed `PredictorResponseBivarLevels()` when argument `both_pairs = TRUE` (#4)
--------------------------------------------------------------------------------
/man/PredictorResponseBivarLevels.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/PredictorResponseFunctions.R
 3 | \name{PredictorResponseBivarLevels}
 4 | \alias{PredictorResponseBivarLevels}
 5 | \title{Plot cross-sections of the bivariate predictor-response function}
 6 | \usage{
 7 | PredictorResponseBivarLevels(
 8 |   pred.resp.df,
 9 |   Z = NULL,
10 |   qs = c(0.25, 0.5, 0.75),
11 |   both_pairs = TRUE,
12 |   z.names = NULL
13 | )
14 | }
15 | \arguments{
16 | \item{pred.resp.df}{object obtained from running the function \code{\link{PredictorResponseBivar}}}
17 | 
18 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
19 | 
20 | \item{qs}{vector of quantiles at which to fix the second variable}
21 | 
22 | \item{both_pairs}{flag indicating whether, if \code{h(z1)} is being plotted for z2 fixed at different levels, that they should be plotted in the reverse order as well (for \code{h(z2)} at different levels of z1)}
23 | 
24 | \item{z.names}{optional vector of names for the columns of \code{z}}
25 | }
26 | \value{
27 | a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the quantile at which the second predictor is fixed, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function
28 | }
29 | \description{
30 | Function to plot the \code{h} function of a particular variable at different levels (quantiles) of a second variable
31 | }
32 | \details{
33 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
34 | }
35 | \examples{
36 | ## First generate dataset
37 | set.seed(111)
38 | dat <- SimData(n = 50, M = 4)
39 | y <- dat$y
40 | Z <- dat$Z
41 | X <- dat$X
42 | 
43 | ## Fit model with component-wise variable selection
44 | ## Using only 100 iterations to make example run quickly
45 | ## Typically should use a large number of iterations for inference
46 | set.seed(111)
47 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
48 | 
49 | ## Obtain predicted value on new grid of points for each pair of predictors
50 | ## Using only a 10-by-10 point grid to make example run quickly
51 | pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10)
52 | pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar,
53 |                                                        Z = Z, qs = c(0.1, 0.5, 0.9))
54 | }
--------------------------------------------------------------------------------
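A plotting sketch for the long data frame above, adapted from `tests_noX.R` earlier in this dump:
```R
library(ggplot2)
# cross-sections of h(z1) at different quantiles of z2, faceted by exposure pair
ggplot(pred.resp.bivar.levels, aes(z1, est)) +
  geom_smooth(aes(col = quantile), stat = "identity") +
  facet_grid(variable2 ~ variable1) +
  xlab("expos1")
```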
/man/SamplePred.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SamplePred.R
 3 | \name{SamplePred}
 4 | \alias{SamplePred}
 5 | \title{Obtain posterior samples of predictions at new points}
 6 | \usage{
 7 | SamplePred(
 8 |   fit,
 9 |   Znew = NULL,
10 |   Xnew = NULL,
11 |   Z = NULL,
12 |   X = NULL,
13 |   y = NULL,
14 |   sel = NULL,
15 |   type = c("link", "response"),
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
21 | 
22 | \item{Znew}{optional matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If not specified, defaults to using observed Z values}
23 | 
24 | \item{Xnew}{optional matrix of new covariate values at which to obtain predictions. If not specified, defaults to using observed X values}
25 | 
26 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
27 | 
28 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
29 | 
30 | \item{y}{a vector of outcome data of length \code{n}.}
31 | 
32 | \item{sel}{A vector selecting which iterations of the BKMR fit should be retained for inference. If not specified, will default to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, then 100 iterations are kept}
33 | 
34 | \item{type}{whether to make predictions on the scale of the link or of the response; only relevant for the binomial outcome family}
35 | 
36 | \item{...}{other arguments; not currently used}
37 | }
38 | \value{
39 | a matrix with the posterior samples at the new points
40 | }
41 | \description{
42 | Obtains posterior samples of \code{E(Y) = h(Znew) + beta*Xnew} or of \code{g^{-1}[E(y)]}
43 | }
44 | \details{
45 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html}
46 | }
47 | \examples{
48 | set.seed(111)
49 | dat <- SimData(n = 50, M = 4)
50 | y <- dat$y
51 | Z <- dat$Z
52 | X <- dat$X
53 | 
54 | ## Fit model with component-wise variable selection
55 | ## Using only 100 iterations to make example run quickly
56 | ## Typically should use a large number of iterations for inference
57 | set.seed(111)
58 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
59 | 
60 | med_vals <- apply(Z, 2, median)
61 | Znew <- matrix(med_vals, nrow = 1)
62 | h_true <- dat$HFun(Znew)
63 | set.seed(111)
64 | samps3 <- SamplePred(fitkm, Znew = Znew, Xnew = cbind(0))
65 | head(samps3)
66 | }
--------------------------------------------------------------------------------
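Since `SamplePred()` returns a matrix of posterior draws (rows = iterations, columns = new points), a common reduction is a per-point mean and standard deviation; this sketch follows the idiom used in `scratch_hnew_pred_methods.R` earlier in this dump:
```R
samps <- SamplePred(fitkm, Znew = Znew, Xnew = cbind(0))
# posterior mean and sd of the prediction at each new point
cbind(mean = colMeans(samps), sd = apply(samps, 2, sd))
```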
/man/OverallRiskSummaries.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ComputeMixtureSummaries.R
 3 | \name{OverallRiskSummaries}
 4 | \alias{OverallRiskSummaries}
 5 | \title{Calculate overall risk summaries}
 6 | \usage{
 7 | OverallRiskSummaries(
 8 |   fit,
 9 |   y = NULL,
10 |   Z = NULL,
11 |   X = NULL,
12 |   qs = seq(0.25, 0.75, by = 0.05),
13 |   q.fixed = 0.5,
14 |   method = "approx",
15 |   sel = NULL
16 | )
17 | }
18 | \arguments{
19 | \item{fit}{An object containing the results returned by the \code{kmbayes} function}
20 | 
21 | \item{y}{a vector of outcome data of length \code{n}.}
22 | 
23 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.}
24 | 
25 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.}
26 | 
27 | \item{qs}{vector of quantiles at which to calculate the overall risk summary}
28 | 
29 | \item{q.fixed}{a second quantile at which to compare the estimated \code{h} function}
30 | 
31 | \item{method}{method for obtaining posterior summaries at a vector of new points. Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details}
32 | 
33 | \item{sel}{selects which iterations of the MCMC sampler to use for inference; see details}
34 | }
35 | \value{
36 | a data frame containing the (posterior mean) estimate and posterior standard deviation of the overall risk measures
37 | }
38 | \description{
39 | Compare estimated \code{h} function when all predictors are at a particular quantile to when all are at a second fixed quantile
40 | }
41 | \details{
42 | \itemize{
43 |   \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations.
44 |   \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, then 100 iterations are kept
45 | }
46 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html}
47 | }
48 | \examples{
49 | ## First generate dataset
50 | set.seed(111)
51 | dat <- SimData(n = 50, M = 4)
52 | y <- dat$y
53 | Z <- dat$Z
54 | X <- dat$X
55 | 
56 | ## Fit model with component-wise variable selection
57 | ## Using only 100 iterations to make example run quickly
58 | ## Typically should use a large number of iterations for inference
59 | set.seed(111)
60 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
61 | 
62 | risks.overall <- OverallRiskSummaries(fit = fitkm, qs = seq(0.25, 0.75, by = 0.05),
63 |                                       q.fixed = 0.5, method = "exact")
64 | }
--------------------------------------------------------------------------------
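An illustrative plot of the overall risk summaries (mine; the column names `quantile`, `est`, and `sd` are assumptions about the returned data frame, consistent with the \value description above):
```R
library(ggplot2)
# estimate with approximate 95% pointwise bands across exposure quantiles
ggplot(risks.overall, aes(quantile, est, ymin = est - 1.96*sd, ymax = est + 1.96*sd)) +
  geom_pointrange()
```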
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 31 | } 32 | \value{ 33 | a list of length two containing the posterior mean vector and posterior variance matrix 34 | } 35 | \description{ 36 | Compute the posterior mean and variance of \code{h} at a new predictor values 37 | } 38 | \details{ 39 | \itemize{ 40 | \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations. 41 | \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, than 100 iterations are kept 42 | } 43 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html} 44 | } 45 | \examples{ 46 | set.seed(111) 47 | dat <- SimData(n = 50, M = 4) 48 | y <- dat$y 49 | Z <- dat$Z 50 | X <- dat$X 51 | 52 | ## Fit model with component-wise variable selection 53 | ## Using only 100 iterations to make example run quickly 54 | ## Typically should use a large number of iterations for inference 55 | set.seed(111) 56 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 57 | 58 | med_vals <- apply(Z, 2, median) 59 | Znew <- matrix(med_vals, nrow = 1) 60 | h_true <- dat$HFun(Znew) 61 | h_est1 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "approx") 62 | h_est2 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "exact") 63 | } 64 | -------------------------------------------------------------------------------- /man/PredictorResponseUnivar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseUnivar} 4 | \alias{PredictorResponseUnivar} 5 | \title{Plot univariate predictor-response function on a new grid of points} 6 | \usage{ 7 | PredictorResponseUnivar( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | which.z = 1:ncol(Z), 13 | method = "approx", 14 | ngrid = 50, 15 | q.fixed = 0.5, 16 | sel = NULL, 17 | min.plot.dist = Inf, 18 | center = TRUE, 19 | z.names = colnames(Z), 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 25 | 26 | \item{y}{a vector of outcome data of length \code{n}.} 27 | 28 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 29 | 30 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 31 | 32 | \item{which.z}{vector identifying which predictors (columns of \code{Z}) should be plotted} 33 | 34 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 35 | 36 | \item{ngrid}{number of grid points to cover the range of each predictor (column in \code{Z})} 37 | 38 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 39 | 40 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 41 | 42 | \item{min.plot.dist}{specifies a minimum distance that a new grid point needs to be from an observed data point in order to compute the prediction; points further than this will not be computed} 43 | 44 | \item{center}{flag for whether to scale the exposure-response function to have mean zero} 45 | 46 | \item{z.names}{optional vector of names for the columns of \code{z}} 47 | 48 | \item{...}{other arguments to pass on to the prediction function} 49 | } 50 | \value{ 51 | a long data frame with the predictor name, predictor value, posterior mean estimate, and posterior standard deviation 52 | } 53 | \description{ 54 | Plot univariate predictor-response function on a new grid of points 55 | } 56 | \details{ 57 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 58 | } 59 | \examples{ 60 | ## First generate dataset 61 | set.seed(111) 62 | dat <- SimData(n = 50, M = 4) 63 | y <- dat$y 64 | Z <- dat$Z 65 | X <- dat$X 66 | 67 | ## Fit model with component-wise variable selection 68 | ## Using only 100 iterations to make example run quickly 69 | ## Typically should use a large number of iterations for inference 70 | set.seed(111) 71 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 72 | pred.resp.univar <- PredictorResponseUnivar(fit = fitkm) 73 | } 74 | -------------------------------------------------------------------------------- /man/SingVarIntSummaries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ComputeMixtureSummaries.R 3 | \name{SingVarIntSummaries} 4 | \alias{SingVarIntSummaries} 5 | \title{Single Variable Interaction Summaries} 6 | \usage{ 7 | SingVarIntSummaries( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | which.z = 1:ncol(Z), 13 | qs.diff = c(0.25, 0.75), 14 | qs.fixed = c(0.25, 0.75), 15 | method = "approx", 16 | sel = NULL, 17 | z.names = colnames(Z), 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 23 | 24 | \item{y}{a vector of outcome data of length \code{n}.} 25 | 26 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 27 | 28 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 29 | 30 | \item{which.z}{vector indicating which variables (columns of \code{Z}) for which the summary should be computed} 31 | 32 | \item{qs.diff}{vector indicating the two quantiles at which to compute the single-predictor risk summary} 33 | 34 | \item{qs.fixed}{vector indicating the two quantiles at which to fix all of the remaining exposures in \code{Z}} 35 | 36 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 37 | 38 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 39 | 40 | \item{z.names}{optional vector of names for the columns of \code{z}} 41 | 42 | \item{...}{other arguments to pass on to the prediction function} 43 | } 44 | \value{ 45 | a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures 46 | } 47 | \description{ 48 | Compare the single-predictor health risks when all of the other predictors in Z are fixed to their a specific quantile to when all of the other predictors in Z are fixed to their a second specific quantile. 49 | } 50 | \details{ 51 | \itemize{ 52 | \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations. 53 | \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, than 100 iterations are kept 54 | } 55 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html} 56 | } 57 | \examples{ 58 | ## First generate dataset 59 | set.seed(111) 60 | dat <- SimData(n = 50, M = 4) 61 | y <- dat$y 62 | Z <- dat$Z 63 | X <- dat$X 64 | 65 | ## Fit model with component-wise variable selection 66 | ## Using only 100 iterations to make example run quickly 67 | ## Typically should use a large number of iterations for inference 68 | set.seed(111) 69 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 70 | 71 | risks.int <- SingVarIntSummaries(fit = fitkm, method = "exact") 72 | } 73 | -------------------------------------------------------------------------------- /man/SingVarRiskSummaries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ComputeMixtureSummaries.R 3 | \name{SingVarRiskSummaries} 4 | \alias{SingVarRiskSummaries} 5 | \title{Single Variable Risk Summaries} 6 | \usage{ 7 | SingVarRiskSummaries( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | which.z = 1:ncol(Z), 13 | qs.diff = c(0.25, 0.75), 14 | q.fixed = c(0.25, 0.5, 0.75), 15 | method = "approx", 16 | sel = NULL, 17 | z.names = colnames(Z), 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 23 | 24 | \item{y}{a vector of outcome data of length \code{n}.} 25 | 26 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 27 | 28 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 29 | 30 | \item{which.z}{vector indicating which variables (columns of \code{Z}) for which the summary should be computed} 31 | 32 | \item{qs.diff}{vector indicating the two quantiles \code{q_1} and \code{q_2} at which to compute \code{h(z_{q2}) - h(z_{q1})}} 33 | 34 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 35 | 36 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 37 | 38 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 39 | 40 | \item{z.names}{optional vector of names for the columns of \code{z}} 41 | 42 | \item{...}{other arguments to pass on to the prediction function} 43 | } 44 | \value{ 45 | a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures 46 | } 47 | \description{ 48 | Compute summaries of the risks associated with a change in a single variable in \code{Z} from a single level (quantile) to a second level (quantile), for the other variables in \code{Z} fixed to a specific level (quantile) 49 | } 50 | \details{ 51 | \itemize{ 52 | \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations. 53 | \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, than 100 iterations are kept 54 | } 55 | For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html} 56 | } 57 | \examples{ 58 | ## First generate dataset 59 | set.seed(111) 60 | dat <- SimData(n = 50, M = 4) 61 | y <- dat$y 62 | Z <- dat$Z 63 | X <- dat$X 64 | 65 | ## Fit model with component-wise variable selection 66 | ## Using only 100 iterations to make example run quickly 67 | ## Typically should use a large number of iterations for inference 68 | set.seed(111) 69 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 70 | 71 | risks.singvar <- SingVarRiskSummaries(fit = fitkm, method = "exact") 72 | } 73 | -------------------------------------------------------------------------------- /man/PredictorResponseBivar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseBivar} 4 | \alias{PredictorResponseBivar} 5 | \title{Predict the exposure-response function at a new grid of points} 6 | \usage{ 7 | PredictorResponseBivar( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | z.pairs = NULL, 13 | method = "approx", 14 | ngrid = 50, 15 | q.fixed = 0.5, 16 | sel = NULL, 17 | min.plot.dist = 0.5, 18 | center = TRUE, 19 | z.names = colnames(Z), 20 | verbose = TRUE, 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 26 | 27 | \item{y}{a vector of outcome data of length \code{n}.} 28 | 29 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 30 | 31 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 32 | 33 | \item{z.pairs}{data frame showing which pairs of predictors to plot} 34 | 35 | \item{method}{method for obtaining posterior summaries at a vector of new points. 
Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 36 | 37 | \item{ngrid}{number of grid points in each dimension} 38 | 39 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 40 | 41 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 42 | 43 | \item{min.plot.dist}{specifies a minimum distance that a new grid point needs to be from an observed data point in order to compute the prediction; points further than this will not be computed} 44 | 45 | \item{center}{flag for whether to scale the exposure-response function to have mean zero} 46 | 47 | \item{z.names}{optional vector of names for the columns of \code{z}} 48 | 49 | \item{verbose}{TRUE or FALSE: flag of whether to print intermediate output to the screen} 50 | 51 | \item{...}{other arguments to pass on to the prediction function} 52 | } 53 | \value{ 54 | a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function 55 | } 56 | \description{ 57 | Predict the exposure-response function at a new grid of points 58 | } 59 | \details{ 60 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 61 | } 62 | \examples{ 63 | ## First generate dataset 64 | set.seed(111) 65 | dat <- SimData(n = 50, M = 4) 66 | y <- dat$y 67 | Z <- dat$Z 68 | X <- dat$X 69 | 70 | ## Fit model with component-wise variable selection 71 | ## Using only 100 iterations to make example run quickly 72 | ## Typically should use a large number of iterations for inference 73 | set.seed(111) 74 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 75 | 76 | ## Obtain predicted value on new grid of points for each pair of predictors 77 | ## Using only a 10-by-10 point grid to make example run quickly 78 | pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10) 79 | 80 | } 81 | -------------------------------------------------------------------------------- /man/PredictorResponseBivarPair.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseBivarPair} 4 | \alias{PredictorResponseBivarPair} 5 | \title{Plot bivariate predictor-response function on a new grid of points} 6 | \usage{ 7 | PredictorResponseBivarPair( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | whichz1 = 1, 13 | whichz2 = 2, 14 | whichz3 = NULL, 15 | method = "approx", 16 | prob = 0.5, 17 | q.fixed = 0.5, 18 | sel = NULL, 19 | ngrid = 50, 20 | min.plot.dist = 0.5, 21 | center = TRUE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{fit}{An object containing the results returned by a the \code{kmbayes} function} 27 | 28 | \item{y}{a vector of outcome data of length \code{n}.} 29 | 30 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 31 | 32 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. 
81 | -------------------------------------------------------------------------------- /man/PredictorResponseBivarPair.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictorResponseFunctions.R 3 | \name{PredictorResponseBivarPair} 4 | \alias{PredictorResponseBivarPair} 5 | \title{Plot bivariate predictor-response function on a new grid of points} 6 | \usage{ 7 | PredictorResponseBivarPair( 8 | fit, 9 | y = NULL, 10 | Z = NULL, 11 | X = NULL, 12 | whichz1 = 1, 13 | whichz2 = 2, 14 | whichz3 = NULL, 15 | method = "approx", 16 | prob = 0.5, 17 | q.fixed = 0.5, 18 | sel = NULL, 19 | ngrid = 50, 20 | min.plot.dist = 0.5, 21 | center = TRUE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{fit}{An object containing the results returned by the \code{kmbayes} function} 27 | 28 | \item{y}{a vector of outcome data of length \code{n}.} 29 | 30 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor.} 31 | 32 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 33 | 34 | \item{whichz1}{vector identifying the first predictor (column of \code{Z}) to be plotted} 35 | 36 | \item{whichz2}{vector identifying the second predictor (column of \code{Z}) to be plotted} 37 | 38 | \item{whichz3}{vector identifying the third predictor, which will be set to a pre-specified fixed quantile (determined by \code{prob})} 39 | 40 | \item{method}{method for obtaining posterior summaries at a vector of new points. Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details} 41 | 42 | \item{prob}{pre-specified quantile at which to set the third predictor (determined by \code{whichz3}); defaults to 0.5 (50th percentile)} 43 | 44 | \item{q.fixed}{vector of quantiles at which to fix the remaining predictors in \code{Z}} 45 | 46 | \item{sel}{logical expression indicating samples to keep; defaults to keeping the second half of all samples} 47 | 48 | \item{ngrid}{number of grid points to cover the range of each predictor (column in \code{Z})} 49 | 50 | \item{min.plot.dist}{predictions are only computed at new grid points that are within this distance of an observed data point; points farther than this will not be computed} 51 | 52 | \item{center}{flag for whether to scale the exposure-response function to have mean zero} 53 | 54 | \item{...}{other arguments to pass on to the prediction function} 55 | } 56 | \value{ 57 | a data frame with the value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation 58 | } 59 | \description{ 60 | Plot bivariate predictor-response function on a new grid of points 61 | } 62 | \examples{ 63 | ## First generate dataset 64 | set.seed(111) 65 | dat <- SimData(n = 50, M = 4) 66 | y <- dat$y 67 | Z <- dat$Z 68 | X <- dat$X 69 | 70 | ## Fit model with component-wise variable selection 71 | ## Using only 100 iterations to make example run quickly 72 | ## Typically should use a large number of iterations for inference 73 | set.seed(111) 74 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 75 | 76 | ## Obtain predicted value on new grid of points 77 | ## Using only a 10-by-10 point grid to make example run quickly 78 | pred.resp.bivar12 <- PredictorResponseBivarPair(fit = fitkm, min.plot.dist = 1, ngrid = 10) 79 | }
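Continuing the example, a sketch of selecting a different pair: the surface for predictors 1 and 3 with predictor 2 held at its 75th percentile (all arguments as documented above):

pred.resp.bivar13 <- PredictorResponseBivarPair(fit = fitkm, whichz1 = 1, whichz2 = 3,
  whichz3 = 2, prob = 0.75, min.plot.dist = 1, ngrid = 10)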
80 | -------------------------------------------------------------------------------- /R/argval_ParamInput.R: -------------------------------------------------------------------------------- 1 | # Validate control params list 2 | ##components of list 3 | ##lambda.jump / default=10 4 | ##mu.lambda, sigma.lambda / default=10 5 | ##a.p0, b.p0 / default=1 6 | ##r.prior / default = "gamma", alt=invunif, unif 7 | ##a.sigsq, b.sigsq / default=0.001 8 | ##mu.r, sigma.r / default=5 9 | ##r.muprop / default=1 10 | ##r.jump / default=0.2 11 | ##r.jump1, r.jump2 / default=2, 0.2 12 | ##r.a, r.b / default=0, 100 13 | 14 | # 15 | validateStartingParams <- function(varsel, Ylength, Xwidth, Zwidth, starting.params) { 16 | message("Validating starting.params...") 17 | print(starting.params) 18 | stopifnot(starting.params$b.sigsq.eps > 0, starting.params$lambda > 0) 19 | ##messages only for scalar where vector required, expansion happens in main function 20 | ##beta length, any values 21 | if (length(starting.params$beta) != Xwidth) { 22 | message("beta should be a vector of length equal to the number of columns of X. A vector will be created by repeating the input value.") 23 | } 24 | ##h.hat length and values 25 | if (length(starting.params$h.hat) != Ylength) { 26 | message("h.hat should be a vector of length equal to number of rows in Y. A vector will be created by repeating the input value.") 27 | } 28 | for (i in 1:length(starting.params$h.hat)) { 29 | stopifnot(starting.params$h.hat[i] > 0) 30 | } 31 | ##delta length, 0 or 1 are valid values 32 | if (length(starting.params$delta) != Zwidth) { 33 | message("delta should be a vector of length equal to the number of columns of Z. A vector will be created by repeating the input value.") 34 | } 35 | for (i in 1:length(starting.params$delta)) { 36 | stopifnot(starting.params$delta[i] == 1 || starting.params$delta[i] == 0) 37 | } 38 | ## r length depends on varsel, truncate here but expand if necessary in main function 39 | if (varsel == TRUE) { 40 | if (length(starting.params$r) != Zwidth) { 41 | message("r should be a vector of length equal to the number of columns of Z. A vector will be created by repeating the input value.") 42 | } 43 | } 44 | else { ##without variable selection a single r is used, so truncate vector input 45 | if (length(starting.params$r) > 1) { 46 | message("r should be a scalar. Vector input will be truncated.") 47 | starting.params$r=starting.params$r[1] 48 | } 49 | } 50 | for (i in 1:length(starting.params$r)) { 51 | stopifnot(starting.params$r[i] > 0) ##all r starting values must be positive 52 | } 53 | } 54 | 55 | 56 | validateControlParams <- function(varsel, family, id, control.params) { 57 | message("Validating control.params...") 58 | ##print(control.params) 59 | if (family == "gaussian"){ 60 | stopifnot(control.params$a.sigsq > 0, control.params$b.sigsq > 0) 61 | } 62 | if (varsel == TRUE) { 63 | stopifnot(control.params$a.p0 > 0, control.params$b.p0 > 0, control.params$r.jump1 > 0, control.params$r.jump2 > 0, control.params$r.muprop > 0) 64 | } 65 | else { 66 | stopifnot(control.params$r.jump > 0) 67 | } ##end varsel-specific stuff 68 | ##if id, need two elements in mu.lambda, sigma.lambda and lambda.jump 69 | if (!is.null(id)) { 70 | stopifnot(length(control.params$mu.lambda) == 2, length(control.params$sigma.lambda) == 2, length(control.params$lambda.jump) == 2) 71 | } 72 | ##regardless of id, validate each element of these params 73 | for (i in 1:length(control.params$mu.lambda)) { 74 | stopifnot(control.params$mu.lambda[i] > 0) 75 | } 76 | for (i in 1:length(control.params$sigma.lambda)) { 77 | stopifnot(control.params$sigma.lambda[i] > 0) 78 | } 79 | for (i in 1:length(control.params$lambda.jump)) { 80 | stopifnot(control.params$lambda.jump[i] > 0) 81 | } 82 | rprior=control.params$r.prior 83 | stopifnot(rprior == "gamma" | rprior == "unif" | rprior == "invunif") 84 | ##stopifnot(length(intersect (control.params$r.prior, c("gamma","unif","invunif")))>0) 85 | if (control.params$r.prior == "gamma") { 86 | stopifnot(control.params$mu.r > 0, control.params$sigma.r > 0) 87 | } 88 | else { 89 | stopifnot(control.params$r.a >= 0, control.params$r.b > control.params$r.a) 90 | } 91 | } -------------------------------------------------------------------------------- /R/print_verbose.R: -------------------------------------------------------------------------------- 1 | #' Options for printing summary of model fit to the console 2 | #' 3 | #' Set options for what will be printed to the console when verbose = TRUE in the main kmbayes function 4 | #' 5 | #' @param verbose_freq After this percentage of iterations has been completed, a summary of the model fit so far will be printed to the console 6 | #' @param verbose_show_ests TRUE or FALSE: flag indicating 
whether to print out summary statistics of all posterior samples obtained up until this point, for select parameters 7 | #' @param verbose_digits Number of digits to be printed to the console 8 | #' 9 | #' @export 10 | #' 11 | 12 | set_verbose_opts <- function(verbose_freq = NULL, verbose_show_ests = NULL, verbose_digits = NULL, 13 | tot_iter) { 14 | if (is.null(verbose_freq)) verbose_freq <- 10 15 | if (is.null(verbose_digits)) verbose_digits <- 5 16 | if (is.null(verbose_show_ests)) verbose_show_ests <- FALSE 17 | 18 | all_iter <- 100*(1:tot_iter)/tot_iter 19 | sel_iter <- seq(verbose_freq, 100, by = verbose_freq) 20 | print_iter <- sapply(sel_iter, function(x) min(which(all_iter >= x))) 21 | 22 | opts <- list( 23 | verbose_freq = verbose_freq, 24 | verbose_digits = verbose_digits, 25 | verbose_show_ests = verbose_show_ests, 26 | print_iter = print_iter 27 | ) 28 | opts 29 | } 30 | 31 | 32 | 33 | print_diagnostics <- function(verbose, opts, curr_iter, tot_iter, chain, varsel, hier_varsel, ztest, Z, groups) { 34 | verbose_freq <- opts$verbose_freq 35 | verbose_digits <- opts$verbose_digits 36 | verbose_show_ests <- opts$verbose_show_ests 37 | print_iter <- opts$print_iter 38 | 39 | s <- curr_iter 40 | nsamp <- tot_iter 41 | perc_iter_completed <- round(100*curr_iter/tot_iter, 1) 42 | 43 | elapsed_time <- difftime(Sys.time(), chain$time1) 44 | 45 | if (s %in% print_iter) { 46 | #if (verbose) message("------------------------------------------") 47 | if (verbose) cat("\n") 48 | message("Iteration: ", s, " (", perc_iter_completed, "% completed; ", round(elapsed_time, verbose_digits), " ", attr(elapsed_time, "units"), " elapsed)") 49 | 50 | if (verbose) { 51 | cat("Acceptance rates for Metropolis-Hastings algorithm:\n") 52 | accep_rates <- data.frame() 53 | ## lambda 54 | nm <- "lambda" 55 | rate <- colMeans(chain$acc.lambda[2:s, ,drop = FALSE]) 56 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 57 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 58 | ## r_m 59 | if (!varsel) { 60 | nm <- "r" 61 | rate <- colMeans(chain$acc.r[2:s, , drop = FALSE]) 62 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 63 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 64 | } else { 65 | nm <- "r/delta (overall)" 66 | rate <- mean(chain$acc.rdelta[2:s]) 67 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 68 | ## 69 | nm <- "r/delta (move 1)" 70 | rate <- mean(chain$acc.rdelta[2:s][chain$move.type[2:s] == 1]) 71 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 72 | ## 73 | nm <- "r/delta (move 2)" 74 | rate <- mean(chain$acc.rdelta[2:s][chain$move.type[2:s] == 2]) 75 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 76 | if (hier_varsel) { 77 | nm <- "r/delta (move 3)" 78 | rate <- mean(chain$acc.rdelta[2:s][chain$move.type[2:s] == 3]) 79 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 80 | } 81 | } 82 | print(accep_rates) 83 | 84 | ## extra information 85 | if (verbose_show_ests) { 86 | cat("\nCurrent parameter estimates:\n") 87 | chain$varsel <- varsel 88 | class(chain) <- c("bkmrfit", class(chain)) 89 | chain$Z <- Z 90 | if (hier_varsel) chain$groups <- groups 91 | ests <- ExtractEsts(chain, q = c(0.025, 0.975), sel = 2:s) 92 | #ests$h <- ests$h[c(1,2,nrow(ests$h)), ] 93 | summ <- with(ests, rbind(beta, sigsq.eps, r, lambda)) 94 | summ <- data.frame(param = rownames(summ), round(summ, verbose_digits)) 95 | rownames(summ) <- NULL 96 | print(summ) 97 | if (varsel) { 98 | cat("\nCurrent posterior inclusion 
probabilities:\n") 99 | pips <- ExtractPIPs(chain, sel = 2:s) 100 | pips[, -1] <- round(pips[, -1], verbose_digits) 101 | print(pips) 102 | } 103 | } 104 | } 105 | } 106 | 107 | } -------------------------------------------------------------------------------- /R/argval_ListInput.R: -------------------------------------------------------------------------------- 1 | # Validate control params list 2 | ##components of list 3 | ##lambda.jump / default=10 4 | ##mu.lambda, sigma.lambda / default=10 5 | ##a.p0, b.p0 / default=1 6 | ##r.prior / default = "gamma", alt=invunif, unif 7 | ##a.sigsq, b.sigsq / default=0.001 8 | ##mu.r, sigma.r / default=5 9 | ##r.muprop / default=1 10 | ##r.jump / default=0.2 11 | ##r.jump1, r.jump2 / default=2, 0.2 12 | ##r.a, r.b / default=0, 100 13 | 14 | # 15 | validateStartingValues <- function(varsel, y, X, Z, starting.values, rmethod) { 16 | Ylength <- length(y) 17 | Xwidth <- ncol(X) 18 | Zwidth <- ncol(Z) 19 | message ("Validating starting.values...") 20 | ##print(starting.values) 21 | stopifnot(starting.values$sigsq.eps > 0, all(starting.values$lambda > 0)) 22 | ##messages only for scalar where vector required, expansion happens in main function 23 | if (length(starting.values$beta) != Xwidth) { 24 | message("beta should be a vector of length equal to the number of columns of X. Input will be repeated or truncated as necessary.") 25 | } 26 | ##h.hat length and values 27 | if (length(starting.values$h.hat) != Ylength) { 28 | message("h.hat should be a vector of length equal to number of rows in Y. Input will be repeated or truncated as necessary.") 29 | } 30 | # for (i in 1:length(starting.values$h.hat)) { 31 | # stopifnot(starting.values$h.hat > 0) 32 | # } 33 | ##delta length, 0 or 1 are valid values 34 | if (length(starting.values$delta) != Zwidth) { 35 | message("delta should be a vector of length equal to the number of columns of Z. Input will be repeated or truncated as necessary.") 36 | } 37 | #for (i in 1:length(starting.values$delta)) { 38 | #stopifnot(starting.values$delta == 1 || starting.values$delta == 0) 39 | stopifnot(all(starting.values$delta %in% c(1, 0))) 40 | #} 41 | ## r length depends on varsel, truncate here but expand if necessary in main function 42 | if (varsel == TRUE) { 43 | if (length(starting.values$r) != Zwidth) { 44 | message("r should be a vector of length equal to the number of columns of Z. Input will be repeated or truncated as necessary.") 45 | } 46 | stopifnot(starting.values$r >= 0) ## note: still need to add check that delta starting values and r starting values do not conflict (e.g., delta = 1 but r_m = 0) 47 | } 48 | else { 49 | if (rmethod == "equal" & length(starting.values$r) > 1) { 50 | message("r should a scalar. Vector input will be truncated.") 51 | starting.values$r=starting.values$r[1] 52 | } else if (length(starting.values$r) != Zwidth) { 53 | message("r should be a vector of length equal to the number of columns of Z. 
Input will be repeated or truncated as necessary.") 54 | } 55 | stopifnot(all(starting.values$r > 0)) 56 | } 57 | 58 | # for (i in 1:length(starting.values$r)) { 59 | # stopifnot(starting.values$r > 0) 60 | # } 61 | } 62 | 63 | 64 | validateControlParams <- function(varsel, family, id, control.params) { 65 | message("Validating control.params...") 66 | ##print(control.params) 67 | if (family == "gaussian"){ 68 | stopifnot(control.params$a.sigsq > 0, control.params$b.sigsq > 0) 69 | } 70 | if (varsel == TRUE) { 71 | stopifnot(control.params$a.p0 > 0, control.params$b.p0 > 0, control.params$r.jump1 > 0, control.params$r.jump2 > 0, control.params$r.muprop > 0) 72 | } 73 | else { 74 | stopifnot(control.params$r.jump > 0) 75 | } ##end varsel-specific stuff 76 | ##if id, need two elements in mu.lambda, sigma.lambda and lambda.jump 77 | if (!is.null(id)) { 78 | stopifnot(length(control.params$mu.lambda) == 2, length(control.params$sigma.lambda) == 2, length(control.params$lambda.jump) == 2) 79 | } 80 | ##regardless of id, validate each element of these params 81 | for (i in 1:length(control.params$mu.lambda)) { 82 | stopifnot(control.params$mu.lambda[i] > 0) 83 | } 84 | for (i in 1:length(control.params$sigma.lambda)) { 85 | stopifnot(control.params$sigma.lambda[i] > 0) 86 | } 87 | for (i in 1:length(control.params$lambda.jump)) { 88 | stopifnot(control.params$lambda.jump[i] > 0) 89 | } 90 | rprior=control.params$r.prior 91 | stopifnot(rprior == "gamma" | rprior == "unif" | rprior == "invunif") 92 | ##stopifnot(length(intersect (control.params$r.prior, c("gamma","unif","invunif")))>0) 93 | if (control.params$r.prior == "gamma") { 94 | stopifnot(control.params$mu.r > 0, control.params$sigma.r > 0) 95 | } 96 | else { 97 | stopifnot(control.params$r.a >= 0, control.params$r.b > control.params$r.a) 98 | } 99 | } -------------------------------------------------------------------------------- /R/SamplePred.R: -------------------------------------------------------------------------------- 1 | #' Obtain posterior samples of predictions at new points 2 | #' 3 | #' Obtains posterior samples of \code{E(Y) = h(Znew) + beta*Xnew} or of \code{g^{-1}[E(Y)]} 4 | #' 5 | #' @param sel A vector selecting which iterations of the BKMR fit should be retained for inference. If not specified, will default to keeping every 10 iterations after dropping the first 50\% of samples; if this results in fewer than 100 iterations, then 100 iterations are kept 6 | #' @param Znew optional matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If not specified, defaults to using observed Z values 7 | #' @param Xnew optional matrix of new covariate values at which to obtain predictions. If not specified, defaults to using observed X values 8 | #' @param type whether to make predictions on the scale of the link or of the response; only relevant for the binomial outcome family 9 | #' @param ... 
other arguments; not currently used 10 | #' @inheritParams kmbayes 11 | #' @inheritParams ExtractEsts 12 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 13 | #' @export 14 | #' 15 | #' @return a matrix with the posterior samples at the new points 16 | #' 17 | #' @examples 18 | #' set.seed(111) 19 | #' dat <- SimData(n = 50, M = 4) 20 | #' y <- dat$y 21 | #' Z <- dat$Z 22 | #' X <- dat$X 23 | #' 24 | #' ## Fit model with component-wise variable selection 25 | #' ## Using only 100 iterations to make example run quickly 26 | #' ## Typically should use a large number of iterations for inference 27 | #' set.seed(111) 28 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 29 | #' 30 | #' med_vals <- apply(Z, 2, median) 31 | #' Znew <- matrix(med_vals, nrow = 1) 32 | #' h_true <- dat$HFun(Znew) 33 | #' set.seed(111) 34 | #' samps3 <- SamplePred(fitkm, Znew = Znew, Xnew = cbind(0)) 35 | #' head(samps3) 36 | SamplePred <- function(fit, Znew = NULL, Xnew = NULL, Z = NULL, X = NULL, y = NULL, sel = NULL, type = c("link", "response"), ...) { 37 | 38 | if (inherits(fit, "bkmrfit")) { 39 | if (is.null(y)) y <- fit$y 40 | if (is.null(Z)) Z <- fit$Z 41 | if (is.null(X)) X <- fit$X 42 | } 43 | if (length(type) > 1) type <- type[1] 44 | 45 | if (!is.null(Znew)) { 46 | if (is.null(dim(Znew))) Znew <- matrix(Znew, nrow = 1) 47 | if (inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 48 | if (ncol(Z) != ncol(Znew)) { 49 | stop("Znew must have the same number of columns as Z") 50 | } 51 | } 52 | 53 | if (is.null(Xnew)) Xnew <- X 54 | if (!inherits(Xnew, "matrix")) Xnew <- matrix(Xnew, nrow = 1) 55 | if (ncol(X) != ncol(Xnew)) { 56 | stop("Xnew must have the same number of columns as X") 57 | } 58 | 59 | if (is.null(sel)) { 60 | sel <- with(fit, seq(floor(iter/2) + 1, iter, 10)) 61 | if (length(sel) < 100) { 62 | sel <- with(fit, seq(floor(iter/2) + 1, iter, length.out = 100)) 63 | } 64 | sel <- unique(floor(sel)) 65 | } 66 | 67 | family <- fit$family 68 | data.comps <- fit$data.comps 69 | lambda <- fit$lambda 70 | sigsq.eps <- fit$sigsq.eps 71 | beta <- fit$beta 72 | r <- fit$r 73 | 74 | if (!is.null(Znew)) { 75 | preds <- matrix(NA, length(sel), nrow(Znew)) 76 | colnames(preds) <- paste0("znew", 1:nrow(Znew)) 77 | } else { 78 | preds <- matrix(NA, length(sel), nrow(Z)) 79 | colnames(preds) <- paste0("z", 1:nrow(Z)) 80 | } 81 | rownames(preds) <- paste0("iter", sel) 82 | for (s in sel) { 83 | beta.samp <- beta[s, ] 84 | 85 | if (family == "gaussian") { 86 | ycont <- y 87 | } else if (family == "binomial") { 88 | ycont <- fit$ystar[s, ] 89 | } 90 | if (!is.null(Znew)) { 91 | hsamp <- newh.update(Z = Z, Znew = Znew, Vcomps = NULL, lambda = lambda[s, ], sigsq.eps = sigsq.eps[s], r = r[s, ], y = ycont, X = X, beta = beta.samp, data.comps = data.comps) 92 | } else { 93 | hsamp <- h.update(lambda = lambda[s, ], Vcomps = NULL, sigsq.eps = sigsq.eps[s], y = ycont, X = X, beta = beta.samp, r = r[s, ], Z = Z, data.comps = data.comps)$hsamp 94 | } 95 | 96 | Xbeta <- drop(Xnew %*% beta.samp) 97 | linpred <- hsamp + Xbeta 98 | 99 | if (type == "link") { 100 | pred <- linpred 101 | } else if (type == "response") { 102 | if (family == "gaussian") { 103 | pred <- linpred 104 | } else if (family == "binomial") { 105 | pred <- pnorm(linpred) 106 | } 107 | } 108 | preds[paste0("iter", s), ] <- pred 109 | } 110 | attr(preds, "type") <- type 111 | attr(preds, "family") <- family 112 | preds 113 | 114 | } 
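The matrix returned by SamplePred has one row per retained MCMC iteration and one column per prediction point, so posterior summaries of a prediction are one-liners. A sketch continuing the example from the roxygen block above:

## Posterior mean and approximate 95% credible interval at Znew
c(mean = mean(samps3[, 1]), quantile(samps3[, 1], probs = c(0.025, 0.975)))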
-------------------------------------------------------------------------------- /man/kmbayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bkmr_main_functions.R 3 | \name{kmbayes} 4 | \alias{kmbayes} 5 | \title{Fit Bayesian kernel machine regression} 6 | \usage{ 7 | kmbayes( 8 | y, 9 | Z, 10 | X = NULL, 11 | iter = 1000, 12 | family = "gaussian", 13 | id = NULL, 14 | verbose = TRUE, 15 | Znew = NULL, 16 | starting.values = NULL, 17 | control.params = NULL, 18 | varsel = FALSE, 19 | groups = NULL, 20 | knots = NULL, 21 | ztest = NULL, 22 | rmethod = "varying", 23 | est.h = FALSE 24 | ) 25 | } 26 | \arguments{ 27 | \item{y}{a vector of outcome data of length \code{n}.} 28 | 29 | \item{Z}{an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents an predictor.} 30 | 31 | \item{X}{an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column.} 32 | 33 | \item{iter}{number of iterations to run the sampler} 34 | 35 | \item{family}{a description of the error distribution and link function to be used in the model. Currently implemented for \code{gaussian} and \code{binomial} families.} 36 | 37 | \item{id}{optional vector (of length \code{n}) of grouping factors for fitting a model with a random intercept. If NULL then no random intercept will be included.} 38 | 39 | \item{verbose}{TRUE or FALSE: flag indicating whether to print intermediate diagnostic information during the model fitting.} 40 | 41 | \item{Znew}{optional matrix of new predictor values at which to predict \code{h}, where each row represents a new observation. This will slow down the model fitting, and can be done as a post-processing step using \code{\link{SamplePred}}} 42 | 43 | \item{starting.values}{list of starting values for each parameter. If not specified default values will be chosen.} 44 | 45 | \item{control.params}{list of parameters specifying the prior distributions and tuning parameters for the MCMC algorithm. If not specified default values will be chosen.} 46 | 47 | \item{varsel}{TRUE or FALSE: indicator for whether to conduct variable selection on the Z variables in \code{h}} 48 | 49 | \item{groups}{optional vector (of length \code{M}) of group indicators for fitting hierarchical variable selection if varsel=TRUE. If varsel=TRUE without group specification, component-wise variable selections will be performed.} 50 | 51 | \item{knots}{optional matrix of knot locations for implementing the Gaussian predictive process of Banerjee et al. (2008). Currently only implemented for models without a random intercept.} 52 | 53 | \item{ztest}{optional vector indicating on which variables in Z to conduct variable selection (the remaining variables will be forced into the model).} 54 | 55 | \item{rmethod}{for those predictors being forced into the \code{h} function, the method for sampling the \code{r[m]} values. Takes the value of 'varying' to allow separate \code{r[m]} for each predictor; 'equal' to force the same \code{r[m]} for each predictor; or 'fixed' to fix the \code{r[m]} to their starting values} 56 | 57 | \item{est.h}{TRUE or FALSE: indicator for whether to sample from the posterior distribution of the subject-specific effects h_i within the main sampler. 
This will slow down the model fitting.} 58 | } 59 | \value{ 60 | an object of class "bkmrfit" (containing the posterior samples from the model fit), which has the associated methods: 61 | \itemize{ 62 | \item \code{\link{print}} (i.e., \code{\link{print.bkmrfit}}) 63 | \item \code{\link{summary}} (i.e., \code{\link{summary.bkmrfit}}) 64 | } 65 | } 66 | \description{ 67 | Fits the Bayesian kernel machine regression (BKMR) model using Markov chain Monte Carlo (MCMC) methods. 68 | } 69 | \examples{ 70 | ## First generate dataset 71 | set.seed(111) 72 | dat <- SimData(n = 50, M = 4) 73 | y <- dat$y 74 | Z <- dat$Z 75 | X <- dat$X 76 | 77 | ## Fit model with component-wise variable selection 78 | ## Using only 100 iterations to make example run quickly 79 | ## Typically should use a large number of iterations for inference 80 | set.seed(111) 81 | fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 82 | } 83 | \references{ 84 | Bobb, JF, Valeri L, Claus Henn B, Christiani DC, Wright RO, Mazumdar M, Godleski JJ, Coull BA (2015). Bayesian Kernel Machine Regression for Estimating the Health Effects of Multi-Pollutant Mixtures. Biostatistics 16, no. 3: 493-508. 85 | 86 | Banerjee S, Gelfand AE, Finley AO, Sang H (2008). Gaussian predictive process models for large spatial data sets. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 70(4), 825-848. 87 | } 88 | \seealso{ 89 | For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 90 | } 91 | -------------------------------------------------------------------------------- /R/CalcPIPs.R: -------------------------------------------------------------------------------- 1 | #' Calculate group-specific posterior inclusion probabilities 2 | #' 3 | #' Calculate posterior inclusion probabilities for each group of variables 4 | #' 5 | #' @inheritParams ExtractEsts 6 | #' 7 | #' @noRd 8 | CalcGroupPIPs <- function(fit, sel = NULL) { 9 | groups <- fit$groups 10 | if (is.null(groups)) { 11 | stop("Cannot compute group-specific posterior inclusion probabilities; BKMR was not run with variable groups") 12 | } 13 | if (is.null(sel)) { 14 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 15 | } 16 | grps <- unique(groups) 17 | groupincl.probs <- sapply(grps, function(x) mean(rowSums(fit$delta[sel, groups == x, drop = FALSE]) > 0)) 18 | groupincl.probs 19 | } 20 | 21 | #' Calculate conditional predictor specific posterior inclusion probabilities 22 | #' 23 | #' For those predictors within a multi-predictor group, as defined using the \code{groups} argument, the posterior inclusion probabilities for the predictor conditional on the group being selected into the model. 
24 | #' 25 | #' @inheritParams ExtractEsts 26 | #' 27 | #' @noRd 28 | CalcWithinGroupPIPs <- function(fit, sel = NULL) { 29 | groups <- fit$groups 30 | if (is.null(groups)) { 31 | stop("Cannot compute group-specific posterior inclusion probabilities; BKMR was not run with variable groups") 32 | } 33 | if (is.null(sel)) { 34 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 35 | } 36 | 37 | # grps <- unique(groups) 38 | # ngrps <- sapply(grps, function(x) sum(groups == x)) 39 | condprobs.group <- rep(NA, length(groups)) 40 | for (i in unique(groups)) { 41 | condprobs.group[groups == i] <- colMeans(fit$delta[sel, ][rowSums(fit$delta[sel, groups == i, drop = FALSE]) > 0, groups == i, drop = FALSE]) 42 | } 43 | 44 | condprobs.group 45 | } 46 | 47 | 48 | #' Calculate variable-specific posterior inclusion probabilities 49 | #' 50 | #' Calculate variable-specific posterior inclusion probabilities from BKMR model fit 51 | #' 52 | #' @inheritParams ExtractEsts 53 | #' 54 | #' @noRd 55 | CalcPIPs <- function(fit, sel = NULL) { 56 | if (inherits(fit, "bkmrfit")) { 57 | if (is.null(sel)) { 58 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 59 | } 60 | groups <- fit$groups 61 | if (is.null(groups)) { 62 | ret <- colMeans(fit$delta[sel, , drop = FALSE]) 63 | } 64 | 65 | } 66 | ret 67 | } 68 | 69 | #' Extract posterior inclusion probabilities (PIPs) from BKMR model fit 70 | #' 71 | #' Extract posterior inclusion probabilities (PIPs) from Bayesian Kernel Machine Regression (BKMR) model fit 72 | #' 73 | #' @inheritParams ExtractEsts 74 | #' @param z.names optional argument providing the names of the variables included in the \code{h} function. 75 | #' 76 | #' @return a data frame with the variable-specific PIPs for BKMR fit with component-wise variable selection, and with the group-specific and conditional (within-group) PIPs for BKMR fit with hierarchical variable selection. 
77 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 78 | #' @export 79 | #' 80 | #' @examples 81 | #' ## First generate dataset 82 | #' set.seed(111) 83 | #' dat <- SimData(n = 50, M = 4) 84 | #' y <- dat$y 85 | #' Z <- dat$Z 86 | #' X <- dat$X 87 | #' 88 | #' ## Fit model with component-wise variable selection 89 | #' ## Using only 100 iterations to make example run quickly 90 | #' ## Typically should use a large number of iterations for inference 91 | #' set.seed(111) 92 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 93 | #' 94 | #' ExtractPIPs(fitkm) 95 | ExtractPIPs <- function(fit, sel = NULL, z.names = NULL) { 96 | if (inherits(fit, "bkmrfit")) { 97 | if (!fit$varsel) { 98 | stop("This model was not fit with variable selection.") 99 | } 100 | if (is.null(sel)) { 101 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 102 | } 103 | if (is.null(z.names)) { 104 | z.names <- colnames(fit$Z) 105 | } 106 | if (is.null(z.names)) { 107 | z.names <- paste0("z", 1:ncol(fit$Z)) 108 | } 109 | df <- data.frame(variable = z.names, stringsAsFactors = FALSE) 110 | groups <- fit$groups 111 | if (is.null(groups)) { 112 | df$PIP <- colMeans(fit$delta[sel, , drop = FALSE]) 113 | } else { 114 | 115 | ## group-specific posterior inclusion probability 116 | df$group <- groups 117 | grps <- unique(groups) 118 | groupincl.probs <- sapply(grps, function(x) mean(rowSums(fit$delta[sel, groups == x, drop = FALSE]) > 0)) 119 | df.group <- dplyr::tibble(group = grps, 120 | groupPIP = groupincl.probs) 121 | df <- dplyr::inner_join(df, df.group, by = "group") 122 | 123 | ## within-group conditional PIP 124 | condprobs.group <- rep(NA, length(groups)) 125 | for (i in unique(groups)) { 126 | condprobs.group[groups == i] <- colMeans(fit$delta[sel, ][rowSums(fit$delta[sel, groups == i, drop = FALSE]) > 0, groups == i, drop = FALSE]) 127 | } 128 | df$condPIP <- condprobs.group 129 | } 130 | } 131 | df 132 | } 133 | 134 | -------------------------------------------------------------------------------- /R/SimData.R: -------------------------------------------------------------------------------- 1 | HFun1 <- function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3) 2 | HFun2 <- function(z, ind = 1:2) 1/4*(z[ind[1]] + z[ind[2]] + 1/2*z[ind[1]]*z[ind[2]]) 3 | HFun3 <- function(z, ind = 1:2) 4*plogis(1/4*(z[ind[1]] + z[ind[2]] + 1/2*z[ind[1]]*z[ind[2]]), 0, 0.3) 4 | 5 | #' Simulate dataset 6 | #' 7 | #' Simulate predictor, covariate, and continuous outcome data 8 | #' 9 | #' @export 10 | #' 11 | #' @inheritParams kmbayes 12 | #' @param n Number of observations 13 | #' @param M Number of predictor variables to generate 14 | #' @param sigsq.true Variance of normally distributed residual error 15 | #' @param beta.true Coefficient on the covariate 16 | #' @param hfun An integer from 1 to 3 identifying which predictor-response function to generate 17 | #' @param Zgen Method for generating the matrix Z of exposure variables, taking one of the values c("unif", "norm", "corr", "realistic") 18 | #' @param ind select which predictor(s) will be included in the \code{h} function; how many predictors that can be included will depend on which \code{h} function is being used. 
19 | #' @examples 20 | #' set.seed(5) 21 | #' dat <- SimData() 22 | #' @details 23 | #' \itemize{ 24 | #' \item \code{hfun = 1}: A nonlinear function of the first predictor 25 | #' \item \code{hfun = 2}: A linear function of the first two predictors and their product term 26 | #' \item \code{hfun = 3}: A nonlinear and nonadditive function of the first two predictor variables 27 | #' } 28 | #' 29 | #' @return a list containing the parameter values and generated variables of the simulated datasets 30 | #' 31 | SimData <- function(n = 100, M = 5, sigsq.true = 0.5, 32 | beta.true = 2, hfun = 3, Zgen = "norm", ind = 1:2, family = "gaussian") { 33 | 34 | stopifnot(n > 0, M > 0, sigsq.true >= 0, family %in% c("gaussian", "binomial")) 35 | 36 | if (family == "binomial") { 37 | sigsq.true <- 1 38 | } 39 | 40 | if (hfun == 1) { 41 | HFun <- HFun1 42 | } else if (hfun == 2) { 43 | HFun <- HFun2 44 | } else if (hfun == 3) { 45 | HFun <- HFun3 46 | } else { 47 | stop("hfun must be an integer from 1 to 3") 48 | } 49 | 50 | if (Zgen == "unif") { 51 | Z <- matrix(runif(n * M, -2, 2), n, M) 52 | } else if (Zgen == "norm") { 53 | Z <- matrix(rnorm(n * M), n, M) 54 | } else if (Zgen == "corr") { 55 | if (M < 3) { 56 | stop("M must be an integer > 2 for Zgen = 'corr'") 57 | } 58 | Sigma <- diag(1, M, M) 59 | Sigma[1,3] <- Sigma[3,1] <- 0.95 60 | Sigma[2,3] <- Sigma[3,2] <- 0.3 61 | Sigma[1,2] <- Sigma[2,1] <- 0.1 62 | Z <- MASS::mvrnorm(n = n, mu = rep(0, M), Sigma = Sigma) 63 | } else if (Zgen == "realistic") { 64 | VarRealistic <- structure(c(0.72, 0.65, 0.45, 0.48, 0.08, 0.14, 0.16, 0.42, 0.2, 65 | 0.11, 0.35, 0.1, 0.11, 0.65, 0.78, 0.48, 0.55, 0.06, 0.09, 0.17, 66 | 0.2, 0.16, 0.11, 0.32, 0.12, 0.12, 0.45, 0.48, 0.56, 0.43, 0.11, 67 | 0.15, 0.23, 0.25, 0.28, 0.16, 0.31, 0.15, 0.14, 0.48, 0.55, 0.43, 68 | 0.71, 0.2, 0.23, 0.32, 0.22, 0.29, 0.14, 0.3, 0.22, 0.18, 0.08, 69 | 0.06, 0.11, 0.2, 0.95, 0.7, 0.45, 0.22, 0.29, 0.16, 0.24, 0.2, 70 | 0.13, 0.14, 0.09, 0.15, 0.23, 0.7, 0.8, 0.36, 0.3, 0.35, 0.13, 71 | 0.23, 0.17, 0.1, 0.16, 0.17, 0.23, 0.32, 0.45, 0.36, 0.83, 0.24, 72 | 0.37, 0.2, 0.36, 0.34, 0.25, 0.42, 0.2, 0.25, 0.22, 0.22, 0.3, 73 | 0.24, 1.03, 0.41, 0.13, 0.39, 0.1, 0.1, 0.2, 0.16, 0.28, 0.29, 74 | 0.29, 0.35, 0.37, 0.41, 0.65, 0.18, 0.3, 0.18, 0.16, 0.11, 0.11, 75 | 0.16, 0.14, 0.16, 0.13, 0.2, 0.13, 0.18, 0.6, 0.18, 0.13, 0.08, 76 | 0.35, 0.32, 0.31, 0.3, 0.24, 0.23, 0.36, 0.39, 0.3, 0.18, 0.79, 77 | 0.42, 0.12, 0.1, 0.12, 0.15, 0.22, 0.2, 0.17, 0.34, 0.1, 0.18, 78 | 0.13, 0.42, 1.27, 0.1, 0.11, 0.12, 0.14, 0.18, 0.13, 0.1, 0.25, 79 | 0.1, 0.16, 0.08, 0.12, 0.1, 0.67), .Dim = c(13L, 13L)) 80 | if (M > ncol(VarRealistic)) { 81 | stop("Currently can only generate exposure data based on a realistic correlation structure with M = 13 or fewer. 
Please set M = 13 or use Zgen = c('unif','norm'") 82 | } else if (M <= 13) { 83 | Sigma <- VarRealistic[1:M, 1:M] 84 | } 85 | Z <- MASS::mvrnorm(n = n, mu = rep(0, M), Sigma = Sigma) 86 | } 87 | colnames(Z) <- paste0("z", 1:M) 88 | 89 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n)) 90 | eps <- rnorm(n, sd = sqrt(sigsq.true)) 91 | h <- apply(Z, 1, HFun) 92 | mu <- X * beta.true + h 93 | y <- drop(mu + eps) 94 | 95 | if (family == "binomial") { 96 | ystar <- y 97 | y <- ifelse(ystar > 0, 1, 0) 98 | } 99 | 100 | dat <- list(n = n, M = M, sigsq.true = sigsq.true, beta.true = beta.true, Z = Z, h = h, X = X, y = y, hfun = hfun, HFun = HFun, family = family) 101 | if (family == "binomial") { 102 | dat$ystar <- ystar 103 | } 104 | dat 105 | } 106 | -------------------------------------------------------------------------------- /R/bkmr_r_parameter_helper_functions.R: -------------------------------------------------------------------------------- 1 | make_r_params_comp <- function(r.params, whichcomp) { 2 | for(i in seq_along(r.params)) { 3 | if(length(r.params[[i]]) > 1) { 4 | r.params[[i]] <- r.params[[i]][whichcomp] 5 | } 6 | } 7 | r.params 8 | } 9 | 10 | set.r.params <- function(r.prior, comp, r.params) { 11 | if(r.prior == "gamma") { 12 | if(length(r.params$mu.r) > 1) r.params$mu.r <- r.params$mu.r[comp] 13 | if(length(r.params$sigma.r) > 1) r.params$sigma.r <- r.params$sigma.r[comp] 14 | if(length(r.params$r.jump1) > 1) r.params$r.jump1 <- r.params$r.jump1[comp] 15 | if(length(r.params$r.jump2) > 1) r.params$r.jump2 <- r.params$r.jump2[comp] 16 | } 17 | if(r.prior %in% c("unif", "invunif")) { 18 | if(length(r.params$r.a) > 1) r.params$r.a <- r.params$r.a[comp] 19 | if(length(r.params$r.b) > 1) r.params$r.b <- r.params$r.b[comp] 20 | if(length(r.params$r.jump2) > 1) r.params$r.jump2 <- r.params$r.jump2[comp] 21 | } 22 | r.params 23 | } 24 | 25 | set.r.MH.functions <- function(r.prior) { 26 | if(r.prior == "gamma") { 27 | # r.params <- list(mu.r, sigma.r, r.muprop, r.jump1, r.jump2) 28 | rprior.logdens <- function(x, r.params) { 29 | mu.r <- r.params$mu.r 30 | sigma.r <- r.params$sigma.r 31 | dgamma(x, shape=mu.r^2/sigma.r^2, rate=mu.r/sigma.r^2, log=TRUE) 32 | } 33 | rprop.gen1 <- function(r.params) { 34 | r.muprop <- r.params$r.muprop 35 | r.jump <- r.params$r.jump1 36 | rgamma(1, shape=r.muprop^2/r.jump^2, rate=r.muprop/r.jump^2) 37 | } 38 | rprop.logdens1 <- function(x, r.params) { 39 | r.muprop <- r.params$r.muprop 40 | r.jump <- r.params$r.jump1 41 | dgamma(x, shape=r.muprop^2/r.jump^2, rate=r.muprop/r.jump^2, log=TRUE) 42 | } 43 | rprop.gen2 <- function(current, r.params) { 44 | r.jump <- r.params$r.jump2 45 | rgamma(1, shape=current^2/r.jump^2, rate=current/r.jump^2) 46 | } 47 | rprop.logdens2 <- function(prop, current, r.params) { 48 | r.jump <- r.params$r.jump2 49 | dgamma(prop, shape=current^2/r.jump^2, rate=current/r.jump^2, log=TRUE) 50 | } 51 | rprop.gen <- function(current, r.params) { 52 | r.jump <- r.params$r.jump 53 | rgamma(1, shape=current^2/r.jump^2, rate=current/r.jump^2) 54 | } 55 | rprop.logdens <- function(prop, current, r.params) { 56 | r.jump <- r.params$r.jump 57 | dgamma(prop, shape=current^2/r.jump^2, rate=current/r.jump^2, log=TRUE) 58 | } 59 | } 60 | 61 | if(r.prior == "invunif") { 62 | # r.params <- list(r.a, r.b, r.jump2) 63 | rprior.logdens <- function(x, r.params) { 64 | r.a <- r.params$r.a 65 | r.b <- r.params$r.b 66 | ifelse(1/r.b <= x & x <= 1/r.a, -2*log(x) - log(r.b - r.a), log(0)) 67 | } 68 | rprop.gen1 <- function(r.params) { 69 | r.a <- r.params$r.a 70 | 
r.b <- r.params$r.b 71 | 1/runif(1, r.a, r.b) 72 | } 73 | rprop.logdens1 <- function(x, r.params) { 74 | r.a <- r.params$r.a 75 | r.b <- r.params$r.b 76 | ifelse(1/r.b <= x & x <= 1/r.a, -2*log(x) - log(r.b - r.a), log(0)) 77 | } 78 | rprop.gen2 <- function(current, r.params) { 79 | r.a <- r.params$r.a 80 | r.b <- r.params$r.b 81 | r.jump <- r.params$r.jump2 82 | truncnorm::rtruncnorm(1, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump) 83 | } 84 | rprop.logdens2 <- function(prop, current, r.params) { 85 | r.a <- r.params$r.a 86 | r.b <- r.params$r.b 87 | r.jump <- r.params$r.jump2 88 | log(truncnorm::dtruncnorm(prop, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump)) 89 | } 90 | rprop.gen <- function(current, r.params) { 91 | r.a <- r.params$r.a 92 | r.b <- r.params$r.b 93 | r.jump <- r.params$r.jump 94 | truncnorm::rtruncnorm(1, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump) 95 | } 96 | rprop.logdens <- function(prop, current, r.params) { 97 | r.a <- r.params$r.a 98 | r.b <- r.params$r.b 99 | r.jump <- r.params$r.jump 100 | log(truncnorm::dtruncnorm(prop, a = 1/r.b, b = 1/r.a, mean = current, sd = r.jump)) 101 | } 102 | } 103 | 104 | if(r.prior == "unif") { 105 | # r.params <- list(r.a, r.b, r.jump2) 106 | rprior.logdens <- function(x, r.params) { 107 | r.a <- r.params$r.a 108 | r.b <- r.params$r.b 109 | dunif(x, r.a, r.b, log=TRUE) 110 | } 111 | rprop.gen1 <- function(r.params) { 112 | r.a <- r.params$r.a 113 | r.b <- r.params$r.b 114 | runif(1, r.a, r.b) 115 | } 116 | rprop.logdens1 <- function(x, r.params) { 117 | r.a <- r.params$r.a 118 | r.b <- r.params$r.b 119 | dunif(x, r.a, r.b, log=TRUE) 120 | } 121 | rprop.gen2 <- function(current, r.params) { 122 | r.a <- r.params$r.a 123 | r.b <- r.params$r.b 124 | r.jump <- r.params$r.jump2 125 | truncnorm::rtruncnorm(1, a = r.a, b = r.b, mean = current, sd = r.jump) 126 | } 127 | rprop.logdens2 <- function(prop, current, r.params) { 128 | r.a <- r.params$r.a 129 | r.b <- r.params$r.b 130 | r.jump <- r.params$r.jump2 131 | log(truncnorm::dtruncnorm(prop, a = r.a, b = r.b, mean = current, sd = r.jump)) 132 | } 133 | rprop.gen <- function(current, r.params) { 134 | r.a <- r.params$r.a 135 | r.b <- r.params$r.b 136 | r.jump <- r.params$r.jump 137 | truncnorm::rtruncnorm(1, a = r.a, b = r.b, mean = current, sd = r.jump) 138 | } 139 | rprop.logdens <- function(prop, current, r.params) { 140 | r.a <- r.params$r.a 141 | r.b <- r.params$r.b 142 | r.jump <- r.params$r.jump 143 | log(truncnorm::dtruncnorm(prop, a = r.a, b = r.b, mean = current, sd = r.jump)) 144 | } 145 | } 146 | 147 | list(rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, rprop.logdens = rprop.logdens) 148 | } 149 | -------------------------------------------------------------------------------- /R/ExtractEsts.R: -------------------------------------------------------------------------------- 1 | #' Compute summary statistics 2 | #' 3 | #' @param q vector of quantiles 4 | #' @param s vector of posterior samples 5 | #' 6 | #' @noRd 7 | SummarySamps <- function(s, q = c(0.025, 0.25, 0.5, 0.75, 0.975)) { 8 | qs <- quantile(s, q) 9 | names(qs) <- paste0("q_", 100*q) 10 | summ <- c(mean = mean(s), sd = sd(s), qs) 11 | summ <- matrix(summ, nrow = 1, dimnames = list(NULL, names(summ))) 12 | } 13 | 14 | #' Extract summary statistics 15 | #' 16 | #' Obtain summary statistics of each parameter from the BKMR fit 17 | #' 18 | #' @param fit An object containing the 
results returned by a the \code{kmbayes} function 19 | #' @param q vector of quantiles 20 | #' @param sel logical expression indicating samples to keep; defaults to keeping the second half of all samples 21 | #' 22 | #' @export 23 | #' 24 | #' @return a list where each component is a data frame containing the summary statistics of the posterior distribution of one of the parameters (or vector of parameters) being estimated 25 | #' 26 | #' @examples 27 | #' ## First generate dataset 28 | #' set.seed(111) 29 | #' dat <- SimData(n = 50, M = 4) 30 | #' y <- dat$y 31 | #' Z <- dat$Z 32 | #' X <- dat$X 33 | #' 34 | #' ## Fit model with component-wise variable selection 35 | #' ## Using only 100 iterations to make example run quickly 36 | #' ## Typically should use a large number of iterations for inference 37 | #' set.seed(111) 38 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 39 | #' 40 | #' ests <- ExtractEsts(fitkm) 41 | #' names(ests) 42 | #' ests$beta 43 | ExtractEsts <- function(fit, q = c(0.025, 0.25, 0.5, 0.75, 0.975), sel = NULL) { 44 | if (inherits(fit, "bkmrfit")) { 45 | if (is.null(sel)) { 46 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 47 | } 48 | sigsq.eps <- SummarySamps(fit$sigsq.eps[sel], q = q) 49 | rownames(sigsq.eps) <- "sigsq.eps" 50 | 51 | r <- t(apply(fit$r[sel, , drop = FALSE], 2, SummarySamps, q = q)) 52 | rownames(r) <- paste0("r", 1:nrow(r)) 53 | 54 | beta <- t(apply(fit$beta[sel, , drop = FALSE], 2, SummarySamps, q = q)) 55 | 56 | lambda <- t(apply(fit$lambda[sel, ,drop = FALSE], 2, SummarySamps, q = q)) 57 | if (nrow(lambda) > 1) { 58 | rownames(lambda) <- paste0("lambda", 1:nrow(lambda)) 59 | } else { 60 | rownames(lambda) <- "lambda" 61 | } 62 | 63 | if (fit$est.h) { 64 | h <- t(apply(fit$h.hat[sel, ], 2, SummarySamps, q = q)) 65 | rownames(h) <- paste0("h", 1:nrow(h)) 66 | } 67 | 68 | if (!is.null(fit$hnew)) { 69 | hnew <- t(apply(fit$hnew[sel, ], 2, SummarySamps, q = q)) 70 | rownames(hnew) <- paste0("hnew", 1:nrow(hnew)) 71 | } 72 | 73 | if (!is.null(fit$ystar)) { 74 | ystar <- t(apply(fit$ystar[sel, ], 2, SummarySamps, q = q)) 75 | rownames(ystar) <- paste0("ystar", 1:nrow(ystar)) 76 | } 77 | } 78 | 79 | if (nrow(beta) > 1) { 80 | rownames(beta) <- paste0("beta", 1:nrow(beta)) 81 | } else { 82 | rownames(beta) <- "beta" 83 | } 84 | 85 | colnames(beta) <- colnames(sigsq.eps) 86 | colnames(r) <- colnames(sigsq.eps) 87 | colnames(lambda) <- colnames(sigsq.eps) 88 | if (fit$est.h) { 89 | colnames(h) <- colnames(sigsq.eps) 90 | } 91 | if (!is.null(fit$hnew)) { 92 | colnames(hnew) <- colnames(sigsq.eps) 93 | } 94 | if (!is.null(fit$ystar)) { 95 | colnames(ystar) <- colnames(sigsq.eps) 96 | } 97 | 98 | ret <- list(sigsq.eps = data.frame(sigsq.eps), beta = beta, lambda = lambda, r = r) 99 | if (fit$est.h) ret$h <- h 100 | if (!is.null(fit$hnew)) ret$hnew <- hnew 101 | if (!is.null(fit$ystar)) ret$ystar <- ystar 102 | 103 | ret 104 | } 105 | 106 | #' Extract samples 107 | #' 108 | #' Extract samples of each parameter from the BKMR fit 109 | #' 110 | #' @inheritParams ExtractEsts 111 | #' 112 | #' @export 113 | #' @return a list where each component contains the posterior samples of one of the parameters (or vector of parameters) being estimated 114 | #' 115 | #' @examples 116 | #' ## First generate dataset 117 | #' set.seed(111) 118 | #' dat <- SimData(n = 50, M = 4) 119 | #' y <- dat$y 120 | #' Z <- dat$Z 121 | #' X <- dat$X 122 | #' 123 | #' ## Fit model with component-wise variable selection 124 | #' ## Using only 100 
iterations to make example run quickly 125 | #' ## Typically should use a large number of iterations for inference 126 | #' set.seed(111) 127 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 128 | #' 129 | #' samps <- ExtractSamps(fitkm) 130 | ExtractSamps <- function(fit, sel = NULL) { 131 | if (inherits(fit, "bkmrfit")) { 132 | if (is.null(sel)) { 133 | sel <- with(fit, seq(floor(iter/2) + 1, iter)) 134 | } 135 | 136 | sigsq.eps <- fit$sigsq.eps[sel] 137 | sig.eps <- sqrt(sigsq.eps) 138 | r <- fit$r[sel, , drop = FALSE] 139 | beta <- fit$beta[sel, ] 140 | lambda <- fit$lambda[sel, ] 141 | tau <- lambda*sigsq.eps 142 | h <- fit$h.hat[sel, ] 143 | if (!is.null(fit$hnew)) hnew <- fit$hnew[sel, ] 144 | if (!is.null(fit$ystar)) ystar <- fit$ystar[sel, ] 145 | } 146 | 147 | if (!is.null(ncol(beta))) colnames(beta) <- paste0("beta", 1:ncol(beta)) 148 | colnames(r) <- paste0("r", 1:ncol(r)) 149 | colnames(h) <- paste0("h", 1:ncol(h)) 150 | if (!is.null(fit$hnew)) colnames(hnew) <- paste0("hnew", 1:ncol(hnew)) 151 | if (!is.null(fit$ystar)) colnames(ystar) <- paste0("ystar", 1:ncol(ystar)) 152 | 153 | res <- list(sigsq.eps = sigsq.eps, sig.eps = sig.eps, r = r, beta = beta, lambda = lambda, tau = tau, h = h) 154 | if (!is.null(fit$hnew)) res$hnew <- hnew 155 | if (!is.null(fit$ystar)) res$ystar <- ystar 156 | res 157 | } 158 | -------------------------------------------------------------------------------- /my-doc/tests/tests_randint.R: -------------------------------------------------------------------------------- 1 | devtools::load_all() 2 | library(ggplot2) 3 | 4 | family <- "gaussian" 5 | family <- "binomial" 6 | 7 | ## example where there is a random intercept #### 8 | 9 | n <- 100 10 | M <- 5 11 | sigsq.true <- ifelse(family == "gaussian", 0.05, 1) 12 | beta.true <- 0.5 13 | Z <- matrix(rnorm(n * M), n, M) 14 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n)) 15 | id <- rep(1:(n/2), each = 2) 16 | h <- apply(Z, 1, function(z, ind = 1) 4*plogis(z[ind[1]], 0, 0.3)) 17 | u <- rep(rnorm(n/2), each = 2) 18 | eps <- rnorm(n, 0, sqrt(sigsq.true)) 19 | y <- drop(X*beta.true + h + u + eps) 20 | if (family == "binomial") { 21 | ystar <- y 22 | y <- ifelse(ystar > 0, 1, 0) 23 | } 24 | 25 | set.seed(111) 26 | if (family == "gaussian") { 27 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 5000, family = family, id = id, varsel = TRUE, control.params = list(verbose_show_ests = TRUE)) 28 | } else if (family == "binomial") { 29 | fit0 <- kmbayes(y = y, Z = Z, X = X, iter = 5000, family = family, id = id, varsel = TRUE, control.params = list(verbose_show_ests = TRUE, lambda.jump = c(10, 0.3))) 30 | } 31 | 32 | fit0 33 | 34 | summary(fit0) 35 | 36 | sigsq_u_chain <- fit0$lambda[, 2]*fit0$sigsq.eps 37 | plot(sigsq_u_chain, type = "l") 38 | plot(sigsq_u_chain[2501:5000], type = "l") 39 | 40 | sigsq_u_est <- mean(sigsq_u_chain) 41 | sigsq_u_est 42 | 43 | TracePlot(fit = fit0, par = "beta") 44 | ExtractPIPs(fit0) 45 | 46 | pred.resp.univar <- PredictorResponseUnivar(fit = fit0) 47 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) + 48 | geom_smooth(stat = "identity") + 49 | facet_wrap(~ variable) + 50 | ylab("h(z)") 51 | 52 | pred.resp.bivar <- PredictorResponseBivar(fit = fit0, 53 | min.plot.dist = 1) 54 | 55 | pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar, 56 | Z = Z, qs = c(0.25, 0.5, 0.75)) 57 | 58 | ggplot(pred.resp.bivar.levels, aes(z1, est)) + 59 | geom_smooth(aes(col = quantile), stat = "identity") + 60 | 
facet_grid(variable2 ~ variable1) + 61 | ggtitle("h(expos1 | quantiles of expos2)") + 62 | xlab("expos1") 63 | 64 | risks.overall.approx <- OverallRiskSummaries(fit = fit0, 65 | qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5) 66 | risks.overall.approx 67 | 68 | risks.overall.exact <- OverallRiskSummaries(fit = fit0, 69 | qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5, method = "exact") 70 | risks.overall.exact 71 | 72 | risks.singvar <- SingVarRiskSummaries(fit = fit0, 73 | qs.diff = c(0.25, 0.75), q.fixed = c(0.25, 0.50, 0.75)) 74 | risks.singvar 75 | 76 | risks.int <- SingVarIntSummaries(fit = fit0, 77 | qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75)) 78 | risks.int 79 | 80 | ## tox application from BKMR paper #### 81 | 82 | ## load & prep data 83 | ## from Brent's HEI report 84 | 85 | DIR <- "H:/Research/Completed Projects/2014 Bayesian kernel machine regression (Biostatistics)/Code/tox application/" 86 | 87 | meanvals.dat <- read.table(paste0(DIR, "data/bp-xrf.csv"),sep=",",header=T) 88 | exposure.dat <- read.table(paste0(DIR, "data/BC.csv"),sep=",",header=T) 89 | 90 | caps.dat <- meanvals.dat[meanvals.dat$Exposure=="CAPs",] 91 | sham.dat <- meanvals.dat[meanvals.dat$Exposure=="Sham",] 92 | 93 | bccaps.dat <- merge(exposure.dat,caps.dat,by="DATE") 94 | bcsham.dat <- sham.dat 95 | bcsham.dat$BC <- 0 96 | 97 | bcall.dat <- rbind(bccaps.dat,bcsham.dat) 98 | bcall.dat <- bcall.dat[order(bcall.dat$seq),] 99 | bcall.dat <- bcall.dat[bcall.dat$seq!=112,] 100 | bcall.dat <- bcall.dat[bcall.dat$seq!=11,] 101 | 102 | bcall.dat$Dog <- as.numeric(bcall.dat$Dog) 103 | bcall.dat$exp <- rep(0,length(bcall.dat$Exposure)) 104 | bcall.dat$exp[bcall.dat$Exposure=="CAPs"] <- 1 105 | bcall.dat$stat <- rep(0,length(bcall.dat$Status2)) 106 | bcall.dat$stat[bcall.dat$Status2=="Post-Occlusio"] <- 1 107 | bcall.dat$stat2 <- rep(0,length(bcall.dat$Status2)) 108 | bcall.dat$stat2[bcall.dat$Status2=="Prazosin"] <- 1 109 | 110 | 111 | n.times <- tapply(rep(1,length(bcall.dat$Dog)),bcall.dat$Dog,sum) 112 | time.var <- NULL 113 | for (i in 1:length(n.times)) 114 | { 115 | time.var <- c(time.var,1:n.times[i]) 116 | } 117 | bcall.dat$time <- time.var 118 | 119 | #bcall.dat <- bcall.dat[bcall.dat$stat2==0,] 120 | 121 | dat<-NULL 122 | #corresponds to new simulation study (except doesn't include bc because variable wasn't in bcall.dat and added Mn because that was the observed effect in the original study) 123 | varnames <- c("Al","Si","Ti","Ca","K","Cu","Mn", "Ni","V","Zn", "S", "Cl", "BC") 124 | groups <- c(1,1,1,1,1,1,1, 2,2,2, 3, 4, 5) 125 | dat$Z <- as.matrix(bcall.dat[,varnames]) 126 | mean.z <- apply(dat$Z,2,mean) 127 | dat$Z <- sweep(dat$Z,2,mean.z) 128 | sd.z <- apply(dat$Z,2,sd) 129 | dat$Z <- sweep(dat$Z,2,sd.z,FUN="/") 130 | 131 | dat$X <- cbind(bcall.dat$stat,bcall.dat$stat2,bcall.dat$exp) 132 | dat$y <- matrix(bcall.dat$mmrate,length(bcall.dat$mmrate),1) 133 | dat$id <- bcall.dat$Dog 134 | 135 | ## take out outliers 136 | inds <- seq(1,dim(dat$X)[1]) 137 | noout.inds <- inds[dat$Z[,1] < 6 & dat$Z[,2] < 6 & dat$Z[,3] < 6 & dat$Z[,4] < 6 & dat$Z[,5] < 6 & dat$Z[,6] < 6 & dat$Z[,7] < 6 & dat$Z[,8] < 6 & dat$Z[,9] < 6 & dat$Z[,10] < 6 & dat$Z[,11] < 6 & dat$Z[,12] < 6 & dat$Z[,13] < 6] 138 | 139 | dat$y <- as.matrix(dat$y[noout.inds]) 140 | dat$X <- as.matrix(dat$X[noout.inds,]) 141 | dat$Z <- dat$Z[noout.inds,] 142 | # dat$Z <- scale(dat$Z) 143 | dat$id <- dat$id[noout.inds] 144 | 145 | ## set up and fit 146 | 147 | set.seed(111) 148 | fitkm <- kmbayes(y = dat$y, X = dat$X, Z = dat$Z, id = dat$id, iter = 
1000, verbose = TRUE, varsel = TRUE) 149 | -------------------------------------------------------------------------------- /R/InvestigatePrior_rm.R: -------------------------------------------------------------------------------- 1 | #' Investigate prior 2 | #' 3 | #' Investigate the impact of the \code{r[m]} parameters on the smoothness of the exposure-response function \code{h(z[m])}. 4 | #' 5 | #' @inheritParams kmbayes 6 | #' @param ngrid Number of grid points over which to plot the exposure-response function 7 | #' @param q.seq Sequence of values corresponding to different degrees of smoothness in the estimated exposure-response function. A value of q corresponds to fractions of the range of the data over which there is a decay in the correlation \code{cor(h[i],h[j])} between two subjects by 50\code{\%}. 8 | #' @param r.seq sequence of values at which to fix \code{r} for estimating the exposure-response function 9 | #' @param verbose TRUE or FALSE: flag indicating whether to print to the screen which exposure variable and q value has been completed 10 | #' @param Drange the range of the \code{z_m} data over which to apply the values of \code{q.seq}. If not specified, will be calculated as the maximum of the ranges of \code{z_1} through \code{z_M}. 11 | #' @details 12 | #' For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 13 | #' @export 14 | #' 15 | #' @return a list containing the predicted values, residuals, and estimated predictor-response function for each degree of smoothness being considered 16 | #' 17 | #' @import nlme 18 | #' 19 | #' @examples 20 | #' ## First generate dataset 21 | #' set.seed(111) 22 | #' dat <- SimData(n = 50, M = 4) 23 | #' y <- dat$y 24 | #' Z <- dat$Z 25 | #' X <- dat$X 26 | #' 27 | #' priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16)) 28 | #' PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits) 29 | InvestigatePrior <- function(y, Z, X, ngrid = 50, q.seq = c(2, 1, 1/2, 1/4, 1/8, 1/16), r.seq = NULL, Drange = NULL, verbose = FALSE) { 30 | 31 | if (is.null(r.seq)) { 32 | if (is.null(Drange)) { 33 | zranges <- diff(apply(Z, 2, range)) 34 | Drange <- max(zranges) 35 | } 36 | r.seq <- -log(1 - 0.50)/(q.seq * Drange)^2 37 | } else { 38 | if (!is.null(q.seq)) { 39 | warning("Both 'q.seq' and 'r.seq' are specified; values of 'q.seq' will be ignored.") 40 | } 41 | } 42 | 43 | Znew.mat <- matrix(NA, ngrid, ncol(Z)) 44 | preds <- vector("list", ncol(Z)) 45 | resids <- vector("list", ncol(Z)) 46 | h.hat.ests <- vector("list", ncol(Z)) 47 | for(i in 1:ncol(Z)) { 48 | preds[[i]] <- matrix(NA, ngrid, length(q.seq)) 49 | resids[[i]] <- matrix(NA, nrow(Z), length(q.seq)) 50 | h.hat.ests[[i]] <- matrix(NA, nrow(Z), length(q.seq)) 51 | } 52 | for(i in 1:ncol(Z)) { 53 | Zi <- cbind(Z[, i]) 54 | Znew <- cbind(seq(min(Zi), max(Zi), length.out = ngrid)) 55 | Znew.mat[, i] <- Znew 56 | n0 <- nrow(Zi) 57 | In <- diag(1,n0,n0) 58 | n1 <- nrow(Znew) 59 | Zall <- rbind(Zi, Znew) 60 | nall <- n0+n1 61 | for(j in seq_along(r.seq)) { 62 | r <- r.seq[j] 63 | Kpart <- as.matrix(stats::dist(Zall))^2 64 | Kmat <- exp(-r*Kpart) 65 | K <- Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 66 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 67 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 68 | U <- try(t(chol(K)), silent=TRUE) 69 | # all.equal(K, U %*% t(U)) 70 | if(inherits(U, "try-error")) { 71 | sigsvd <- svd(K) 72 | U <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) 73 | # all.equal(K, U %*% t(U), check.attributes=FALSE) 74 | } 75 | 76 | 
group <- rep(1, n0) 77 | fit <- lme(y ~ -1+X, random = list(group = pdIdent(~ -1+U))) 78 | #data.frame(sig=(fit$sigma)^2, tau=as.numeric(VarCorr(fit)[1,"Variance"]), rho=1/r, bet=fixef(fit)) 79 | h.hat <- U %*% drop(fit$coef$random[[1]]) 80 | Vinv <- chol2inv(chol(In + as.numeric(VarCorr(fit)[1,"Variance"])/(fit$sigma)^2*Kmat0)) 81 | hnew <- drop(as.numeric(VarCorr(fit)[1,"Variance"])/(fit$sigma)^2*Kmat10%*%Vinv%*%(y - X%*%fixef(fit))) 82 | 83 | preds[[i]][, j] <- hnew 84 | resids[[i]][, j] <- stats::resid(fit) 85 | h.hat.ests[[i]][, j] <- h.hat 86 | 87 | if(verbose) message("Completed: variable", i, ", r value ", j) 88 | } 89 | } 90 | 91 | res <- list(q.seq = q.seq, r.seq = r.seq, Drange = Drange, Znew = Znew.mat, resids = resids, preds = preds, h.hat = h.hat.ests) 92 | } 93 | 94 | #' Plot of exposure-response function from univariate KMR fit 95 | #' 96 | #' Plot the estimated \code{h(z[m])} estimated from frequentist KMR for \code{r[m]} fixed to specific values 97 | #' 98 | #' @inheritParams kmbayes 99 | #' @param fits output from \code{\link{InvestigatePrior}} 100 | #' @param which.z which predictors (columns in \code{Z}) to plot 101 | #' @param which.q which q.values to plot; defaults to all possible 102 | #' @param plot.resid whether to plot the data points 103 | #' @param ylim plotting limits for the y-axis 104 | #' @param ... other plotting arguments 105 | #' @export 106 | #' @import graphics 107 | #' 108 | #' @return No return value, generates plot 109 | #' 110 | #' @examples 111 | #' ## First generate dataset 112 | #' set.seed(111) 113 | #' dat <- SimData(n = 50, M = 4) 114 | #' y <- dat$y 115 | #' Z <- dat$Z 116 | #' X <- dat$X 117 | #' 118 | #' priorfits <- InvestigatePrior(y = y, Z = Z, X = X, q.seq = c(2, 1/2, 1/4, 1/16)) 119 | #' PlotPriorFits(y = y, Z = Z, X = X, fits = priorfits) 120 | PlotPriorFits <- function(y, X, Z, fits, which.z = NULL, which.q = NULL, plot.resid = TRUE, ylim = NULL, ...) { 121 | q.seq <- fits$q.seq 122 | r.seq <- fits$r.seq 123 | Znew <- fits$Znew 124 | preds <- fits$preds 125 | 126 | if (is.null(which.z)) which.z <- 1:ncol(Z) 127 | if (is.null(which.q)) which.q <- 1:length(q.seq) 128 | 129 | q.seq <- q.seq[which.q] 130 | r.seq <- r.seq[which.q] 131 | Znew <- Znew[, which.z] 132 | preds <- preds[which.z] 133 | Z <- Z[, which.z] 134 | 135 | if (plot.resid) { 136 | lm0 <- lm(y ~ X) 137 | res <- resid(lm0) + coef(lm0)["(Intercept)"] 138 | if (is.null(ylim)) ylim <- range(res) 139 | } 140 | 141 | opar <- par(mfrow=c(1 + length(which.z), length(which.q)), mar=c(4.1, 4.1, 1.6, 1.1)) 142 | on.exit(par(opar), add = TRUE) 143 | 144 | for(r in r.seq) { 145 | fun_plot <- function(x) exp(-r*x^2) 146 | curve(fun_plot, main=paste0("r = ", format(round(r,2), digits = 2, nsmall = 2)), ylab="correlation", cex.lab=1.5, cex.main=2, ylim=c(0,1), xlim=c(0, max(Z)), xlab=expression(d[ij]), xname = 'x') 147 | } 148 | for(i in 1:ncol(Z)) { 149 | for(j in seq_along(r.seq)) { 150 | est <- preds[[i]][, j] 151 | if (is.null(ylim)) ylim <- range(est, na.rm = TRUE) 152 | plot(0, type = "n", ylim = ylim, ylab = expression(hat(h)), xlab = colnames(Z)[i], cex.lab = 1.5, xlim = range(Znew), ...) 
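      ## overlay the observed data as covariate-adjusted outcomes (residuals
      ## from lm(y ~ X) plus the intercept, drawn as red points), then add the
      ## fitted curve for this predictor / r-value combination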
153 |       if (plot.resid) points(Z[, i], res, col="red", pch=19, cex=0.5)
154 |       lines(Znew[, i], est)
155 |     }
156 |   }
157 | }
158 | 
--------------------------------------------------------------------------------
/my-doc/scratch.R:
--------------------------------------------------------------------------------
1 | devtools::load_all()
2 | library(magrittr)
3 | 
4 | ## generate data ####
5 | 
6 | #seed <- 1234
7 | seed <- 123
8 | set.seed(seed)
9 | n = 200
10 | M = 5
11 | sigsq.true = 1
12 | beta.true = 0.1
13 | Z <- matrix(runif(n * M, -1, 1), n, M)
14 | X <- cbind(3*cos(Z[, 1]) + 2*rnorm(n))
15 | eps <- rnorm(n, sd = sqrt(sigsq.true))
16 | #h <- 4*plogis(Z[, 1], 0, 0.3)
17 | h <- (2*Z[, 1] + 0.5) ^ 2
18 | plot(Z[, 1], h)
19 | 
20 | mu <- X * beta.true + h
21 | y <- drop(mu + eps)
22 | ystar <- y
23 | y <- ifelse(ystar > 0, 1, 0)
24 | dat <- list(n = n, M = M, sigsq.true = sigsq.true, beta.true = beta.true, Z = Z, h = h, X = X, y = y)
25 | dat$ystar <- ystar
26 | datp <- dat
27 | #datp <- SimData(hfun = 1, family = "binomial", beta.true = 0.1)
28 | y <- datp$y
29 | Z <- datp$Z
30 | X <- datp$X
31 | mu <- datp %$% drop(h + X*beta.true)
32 | #str(datp)
33 | table(y)
34 | z1ord <- order(Z[, 1])
35 | 
36 | plot(Z[, 1], datp$ystar)
37 | abline(lm(datp$h ~ Z[, 1]), col = "blue")
38 | lines(Z[z1ord, 1], predict(lm(datp$h ~ splines::ns(Z[, 1], 3)))[z1ord], col = "blue")
39 | lines(Z[z1ord, 1], datp$h[z1ord], col = "red", lwd = 2)
40 | 
41 | ## linear probit model ####
42 | 
43 | probitfit0 <- glm(y ~ Z + X, family = binomial(link = "probit"))
44 | 
45 | tmp <- predict(probitfit0)
46 | tmp2 <- tmp
47 | tmp2 <- ifelse(y == 1, abs(tmp), -abs(tmp))
48 | tmp2 <- ifelse(y == 1, tmp - min(tmp[y == 1]), tmp - max(tmp[y == 0]))
49 | plot(tmp2, datp$ystar, col = ifelse(y == 1, "green", "blue"))
50 | abline(0, 1, col = "red")
51 | 
52 | hpred <- drop(coef(probitfit0)["(Intercept)"] + Z %*% coef(probitfit0)[grep("Z", names(coef(probitfit0)))])
53 | plot(hpred, datp$h)
54 | abline(0, 1, col = "red")
55 | 
56 | ## Oracle probit model ####
57 | 
58 | oracle <- glm(y ~ Z[, 1] + I(Z[, 1]^2) + X, family = binomial(link = "probit"))
59 | 
60 | tmp <- predict(oracle)
61 | tmp2 <- tmp
62 | tmp2 <- ifelse(y == 1, abs(tmp), -abs(tmp))
63 | tmp2 <- ifelse(y == 1, tmp - min(tmp[y == 1]), tmp - max(tmp[y == 0]))
64 | plot(tmp2, datp$ystar, col = ifelse(y == 1, "green", "blue"))
65 | abline(0, 1, col = "red")
66 | 
67 | hpred <- drop(coef(oracle)["(Intercept)"] + cbind(Z[, 1], Z[, 1]^2) %*% coef(oracle)[c("Z[, 1]", "I(Z[, 1]^2)")])
68 | plot(hpred, datp$h)
69 | abline(0, 1, col = "red")
70 | 
71 | ## BKM probit model ####
72 | 
73 | # fitpr_gam0 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "gamma"))
74 | # fitpr <- fitpr_gam0
75 | # 
76 | # fitpr_gam1 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "gamma", r.jump2 = 1))
77 | # fitpr <- fitpr_gam1
78 | 
79 | fitpr_gam2 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "gamma", r.jump2 = 2))
80 | fitpr <- fitpr_gam2
81 | 
82 | fitpr_iu <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = list(verbose_show_ests = TRUE, r.prior = "invunif"))
83 | fitpr <- fitpr_iu
84 | 
85 | fitpr_iu2 <- kmbayes(iter = 5000, y = y, Z = Z, X = X, family = "binomial", varsel = TRUE, control.params = 
list(verbose_show_ests = TRUE, r.prior = "invunif", r.jump1 = 3)) 86 | fitpr <- fitpr_iu2 87 | 88 | ## with knots matrix 89 | 90 | sel <- with(fitpr, seq(floor(iter/2) + 1, iter)) 91 | ests <- ExtractEsts(fitpr) 92 | #summary(fitpr) 93 | 94 | TracePlot(fit = fitpr, par = "beta") 95 | TracePlot(fit = fitpr, par = "sigsq.eps") 96 | TracePlot(fit = fitpr, par = "r", comp = 1) 97 | TracePlot(fit = fitpr, par = "r", comp = 2) 98 | TracePlot(fit = fitpr, par = "r", comp = 3) 99 | TracePlot(fit = fitpr, par = "r", comp = 4) 100 | TracePlot(fit = fitpr, par = "r", comp = 5) 101 | TracePlot(fit = fitpr, par = "h", comp = 20) 102 | TracePlot(fit = fitpr, par = "ystar", comp = 1) 103 | 104 | ## beta 105 | cbind(truth = datp$beta.true, ests$beta) %>% t() 106 | 107 | ## h 108 | hhat <- ests$h[, "mean"] 109 | plot(hhat, datp$h) 110 | abline(0, 1, col = "red") 111 | #plot(Z[z1ord, 1], (datp$ystar - datp$X*0.1)[z1ord], col = "blue") 112 | plot(Z[z1ord, 1], hhat[z1ord], ylim = range(c(hhat, datp$h)), col = ifelse(y[z1ord] == 1, "green", "blue")) 113 | lines(Z[z1ord, 1], datp$h[z1ord], col = "red") 114 | 115 | ## ystar 116 | ystar_hat <- ests$ystar[, "mean"] 117 | plot(ystar_hat, datp$ystar, col = ifelse(y == 1, "green", "blue")) 118 | abline(0, 1, col = "red") 119 | par(mfrow = c(1,2)) 120 | plot(datp$y, datp$ystar) 121 | plot(datp$y, ystar_hat) 122 | par(mfrow = c(1,1)) 123 | par(mfrow = c(1,2)) 124 | hist(datp$ystar) 125 | hist(ystar_hat) 126 | par(mfrow = c(1,1)) 127 | par(mfrow = c(1,2)) 128 | plot(Z[z1ord, 1], datp$ystar[z1ord], col = ifelse(y[z1ord] == 1, "green", "blue")) 129 | lines(Z[z1ord, 1], datp$h[z1ord], col = "red") 130 | plot(Z[z1ord, 1], ests$ystar[z1ord], col = ifelse(y[z1ord] == 1, "green", "blue")) 131 | lines(Z[z1ord, 1], hhat[z1ord], col = "red") 132 | par(mfrow = c(1,1)) 133 | 134 | ## phat 135 | phat0 <- pnorm(datp$h + t(X %*% datp$beta.true)) 136 | phat1 <- colMeans(pnorm(fitpr$h.hat[sel, ] + t(X %*% fitpr$beta[sel, ]))) 137 | plot(phat0, phat1) 138 | abline(0, 1, col = "red") 139 | 140 | if (FALSE) { 141 | 142 | iter = 1000; family = "binomial"; id = NULL; verbose = TRUE; Znew = NULL; starting.values = list(); control.params = list(); varsel = FALSE; groups = NULL; knots = NULL; ztest = NULL; rmethod = "varying" 143 | 144 | s <- 2 145 | beta = chain$beta[s-1,] 146 | Vinv = Vcomps$Vinv 147 | Xbeta <- drop(X %*% beta) 148 | lower <- ifelse(y == 1, 0, -Inf) 149 | upper <- ifelse(y == 0, 0, Inf) 150 | time <- system.time(samp <- tmvtnorm::rtmvnorm(1, mean = Xbeta, H = Vinv, lower = lower, upper = upper, algorithm = "gibbs", start.value = chain$ystar[s - 1, ])) 151 | time 152 | head(drop(samp)) 153 | 154 | } 155 | 156 | ## regular probit regression #### 157 | 158 | truth <- c(0.5, beta.true, 1, 2) 159 | tmpmu <- cbind(1, X, Z[, 1], Z[, 2]) %*% truth 160 | dattmp <- dplyr::data_frame(X = drop(X), Z1 = Z[, 1], Z2 = Z[, 2], ystar = drop(tmpmu + eps)) 161 | dattmp$y <- ifelse(dattmp$ystar > 0, 1, 0) 162 | modtmp <- glm(y ~ X + Z1 + Z2, family = binomial(link = "probit"), data = dattmp) 163 | coef(modtmp) 164 | 165 | niter <- 1000 166 | 167 | samps <- list() 168 | samps$coef <- matrix(NA, niter, 4, dimnames = list(NULL, c("int", "beta", "Z1", "Z2"))) 169 | samps$coef[1, ] <- rep(1, ncol(samps$coef)) 170 | samps$ystar <- matrix(NA, niter, n) 171 | samps$ystar[1, ] <- ifelse(dattmp$y == 1, 1/2, -1/2) 172 | 173 | for (i in 2:niter) { 174 | m0 <- lm(samps$ystar[i-1, ] ~ X + Z1 + Z2, data = dattmp) 175 | samps$coef[i, ] <- coef(m0) 176 | samps$ystar[i, ] <- truncnorm::rtruncnorm(1, a = 
ifelse(dattmp$y == 1, 0, -Inf), b = ifelse(dattmp$y == 1, Inf, 0), mean = predict(m0), sd = 1)
177 | }
178 | 
179 | colMeans(samps$coef)
180 | coef(modtmp)
181 | 
182 | apply(samps$coef, 2, sd)
183 | summary(modtmp)$coef[, "Std. Error"]
184 | 
185 | plot(dattmp$ystar, colMeans(samps$ystar), col = ifelse(dattmp$y == 1, "green", "blue"))
186 | abline(0, 1, col = "red")
187 | 
188 | par(mfrow = c(1,2))
189 | plot(dattmp$y, dattmp$ystar)
190 | plot(dattmp$y, colMeans(samps$ystar))
191 | par(mfrow = c(1,1))
192 | par(mfrow = c(1,2))
193 | hist(dattmp$ystar)
194 | hist(colMeans(samps$ystar))
195 | par(mfrow = c(1,1))
196 | 
--------------------------------------------------------------------------------
/R/ComputePostmeanHnew.R:
--------------------------------------------------------------------------------
1 | #' Compute the posterior mean and variance of \code{h} at new predictor values
2 | #'
3 | #' @inheritParams kmbayes
4 | #' @param fit An object containing the results returned by the \code{kmbayes} function
5 | #' @param Znew matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If set to NULL then will default to using the observed exposures Z.
6 | #' @param method method for obtaining posterior summaries at a vector of new points. Options are "approx" and "exact"; defaults to "approx", which is faster particularly for large datasets; see details
7 | #' @param sel selects which iterations of the MCMC sampler to use for inference; see details
8 | #' @details
9 | #' \itemize{
10 | #' \item If \code{method == "approx"}, the argument \code{sel} defaults to the second half of the MCMC iterations.
11 | #' \item If \code{method == "exact"}, the argument \code{sel} defaults to keeping every 10 iterations after dropping the first 50\% of samples, or if this results in fewer than 100 iterations, then 100 iterations are kept
12 | #' }
13 | #' For guided examples and additional information, go to \url{https://jenfb.github.io/bkmr/overview.html}
14 | #' @export
15 | #'
16 | #' @return a list of length two containing the posterior mean vector and posterior variance matrix
17 | #'
18 | #' @examples
19 | #' set.seed(111)
20 | #' dat <- SimData(n = 50, M = 4)
21 | #' y <- dat$y
22 | #' Z <- dat$Z
23 | #' X <- dat$X
24 | #'
25 | #' ## Fit model with component-wise variable selection
26 | #' ## Using only 100 iterations to make example run quickly
27 | #' ## Typically should use a large number of iterations for inference
28 | #' set.seed(111)
29 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
30 | #'
31 | #' med_vals <- apply(Z, 2, median)
32 | #' Znew <- matrix(med_vals, nrow = 1)
33 | #' h_true <- dat$HFun(Znew)
34 | #' h_est1 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "approx")
35 | #' h_est2 <- ComputePostmeanHnew(fitkm, Znew = Znew, method = "exact")
36 | ComputePostmeanHnew <- function(fit, y = NULL, Z = NULL, X = NULL, Znew = NULL, sel = NULL, method = "approx") {
37 |   if (method == "approx") {
38 |     res <- ComputePostmeanHnew.approx(fit = fit, y = y, Z = Z, X = X, Znew = Znew, sel = sel)
39 |   } else if (method == "exact") {
40 |     res <- ComputePostmeanHnew.exact(fit = fit, y = y, Z = Z, X = X, Znew = Znew, sel = sel)
41 |   }
42 |   res
43 | }
44 | 
45 | #' Compute the posterior mean and variance of \code{h} at new predictor values
46 | #'
47 | #' Function to approximate the posterior mean and variance as a function of the estimated model parameters (e.g., tau, lambda, beta, and sigsq.eps)
48 | #' 
@param Znew matrix of new predictor values at which to predict new \code{h}, where each row represents a new observation. If set to NULL then will default to using the observed exposures Z. 49 | #' @inheritParams kmbayes 50 | #' @inheritParams ExtractEsts 51 | #' @noRd 52 | ComputePostmeanHnew.approx <- function(fit, y = NULL, Z = NULL, X = NULL, Znew = NULL, sel = NULL) { 53 | 54 | if (inherits(fit, "bkmrfit")) { 55 | if (is.null(y)) y <- fit$y 56 | if (is.null(Z)) Z <- fit$Z 57 | if (is.null(X)) X <- fit$X 58 | } 59 | 60 | if (!is.null(Znew)) { 61 | if(is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 62 | if(inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 63 | } 64 | if(is.null(dim(X))) X <- matrix(X, ncol=1) 65 | 66 | ests <- ExtractEsts(fit, sel = sel) 67 | sigsq.eps <- ests$sigsq.eps[, "mean"] 68 | r <- ests$r[, "mean"] 69 | beta <- ests$beta[, "mean"] 70 | lambda <- ests$lambda[, "mean"] 71 | if (fit$family == "gaussian") { 72 | ycont <- y 73 | } else if (fit$family == "binomial") { 74 | ycont <- ests$ystar[, "mean"] 75 | } 76 | 77 | Kpart <- makeKpart(r, Z) 78 | K <- exp(-Kpart) 79 | V <- diag(1, nrow(Z), nrow(Z)) + lambda[1]*K 80 | cholV <- chol(V) 81 | Vinv <- chol2inv(cholV) 82 | 83 | if (!is.null(Znew)) { 84 | # if(is.null(data.comps$knots)) { 85 | n0 <- nrow(Z) 86 | n1 <- nrow(Znew) 87 | nall <- n0 + n1 88 | Kpartall <- makeKpart(r, rbind(Z, Znew)) 89 | Kmat <- exp(-Kpartall) 90 | Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 91 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 92 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 93 | 94 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 95 | postvar <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 96 | postmean <- lamK10Vinv %*% (ycont - X%*%beta) 97 | # } else { 98 | # stop("GPP not yet implemented") 99 | # } 100 | } else { 101 | lamKVinv <- lambda[1]*K%*%Vinv 102 | postvar <- lambda[1]*sigsq.eps*(K - lamKVinv%*%K) 103 | postmean <- lamKVinv %*% (ycont - X%*%beta) 104 | } 105 | ret <- list(postmean = drop(postmean), postvar = postvar) 106 | ret 107 | } 108 | 109 | #' Compute the posterior mean and variance of \code{h} at a new predictor values 110 | #' 111 | #' Function to estimate the posterior mean and variance by obtaining the posterior mean and variance at particular iterations and then using the iterated mean and variance formulas 112 | #' 113 | #' @inheritParams kmbayes 114 | #' @inheritParams SamplePred 115 | #' @inheritParams ExtractEsts 116 | #' 117 | #' @noRd 118 | ComputePostmeanHnew.exact <- function(fit, y = NULL, Z = NULL, X = NULL, Znew = NULL, sel = NULL) { 119 | 120 | if (inherits(fit, "bkmrfit")) { 121 | if (is.null(y)) y <- fit$y 122 | if (is.null(Z)) Z <- fit$Z 123 | if (is.null(X)) X <- fit$X 124 | } 125 | 126 | if (!is.null(Znew)) { 127 | if (is.null(dim(Znew))) Znew <- matrix(Znew, nrow = 1) 128 | if (inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 129 | if (ncol(Z) != ncol(Znew)) { 130 | stop("Znew must have the same number of columns as Z") 131 | } 132 | } 133 | 134 | if (is.null(dim(X))) X <- matrix(X, ncol=1) 135 | 136 | # if (!is.null(fit$Vinv)) { 137 | # sel <- attr(fit$Vinv, "sel") 138 | # } 139 | 140 | if (is.null(sel)) { 141 | sel <- with(fit, seq(floor(iter/2) + 1, iter, 10)) 142 | if (length(sel) < 100) { 143 | sel <- with(fit, seq(floor(iter/2) + 1, iter, length.out = 100)) 144 | } 145 | sel <- unique(floor(sel)) 146 | } 147 | 148 | family <- fit$family 149 | data.comps <- fit$data.comps 150 | post.comps.store <- list(postmean = vector("list", length(sel)), 151 | postvar = 
vector("list", length(sel)) 152 | ) 153 | 154 | for (i in seq_along(sel)) { 155 | s <- sel[i] 156 | beta <- fit$beta[s, ] 157 | lambda <- fit$lambda[s, ] 158 | sigsq.eps <- fit$sigsq.eps[s] 159 | r <- fit$r[s, ] 160 | 161 | if (family == "gaussian") { 162 | ycont <- y 163 | } else if (family == "binomial") { 164 | ycont <- fit$ystar[s, ] 165 | } 166 | 167 | Kpart <- makeKpart(r, Z) 168 | K <- exp(-Kpart) 169 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 170 | Vinv <- Vcomps$Vinv 171 | # if (is.null(fit$Vinv)) { 172 | # V <- diag(1, nrow(Z), nrow(Z)) + lambda[1]*K 173 | # cholV <- chol(V) 174 | # Vinv <- chol2inv(cholV) 175 | # } else { 176 | # Vinv <- fit$Vinv[[i]] 177 | # } 178 | 179 | if (!is.null(Znew)) { 180 | # if(is.null(data.comps$knots)) { 181 | n0 <- nrow(Z) 182 | n1 <- nrow(Znew) 183 | nall <- n0 + n1 184 | Kpartall <- makeKpart(r, rbind(Z, Znew)) 185 | Kmat <- exp(-Kpartall) 186 | Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 187 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 188 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 189 | 190 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 191 | postvar <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 192 | postmean <- lamK10Vinv %*% (ycont - X%*%beta) 193 | # } else { 194 | # stop("GPP not yet implemented") 195 | # } 196 | } else { 197 | lamKVinv <- lambda[1]*K%*%Vinv 198 | postvar <- lambda[1]*sigsq.eps*(K - lamKVinv%*%K) 199 | postmean <- lamKVinv %*% (ycont - X%*%beta) 200 | } 201 | 202 | post.comps.store$postmean[[i]] <- postmean 203 | post.comps.store$postvar[[i]] <- postvar 204 | 205 | } 206 | 207 | postmean_mat <- t(do.call("cbind", post.comps.store$postmean)) 208 | m <- colMeans(postmean_mat) 209 | postvar_arr <- with(post.comps.store, 210 | array(unlist(postvar), 211 | dim = c(nrow(postvar[[1]]), ncol(postvar[[1]]), length(postvar))) 212 | ) 213 | ve <- var(postmean_mat) 214 | ev <- apply(postvar_arr, c(1, 2), mean) 215 | v <- ve + ev 216 | ret <- list(postmean = m, postvar = v) 217 | 218 | ret 219 | } 220 | 221 | -------------------------------------------------------------------------------- /R/ComputeMixtureSummaries.R: -------------------------------------------------------------------------------- 1 | riskSummary.approx <- function(point1, point2, preds.fun, ...) { 2 | cc <- c(-1, 1) 3 | newz <- rbind(point1, point2) 4 | preds <- preds.fun(newz, ...) 5 | diff <- drop(cc %*% preds$postmean) 6 | diff.sd <- drop(sqrt(cc %*% preds$postvar %*% cc)) 7 | c(est = diff, sd = diff.sd) 8 | } 9 | riskSummary.samp <- function(point1, point2, preds.fun, ...) { 10 | cc <- c(-1, 1) 11 | newz <- rbind(point1, point2) 12 | preds <- preds.fun(newz, ...) 13 | diff.preds <- drop(preds %*% cc) 14 | c(est = mean(diff.preds), sd = sd(diff.preds)) 15 | } 16 | interactionSummary.approx <- function(newz.q1, newz.q2, preds.fun, ...) { 17 | cc <- c(-1*c(-1, 1), c(-1, 1)) 18 | newz <- rbind(newz.q1, newz.q2) 19 | preds <- preds.fun(newz, ...) 20 | int <- drop(cc %*% preds$postmean) 21 | int.se <- drop(sqrt(cc %*% preds$postvar %*% cc)) 22 | c(est = int, sd = int.se) 23 | } 24 | interactionSummary.samp <- function(newz.q1, newz.q2, preds.fun, ...) { 25 | cc <- c(-1*c(-1, 1), c(-1, 1)) 26 | newz <- rbind(newz.q1, newz.q2) 27 | preds <- preds.fun(newz, ...) 
28 | int.preds <- drop(preds %*% cc) 29 | c(est = mean(int.preds), sd = sd(int.preds)) 30 | } 31 | 32 | 33 | 34 | #' Calculate overall risk summaries 35 | #' 36 | #' Compare estimated \code{h} function when all predictors are at a particular quantile to when all are at a second fixed quantile 37 | #' @inheritParams kmbayes 38 | #' @inheritParams ComputePostmeanHnew 39 | #' @inherit ComputePostmeanHnew details 40 | #' @param qs vector of quantiles at which to calculate the overall risk summary 41 | #' @param q.fixed a second quantile at which to compare the estimated \code{h} function 42 | #' @export 43 | #' @return a data frame containing the (posterior mean) estimate and posterior standard deviation of the overall risk measures 44 | #' @examples 45 | #' ## First generate dataset 46 | #' set.seed(111) 47 | #' dat <- SimData(n = 50, M = 4) 48 | #' y <- dat$y 49 | #' Z <- dat$Z 50 | #' X <- dat$X 51 | #' 52 | #' ## Fit model with component-wise variable selection 53 | #' ## Using only 100 iterations to make example run quickly 54 | #' ## Typically should use a large number of iterations for inference 55 | #' set.seed(111) 56 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 57 | #' 58 | #' risks.overall <- OverallRiskSummaries(fit = fitkm, qs = seq(0.25, 0.75, by = 0.05), 59 | #' q.fixed = 0.5, method = "exact") 60 | OverallRiskSummaries <- function(fit, y = NULL, Z = NULL, X = NULL, qs = seq(0.25, 0.75, by = 0.05), q.fixed = 0.5, method = "approx", sel = NULL) { 61 | 62 | if (inherits(fit, "bkmrfit")) { 63 | if (is.null(y)) y <- fit$y 64 | if (is.null(Z)) Z <- fit$Z 65 | if (is.null(X)) X <- fit$X 66 | } 67 | 68 | point1 <- apply(Z, 2, quantile, q.fixed) 69 | if (method %in% c("approx", "exact")) { 70 | preds.fun <- function(znew) ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = znew, sel = sel, method = method) 71 | riskSummary <- riskSummary.approx 72 | } else { 73 | stop("method must be one of c('approx', 'exact')") 74 | } 75 | risks.overall <- t(sapply(qs, function(quant) riskSummary(point1 = point1, point2 = apply(Z, 2, quantile, quant), preds.fun = preds.fun))) 76 | risks.overall <- data.frame(quantile = qs, risks.overall) 77 | } 78 | 79 | #Compare estimated \code{h} function when a single variable (or a set of variables) is at the 75th versus 25th percentile, when all of the other variables are fixed at a particular percentile 80 | VarRiskSummary <- function(whichz = 1, fit, y = NULL, Z = NULL, X = NULL, qs.diff = c(0.25, 0.75), q.fixed = 0.5, method = "approx", sel = NULL, ...) { 81 | 82 | if (inherits(fit, "bkmrfit")) { 83 | if (is.null(y)) y <- fit$y 84 | if (is.null(Z)) Z <- fit$Z 85 | if (is.null(X)) X <- fit$X 86 | } 87 | 88 | point2 <- point1 <- apply(Z, 2, quantile, q.fixed) 89 | point2[whichz] <- apply(Z[, whichz, drop = FALSE], 2, quantile, qs.diff[2]) 90 | point1[whichz] <- apply(Z[, whichz, drop = FALSE], 2, quantile, qs.diff[1]) 91 | # point1 <- makePoint(whichz, Z, qs.diff[1], q.fixed) 92 | # point2 <- makePoint(whichz, Z, qs.diff[2], q.fixed) 93 | if (method %in% c("approx", "exact")) { 94 | preds.fun <- function(znew) ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = znew, sel = sel, method = method) 95 | riskSummary <- riskSummary.approx 96 | } else { 97 | stop("method must be one of c('approx', 'exact')") 98 | } 99 | riskSummary(point1 = point1, point2 = point2, preds.fun = preds.fun, ...) 
100 | } 101 | 102 | #' Single Variable Risk Summaries 103 | #' 104 | #' Compute summaries of the risks associated with a change in a single variable in \code{Z} from a single level (quantile) to a second level (quantile), for the other variables in \code{Z} fixed to a specific level (quantile) 105 | #' 106 | #' @inheritParams kmbayes 107 | #' @inheritParams ExtractEsts 108 | #' @inheritParams OverallRiskSummaries 109 | #' @inherit ComputePostmeanHnew details 110 | #' @param qs.diff vector indicating the two quantiles \code{q_1} and \code{q_2} at which to compute \code{h(z_{q2}) - h(z_{q1})} 111 | #' @param q.fixed vector of quantiles at which to fix the remaining predictors in \code{Z} 112 | #' @param z.names optional vector of names for the columns of \code{z} 113 | #' @param ... other arguments to pass on to the prediction function 114 | #' @param which.z vector indicating which variables (columns of \code{Z}) for which the summary should be computed 115 | #' @export 116 | #' 117 | #' @return a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures 118 | #' 119 | #' @examples 120 | #' ## First generate dataset 121 | #' set.seed(111) 122 | #' dat <- SimData(n = 50, M = 4) 123 | #' y <- dat$y 124 | #' Z <- dat$Z 125 | #' X <- dat$X 126 | #' 127 | #' ## Fit model with component-wise variable selection 128 | #' ## Using only 100 iterations to make example run quickly 129 | #' ## Typically should use a large number of iterations for inference 130 | #' set.seed(111) 131 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 132 | #' 133 | #' risks.singvar <- SingVarRiskSummaries(fit = fitkm, method = "exact") 134 | SingVarRiskSummaries <- function(fit, y = NULL, Z = NULL, X = NULL, which.z = 1:ncol(Z), qs.diff = c(0.25, 0.75), q.fixed = c(0.25, 0.50, 0.75), method = "approx", sel = NULL, z.names = colnames(Z), ...) { 135 | 136 | if (inherits(fit, "bkmrfit")) { 137 | if (is.null(y)) y <- fit$y 138 | if (is.null(Z)) Z <- fit$Z 139 | if (is.null(X)) X <- fit$X 140 | } 141 | 142 | if(is.null(z.names)) z.names <- paste0("z", 1:ncol(Z)) 143 | 144 | df <- dplyr::tibble() 145 | for(i in seq_along(q.fixed)) { 146 | for(j in seq_along(which.z)) { 147 | risk <- VarRiskSummary(whichz = which.z[j], fit = fit, y = y, Z = Z, X = X, qs.diff = qs.diff, q.fixed = q.fixed[i], method = method, sel = sel, ...) 148 | df0 <- dplyr::tibble(q.fixed = q.fixed[i], variable = z.names[j], est = risk["est"], sd = risk["sd"]) 149 | df <- dplyr::bind_rows(df, df0) 150 | } 151 | } 152 | #df <- dplyr::mutate_(df, variable = ~factor(variable, levels = z.names[which.z]), q.fixed = ~as.factor(q.fixed)) 153 | df <- dplyr::mutate_at(df, "variable", function(x) factor(x, levels = z.names[which.z])) 154 | df <- dplyr::mutate_at(df, "q.fixed", function(x) as.factor(x)) 155 | attr(df, "qs.diff") <- qs.diff 156 | df 157 | } 158 | 159 | SingVarIntSummary <- function(whichz = 1, fit, y = NULL, Z = NULL, X = NULL, qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75), method = "approx", sel = NULL, ...) 
{
160 | 
161 |   if (inherits(fit, "bkmrfit")) {
162 |     if (is.null(y)) y <- fit$y
163 |     if (is.null(Z)) Z <- fit$Z
164 |     if (is.null(X)) X <- fit$X
165 |   }
166 | 
167 |   q.fixed <- qs.fixed[1]
168 |   point2 <- point1 <- apply(Z, 2, quantile, q.fixed)
169 |   point2[whichz] <- quantile(Z[, whichz], qs.diff[2])
170 |   point1[whichz] <- quantile(Z[, whichz], qs.diff[1])
171 |   newz.q1 <- rbind(point1, point2)
172 | 
173 |   q.fixed <- qs.fixed[2]
174 |   point2 <- point1 <- apply(Z, 2, quantile, q.fixed)
175 |   point2[whichz] <- quantile(Z[, whichz], qs.diff[2])
176 |   point1[whichz] <- quantile(Z[, whichz], qs.diff[1])
177 |   newz.q2 <- rbind(point1, point2)
178 | 
179 |   if (method %in% c("approx", "exact")) {
180 |     preds.fun <- function(znew) ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = znew, sel = sel, method = method)
181 |     interactionSummary <- interactionSummary.approx
182 |   } else {
183 |     stop("method must be one of c('approx', 'exact')")
184 |   }
185 |   interactionSummary(newz.q1, newz.q2, preds.fun, ...)
186 | }
187 | 
188 | #' Single Variable Interaction Summaries
189 | #'
190 | #' Compare the single-predictor health risks when all of the other predictors in Z are fixed to a specific quantile to when all of the other predictors in Z are fixed to a second specific quantile.
191 | #' @inheritParams kmbayes
192 | #' @inheritParams ExtractEsts
193 | #' @inheritParams SingVarRiskSummaries
194 | #' @inherit ComputePostmeanHnew details
195 | #' @param qs.diff vector indicating the two quantiles at which to compute the single-predictor risk summary
196 | #' @param qs.fixed vector indicating the two quantiles at which to fix all of the remaining exposures in \code{Z}
197 | #' @export
198 | #'
199 | #' @return a data frame containing the (posterior mean) estimate and posterior standard deviation of the single-predictor risk measures
200 | #'
201 | #' @examples
202 | #' ## First generate dataset
203 | #' set.seed(111)
204 | #' dat <- SimData(n = 50, M = 4)
205 | #' y <- dat$y
206 | #' Z <- dat$Z
207 | #' X <- dat$X
208 | #'
209 | #' ## Fit model with component-wise variable selection
210 | #' ## Using only 100 iterations to make example run quickly
211 | #' ## Typically should use a large number of iterations for inference
212 | #' set.seed(111)
213 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE)
214 | #'
215 | #' risks.int <- SingVarIntSummaries(fit = fitkm, method = "exact")
216 | SingVarIntSummaries <- function(fit, y = NULL, Z = NULL, X = NULL, which.z = 1:ncol(Z), qs.diff = c(0.25, 0.75), qs.fixed = c(0.25, 0.75), method = "approx", sel = NULL, z.names = colnames(Z), ...) {
217 | 
218 |   if (inherits(fit, "bkmrfit")) {
219 |     if (is.null(y)) y <- fit$y
220 |     if (is.null(Z)) Z <- fit$Z
221 |     if (is.null(X)) X <- fit$X
222 |   }
223 | 
224 |   if(is.null(z.names)) z.names <- paste0("z", 1:ncol(Z))
225 | 
226 |   ints <- sapply(which.z, function(whichz)
227 |     SingVarIntSummary(whichz = whichz, fit = fit, Z = Z, X = X, y = y, qs.diff = qs.diff, qs.fixed = qs.fixed, method = method, sel = sel, ...)
228 |   )
229 | 
230 |   df <- dplyr::tibble(variable = factor(z.names[which.z], levels = z.names), est = ints["est", ], sd = ints["sd", ])
231 | }
232 | 
233 | 
234 | 
235 | 
--------------------------------------------------------------------------------
/my-doc/probit_reg.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Probit Bayesian kernel machine regression for binary outcomes"
3 | author: "Jennifer F. 
Bobb" 4 | date: "`r Sys.Date()`" 5 | output: 6 | html_document: 7 | theme: cosmo 8 | toc: true 9 | toc_float: true 10 | --- 11 | 12 | ```{r options, echo=FALSE, message=FALSE, warning=FALSE} 13 | ## if the current working directory is the directory where this file is located rather than the project directory, set the working directory to the project directory 14 | knitr::opts_chunk$set(fig.width = 5, fig.height = 3, message = FALSE) 15 | ``` 16 | 17 | In this document, we demonstrate how to apply Bayesian kernel machine regression (BKMR) for binary outcomes using the `bkmr` R package. See the [overview guide](https://jenfb.github.io/bkmr/overview.html) for additional information about BKMR, including guided examples for continuous outcomes. 18 | 19 | ## Probit kernel machine regression 20 | 21 | We implement kernel machine regression (KMR) for binary outcomes, 22 | 23 | $$ 24 | \Phi^{-1}(P(Y_i = 1)) = h(z_{i1}, \ldots, z_{iM}) + \beta{\bf x}_i, \quad i = 1, \ldots, n 25 | $$ 26 | where $\Phi$ is the cummulative distribution function (CDF) for the standard normal distribution ($\Phi^{-1}$ is the probit link function), the outcome $Y_i$ is a binary (0/1) variable, $h$ is a flexible function of the predictor variables $z_{i1}, \ldots, z_{iM}$, and ${\bf x}$ is a vector of covariates ($\beta$ is the corresponding vector of coefficients). We will refer to the predictors ${\bf z}$ as exposure variables and to $h(\cdot)$ as the exposure-response function. The function $h$ is modeled using a kernel machine representation that can capture complex, non-linear and non-additive, exposure-response relationships. 27 | 28 | We implement probit, rather than logistic regression, primarily for reasons of computational convenience and efficiency for Bayesien inference using Gibbs sampling. In particular, for this we note that the probit model above can be reexpressed by incorporating a latent normal random variable ($Y^*$), as 29 | $$ 30 | Y_i^* = h(z_{i1}, \ldots, z_{iM}) + \beta{\bf x}_i + \epsilon_i, \quad i = 1, \ldots, n 31 | $$ 32 | where $\epsilon_i ~ \mbox{N}(0,1)$ and $Y_i = I(Y_i^* > 0)$ is equal to 1 if $Y_i^* > 0$ and is equal to 0 otherwise. In our example below we will demonstrate how the exposure-response function $h$ can be interpreted under the probit regression model. 33 | 34 | ## Example 35 | 36 | First, load the R package. 37 | ```{r load package 0, echo=FALSE, eval=TRUE, message=FALSE, warning=FALSE, results='hide'} 38 | suppressMessages(library(bkmr)) 39 | ##suppressMessages(devtools::document()) 40 | ##devtools::load_all() 41 | ``` 42 | ```{r load package 1, eval=FALSE, message=FALSE, warning=FALSE} 43 | library("bkmr") 44 | ``` 45 | 46 | ### Generate data 47 | 48 | Let's consider a simple example with outcome data are generating under the probit model above, where the true exposure-response function only depends on a single exposure variable. 
49 | 
50 | ```{r simdata}
51 | set.seed(123)
52 | n <- 200 ## number of observations
53 | M <- 4 ## number of exposure variables
54 | beta.true <- 0.1
55 | Z <- matrix(runif(n * M, -1, 1), n, M)
56 | x <- 3*cos(Z[, 1]) + 2*rnorm(n)
57 | hfun <- function(z) (2*z + 0.5) ^ 2
58 | h <- hfun(Z[, 1]) ## only depends on z1
59 | 
60 | ## generate using latent normal representation
61 | eps <- rnorm(n)
62 | ystar <- x * beta.true + h + eps
63 | y <- ifelse(ystar > 0, 1, 0)
64 | 
65 | datp <- list(n = n, M = M, beta.true = beta.true, Z = Z, h = h, X = cbind(x), y = y, eps = eps, ystar = ystar)
66 | rm(n, M, beta.true, Z, x, h, eps, y, ystar)
67 | ```
68 | 
69 | Let's view the true exposure-response function used to generate the data.
70 | ```{r plot, fig.height=3.5, fig.width=3.5}
71 | curve(hfun, from = min(datp$Z[, 1]), to = max(datp$Z[, 1]),
72 |       xlab = expression(z[1]), ylab = expression(h(z[1])))
73 | ```
74 | 
75 | ### Fit BKMR
76 | 
77 | To fit the BKMR model, we use the `kmbayes` function.
78 | ```{r fit orig, eval=FALSE}
79 | set.seed(123)
80 | fitpr <- kmbayes(y = datp$y, Z = datp$Z, X = datp$X, 
81 |                  iter = 10000, verbose = FALSE, 
82 |                  varsel = TRUE, family = "binomial", 
83 |                  control.params = list(r.jump2 = 0.5))
84 | ```
85 | ```{r fit orig load, echo=FALSE}
86 | DIR <- ifelse(grepl("my-doc", getwd()), getwd(), paste(getwd(), "my-doc", sep = "/"))
87 | load(paste(DIR, "probit_reg.RData", sep = "/"))
88 | rm(DIR)
89 | ```
90 | The argument `family` indicates the outcome distribution; 'gaussian' and 'binomial' are currently implemented. Note that here we changed the tuning parameter `r.jump2` of the Metropolis-Hastings algorithm for updating the $r_m$ parameters under variable selection, to get an improved acceptance rate of `r 100*with(fitpr, round(mean(acc.rdelta[move.type == 2]), 2))`% versus ~65% under the default tuning parameters (details of the tuning parameters are in the [overview guide](https://jenfb.github.io/bkmr/overview.html)).
91 | 
92 | ```{r summary}
93 | summary(fitpr)
94 | ```
95 | 
96 | ## Interpreting output
97 | 
98 | ### On probit scale
99 | 
100 | We may wish to interpret the estimated $h$ function directly. We note that $h$ quantifies the relationship between the exposures and the (probit of the) probability of an event ($Y = 1$), holding the covariates ${\bf x}$ fixed. By considering the latent normal formulation above, $h$ may alternatively be interpreted as the relationship between the exposures and an underlying, continuous latent variable $Y^*$. For example, if $Y$ is an indicator variable for whether an individual has a particular health outcome, $Y^*$ could be interpreted as a latent marker of health status.
101 | 
102 | Let's investigate the estimated exposure-response function $h$. Here we plot the univariate relationship $h(z_m)$, where all of the other exposures are fixed to their median values.
103 | 
104 | ```{r pred-resp}
105 | pred.resp.univar <- PredictorResponseUnivar(fit = fitpr, method = "exact")
106 | ```
107 | We use the `ggplot2` package to plot the resulting cross section of $h$. 
108 | ```{r plot pred-resp, fig.height=4, fig.width=6}
109 | library(ggplot2)
110 | ggplot(pred.resp.univar, aes(z, est, ymin = est - 1.96*se, ymax = est + 1.96*se)) + 
111 |     geom_smooth(stat = "identity") + 
112 |     facet_wrap(~ variable) + 
113 |     ylab("h(z)")
114 | ```
115 | 
116 | As expected given the small posterior inclusion probabilities for $z_2$, $z_3$, and $z_4$, the estimated exposure-response functions for these exposures are flat, indicating no association with the outcome; this matches the true data-generating distribution. We next compare the exposure-response function for $z_1$ estimated under BKMR with that estimated by a probit model assuming linear terms for each of the exposure variables, as well as with an 'oracle' probit model that knows the true form of the exposure-response function, fitted using maximum likelihood:
117 | 
118 | ```{r probit}
119 | z1 <- datp$Z[, 1]
120 | x <- drop(datp$X)
121 | oracle <- glm(y ~ z1 + I(z1^2) + x, family = binomial(link = "probit"), data = datp)
122 | lin <- glm(y ~ Z + x, family = binomial(link = "probit"), data = datp)
123 | 
124 | ## predictions under the oracle model
125 | z1_grid <- seq(min(datp$Z[, 1]), max(datp$Z[, 1]), length.out = 50)
126 | hpred_oracle <- predict(oracle, newdata = data.frame(z1 = z1_grid, x = 0), se.fit = TRUE)
127 | 
128 | ## predictions under BKMR
129 | Znew <- cbind(z1 = z1_grid, z2 = median(datp$Z[, 2]), 
130 |               z3 = median(datp$Z[, 3]), z4 = median(datp$Z[, 4]))
131 | hpred_bkmr <- ComputePostmeanHnew(fit = fitpr, Znew = Znew, method = "exact")
132 | 
133 | ## predictions under the model with linear terms
134 | hpred_lin <- predict(lin, newdata = list(Z = Znew, x = rep(0, nrow(Znew))), se.fit = TRUE)
135 | ```
136 | 
137 | Now let's compare the estimated exposure-response functions $h(z_1)$.
138 | ```{r compare, fig.height=4, fig.width=6}
139 | plot(z1_grid, hpred_bkmr$postmean, type = "l", 
140 |      ylim = c(0.95*min(datp$h), max(datp$h)), 
141 |      xlab = expression(z[1]), ylab = expression(h(z[1])))
142 | lines(z1_grid, hpred_oracle$fit, col = "red", lty = 2, lwd = 2)
143 | lines(z1_grid, hfun(z1_grid), col = "blue", lty = 3, lwd = 2)
144 | lines(z1_grid, hpred_lin$fit, col = "orange", lty = 4, lwd = 2)
145 | legend(-1, 6, c("BKMR", "oracle", "truth", "linear"), lwd = 2, 
146 |        col = c("black", "red", "blue", "orange"), lty = 1:4, 
147 |        y.intersp = 0.8)
148 | ```
149 | 
150 | As expected, we see that the BKMR fit performs better than the model assuming a linear exposure-response relationship, but not as well as the oracle model.
151 | 
152 | ### On probability scale
153 | 
154 | Alternatively, we may wish to interpret the association between the exposures and the (untransformed) probability of the outcome. For this, we observe
155 | $$
156 | P(Y = 1 \mid {\bf z}, {\bf x}) = \Phi\{h(z_{1}, \ldots, z_{M}) + \beta{\bf x}\},
157 | $$
158 | so that the probability of the event depends not just on $h$, but also on the particular values of the covariates ${\bf x}$. Thus, to estimate the association between $h$ and the probability of the outcome, we must either fix the covariates or integrate over them. Posterior samples of the predicted probabilities may be obtained using the `SamplePred` function, in which the user specifies the new Z matrix at which to obtain predictions, as well as a particular value of the vector ${\bf x}$. Here we plot the posterior mean of the predicted probabilities as a function of $z_1$ for particular, fixed values of the covariates and with the other exposures set to their median values. 
159 | 160 | ```{r pred, fig.height=3.5, fig.width=7} 161 | Xnew1 <- quantile(datp$X, 0.1) 162 | Xnew2 <- quantile(datp$X, 0.9) 163 | 164 | ptrue1 <- with(datp, pnorm(hfun(z1_grid) + Xnew1*beta.true)) 165 | ptrue2 <- with(datp, pnorm(hfun(z1_grid) + Xnew2*beta.true)) 166 | 167 | pred_samps1 <- SamplePred(fit = fitpr, Znew = Znew, Xnew = Xnew1, type = "response") 168 | pred_samps2 <- SamplePred(fit = fitpr, Znew = Znew, Xnew = Xnew2, type = "response") 169 | pred_ests1 <- colMeans(pred_samps1) 170 | pred_ests2 <- colMeans(pred_samps2) 171 | 172 | par(mfrow = c(1, 2)) 173 | plot(z1_grid, pred_ests1, type = "l", ylim = range(ptrue1), 174 | xlab = expression(z[1]), ylab = expression(P(Y == 1))) 175 | lines(z1_grid, ptrue1, col = "blue", lty = 3, lwd = 2) 176 | legend("bottomright", c("BKMR", "truth"), lwd = 2, 177 | col = c("black", "blue"), lty = c(1,3), 178 | y.intersp = 0.8, cex = 0.8) 179 | plot(z1_grid, pred_ests2, type = "l", ylim = range(ptrue1), 180 | xlab = expression(z[1]), ylab = expression(P(Y == 1))) 181 | lines(z1_grid, ptrue2, col = "blue", lty = 3, lwd = 2) 182 | ``` 183 | 184 | 185 | To integrate over the covariates, we use the fact that, under the assumed probit model, the probability of the outcome given the exposure variables ${\bf z}$ may be expressed as 186 | $$ 187 | P(Y = 1 \mid {\bf z}) 188 | = \mbox{E}[P(Y = 1 \mid {\bf z}, {\bf x}) \mid {\bf z}] 189 | = \mbox{E}[\Phi\{h(z_{1}, \ldots, z_{M}) + \beta{\bf x}\} \mid z_{1}, \ldots, z_{M}]. 190 | $$ 191 | We estimate this quantity as 192 | $$ 193 | \frac{1}{n}\sum_{i = 1}^n\Phi\{h(z_{1}, \ldots, z_{M}) + \beta{\bf x_i}\}. 194 | $$ 195 | Currently, built-in functions to do this have not yet been implemented. 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | -------------------------------------------------------------------------------- /R/PredictorResponseFunctions.R: -------------------------------------------------------------------------------- 1 | PredictorResponseUnivarVar <- function(whichz = 1, fit, y, Z, X, method = "approx", ngrid = 50, q.fixed = 0.5, sel = NULL, min.plot.dist = Inf, center = TRUE, z.names = colnames(Z), ...) 
{ 2 | 3 | if (ncol(Z) < 2) stop("requires there to be at least 2 predictor variables") 4 | 5 | if (is.null(z.names)) { 6 | colnames(Z) <- paste0("z", 1:ncol(Z)) 7 | } else { 8 | colnames(Z) <- z.names 9 | } 10 | 11 | ord <- c(whichz, setdiff(1:ncol(Z), whichz)) 12 | z1 <- seq(min(Z[,ord[1]]), max(Z[,ord[1]]), length = ngrid) 13 | z.others <- lapply(2:ncol(Z), function(x) quantile(Z[,ord[x]], q.fixed)) 14 | z.all <- c(list(z1), z.others) 15 | newz.grid <- expand.grid(z.all) 16 | colnames(newz.grid) <- colnames(Z)[ord] 17 | newz.grid <- newz.grid[,colnames(Z)] 18 | 19 | if (!is.null(min.plot.dist)) { 20 | mindists <- rep(NA,nrow(newz.grid)) 21 | for (i in seq_along(mindists)) { 22 | pt <- as.numeric(newz.grid[i, colnames(Z)[ord[1]]]) 23 | dists <- fields::rdist(matrix(pt, nrow = 1), Z[, colnames(Z)[ord[1]]]) 24 | mindists[i] <- min(dists) 25 | } 26 | } 27 | 28 | if (method %in% c("approx", "exact")) { 29 | preds <- ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = newz.grid, sel = sel, method = method) 30 | preds.plot <- preds$postmean 31 | se.plot <- sqrt(diag(preds$postvar)) 32 | } else { 33 | stop("method must be one of c('approx', 'exact')") 34 | } 35 | if(center) preds.plot <- preds.plot - mean(preds.plot) 36 | if(!is.null(min.plot.dist)) { 37 | preds.plot[mindists > min.plot.dist] <- NA 38 | se.plot[mindists > min.plot.dist] <- NA 39 | } 40 | 41 | res <- dplyr::tibble(z = z1, est = preds.plot, se = se.plot) 42 | } 43 | 44 | #' Plot univariate predictor-response function on a new grid of points 45 | #' 46 | #' Plot univariate predictor-response function on a new grid of points 47 | #' 48 | #' @inheritParams kmbayes 49 | #' @inheritParams ExtractEsts 50 | #' @inheritParams SingVarRiskSummaries 51 | #' 52 | #' @param which.z vector identifying which predictors (columns of \code{Z}) should be plotted 53 | #' @param ngrid number of grid points to cover the range of each predictor (column in \code{Z}) 54 | #' @param min.plot.dist specifies a minimum distance that a new grid point needs to be from an observed data point in order to compute the prediction; points further than this will not be computed 55 | #' @param center flag for whether to scale the exposure-response function to have mean zero 56 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 57 | #' 58 | #' @export 59 | #' 60 | #' @return a long data frame with the predictor name, predictor value, posterior mean estimate, and posterior standard deviation 61 | #' 62 | #' @examples 63 | #' ## First generate dataset 64 | #' set.seed(111) 65 | #' dat <- SimData(n = 50, M = 4) 66 | #' y <- dat$y 67 | #' Z <- dat$Z 68 | #' X <- dat$X 69 | #' 70 | #' ## Fit model with component-wise variable selection 71 | #' ## Using only 100 iterations to make example run quickly 72 | #' ## Typically should use a large number of iterations for inference 73 | #' set.seed(111) 74 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 75 | #' pred.resp.univar <- PredictorResponseUnivar(fit = fitkm) 76 | PredictorResponseUnivar <- function(fit, y = NULL, Z = NULL, X = NULL, which.z = 1:ncol(Z), method = "approx", ngrid = 50, q.fixed = 0.5, sel = NULL, min.plot.dist = Inf, center = TRUE, z.names = colnames(Z), ...) 
{ 77 | 78 | if (inherits(fit, "bkmrfit")) { 79 | y <- fit$y 80 | Z <- fit$Z 81 | X <- fit$X 82 | } 83 | 84 | if (is.null(z.names)) { 85 | z.names <- paste0("z", 1:ncol(Z)) 86 | } 87 | 88 | df <- dplyr::tibble() 89 | for(i in which.z) { 90 | res <- PredictorResponseUnivarVar(whichz = i, fit = fit, y = y, Z = Z, X = X, method = method, ngrid = ngrid, q.fixed = q.fixed, sel = sel, min.plot.dist = min.plot.dist, center = center, z.names = z.names, ...) 91 | #df0 <- dplyr::mutate(res, variable = z.names[i]) %>% 92 | # dplyr::select_(~variable, ~z, ~est, ~se) 93 | df0 <- dplyr::mutate(res, variable = z.names[i]) %>% 94 | dplyr::select_at(c("variable", "z", "est", "se")) 95 | df <- dplyr::bind_rows(df, df0) 96 | } 97 | df$variable <- factor(df$variable, levels = z.names[which.z]) 98 | df 99 | } 100 | 101 | 102 | 103 | #' Plot bivariate predictor-response function on a new grid of points 104 | #' 105 | #' @inheritParams kmbayes 106 | #' @inheritParams ExtractEsts 107 | #' @inheritParams SingVarRiskSummaries 108 | #' @inheritParams PredictorResponseUnivar 109 | #' @param whichz1 vector identifying the first predictor that (column of \code{Z}) should be plotted 110 | #' @param whichz2 vector identifying the second predictor that (column of \code{Z}) should be plotted 111 | #' @param whichz3 vector identifying the third predictor that will be set to a pre-specified fixed quantile (determined by \code{prob}) 112 | #' @param prob pre-specified quantile to set the third predictor (determined by \code{whichz3}); defaults to 0.5 (50th percentile) 113 | #' 114 | #' @export 115 | #' 116 | #' @return a data frame with value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation 117 | #' 118 | #' @examples 119 | #' ## First generate dataset 120 | #' set.seed(111) 121 | #' dat <- SimData(n = 50, M = 4) 122 | #' y <- dat$y 123 | #' Z <- dat$Z 124 | #' X <- dat$X 125 | #' 126 | #' ## Fit model with component-wise variable selection 127 | #' ## Using only 100 iterations to make example run quickly 128 | #' ## Typically should use a large number of iterations for inference 129 | #' set.seed(111) 130 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 131 | #' 132 | #' ## Obtain predicted value on new grid of points 133 | #' ## Using only a 10-by-10 point grid to make example run quickly 134 | #' pred.resp.bivar12 <- PredictorResponseBivarPair(fit = fitkm, min.plot.dist = 1, ngrid = 10) 135 | PredictorResponseBivarPair <- function(fit, y = NULL, Z = NULL, X = NULL, whichz1 = 1, whichz2 = 2, whichz3 = NULL, method = "approx", prob = 0.5, q.fixed = 0.5, sel = NULL, ngrid = 50, min.plot.dist = 0.5, center = TRUE, ...) 
{ 136 | 137 | if (inherits(fit, "bkmrfit")) { 138 | if (is.null(y)) y <- fit$y 139 | if (is.null(Z)) Z <- fit$Z 140 | if (is.null(X)) X <- fit$X 141 | } 142 | 143 | if(ncol(Z) < 3) stop("requires there to be at least 3 Z variables") 144 | 145 | if(is.null(colnames(Z))) colnames(Z) <- paste0("z", 1:ncol(Z)) 146 | 147 | if(is.null(whichz3)) { 148 | ord <- c(whichz1, whichz2, setdiff(1:ncol(Z), c(whichz1, whichz2))) 149 | } else { 150 | ord <- c(whichz1, whichz2, whichz3, setdiff(1:ncol(Z), c(whichz1, whichz2, whichz3))) 151 | } 152 | z1 <- seq(min(Z[,ord[1]]), max(Z[,ord[1]]), length=ngrid) 153 | z2 <- seq(min(Z[,ord[2]]), max(Z[,ord[2]]), length=ngrid) 154 | z3 <- quantile(Z[, ord[3]], probs = prob) 155 | z.all <- c(list(z1), list(z2), list(z3)) 156 | if(ncol(Z) > 3) { 157 | z.others <- lapply(4:ncol(Z), function(x) quantile(Z[,ord[x]], q.fixed)) 158 | z.all <- c(z.all, z.others) 159 | } 160 | newz.grid <- expand.grid(z.all) 161 | z1save <- newz.grid[, 1] 162 | z2save <- newz.grid[, 2] 163 | colnames(newz.grid) <- colnames(Z)[ord] 164 | newz.grid <- newz.grid[,colnames(Z)] 165 | 166 | if(!is.null(min.plot.dist)) { 167 | mindists <- rep(NA, nrow(newz.grid)) 168 | for(k in seq_along(mindists)) { 169 | pt <- as.numeric(newz.grid[k,c(colnames(Z)[ord[1]],colnames(Z)[ord[2]])]) 170 | dists <- fields::rdist(matrix(pt, nrow = 1), Z[, c(colnames(Z)[ord[1]],colnames(Z)[ord[2]])]) 171 | mindists[k] <- min(dists) 172 | } 173 | } 174 | 175 | if (method %in% c("approx", "exact")) { 176 | preds <- ComputePostmeanHnew(fit = fit, y = y, Z = Z, X = X, Znew = newz.grid, sel = sel, method = method) 177 | preds.plot <- preds$postmean 178 | se.plot <- sqrt(diag(preds$postvar)) 179 | } else { 180 | stop("method must be one of c('approx', 'exact')") 181 | } 182 | if(center) preds.plot <- preds.plot - mean(preds.plot) 183 | if(!is.null(min.plot.dist)) { 184 | preds.plot[mindists > min.plot.dist] <- NA 185 | se.plot[mindists > min.plot.dist] <- NA 186 | } 187 | # hgrid <- matrix(preds.plot, ngrid, ngrid, dimnames=list(z1=round(z1,2), z2=round(z2,2))) 188 | # se.grid <- matrix(se.plot, ngrid, ngrid, dimnames=list(z1=round(z1,2), z2=round(z2,2))) 189 | 190 | res <- dplyr::tibble(z1 = z1save, z2 = z2save, est = preds.plot, se = se.plot) 191 | } 192 | 193 | #' Predict the exposure-response function at a new grid of points 194 | #' 195 | #' Predict the exposure-response function at a new grid of points 196 | #' 197 | #' @inheritParams kmbayes 198 | #' @inheritParams ExtractEsts 199 | #' @inheritParams SingVarRiskSummaries 200 | #' @inheritParams PredictorResponseUnivar 201 | #' @param z.pairs data frame showing which pairs of predictors to plot 202 | #' @param ngrid number of grid points in each dimension 203 | #' @param verbose TRUE or FALSE: flag of whether to print intermediate output to the screen 204 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 205 | #' @export 206 | #' 207 | #' @return a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the value of the second predictor, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function 208 | #' 209 | #' @examples 210 | #' ## First generate dataset 211 | #' set.seed(111) 212 | #' dat <- SimData(n = 50, M = 4) 213 | #' y <- dat$y 214 | #' Z <- dat$Z 215 | #' X <- dat$X 216 | #' 217 | #' ## Fit model with component-wise variable selection 218 | #' ## Using only 100 iterations to make example run quickly 219 | 
#' ## Typically should use a large number of iterations for inference 220 | #' set.seed(111) 221 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 222 | #' 223 | #' ## Obtain predicted value on new grid of points for each pair of predictors 224 | #' ## Using only a 10-by-10 point grid to make example run quickly 225 | #' pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10) 226 | #' 227 | PredictorResponseBivar <- function(fit, y = NULL, Z = NULL, X = NULL, z.pairs = NULL, method = "approx", ngrid = 50, q.fixed = 0.5, sel = NULL, min.plot.dist = 0.5, center = TRUE, z.names = colnames(Z), verbose = TRUE, ...) { 228 | 229 | if (inherits(fit, "bkmrfit")) { 230 | if (is.null(y)) y <- fit$y 231 | if (is.null(Z)) Z <- fit$Z 232 | if (is.null(X)) X <- fit$X 233 | } 234 | 235 | if (is.null(z.names)) { 236 | z.names <- colnames(Z) 237 | if (is.null(z.names)) { 238 | z.names <- paste0("z", 1:ncol(Z)) 239 | } 240 | } 241 | 242 | if (is.null(z.pairs)) { 243 | z.pairs <- expand.grid(z1 = 1:ncol(Z), z2 = 1:ncol(Z)) 244 | z.pairs <- z.pairs[z.pairs$z1 < z.pairs$z2, ] 245 | } 246 | 247 | df <- dplyr::tibble() 248 | for(i in 1:nrow(z.pairs)) { 249 | compute <- TRUE 250 | whichz1 <- z.pairs[i, 1] %>% unlist %>% unname 251 | whichz2 <- z.pairs[i, 2] %>% unlist %>% unname 252 | if(whichz1 == whichz2) compute <- FALSE 253 | z.name1 <- z.names[whichz1] 254 | z.name2 <- z.names[whichz2] 255 | names.pair <- c(z.name1, z.name2) 256 | if(nrow(df) > 0) { ## determine whether the current pair of variables has already been done 257 | completed.pairs <- df %>% 258 | #dplyr::select_('variable1', 'variable2') %>% 259 | dplyr::select_at(c('variable1', 'variable2')) %>% 260 | dplyr::distinct() %>% 261 | dplyr::transmute(z.pair = paste(variable1, variable2, sep = ":")) %>% 262 | unlist %>% unname 263 | if(paste(names.pair, collapse = ":") %in% completed.pairs | paste(rev(names.pair), collapse = ":") %in% completed.pairs) compute <- FALSE 264 | } 265 | if(compute) { 266 | if(verbose) message("Pair ", i, " out of ", nrow(z.pairs)) 267 | res <- PredictorResponseBivarPair(fit = fit, y = y, Z = Z, X = X, whichz1 = whichz1, whichz2 = whichz2, method = method, ngrid = ngrid, q.fixed = q.fixed, sel = sel, min.plot.dist = min.plot.dist, center = center, z.names = z.names, ...)
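## tag this pair's cross-section with both variable names before stacking it onto the long-format result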
268 | df0 <- res 269 | df0$variable1 <- z.name1 270 | df0$variable2 <- z.name2 271 | df0 %<>% 272 | #dplyr::select_(~variable1, ~variable2, ~z1, ~z2, ~est, ~se) 273 | dplyr::select_at(c("variable1", "variable2", "z1", "z2", "est", "se")) 274 | df <- dplyr::bind_rows(df, df0) 275 | } 276 | } 277 | df$variable1 <- factor(df$variable1, levels = z.names) 278 | df$variable2 <- factor(df$variable2, levels = z.names) 279 | df 280 | } 281 | 282 | #' Plot cross-sections of the bivariate predictor-response function 283 | #' 284 | #' Function to plot the \code{h} function of a particular variable at different levels (quantiles) of a second variable 285 | #' 286 | #' @export 287 | #' @inheritParams kmbayes 288 | #' @inheritParams PredictorResponseBivar 289 | #' @param pred.resp.df object obtained from running the function \code{\link{PredictorResponseBivar}} 290 | #' @param qs vector of quantiles at which to fix the second variable 291 | #' @param both_pairs flag indicating whether, when \code{h(z1)} is computed for z2 fixed at different levels, the cross-sections should also be computed in the reverse order (\code{h(z2)} at different levels of z1) 292 | #' @details For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 293 | #' 294 | #' @return a long data frame with the name of the first predictor, the name of the second predictor, the value of the first predictor, the quantile at which the second predictor is fixed, the posterior mean estimate, and the posterior standard deviation of the estimated exposure response function 295 | #' 296 | #' @examples 297 | #' ## First generate dataset 298 | #' set.seed(111) 299 | #' dat <- SimData(n = 50, M = 4) 300 | #' y <- dat$y 301 | #' Z <- dat$Z 302 | #' X <- dat$X 303 | #' 304 | #' ## Fit model with component-wise variable selection 305 | #' ## Using only 100 iterations to make example run quickly 306 | #' ## Typically should use a large number of iterations for inference 307 | #' set.seed(111) 308 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 309 | #' 310 | #' ## Obtain predicted value on new grid of points for each pair of predictors 311 | #' ## Using only a 10-by-10 point grid to make example run quickly 312 | #' pred.resp.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10) 313 | #' pred.resp.bivar.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.resp.bivar, 314 | #' Z = Z, qs = c(0.1, 0.5, 0.9)) 315 | PredictorResponseBivarLevels <- function(pred.resp.df, Z = NULL, qs = c(0.25, 0.5, 0.75), both_pairs = TRUE, z.names = NULL) { 316 | #var.pairs <- dplyr::distinct(dplyr::select_(pred.resp.df, ~variable1, ~variable2)) 317 | var.pairs <- dplyr::distinct(dplyr::select_at(pred.resp.df, c("variable1", "variable2"))) 318 | if (both_pairs) { 319 | var.pairs.rev <- dplyr::tibble( 320 | variable1 = var.pairs$variable2, 321 | 322 | variable2 = var.pairs$variable1 323 | ) 324 | var.pairs <- rbind(var.pairs, var.pairs.rev) 325 | } 326 | 327 | if (is.null(z.names)) { 328 | z.names <- colnames(Z) 329 | if (is.null(z.names)) { 330 | z.names <- paste0("z", 1:ncol(Z)) 331 | colnames(Z) <- z.names 332 | } 333 | } 334 | 335 | df <- data.frame() 336 | for (i in 1:nrow(var.pairs)) { 337 | var1 <- as.character(unlist(var.pairs[i, "variable1"])) 338 | var2 <- as.character(unlist(var.pairs[i, "variable2"])) 339 | preds <- pred.resp.df[pred.resp.df$variable1 == var1 & pred.resp.df$variable2 == var2, ] 340 | if (nrow(preds) == 0) { 341 | preds <- pred.resp.df[pred.resp.df$variable1 ==
var2 & pred.resp.df$variable2 == var1, ] 342 | preds.rev <- dplyr::tibble( 343 | variable1 = preds$variable2, 344 | variable2 = preds$variable1, 345 | z1 = preds$z2, 346 | z2 = preds$z1, 347 | est = preds$est, 348 | se = preds$se 349 | ) 350 | preds <- preds.rev 351 | #preds <- dplyr::arrange_(preds, ~z2, ~z1) 352 | preds <- dplyr::arrange_at(preds, c("z2", "z1")) 353 | } 354 | 355 | ngrid <- sqrt(nrow(preds)) 356 | preds.plot <- preds$est 357 | se.plot <- preds$se 358 | 359 | hgrid <- matrix(preds.plot, ngrid, ngrid) 360 | se.grid <- matrix(se.plot, ngrid, ngrid) 361 | z1 <- preds$z1[1:ngrid] 362 | z2 <- preds$z2[seq(1, by = ngrid, length.out = ngrid)] 363 | 364 | quants <- quantile(Z[, var2], qs) 365 | 366 | ## relation of z1 with outcome at different levels of z2 367 | se.grid.sub <- hgrid.sub <- matrix(NA, ngrid, length(qs)) 368 | for (k in seq_along(quants)) { 369 | sub.sel <- which.min(abs(z2 - quants[k])) 370 | hgrid.sub[, k] <- hgrid[, sub.sel] 371 | se.grid.sub[, k] <- se.grid[, sub.sel] 372 | } 373 | colnames(hgrid.sub) <- colnames(se.grid.sub) <- paste0("q", seq_along(qs)) 374 | hgrid.df <- tidyr::gather(data.frame(hgrid.sub), quantile, 'est', convert = TRUE) 375 | se.grid.df <- tidyr::gather(data.frame(se.grid.sub), quantile, 'se') 376 | 377 | df.curr <- data.frame(variable1 = var1, variable2 = var2, z1 = z1, quantile = factor(hgrid.df$quantile, labels = qs), est = hgrid.df$est, se = se.grid.df$se, stringsAsFactors = FALSE) 378 | df <- rbind(df, df.curr) 379 | } 380 | df <- tibble::as_tibble(df) %>% #dplyr::tbl_df(df) %>% 381 | #dplyr::arrange_(~variable1, ~variable2) 382 | dplyr::arrange_at(c("variable1", "variable2")) 383 | df 384 | } 385 | -------------------------------------------------------------------------------- /R/bkmr_parameter_update_functions.R: -------------------------------------------------------------------------------- 1 | beta.update <- function(X, Vinv, y, sigsq.eps) { 2 | XVinv <- crossprod(X, Vinv) 3 | Vbeta <- chol2inv(chol(XVinv %*% X)) 4 | cholVbeta <- chol(Vbeta) 5 | betahat <- Vbeta %*% XVinv %*% y 6 | n01 <- rnorm(ncol(X)) 7 | betahat + crossprod(sqrt(sigsq.eps)*cholVbeta, n01) 8 | } 9 | 10 | sigsq.eps.update <- function(y, X, beta, Vinv, a.eps=1e-3, b.eps=1e-3) { 11 | mu <- y - X%*%beta 12 | prec.y <- rgamma(1, shape=a.eps + nrow(X)/2, rate=b.eps + 1/2*crossprod(mu, Vinv)%*%mu) 13 | 1/prec.y 14 | } 15 | 16 | ystar.update <- function(y, X, beta, h) { 17 | mu <- drop(h + X %*% beta) 18 | lower <- ifelse(y == 1, 0, -Inf) 19 | upper <- ifelse(y == 0, 0, Inf) 20 | samp <- truncnorm::rtruncnorm(1, a = lower, b = upper, mean = mu, sd = 1) 21 | drop(samp) 22 | } 23 | #' @importFrom tmvtnorm rtmvnorm 24 | ystar.update.noh <- function(y, X, beta, Vinv, ystar) { 25 | mu <- drop(X %*% beta) 26 | lower <- ifelse(y == 1, 0, -Inf) 27 | upper <- ifelse(y == 0, 0, Inf) 28 | samp <- tmvtnorm::rtmvnorm(1, mean = mu, H = Vinv, lower = lower, upper = upper, algorithm = "gibbs", start.value = ystar) 29 | #samp <- truncnorm::rtruncnorm(1, a = lower, b = upper, mean = mu, sd = 1) 30 | drop(samp) 31 | } 32 | 33 | r.update <- function(r, whichcomp, delta, lambda, y, X, beta, sigsq.eps, Vcomps, Z, data.comps, control.params, rprop.gen, rprop.logdens, rprior.logdens, ...) 
{ 34 | # r.params <- set.r.params(r.prior = control.params$r.prior, comp = whichcomp, r.params = control.params$r.params) 35 | r.params <- make_r_params_comp(control.params$r.params, whichcomp) 36 | rcomp <- unique(r[whichcomp]) 37 | if(length(rcomp) > 1) stop("rcomp should only be 1-dimensional") 38 | 39 | ## generate a proposal 40 | rcomp.star <- rprop.gen(current = rcomp, r.params = r.params) 41 | lambda.star <- lambda 42 | delta.star <- delta 43 | move.type <- NA 44 | 45 | ## part of M-H ratio that depends on the proposal distribution 46 | negdifflogproposal <- -rprop.logdens(rcomp.star, rcomp, r.params = r.params) + rprop.logdens(rcomp, rcomp.star, r.params = r.params) 47 | 48 | ## prior distribution 49 | diffpriors <- rprior.logdens(rcomp.star, r.params = r.params) - rprior.logdens(rcomp, r.params = r.params) 50 | 51 | r.star <- r 52 | r.star[whichcomp] <- rcomp.star 53 | 54 | ## M-H step 55 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 56 | } 57 | 58 | rdelta.comp.update <- function(r, delta, lambda, y, X, beta, sigsq.eps, Vcomps, Z, ztest, data.comps, control.params, rprop.gen2, rprop.logdens1, rprior.logdens, rprior.logdens2, rprop.logdens2, rprop.gen1, ...) { ## individual variable selection 59 | r.params <- control.params$r.params 60 | a.p0 <- control.params$a.p0 61 | b.p0 <- control.params$b.p0 62 | delta.star <- delta 63 | r.star <- r 64 | 65 | move.type <- ifelse(all(delta[ztest] == 0), 1, sample(c(1,2),1)) 66 | move.prob <- ifelse(all(delta[ztest] == 0), 1, 1/2) 67 | if(move.type == 1) { 68 | comp <- ifelse(length(ztest) == 1, ztest, sample(ztest, 1)) 69 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 70 | 71 | delta.star[comp] <- 1 - delta[comp] 72 | move.prob.star <- ifelse(all(delta.star[ztest] == 0), 1, 1/2) 73 | r.star[comp] <- ifelse(delta.star[comp] == 0, 0, rprop.gen1(r.params = r.params)) 74 | 75 | diffpriors <- (lgamma(sum(delta.star[ztest]) + a.p0) + lgamma(length(ztest) - sum(delta.star[ztest]) + b.p0) - lgamma(sum(delta[ztest]) + a.p0) - lgamma(length(ztest) - sum(delta[ztest]) + b.p0)) + ifelse(delta[comp] == 1, -1, 1)*with(list(r.sel = ifelse(delta[comp] == 1, r[comp], r.star[comp])), rprior.logdens(x = r.sel, r.params = r.params)) 76 | 77 | negdifflogproposal <- -log(move.prob.star) + log(move.prob) - ifelse(delta[comp] == 1, -1, 1)*with(list(r.sel = ifelse(delta[comp] == 1, r[comp], r.star[comp])), rprop.logdens1(x = r.sel, r.params = r.params)) 78 | 79 | } else if(move.type == 2) { 80 | comp <- ifelse(length(which(delta == 1)) == 1, which(delta == 1), sample(which(delta == 1), 1)) 81 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 82 | 83 | r.star[comp] <- rprop.gen2(current = r[comp], r.params = r.params) 84 | 85 | diffpriors <- rprior.logdens(r.star[comp], r.params = r.params) - rprior.logdens(r[comp], r.params = r.params) 86 | 87 | negdifflogproposal <- -rprop.logdens2(r.star[comp], r[comp], r.params = r.params) + rprop.logdens2(r[comp], r.star[comp], r.params = r.params) 88 | } 89 | 90 | lambda.star <- lambda 91 | 92 | ## M-H step 93 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, 
negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 94 | } 95 | 96 | rdelta.group.update <- function(r, delta, lambda, y, X, beta, sigsq.eps, Vcomps, Z, ztest, data.comps, control.params, rprop.gen1, rprior.logdens, rprop.logdens1, rprop.gen2, rprop.logdens2, ...) { ## grouped variable selection 97 | r.params <- control.params$r.params 98 | a.p0 <- control.params$a.p0 99 | b.p0 <- control.params$b.p0 100 | groups <- control.params$group.params$groups 101 | sel.groups <- control.params$group.params$sel.groups 102 | neach.group <- control.params$group.params$neach.group 103 | delta.star <- delta 104 | r.star <- r 105 | 106 | # if(length(mu.r) == 1) mu.r <- rep(mu.r, nz) 107 | # if(length(sigma.r) == 1) sigma.r <- rep(sigma.r, nz) 108 | 109 | delta.source <- sapply(sel.groups, function(x) ifelse(any(delta[which(groups == groups[x])] == 1), 1, 0)) 110 | delta.source.star <- delta.source 111 | 112 | ## randomly select move type 113 | if(all(delta.source == 0)) { 114 | move.type <- 1 115 | move.prob <- 1 116 | } else if(length(which(neach.group > 1 & delta.source == 1)) == 0) { 117 | move.type <- sample(c(1, 3), 1) 118 | move.prob <- 1/2 119 | } else { 120 | move.type <- sample(1:3, 1) 121 | move.prob <- 1/3 122 | } 123 | # move.type <- ifelse(all(delta.source == 0), 1, ifelse(length(which(neach.group > 1 & delta.source == 1)) == 0, sample(c(1, 3), 1), sample(1:3, 1))) 124 | 125 | # print(move.type) 126 | 127 | if(move.type == 1) { ## randomly select a source and change its state (e.g., from being in the model to not being in the model) 128 | 129 | source <- sample(seq_along(delta.source), 1) 130 | source.comps <- which(groups == source) 131 | 132 | # r.params <- set.r.params(r.prior = control.params$r.prior, comp = source.comps, r.params = r.params) 133 | 134 | delta.source.star[source] <- 1 - delta.source[source] 135 | delta.star[source.comps] <- rmultinom(1, delta.source.star[source], rep(1/length(source.comps), length(source.comps))) 136 | move.prob.star <- ifelse(all(delta.source.star == 0), 1, ifelse(length(which(neach.group > 1 & delta.source.star == 1)) == 0, 1/2, 1/3)) 137 | 138 | ## which component got switched 139 | comp <- ifelse(delta.source[source] == 1, source.comps[which(delta[source.comps] == 1)], source.comps[which(delta.star[source.comps] == 1)]) 140 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 141 | 142 | r.star[comp] <- ifelse(delta.star[comp] == 0, 0, rprop.gen1(r.params = r.params)) 143 | 144 | # diffpriors <- ifelse(delta.source[source] == 1, log(length(sel.groups) - sum(delta.source) + b.p0) - log(sum(delta.source.star) + a.p0), log(sum(delta.source) + a.p0) - log(length(sel.groups) - sum(delta.source.star) + b.p0)) + ifelse(delta.source[source] == 1, 1, -1)*log(length(source.comps)) + ifelse(delta.source[source] == 1, -1, 1)*with(list(r.sel = ifelse(delta.source[source] == 1, r[source.comps][which(delta[source.comps] == 1)], r.star[source.comps][which(delta.star[source.comps] == 1)])), rprior.logdens(x = r.sel, r.params = r.params)) 145 | diffpriors <- ifelse(delta.source[source] == 1, log(length(sel.groups) - sum(delta.source) + b.p0) - log(sum(delta.source.star) + a.p0), log(sum(delta.source) + a.p0) - log(length(sel.groups) - sum(delta.source.star) + b.p0)) + ifelse(delta.source[source] == 1, 1, -1)*log(length(source.comps)) + ifelse(delta.source[source] == 1, -1, 1)*with(list(r.sel = ifelse(delta.source[source] == 1, r[comp], r.star[comp])), rprior.logdens(x = r.sel, 
r.params = r.params)) 146 | 147 | # negdifflogproposal <- -log(move.prob.star) + log(move.prob) -ifelse(delta.source[source] == 1, 1, -1)*(log(length(source.comps)) - with(list(r.sel = ifelse(delta.source[source] == 1, r[source.comps][which(delta[source.comps] == 1)], r.star[source.comps][which(delta.star[source.comps] == 1)])), rprop.logdens1(x = r.sel, r.params = r.params))) 148 | negdifflogproposal <- -log(move.prob.star) + log(move.prob) -ifelse(delta.source[source] == 1, 1, -1)*(log(length(source.comps)) - with(list(r.sel = ifelse(delta.source[source] == 1, r[comp], r.star[comp])), rprop.logdens1(x = r.sel, r.params = r.params))) 149 | 150 | } else if(move.type == 2) { ## randomly select a multi-component source that is in the model and change which component is included 151 | 152 | tmp <- which(neach.group > 1 & delta.source == 1) 153 | source <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1)) 154 | source.comps <- which(groups == source) 155 | 156 | oldcomp <- source.comps[delta[source.comps] == 1] 157 | tmp <- source.comps[delta[source.comps] == 0] 158 | comp <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1)) 159 | 160 | r.params.oldcomp <- set.r.params(r.prior = control.params$r.prior, comp = oldcomp, r.params = r.params) 161 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 162 | 163 | delta.star[oldcomp] <- 0 164 | delta.star[comp] <- 1 165 | 166 | r.star[oldcomp] <- 0 167 | r.star[comp] <- rprop.gen1(r.params = r.params) 168 | 169 | diffpriors <- rprior.logdens(r.star[comp], r.params = r.params) - rprior.logdens(r[oldcomp], r.params = r.params.oldcomp) 170 | 171 | negdifflogproposal <- -rprop.logdens1(r.star[comp], r.params = r.params) + rprop.logdens1(r[oldcomp], r.params = r.params.oldcomp) 172 | 173 | } else if(move.type == 3) { ## randomly select a component that is in the model and update it 174 | tmp <- which(delta == 1) 175 | comp <- ifelse(length(tmp) == 1, tmp, sample(tmp, 1)) 176 | 177 | r.params <- set.r.params(r.prior = control.params$r.prior, comp = comp, r.params = r.params) 178 | 179 | r.star[comp] <- rprop.gen2(current = r[comp], r.params = r.params) 180 | 181 | diffpriors <- rprior.logdens(r.star[comp], r.params = r.params) - rprior.logdens(r[comp], r.params = r.params) 182 | 183 | negdifflogproposal <- -rprop.logdens2(r.star[comp], r[comp], r.params = r.params) + rprop.logdens2(r[comp], r.star[comp], r.params = r.params) 184 | } 185 | 186 | lambda.star <- lambda 187 | 188 | ## M-H step 189 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 190 | } 191 | 192 | lambda.update <- function(r, delta, lambda, whichcomp=1, y, X, Z, beta, sigsq.eps, Vcomps, data.comps, control.params) { 193 | lambda.jump <- control.params$lambda.jump[whichcomp] 194 | mu.lambda <- control.params$mu.lambda[whichcomp] 195 | sigma.lambda <- control.params$sigma.lambda[whichcomp] 196 | lambdacomp <- lambda[whichcomp] 197 | 198 | #Get lambdacomp adjusted by lamAdj function (to correct for very small lambdas) 199 | adjLambdaComp <- lamAdj(lambdacomp) 200 | 201 | ## generate a proposal for lambda 202 | #lambdacomp.star <- rgamma(1, shape=lambdacomp^2/lambda.jump^2, rate=lambdacomp/lambda.jump^2) 203 | lambdacomp.star <- rgamma(1, shape = adjLambdaComp^2/lambda.jump^2, 204 | rate = adjLambdaComp/lambda.jump^2) 205 | r.star
<- r 206 | delta.star <- delta 207 | move.type <- NA 208 | 209 | ## part of M-H ratio that depends on the proposal distribution 210 | #negdifflogproposal <- -dgamma(lambdacomp.star, shape=lambdacomp^2/lambda.jump^2, rate=lambdacomp/lambda.jump^2, log=TRUE) + dgamma(lambdacomp, shape=lambdacomp.star^2/lambda.jump^2, rate=lambdacomp.star/lambda.jump^2, log=TRUE) 211 | negdifflogproposal <- -dgamma(lambdacomp.star, shape= adjLambdaComp^2/lambda.jump^2, rate=adjLambdaComp/lambda.jump^2, log=TRUE) + dgamma(lambdacomp, shape=lamAdj(lambdacomp.star)^2/lambda.jump^2, rate=lamAdj(lambdacomp.star)/lambda.jump^2, log=TRUE) 212 | 213 | 214 | ## prior distribution 215 | diffpriors <- dgamma(lambdacomp.star, shape=mu.lambda^2/sigma.lambda^2, rate=mu.lambda/sigma.lambda^2, log=TRUE) - dgamma(lambdacomp, shape=mu.lambda^2/sigma.lambda^2, rate=mu.lambda/sigma.lambda^2, log=TRUE) 216 | 217 | lambda.star <- lambda 218 | lambda.star[whichcomp] <- lambdacomp.star 219 | 220 | ## M-H step 221 | return(MHstep(r=r, lambda=lambda, lambda.star=lambda.star, r.star=r.star, delta=delta, delta.star=delta.star, y=y, X=X, Z=Z, beta=beta, sigsq.eps=sigsq.eps, diffpriors=diffpriors, negdifflogproposal=negdifflogproposal, Vcomps=Vcomps, move.type=move.type, data.comps=data.comps)) 222 | } 223 | 224 | MHstep <- function(r, lambda, lambda.star, r.star, delta, delta.star, y, X, Z, beta, sigsq.eps, diffpriors, negdifflogproposal, Vcomps, move.type, data.comps) { 225 | ## compute log M-H ratio 226 | Vcomps.star <- makeVcomps(r.star, lambda.star, Z, data.comps) 227 | mu <- y - X%*%beta 228 | diffliks <- 1/2*Vcomps.star$logdetVinv - 1/2*Vcomps$logdetVinv - 1/2/sigsq.eps*crossprod(mu, Vcomps.star$Vinv - Vcomps$Vinv)%*%mu 229 | logMHratio <- diffliks + diffpriors + negdifflogproposal 230 | logalpha <- min(0,logMHratio) 231 | 232 | ## return value 233 | acc <- FALSE 234 | if( log(runif(1)) <= logalpha ) { 235 | r <- r.star 236 | delta <- delta.star 237 | lambda <- lambda.star 238 | Vcomps <- Vcomps.star 239 | acc <- TRUE 240 | } 241 | return(list(r=r, lambda=lambda, delta=delta, acc=acc, Vcomps=Vcomps, move.type=move.type)) 242 | } 243 | 244 | h.update <- function(lambda, Vcomps, sigsq.eps, y, X, beta, r, Z, data.comps) { 245 | if (is.null(Vcomps)) { 246 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 247 | } 248 | if(is.null(Vcomps$Q)) { 249 | Kpart <- makeKpart(r, Z) 250 | K <- exp(-Kpart) 251 | Vinv <- Vcomps$Vinv 252 | lambda <- lambda[1] ## in case with random intercept (randint==TRUE), where lambda is 2-dimensional 253 | lamKVinv <- lambda*K%*%Vinv 254 | h.postmean <- lamKVinv%*%(y-X%*%beta) 255 | ##h.postvar <- sigsq.eps*lamKVinv 256 | h.postvar <- sigsq.eps*lambda*(K - lamKVinv%*%K) 257 | h.postvar.sqrt <- try(chol(h.postvar), silent=TRUE) 258 | if(inherits(h.postvar.sqrt, "try-error")) { 259 | sigsvd <- svd(h.postvar) 260 | h.postvar.sqrt <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) 261 | } 262 | hsamp <- h.postmean + crossprod(h.postvar.sqrt, rnorm(length(h.postmean))) 263 | hcomps <- list(hsamp = hsamp) 264 | } else { 265 | h.star.postvar.sqrt <- sqrt(sigsq.eps*lambda)*forwardsolve(t(Vcomps$cholR), Vcomps$Q) 266 | h.star.postmean <- lambda[1]*Vcomps$Q %*% Vcomps$Rinv %*% Vcomps$K10 %*% (y - X %*% beta) 267 | hsamp.star <- h.star.postmean + crossprod(h.star.postvar.sqrt, rnorm(length(h.star.postmean))) 268 | hsamp <- t(Vcomps$K10) %*% Vcomps$Qinv %*% hsamp.star 269 | hcomps <- list(hsamp = hsamp, hsamp.star = hsamp.star) 270 | } 271 | hcomps 272 | } 273 | 274 | newh.update <- function(Z, 
Znew, Vcomps, lambda, sigsq.eps, r, y, X, beta, data.comps) { 275 | 276 | if(is.null(data.comps$knots)) { 277 | n0 <- nrow(Z) 278 | n1 <- nrow(Znew) 279 | nall <- n0 + n1 280 | # Kpartall <- makeKpart(r, rbind(Z, Znew)) 281 | # Kmat <- exp(-Kpartall) 282 | # Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 283 | # Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 284 | # Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 285 | Kmat1 <- exp(-makeKpart(r, Znew)) 286 | Kmat10 <- exp(-makeKpart(r, Znew, Z)) 287 | 288 | if(is.null(Vcomps)) { 289 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 290 | } 291 | Vinv <- Vcomps$Vinv 292 | 293 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 294 | Sigma.hnew <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 295 | mu.hnew <- lamK10Vinv %*% (y - X%*%beta) 296 | root.Sigma.hnew <- try(chol(Sigma.hnew), silent=TRUE) 297 | if(inherits(root.Sigma.hnew, "try-error")) { 298 | sigsvd <- svd(Sigma.hnew) 299 | root.Sigma.hnew <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d))) 300 | } 301 | hsamp <- mu.hnew + crossprod(root.Sigma.hnew, rnorm(n1)) 302 | } else { 303 | n0 <- nrow(data.comps$knots) 304 | n1 <- nrow(Znew) 305 | nall <- n0 + n1 306 | # Kpartall <- makeKpart(r, rbind(data.comps$knots, Znew)) 307 | # Kmat <- exp(-Kpartall) 308 | # Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 309 | # Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 310 | # Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 311 | Kmat10 <- exp(-makeKpart(r, Znew, data.comps$knots)) 312 | 313 | if(is.null(Vcomps)) { 314 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 315 | h.star.postvar.sqrt <- sqrt(sigsq.eps*lambda[1])*forwardsolve(t(Vcomps$cholR), Vcomps$Q) 316 | h.star.postmean <- lambda[1]*Vcomps$Q %*% Vcomps$Rinv %*% Vcomps$K10 %*% (y - X %*% beta) 317 | Vcomps$hsamp.star <- h.star.postmean + crossprod(h.star.postvar.sqrt, rnorm(length(h.star.postmean))) 318 | } 319 | hsamp <- Kmat10 %*% Vcomps$Qinv %*% Vcomps$hsamp.star 320 | } 321 | 322 | hsamp 323 | } 324 | 325 | ## function to obtain posterior samples of h(znew) from fit of Bayesian kernel machine regression 326 | predz.samps <- function(fit, Znew, verbose = TRUE) { 327 | if(is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 328 | if(inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 329 | Z <- fit$Z 330 | if(ncol(Z) != ncol(Znew)) { 331 | stop("Znew must have the same number of columns as Z") 332 | } 333 | 334 | hnew.samps <- sapply(1:fit$iter, function(s) { 335 | if(s %% (fit$iter/10) == 0 & verbose) print(s) 336 | newh.update(Z = Z, Znew = Znew, Vcomps = NULL, lambda = fit$lambda[s, ], sigsq.eps = fit$sigsq.eps[s], r = fit$r[s,], y = fit$y, X = fit$X, beta = fit$beta[s,], data.comps = fit$data.comps) 337 | }) 338 | rownames(hnew.samps) <- rownames(Znew) 339 | t(hnew.samps) 340 | } 341 | 342 | ## function to approximate the posterior mean and variance as a function of the estimated tau, lambda, beta, and sigsq.eps 343 | newh.postmean <- function(fit, Znew, sel) { 344 | if(is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 345 | if(inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 346 | 347 | Z <- fit$Z 348 | X <- fit$X 349 | y <- fit$y 350 | data.comps <- fit$data.comps 351 | lambda <- colMeans(fit$lambda[sel, ,drop = FALSE]) 352 | sigsq.eps <- mean(fit$sigsq.eps[sel]) 353 | r <- colMeans(fit$r[sel, ,drop = FALSE]) 354 | beta <- colMeans(fit$beta[sel, ,drop=FALSE]) 355 | 356 | if(is.null(data.comps$knots)) { 357 | n0 <- nrow(Z) 358 | n1 <- nrow(Znew) 359 | nall <- n0 + n1 360 | Kpartall <- makeKpart(r,
rbind(Z, Znew)) 361 | Kmat <- exp(-Kpartall) 362 | Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 363 | Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 364 | Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 365 | 366 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 367 | Vinv <- Vcomps$Vinv 368 | 369 | lamK10Vinv <- lambda[1]*Kmat10 %*% Vinv 370 | Sigma.hnew <- lambda[1]*sigsq.eps*(Kmat1 - lamK10Vinv %*% t(Kmat10)) 371 | mu.hnew <- lamK10Vinv %*% (y - X%*%beta) 372 | } else { 373 | n0 <- nrow(data.comps$knots) 374 | n1 <- nrow(Znew) 375 | nall <- n0 + n1 376 | Kpartall <- makeKpart(r, rbind(data.comps$knots, Znew)) 377 | # Kmat <- exp(-Kpartall) 378 | # Kmat0 <- Kmat[1:n0,1:n0 ,drop=FALSE] 379 | # Kmat1 <- Kmat[(n0+1):nall,(n0+1):nall ,drop=FALSE] 380 | # Kmat10 <- Kmat[(n0+1):nall,1:n0 ,drop=FALSE] 381 | Kmat1 <- exp(-makeKpart(r, Znew)) 382 | Kmat10 <- exp(-makeKpart(r, Znew, data.comps$knots)) 383 | 384 | Vcomps <- makeVcomps(r = r, lambda = lambda, Z = Z, data.comps = data.comps) 385 | 386 | Sigma.hnew <- lambda[1]*sigsq.eps*Kmat10 %*% Vcomps$Rinv %*% t(Kmat10) 387 | mu.hnew <- lambda[1]*Kmat10 %*% Vcomps$Rinv %*% Vcomps$K10 %*% (y - X%*%beta) 388 | } 389 | 390 | ret <- list(postmean = drop(mu.hnew), postvar = Sigma.hnew) 391 | ret 392 | } 393 | 394 | ##################### 395 | #LAMBDA UPDATE HELPER 396 | ##################### 397 | #This function chooses the mean of the proposal function for the lambda MH step based on 398 | #the most recent iteration of lambda 399 | #If this is the identity then E[lambda_star] = lambda_t 400 | #Otherwise we have E[lambda_star] = lamAdj(lambda_t). 401 | lamAdj <- function(lam){ 402 | if(lam <=2 ){ 403 | return(3) 404 | }else{ 405 | return((lam^2)/(lam-1)-1) 406 | } 407 | } 408 | -------------------------------------------------------------------------------- /R/bkmr_main_functions.R: -------------------------------------------------------------------------------- 1 | # makeKpart <- function(r, Z) { 2 | # Kpart <- as.matrix(dist(sqrt(matrix(r, byrow=TRUE, nrow(Z), ncol(Z)))*Z))^2 3 | # Kpart 4 | # } 5 | makeKpart <- function(r, Z1, Z2 = NULL) { 6 | Z1r <- t(t(Z1) * c(sqrt(r))) 7 | if (is.null(Z2)) { 8 | Kpart <- fields::rdist(Z1r)^2 9 | } else { 10 | Z2r <- t(t(Z2) * c(sqrt(r))) 11 | Kpart <- fields::rdist(Z1r, Z2r)^2 12 | } 13 | Kpart 14 | } 15 | makeVcomps <- function(r, lambda, Z, data.comps) { 16 | if (is.null(data.comps$knots)) { 17 | Kpart <- makeKpart(r, Z) 18 | V <- diag(1, nrow(Z), nrow(Z)) + lambda[1]*exp(-Kpart) 19 | if (data.comps$nlambda == 2) { 20 | V <- V + lambda[2]*data.comps$crossTT 21 | } 22 | cholV <- chol(V) 23 | Vinv <- chol2inv(cholV) 24 | logdetVinv <- -2*sum(log(diag(cholV))) 25 | Vcomps <- list(Vinv = Vinv, logdetVinv = logdetVinv) 26 | } else {## predictive process approach 27 | ## note: currently does not work with random intercept model 28 | nugget <- 0.001 29 | n0 <- nrow(Z) 30 | n1 <- nrow(data.comps$knots) 31 | nall <- n0 + n1 32 | # Kpartall <- makeKpart(r, rbind(Z, data.comps$knots)) 33 | # Kall <- exp(-Kpartall) 34 | # K0 <- Kall[1:n0, 1:n0 ,drop=FALSE] 35 | # K1 <- Kall[(n0+1):nall, (n0+1):nall ,drop=FALSE] 36 | # K10 <- Kall[(n0+1):nall, 1:n0 ,drop=FALSE] 37 | K1 <- exp(-makeKpart(r, data.comps$knots)) 38 | K10 <- exp(-makeKpart(r, data.comps$knots, Z)) 39 | Q <- K1 + diag(nugget, n1, n1) 40 | R <- Q + lambda[1]*tcrossprod(K10) 41 | cholQ <- chol(Q) 42 | cholR <- chol(R) 43 | Qinv <- chol2inv(cholQ) 44 | Rinv <- chol2inv(cholR) 45 | Vinv <- diag(1, n0, n0) - lambda[1]*t(K10) %*% Rinv %*% K10 46 | 
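## matrix determinant lemma: det(V) = det(Q + lambda*K10 t(K10))/det(Q) = det(R)/det(Q), so log det(Vinv) comes from the two Cholesky factors already computed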
logdetVinv <- 2*sum(log(diag(cholQ))) - 2*sum(log(diag(cholR))) 47 | Vcomps <- list(Vinv = Vinv, logdetVinv = logdetVinv, cholR = cholR, Q = Q, K10 = K10, Qinv = Qinv, Rinv = Rinv) 48 | } 49 | Vcomps 50 | } 51 | 52 | #' Fit Bayesian kernel machine regression 53 | #' 54 | #' Fits the Bayesian kernel machine regression (BKMR) model using Markov chain Monte Carlo (MCMC) methods. 55 | #' 56 | #' @export 57 | #' 58 | #' @param y a vector of outcome data of length \code{n}. 59 | #' @param Z an \code{n}-by-\code{M} matrix of predictor variables to be included in the \code{h} function. Each row represents an observation and each column represents a predictor. 60 | #' @param X an \code{n}-by-\code{K} matrix of covariate data where each row represents an observation and each column represents a covariate. Should not contain an intercept column. 61 | #' @param iter number of iterations to run the sampler 62 | #' @param family a description of the error distribution and link function to be used in the model. Currently implemented for \code{gaussian} and \code{binomial} families. 63 | #' @param id optional vector (of length \code{n}) of grouping factors for fitting a model with a random intercept. If NULL then no random intercept will be included. 64 | #' @param verbose TRUE or FALSE: flag indicating whether to print intermediate diagnostic information during the model fitting. 65 | #' @param Znew optional matrix of new predictor values at which to predict \code{h}, where each row represents a new observation. Supplying \code{Znew} will slow down the model fitting; these predictions can instead be obtained as a post-processing step using \code{\link{SamplePred}} 66 | #' @param starting.values list of starting values for each parameter. If not specified default values will be chosen. 67 | #' @param control.params list of parameters specifying the prior distributions and tuning parameters for the MCMC algorithm. If not specified default values will be chosen. 68 | #' @param varsel TRUE or FALSE: indicator for whether to conduct variable selection on the Z variables in \code{h} 69 | #' @param groups optional vector (of length \code{M}) of group indicators for fitting hierarchical variable selection if varsel=TRUE. If varsel=TRUE without group specification, component-wise variable selection will be performed. 70 | #' @param knots optional matrix of knot locations for implementing the Gaussian predictive process of Banerjee et al. (2008). Currently only implemented for models without a random intercept. 71 | #' @param ztest optional vector indicating on which variables in Z to conduct variable selection (the remaining variables will be forced into the model). 72 | #' @param rmethod for those predictors being forced into the \code{h} function, the method for sampling the \code{r[m]} values. Takes the value of 'varying' to allow separate \code{r[m]} for each predictor; 'equal' to force the same \code{r[m]} for each predictor; or 'fixed' to fix the \code{r[m]} to their starting values 73 | #' @param est.h TRUE or FALSE: indicator for whether to sample from the posterior distribution of the subject-specific effects h_i within the main sampler. This will slow down the model fitting.
74 | #' @return an object of class "bkmrfit" (containing the posterior samples from the model fit), which has the associated methods: 75 | #' \itemize{ 76 | #' \item \code{\link{print}} (i.e., \code{\link{print.bkmrfit}}) 77 | #' \item \code{\link{summary}} (i.e., \code{\link{summary.bkmrfit}}) 78 | #' } 79 | #' 80 | #' @seealso For guided examples, go to \url{https://jenfb.github.io/bkmr/overview.html} 81 | #' @references Bobb, JF, Valeri L, Claus Henn B, Christiani DC, Wright RO, Mazumdar M, Godleski JJ, Coull BA (2015). Bayesian Kernel Machine Regression for Estimating the Health Effects of Multi-Pollutant Mixtures. Biostatistics 16, no. 3: 493-508. 82 | #' @references Banerjee S, Gelfand AE, Finley AO, Sang H (2008). Gaussian predictive process models for large spatial data sets. Journal of the Royal Statistical Society: Series B (Statistical Methodology), 70(4), 825-848. 83 | #' @import utils 84 | #' 85 | #' @examples 86 | #' ## First generate dataset 87 | #' set.seed(111) 88 | #' dat <- SimData(n = 50, M = 4) 89 | #' y <- dat$y 90 | #' Z <- dat$Z 91 | #' X <- dat$X 92 | #' 93 | #' ## Fit model with component-wise variable selection 94 | #' ## Using only 100 iterations to make example run quickly 95 | #' ## Typically should use a large number of iterations for inference 96 | #' set.seed(111) 97 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 98 | kmbayes <- function(y, Z, X = NULL, iter = 1000, family = "gaussian", id = NULL, verbose = TRUE, Znew = NULL, starting.values = NULL, control.params = NULL, varsel = FALSE, groups = NULL, knots = NULL, ztest = NULL, rmethod = "varying", est.h = FALSE) { 99 | 100 | missingX <- is.null(X) 101 | if (missingX) X <- matrix(0, length(y), 1) 102 | hier_varsel <- !is.null(groups) 103 | 104 | ##Argument check 1, required arguments without defaults 105 | ##check vector/matrix sizes 106 | stopifnot(length(y) > 0, is.numeric(y), anyNA(y) == FALSE) 107 | if (!inherits(Z, "matrix")) Z <- as.matrix(Z) 108 | stopifnot(is.numeric(Z), nrow(Z) == length(y), anyNA(Z) == FALSE) 109 | if (!inherits(X, "matrix")) X <- as.matrix(X) 110 | stopifnot(is.numeric(X), nrow(X) == length(y), anyNA(X) == FALSE) 111 | 112 | ##Argument check 2: for those with defaults, write message and reset to default if invalid 113 | if (iter < 1) { 114 | message("invalid input for iter, resetting to default value 1000") 115 | nsamp <- 1000 116 | } else { 117 | nsamp <- iter 118 | } 119 | if (!family %in% c("gaussian", "binomial")) { 120 | stop("family '", family, "' not yet implemented; must specify either 'gaussian' or 'binomial'") 121 | } 122 | if (family == "binomial") { 123 | message("Fitting probit regression model") 124 | if (!all(y %in% c(0, 1))) { 125 | stop("When family == 'binomial', y must be a vector containing only zeros and ones") 126 | } 127 | } 128 | if (rmethod != "varying" & rmethod != "equal" & rmethod != "fixed") { 129 | message("invalid value for rmethod, resetting to default varying") 130 | rmethod <- "varying" 131 | } 132 | if (verbose != FALSE & verbose != TRUE) { 133 | message("invalid value for verbose, resetting to default FALSE") 134 | verbose <- FALSE 135 | } 136 | if (varsel != FALSE & varsel != TRUE) { 137 | message("invalid value for varsel, resetting to default FALSE") 138 | varsel <- FALSE 139 | } 140 | 141 | ##Argument check 3: the rest id (below) znew, knots, groups, ztest 142 | if (!is.null(id)) { 143 | stopifnot(length(id) == length(y), anyNA(id) == FALSE) 144 | if (!is.null(knots)) { 145 | message("knots
cannot be specified with id, resetting knots to NULL") 146 | knots <- NULL 147 | } 148 | } 149 | if (!is.null(Znew)) { 150 | if (!inherits(Znew, "matrix")) Znew <- as.matrix(Znew) 151 | stopifnot(is.numeric(Znew), ncol(Znew) == ncol(Z), anyNA(Znew) == FALSE) 152 | } 153 | if (!is.null(knots)) { 154 | if (!inherits(knots, "matrix")) knots <- as.matrix(knots) 155 | stopifnot(is.numeric(knots), ncol(knots) == ncol(Z), anyNA(knots) == FALSE) 156 | } 157 | if (!is.null(groups)) { 158 | if (varsel == FALSE) { 159 | message("groups should only be specified if varsel=TRUE, resetting varsel to TRUE") 160 | varsel <- TRUE 161 | } else { 162 | stopifnot(is.numeric(groups), length(groups) == ncol(Z), anyNA(groups) == FALSE) 163 | } 164 | } 165 | if (!is.null(ztest)) { 166 | if (varsel == FALSE) { 167 | message("ztest should only be specified if varsel=TRUE, resetting varsel to TRUE") 168 | varsel <- TRUE 169 | } else { 170 | stopifnot(is.numeric(ztest), length(ztest) <= ncol(Z), anyNA(ztest) == FALSE, max(ztest) <= ncol(Z)) 171 | } 172 | } 173 | 174 | ## start JB code 175 | if (!is.null(id)) { ## for random intercept model 176 | randint <- TRUE 177 | id <- as.numeric(as.factor(id)) 178 | nid <- length(unique(id)) 179 | nlambda <- 2 180 | 181 | ## matrix that multiplies the random intercept vector 182 | TT <- matrix(0, length(id), nid) 183 | for (i in 1:nid) { 184 | TT[which(id == i), i] <- 1 185 | } 186 | crossTT <- tcrossprod(TT) 187 | rm(TT, nid) 188 | } else { 189 | randint <- FALSE 190 | nlambda <- 1 191 | crossTT <- 0 192 | } 193 | data.comps <- list(randint = randint, nlambda = nlambda, crossTT = crossTT) 194 | if (!is.null(knots)) data.comps$knots <- knots 195 | rm(randint, nlambda, crossTT) 196 | 197 | ## create empty matrices to store the posterior draws in 198 | chain <- list(h.hat = matrix(0, nsamp, nrow(Z)), 199 | beta = matrix(0, nsamp, ncol(X)), 200 | lambda = matrix(NA, nsamp, data.comps$nlambda), 201 | sigsq.eps = rep(NA, nsamp), 202 | r = matrix(NA, nsamp, ncol(Z)), 203 | acc.r = matrix(0, nsamp, ncol(Z)), 204 | acc.lambda = matrix(0, nsamp, data.comps$nlambda), 205 | delta = matrix(1, nsamp, ncol(Z)) 206 | ) 207 | if (varsel) { 208 | chain$acc.rdelta <- rep(0, nsamp) 209 | chain$move.type <- rep(0, nsamp) 210 | } 211 | if (family == "binomial") { 212 | chain$ystar <- matrix(0, nsamp, length(y)) 213 | } 214 | 215 | ## components to predict h(Znew) 216 | if (!is.null(Znew)) { 217 | if (is.null(dim(Znew))) Znew <- matrix(Znew, nrow=1) 218 | if (inherits(Znew, "data.frame")) Znew <- data.matrix(Znew) 219 | if (ncol(Z) != ncol(Znew)) { 220 | stop("Znew must have the same number of columns as Z") 221 | } 222 | ##Kpartall <- as.matrix(dist(rbind(Z,Znew)))^2 223 | chain$hnew <- matrix(0,nsamp,nrow(Znew)) 224 | colnames(chain$hnew) <- rownames(Znew) 225 | } 226 | 227 | ## components if model selection is being done 228 | if (varsel) { 229 | if (is.null(ztest)) { 230 | ztest <- 1:ncol(Z) 231 | } 232 | rdelta.update <- rdelta.comp.update 233 | } else { 234 | ztest <- NULL 235 | } 236 | 237 | ## control parameters (lambda.jump default lower for probit model to improve mixing) 238 | control.params.default <- list(lambda.jump = rep(ifelse(family == 'binomial', sqrt(10), 10), data.comps$nlambda), 239 | mu.lambda = rep(10, data.comps$nlambda), 240 | sigma.lambda = rep(10, data.comps$nlambda), 241 | a.p0 = 1, b.p0 = 1, r.prior = "invunif", a.sigsq = 1e-3, 242 | b.sigsq = 1e-3, mu.r = 5, sigma.r = 5, r.muprop = 1, 243 | r.jump = 0.1, r.jump1 = 2, r.jump2 = 0.1, r.a = 0, r.b = 100) 244 | if
(!is.null(control.params)){ 245 | control.params <- modifyList(control.params.default, as.list(control.params)) 246 | validateControlParams(varsel, family, id, control.params) 247 | } else { 248 | control.params <- control.params.default 249 | } 250 | 251 | control.params$r.params <- with(control.params, list(mu.r = mu.r, sigma.r = sigma.r, r.muprop = r.muprop, r.jump = r.jump, r.jump1 = r.jump1, r.jump2 = r.jump2, r.a = r.a, r.b = r.b)) 252 | 253 | ## components if grouped model selection is being done 254 | if (!is.null(groups)) { 255 | if (!varsel) { 256 | stop("if doing grouped variable selection, must set varsel = TRUE") 257 | } 258 | rdelta.update <- rdelta.group.update 259 | control.params$group.params <- list(groups = groups, sel.groups = sapply(unique(groups), function(x) min(seq_along(groups)[groups == x])), neach.group = sapply(unique(groups), function(x) sum(groups %in% x))) 260 | } 261 | 262 | ## specify functions for doing the Metropolis-Hastings steps to update r 263 | e <- environment() 264 | rfn <- set.r.MH.functions(r.prior = control.params$r.prior) 265 | rprior.logdens <- rfn$rprior.logdens 266 | environment(rprior.logdens) <- e 267 | rprop.gen1 <- rfn$rprop.gen1 268 | environment(rprop.gen1) <- e 269 | rprop.logdens1 <- rfn$rprop.logdens1 270 | environment(rprop.logdens1) <- e 271 | rprop.gen2 <- rfn$rprop.gen2 272 | environment(rprop.gen2) <- e 273 | rprop.logdens2 <- rfn$rprop.logdens2 274 | environment(rprop.logdens2) <- e 275 | rprop.gen <- rfn$rprop.gen 276 | environment(rprop.gen) <- e 277 | rprop.logdens <- rfn$rprop.logdens 278 | environment(rprop.logdens) <- e 279 | rm(e, rfn) 280 | 281 | ## initial values 282 | starting.values0 <- list(h.hat = 1, beta = NULL, sigsq.eps = NULL, r = 1, lambda = 10, delta = 1) 283 | if (is.null(starting.values)) { 284 | starting.values <- starting.values0 285 | } else { 286 | starting.values <- modifyList(starting.values0, starting.values) 287 | validateStartingValues(varsel, y, X, Z, starting.values, rmethod) 288 | } 289 | if (family == "gaussian") { 290 | if (is.null(starting.values$beta) | is.null(starting.values$sigsq.eps)) { 291 | lmfit0 <- lm(y ~ Z + X) 292 | if (is.null(starting.values$beta)) { 293 | coefX <- coef(lmfit0)[grep("X", names(coef(lmfit0)))] 294 | starting.values$beta <- unname(ifelse(is.na(coefX), 0, coefX)) 295 | } 296 | if (is.null(starting.values$sigsq.eps)) { 297 | starting.values$sigsq.eps <- summary(lmfit0)$sigma^2 298 | } 299 | } 300 | } else if (family == "binomial") { 301 | starting.values$sigsq.eps <- 1 ## always equal to 1 302 | if (is.null(starting.values$beta) | is.null(starting.values$ystar)) { 303 | probitfit0 <- try(glm(y ~ Z + X, family = binomial(link = "probit"))) 304 | if (!inherits(probitfit0, "try-error")) { 305 | if (is.null(starting.values$beta)) { 306 | coefX <- coef(probitfit0)[grep("X", names(coef(probitfit0)))] 307 | starting.values$beta <- unname(ifelse(is.na(coefX), 0, coefX)) 308 | } 309 | if (is.null(starting.values$ystar)) { 310 | #prd <- predict(probitfit0) 311 | #starting.values$ystar <- ifelse(y == 1, abs(prd), -abs(prd)) 312 | starting.values$ystar <- ifelse(y == 1, 1/2, -1/2) 313 | } 314 | } else { 315 | starting.values$beta <- 0 316 | starting.values$ystar <- ifelse(y == 1, 1/2, -1/2) 317 | } 318 | } 319 | } 320 | 321 | ##print (starting.values) 322 | ##truncate vectors that are too long 323 | if (length(starting.values$h.hat) > length(y)) { 324 | starting.values$h.hat <- starting.values$h.hat[1:length(y)] 325 | } 326 | if (length(starting.values$beta) > ncol(X)) { 327 | 
starting.values$beta <- starting.values$beta[1:ncol(X)] 328 | } 329 | if (length(starting.values$delta) > ncol(Z)) { 330 | starting.values$delta <- starting.values$delta[1:ncol(Z)] 331 | } 332 | if (varsel==FALSE & rmethod == "equal" & length(starting.values$r) > 1) { 333 | starting.values$r <- starting.values$r[1] ## this should only happen if rmethod == "equal" 334 | } else if (length(starting.values$r) > ncol(Z)) { 335 | starting.values$r <- starting.values$r[1:ncol(Z)] 336 | } 337 | 338 | chain$h.hat[1, ] <- starting.values$h.hat 339 | chain$beta[1, ] <- starting.values$beta 340 | chain$lambda[1, ] <- starting.values$lambda 341 | chain$sigsq.eps[1] <- starting.values$sigsq.eps 342 | chain$r[1, ] <- starting.values$r 343 | if (varsel) { 344 | chain$delta[1,ztest] <- starting.values$delta 345 | } 346 | if (family == "binomial") { 347 | chain$ystar[1, ] <- starting.values$ystar 348 | chain$sigsq.eps[] <- starting.values$sigsq.eps ## does not get updated 349 | } 350 | if (!is.null(groups)) { 351 | ## make sure starting values are consistent with structure of model 352 | if (!all(sapply(unique(groups), function(x) sum(chain$delta[1, ztest][groups == x])) == 1)) { 353 | # warning("Specified starting values for delta not consistent with model; using default") 354 | starting.values$delta <- rep(0, length(groups)) 355 | starting.values$delta[sapply(unique(groups), function(x) min(which(groups == x)))] <- 1 356 | } 357 | chain$delta[1,ztest] <- starting.values$delta 358 | chain$r[1,ztest] <- ifelse(chain$delta[1,ztest] == 1, chain$r[1,ztest], 0) 359 | } 360 | chain$est.h <- est.h 361 | 362 | ## components 363 | Vcomps <- makeVcomps(r = chain$r[1, ], lambda = chain$lambda[1, ], Z = Z, data.comps = data.comps) 364 | 365 | # set print progress options 366 | opts <- set_verbose_opts( 367 | verbose_freq = control.params$verbose_freq, 368 | verbose_digits = control.params$verbose_digits, 369 | verbose_show_ests = control.params$verbose_show_ests, 370 | tot_iter=nsamp 371 | ) 372 | 373 | ## start sampling #### 374 | chain$time1 <- Sys.time() 375 | for (s in 2:nsamp) { 376 | 377 | ## continuous version of outcome (latent outcome under binomial probit model) 378 | if (family == "gaussian") { 379 | ycont <- y 380 | } else if (family == "binomial") { 381 | if (est.h) { 382 | chain$ystar[s,] <- ystar.update(y = y, X = X, beta = chain$beta[s - 1,], h = chain$h.hat[s - 1, ]) 383 | } else { 384 | chain$ystar[s,] <- ystar.update.noh(y = y, X = X, beta = chain$beta[s - 1,], Vinv = Vcomps$Vinv, ystar = chain$ystar[s - 1, ]) 385 | } 386 | ycont <- chain$ystar[s, ] 387 | } 388 | 389 | ## generate posterior samples from marginalized distribution P(beta, sigsq.eps, lambda, r | y) 390 | 391 | ## beta 392 | if (!missingX) { 393 | chain$beta[s,] <- beta.update(X = X, Vinv = Vcomps$Vinv, y = ycont, sigsq.eps = chain$sigsq.eps[s - 1]) 394 | } 395 | 396 | ## \sigma_\epsilon^2 397 | if (family == "gaussian") { 398 | chain$sigsq.eps[s] <- sigsq.eps.update(y = ycont, X = X, beta = chain$beta[s,], Vinv = Vcomps$Vinv, a.eps = control.params$a.sigsq, b.eps = control.params$b.sigsq) 399 | } 400 | 401 | ## lambda 402 | lambdaSim <- chain$lambda[s - 1,] 403 | for (comp in 1:data.comps$nlambda) { 404 | varcomps <- lambda.update(r = chain$r[s - 1,], delta = chain$delta[s - 1,], lambda = lambdaSim, whichcomp = comp, y = ycont, X = X, Z = Z, beta = chain$beta[s,], sigsq.eps = chain$sigsq.eps[s], Vcomps = Vcomps, data.comps = data.comps, control.params = control.params) 405 | lambdaSim <- varcomps$lambda 406 | if (varcomps$acc) { 407 |
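## proposal accepted: keep the updated variance components and record the acceptance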
Vcomps <- varcomps$Vcomps 408 | chain$acc.lambda[s,comp] <- varcomps$acc 409 | } 410 | } 411 | chain$lambda[s,] <- lambdaSim 412 | 413 | ## r 414 | rSim <- chain$r[s - 1,] 415 | comp <- which(!1:ncol(Z) %in% ztest) 416 | if (length(comp) != 0) { 417 | if (rmethod == "equal") { ## common r for those variables not being selected 418 | varcomps <- r.update(r = rSim, whichcomp = comp, delta = chain$delta[s - 1,], 419 | lambda = chain$lambda[s,], y = ycont, X = X, beta = chain$beta[s,], 420 | sigsq.eps = chain$sigsq.eps[s], Vcomps = Vcomps, Z = Z, 421 | data.comps = data.comps, control.params = control.params, 422 | rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, 423 | rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, 424 | rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, 425 | rprop.logdens = rprop.logdens) 426 | rSim <- varcomps$r 427 | if (varcomps$acc) { 428 | Vcomps <- varcomps$Vcomps 429 | chain$acc.r[s, comp] <- varcomps$acc 430 | } 431 | } else if (rmethod == "varying") { ## allow a different r_m 432 | for (whichr in comp) { 433 | varcomps <- r.update(r = rSim, whichcomp = whichr, delta = chain$delta[s - 1,], 434 | lambda = chain$lambda[s,], y = ycont, X = X, beta = chain$beta[s,], 435 | sigsq.eps = chain$sigsq.eps[s], Vcomps = Vcomps, Z = Z, 436 | data.comps = data.comps, control.params = control.params, 437 | rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, 438 | rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, 439 | rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, 440 | rprop.logdens = rprop.logdens) 441 | rSim <- varcomps$r 442 | if (varcomps$acc) { 443 | Vcomps <- varcomps$Vcomps 444 | chain$acc.r[s, whichr] <- varcomps$acc 445 | } 446 | } 447 | } 448 | } 449 | ## for those variables being selected: joint posterior of (r,delta) 450 | if (varsel) { 451 | varcomps <- rdelta.update(r = rSim, delta = chain$delta[s - 1,], 452 | lambda = chain$lambda[s,], y = ycont, X = X, 453 | beta = chain$beta[s,], sigsq.eps = chain$sigsq.eps[s], 454 | Vcomps = Vcomps, Z = Z, ztest = ztest, 455 | data.comps = data.comps, control.params = control.params, 456 | rprior.logdens = rprior.logdens, rprop.gen1 = rprop.gen1, 457 | rprop.logdens1 = rprop.logdens1, rprop.gen2 = rprop.gen2, 458 | rprop.logdens2 = rprop.logdens2, rprop.gen = rprop.gen, 459 | rprop.logdens = rprop.logdens) 460 | chain$delta[s,] <- varcomps$delta 461 | rSim <- varcomps$r 462 | chain$move.type[s] <- varcomps$move.type 463 | if (varcomps$acc) { 464 | Vcomps <- varcomps$Vcomps 465 | chain$acc.rdelta[s] <- varcomps$acc 466 | } 467 | } 468 | chain$r[s,] <- rSim 469 | 470 | ################################################### 471 | ## generate posterior sample of h(z) from its posterior P(h | beta, sigsq.eps, lambda, r, y) 472 | 473 | if (est.h) { 474 | hcomps <- h.update(lambda = chain$lambda[s,], Vcomps = Vcomps, sigsq.eps = chain$sigsq.eps[s], y = ycont, X = X, beta = chain$beta[s,], r = chain$r[s,], Z = Z, data.comps = data.comps) 475 | chain$h.hat[s,] <- hcomps$hsamp 476 | if (!is.null(hcomps$hsamp.star)) { ## GPP 477 | Vcomps$hsamp.star <- hcomps$hsamp.star 478 | } 479 | rm(hcomps) 480 | } 481 | 482 | ################################################### 483 | ## generate posterior samples of h(Znew) from its posterior P(hnew | beta, sigsq.eps, lambda, r, y) 484 | 485 | if (!is.null(Znew)) { 486 | chain$hnew[s,] <- newh.update(Z = Z, Znew = Znew, Vcomps = Vcomps, lambda = chain$lambda[s,], sigsq.eps = chain$sigsq.eps[s], r = chain$r[s,], y = ycont, X = X, beta = chain$beta[s,], 
data.comps = data.comps) 487 | } 488 | 489 | ################################################### 490 | ## print details of the model fit so far 491 | print_diagnostics(verbose = verbose, opts = opts, curr_iter = s, tot_iter = nsamp, chain = chain, varsel = varsel, hier_varsel = hier_varsel, ztest = ztest, Z = Z, groups = groups) 492 | 493 | } 494 | control.params$r.params <- NULL 495 | chain$time2 <- Sys.time() 496 | chain$iter <- nsamp 497 | chain$family <- family 498 | chain$starting.values <- starting.values 499 | chain$control.params <- control.params 500 | chain$X <- X 501 | chain$Z <- Z 502 | chain$y <- y 503 | chain$ztest <- ztest 504 | chain$data.comps <- data.comps 505 | if (!is.null(Znew)) chain$Znew <- Znew 506 | if (!is.null(groups)) chain$groups <- groups 507 | chain$varsel <- varsel 508 | class(chain) <- c("bkmrfit", class(chain)) 509 | chain 510 | } 511 | 512 | #' Print basic summary of BKMR model fit 513 | #' 514 | #' \code{print} method for class "bkmrfit" 515 | #' 516 | #' @param x an object of class "bkmrfit" 517 | #' @param digits the number of digits to show when printing 518 | #' @param ... further arguments passed to or from other methods. 519 | #' 520 | #' @export 521 | #' 522 | #' @return No return value, prints basic summary of fit to console 523 | #' 524 | #' @examples 525 | #' ## First generate dataset 526 | #' set.seed(111) 527 | #' dat <- SimData(n = 50, M = 4) 528 | #' y <- dat$y 529 | #' Z <- dat$Z 530 | #' X <- dat$X 531 | #' 532 | #' ## Fit model with component-wise variable selection 533 | #' ## Using only 100 iterations to make example run quickly 534 | #' ## Typically should use a large number of iterations for inference 535 | #' set.seed(111) 536 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 537 | #' fitkm 538 | print.bkmrfit <- function(x, digits = 5, ...) { 539 | cat("Fitted object of class 'bkmrfit'\n") 540 | cat("Iterations:", x$iter, "\n") 541 | cat("Outcome family:", x$family, ifelse(x$family == "binomial", "(probit link)", ""), "\n") 542 | cat("Model fit on:", as.character(x$time2), "\n") 543 | } 544 | 545 | #' Summarizing BKMR model fits 546 | #' 547 | #' \code{summary} method for class "bkmrfit" 548 | #' 549 | #' @param object an object of class "bkmrfit" 550 | #' @param q quantiles of posterior distribution to show 551 | #' @param digits the number of digits to show when printing 552 | #' @param show_ests logical; if \code{TRUE}, prints summary statistics of posterior distribution 553 | #' @param show_MH logical; if \code{TRUE}, prints acceptance rates from the Metropolis-Hastings algorithm 554 | #' @param ... further arguments passed to or from other methods. 555 | #' 556 | #' @export 557 | #' 558 | #' @return No return value, prints more detailed summary of fit to console 559 | #' 560 | #' @examples 561 | #' ## First generate dataset 562 | #' set.seed(111) 563 | #' dat <- SimData(n = 50, M = 4) 564 | #' y <- dat$y 565 | #' Z <- dat$Z 566 | #' X <- dat$X 567 | #' 568 | #' ## Fit model with component-wise variable selection 569 | #' ## Using only 100 iterations to make example run quickly 570 | #' ## Typically should use a large number of iterations for inference 571 | #' set.seed(111) 572 | #' fitkm <- kmbayes(y = y, Z = Z, X = X, iter = 100, verbose = FALSE, varsel = TRUE) 573 | #' summary(fitkm) 574 | summary.bkmrfit <- function(object, q = c(0.025, 0.975), digits = 5, show_ests = TRUE, show_MH = TRUE, ...) 
{ 575 | x <- object 576 | elapsed_time <- difftime(x$time2, x$time1) 577 | 578 | print(x, digits = digits) 579 | cat("Running time: ", round(elapsed_time, digits), attr(elapsed_time, "units"), "\n") 580 | cat("\n") 581 | 582 | if (show_MH) { 583 | cat("Acceptance rates for Metropolis-Hastings algorithm:\n") 584 | accep_rates <- data.frame() 585 | ## lambda 586 | nm <- "lambda" 587 | rate <- colMeans(x$acc.lambda[2:x$iter, ,drop = FALSE]) 588 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 589 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 590 | ## r_m 591 | if (!x$varsel) { 592 | nm <- "r" 593 | rate <- colMeans(x$acc.r[2:x$iter, , drop = FALSE]) 594 | if (length(rate) > 1) nm <- paste0(nm, seq_along(rate)) 595 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 596 | } else { 597 | nm <- "r/delta (overall)" 598 | rate <- mean(x$acc.rdelta[2:x$iter]) 599 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 600 | ## 601 | nm <- "r/delta (move 1)" 602 | rate <- mean(x$acc.rdelta[2:x$iter][x$move.type[2:x$iter] == 1]) 603 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 604 | ## 605 | nm <- "r/delta (move 2)" 606 | rate <- mean(x$acc.rdelta[2:x$iter][x$move.type[2:x$iter] == 2]) 607 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 608 | if (!is.null(x$groups)) { 609 | nm <- "r/delta (move 3)" 610 | rate <- mean(x$acc.rdelta[2:x$iter][x$move.type[2:x$iter] == 3]) 611 | accep_rates %<>% rbind(data.frame(param = nm, rate = rate)) 612 | } 613 | } 614 | print(accep_rates) 615 | } 616 | if (show_ests) { 617 | sel <- with(x, seq(floor(iter/2) + 1, iter)) 618 | cat("\nParameter estimates (based on iterations ", min(sel), "-", max(sel), "):\n", sep = "") 619 | ests <- ExtractEsts(x, q = q, sel = sel) 620 | if (!is.null(ests$h)) { 621 | ests$h <- ests$h[c(1,2,nrow(ests$h)), ] 622 | } 623 | if (!is.null(ests$ystar)) { 624 | ests$ystar <- ests$ystar[c(1,2,nrow(ests$ystar)), ] 625 | } 626 | summ <- with(ests, rbind(beta, sigsq.eps, r, lambda)) 627 | if (!is.null(ests$h)) { 628 | summ <- rbind(summ, ests$h) 629 | } 630 | if (!is.null(ests$ystar)) { 631 | summ <- rbind(summ, ests$ystar) 632 | } 633 | summ <- data.frame(param = rownames(summ), round(summ, digits)) 634 | rownames(summ) <- NULL 635 | print(summ) 636 | if (x$varsel) { 637 | cat("\nPosterior inclusion probabilities:\n") 638 | pips <- ExtractPIPs(x) 639 | pips[, -1] <- round(pips[, -1], digits) 640 | print(pips) 641 | } 642 | } 643 | return() 644 | } 645 | --------------------------------------------------------------------------------
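## A minimal end-to-end sketch (not part of the package sources), assembled from
## the roxygen examples above; it uses only functions exported in NAMESPACE, and
## the object names are illustrative.
library(bkmr)
set.seed(111)
dat <- SimData(n = 50, M = 4)                 # simulated outcome y, exposures Z, covariates X
fitkm <- kmbayes(y = dat$y, Z = dat$Z, X = dat$X,
                 iter = 100, verbose = FALSE, varsel = TRUE)
summary(fitkm)                                # MH acceptance rates and posterior summaries
ExtractPIPs(fitkm)                            # posterior inclusion probabilities
pred.bivar <- PredictorResponseBivar(fit = fitkm, min.plot.dist = 1, ngrid = 10)
pred.levels <- PredictorResponseBivarLevels(pred.resp.df = pred.bivar,
                                            Z = dat$Z, qs = c(0.1, 0.5, 0.9))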