├── .DS_Store ├── AdapativeMCMCLogistic ├── .Rhistory ├── LogisticAdaptive.r └── progressbar.R ├── BackfittingMCMC ├── BayesianResults.png ├── MLR_backfittingMCMC.r └── OriginalData.png ├── BayesianProbit ├── BayesProbitSim.r └── FunctionSourceCode.r ├── BayesianSurvival ├── .DS_Store ├── FunctionSourceCode.R ├── StanWeibullAFT.Rmd └── weibull_nocovar.R ├── LogisticReg ├── .Rhistory ├── Logistic_GibbsMHcombo.r ├── LogitMCMCChains.png ├── profilevis1.png └── profvis2.png ├── MH_with_caching ├── HelperFunctions.R ├── LogisticMHcache.r └── traceplots.png ├── MultipleLinearReg └── multiplelinearreg.r ├── PartialPooling ├── .Rhistory ├── PartialPool.png └── PartialPool.r ├── README.md ├── RcppBoost ├── .Rhistory ├── AcceptProb.png ├── Logisitc_MH_Rcpp.r ├── MCMCchains.png ├── Runtime.png └── log_post.cpp ├── SimpleLinearReg ├── .DS_Store ├── BayesModel.R └── README.md └── SimulatedTempering ├── .DS_Store ├── make_gif-.gif ├── tempering.R ├── tempering.html ├── tempering.rmd ├── tempering_cache └── html │ ├── __packages │ ├── unnamed-chunk-1_e24126a3f59693241424fe88e04ca445.RData │ ├── unnamed-chunk-1_e24126a3f59693241424fe88e04ca445.rdb │ └── unnamed-chunk-1_e24126a3f59693241424fe88e04ca445.rdx └── tempering_files └── figure-html └── make_gif-.gif /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/.DS_Store -------------------------------------------------------------------------------- /AdapativeMCMCLogistic/.Rhistory: -------------------------------------------------------------------------------- 1 | alpha_shell <- matrix(0, nrow = M, ncol = gibbs_iter) 2 | sigma_shell <- matrix(0, nrow = 1, ncol = gibbs_iter) 3 | # initialize 4 | beta_shell[,1] <- rep(100, p) 5 | alpha_shell[,1] <- rnorm(n = M, mean = 0, sd = 100) 6 | sigma_shell[1] <- 200 7 | for(iter in 2:gibbs_iter){ 8 | beta_shell[,iter]<- metrop_hastings(x_0 = 
beta_shell[, iter-1], 9 | log_post_density=condpost_beta, 10 | proposal_dist =function(x, prop_sigma) rmvnorm(1, mean = x, sigma = prop_sigma*diag(p)), 11 | y = y, x = x, 12 | alpha = alpha_shell[,iter-1], 13 | sigma = sigma_shell[,iter-1], 14 | subjid = subjid, M=M, 15 | prop_sigma = 50)$x_0 16 | alpha_shell[,iter]<- metrop_hastings(x_0 = alpha_shell[, iter-1], 17 | log_post_density=condpost_alpha, 18 | proposal_dist =function(x, prop_sigma) rmvnorm(1, mean = x, sigma = prop_sigma*diag(M)), 19 | y = y, x = x, 20 | beta = beta_shell[,iter], 21 | sigma = sigma_shell[,iter-1], 22 | subjid = subjid, M=M, 23 | prop_sigma = 100)$x_0 24 | sigma_shell[,iter]<- metrop_hastings(x_0 = sigma_shell[, iter-1], 25 | log_post_density=condpost_sigma, 26 | proposal_dist =function(x, prop_sigma) rmvnorm(1, mean = x, sigma = prop_sigma*diag(1)), 27 | y = y, x = x, 28 | beta = beta_shell[,iter], 29 | alpha = alpha_shell[,iter], 30 | subjid = subjid, M=M, lower=0, 31 | prop_sigma = 100)$x_0 32 | } 33 | return(list(beta_shell=beta_shell, alpha_shell=alpha_shell, sigma_shell=sigma_shell)) 34 | } 35 | N_sims <- 1000 36 | gibbs_iter <- 1000 37 | sim_dat <- replicate(n = N_sims, simulate_data(M=50), simplify = F) 38 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 39 | library(mvtnorm) 40 | library(invgamma) 41 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 42 | library(lme4) 43 | lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) ) 44 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 45 | par(mfrow=c(2,1)) 46 | plot(res$beta_shell[1,], type='l') 47 | plot(res$beta_shell[2,], type='l') 48 | plot(res$sigma_shell, type='l') 49 | plot(res$sigma_shell[1,], type='l') 50 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 51 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 52 | library(MASS) 53 | N_sims <- 1000 54 | gibbs_iter <- 1000 55 
| sim_dat <- replicate(n = N_sims, simulate_data(M=50), simplify = F) 56 | library(mvtnorm) 57 | library(invgamma) 58 | library(lme4) 59 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 60 | warnings() 61 | par(mfrow=c(3,1)) 62 | plot(res$beta_shell[1,], type='l') 63 | plot(res$beta_shell[2,], type='l') 64 | plot(res$sigma_shell[1,], type='l') 65 | cov(c(1,1,23,234), c(1,23,5,3,2)) 66 | cov(x = cbind(rnorm(n = 100), rnorm(100))) 67 | cov(x = cbind(rnorm(n = 100, sd=50), rnorm(100, sd = 10))) 68 | 50^2 69 | cov(x = cbind(rnorm(n = 100, sd=50), rnorm(100, sd = 10))) 70 | cbind(c(10, 0), c(0,10)) 71 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 72 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 73 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 74 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 75 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 76 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 77 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 78 | prop_sigma <- jumping_var[[tuning_end]] 79 | prop_sigma 80 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 81 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 82 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 83 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 84 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 85 | jumping_var[[tuning_end]] 86 | jumping_var 87 | jumping_var[[1]] 88 | jumping_var[[2]] 89 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 90 | res<-gibbs_sampler(sim_dat[[1]], 
gibbs_iter=1000) 91 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 92 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 93 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 94 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 95 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 96 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 97 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 98 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=30) 99 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 100 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 101 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 102 | par(mfrow=c(3,1)) 103 | plot(res$beta_shell[1,], type='l') 104 | plot(res$beta_shell[2,], type='l') 105 | plot(res$sigma_shell[1,], type='l') 106 | res$`` 107 | res[[4]] 108 | sqrt(72) 109 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 110 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 111 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 500) 112 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 113 | par(mfrow=c(1,1)) 114 | plot(res$alpha_shell[3,], type='l') 115 | par(mfrow=c(3,1)) 116 | plot(res$beta_shell[1,], type='l') 117 | plot(res$beta_shell[2,], type='l') 118 | plot(res$sigma_shell[1,], type='l') 119 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 120 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 121 | par(mfrow=c(3,1)) 122 | plot(res$beta_shell[1,], 
type='l') 123 | plot(res$beta_shell[2,], type='l') 124 | plot(res$sigma_shell[1,], type='l') 125 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 126 | par(mfrow=c(3,1)) 127 | plot(res$beta_shell[1,], type='l') 128 | plot(res$beta_shell[2,], type='l') 129 | plot(res$sigma_shell[1,], type='l') 130 | res$prop_sigma 131 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 5) 132 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 133 | par(mfrow=c(3,1)) 134 | plot(res$beta_shell[1,], type='l') 135 | plot(res$beta_shell[2,], type='l') 136 | plot(res$sigma_shell[1,], type='l') 137 | res$prop_sigma 138 | sqrt(res$prop_sigma) 139 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 140 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 141 | par(mfrow=c(3,1)) 142 | plot(res$beta_shell[1,], type='l') 143 | plot(res$beta_shell[2,], type='l') 144 | plot(res$sigma_shell[1,], type='l') 145 | res$prop_sigma 146 | sqrt(12) 147 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 148 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 149 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 150 | par(mfrow=c(3,1)) 151 | plot(res$beta_shell[1,], type='l') 152 | plot(res$beta_shell[2,], type='l') 153 | plot(res$sigma_shell[1,], type='l') 154 | res$prop_sigma 155 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 156 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 157 | par(mfrow=c(3,1)) 158 | plot(res$beta_shell[1,], type='l') 159 | plot(res$beta_shell[2,], type='l') 160 | plot(res$sigma_shell[1,], type='l') 161 | res$prop_sigma 162 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 163 | res<-gibbs_sampler(sim_dat[[1]], 
gibbs_iter=1000, tuning_end = 10) 164 | par(mfrow=c(3,1)) 165 | plot(res$beta_shell[1,], type='l') 166 | plot(res$beta_shell[2,], type='l') 167 | plot(res$sigma_shell[1,], type='l') 168 | res$prop_sigma 169 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 170 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 171 | par(mfrow=c(3,1)) 172 | plot(res$beta_shell[1,], type='l') 173 | plot(res$beta_shell[2,], type='l') 174 | plot(res$sigma_shell[1,], type='l') 175 | res$prop_sigma 176 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 100) 177 | par(mfrow=c(3,1)) 178 | plot(res$beta_shell[1,], type='l') 179 | plot(res$beta_shell[2,], type='l') 180 | plot(res$sigma_shell[1,], type='l') 181 | res$prop_sigma 182 | # par(mfrow=c(1,1)) 183 | # plot(res$alpha_shell[3,], type='l') 184 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 185 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 186 | par(mfrow=c(3,1)) 187 | plot(res$beta_shell[1,], type='l') 188 | plot(res$beta_shell[2,], type='l') 189 | plot(res$sigma_shell[1,], type='l') 190 | res$prop_sigma 191 | # par(mfrow=c(1,1)) 192 | # plot(res$alpha_shell[3,], type='l') 193 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 194 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 195 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 196 | par(mfrow=c(3,1)) 197 | plot(res$beta_shell[1,], type='l') 198 | plot(res$beta_shell[2,], type='l') 199 | plot(res$sigma_shell[1,], type='l') 200 | res$prop_sigma 201 | # par(mfrow=c(1,1)) 202 | # plot(res$alpha_shell[3,], type='l') 203 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 204 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 205 | source('~/Box Sync/Coursework/Spring 
2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 206 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 207 | par(mfrow=c(3,1)) 208 | plot(res$beta_shell[1,], type='l') 209 | plot(res$beta_shell[2,], type='l') 210 | plot(res$sigma_shell[1,], type='l') 211 | res$prop_sigma 212 | # par(mfrow=c(1,1)) 213 | # plot(res$alpha_shell[3,], type='l') 214 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 215 | library(ggplot2) 216 | setwd("/Users/arman/Documents/StableMarkets/BayesianTutorials/AdapativeMCMCLogistic") 217 | ################################################################################ 218 | ### 0 - Simulate Data 219 | ################################################################################ 220 | set.seed(10) 221 | N<-1000 222 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 223 | d$age_1<-ifelse(d$age_group==1,1,0) 224 | d$age_2<-ifelse(d$age_group==2,1,0) 225 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 226 | d$y<-rbinom(n = N, size = 1, 227 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 228 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 229 | Y<-matrix(d$y, ncol=1) # outcome vector 230 | p<-ncol(X) 231 | #Author: Arman Oganisian 232 | library(LaplacesDemon) 233 | library(invgamma) 234 | library(MASS) 235 | library(tidyr) 236 | library(dplyr) 237 | library(ggplot2) 238 | setwd("/Users/arman/Documents/StableMarkets/BayesianTutorials/AdapativeMCMCLogistic") 239 | ################################################################################ 240 | ### 0 - Simulate Data 241 | ################################################################################ 242 | set.seed(10) 243 | N<-1000 244 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 245 | d$age_1<-ifelse(d$age_group==1,1,0) 246 | d$age_2<-ifelse(d$age_group==2,1,0) 247 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 
2*d$age_2)) 248 | d$y<-rbinom(n = N, size = 1, 249 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 250 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 251 | Y<-matrix(d$y, ncol=1) # outcome vector 252 | p<-ncol(X) 253 | log_posterior<-function(beta, X, Y){ 254 | # calculate likelihood 255 | xb <- X %*% beta 256 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 257 | p_i <- invlogit(xb) 258 | lik <- sum(dbern(Y, p_i, log = T)) 259 | # calculate prior 260 | pr <- dnorm(x = beta, mean = 0, sd = 1000, log = T) 261 | log_cond_post <- lik + pr 262 | return(log_cond_post) 263 | } 264 | log_posterior(c(1,1,1,1), X, Y) 265 | log_posterior<-function(beta, X, Y){ 266 | # calculate likelihood 267 | xb <- X %*% beta 268 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 269 | p_i <- invlogit(xb) 270 | lik <- sum(dbern(Y, p_i, log = T)) 271 | # calculate prior 272 | pr <- dmvn(x = beta, mu = 0, sigma = 1000*diag(p), log = T) 273 | log_cond_post <- lik + pr 274 | return(log_cond_post) 275 | } 276 | log_posterior(c(1,1,1,1), X, Y) 277 | log_posterior<-function(beta, X, Y){ 278 | # calculate likelihood 279 | xb <- X %*% beta 280 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 281 | p_i <- invlogit(xb) 282 | lik <- sum(dbern(Y, p_i, log = T)) 283 | # calculate prior 284 | pr <- dmvn(x = beta, mu = 0, Sigma = 1000*diag(p), log = T) 285 | log_cond_post <- lik + pr 286 | return(log_cond_post) 287 | } 288 | log_posterior(c(1,1,1,1), X, Y) 289 | Rcpp::sourceCpp('samples.cpp') 290 | log_post(c(1,1,1,1), Y, X) 291 | log_posterior<-function(beta, X, Y){ 292 | # calculate likelihood 293 | xb <- X %*% beta 294 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 295 | p_i <- invlogit(xb) 296 | lik <- sum(dbern(Y, p_i, log = T)) 297 | # calculate prior 298 | pr <- dmvn(x = beta, mu = 0, Sigma = (1000^2)*diag(p), log = T) 299 | log_cond_post <- lik + pr 300 | return(log_cond_post) 301 | } 302 | log_posterior(c(1,1,1,1), X, Y) 303 | log_posterior<-function(beta, 
X, Y){ 304 | # calculate likelihood 305 | xb <- X %*% beta 306 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 307 | p_i <- invlogit(xb) 308 | lik <- sum(dbern(Y, p_i, log = T)) 309 | # calculate prior 310 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 311 | log_cond_post <- lik + pr 312 | return(log_cond_post) 313 | } 314 | log_posterior(c(1,1,1,1), X, Y) 315 | log_post(c(1,1,1,1), Y, X) 316 | Rcpp::sourceCpp('samples.cpp') 317 | Rcpp::sourceCpp('samples.cpp') 318 | log_posterior(c(1,1,1,1), X, Y) 319 | log_post(c(1,1,1,1), Y, X) 320 | Rcpp::sourceCpp('samples.cpp') 321 | log_post(c(1,1,1,1), Y, X) 322 | log_posterior(c(1,1,1,1), X, Y) 323 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 324 | # create shells 325 | p <- ncol(X) 326 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 327 | accept_shell <- numeric(length = iter) 328 | # starting values 329 | beta_shell[1,] <- rep(10, p) 330 | for(i in 2:iter){ 331 | beta_0 <- beta_shell[i-1, ] 332 | # draw from proposal distribution 333 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 334 | # calculate ratio of conditional posterior densities 335 | r_num <- log_post(beta_c, X, Y ) 336 | r_denom <- log_post(beta_0, X, Y ) 337 | # calculate acceptance probability 338 | r <- exp(r_num - r_denom) 339 | rmin<-min(r,1) 340 | # accept or reject proposal 341 | if( rbinom(1,1,rmin) == 1 ){ 342 | beta_shell[i, ] <- beta_c 343 | }else{ 344 | beta_shell[i, ] <- beta_0 345 | } 346 | accept_shell[i] <- rmin 347 | } 348 | colnames(beta_shell) <- colnames(X) 349 | colnames(beta_shell)[1] <- 'intercept' 350 | return(list(beta_shell, accept_shell) ) 351 | } 352 | library(rbenchmark) 353 | library(rbenchmark) 354 | benchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 355 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 356 | log_posterior<-function(beta, X, Y){ 357 | # calculate likelihood 358 | xb <- X %*% beta 359 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 360 | 
p_i <- invlogit(xb) 361 | lik <- sum(dbern(Y, p_i, log = T)) 362 | # calculate prior 363 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 364 | log_cond_post <- lik + pr 365 | return(log_cond_post) 366 | } 367 | # use Metropolis Hastings algorithm to sample from cond. post. of beta 368 | sample_mh<-function(X, Y, iter, jump_v){ 369 | # create shells 370 | p <- ncol(X) 371 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 372 | accept_shell <- numeric(length = iter) 373 | # starting values 374 | beta_shell[1,] <- rep(10, p) 375 | for(i in 2:iter){ 376 | beta_0 <- beta_shell[i-1, ] 377 | # draw from proposal distribution 378 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 379 | # calculate ratio of conditional posterior densities 380 | r_num <- log_posterior(beta_c, X, Y ) 381 | r_denom <- log_posterior(beta_0, X, Y ) 382 | # calculate acceptance probability 383 | r <- exp(r_num - r_denom) 384 | rmin<-min(r,1) 385 | # accept or reject proposal 386 | if( rbinom(1,1,rmin) == 1 ){ 387 | beta_shell[i, ] <- beta_c 388 | }else{ 389 | beta_shell[i, ] <- beta_0 390 | } 391 | accept_shell[i] <- rmin 392 | } 393 | colnames(beta_shell) <- colnames(X) 394 | colnames(beta_shell)[1] <- 'intercept' 395 | return(list(beta_shell, accept_shell) ) 396 | } 397 | # Adaptive Metropolis Hastings 398 | sample_amh <- function(X, Y, iter, jump_v, 399 | ad_start, ad_stop, ad_int, ad_period){ 400 | # create shells 401 | p <- ncol(X) 402 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 403 | accept_shell <- numeric(length = iter) 404 | # starting values 405 | beta_shell[1,] <- rep(10, p) 406 | s <- 1 407 | for(i in 2:iter){ 408 | beta_0 <- beta_shell[i-1, ] 409 | if(i >= ad_start & i%%ad_int==0 & i <= ad_stop ){ 410 | accept_rate <- mean(accept_shell[ (i - ad_period):i ]) 411 | s <- s * (accept_rate/.234) # optimal acceptance rate 412 | jump_v <- s * cov(beta_shell[ (i - ad_period):(i-1) , ]) 413 | } 414 | # draw from proposal distribution 415 | beta_c <- 
mvrnorm(n = 1, beta_0, Sigma = jump_v ) 416 | # calculate ratio of conditional posterior densities 417 | r_num <- log_posterior(beta_c, X, Y ) 418 | r_denom <- log_posterior(beta_0, X, Y ) 419 | # calculate acceptance probability 420 | r <- exp(r_num - r_denom) 421 | rmin<-min(r,1) 422 | # accept or reject proposal 423 | if( rbinom(1,1,rmin) == 1 ){ 424 | beta_shell[i, ] <- beta_c 425 | }else{ 426 | beta_shell[i, ] <- beta_0 427 | } 428 | accept_shell[i] <- rmin 429 | } 430 | colnames(beta_shell) <- colnames(X) 431 | colnames(beta_shell)[1] <- 'intercept' 432 | return(list(beta_shell, accept_shell) ) 433 | } 434 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 435 | # create shells 436 | p <- ncol(X) 437 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 438 | accept_shell <- numeric(length = iter) 439 | # starting values 440 | beta_shell[1,] <- rep(10, p) 441 | for(i in 2:iter){ 442 | beta_0 <- beta_shell[i-1, ] 443 | # draw from proposal distribution 444 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 445 | # calculate ratio of conditional posterior densities 446 | r_num <- log_post(beta_c, X, Y ) 447 | r_denom <- log_post(beta_0, X, Y ) 448 | # calculate acceptance probability 449 | r <- exp(r_num - r_denom) 450 | rmin<-min(r,1) 451 | # accept or reject proposal 452 | if( rbinom(1,1,rmin) == 1 ){ 453 | beta_shell[i, ] <- beta_c 454 | }else{ 455 | beta_shell[i, ] <- beta_0 456 | } 457 | accept_shell[i] <- rmin 458 | } 459 | colnames(beta_shell) <- colnames(X) 460 | colnames(beta_shell)[1] <- 'intercept' 461 | return(list(beta_shell, accept_shell) ) 462 | } 463 | benchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 464 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 465 | ################################################################################ 466 | ### 2 - Run Sampler 467 | ################################################################################ 468 | iter <- 5000 469 | p <- ncol(X) 470 | benchmark(R_MH = sample_mh(X, 
Y, iter = iter, jump_v = .05), 471 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 472 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 473 | # create shells 474 | p <- ncol(X) 475 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 476 | accept_shell <- numeric(length = iter) 477 | # starting values 478 | beta_shell[1,] <- rep(10, p) 479 | for(i in 2:iter){ 480 | beta_0 <- beta_shell[i-1, ] 481 | # draw from proposal distribution 482 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 483 | # calculate ratio of conditional posterior densities 484 | r_num <- log_post(beta_c, Y, X ) 485 | r_denom <- log_post(beta_0, Y, X ) 486 | # calculate acceptance probability 487 | r <- exp(r_num - r_denom) 488 | rmin<-min(r,1) 489 | # accept or reject proposal 490 | if( rbinom(1,1,rmin) == 1 ){ 491 | beta_shell[i, ] <- beta_c 492 | }else{ 493 | beta_shell[i, ] <- beta_0 494 | } 495 | accept_shell[i] <- rmin 496 | } 497 | colnames(beta_shell) <- colnames(X) 498 | colnames(beta_shell)[1] <- 'intercept' 499 | return(list(beta_shell, accept_shell) ) 500 | } 501 | benchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 502 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 503 | sample_mh_cpp(X, Y, iter = iter, jump_v = .05) 504 | install.packages('microbenchmark') 505 | library(microbenchmark) 506 | microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 507 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05)) 508 | microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 509 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05),times = 10) 510 | Rcpp::sourceCpp('samples.cpp') 511 | Rcpp::sourceCpp('samples.cpp') 512 | library(Rcpp) 513 | -------------------------------------------------------------------------------- /AdapativeMCMCLogistic/LogisticAdaptive.r: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | library(LaplacesDemon) 3 | library(invgamma) 4 | 
library(MASS)
library(tidyr)
library(dplyr)
library(ggplot2)
library(microbenchmark)
library(Rcpp)

# NOTE(review): hard-coded absolute path; breaks on any other machine.
setwd("/Users/arman/Documents/StableMarkets/BayesianTutorials/AdapativeMCMCLogistic")
sourceCpp('samples.cpp')  # provides log_post(), the C++ log-posterior

################################################################################
### 0 - Simulate Data
################################################################################
set.seed(10)
N <- 1000

# Three age groups, dummy coded. Treatment assignment depends on age group
# (confounding by age); the binary outcome depends on age group and treatment.
d <- data.frame(age_group = sample(x = c(0, 1, 2), size = N, replace = TRUE))
d$age_1 <- ifelse(d$age_group == 1, 1, 0)
d$age_2 <- ifelse(d$age_group == 2, 1, 0)

d$trt <- rbinom(n = N, size = 1, prob = invlogit(0 + 2*d$age_1 - 2*d$age_2))

d$y <- rbinom(n = N, size = 1,
              prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt))

X <- as.matrix(cbind(1, d[, 2:4]))  # model matrix: intercept, age_1, age_2, trt
Y <- matrix(d$y, ncol = 1)          # outcome vector
p <- ncol(X)

################################################################################
### 1 - functions to sample from conditional posterior distributions
################################################################################

# Unnormalized log posterior of the coefficient vector for a Bernoulli/logit
# likelihood with a vague N(0, 1000^2 * I) prior.
#
# beta : numeric coefficient vector, length ncol(X)
# X    : model matrix
# Y    : 0/1 outcome vector
# Returns a single numeric value: log likelihood + log prior.
log_posterior <- function(beta, X, Y){

  # linear predictor, clamped to [-10, 10] to keep invlogit() away from 0/1
  # (avoids -Inf log likelihoods during early, far-from-posterior iterations)
  xb  <- X %*% beta
  xb  <- pmin(pmax(xb, -10), 10)
  p_i <- invlogit(xb)

  lik <- sum(dbern(Y, p_i, log = TRUE))

  # prior; dimension derived from beta itself rather than the global `p`,
  # which the original silently depended on
  k  <- length(beta)
  pr <- dmvn(x = beta, mu = rep(0, k), Sigma = (1000^2) * diag(k), log = TRUE)

  lik + pr
}

# smoke tests -- moved below the definition: the original file called
# log_posterior() before it was defined, which errors in a fresh session
log_post(c(1, 1, 1, 1), Y, X)
log_posterior(c(1, 1, 1, 1), X, Y)

# use Metropolis Hastings algorithm to sample from cond. post.
# Metropolis-Hastings sampler for the conditional posterior of beta.
#
# X      : model matrix; Y : 0/1 outcome vector
# iter   : number of MCMC iterations
# jump_v : scalar variance of the spherical normal proposal
# Returns list(beta draws [iter x p], acceptance probabilities [iter]).
sample_mh <- function(X, Y, iter, jump_v){

  # create shells
  p <- ncol(X)
  beta_shell <- matrix(NA, nrow = iter, ncol = p)
  accept_shell <- numeric(length = iter)

  # starting values (deliberately far from the posterior mode)
  beta_shell[1, ] <- rep(10, p)

  for(i in 2:iter){
    beta_0 <- beta_shell[i-1, ]

    # draw from proposal distribution
    beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v * diag(p))

    # ratio of conditional posterior densities (on the log scale)
    r_num   <- log_posterior(beta_c, X, Y)
    r_denom <- log_posterior(beta_0, X, Y)

    # acceptance probability
    rmin <- min(exp(r_num - r_denom), 1)

    # accept or reject proposal
    if( rbinom(1, 1, rmin) == 1 ){
      beta_shell[i, ] <- beta_c
    } else {
      beta_shell[i, ] <- beta_0
    }
    accept_shell[i] <- rmin
  }
  colnames(beta_shell) <- colnames(X)
  colnames(beta_shell)[1] <- 'intercept'
  return(list(beta_shell, accept_shell))
}

# Adaptive Metropolis-Hastings: between ad_start and ad_stop, every ad_int
# iterations the proposal covariance is re-estimated from the last ad_period
# draws and rescaled toward the optimal acceptance rate of .234
# (Roberts & Rosenthal). Same return value as sample_mh().
sample_amh <- function(X, Y, iter, jump_v,
                       ad_start, ad_stop, ad_int, ad_period){

  # create shells
  p <- ncol(X)
  beta_shell <- matrix(NA, nrow = iter, ncol = p)
  accept_shell <- numeric(length = iter)

  # starting values
  beta_shell[1, ] <- rep(10, p)

  s <- 1  # running scale factor for the adapted covariance

  for(i in 2:iter){
    beta_0 <- beta_shell[i-1, ]

    if(i >= ad_start && i %% ad_int == 0 && i <= ad_stop){
      # BUG FIX: the window previously ran through index i, whose acceptance
      # probability is not yet set (still 0), biasing the estimated rate
      # downward; it now ends at i-1, matching the covariance window.
      win <- (i - ad_period):(i - 1)
      accept_rate <- mean(accept_shell[win])
      s <- s * (accept_rate / .234)  # optimal acceptance rate
      jump_v <- s * cov(beta_shell[win, ])
    }

    # draw from (possibly adapted) proposal distribution
    beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v)

    # ratio of conditional posterior densities (on the log scale)
    r_num   <- log_posterior(beta_c, X, Y)
    r_denom <- log_posterior(beta_0, X, Y)

    # acceptance probability
    rmin <- min(exp(r_num - r_denom), 1)

    # accept or reject proposal
    if( rbinom(1, 1, rmin) == 1 ){
      beta_shell[i, ] <- beta_c
    } else {
      beta_shell[i, ] <- beta_0
    }
    accept_shell[i] <- rmin
  }

  colnames(beta_shell) <- colnames(X)
  colnames(beta_shell)[1] <- 'intercept'
  return(list(beta_shell, accept_shell))
}

# Same sampler as sample_mh() but evaluating the C++ log posterior log_post()
# from samples.cpp (note its argument order is (beta, Y, X)).
sample_mh_cpp <- function(X, Y, iter, jump_v){
  # create shells
  p <- ncol(X)
  beta_shell <- matrix(NA, nrow = iter, ncol = p)
  accept_shell <- numeric(length = iter)

  # starting values
  beta_shell[1, ] <- rep(10, p)

  for(i in 2:iter){
    beta_0 <- beta_shell[i-1, ]

    # draw from proposal distribution
    beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v * diag(p))

    # ratio of conditional posterior densities (on the log scale)
    r_num   <- log_post(beta_c, Y, X)
    r_denom <- log_post(beta_0, Y, X)

    # acceptance probability
    rmin <- min(exp(r_num - r_denom), 1)

    # accept or reject proposal
    if( rbinom(1, 1, rmin) == 1 ){
      beta_shell[i, ] <- beta_c
    } else {
      beta_shell[i, ] <- beta_0
    }
    accept_shell[i] <- rmin
  }
  colnames(beta_shell) <- colnames(X)
  colnames(beta_shell)[1] <- 'intercept'
  return(list(beta_shell, accept_shell))
}


################################################################################
### 2 - Run Sampler
################################################################################
iter <- 5000
p <- ncol(X)

res_mh <- sample_mh(X, Y, iter = iter, jump_v = .05)
samples_mh <- res_mh[[1]]

samples_mh_long <- data.frame(samples_mh) %>%
  mutate(iter = 1:iter) %>%
  gather(Beta, Draw, intercept:trt) %>%
  mutate(Sampler = 'MH')

res_amh <- sample_amh(X, Y, iter = iter, jump_v = diag(p),
                      ad_start = 102, ad_stop = 500,
                      ad_int = 100, ad_period = 100)
samples_amh <- res_amh[[1]]

samples_amh_long <- data.frame(samples_amh) %>%
  mutate(iter = 1:iter) %>%
  gather(Beta, Draw, intercept:trt) %>%
  mutate(Sampler = 'Adaptive MH')

all_samples <- bind_rows(samples_mh_long, samples_amh_long) %>%
  mutate(Beta = as.factor(Beta), Sampler = as.factor(Sampler)) %>%
  filter(iter > 1000)  # discard burn-in

################################################################################
### 3 - Benchmarks
################################################################################
microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05),
               Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05),
               times = 10)

################################################################################
### 4 - Plot Results
################################################################################

ggplot(all_samples, aes(x = iter, y = Draw, col = Sampler)) +
  geom_line() +
  facet_grid(Beta ~ .)
# running average acceptance probability: plain MH (black) vs adaptive MH (red)
par(mfrow = c(1, 1))
plot(cumsum(res_mh[[2]]) / seq_len(iter), type = 'l')
lines(cumsum(res_amh[[2]]) / seq_len(iter), col = 'red')


# trace plots per coefficient: gray = plain MH, black = adaptive MH,
# red horizontal line = true data-generating value
par(mfrow = c(2, 2))
plot(samples_mh[, 1], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Intercept', col = 'gray')
lines(samples_amh[, 1], col = 'black')
abline(h = -1, col = 'red')
plot(samples_mh[, 2], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Age1', col = 'gray')
lines(samples_amh[, 2], col = 'black')
abline(h = .7, col = 'red')
plot(samples_mh[, 3], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Age2', col = 'gray')
lines(samples_amh[, 3], col = 'black')
abline(h = 1.1, col = 'red')
plot(samples_mh[, 4], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Treatment', col = 'gray')
lines(samples_amh[, 4], col = 'black')
abline(h = 1.1, col = 'red')

## ---------------------------------------------------------------------------
## File: AdapativeMCMCLogistic/progressbar.R
## ---------------------------------------------------------------------------

# demo of a console progress bar wrapped around a long-running loop
iter <- 1000000

pb <- txtProgressBar(min = 1, max = iter, style = 3)
running_total <- 0  # renamed from `t`, which shadowed base::t() (transpose)
for(i in seq_len(iter)){
  running_total <- running_total + i
  setTxtProgressBar(pb, value = i)
}


# sanity check: sample covariance of multivariate-normal draws should be
# roughly the identity used as Sigma
draws <- mvrnorm(n = 10, mu = rep(0, 5), Sigma = diag(5))
cov(draws)

## ---------------------------------------------------------------------------
## File: BackfittingMCMC/BayesianResults.png (binary, omitted from dump):
## https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/BackfittingMCMC/BayesianResults.png
## ---------------------------------------------------------------------------
## File: BackfittingMCMC/MLR_backfittingMCMC.r
################################################################################ 2 | ###### 0 - Packages and Simulate Data 3 | ################################################################################ 4 | library(mvtnorm) 5 | library(invgamma) 6 | library(ggplot2) 7 | library(dplyr) 8 | library(tidyr) 9 | library(xtable) 10 | set.seed(200) 11 | 12 | n<-100 # number of observation 13 | # simulate model matrix 14 | xvec <- rnorm(n, 0, 1.5) 15 | x<-cbind(1, xvec, xvec^2,xvec^3) 16 | 17 | # true beta coefficients 18 | tb<-c(0, 50, -20, 10) 19 | 20 | # true phi 21 | tphi<-10000 22 | I<-diag(1,n,n) # identity matrix used for covariance matrix 23 | 24 | # simulate outcome for regression 25 | y<-t(rmvnorm(1, x%*%tb, tphi*I)) 26 | plot(xvec, y) 27 | # simulate many outcomes...used later for asymptotic evaluations 28 | y_list<-replicate(1000, t(rmvnorm(1, x%*%tb, tphi*I)),simplify = FALSE) 29 | 30 | ################################################################################ 31 | ###### 1 - Run Blocked Gibbs Sampler 32 | ################################################################################ 33 | 34 | # function for blocked gibbs sampler 35 | backfit_mcmc<-function(y, x, iter, burnin, trim){ 36 | # initialize gibbs 37 | xprimex_inv<-solve(t(x)%*%x) # calculate once for repeated use in sampler 38 | phi<-numeric(iter) # shell for phi 39 | p <- ncol(x) # number of beta parameters 40 | b<-matrix(nrow=iter, ncol = p) # shell for betas 41 | pred_y <- matrix(nrow=length(y), ncol=iter) 42 | 43 | phi[1]<-6 # initial phi value to start sampler 44 | b[1,] <- rnorm(p) # random initial values for betas 45 | 46 | # phi hyperparameters 47 | a<-.5 48 | g<-10000 49 | 50 | # beta hyperparameters 51 | # mu_0 <- 0 52 | # phi_0 <- 1000 53 | 54 | # gibbs sampling 55 | for(i in 2:iter ){ 56 | 57 | for(par in 1:p){ 58 | # compute residuals after applying other parameters 59 | x_sub <- x[,-par] 60 | b_sub <- b[i, -par] 61 | b_sub_prev <- b[i-1, -par] 62 | b_sub[is.na(b_sub)] <- 
b_sub_prev[is.na(b_sub)] 63 | 64 | r <- y - x_sub %*% t(t(b_sub)) 65 | 66 | # mean for this paramater 67 | x_j <- x[,par, drop=F] 68 | sum_xj_sq <- sum(x_j^2) 69 | sum_r_xj <- sum(as.vector(r)*as.vector(x_j)) 70 | 71 | b[i, par]<-rnorm(n = 1, mean = sum_r_xj/sum_xj_sq , sd = sqrt( phi[i-1]/sum_xj_sq ) ) 72 | } 73 | 74 | 75 | phi[i]<-rinvgamma(n = 1, 76 | shape = (n/2 + a), 77 | rate = .5*( t((y - x%*%t(t(b[i,])) ))%*%(y - x%*%t(t(b[i,])) ) ) + g) 78 | 79 | pred_y[,i] <- rmvnorm(1, x%*% t(t(b[i,])), phi[i]*I ) # posterior predictive draw. 80 | } 81 | 82 | # apply burnin and trimming 83 | keep_draws<-seq(burnin,iter,trim) 84 | phi<-phi[keep_draws] 85 | b<-b[keep_draws,] 86 | 87 | # format and output 88 | joint_post<-data.frame(b=b,phi=phi) 89 | colnames(joint_post)[1:(ncol(x))]<-paste0('B',0:(ncol(x)-1) ) 90 | 91 | joint_post_long<-gather(joint_post,keep_draws) %>% 92 | rename(param=keep_draws, draw=value) %>% 93 | mutate(iter=rep(keep_draws,ncol(joint_post))) 94 | 95 | return(list(joint_post_long, pred_y)) 96 | } 97 | 98 | # run gibbs sampler with specified parameters 99 | post_dist<-backfit_mcmc(y = y, x = x, iter = 10000, burnin = 5000, trim = 1) 100 | 101 | 102 | tt <- post_dist[[2]] # extract posterior predictive 103 | png(filename = 'OriginalData.png') 104 | plot(xvec, y, 105 | xlab = 'x', ylab='y', 106 | main='Simulated (x,y) Data', 107 | ylim=c(-2500,1500), pch=20) 108 | dev.off() 109 | 110 | png(filename = 'BayesianResults.png') 111 | plot(xvec, y, 112 | xlab = 'x', ylab='y', 113 | main='Posterior Mean Function fit using Backfitting MCMC', 114 | ylim=c(-2500,1500), pch=20) 115 | for(i in 5000:10000) lines(sort(xvec), sort(tt[,i]), col='gray') 116 | lines(sort(xvec), sort(rowMeans(tt[,9000:10000]) ), col='red', lwd=3) 117 | points(xvec, y, pch=20 ) 118 | legend('bottomright', 119 | legend = c('Posterior Predictive Mean', '5000 posterior predictive draws','Data'), 120 | col = c('red','gray','black'), pch = c(NA, 22, 20), bty='n', 121 | lty=c(1, NA, NA), 
lwd=c(1,5, NA) ) 122 | dev.off() -------------------------------------------------------------------------------- /BackfittingMCMC/OriginalData.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/BackfittingMCMC/OriginalData.png -------------------------------------------------------------------------------- /BayesianProbit/BayesProbitSim.r: -------------------------------------------------------------------------------- 1 | library(LaplacesDemon) 2 | library(truncnorm) 3 | library(mvtnorm) 4 | 5 | set.seed(1) 6 | 7 | n <- 2000 8 | 9 | d_list <- replicate(n = 100, expr = sim_dat(n=n), simplify = F) 10 | 11 | d <- d_list[[1]] 12 | 13 | 14 | gibbs_iter <- 200000 15 | burnin <- 10000 16 | trimm <- 1 17 | p <- 3 18 | 19 | beta <- matrix(NA, nrow = p, ncol = gibbs_iter) 20 | xm <- cbind(1, d$x1, d$x2) 21 | 22 | z <- rnorm(n = n, mean = 0, sd = 1) 23 | 24 | for(i in 1:gibbs_iter){ 25 | 26 | beta[, i] <- rcond_post_beta(nparams = p, xm=xm, y = z, psi = 1, 27 | beta_prior_mean = c(0,0,0), 28 | beta_prior_var = c(10,10,10) ) 29 | 30 | z[d$y==0] <- rtruncnorm(n = sum(d$y==0),a = -Inf, b = 0, mean = xm[d$y==0,] %*% beta[,i], sd = 1 ) 31 | z[d$y==1] <- rtruncnorm(n = sum(d$y==1),a = 0, b = Inf, mean = xm[d$y==1,] %*% beta[,i], sd = 1 ) 32 | 33 | 34 | } 35 | 36 | freqres <- summary(glm(data=d, formula = y ~ x1 + x2, family = binomial(link = 'probit'))) 37 | freqres$coefficients[,'Estimate'] 38 | 39 | post_draw_ind <- seq(burnin, gibbs_iter, trimm) 40 | par(mfrow=c(3,1)) 41 | plot(beta[1,post_draw_ind], type='l') 42 | abline(h=freqres$coefficients[1,'Estimate'], col='red', lwd=2) 43 | 44 | plot(beta[2,post_draw_ind], type='l') 45 | abline(h=freqres$coefficients[2,'Estimate'], col='red', lwd=2) 46 | 47 | plot(beta[3,post_draw_ind], type='l') 48 | abline(h=freqres$coefficients[3,'Estimate'], col='red', lwd=2) 49 | 50 | 
-------------------------------------------------------------------------------- /BayesianProbit/FunctionSourceCode.r: -------------------------------------------------------------------------------- 1 | rcond_post_beta <- function(nparams, y, xm, psi, 2 | beta_prior_mean, beta_prior_var){ 3 | 4 | mu_beta <- beta_prior_mean 5 | v_beta <- diag(beta_prior_var) 6 | v_beta_inv <- diag((1/beta_prior_var)) 7 | 8 | xtx <- t(xm)%*%xm 9 | 10 | post_cov <- solve( v_beta_inv + (1/psi)*xtx) 11 | post_mean <- post_cov %*% (v_beta_inv %*% mu_beta + (1/psi)*t(xm)%*%y ) 12 | 13 | draw <- rmvnorm(n = 1, mean = post_mean, sigma = post_cov) 14 | return(draw) 15 | } 16 | 17 | sim_dat <- function(n=10000){ 18 | x1 <- rbinom(n = n, size = 1, prob = .5) 19 | x2 <- rnorm(n = n, mean = 0, sd = 10) 20 | 21 | p <- invlogit(1 + -2*x1 + 1*x2) 22 | 23 | y <- rbinom(n = n, size = 1, prob = p) 24 | 25 | d <- data.frame(y=y, x1=x1, x2=x2) 26 | return(d) 27 | } 28 | -------------------------------------------------------------------------------- /BayesianSurvival/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/BayesianSurvival/.DS_Store -------------------------------------------------------------------------------- /BayesianSurvival/FunctionSourceCode.R: -------------------------------------------------------------------------------- 1 | log_post_beta <- function(beta, log_alpha, X, survt){ # shape 2 | alpha <- exp(log_alpha) 3 | mu <- X %*% beta 4 | lambda <- exp(-1*mu*alpha) 5 | 6 | lik <- sum(dweibull( survt, shape = alpha, scale = lambda, log=T)) 7 | pr <- sum(dnorm(x = beta, mean = 0, sd = 100, log = T)) 8 | return(lik + pr) 9 | } 10 | 11 | log_post_alpha <- function(log_alpha, beta, X, survt){ # scale 12 | alpha <- exp(log_alpha) 13 | 14 | mu <- X %*% beta 15 | lambda <- exp(-1*mu*alpha) 16 | 17 | lik <- sum(dweibull( survt, shape = alpha, 
scale = lambda, log=T)) 18 | pr <- dexp(x = alpha, rate = 1, log = T) 19 | return(lik + pr) 20 | } 21 | 22 | 23 | metrop_hastings<-function(x_0, iter=1, log_post_density, 24 | proposal_dist = function(x, prop_sigma){ 25 | MASS::mvrnorm(1, mu = x, Sigma = prop_sigma ) 26 | }, 27 | lower=-Inf, upper=Inf, prop_sigma, 28 | ... ){ 29 | for(i in 1:iter){ 30 | # draw from proposal distribution 31 | x_star <- proposal_dist(x_0, prop_sigma) 32 | 33 | # calculate ratio of conditional posterior densities 34 | r_num <- do.call(log_post_density, c(list(x_star), list(...)) ) 35 | r_denom <- do.call(log_post_density, c(list(x_0), list(...)) ) 36 | r <- exp(r_num - r_denom) 37 | rmin<-min(r,1) 38 | if(is.na(rmin)) browser() 39 | # accept / reject proposal 40 | if(rbinom(1,1,rmin)==1){ 41 | x_0<-x_star 42 | } 43 | } 44 | 45 | res<-list(x_0 = x_0, accept_prob = rmin ) 46 | return(res) 47 | } -------------------------------------------------------------------------------- /BayesianSurvival/StanWeibullAFT.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Weibull AFT Model in Stan" 3 | author: "Arman Oganisian" 4 | date: "3/9/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE) 10 | library(rstan) 11 | library(survival) 12 | ``` 13 | 14 | ```{r simulate_data, } 15 | set.seed(1) 16 | 17 | n <- 1000 18 | A <- rbinom(n, 1, .5) 19 | 20 | X <- model.matrix(~ A) 21 | 22 | true_beta <- (1/2)*matrix(c(-1/3, 2), ncol=1) 23 | true_mu <- X %*% true_beta 24 | 25 | true_sigma <- 1 26 | 27 | true_alpha <- 1/true_sigma 28 | true_lambda <- exp(-1*true_mu*true_alpha) 29 | 30 | # simulate censoring and survival times 31 | survt = rweibull(n, shape=true_alpha, scale = true_lambda) 32 | cent = rweibull(n, shape=true_alpha, scale = true_lambda) 33 | 34 | ## observed data: 35 | #censoring indicator 36 | delta <- cent < survt 37 | survt[delta==1] <- cent[delta==1] # censor survival time. 
38 | 39 | # count number of missing/censored survival times 40 | n_miss <- sum(delta) 41 | 42 | d_list <- list(N_m = n_miss, N_o = n - n_miss, P=2, # number of betas 43 | # data for censored subjects 44 | y_m=survt[delta==1], X_m=X[delta==1,], 45 | # data for uncensored subjects 46 | y_o=survt[delta==0], X_o=X[delta==0,]) 47 | ``` 48 | 49 | 50 | ```{stan specify_stan_mod, output.var="weibull_mod"} 51 | data { 52 | int P; // number of beta parameters 53 | 54 | // data for censored subjects 55 | int N_m; 56 | matrix[N_m,P] X_m; 57 | vector[N_m] y_m; 58 | 59 | // data for observed subjects 60 | int N_o; 61 | matrix[N_o,P] X_o; 62 | real y_o[N_o]; 63 | } 64 | 65 | parameters { 66 | vector[P] beta; 67 | real alpha; // Weibull Shape 68 | } 69 | 70 | transformed parameters{ 71 | // model Weibull rate as function of covariates 72 | vector[N_m] lambda_m; 73 | vector[N_o] lambda_o; 74 | 75 | // standard weibull AFT re-parameterization 76 | lambda_m = exp((X_m*beta)*alpha); 77 | lambda_o = exp((X_o*beta)*alpha); 78 | } 79 | 80 | model { 81 | beta ~ normal(0, 100); 82 | alpha ~ exponential(1); 83 | 84 | // evaluate likelihood for censored and uncensored subjects 85 | target += weibull_lpdf(y_o | alpha, lambda_o); 86 | target += weibull_lccdf(y_m | alpha, lambda_m); 87 | } 88 | 89 | 90 | // generate posterior quantities of interest 91 | generated quantities{ 92 | vector[1000] post_pred_trt; 93 | vector[1000] post_pred_pbo; 94 | real lambda_trt; 95 | real lambda_pbo; 96 | real hazard_ratio; 97 | 98 | // generate hazard ratio 99 | lambda_trt = exp((beta[1] + beta[2])*alpha ) ; 100 | lambda_pbo = exp((beta[1])*alpha ) ; 101 | 102 | hazard_ratio = exp(beta[2]*alpha ) ; 103 | 104 | // generate survival times (for plotting survival curves) 105 | for(i in 1:1000){ 106 | post_pred_trt[i] = weibull_rng(alpha, lambda_trt); 107 | post_pred_pbo[i] = weibull_rng(alpha, lambda_pbo); 108 | } 109 | } 110 | 111 | ``` 112 | 113 | ```{r run_stan_mod, } 114 | 115 | weibull_fit <- 
sampling(weibull_mod, 116 | data = d_list, 117 | chains = 1, iter=20000, warmup=15000, 118 | pars= c('hazard_ratio','post_pred_trt','post_pred_pbo')) 119 | 120 | post_draws<-extract(weibull_fit) 121 | ``` 122 | 123 | ```{r plot_hazard_ratio, } 124 | hist(post_draws$hazard_ratio, 125 | xlab='Hazard Ratio', main='Hazard Ratio Posterior Distribution') 126 | abline(v=.367, col='red') 127 | 128 | mean(post_draws$hazard_ratio) 129 | quantile(post_draws$hazard_ratio, probs = c(.025, .975)) 130 | ``` 131 | 132 | 133 | ```{r plot_survival,} 134 | library(survival) 135 | 136 | plot(survfit(Surv(survt, 1-delta) ~ A ), col=c('black','blue'), 137 | xlab='Time',ylab='Survival Probability') 138 | 139 | for(i in 1:5000){ 140 | trt_ecdf <- ecdf(post_draws$post_pred_trt[i,]) 141 | curve(1 - trt_ecdf(x), from = 0, to=4, add=T, col='gray') 142 | 143 | pbo_ecdf <- ecdf(post_draws$post_pred_pbo[i,]) 144 | curve(1 - pbo_ecdf(x), from = 0, to=4, add=T, col='lightblue') 145 | } 146 | 147 | lines(survfit(Surv(survt, 1-delta) ~ A ), col=c('black','blue'), add=T ) 148 | 149 | legend('topright', 150 | legend = c('KM Curve and Intervals (TRT)', 151 | 'Posterior Survival Draws (TRT)', 152 | 'KM Curve and Intervals (PBO)', 153 | 'Posterior Survival Draws (PBO)'), 154 | col=c('black','gray','blue','lightblue'), 155 | lty=c(1,0,1,0), pch=c(NA,15,NA,15), bty='n') 156 | ``` 157 | 158 | -------------------------------------------------------------------------------- /BayesianSurvival/weibull_nocovar.R: -------------------------------------------------------------------------------- 1 | # sources: 2 | # https://cran.r-project.org/web/packages/SurvRegCensCov/vignettes/weibull.pdf 3 | 4 | library(survival) 5 | library(truncdist) 6 | 7 | setwd("/Users/aoganisi/Box Sync/Research/Analyses/BNP_CE/code/") 8 | source("FunctionSourceCode.r") 9 | ################################################################################ 10 | ### 0 - Simulate Data 11 | 
################################################################################ 12 | set.seed(1) 13 | 14 | n <- 1000 15 | A <- rbinom(n, 1, .5) 16 | 17 | X <- model.matrix(~ A) 18 | 19 | true_beta <- (1/2)*matrix(c(-1/3, 2), ncol=1) 20 | true_mu <- X %*% true_beta 21 | 22 | true_sigma <- 1 23 | 24 | true_alpha <- 1/true_sigma 25 | true_lambda <- exp(-1*true_mu*true_alpha) 26 | 27 | hist(rweibull(n, shape=true_alpha, scale = true_lambda), breaks=100) 28 | 29 | # simulate censoring and survival times 30 | survt = rweibull(n, shape=true_alpha, scale = true_lambda) 31 | cent = rweibull(n, shape=true_alpha, scale = true_lambda) 32 | 33 | ## observed data: 34 | #censoring indicator 35 | delta <- cent < survt 36 | survt[delta==1] <- cent[delta==1] # censor survival time. 37 | 38 | # survt_all will combine observed and imputed survival times. 39 | survt_all <- survt 40 | 41 | # count number of missing/censored survival times 42 | n_miss <- sum(delta) 43 | row_miss <- c(1:n)[delta] # index for which rows are censored 44 | 45 | ################################################################################ 46 | ### 1 - Run Augmented Sampler Accounting for Censoring 47 | ################################################################################ 48 | iter <- 10000 # number of gibbs iterations 49 | burnin <- 9000 # burn-in iterations 50 | 51 | # shells for storing parameters 52 | hazard_ratio <- numeric(iter - burnin) 53 | 54 | # initial values 55 | beta_shell <- matrix(c(0,0), ncol=1) 56 | lalpha_shell <- c(0) 57 | 58 | prop_covar <- diag(c(.01,.01)) 59 | 60 | # plot stratified Kaplan-Meier 61 | par(mfrow=c(1,1)) 62 | plot(survfit(Surv(survt, 1-delta) ~ A),conf.int = F, col=c('blue','red'), 63 | xlab=c('Time'),ylab='Survival Probability', 64 | main = 'Data augmentation with all subjects') 65 | 66 | for(i in 2:iter){ 67 | ## sample from posterior of parameters, 68 | ## conditional on observed and missing survival times 69 | 70 | # metrop_hastings() is a custom function 
for generating a draw 71 | # from conditional posterior of beta: log_post_beta 72 | beta_shell <- metrop_hastings(x_0 = beta_shell, 73 | iter = 1, 74 | log_post_density = log_post_beta, 75 | prop_sigma = prop_covar, 76 | X=X, survt=survt_all, 77 | log_alpha=lalpha_shell )$x_0 78 | 79 | # sample from conditional posterior of alpha: log_post_alpha 80 | lalpha_shell <- metrop_hastings(x_0 = lalpha_shell, 81 | iter = 1, 82 | log_post_density = log_post_alpha, 83 | prop_sigma = matrix(.001), 84 | X=X, survt=survt_all, 85 | beta=beta_shell)$x_0 86 | 87 | ## sample from conditional posterior of missing survival times 88 | mu_curr <- X %*% beta_shell 89 | alpha_curr <- exp(lalpha_shell) 90 | 91 | for(m in row_miss){ 92 | lambda_curr <- exp(-1*mu_curr[m]*alpha_curr) 93 | 94 | survt_all[m] <- rtrunc(1, spec = 'weibull', 95 | a = survt[m], 96 | shape = alpha_curr, 97 | scale = lambda_curr) 98 | } 99 | 100 | if(i>burnin){ 101 | # plot 500 posterior survival curve draws for treated and placebo 102 | mu_trt <- sum(beta_shell) 103 | mu_pbo <- beta_shell[1] 104 | 105 | post_draw <- rweibull(n, shape = alpha_curr, scale = exp(-1*mu_trt*alpha_curr) ) 106 | post_ecdf <- ecdf(post_draw) 107 | curve(1-post_ecdf(x), add=T, from=0, to=15, col='lightblue') 108 | 109 | post_draw <- rweibull(n, shape = alpha_curr, scale = exp(-1*mu_pbo*alpha_curr) ) 110 | post_ecdf <- ecdf(post_draw) 111 | curve(1-post_ecdf(x), add=T, from=0, to=15, col='lightgray') 112 | 113 | # store hazard ratio 114 | hazard_ratio[i-burnin] <- exp(-beta_shell[2]*alpha_curr) 115 | } 116 | 117 | } 118 | 119 | # overlay KM curve and plot legend 120 | lines(survfit(Surv(survt, 1-delta) ~ A),conf.int = T, col=c('black','blue')) 121 | legend('topright', 122 | legend = c('KM Curve and Intervals (TRT)', 123 | 'Posterior Survival Draws (TRT)', 124 | 'KM Curve and Intervals (PBO)', 125 | 'Posterior Survival Draws (PBO)'), 126 | col=c('black','gray','blue','lightblue'), 127 | lty=c(1,0,1,0), pch=c(NA,15,NA,15), bty='n') 128 | 129 | 
plot(hazard_ratio, type='l') 130 | abline(h=exp(-true_beta[2]*true_alpha), col='red') 131 | 132 | hist(hazard_ratio) 133 | abline(v=exp(-true_beta[2]*true_alpha), col='red') 134 | 135 | -------------------------------------------------------------------------------- /LogisticReg/.Rhistory: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | library(LaplacesDemon) 3 | library(invgamma) 4 | library(MASS) 5 | library(profvis) 6 | ################################################################################ 7 | ### 0 - Simulate Data 8 | ################################################################################ 9 | set.seed(10) 10 | N<-1000 11 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 12 | d$age_1<-ifelse(d$age_group==1,1,0) 13 | d$age_2<-ifelse(d$age_group==2,1,0) 14 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 15 | d$y<-rbinom(n = N, size = 1, 16 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 17 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 18 | Y<-matrix(d$y, ncol=1) # outcome vector 19 | p<-ncol(X) 20 | ################################################################################ 21 | ### 1 - functions to sample from conditional posterior distributions 22 | ################################################################################ 23 | # unnormalized log posterior of beta vector 24 | log_cond_post_beta<-function(beta, phi, lambda, X, Y){ 25 | # calculate likelihood 26 | lik<-0 27 | for(i in 1:length(Y)){ 28 | xb <- X[i,] %*% beta 29 | xb<-ifelse(xb>10, 10, ifelse(xb< (-10),-10, xb)) 30 | p_i<-invlogit(xb) 31 | lik<-lik + Y[i]*log(p_i) + (1 - Y[i])*log(1 - p_i) 32 | } 33 | # calculate prior 34 | pr <- -.5 * (1/phi)*( t(beta - lambda)%*%(beta - lambda) ) 35 | log_cond_post <- lik + pr 36 | return(log_cond_post) 37 | } 38 | # use Metropolis Hastings algorithm to sample from cond. post. 
of beta 39 | rcond_post_beta_mh<-function(beta_0, phi, lambda, X, Y, mh_trials,jump_v){ 40 | for(i in 1:mh_trials){ 41 | # draw from proposal distribution 42 | beta_c <- mvrnorm(1,beta_0,Sigma = jump_v*diag(p)) 43 | # calculate ratio of conditional posterior densities 44 | r_num <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 45 | r_denom <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 46 | r <- exp(r_num - r_denom) 47 | rmin<-min(r,1) 48 | # accept or reject proposal 49 | accept<-0 50 | if(rmin>=1){ 51 | beta_0<-beta_c 52 | accept<-1 53 | }else{ 54 | if(rbinom(1,1,rmin)==1){ 55 | beta_0<-beta_c 56 | accept<-1 57 | } 58 | } 59 | } 60 | return(c(new_beta=beta_0, accept=accept) ) 61 | } 62 | ################################################################################ 63 | ### 2 - Run Gibbs Sampler 64 | ################################################################################ 65 | ### Gibbs Sampler 66 | # true hyperparameter values for betas 67 | lambda<-c(0,0,0,0) 68 | phi<-10000 # 69 | # shell for storing results 70 | gibbs_iter<-2000 + 1 71 | gibbs_res<-matrix(nrow=gibbs_iter, ncol=p+2) 72 | # initialize 73 | gibbs_res[1,1:p]<-c(0,0,0,0) 74 | View(gibbs_res) 75 | gibbs_res[1,1:p]<-c(0,0,0,0) 76 | for(i in 2:gibbs_iter){ 77 | # sample from posterior of phi 78 | # sample from posterior of beta vector ( using MH ) 79 | mh_draw <- rcond_post_beta_mh(gibbs_res[i-1,1:p], phi, 80 | lambda, X, Y, mh_trials=1, jump_v=.01) 81 | # store results 82 | gibbs_res[i,1:p] <- mh_draw[1:p] 83 | gibbs_res[i,p+2] <- mh_draw[p+1] 84 | } 85 | ################################################################################ 86 | ### 3 - Plot Results 87 | ################################################################################ 88 | par(mfrow=c(2,2)) 89 | plot(gibbs_res[,1],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 90 | main='Intercept') 91 | abline(h=-1,col='red') 92 | plot(gibbs_res[,2],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 93 
| main='Age1') 94 | abline(h=.7,col='red') 95 | plot(gibbs_res[,3],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 96 | main='Age2') 97 | abline(h=1.1,col='red') 98 | plot(gibbs_res[,4],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 99 | main='Treatment') 100 | abline(h=1.1,col='red') 101 | View(gibbs_res) 102 | View(gibbs_res) 103 | mean(gibbs_res[,6]) 104 | View(gibbs_res) 105 | mean(gibbs_res[,6], na.rm=T) 106 | -------------------------------------------------------------------------------- /LogisticReg/Logistic_GibbsMHcombo.r: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | 3 | library(LaplacesDemon) 4 | library(invgamma) 5 | library(MASS) 6 | library(profvis) 7 | 8 | ################################################################################ 9 | ### 0 - Simulate Data 10 | ################################################################################ 11 | set.seed(10) 12 | N<-1000 13 | 14 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 15 | d$age_1<-ifelse(d$age_group==1,1,0) 16 | d$age_2<-ifelse(d$age_group==2,1,0) 17 | 18 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 19 | 20 | d$y<-rbinom(n = N, size = 1, 21 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 22 | 23 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 24 | Y<-matrix(d$y, ncol=1) # outcome vector 25 | 26 | p<-ncol(X) 27 | 28 | ################################################################################ 29 | ### 1 - functions to sample from conditional posterior distributions 30 | ################################################################################ 31 | 32 | ### sample from conditional posterior of phi - conjugate! 
33 | rcond_post_phi<-function(beta, alpha, gamma, lambda, p){ 34 | 35 | post_alpha<-alpha + p/2 36 | post_gamma<-gamma + .5*t(beta - lambda)%*%(beta - lambda) 37 | draw<-invgamma::rinvgamma(n = 1, shape = post_alpha, rate = post_gamma) 38 | 39 | return(draw) 40 | } 41 | 42 | # unnormalized log posterior of beta vector 43 | log_cond_post_beta<-function(beta, phi, lambda, X, Y){ 44 | # calculate likelihood 45 | lik<-0 46 | for(i in 1:length(Y)){ 47 | xb <- X[i,] %*% beta 48 | xb<-ifelse(xb>10, 10, ifelse(xb< (-10),-10, xb)) 49 | 50 | p_i<-invlogit(xb) 51 | 52 | lik<-lik + Y[i]*log(p_i) + (1 - Y[i])*log(1 - p_i) 53 | } 54 | 55 | # calculate prior 56 | pr <- -.5 * (1/phi)*( t(beta - lambda)%*%(beta - lambda) ) 57 | 58 | log_cond_post <- lik + pr 59 | return(log_cond_post) 60 | } 61 | 62 | # use Metropolis Hastings algorithm to sample from cond. post. of beta 63 | rcond_post_beta_mh<-function(beta_0, phi, lambda, X, Y, mh_trials,jump_v){ 64 | 65 | for(i in 1:mh_trials){ 66 | # draw from proposal distribution 67 | beta_c <- mvrnorm(1,beta_0,Sigma = jump_v*diag(p)) 68 | 69 | # calculate ratio of conditional posterior densities 70 | r_num <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 71 | r_denom <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 72 | r <- exp(r_num - r_denom) 73 | rmin<-min(r,1) 74 | 75 | # accept or reject proposal 76 | accept<-0 77 | if(rmin>=1){ 78 | beta_0<-beta_c 79 | accept<-1 80 | }else{ 81 | if(rbinom(1,1,rmin)==1){ 82 | beta_0<-beta_c 83 | accept<-1 84 | } 85 | } 86 | } 87 | 88 | return(c(new_beta=beta_0, accept=accept) ) 89 | } 90 | 91 | ################################################################################ 92 | ### 2 - Run Gibbs Sampler 93 | ################################################################################ 94 | 95 | ### Gibbs Sampler 96 | # true hyperparameter values for phi 97 | alpha<-5 98 | gamma<-2 99 | 100 | # true hyperparameter values for betas 101 | lambda<-c(0,0,0,0) 102 | phi<-10000 # initialize 103 | 104 | 
# shell for storing results 105 | gibbs_iter<-2000 + 1 106 | gibbs_res<-matrix(nrow=gibbs_iter, ncol=p+2) 107 | 108 | # initialize 109 | gibbs_res[1,1:p]<-c(0,0,0,0) 110 | 111 | profvis(expr = { 112 | for(i in 2:gibbs_iter){ 113 | # sample from posterior of phi 114 | gibbs_res[i,p+1] <- rcond_post_phi(gibbs_res[i-1,1:p], 115 | alpha, gamma, lambda, p) 116 | # sample from posterior of beta vector ( using MH ) 117 | mh_draw <- rcond_post_beta_mh(gibbs_res[i-1,1:p], gibbs_res[i,p+1], 118 | lambda, X, Y, mh_trials=5, jump_v=.01) 119 | 120 | # store results 121 | gibbs_res[i,1:p] <- mh_draw[1:p] 122 | gibbs_res[i,p+2] <- mh_draw[p+1] 123 | } 124 | }) 125 | 126 | ################################################################################ 127 | ### 3 - Plot Results 128 | ################################################################################ 129 | 130 | par(mfrow=c(2,2)) 131 | plot(gibbs_res[,1],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 132 | main='Intercept') 133 | abline(h=-1,col='red') 134 | plot(gibbs_res[,2],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 135 | main='Age1') 136 | abline(h=.7,col='red') 137 | plot(gibbs_res[,3],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 138 | main='Age2') 139 | abline(h=1.1,col='red') 140 | plot(gibbs_res[,4],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 141 | main='Treatment') 142 | abline(h=1.1,col='red') 143 | 144 | # calculate posterior means and credible intervals 145 | post_burn_trim<-gibbs_res[seq(1000,gibbs_iter,100),] 146 | colMeans(post_burn_trim) 147 | apply(post_burn_trim, 2, quantile, p=c(.025,.975)) 148 | -------------------------------------------------------------------------------- /LogisticReg/LogitMCMCChains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/LogisticReg/LogitMCMCChains.png 
--------------------------------------------------------------------------------
/LogisticReg/profilevis1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/LogisticReg/profilevis1.png
--------------------------------------------------------------------------------
/LogisticReg/profvis2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/LogisticReg/profvis2.png
--------------------------------------------------------------------------------
/MH_with_caching/HelperFunctions.R:
--------------------------------------------------------------------------------
 1 | ## Author: Arman Oganisian
 2 | 
 3 | # Unnormalized log conditional posterior of the beta vector.
 4 | # Returns a single scalar: log-likelihood + log-prior (up to a constant).
 5 | log_cond_post_beta<-function(beta, phi, lambda, X, Y){
 6 |   # calculate likelihood; clamp the linear predictor so invlogit never
 7 |   # returns exactly 0 or 1 (which would make the log-density -Inf)
 8 |   xb <- X %*% beta
 9 |   xb <- ifelse(xb>10, 10, ifelse(xb< (-10),-10, xb))
10 |   p <- invlogit(xb)
11 | 
12 |   # BUG FIX: dbinom()/dnorm() with log=T return VECTORS of per-element
13 |   # log densities. The joint log posterior is their SUM. Previously
14 |   # lik + pr silently recycled a length-p vector against a length-N one
15 |   # and returned a length-N vector, so the downstream MH acceptance
16 |   # probability min(r, 1) was taken over elementwise ratios -- not a
17 |   # valid Metropolis-Hastings ratio. (The BayesianSurvival helper file
18 |   # sums these terms, confirming the intended contract.)
19 |   lik <- sum(dbinom(x = Y, size = 1, prob = p, log = T))
20 | 
21 |   # calculate prior: independent N(lambda, phi) priors on each coefficient
22 |   pr <- sum(dnorm(x = beta, mean = lambda, sd = sqrt(phi), log = T))
23 | 
24 |   log_cond_post <- lik + pr
25 |   return(log_cond_post)
26 | }
27 | 
28 | # use Metropolis Hastings algorithm to sample from cond. post.
of beta 20 | mh_vanilla <- function(beta_0, phi, lambda, X, Y, mh_trials,jump_v, p){ 21 | 22 | accept <- 0 23 | post_draws <- matrix(data = NA, nrow = mh_trials, ncol = length(beta_0)) 24 | jump_cov <- jump_v*diag(p) 25 | 26 | for(i in 1:mh_trials){ 27 | # draw from proposal distribution 28 | beta_c <- mvrnorm(1,beta_0,Sigma = jump_cov) 29 | 30 | # calculate ratio of conditional posterior densities 31 | r_num <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 32 | r_denom <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 33 | r <- exp(r_num - r_denom) 34 | rmin<-min(r,1) 35 | 36 | # accept or reject proposal 37 | if(rmin>=1){ 38 | beta_0 <- beta_c 39 | accept <- accept+1 40 | }else if(rbinom(1,1,rmin)==1){ 41 | beta_0 <- beta_c 42 | accept <- accept+1 43 | } 44 | 45 | post_draws[i, ] <- beta_0 46 | } 47 | 48 | return(list(post_draws=post_draws, accept=accept/mh_trials) ) 49 | } 50 | 51 | mh_cache <- function(beta_0, phi, lambda, X, Y, mh_trials,jump_v, p){ 52 | 53 | accept <- 0 54 | post_draws <- matrix(data = NA, nrow = mh_trials, ncol = length(beta_0)) 55 | jump_cov <- jump_v*diag(p) 56 | 57 | eval_curr <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 58 | 59 | for(i in 1:mh_trials){ 60 | # draw from proposal distribution 61 | beta_c <- mvrnorm(n = 1, mu = beta_0, Sigma = jump_cov) 62 | 63 | # calculate ratio of conditional posterior densities 64 | eval_prop <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 65 | r <- exp(eval_prop - eval_curr) 66 | 67 | rmin<-min(r,1) 68 | 69 | # accept or reject proposal 70 | if(rmin>=1){ 71 | beta_0 <- beta_c 72 | accept <- accept+1 73 | eval_curr <- eval_prop 74 | }else if(rbinom(1,1,rmin)==1){ 75 | beta_0 <- beta_c 76 | accept <- accept+1 77 | eval_curr <- eval_prop 78 | } 79 | 80 | post_draws[i, ] <- beta_0 81 | } 82 | 83 | return(list(post_draws=post_draws, accept=accept/mh_trials) ) 84 | } -------------------------------------------------------------------------------- /MH_with_caching/LogisticMHcache.r: 
-------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | 3 | library(microbenchmark) 4 | library(LaplacesDemon) 5 | library(MASS) 6 | source("HelperFunctions.R") 7 | 8 | set.seed(10) 9 | 10 | 11 | ################################################################################ 12 | ### 0 - Simulate Data 13 | ################################################################################ 14 | # hyper-parameters and true values 15 | lambda<-c(0,0,0,0) 16 | phi<-10 17 | true_beta <- matrix(c(0,2,1,-2),ncol=1) 18 | N<-50000 19 | 20 | # simulate covariates 21 | X1 <- rnorm(N) 22 | X2 <- rnorm(N) 23 | X3 <- rnorm(N) 24 | X <- model.matrix(~ X1 + X2 + X3) 25 | 26 | # simulate outcome 27 | Y <- rbinom(n = N, size = 1, prob = invlogit( X %*% true_beta ) ) 28 | 29 | ################################################################################ 30 | ### 1 - Run Benchmark 31 | ################################################################################ 32 | 33 | bench<-microbenchmark( 34 | # Run Vanialla Metropolis 35 | MH_vanilla = mh_vanilla(beta_0 = c(0,0,0,0), # initial value 36 | p=4, # number of parameters 37 | phi = phi, lambda = lambda, #hyperparameters 38 | X = X, Y = Y, # Data 39 | #iterations and proposal variance 40 | mh_trials=1000, jump_v=.2 ), 41 | # Run Metropolis with Cache 42 | MH_cache = mh_cache(beta_0 = c(0,0,0,0), 43 | phi = phi, lambda = lambda, X = X, Y = Y, 44 | mh_trials=1000, jump_v=.2, p=4), 45 | times = 10) 46 | bench 47 | 48 | ################################################################################ 49 | ### 2 - Plot Chains 50 | ################################################################################ 51 | 52 | ## could do a better job at proposal tuning...but not the point of this post. 
set.seed(1)
# long caching run used for the trace plots below
MH_cache <- mh_cache(beta_0 = c(0,0,0,0),
                     phi = 100, lambda = lambda, X = X, Y = Y,
                     mh_trials = 20000, jump_v = .02, p = 4)

# set.seed(1)
# MH_vanilla <- mh_vanilla(beta_0 = c(0,0,0,0),
#                          phi = 100, lambda = lambda, X = X, Y = Y,
#                          mh_trials = 2000, jump_v = .7, p = 4)


# trace plots for the four coefficients; red lines mark the true values
par(mfrow = c(2, 2))
plot(MH_cache$post_draws[, 1], type = 'l')
abline(h = 0, col = 'red')
plot(MH_cache$post_draws[, 2], type = 'l')
abline(h = 2, col = 'red')
plot(MH_cache$post_draws[, 3], type = 'l')
abline(h = 1, col = 'red')
plot(MH_cache$post_draws[, 4], type = 'l')
abline(h = -2, col = 'red')

################################################################################
# File: MultipleLinearReg/multiplelinearreg.r
################################################################################

################################################################################
###### 0 - Packages and Simulate Data
################################################################################
library(mvtnorm)
library(invgamma)
library(ggplot2)
library(dplyr)
library(tidyr)
library(xtable)
set.seed(200)

n <- 50  # number of observations
# model matrix: intercept plus three continuous covariates
x <- cbind(1, rnorm(n, 0, 1), rnorm(n, 5, 10), rnorm(n, 100, 10))

# true beta coefficients
tb <- c(1000, 50, -50, 10)

# true error variance (phi)
tphi <- 10000
I <- diag(1, n, n)  # identity matrix used for covariance matrix

# simulate outcome for regression
y <- t(rmvnorm(1, x %*% tb, tphi * I))

# simulate many outcomes...used later for asymptotic evaluations
# 1000 replicate outcome vectors from the same design; used in section 3 to
# evaluate bias and coverage of the posterior-median estimator
y_list <- replicate(1000, t(rmvnorm(1, x %*% tb, tphi * I)), simplify = FALSE)

################################################################################
###### 1 - Run Blocked Gibbs Sampler
################################################################################

# Blocked Gibbs sampler for Bayesian multiple linear regression.
#
# Args:
#   y      - n x 1 outcome vector (matrix).
#   x      - n x p design matrix (first column the intercept).
#   iter   - total number of Gibbs iterations.
#   burnin - iteration at which to start keeping draws.
#   trim   - thinning interval applied after burn-in.
# Returns a long-format data.frame with columns param, draw, iter holding the
# retained posterior draws of B0..B(p-1) and phi.
# NOTE(review): reads `n` (sample size) from the enclosing environment.
block_gibbs <- function(y, x, iter, burnin, trim){
  # initialize gibbs
  p_dim <- ncol(x)                  # number of coefficients
                                    # (beta shell was hard-coded to 4 columns;
                                    # generalized to any design width)
  xprimex_inv <- solve(t(x) %*% x)  # calculate once for repeated use in sampler
  phi <- numeric(iter)              # shell for phi
  b <- matrix(nrow = iter, ncol = p_dim)  # shell for betas
  phi[1] <- 6                       # initial phi value to start sampler

  # phi hyperparameters (inverse-gamma shape a, rate g)
  a <- .5
  g <- 10000

  # gibbs sampling
  for(i in 2:iter ){
    # beta | phi, y ~ MVN(OLS estimate, phi * (X'X)^-1)
    b[i,] <- rmvnorm(n = 1,
                     mean = ((xprimex_inv %*% t(x)) %*% y),
                     sigma = phi[i-1] * xprimex_inv )

    # phi | beta, y ~ Inverse-Gamma(n/2 + a, .5*SSR + g)
    resid <- y - x %*% t(t(b[i,]))
    phi[i] <- rinvgamma(n = 1,
                        shape = (n/2 + a),
                        rate = .5 * (t(resid) %*% resid) + g)
  }

  # apply burnin and trimming
  keep_draws <- seq(burnin, iter, trim)
  phi <- phi[keep_draws]
  b <- b[keep_draws,]

  # format and output in long form for ggplot/dplyr downstream
  joint_post <- data.frame(b = b, phi = phi)
  colnames(joint_post)[1:p_dim] <- paste0('B', 0:(p_dim - 1))

  joint_post_long <- gather(joint_post, keep_draws) %>%
    rename(param = keep_draws, draw = value) %>%
    mutate(iter = rep(keep_draws, ncol(joint_post)))

  return(joint_post_long)
}

# run gibbs sampler with specified parameters
post_dist <- block_gibbs(y = y, x = x, iter = 500000, burnin = 100000, trim = 50)

################################################################################
###### 2 - Summarize and Visualize Posterior Distributions
################################################################################

# posterior summary statistics per parameter (merged onto the draws below)
post_sum_stats <- post_dist %>%
  group_by(param) %>%
  summarise(median = median(draw),
            lwr = quantile(draw, .025),
            upr = quantile(draw, .975)) %>%
  mutate(true_vals = c(tb, tphi))

# merge on summary statistics
post_dist <- post_dist %>%
  left_join(post_sum_stats, by = 'param')

# plot MCMC Chains
ggplot(post_dist, aes(x = iter, y = draw)) +
  geom_line() +
  geom_hline(aes(yintercept = true_vals, col = 'red'), show.legend = FALSE) +
  facet_grid(param ~ ., scale = 'free_y', switch = 'y') +
  theme_bw() +
  xlab('Gibbs Sample Iteration') + ylab('MCMC Chains') +
  ggtitle('Gibbs Sampler MCMC Chains by Parameter')

# plot Posterior Distributions
ggplot(post_dist, aes(x = draw)) +
  geom_histogram(aes(x = draw), bins = 50) +
  geom_vline(aes(xintercept = true_vals, col = 'red'), show.legend = FALSE) +
  facet_grid(. ~ param, scale = 'free_x', switch = 'y') +
  theme_bw() +
  xlab('Posterior Distributions') + ylab('Count') +
  ggtitle('Posterior Distributions of Parameters (true values in red)')

################################################################################
###### 3 - Assess Bias and Coverage
################################################################################

# run the estimation 1000 times to get 1000 posterior medians and CIs
bayes_res <- lapply(y_list, block_gibbs, x = x, iter = 1000, burnin = 100, trim = 1)

# Summarise one replicate's posterior: median and central 95% interval per
# parameter, with the true values attached for comparison.
calc_sumstats <- function(post_dist){
  post_sum_stats <- post_dist %>%
    group_by(param) %>%
    summarise(post_median = median(draw),
              lwr = quantile(draw, .025),
              upr = quantile(draw, .975)) %>%
    mutate(true_vals = c(tb, tphi))
  return(post_sum_stats)
}


all_sum_stats <- lapply(bayes_res, calc_sumstats)
all_sum_stats_stack <- bind_rows(all_sum_stats) %>%
  arrange(param) %>%
  rename(est = post_median)

eval_sum <- all_sum_stats_stack %>%
  # BUG FIX: the coverage indicator was garbled ("true_valslwr"); the 95%
  # credible interval covers the truth exactly when lwr < true_vals < upr.
  mutate(covered = ifelse(true_vals < upr & true_vals > lwr, 1, 0)) %>%
  group_by(param) %>%
summarise(est_var=var(est), 136 | est_mean=mean(est), 137 | bias=mean(est-true_vals), 138 | true_val=mean(true_vals), 139 | coverage=mean(covered)) %>% 140 | mutate(perc_bias=(bias/true_val)*100) 141 | 142 | # format table 143 | eval_sum<-eval_sum[,c(1,5,3,2,4,7,6)] 144 | names(eval_sum)<-c('Parameter','True Value','Estimator Mean', 145 | 'Estimator Variance','Bias','Percent Bias (of truth)', 146 | 'Coverage of 95% CI') 147 | 148 | print.xtable(xtable(eval_sum, 149 | caption = 'Estimator Evaluation'), 150 | caption.placement = 'top', 151 | include.rownames = F) 152 | -------------------------------------------------------------------------------- /PartialPooling/.Rhistory: -------------------------------------------------------------------------------- 1 | beta_prior_var*Imat 2 | Imat 3 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 4 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 5 | interv_var = 'trt', interv_val = 1) 6 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 7 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 8 | interv_var = 'trt', interv_val = 1) 9 | Imat 10 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 11 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 12 | interv_var = 'trt', interv_val = 1) 13 | DPglm_res$param_shell[[i]]$param_z[, class, drop=F] 14 | xz_vec 15 | xz_vec %*% DPglm_res$param_shell[[i]]$param_z[, class, drop=F] 16 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 17 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 18 | interv_var = 'trt', interv_val = 1) 19 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 20 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 21 | interv_var = 'trt', interv_val = 1) 22 | source('~/Box 
Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 23 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 24 | interv_var = 'trt', interv_val = 1) 25 | xz_vec 26 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 27 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 28 | interv_var = 'trt', interv_val = 1) 29 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 30 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 31 | interv_var = 'trt', interv_val = 1) 32 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 33 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 34 | interv_var = 'trt', interv_val = 1) 35 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 36 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 37 | interv_var = 'trt', interv_val = 1) 38 | gamma_new 39 | t(gamma_new) 40 | beta_new 41 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 42 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 43 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 44 | interv_var = 'trt', interv_val = 1) 45 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 46 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 47 | interv_var = 'trt', interv_val = 1) 48 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 49 | set.seed(1) 50 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 51 | interv_var = 'trt', interv_val = 1) 52 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 53 | set.seed(1) 54 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 55 | interv_var = 'trt', interv_val = 1) 56 | source('~/Box 
Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 57 | set.seed(1) 58 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 59 | interv_var = 'trt', interv_val = 1) 60 | DPglm_res$param_shell[[i]]$param_z[, class, drop=F] 61 | class 62 | DPglm_res$param_shell[[i]]$param_z 63 | pvec 64 | table(c_shell[,1]) 65 | DPglm_res$param_shell[[1]]$param_z 66 | DPglm_res$param_shell[[2]]$param_z 67 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 68 | set.seed(1) 69 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 70 | interv_var = 'trt', interv_val = 1) 71 | warnings() 72 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 73 | set.seed(1) 74 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 75 | interv_var = 'trt', interv_val = 1) 76 | x_vec 77 | interv_var 78 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 79 | set.seed(1) 80 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 81 | interv_var = 'trt', interv_val = 1) 82 | xz_vec 83 | x_vec 84 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 85 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 86 | set.seed(1) 87 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 88 | interv_var = 'trt', interv_val = 1) 89 | x_vec 90 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 91 | set.seed(1) 92 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 93 | interv_var = 'trt', interv_val = 1) 94 | x_vec 95 | 2:(nparams-1) 96 | nparams 97 | x_vec 98 | nparams 99 | nparams-1 100 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 101 | set.seed(1) 102 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 103 | interv_var = 'trt', interv_val = 1) 104 | x_vec 105 | source('~/Box 
Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 106 | set.seed(1) 107 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 108 | interv_var = 'trt', interv_val = 1) 109 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 110 | set.seed(1) 111 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 112 | interv_var = 'trt', interv_val = 1) 113 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 114 | set.seed(1) 115 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 116 | interv_var = 'trt', interv_val = 1) 117 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 118 | set.seed(1) 119 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 120 | interv_var = 'trt', interv_val = 1) 121 | par(mfrow=c(3,1)) 122 | hist(EY_1,breaks=60) 123 | abline(v=EY_1_true, lwd=2, col='red') 124 | par(mfrow=c(1,1)) 125 | hist(EY_1,breaks=60) 126 | plot(EY_1, type='l') 127 | EY_1_true <- true_standard(interv_var = 'trt', interv_val = 1, 128 | iter=10000000, covar_names = c('age', 'trt')) 129 | EY_0_true <- true_standard(interv_var = 'trt', interv_val = 0, 130 | iter=10000000, covar_names = c('age', 'trt')) 131 | delta <- EY_1_true - EY_0_true 132 | delta 133 | abline(h = EY_1_true, col='red', lwd=3) 134 | library(nimble) 135 | setwd("/Users/aoganisi/Dropbox/Stable Markets/BayesianTutorials/PartialPooling") 136 | ################################################################################ 137 | ##### Simulate Data ##### 138 | ################################################################################ 139 | set.seed(10) 140 | d_A<-rbinom(n = 5, size = 1, prob = .2) 141 | d_B<-rbinom(n = 100, size = 1, prob = .3) 142 | d_C<-rbinom(n = 5, size = 1, prob = .45) 143 | d_D<-rbinom(n = 100, size = 1, prob = .7) 144 | d_E<-rbinom(n = 5, size = 1, prob = .8) 145 | approve <- c(d_A, d_B, d_C, d_D, d_E) 146 | industry <- 
as.factor(c(rep('A', 5), rep('B', 100), rep('C', 5), 147 | rep('D', 100), rep('E', 5) )) 148 | mod_mat <- model.matrix(lm(approve ~ industry)) 149 | d_list <- list(X = mod_mat, 150 | approve = approve) 151 | p <- ncol(mod_mat) 152 | n <- nrow(mod_mat) 153 | ################################################################################ 154 | ##### Bayesian Esimtate ##### 155 | ################################################################################ 156 | code <- nimbleCode({ 157 | for(i in 1:p){ 158 | beta[i] ~ dnorm(0, 2) 159 | } 160 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 161 | for(i in 1:n) { 162 | approve[i] ~ dbern(prob = eta[i] ) 163 | } 164 | p_approve[1] <- beta[1] 165 | p_approve[2] <- beta[1] + beta[2] 166 | p_approve[3] <- beta[1] + beta[3] 167 | p_approve[4] <- beta[1] + beta[4] 168 | p_approve[5] <- beta[1] + beta[5] 169 | }) 170 | merge_model <- nimbleModel(code=code, 171 | constants=list(p=p, n=n), 172 | inits = list(beta=c(0,0,0,0,0)), 173 | data=d_list) 174 | spec <- configureMCMC(merge_model) 175 | spec$addSampler(type = 'RW_block', target ='beta') 176 | spec$monitors <- c('p_approve') 177 | mcmc <- buildMCMC(spec) 178 | Cmodel <- compileNimble(merge_model) 179 | Cmcmc <- compileNimble(mcmc, project = merge_model) 180 | Cmcmc$run(10000) 181 | samples <- as.matrix(Cmcmc$mvSamples) 182 | summary(samples) 183 | ### no pooling 184 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 185 | vcov(no_pool_res) 186 | ?run 187 | compiled_mcmc 188 | compiled_mcmc 189 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 190 | compiled_mcmc 191 | compiled_mcmc$trace 192 | compiled_mcmc$thin 193 | spec <- configureMCMC(merge_model) 194 | spec$controlDefaults 195 | summary(compiled_mcmc) 196 | summary(compiled_mcmc$run(1000)) 197 | compiled_mcmc$mvSamples2 198 | mcmc <- buildMCMC(spec,burnin=1000) 199 | compiled_model <- compileNimble(merge_model) 200 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 201 
| compiled_mcmc$run(10000) 202 | samples <- as.matrix(compiled_mcmc$mvSamples) 203 | summary(samples) 204 | ?buildMCMC 205 | ?configureMCMC 206 | merge_model <- nimbleModel(code=code, 207 | constants=list(p=p, n=n), 208 | inits = list(beta=c(0,0,0,0,0)), 209 | data=d_list) 210 | spec <- configureMCMC(merge_model) 211 | spec$set 212 | spec$addSampler(type = 'RW_block', target ='beta', 213 | control = list(targetNodes='beta', 214 | adaptive = TRUE )) 215 | spec$monitors <- c('p_approve') 216 | mcmc <- buildMCMC(spec,) 217 | compiled_model <- compileNimble(merge_model) 218 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 219 | compiled_mcmc$run(10000) 220 | samples <- as.matrix(compiled_mcmc$mvSamples) 221 | summary(samples) 222 | merge_model <- nimbleModel(code=code, 223 | constants=list(p=p, n=n), 224 | inits = list(beta=c(0,0,0,0,0)), 225 | data=d_list) 226 | spec <- configureMCMC(merge_model) 227 | spec$set 228 | spec$addSampler(type = 'RW_block', target ='beta', 229 | control = list(targetNodes='beta', 230 | adaptive = TRUE )) 231 | spec$monitors <- c('p_approve') 232 | mcmc <- buildMCMC(spec) 233 | compiled_model <- compileNimble(merge_model) 234 | samples <- as.matrix(compiled_mcmc$mvSamples) 235 | summary(samples) 236 | code <- nimbleCode({ 237 | for(i in 1:p){ 238 | beta[i] ~ dnorm(0, 2) 239 | } 240 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 241 | for(i in 1:n) { 242 | approve[i] ~ dbern(prob = eta[i] ) 243 | } 244 | p_approve[1] <- expit(beta[1]) 245 | p_approve[2] <- expit(beta[1] + beta[2]) 246 | p_approve[3] <- expit(beta[1] + beta[3]) 247 | p_approve[4] <- expit(beta[1] + beta[4]) 248 | p_approve[5] <- expit(beta[1] + beta[5]) 249 | }) 250 | merge_model <- nimbleModel(code=code, 251 | constants=list(p=p, n=n), 252 | inits = list(beta=c(0,0,0,0,0)), 253 | data=d_list) 254 | spec <- configureMCMC(merge_model) 255 | spec$addSampler(type = 'RW_block', target ='beta', 256 | control = list(targetNodes='beta', 257 | adaptive = TRUE )) 258 | 
spec$monitors <- c('p_approve') 259 | mcmc <- buildMCMC(spec) 260 | compiled_model <- compileNimble(merge_model) 261 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 262 | compiled_mcmc$run(10000) 263 | samples <- as.matrix(compiled_mcmc$mvSamples) 264 | summary(samples) 265 | partial_pool_p <- colMeans(samples) 266 | ### no pooling 267 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 268 | # compute probabilities of merger for each industry separately. 269 | no_pooled_p <- predict(no_pool_res, 270 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 271 | type = 'response') 272 | ### complete pooling 273 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 274 | # compute probability of merger across all idustries, pooled. 275 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 276 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 277 | xlab='Industry', ylab='Probability of Merger') 278 | axis(1, at = 1:5, labels = paste0(partial_pool_p$industry, " (n =",c(5,10,20,10,5),')' ) ) 279 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 280 | points(1:5, no_pooled_p, pch=20, col='black') 281 | abline(h=pooled_p, lty=2) 282 | legend('bottomright', 283 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 284 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 285 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 286 | xlab='Industry', ylab='Probability of Merger') 287 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,10,20,10,5),')' ) ) 288 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 289 | points(1:5, no_pooled_p, pch=20, col='black') 290 | abline(h=pooled_p, lty=2) 291 | legend('bottomright', 292 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 293 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 294 | plot(partial_pool_p, pch=20, 
col='red', ylim=c(0,1), axes=F, 295 | xlab='Industry', ylab='Probability of Merger') 296 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 297 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 298 | points(1:5, no_pooled_p, pch=20, col='black') 299 | abline(h=pooled_p, lty=2) 300 | legend('bottomright', 301 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 302 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 303 | code <- nimbleCode({ 304 | for(i in 1:p){ 305 | beta[i] ~ dnorm(0, 3) 306 | } 307 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 308 | for(i in 1:n) { 309 | approve[i] ~ dbern(prob = eta[i] ) 310 | } 311 | p_approve[1] <- expit(beta[1]) 312 | p_approve[2] <- expit(beta[1] + beta[2]) 313 | p_approve[3] <- expit(beta[1] + beta[3]) 314 | p_approve[4] <- expit(beta[1] + beta[4]) 315 | p_approve[5] <- expit(beta[1] + beta[5]) 316 | }) 317 | merge_model <- nimbleModel(code=code, 318 | constants=list(p=p, n=n), 319 | inits = list(beta=c(0,0,0,0,0)), 320 | data=d_list) 321 | spec <- configureMCMC(merge_model) 322 | spec$addSampler(type = 'RW_block', target ='beta', 323 | control = list(targetNodes='beta', 324 | adaptive = TRUE )) 325 | spec$monitors <- c('p_approve') 326 | mcmc <- buildMCMC(spec) 327 | compiled_model <- compileNimble(merge_model) 328 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 329 | compiled_mcmc$run(10000) 330 | samples <- as.matrix(compiled_mcmc$mvSamples) 331 | summary(samples) 332 | partial_pool_p <- colMeans(samples) 333 | ################################################################################ 334 | ##### Frequentist Estimates ##### 335 | ################################################################################ 336 | ### no pooling 337 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 338 | # compute probabilities of merger for each industry separately. 
339 | no_pooled_p <- predict(no_pool_res, 340 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 341 | type = 'response') 342 | ### complete pooling 343 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 344 | # compute probability of merger across all idustries, pooled. 345 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 346 | ################################################################################ 347 | ##### Visualize Results ##### 348 | ################################################################################ 349 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 350 | xlab='Industry', ylab='Probability of Merger') 351 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 352 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 353 | points(1:5, no_pooled_p, pch=20, col='black') 354 | abline(h=pooled_p, lty=2) 355 | legend('bottomright', 356 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 357 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 358 | partial_pool_p <- colMeans(samples[5000:10000,]) 359 | ################################################################################ 360 | ##### Frequentist Estimates ##### 361 | ################################################################################ 362 | ### no pooling 363 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 364 | # compute probabilities of merger for each industry separately. 365 | no_pooled_p <- predict(no_pool_res, 366 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 367 | type = 'response') 368 | ### complete pooling 369 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 370 | # compute probability of merger across all idustries, pooled. 
371 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 372 | ################################################################################ 373 | ##### Visualize Results ##### 374 | ################################################################################ 375 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 376 | xlab='Industry', ylab='Probability of Merger') 377 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 378 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 379 | points(1:5, no_pooled_p, pch=20, col='black') 380 | abline(h=pooled_p, lty=2) 381 | legend('bottomright', 382 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 383 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 384 | library(nimble) 385 | setwd("/Users/aoganisi/Dropbox/Stable Markets/BayesianTutorials/PartialPooling") 386 | ################################################################################ 387 | ##### Simulate Data ##### 388 | ################################################################################ 389 | set.seed(10) 390 | d_A<-rbinom(n = 10, size = 1, prob = .2) 391 | d_B<-rbinom(n = 100, size = 1, prob = .3) 392 | d_C<-rbinom(n = 10, size = 1, prob = .45) 393 | d_D<-rbinom(n = 100, size = 1, prob = .7) 394 | d_E<-rbinom(n = 10, size = 1, prob = .8) 395 | approve <- c(d_A, d_B, d_C, d_D, d_E) 396 | industry <- as.factor(c(rep('A', 10), rep('B', 100), rep('C', 10), 397 | rep('D', 100), rep('E', 10) )) 398 | mod_mat <- model.matrix(lm(approve ~ industry)) 399 | d_list <- list(X = mod_mat, 400 | approve = approve) 401 | p <- ncol(mod_mat) 402 | n <- nrow(mod_mat) 403 | ################################################################################ 404 | ##### Bayesian Esimtate ##### 405 | ################################################################################ 406 | code <- nimbleCode({ 407 | for(i in 1:p){ 408 | beta[i] ~ dnorm(0, 
3) 409 | } 410 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 411 | for(i in 1:n) { 412 | approve[i] ~ dbern(prob = eta[i] ) 413 | } 414 | p_approve[1] <- expit(beta[1]) 415 | p_approve[2] <- expit(beta[1] + beta[2]) 416 | p_approve[3] <- expit(beta[1] + beta[3]) 417 | p_approve[4] <- expit(beta[1] + beta[4]) 418 | p_approve[5] <- expit(beta[1] + beta[5]) 419 | }) 420 | merge_model <- nimbleModel(code=code, 421 | constants=list(p=p, n=n), 422 | inits = list(beta=c(0,0,0,0,0)), 423 | data=d_list) 424 | spec <- configureMCMC(merge_model) 425 | spec$addSampler(type = 'RW_block', target ='beta', 426 | control = list(targetNodes='beta', 427 | adaptive = TRUE )) 428 | spec$monitors <- c('p_approve') 429 | mcmc <- buildMCMC(spec) 430 | compiled_model <- compileNimble(merge_model) 431 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 432 | compiled_mcmc$run(10000) 433 | samples <- as.matrix(compiled_mcmc$mvSamples) 434 | summary(samples) 435 | partial_pool_p <- colMeans(samples[5000:10000,]) 436 | ################################################################################ 437 | ##### Frequentist Estimates ##### 438 | ################################################################################ 439 | ### no pooling 440 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 441 | # compute probabilities of merger for each industry separately. 442 | no_pooled_p <- predict(no_pool_res, 443 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 444 | type = 'response') 445 | ### complete pooling 446 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 447 | # compute probability of merger across all idustries, pooled. 
448 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 449 | ################################################################################ 450 | ##### Visualize Results ##### 451 | ################################################################################ 452 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 453 | xlab='Industry', ylab='Probability of Merger') 454 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 455 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 456 | points(1:5, no_pooled_p, pch=20, col='black') 457 | abline(h=pooled_p, lty=2) 458 | legend('bottomright', 459 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 460 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 461 | d_A 462 | d_B 463 | d_C 464 | d_E 465 | sum(d_E) 466 | getwd() 467 | png(filename = 'PartialPool.png') 468 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 469 | xlab='Industry', ylab='Probability of Merger') 470 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(10,100,10,100,10),')' ) ) 471 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 472 | points(1:5, no_pooled_p, pch=20, col='black') 473 | abline(h=pooled_p, lty=2) 474 | legend('bottomright', 475 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 476 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 477 | dev.off() 478 | knitr::opts_chunk$set(echo = TRUE) 479 | head(data.frame(approve, industry)) 480 | data.frame(approve, industry)[5:15,] 481 | data.frame(approve, industry)[5:20,] 482 | data.frame(approve, industry)[5:30,] 483 | data.frame(approve, industry)[1:30,] 484 | data.frame(approve, industry)[30:40,] 485 | data.frame(approve, industry)[30:50,] 486 | data.frame(approve, industry)[110:120,] 487 | data.frame(approve, industry)[105:120,] 488 | library(dplyr) 489 | library(dplyr) 490 | library(tidyr) 491 | # 
glimpse of the data 492 | data.frame(approve, industry)[105:120,] 493 | data.frame(approve,industry) %>% 494 | group_by(industry) %>% 495 | summarise(mean(approve)) 496 | data.frame(approve,industry) %>% 497 | group_by(industry) %>% 498 | summarise(mean(approve), n()) 499 | data.frame(approve,industry) %>% 500 | group_by(industry) %>% 501 | summarise(mean(approve), n_mergers_started=n()) 502 | data.frame(approve,industry) %>% 503 | group_by(industry) %>% 504 | summarise(succes_rate=mean(approve), n_mergers_started=n()) 505 | mean(approve) 506 | knitr::opts_chunk$set(echo = T, warning = F, error = F, message = F) 507 | unpooled <- data.frame(approve,industry) %>% 508 | group_by(industry) %>% 509 | summarise(success_rate=mean(approve), n_mergers_started=n()) 510 | unpooled 511 | sessionInfo() 512 | getwd() 513 | -------------------------------------------------------------------------------- /PartialPooling/PartialPool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/PartialPooling/PartialPool.png -------------------------------------------------------------------------------- /PartialPooling/PartialPool.r: -------------------------------------------------------------------------------- 1 | library(nimble) 2 | setwd("/Users/aoganisi/Dropbox/Stable Markets/BayesianTutorials/PartialPooling") 3 | 4 | ################################################################################ 5 | ##### Simulate Data ##### 6 | ################################################################################ 7 | set.seed(10) 8 | 9 | d_A<-rbinom(n = 10, size = 1, prob = .2) 10 | d_B<-rbinom(n = 100, size = 1, prob = .3) 11 | d_C<-rbinom(n = 10, size = 1, prob = .45) 12 | d_D<-rbinom(n = 100, size = 1, prob = .7) 13 | d_E<-rbinom(n = 10, size = 1, prob = .8) 14 | 15 | approve <- c(d_A, d_B, d_C, d_D, d_E) 16 | industry <- as.factor(c(rep('A', 
10), rep('B', 100), rep('C', 10),
                        rep('D', 100), rep('E', 10)))

# design matrix with industry dummies (industry A is the reference level)
mod_mat <- model.matrix(lm(approve ~ industry))

d_list <- list(X = mod_mat,
               approve = approve)

p <- ncol(mod_mat)
n <- nrow(mod_mat)

################################################################################
#####                        Bayesian Estimate                             #####
################################################################################

code <- nimbleCode({

  # normal priors on the logit-scale coefficients
  for(i in 1:p){
    beta[i] ~ dnorm(0, 3)
  }

  # linear predictor on the logit scale
  logit(eta[1:n]) <- X[1:n, 1:p] %*% beta[1:p]

  # Bernoulli likelihood for each merger outcome
  for(i in 1:n) {
    approve[i] ~ dbern(prob = eta[i])
  }

  # derived quantities: approval probability for each of the five industries
  p_approve[1] <- expit(beta[1])
  p_approve[2] <- expit(beta[1] + beta[2])
  p_approve[3] <- expit(beta[1] + beta[3])
  p_approve[4] <- expit(beta[1] + beta[4])
  p_approve[5] <- expit(beta[1] + beta[5])

})

merge_model <- nimbleModel(code = code,
                           constants = list(p = p, n = n),
                           inits = list(beta = c(0, 0, 0, 0, 0)),
                           data = d_list)

# adaptive block random-walk sampler over all coefficients jointly,
# monitoring only the derived approval probabilities
spec <- configureMCMC(merge_model)
spec$addSampler(type = 'RW_block', target = 'beta',
                control = list(targetNodes = 'beta',
                               adaptive = TRUE))
spec$monitors <- c('p_approve')



mcmc <- buildMCMC(spec)

compiled_model <- compileNimble(merge_model)
compiled_mcmc <- compileNimble(mcmc, project = merge_model)

compiled_mcmc$run(10000)

samples <- as.matrix(compiled_mcmc$mvSamples)
summary(samples)

# posterior means, discarding the first half of the chain as burn-in
partial_pool_p <- colMeans(samples[5000:10000, ])

################################################################################
#####                       Frequentist Estimates                          #####
################################################################################

### no pooling
no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit'))

# compute probabilities of merger for each industry separately.
86 | no_pooled_p <- predict(no_pool_res, 87 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 88 | type = 'response') 89 | 90 | ### complete pooling 91 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 92 | 93 | # compute probability of merger across all idustries, pooled. 94 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 95 | 96 | 97 | ################################################################################ 98 | ##### Visualize Results ##### 99 | ################################################################################ 100 | 101 | png(filename = 'PartialPool.png') 102 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 103 | xlab='Industry', ylab='Probability of Merger') 104 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(10,100,10,100,10),')' ) ) 105 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 106 | 107 | points(1:5, no_pooled_p, pch=20, col='black') 108 | abline(h=pooled_p, lty=2) 109 | 110 | legend('bottomright', 111 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 112 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 113 | dev.off() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bayesian Tutorials 2 | This repo hosts code behind the series of posts that walk through MCMC implementations for various classes of Bayesian models. I wrote these in grad school as I was teaching myself Bayesian computation. I never felt like I truly understood something until I could implement it from scratch. The code is probably all very inefficient and perhaps even wrong, but is a kind of diary of my "from scratch" journey learning Bayes. 3 | 4 | In order of publication: 5 | 1. 
[Bayesian Simple Linear Regression with Gibbs](https://stablemarkets.wordpress.com/2017/08/07/bayesian-simple-linear-regression-with-gibbs-sampling-in-r/) 6 | 2. [Blocked Gibbs for Bayesian Multivariate Linear Regression](https://stablemarkets.wordpress.com/2017/09/05/blocked-gibbs-sampling-in-r-for-bayesian-multiple-linear-regression/) 7 | 3. [Metropolis Hastings-in-Gibbs Sampler for Bayesian Logistic Regression](https://stablemarkets.wordpress.com/2017/11/07/metropolis-in-gibbs-sampling-and-runtime-analysis-with-profviz/) 8 | 4. [Using Rcpp to speed up Metropolis-Hastings](https://stablemarkets.wordpress.com/2018/03/16/speeding-up-metropolis-hastings-with-rcpp/) 9 | 5. [Bayesian Inference with Backfitting MCMC](https://stablemarkets.wordpress.com/2018/05/03/bayesian-inference-with-backfitting-mcmc/) 10 | 6. [Efficient MCMC with Caching](https://stablemarkets.wordpress.com/2019/03/02/efficient-mcmc-with-caching/) 11 | -------------------------------------------------------------------------------- /RcppBoost/.Rhistory: -------------------------------------------------------------------------------- 1 | for(i in 2:iter){ 2 | beta_0 <- beta_shell[i-1, ] 3 | # draw from proposal distribution 4 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 5 | # calculate ratio of conditional posterior densities 6 | r_num <- log_posterior(beta_c, X, Y ) 7 | r_denom <- log_posterior(beta_0, X, Y ) 8 | # calculate acceptance probability 9 | r <- exp(r_num - r_denom) 10 | rmin<-min(r,1) 11 | # accept or reject proposal 12 | if( rbinom(1,1,rmin) == 1 ){ 13 | beta_shell[i, ] <- beta_c 14 | }else{ 15 | beta_shell[i, ] <- beta_0 16 | } 17 | accept_shell[i] <- rmin 18 | } 19 | colnames(beta_shell) <- colnames(X) 20 | colnames(beta_shell)[1] <- 'intercept' 21 | return(list(beta_shell, accept_shell) ) 22 | } 23 | # Metropolis-Hastings Sampler using log_post(), 24 | # which is the log posterior coded in C++. 
25 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 26 | # create shells 27 | p <- ncol(X) 28 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 29 | accept_shell <- numeric(length = iter) 30 | # starting values 31 | beta_shell[1,] <- rep(10, p) 32 | for(i in 2:iter){ 33 | beta_0 <- beta_shell[i-1, ] 34 | # draw from proposal distribution 35 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 36 | # calculate ratio of conditional posterior densities 37 | r_num <- log_post(beta_c, Y, X ) 38 | r_denom <- log_post(beta_0, Y, X ) 39 | # calculate acceptance probability 40 | r <- exp(r_num - r_denom) 41 | rmin<-min(r,1) 42 | # accept or reject proposal 43 | if( rbinom(1,1,rmin) == 1 ){ 44 | beta_shell[i, ] <- beta_c 45 | }else{ 46 | beta_shell[i, ] <- beta_0 47 | } 48 | accept_shell[i] <- rmin 49 | } 50 | colnames(beta_shell) <- colnames(X) 51 | colnames(beta_shell)[1] <- 'intercept' 52 | return(list(beta_shell, accept_shell) ) 53 | } 54 | ################################################################################ 55 | ### 2 - Test the Samplers 56 | ################################################################################ 57 | burnin <- 1000 58 | p <- ncol(X) 59 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 60 | par(mfrow=c(2,2)) 61 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 62 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 63 | abline(h= -1, col='red') 64 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 65 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 66 | abline(h= .7, col='red') 67 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 68 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 69 | abline(h= 1.1, col='red') 70 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 71 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 72 | abline(h= 1.1, col='red') 73 | par(mfrow=c(1,1)) 74 | plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 75 | xlab='MH Iteration', 
ylab='Cumulative Average Acceptance Rate', 76 | main='Acceptance Rate Over Sampling Run') 77 | abline(h= 1.1, col='red') 78 | burnin <- 1000 79 | p <- ncol(X) 80 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = 50000, jump_v = .03) 81 | #Author: Arman Oganisian 82 | library(LaplacesDemon) 83 | library(invgamma) 84 | library(MASS) 85 | library(tidyr) 86 | library(dplyr) 87 | library(ggplot2) 88 | library(microbenchmark) 89 | library(Rcpp) 90 | sourceCpp('log_post.cpp') 91 | ################################################################################ 92 | ### 0 - Simulate Data 93 | ################################################################################ 94 | set.seed(10) 95 | sim_dat <- function(N){ 96 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 97 | d$age_1<-ifelse(d$age_group==1,1,0) 98 | d$age_2<-ifelse(d$age_group==2,1,0) 99 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 100 | d$y<-rbinom(n = N, size = 1, 101 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 102 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 103 | Y<-matrix(d$y, ncol=1) # outcome vector 104 | return(list(X=X, Y=Y)) 105 | } 106 | d <- sim_dat(N=1000) 107 | X <- d$X 108 | Y <- d$Y 109 | ################################################################################ 110 | ### 1 - functions to sample from conditional posterior distributions 111 | ################################################################################ 112 | # unnormalized log posterior of beta vector 113 | log_posterior<-function(beta, X, Y){ 114 | # calculate likelihood 115 | xb <- X %*% beta 116 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 117 | p_i <- invlogit(xb) 118 | lik <- sum(dbern(Y, p_i, log = T)) 119 | # calculate prior 120 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 121 | log_cond_post <- lik + pr 122 | return(log_cond_post) 123 | } 124 | # Metropolis-Hastings Sampler using log_posterior(), 125 | # 
which is the log posterior coded in R. 126 | sample_mh<-function(X, Y, iter, jump_v){ 127 | # create shells 128 | p <- ncol(X) 129 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 130 | accept_shell <- numeric(length = iter) 131 | # starting values 132 | beta_shell[1,] <- rep(10, p) 133 | for(i in 2:iter){ 134 | beta_0 <- beta_shell[i-1, ] 135 | # draw from proposal distribution 136 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 137 | # calculate ratio of conditional posterior densities 138 | r_num <- log_posterior(beta_c, X, Y ) 139 | r_denom <- log_posterior(beta_0, X, Y ) 140 | # calculate acceptance probability 141 | r <- exp(r_num - r_denom) 142 | rmin<-min(r,1) 143 | # accept or reject proposal 144 | if( rbinom(1,1,rmin) == 1 ){ 145 | beta_shell[i, ] <- beta_c 146 | }else{ 147 | beta_shell[i, ] <- beta_0 148 | } 149 | accept_shell[i] <- rmin 150 | } 151 | colnames(beta_shell) <- colnames(X) 152 | colnames(beta_shell)[1] <- 'intercept' 153 | return(list(beta_shell, accept_shell) ) 154 | } 155 | # Metropolis-Hastings Sampler using log_post(), 156 | # which is the log posterior coded in C++. 
157 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 158 | # create shells 159 | p <- ncol(X) 160 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 161 | accept_shell <- numeric(length = iter) 162 | # starting values 163 | beta_shell[1,] <- rep(10, p) 164 | for(i in 2:iter){ 165 | beta_0 <- beta_shell[i-1, ] 166 | # draw from proposal distribution 167 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 168 | # calculate ratio of conditional posterior densities 169 | r_num <- log_post(beta_c, Y, X ) 170 | r_denom <- log_post(beta_0, Y, X ) 171 | # calculate acceptance probability 172 | r <- exp(r_num - r_denom) 173 | rmin<-min(r,1) 174 | # accept or reject proposal 175 | if( rbinom(1,1,rmin) == 1 ){ 176 | beta_shell[i, ] <- beta_c 177 | }else{ 178 | beta_shell[i, ] <- beta_0 179 | } 180 | accept_shell[i] <- rmin 181 | } 182 | colnames(beta_shell) <- colnames(X) 183 | colnames(beta_shell)[1] <- 'intercept' 184 | return(list(beta_shell, accept_shell) ) 185 | } 186 | ################################################################################ 187 | ### 2 - Test the Samplers 188 | ################################################################################ 189 | burnin <- 1000 190 | p <- ncol(X) 191 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = 50000, jump_v = .03) 192 | par(mfrow=c(2,2)) 193 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 194 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 195 | abline(h= -1, col='red') 196 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 197 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 198 | abline(h= .7, col='red') 199 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 200 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 201 | abline(h= 1.1, col='red') 202 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 203 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 204 | abline(h= 1.1, col='red') 205 | par(mfrow=c(1,1)) 206 | 
plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 207 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 208 | main='Acceptance Rate Over Sampling Run') 209 | abline(h= 1.1, col='red') 210 | #Author: Arman Oganisian 211 | library(LaplacesDemon) 212 | library(invgamma) 213 | library(MASS) 214 | library(tidyr) 215 | library(dplyr) 216 | library(ggplot2) 217 | library(microbenchmark) 218 | library(Rcpp) 219 | sourceCpp('log_post.cpp') 220 | ################################################################################ 221 | ### 0 - Simulate Data 222 | ################################################################################ 223 | set.seed(10) 224 | sim_dat <- function(N){ 225 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 226 | d$age_1<-ifelse(d$age_group==1,1,0) 227 | d$age_2<-ifelse(d$age_group==2,1,0) 228 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 229 | d$y<-rbinom(n = N, size = 1, 230 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 231 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 232 | Y<-matrix(d$y, ncol=1) # outcome vector 233 | return(list(X=X, Y=Y)) 234 | } 235 | d <- sim_dat(N=1000) 236 | X <- d$X 237 | Y <- d$Y 238 | ################################################################################ 239 | ### 1 - functions to sample from conditional posterior distributions 240 | ################################################################################ 241 | # unnormalized log posterior of beta vector 242 | log_posterior<-function(beta, X, Y){ 243 | # calculate likelihood 244 | xb <- X %*% beta 245 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 246 | p_i <- invlogit(xb) 247 | lik <- sum(dbern(Y, p_i, log = T)) 248 | # calculate prior 249 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 250 | log_cond_post <- lik + pr 251 | return(log_cond_post) 252 | } 253 | # Metropolis-Hastings Sampler using log_posterior(), 254 | # which is 
the log posterior coded in R. 255 | sample_mh<-function(X, Y, iter, jump_v){ 256 | # create shells 257 | p <- ncol(X) 258 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 259 | accept_shell <- numeric(length = iter) 260 | # starting values 261 | beta_shell[1,] <- rep(10, p) 262 | for(i in 2:iter){ 263 | beta_0 <- beta_shell[i-1, ] 264 | # draw from proposal distribution 265 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 266 | # calculate ratio of conditional posterior densities 267 | r_num <- log_posterior(beta_c, X, Y ) 268 | r_denom <- log_posterior(beta_0, X, Y ) 269 | # calculate acceptance probability 270 | r <- exp(r_num - r_denom) 271 | rmin<-min(r,1) 272 | # accept or reject proposal 273 | if( rbinom(1,1,rmin) == 1 ){ 274 | beta_shell[i, ] <- beta_c 275 | }else{ 276 | beta_shell[i, ] <- beta_0 277 | } 278 | accept_shell[i] <- rmin 279 | } 280 | colnames(beta_shell) <- colnames(X) 281 | colnames(beta_shell)[1] <- 'intercept' 282 | return(list(beta_shell, accept_shell) ) 283 | } 284 | # Metropolis-Hastings Sampler using log_post(), 285 | # which is the log posterior coded in C++. 
286 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 287 | # create shells 288 | p <- ncol(X) 289 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 290 | accept_shell <- numeric(length = iter) 291 | # starting values 292 | beta_shell[1,] <- rep(10, p) 293 | for(i in 2:iter){ 294 | beta_0 <- beta_shell[i-1, ] 295 | # draw from proposal distribution 296 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 297 | # calculate ratio of conditional posterior densities 298 | r_num <- log_post(beta_c, Y, X ) 299 | r_denom <- log_post(beta_0, Y, X ) 300 | # calculate acceptance probability 301 | r <- exp(r_num - r_denom) 302 | rmin<-min(r,1) 303 | # accept or reject proposal 304 | if( rbinom(1,1,rmin) == 1 ){ 305 | beta_shell[i, ] <- beta_c 306 | }else{ 307 | beta_shell[i, ] <- beta_0 308 | } 309 | accept_shell[i] <- rmin 310 | } 311 | colnames(beta_shell) <- colnames(X) 312 | colnames(beta_shell)[1] <- 'intercept' 313 | return(list(beta_shell, accept_shell) ) 314 | } 315 | ################################################################################ 316 | ### 2 - Test the Samplers 317 | ################################################################################ 318 | burnin <- 1000 319 | iter <- 100000 320 | p <- ncol(X) 321 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 322 | par(mfrow=c(2,2)) 323 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 324 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 325 | abline(h= -1, col='red') 326 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 327 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 328 | abline(h= .7, col='red') 329 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 330 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 331 | abline(h= 1.1, col='red') 332 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 333 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 334 | abline(h= 1.1, col='red') 335 | par(mfrow=c(1,1)) 336 | 
plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 337 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 338 | main='Acceptance Rate Over Sampling Run') 339 | abline(h= 1.1, col='red') 340 | ################################################################################ 341 | ### 3 - Benchmarks 342 | ################################################################################ 343 | iter <- 10000 344 | ss <- c(100, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000) 345 | rel_time <-numeric(length = length(ss)) 346 | for(i in 1:length(ss) ){ 347 | d <- sim_dat(N = ss[i]) 348 | X <- d$X 349 | Y <- d$Y 350 | bench<-microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .03), 351 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .03), 352 | times = 2) 353 | bench_sum <- summary(bench) 354 | r_time <- bench_sum$mean[bench_sum$expr=='R_MH'] 355 | rcpp_time <- bench_sum$mean[bench_sum$expr=='Cpp_MH'] 356 | rel_time[i] <- r_time/rcpp_time 357 | } 358 | plot(ss, rel_time, type='l') 359 | #Author: Arman Oganisian 360 | library(LaplacesDemon) 361 | library(invgamma) 362 | library(MASS) 363 | library(tidyr) 364 | library(dplyr) 365 | library(ggplot2) 366 | library(microbenchmark) 367 | library(Rcpp) 368 | sourceCpp('log_post.cpp') 369 | ################################################################################ 370 | ### 0 - Simulate Data 371 | ################################################################################ 372 | set.seed(10) 373 | sim_dat <- function(N){ 374 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 375 | d$age_1<-ifelse(d$age_group==1,1,0) 376 | d$age_2<-ifelse(d$age_group==2,1,0) 377 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 378 | d$y<-rbinom(n = N, size = 1, 379 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 380 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 381 | Y<-matrix(d$y, ncol=1) # outcome vector 382 | return(list(X=X, Y=Y)) 383 | } 
384 | d <- sim_dat(N=1000) 385 | X <- d$X 386 | Y <- d$Y 387 | ################################################################################ 388 | ### 1 - functions to sample from conditional posterior distributions 389 | ################################################################################ 390 | # unnormalized log posterior of beta vector 391 | log_posterior<-function(beta, X, Y){ 392 | # calculate likelihood 393 | xb <- X %*% beta 394 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 395 | p_i <- invlogit(xb) 396 | lik <- sum(dbern(Y, p_i, log = T)) 397 | # calculate prior 398 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 399 | log_cond_post <- lik + pr 400 | return(log_cond_post) 401 | } 402 | # Metropolis-Hastings Sampler using log_posterior(), 403 | # which is the log posterior coded in R. 404 | sample_mh<-function(X, Y, iter, jump_v){ 405 | # create shells 406 | p <- ncol(X) 407 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 408 | accept_shell <- numeric(length = iter) 409 | # starting values 410 | beta_shell[1,] <- rep(10, p) 411 | for(i in 2:iter){ 412 | beta_0 <- beta_shell[i-1, ] 413 | # draw from proposal distribution 414 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 415 | # calculate ratio of conditional posterior densities 416 | r_num <- log_posterior(beta_c, X, Y ) 417 | r_denom <- log_posterior(beta_0, X, Y ) 418 | # calculate acceptance probability 419 | r <- exp(r_num - r_denom) 420 | rmin<-min(r,1) 421 | # accept or reject proposal 422 | if( rbinom(1,1,rmin) == 1 ){ 423 | beta_shell[i, ] <- beta_c 424 | }else{ 425 | beta_shell[i, ] <- beta_0 426 | } 427 | accept_shell[i] <- rmin 428 | } 429 | colnames(beta_shell) <- colnames(X) 430 | colnames(beta_shell)[1] <- 'intercept' 431 | return(list(beta_shell, accept_shell) ) 432 | } 433 | # Metropolis-Hastings Sampler using log_post(), 434 | # which is the log posterior coded in C++. 
435 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 436 | # create shells 437 | p <- ncol(X) 438 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 439 | accept_shell <- numeric(length = iter) 440 | # starting values 441 | beta_shell[1,] <- rep(10, p) 442 | for(i in 2:iter){ 443 | beta_0 <- beta_shell[i-1, ] 444 | # draw from proposal distribution 445 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 446 | # calculate ratio of conditional posterior densities 447 | r_num <- log_post(beta_c, Y, X ) 448 | r_denom <- log_post(beta_0, Y, X ) 449 | # calculate acceptance probability 450 | r <- exp(r_num - r_denom) 451 | rmin<-min(r,1) 452 | # accept or reject proposal 453 | if( rbinom(1,1,rmin) == 1 ){ 454 | beta_shell[i, ] <- beta_c 455 | }else{ 456 | beta_shell[i, ] <- beta_0 457 | } 458 | accept_shell[i] <- rmin 459 | } 460 | colnames(beta_shell) <- colnames(X) 461 | colnames(beta_shell)[1] <- 'intercept' 462 | return(list(beta_shell, accept_shell) ) 463 | } 464 | ################################################################################ 465 | ### 2 - Test the Samplers 466 | ################################################################################ 467 | burnin <- 1000 468 | iter <- 100000 469 | p <- ncol(X) 470 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 471 | par(mfrow=c(2,2)) 472 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 473 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 474 | abline(h= -1, col='red') 475 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 476 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 477 | abline(h= .7, col='red') 478 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 479 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 480 | abline(h= 1.1, col='red') 481 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 482 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 483 | abline(h= 1.1, col='red') 484 | par(mfrow=c(1,1)) 485 | 
plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 486 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 487 | main='Acceptance Rate Over Sampling Run') 488 | abline(h= 1.1, col='red') 489 | ################################################################################ 490 | ### 3 - Benchmarks 491 | ################################################################################ 492 | iter <- 10000 493 | ss <- c(100, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000) 494 | rel_time <-numeric(length = length(ss)) 495 | for(i in 1:length(ss) ){ 496 | d <- sim_dat(N = ss[i]) 497 | X <- d$X 498 | Y <- d$Y 499 | bench<-microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .03), 500 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .03), 501 | times = 2) 502 | bench_sum <- summary(bench) 503 | r_time <- bench_sum$mean[bench_sum$expr=='R_MH'] 504 | rcpp_time <- bench_sum$mean[bench_sum$expr=='Cpp_MH'] 505 | rel_time[i] <- r_time/rcpp_time 506 | } 507 | ################################################################################ 508 | ### 4 - Plot Results 509 | ################################################################################ 510 | plot(ss, rel_time, type='l') 511 | plot(ss, rel_time, type='l', 512 | xlab='Data Sample Size', ylab='Relative Runtime (R v. 
Rcpp)') 513 | -------------------------------------------------------------------------------- /RcppBoost/AcceptProb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/RcppBoost/AcceptProb.png -------------------------------------------------------------------------------- /RcppBoost/Logisitc_MH_Rcpp.r: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | library(LaplacesDemon) 3 | library(invgamma) 4 | library(MASS) 5 | library(tidyr) 6 | library(dplyr) 7 | library(ggplot2) 8 | library(microbenchmark) 9 | library(Rcpp) 10 | 11 | sourceCpp('log_post.cpp') 12 | 13 | ################################################################################ 14 | ### 0 - Simulate Data 15 | ################################################################################ 16 | set.seed(10) 17 | 18 | sim_dat <- function(N){ 19 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 20 | d$age_1<-ifelse(d$age_group==1,1,0) 21 | d$age_2<-ifelse(d$age_group==2,1,0) 22 | 23 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 24 | 25 | d$y<-rbinom(n = N, size = 1, 26 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 27 | 28 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 29 | Y<-matrix(d$y, ncol=1) # outcome vector 30 | return(list(X=X, Y=Y)) 31 | } 32 | 33 | d <- sim_dat(N=1000) 34 | X <- d$X 35 | Y <- d$Y 36 | 37 | ################################################################################ 38 | ### 1 - functions to sample from conditional posterior distributions 39 | ################################################################################ 40 | 41 | # unnormalized log posterior of beta vector 42 | log_posterior<-function(beta, X, Y){ 43 | 44 | # calculate likelihood 45 | xb <- X %*% beta 46 | xb <- ifelse(xb>10, 10, 
ifelse( xb< (-10) ,-10, xb)) 47 | p_i <- invlogit(xb) 48 | 49 | lik <- sum(dbern(Y, p_i, log = T)) 50 | 51 | # calculate prior 52 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 53 | 54 | log_cond_post <- lik + pr 55 | return(log_cond_post) 56 | } 57 | 58 | # Metropolis-Hastings Sampler using log_posterior(), 59 | # which is the log posterior coded in R. 60 | sample_mh<-function(X, Y, iter, jump_v){ 61 | 62 | # create shells 63 | p <- ncol(X) 64 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 65 | accept_shell <- numeric(length = iter) 66 | 67 | # starting values 68 | beta_shell[1,] <- rep(10, p) 69 | 70 | for(i in 2:iter){ 71 | beta_0 <- beta_shell[i-1, ] 72 | 73 | # draw from proposal distribution 74 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 75 | 76 | # calculate ratio of conditional posterior densities 77 | r_num <- log_posterior(beta_c, X, Y ) 78 | r_denom <- log_posterior(beta_0, X, Y ) 79 | 80 | # calculate acceptance probability 81 | r <- exp(r_num - r_denom) 82 | rmin<-min(r,1) 83 | 84 | # accept or reject proposal 85 | if( rbinom(1,1,rmin) == 1 ){ 86 | beta_shell[i, ] <- beta_c 87 | }else{ 88 | beta_shell[i, ] <- beta_0 89 | } 90 | accept_shell[i] <- rmin 91 | 92 | } 93 | colnames(beta_shell) <- colnames(X) 94 | colnames(beta_shell)[1] <- 'intercept' 95 | return(list(beta_shell, accept_shell) ) 96 | } 97 | 98 | # Metropolis-Hastings Sampler using log_post(), 99 | # which is the log posterior coded in C++. 
100 | 101 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 102 | # create shells 103 | p <- ncol(X) 104 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 105 | accept_shell <- numeric(length = iter) 106 | 107 | # starting values 108 | beta_shell[1,] <- rep(10, p) 109 | 110 | for(i in 2:iter){ 111 | beta_0 <- beta_shell[i-1, ] 112 | 113 | # draw from proposal distribution 114 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 115 | 116 | # calculate ratio of conditional posterior densities 117 | r_num <- log_post(beta_c, Y, X ) 118 | r_denom <- log_post(beta_0, Y, X ) 119 | 120 | # calculate acceptance probability 121 | r <- exp(r_num - r_denom) 122 | rmin<-min(r,1) 123 | 124 | # accept or reject proposal 125 | if( rbinom(1,1,rmin) == 1 ){ 126 | beta_shell[i, ] <- beta_c 127 | }else{ 128 | beta_shell[i, ] <- beta_0 129 | } 130 | accept_shell[i] <- rmin 131 | 132 | } 133 | colnames(beta_shell) <- colnames(X) 134 | colnames(beta_shell)[1] <- 'intercept' 135 | return(list(beta_shell, accept_shell) ) 136 | } 137 | 138 | ################################################################################ 139 | ### 2 - Test the Samplers 140 | ################################################################################ 141 | 142 | burnin <- 1000 143 | iter <- 100000 144 | p <- ncol(X) 145 | 146 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 147 | 148 | par(mfrow=c(2,2)) 149 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 150 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 151 | abline(h= -1, col='red') 152 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 153 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 154 | abline(h= .7, col='red') 155 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 156 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 157 | abline(h= 1.1, col='red') 158 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 159 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 160 
| abline(h= 1.1, col='red') 161 | 162 | par(mfrow=c(1,1)) 163 | plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 164 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 165 | main='Acceptance Rate Over Sampling Run') 166 | abline(h= 1.1, col='red') 167 | 168 | ################################################################################ 169 | ### 3 - Benchmarks 170 | ################################################################################ 171 | iter <- 10000 172 | 173 | ss <- c(100, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000) 174 | 175 | rel_time <-numeric(length = length(ss)) 176 | for(i in 1:length(ss) ){ 177 | d <- sim_dat(N = ss[i]) 178 | X <- d$X 179 | Y <- d$Y 180 | 181 | bench<-microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .03), 182 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .03), 183 | times = 2) 184 | bench_sum <- summary(bench) 185 | r_time <- bench_sum$mean[bench_sum$expr=='R_MH'] 186 | rcpp_time <- bench_sum$mean[bench_sum$expr=='Cpp_MH'] 187 | rel_time[i] <- r_time/rcpp_time 188 | } 189 | 190 | plot(ss, rel_time, type='l', 191 | xlab='Data Sample Size', ylab='Relative Runtime (R v. 
Rcpp)') 192 | 193 | 194 | -------------------------------------------------------------------------------- /RcppBoost/MCMCchains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/RcppBoost/MCMCchains.png -------------------------------------------------------------------------------- /RcppBoost/Runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/RcppBoost/Runtime.png -------------------------------------------------------------------------------- /RcppBoost/log_post.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppArmadillo)]] 2 | #include 3 | using namespace Rcpp; 4 | 5 | arma::vec invlogit(arma::vec x){ 6 | arma::vec y = exp(x)/(1 + exp(x)); 7 | return(y); 8 | } 9 | 10 | // [[Rcpp::export]] 11 | double berntest(double y, double n, double p, int lg ){ 12 | return R::dbinom(y, n, p, lg); 13 | } 14 | 15 | // [[Rcpp::export]] 16 | double log_post(arma::vec b, arma::vec Y, arma::mat X) { 17 | int n = X.n_rows; 18 | int p = X.n_cols; 19 | arma::vec xb; 20 | arma::vec pi; 21 | double lik; 22 | double pr; 23 | double log_posterior; 24 | 25 | // linear predictor 26 | xb = X*b; 27 | 28 | // correct for numerical issues 29 | for( int i=0; i 10 ){ 31 | xb[i] = 10; 32 | }else if( xb[i] < -10 ){ 33 | xb[i] = -10; 34 | }else{ 35 | xb[i] = xb[i]; 36 | } 37 | } 38 | 39 | // apply inverse link function 40 | pi = invlogit(xb); 41 | 42 | // compute log likelihood contribution of each observation 43 | // and sum them 44 | lik = 0; 45 | for(int i=0; i