├── .DS_Store ├── AdapativeMCMCLogistic ├── .Rhistory ├── LogisticAdaptive.r └── progressbar.R ├── BackfittingMCMC ├── BayesianResults.png ├── MLR_backfittingMCMC.r └── OriginalData.png ├── BayesianProbit ├── BayesProbitSim.r └── FunctionSourceCode.r ├── BayesianSurvival ├── .DS_Store ├── FunctionSourceCode.R ├── StanWeibullAFT.Rmd └── weibull_nocovar.R ├── LogisticReg ├── .Rhistory ├── Logistic_GibbsMHcombo.r ├── LogitMCMCChains.png ├── profilevis1.png └── profvis2.png ├── MH_with_caching ├── HelperFunctions.R ├── LogisticMHcache.r └── traceplots.png ├── MultipleLinearReg └── multiplelinearreg.r ├── PartialPooling ├── .Rhistory ├── PartialPool.png └── PartialPool.r ├── README.md ├── RcppBoost ├── .Rhistory ├── AcceptProb.png ├── Logisitc_MH_Rcpp.r ├── MCMCchains.png ├── Runtime.png └── log_post.cpp ├── SimpleLinearReg ├── .DS_Store ├── BayesModel.R └── README.md └── SimulatedTempering ├── .DS_Store ├── make_gif-.gif ├── tempering.R ├── tempering.html ├── tempering.rmd ├── tempering_cache └── html │ ├── __packages │ ├── unnamed-chunk-1_e24126a3f59693241424fe88e04ca445.RData │ ├── unnamed-chunk-1_e24126a3f59693241424fe88e04ca445.rdb │ └── unnamed-chunk-1_e24126a3f59693241424fe88e04ca445.rdx └── tempering_files └── figure-html └── make_gif-.gif /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/.DS_Store -------------------------------------------------------------------------------- /AdapativeMCMCLogistic/.Rhistory: -------------------------------------------------------------------------------- 1 | alpha_shell <- matrix(0, nrow = M, ncol = gibbs_iter) 2 | sigma_shell <- matrix(0, nrow = 1, ncol = gibbs_iter) 3 | # initialize 4 | beta_shell[,1] <- rep(100, p) 5 | alpha_shell[,1] <- rnorm(n = M, mean = 0, sd = 100) 6 | sigma_shell[1] <- 200 7 | for(iter in 2:gibbs_iter){ 8 | beta_shell[,iter]<- metrop_hastings(x_0 = 
beta_shell[, iter-1], 9 | log_post_density=condpost_beta, 10 | proposal_dist =function(x, prop_sigma) rmvnorm(1, mean = x, sigma = prop_sigma*diag(p)), 11 | y = y, x = x, 12 | alpha = alpha_shell[,iter-1], 13 | sigma = sigma_shell[,iter-1], 14 | subjid = subjid, M=M, 15 | prop_sigma = 50)$x_0 16 | alpha_shell[,iter]<- metrop_hastings(x_0 = alpha_shell[, iter-1], 17 | log_post_density=condpost_alpha, 18 | proposal_dist =function(x, prop_sigma) rmvnorm(1, mean = x, sigma = prop_sigma*diag(M)), 19 | y = y, x = x, 20 | beta = beta_shell[,iter], 21 | sigma = sigma_shell[,iter-1], 22 | subjid = subjid, M=M, 23 | prop_sigma = 100)$x_0 24 | sigma_shell[,iter]<- metrop_hastings(x_0 = sigma_shell[, iter-1], 25 | log_post_density=condpost_sigma, 26 | proposal_dist =function(x, prop_sigma) rmvnorm(1, mean = x, sigma = prop_sigma*diag(1)), 27 | y = y, x = x, 28 | beta = beta_shell[,iter], 29 | alpha = alpha_shell[,iter], 30 | subjid = subjid, M=M, lower=0, 31 | prop_sigma = 100)$x_0 32 | } 33 | return(list(beta_shell=beta_shell, alpha_shell=alpha_shell, sigma_shell=sigma_shell)) 34 | } 35 | N_sims <- 1000 36 | gibbs_iter <- 1000 37 | sim_dat <- replicate(n = N_sims, simulate_data(M=50), simplify = F) 38 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 39 | library(mvtnorm) 40 | library(invgamma) 41 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 42 | library(lme4) 43 | lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) ) 44 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 45 | par(mfrow=c(2,1)) 46 | plot(res$beta_shell[1,], type='l') 47 | plot(res$beta_shell[2,], type='l') 48 | plot(res$sigma_shell, type='l') 49 | plot(res$sigma_shell[1,], type='l') 50 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 51 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 52 | library(MASS) 53 | N_sims <- 1000 54 | gibbs_iter <- 1000 55 
| sim_dat <- replicate(n = N_sims, simulate_data(M=50), simplify = F) 56 | library(mvtnorm) 57 | library(invgamma) 58 | library(lme4) 59 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 60 | warnings() 61 | par(mfrow=c(3,1)) 62 | plot(res$beta_shell[1,], type='l') 63 | plot(res$beta_shell[2,], type='l') 64 | plot(res$sigma_shell[1,], type='l') 65 | cov(c(1,1,23,234), c(1,23,5,3,2)) 66 | cov(x = cbind(rnorm(n = 100), rnorm(100))) 67 | cov(x = cbind(rnorm(n = 100, sd=50), rnorm(100, sd = 10))) 68 | 50^2 69 | cov(x = cbind(rnorm(n = 100, sd=50), rnorm(100, sd = 10))) 70 | cbind(c(10, 0), c(0,10)) 71 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 72 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 73 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 74 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 75 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 76 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 77 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 78 | prop_sigma <- jumping_var[[tuning_end]] 79 | prop_sigma 80 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 81 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 82 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 83 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 84 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 85 | jumping_var[[tuning_end]] 86 | jumping_var 87 | jumping_var[[1]] 88 | jumping_var[[2]] 89 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 90 | res<-gibbs_sampler(sim_dat[[1]], 
gibbs_iter=1000) 91 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 92 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 93 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 94 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 95 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 96 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 97 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 98 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=30) 99 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 100 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 101 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000) 102 | par(mfrow=c(3,1)) 103 | plot(res$beta_shell[1,], type='l') 104 | plot(res$beta_shell[2,], type='l') 105 | plot(res$sigma_shell[1,], type='l') 106 | res$`` 107 | res[[4]] 108 | sqrt(72) 109 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 110 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 111 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 500) 112 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 113 | par(mfrow=c(1,1)) 114 | plot(res$alpha_shell[3,], type='l') 115 | par(mfrow=c(3,1)) 116 | plot(res$beta_shell[1,], type='l') 117 | plot(res$beta_shell[2,], type='l') 118 | plot(res$sigma_shell[1,], type='l') 119 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 120 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 121 | par(mfrow=c(3,1)) 122 | plot(res$beta_shell[1,], 
type='l') 123 | plot(res$beta_shell[2,], type='l') 124 | plot(res$sigma_shell[1,], type='l') 125 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 126 | par(mfrow=c(3,1)) 127 | plot(res$beta_shell[1,], type='l') 128 | plot(res$beta_shell[2,], type='l') 129 | plot(res$sigma_shell[1,], type='l') 130 | res$prop_sigma 131 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 5) 132 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 133 | par(mfrow=c(3,1)) 134 | plot(res$beta_shell[1,], type='l') 135 | plot(res$beta_shell[2,], type='l') 136 | plot(res$sigma_shell[1,], type='l') 137 | res$prop_sigma 138 | sqrt(res$prop_sigma) 139 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 140 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 141 | par(mfrow=c(3,1)) 142 | plot(res$beta_shell[1,], type='l') 143 | plot(res$beta_shell[2,], type='l') 144 | plot(res$sigma_shell[1,], type='l') 145 | res$prop_sigma 146 | sqrt(12) 147 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 148 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 149 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 150 | par(mfrow=c(3,1)) 151 | plot(res$beta_shell[1,], type='l') 152 | plot(res$beta_shell[2,], type='l') 153 | plot(res$sigma_shell[1,], type='l') 154 | res$prop_sigma 155 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 156 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 157 | par(mfrow=c(3,1)) 158 | plot(res$beta_shell[1,], type='l') 159 | plot(res$beta_shell[2,], type='l') 160 | plot(res$sigma_shell[1,], type='l') 161 | res$prop_sigma 162 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 163 | res<-gibbs_sampler(sim_dat[[1]], 
gibbs_iter=1000, tuning_end = 10) 164 | par(mfrow=c(3,1)) 165 | plot(res$beta_shell[1,], type='l') 166 | plot(res$beta_shell[2,], type='l') 167 | plot(res$sigma_shell[1,], type='l') 168 | res$prop_sigma 169 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 170 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 10) 171 | par(mfrow=c(3,1)) 172 | plot(res$beta_shell[1,], type='l') 173 | plot(res$beta_shell[2,], type='l') 174 | plot(res$sigma_shell[1,], type='l') 175 | res$prop_sigma 176 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 100) 177 | par(mfrow=c(3,1)) 178 | plot(res$beta_shell[1,], type='l') 179 | plot(res$beta_shell[2,], type='l') 180 | plot(res$sigma_shell[1,], type='l') 181 | res$prop_sigma 182 | # par(mfrow=c(1,1)) 183 | # plot(res$alpha_shell[3,], type='l') 184 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 185 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 186 | par(mfrow=c(3,1)) 187 | plot(res$beta_shell[1,], type='l') 188 | plot(res$beta_shell[2,], type='l') 189 | plot(res$sigma_shell[1,], type='l') 190 | res$prop_sigma 191 | # par(mfrow=c(1,1)) 192 | # plot(res$alpha_shell[3,], type='l') 193 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 194 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 195 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 196 | par(mfrow=c(3,1)) 197 | plot(res$beta_shell[1,], type='l') 198 | plot(res$beta_shell[2,], type='l') 199 | plot(res$sigma_shell[1,], type='l') 200 | res$prop_sigma 201 | # par(mfrow=c(1,1)) 202 | # plot(res$alpha_shell[3,], type='l') 203 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 204 | source('~/Box Sync/Coursework/Spring 2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 205 | source('~/Box Sync/Coursework/Spring 
2018/Longitudinal Data Analysis/Final Project/code/FunctionSourceCode.R') 206 | res<-gibbs_sampler(sim_dat[[1]], gibbs_iter=1000, tuning_end = 2) 207 | par(mfrow=c(3,1)) 208 | plot(res$beta_shell[1,], type='l') 209 | plot(res$beta_shell[2,], type='l') 210 | plot(res$sigma_shell[1,], type='l') 211 | res$prop_sigma 212 | # par(mfrow=c(1,1)) 213 | # plot(res$alpha_shell[3,], type='l') 214 | summary(lmer(data = sim_dat[[1]], formula = y ~ t1 + (1 | subjid) )) 215 | library(ggplot2) 216 | setwd("/Users/arman/Documents/StableMarkets/BayesianTutorials/AdapativeMCMCLogistic") 217 | ################################################################################ 218 | ### 0 - Simulate Data 219 | ################################################################################ 220 | set.seed(10) 221 | N<-1000 222 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 223 | d$age_1<-ifelse(d$age_group==1,1,0) 224 | d$age_2<-ifelse(d$age_group==2,1,0) 225 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 226 | d$y<-rbinom(n = N, size = 1, 227 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 228 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 229 | Y<-matrix(d$y, ncol=1) # outcome vector 230 | p<-ncol(X) 231 | #Author: Arman Oganisian 232 | library(LaplacesDemon) 233 | library(invgamma) 234 | library(MASS) 235 | library(tidyr) 236 | library(dplyr) 237 | library(ggplot2) 238 | setwd("/Users/arman/Documents/StableMarkets/BayesianTutorials/AdapativeMCMCLogistic") 239 | ################################################################################ 240 | ### 0 - Simulate Data 241 | ################################################################################ 242 | set.seed(10) 243 | N<-1000 244 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 245 | d$age_1<-ifelse(d$age_group==1,1,0) 246 | d$age_2<-ifelse(d$age_group==2,1,0) 247 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 
2*d$age_2)) 248 | d$y<-rbinom(n = N, size = 1, 249 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 250 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 251 | Y<-matrix(d$y, ncol=1) # outcome vector 252 | p<-ncol(X) 253 | log_posterior<-function(beta, X, Y){ 254 | # calculate likelihood 255 | xb <- X %*% beta 256 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 257 | p_i <- invlogit(xb) 258 | lik <- sum(dbern(Y, p_i, log = T)) 259 | # calculate prior 260 | pr <- dnorm(x = beta, mean = 0, sd = 1000, log = T) 261 | log_cond_post <- lik + pr 262 | return(log_cond_post) 263 | } 264 | log_posterior(c(1,1,1,1), X, Y) 265 | log_posterior<-function(beta, X, Y){ 266 | # calculate likelihood 267 | xb <- X %*% beta 268 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 269 | p_i <- invlogit(xb) 270 | lik <- sum(dbern(Y, p_i, log = T)) 271 | # calculate prior 272 | pr <- dmvn(x = beta, mu = 0, sigma = 1000*diag(p), log = T) 273 | log_cond_post <- lik + pr 274 | return(log_cond_post) 275 | } 276 | log_posterior(c(1,1,1,1), X, Y) 277 | log_posterior<-function(beta, X, Y){ 278 | # calculate likelihood 279 | xb <- X %*% beta 280 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 281 | p_i <- invlogit(xb) 282 | lik <- sum(dbern(Y, p_i, log = T)) 283 | # calculate prior 284 | pr <- dmvn(x = beta, mu = 0, Sigma = 1000*diag(p), log = T) 285 | log_cond_post <- lik + pr 286 | return(log_cond_post) 287 | } 288 | log_posterior(c(1,1,1,1), X, Y) 289 | Rcpp::sourceCpp('samples.cpp') 290 | log_post(c(1,1,1,1), Y, X) 291 | log_posterior<-function(beta, X, Y){ 292 | # calculate likelihood 293 | xb <- X %*% beta 294 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 295 | p_i <- invlogit(xb) 296 | lik <- sum(dbern(Y, p_i, log = T)) 297 | # calculate prior 298 | pr <- dmvn(x = beta, mu = 0, Sigma = (1000^2)*diag(p), log = T) 299 | log_cond_post <- lik + pr 300 | return(log_cond_post) 301 | } 302 | log_posterior(c(1,1,1,1), X, Y) 303 | log_posterior<-function(beta, 
X, Y){ 304 | # calculate likelihood 305 | xb <- X %*% beta 306 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 307 | p_i <- invlogit(xb) 308 | lik <- sum(dbern(Y, p_i, log = T)) 309 | # calculate prior 310 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 311 | log_cond_post <- lik + pr 312 | return(log_cond_post) 313 | } 314 | log_posterior(c(1,1,1,1), X, Y) 315 | log_post(c(1,1,1,1), Y, X) 316 | Rcpp::sourceCpp('samples.cpp') 317 | Rcpp::sourceCpp('samples.cpp') 318 | log_posterior(c(1,1,1,1), X, Y) 319 | log_post(c(1,1,1,1), Y, X) 320 | Rcpp::sourceCpp('samples.cpp') 321 | log_post(c(1,1,1,1), Y, X) 322 | log_posterior(c(1,1,1,1), X, Y) 323 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 324 | # create shells 325 | p <- ncol(X) 326 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 327 | accept_shell <- numeric(length = iter) 328 | # starting values 329 | beta_shell[1,] <- rep(10, p) 330 | for(i in 2:iter){ 331 | beta_0 <- beta_shell[i-1, ] 332 | # draw from proposal distribution 333 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 334 | # calculate ratio of conditional posterior densities 335 | r_num <- log_post(beta_c, X, Y ) 336 | r_denom <- log_post(beta_0, X, Y ) 337 | # calculate acceptance probability 338 | r <- exp(r_num - r_denom) 339 | rmin<-min(r,1) 340 | # accept or reject proposal 341 | if( rbinom(1,1,rmin) == 1 ){ 342 | beta_shell[i, ] <- beta_c 343 | }else{ 344 | beta_shell[i, ] <- beta_0 345 | } 346 | accept_shell[i] <- rmin 347 | } 348 | colnames(beta_shell) <- colnames(X) 349 | colnames(beta_shell)[1] <- 'intercept' 350 | return(list(beta_shell, accept_shell) ) 351 | } 352 | library(rbenchmark) 353 | library(rbenchmark) 354 | benchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 355 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 356 | log_posterior<-function(beta, X, Y){ 357 | # calculate likelihood 358 | xb <- X %*% beta 359 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 360 | 
p_i <- invlogit(xb) 361 | lik <- sum(dbern(Y, p_i, log = T)) 362 | # calculate prior 363 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 364 | log_cond_post <- lik + pr 365 | return(log_cond_post) 366 | } 367 | # use Metropolis Hastings algorithm to sample from cond. post. of beta 368 | sample_mh<-function(X, Y, iter, jump_v){ 369 | # create shells 370 | p <- ncol(X) 371 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 372 | accept_shell <- numeric(length = iter) 373 | # starting values 374 | beta_shell[1,] <- rep(10, p) 375 | for(i in 2:iter){ 376 | beta_0 <- beta_shell[i-1, ] 377 | # draw from proposal distribution 378 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 379 | # calculate ratio of conditional posterior densities 380 | r_num <- log_posterior(beta_c, X, Y ) 381 | r_denom <- log_posterior(beta_0, X, Y ) 382 | # calculate acceptance probability 383 | r <- exp(r_num - r_denom) 384 | rmin<-min(r,1) 385 | # accept or reject proposal 386 | if( rbinom(1,1,rmin) == 1 ){ 387 | beta_shell[i, ] <- beta_c 388 | }else{ 389 | beta_shell[i, ] <- beta_0 390 | } 391 | accept_shell[i] <- rmin 392 | } 393 | colnames(beta_shell) <- colnames(X) 394 | colnames(beta_shell)[1] <- 'intercept' 395 | return(list(beta_shell, accept_shell) ) 396 | } 397 | # Adaptive Metropolis Hastings 398 | sample_amh <- function(X, Y, iter, jump_v, 399 | ad_start, ad_stop, ad_int, ad_period){ 400 | # create shells 401 | p <- ncol(X) 402 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 403 | accept_shell <- numeric(length = iter) 404 | # starting values 405 | beta_shell[1,] <- rep(10, p) 406 | s <- 1 407 | for(i in 2:iter){ 408 | beta_0 <- beta_shell[i-1, ] 409 | if(i >= ad_start & i%%ad_int==0 & i <= ad_stop ){ 410 | accept_rate <- mean(accept_shell[ (i - ad_period):i ]) 411 | s <- s * (accept_rate/.234) # optimal acceptance rate 412 | jump_v <- s * cov(beta_shell[ (i - ad_period):(i-1) , ]) 413 | } 414 | # draw from proposal distribution 415 | beta_c <- 
mvrnorm(n = 1, beta_0, Sigma = jump_v ) 416 | # calculate ratio of conditional posterior densities 417 | r_num <- log_posterior(beta_c, X, Y ) 418 | r_denom <- log_posterior(beta_0, X, Y ) 419 | # calculate acceptance probability 420 | r <- exp(r_num - r_denom) 421 | rmin<-min(r,1) 422 | # accept or reject proposal 423 | if( rbinom(1,1,rmin) == 1 ){ 424 | beta_shell[i, ] <- beta_c 425 | }else{ 426 | beta_shell[i, ] <- beta_0 427 | } 428 | accept_shell[i] <- rmin 429 | } 430 | colnames(beta_shell) <- colnames(X) 431 | colnames(beta_shell)[1] <- 'intercept' 432 | return(list(beta_shell, accept_shell) ) 433 | } 434 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 435 | # create shells 436 | p <- ncol(X) 437 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 438 | accept_shell <- numeric(length = iter) 439 | # starting values 440 | beta_shell[1,] <- rep(10, p) 441 | for(i in 2:iter){ 442 | beta_0 <- beta_shell[i-1, ] 443 | # draw from proposal distribution 444 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 445 | # calculate ratio of conditional posterior densities 446 | r_num <- log_post(beta_c, X, Y ) 447 | r_denom <- log_post(beta_0, X, Y ) 448 | # calculate acceptance probability 449 | r <- exp(r_num - r_denom) 450 | rmin<-min(r,1) 451 | # accept or reject proposal 452 | if( rbinom(1,1,rmin) == 1 ){ 453 | beta_shell[i, ] <- beta_c 454 | }else{ 455 | beta_shell[i, ] <- beta_0 456 | } 457 | accept_shell[i] <- rmin 458 | } 459 | colnames(beta_shell) <- colnames(X) 460 | colnames(beta_shell)[1] <- 'intercept' 461 | return(list(beta_shell, accept_shell) ) 462 | } 463 | benchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 464 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 465 | ################################################################################ 466 | ### 2 - Run Sampler 467 | ################################################################################ 468 | iter <- 5000 469 | p <- ncol(X) 470 | benchmark(R_MH = sample_mh(X, 
Y, iter = iter, jump_v = .05), 471 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 472 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 473 | # create shells 474 | p <- ncol(X) 475 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 476 | accept_shell <- numeric(length = iter) 477 | # starting values 478 | beta_shell[1,] <- rep(10, p) 479 | for(i in 2:iter){ 480 | beta_0 <- beta_shell[i-1, ] 481 | # draw from proposal distribution 482 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 483 | # calculate ratio of conditional posterior densities 484 | r_num <- log_post(beta_c, Y, X ) 485 | r_denom <- log_post(beta_0, Y, X ) 486 | # calculate acceptance probability 487 | r <- exp(r_num - r_denom) 488 | rmin<-min(r,1) 489 | # accept or reject proposal 490 | if( rbinom(1,1,rmin) == 1 ){ 491 | beta_shell[i, ] <- beta_c 492 | }else{ 493 | beta_shell[i, ] <- beta_0 494 | } 495 | accept_shell[i] <- rmin 496 | } 497 | colnames(beta_shell) <- colnames(X) 498 | colnames(beta_shell)[1] <- 'intercept' 499 | return(list(beta_shell, accept_shell) ) 500 | } 501 | benchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 502 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05) ) 503 | sample_mh_cpp(X, Y, iter = iter, jump_v = .05) 504 | install.packages('microbenchmark') 505 | library(microbenchmark) 506 | microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 507 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05)) 508 | microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05), 509 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05),times = 10) 510 | Rcpp::sourceCpp('samples.cpp') 511 | Rcpp::sourceCpp('samples.cpp') 512 | library(Rcpp) 513 | -------------------------------------------------------------------------------- /AdapativeMCMCLogistic/LogisticAdaptive.r: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | library(LaplacesDemon) 3 | library(invgamma) 4 | 
library(MASS)
library(tidyr)
library(dplyr)
library(ggplot2)
library(microbenchmark)
library(Rcpp)

# NOTE(review): hard-coded absolute path; breaks on any other machine.
setwd("/Users/arman/Documents/StableMarkets/BayesianTutorials/AdapativeMCMCLogistic")
sourceCpp('samples.cpp')  # provides log_post(), the C++ log-posterior

################################################################################
### 0 - Simulate Data
################################################################################
set.seed(10)
N <- 1000

# Three age groups, dummy coded. Treatment assignment depends on age group
# (confounding by age); the binary outcome depends on age group and treatment.
d <- data.frame(age_group = sample(x = c(0, 1, 2), size = N, replace = TRUE))
d$age_1 <- ifelse(d$age_group == 1, 1, 0)
d$age_2 <- ifelse(d$age_group == 2, 1, 0)

d$trt <- rbinom(n = N, size = 1, prob = invlogit(0 + 2*d$age_1 - 2*d$age_2))

d$y <- rbinom(n = N, size = 1,
              prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt))

X <- as.matrix(cbind(1, d[, 2:4]))  # model matrix: intercept, age_1, age_2, trt
Y <- matrix(d$y, ncol = 1)          # outcome vector
p <- ncol(X)

################################################################################
### 1 - functions to sample from conditional posterior distributions
################################################################################

# Unnormalized log posterior of the coefficient vector for a Bernoulli/logit
# likelihood with a vague N(0, 1000^2 * I) prior.
#
# beta : numeric coefficient vector, length ncol(X)
# X    : model matrix
# Y    : 0/1 outcome vector
# Returns a single numeric value: log likelihood + log prior.
log_posterior <- function(beta, X, Y){

  # linear predictor, clamped to [-10, 10] to keep invlogit() away from 0/1
  # (avoids -Inf log likelihoods during early, far-from-posterior iterations)
  xb  <- X %*% beta
  xb  <- pmin(pmax(xb, -10), 10)
  p_i <- invlogit(xb)

  lik <- sum(dbern(Y, p_i, log = TRUE))

  # prior; dimension derived from beta itself rather than the global `p`,
  # which the original silently depended on
  k  <- length(beta)
  pr <- dmvn(x = beta, mu = rep(0, k), Sigma = (1000^2) * diag(k), log = TRUE)

  lik + pr
}

# smoke tests -- moved below the definition: the original file called
# log_posterior() before it was defined, which errors in a fresh session
log_post(c(1, 1, 1, 1), Y, X)
log_posterior(c(1, 1, 1, 1), X, Y)

# use Metropolis Hastings algorithm to sample from cond. post.
# Metropolis-Hastings sampler for the conditional posterior of beta.
#
# X      : model matrix; Y : 0/1 outcome vector
# iter   : number of MCMC iterations
# jump_v : scalar variance of the spherical normal proposal
# Returns list(beta draws [iter x p], acceptance probabilities [iter]).
sample_mh <- function(X, Y, iter, jump_v){

  # create shells
  p <- ncol(X)
  beta_shell <- matrix(NA, nrow = iter, ncol = p)
  accept_shell <- numeric(length = iter)

  # starting values (deliberately far from the posterior mode)
  beta_shell[1, ] <- rep(10, p)

  for(i in 2:iter){
    beta_0 <- beta_shell[i-1, ]

    # draw from proposal distribution
    beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v * diag(p))

    # ratio of conditional posterior densities (on the log scale)
    r_num   <- log_posterior(beta_c, X, Y)
    r_denom <- log_posterior(beta_0, X, Y)

    # acceptance probability
    rmin <- min(exp(r_num - r_denom), 1)

    # accept or reject proposal
    if( rbinom(1, 1, rmin) == 1 ){
      beta_shell[i, ] <- beta_c
    } else {
      beta_shell[i, ] <- beta_0
    }
    accept_shell[i] <- rmin
  }
  colnames(beta_shell) <- colnames(X)
  colnames(beta_shell)[1] <- 'intercept'
  return(list(beta_shell, accept_shell))
}

# Adaptive Metropolis-Hastings: between ad_start and ad_stop, every ad_int
# iterations the proposal covariance is re-estimated from the last ad_period
# draws and rescaled toward the optimal acceptance rate of .234
# (Roberts & Rosenthal). Same return value as sample_mh().
sample_amh <- function(X, Y, iter, jump_v,
                       ad_start, ad_stop, ad_int, ad_period){

  # create shells
  p <- ncol(X)
  beta_shell <- matrix(NA, nrow = iter, ncol = p)
  accept_shell <- numeric(length = iter)

  # starting values
  beta_shell[1, ] <- rep(10, p)

  s <- 1  # running scale factor for the adapted covariance

  for(i in 2:iter){
    beta_0 <- beta_shell[i-1, ]

    if(i >= ad_start && i %% ad_int == 0 && i <= ad_stop){
      # BUG FIX: the window previously ran through index i, whose acceptance
      # probability is not yet set (still 0), biasing the estimated rate
      # downward; it now ends at i-1, matching the covariance window.
      win <- (i - ad_period):(i - 1)
      accept_rate <- mean(accept_shell[win])
      s <- s * (accept_rate / .234)  # optimal acceptance rate
      jump_v <- s * cov(beta_shell[win, ])
    }

    # draw from (possibly adapted) proposal distribution
    beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v)

    # ratio of conditional posterior densities (on the log scale)
    r_num   <- log_posterior(beta_c, X, Y)
    r_denom <- log_posterior(beta_0, X, Y)

    # acceptance probability
    rmin <- min(exp(r_num - r_denom), 1)

    # accept or reject proposal
    if( rbinom(1, 1, rmin) == 1 ){
      beta_shell[i, ] <- beta_c
    } else {
      beta_shell[i, ] <- beta_0
    }
    accept_shell[i] <- rmin
  }

  colnames(beta_shell) <- colnames(X)
  colnames(beta_shell)[1] <- 'intercept'
  return(list(beta_shell, accept_shell))
}

# Same sampler as sample_mh() but evaluating the C++ log posterior log_post()
# from samples.cpp (note its argument order is (beta, Y, X)).
sample_mh_cpp <- function(X, Y, iter, jump_v){
  # create shells
  p <- ncol(X)
  beta_shell <- matrix(NA, nrow = iter, ncol = p)
  accept_shell <- numeric(length = iter)

  # starting values
  beta_shell[1, ] <- rep(10, p)

  for(i in 2:iter){
    beta_0 <- beta_shell[i-1, ]

    # draw from proposal distribution
    beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v * diag(p))

    # ratio of conditional posterior densities (on the log scale)
    r_num   <- log_post(beta_c, Y, X)
    r_denom <- log_post(beta_0, Y, X)

    # acceptance probability
    rmin <- min(exp(r_num - r_denom), 1)

    # accept or reject proposal
    if( rbinom(1, 1, rmin) == 1 ){
      beta_shell[i, ] <- beta_c
    } else {
      beta_shell[i, ] <- beta_0
    }
    accept_shell[i] <- rmin
  }
  colnames(beta_shell) <- colnames(X)
  colnames(beta_shell)[1] <- 'intercept'
  return(list(beta_shell, accept_shell))
}


################################################################################
### 2 - Run Sampler
################################################################################
iter <- 5000
p <- ncol(X)

res_mh <- sample_mh(X, Y, iter = iter, jump_v = .05)
samples_mh <- res_mh[[1]]

samples_mh_long <- data.frame(samples_mh) %>%
  mutate(iter = 1:iter) %>%
  gather(Beta, Draw, intercept:trt) %>%
  mutate(Sampler = 'MH')

res_amh <- sample_amh(X, Y, iter = iter, jump_v = diag(p),
                      ad_start = 102, ad_stop = 500,
                      ad_int = 100, ad_period = 100)
samples_amh <- res_amh[[1]]

samples_amh_long <- data.frame(samples_amh) %>%
  mutate(iter = 1:iter) %>%
  gather(Beta, Draw, intercept:trt) %>%
  mutate(Sampler = 'Adaptive MH')

all_samples <- bind_rows(samples_mh_long, samples_amh_long) %>%
  mutate(Beta = as.factor(Beta), Sampler = as.factor(Sampler)) %>%
  filter(iter > 1000)  # discard burn-in

################################################################################
### 3 - Benchmarks
################################################################################
microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .05),
               Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .05),
               times = 10)

################################################################################
### 4 - Plot Results
################################################################################

ggplot(all_samples, aes(x = iter, y = Draw, col = Sampler)) +
  geom_line() +
  facet_grid(Beta ~ .)
# running average acceptance probability: plain MH (black) vs adaptive MH (red)
par(mfrow = c(1, 1))
plot(cumsum(res_mh[[2]]) / seq_len(iter), type = 'l')
lines(cumsum(res_amh[[2]]) / seq_len(iter), col = 'red')


# trace plots per coefficient: gray = plain MH, black = adaptive MH,
# red horizontal line = true data-generating value
par(mfrow = c(2, 2))
plot(samples_mh[, 1], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Intercept', col = 'gray')
lines(samples_amh[, 1], col = 'black')
abline(h = -1, col = 'red')
plot(samples_mh[, 2], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Age1', col = 'gray')
lines(samples_amh[, 2], col = 'black')
abline(h = .7, col = 'red')
plot(samples_mh[, 3], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Age2', col = 'gray')
lines(samples_amh[, 3], col = 'black')
abline(h = 1.1, col = 'red')
plot(samples_mh[, 4], type = 'l', xlab = 'MCMC Iterations', ylab = 'Coefficient Draw',
     main = 'Treatment', col = 'gray')
lines(samples_amh[, 4], col = 'black')
abline(h = 1.1, col = 'red')

## ---------------------------------------------------------------------------
## File: AdapativeMCMCLogistic/progressbar.R
## ---------------------------------------------------------------------------

# demo of a console progress bar wrapped around a long-running loop
iter <- 1000000

pb <- txtProgressBar(min = 1, max = iter, style = 3)
running_total <- 0  # renamed from `t`, which shadowed base::t() (transpose)
for(i in seq_len(iter)){
  running_total <- running_total + i
  setTxtProgressBar(pb, value = i)
}


# sanity check: sample covariance of multivariate-normal draws should be
# roughly the identity used as Sigma
draws <- mvrnorm(n = 10, mu = rep(0, 5), Sigma = diag(5))
cov(draws)

## ---------------------------------------------------------------------------
## File: BackfittingMCMC/BayesianResults.png (binary, omitted from dump):
## https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/BackfittingMCMC/BayesianResults.png
## ---------------------------------------------------------------------------
## File: BackfittingMCMC/MLR_backfittingMCMC.r
################################################################################ 2 | ###### 0 - Packages and Simulate Data 3 | ################################################################################ 4 | library(mvtnorm) 5 | library(invgamma) 6 | library(ggplot2) 7 | library(dplyr) 8 | library(tidyr) 9 | library(xtable) 10 | set.seed(200) 11 | 12 | n<-100 # number of observation 13 | # simulate model matrix 14 | xvec <- rnorm(n, 0, 1.5) 15 | x<-cbind(1, xvec, xvec^2,xvec^3) 16 | 17 | # true beta coefficients 18 | tb<-c(0, 50, -20, 10) 19 | 20 | # true phi 21 | tphi<-10000 22 | I<-diag(1,n,n) # identity matrix used for covariance matrix 23 | 24 | # simulate outcome for regression 25 | y<-t(rmvnorm(1, x%*%tb, tphi*I)) 26 | plot(xvec, y) 27 | # simulate many outcomes...used later for asymptotic evaluations 28 | y_list<-replicate(1000, t(rmvnorm(1, x%*%tb, tphi*I)),simplify = FALSE) 29 | 30 | ################################################################################ 31 | ###### 1 - Run Blocked Gibbs Sampler 32 | ################################################################################ 33 | 34 | # function for blocked gibbs sampler 35 | backfit_mcmc<-function(y, x, iter, burnin, trim){ 36 | # initialize gibbs 37 | xprimex_inv<-solve(t(x)%*%x) # calculate once for repeated use in sampler 38 | phi<-numeric(iter) # shell for phi 39 | p <- ncol(x) # number of beta parameters 40 | b<-matrix(nrow=iter, ncol = p) # shell for betas 41 | pred_y <- matrix(nrow=length(y), ncol=iter) 42 | 43 | phi[1]<-6 # initial phi value to start sampler 44 | b[1,] <- rnorm(p) # random initial values for betas 45 | 46 | # phi hyperparameters 47 | a<-.5 48 | g<-10000 49 | 50 | # beta hyperparameters 51 | # mu_0 <- 0 52 | # phi_0 <- 1000 53 | 54 | # gibbs sampling 55 | for(i in 2:iter ){ 56 | 57 | for(par in 1:p){ 58 | # compute residuals after applying other parameters 59 | x_sub <- x[,-par] 60 | b_sub <- b[i, -par] 61 | b_sub_prev <- b[i-1, -par] 62 | b_sub[is.na(b_sub)] <- 
b_sub_prev[is.na(b_sub)] 63 | 64 | r <- y - x_sub %*% t(t(b_sub)) 65 | 66 | # mean for this paramater 67 | x_j <- x[,par, drop=F] 68 | sum_xj_sq <- sum(x_j^2) 69 | sum_r_xj <- sum(as.vector(r)*as.vector(x_j)) 70 | 71 | b[i, par]<-rnorm(n = 1, mean = sum_r_xj/sum_xj_sq , sd = sqrt( phi[i-1]/sum_xj_sq ) ) 72 | } 73 | 74 | 75 | phi[i]<-rinvgamma(n = 1, 76 | shape = (n/2 + a), 77 | rate = .5*( t((y - x%*%t(t(b[i,])) ))%*%(y - x%*%t(t(b[i,])) ) ) + g) 78 | 79 | pred_y[,i] <- rmvnorm(1, x%*% t(t(b[i,])), phi[i]*I ) # posterior predictive draw. 80 | } 81 | 82 | # apply burnin and trimming 83 | keep_draws<-seq(burnin,iter,trim) 84 | phi<-phi[keep_draws] 85 | b<-b[keep_draws,] 86 | 87 | # format and output 88 | joint_post<-data.frame(b=b,phi=phi) 89 | colnames(joint_post)[1:(ncol(x))]<-paste0('B',0:(ncol(x)-1) ) 90 | 91 | joint_post_long<-gather(joint_post,keep_draws) %>% 92 | rename(param=keep_draws, draw=value) %>% 93 | mutate(iter=rep(keep_draws,ncol(joint_post))) 94 | 95 | return(list(joint_post_long, pred_y)) 96 | } 97 | 98 | # run gibbs sampler with specified parameters 99 | post_dist<-backfit_mcmc(y = y, x = x, iter = 10000, burnin = 5000, trim = 1) 100 | 101 | 102 | tt <- post_dist[[2]] # extract posterior predictive 103 | png(filename = 'OriginalData.png') 104 | plot(xvec, y, 105 | xlab = 'x', ylab='y', 106 | main='Simulated (x,y) Data', 107 | ylim=c(-2500,1500), pch=20) 108 | dev.off() 109 | 110 | png(filename = 'BayesianResults.png') 111 | plot(xvec, y, 112 | xlab = 'x', ylab='y', 113 | main='Posterior Mean Function fit using Backfitting MCMC', 114 | ylim=c(-2500,1500), pch=20) 115 | for(i in 5000:10000) lines(sort(xvec), sort(tt[,i]), col='gray') 116 | lines(sort(xvec), sort(rowMeans(tt[,9000:10000]) ), col='red', lwd=3) 117 | points(xvec, y, pch=20 ) 118 | legend('bottomright', 119 | legend = c('Posterior Predictive Mean', '5000 posterior predictive draws','Data'), 120 | col = c('red','gray','black'), pch = c(NA, 22, 20), bty='n', 121 | lty=c(1, NA, NA), 
lwd=c(1,5, NA) ) 122 | dev.off() -------------------------------------------------------------------------------- /BackfittingMCMC/OriginalData.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/BackfittingMCMC/OriginalData.png -------------------------------------------------------------------------------- /BayesianProbit/BayesProbitSim.r: -------------------------------------------------------------------------------- 1 | library(LaplacesDemon) 2 | library(truncnorm) 3 | library(mvtnorm) 4 | 5 | set.seed(1) 6 | 7 | n <- 2000 8 | 9 | d_list <- replicate(n = 100, expr = sim_dat(n=n), simplify = F) 10 | 11 | d <- d_list[[1]] 12 | 13 | 14 | gibbs_iter <- 200000 15 | burnin <- 10000 16 | trimm <- 1 17 | p <- 3 18 | 19 | beta <- matrix(NA, nrow = p, ncol = gibbs_iter) 20 | xm <- cbind(1, d$x1, d$x2) 21 | 22 | z <- rnorm(n = n, mean = 0, sd = 1) 23 | 24 | for(i in 1:gibbs_iter){ 25 | 26 | beta[, i] <- rcond_post_beta(nparams = p, xm=xm, y = z, psi = 1, 27 | beta_prior_mean = c(0,0,0), 28 | beta_prior_var = c(10,10,10) ) 29 | 30 | z[d$y==0] <- rtruncnorm(n = sum(d$y==0),a = -Inf, b = 0, mean = xm[d$y==0,] %*% beta[,i], sd = 1 ) 31 | z[d$y==1] <- rtruncnorm(n = sum(d$y==1),a = 0, b = Inf, mean = xm[d$y==1,] %*% beta[,i], sd = 1 ) 32 | 33 | 34 | } 35 | 36 | freqres <- summary(glm(data=d, formula = y ~ x1 + x2, family = binomial(link = 'probit'))) 37 | freqres$coefficients[,'Estimate'] 38 | 39 | post_draw_ind <- seq(burnin, gibbs_iter, trimm) 40 | par(mfrow=c(3,1)) 41 | plot(beta[1,post_draw_ind], type='l') 42 | abline(h=freqres$coefficients[1,'Estimate'], col='red', lwd=2) 43 | 44 | plot(beta[2,post_draw_ind], type='l') 45 | abline(h=freqres$coefficients[2,'Estimate'], col='red', lwd=2) 46 | 47 | plot(beta[3,post_draw_ind], type='l') 48 | abline(h=freqres$coefficients[3,'Estimate'], col='red', lwd=2) 49 | 50 | 
-------------------------------------------------------------------------------- /BayesianProbit/FunctionSourceCode.r: -------------------------------------------------------------------------------- 1 | rcond_post_beta <- function(nparams, y, xm, psi, 2 | beta_prior_mean, beta_prior_var){ 3 | 4 | mu_beta <- beta_prior_mean 5 | v_beta <- diag(beta_prior_var) 6 | v_beta_inv <- diag((1/beta_prior_var)) 7 | 8 | xtx <- t(xm)%*%xm 9 | 10 | post_cov <- solve( v_beta_inv + (1/psi)*xtx) 11 | post_mean <- post_cov %*% (v_beta_inv %*% mu_beta + (1/psi)*t(xm)%*%y ) 12 | 13 | draw <- rmvnorm(n = 1, mean = post_mean, sigma = post_cov) 14 | return(draw) 15 | } 16 | 17 | sim_dat <- function(n=10000){ 18 | x1 <- rbinom(n = n, size = 1, prob = .5) 19 | x2 <- rnorm(n = n, mean = 0, sd = 10) 20 | 21 | p <- invlogit(1 + -2*x1 + 1*x2) 22 | 23 | y <- rbinom(n = n, size = 1, prob = p) 24 | 25 | d <- data.frame(y=y, x1=x1, x2=x2) 26 | return(d) 27 | } 28 | -------------------------------------------------------------------------------- /BayesianSurvival/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/BayesianSurvival/.DS_Store -------------------------------------------------------------------------------- /BayesianSurvival/FunctionSourceCode.R: -------------------------------------------------------------------------------- 1 | log_post_beta <- function(beta, log_alpha, X, survt){ # shape 2 | alpha <- exp(log_alpha) 3 | mu <- X %*% beta 4 | lambda <- exp(-1*mu*alpha) 5 | 6 | lik <- sum(dweibull( survt, shape = alpha, scale = lambda, log=T)) 7 | pr <- sum(dnorm(x = beta, mean = 0, sd = 100, log = T)) 8 | return(lik + pr) 9 | } 10 | 11 | log_post_alpha <- function(log_alpha, beta, X, survt){ # scale 12 | alpha <- exp(log_alpha) 13 | 14 | mu <- X %*% beta 15 | lambda <- exp(-1*mu*alpha) 16 | 17 | lik <- sum(dweibull( survt, shape = alpha, 
scale = lambda, log=T)) 18 | pr <- dexp(x = alpha, rate = 1, log = T) 19 | return(lik + pr) 20 | } 21 | 22 | 23 | metrop_hastings<-function(x_0, iter=1, log_post_density, 24 | proposal_dist = function(x, prop_sigma){ 25 | MASS::mvrnorm(1, mu = x, Sigma = prop_sigma ) 26 | }, 27 | lower=-Inf, upper=Inf, prop_sigma, 28 | ... ){ 29 | for(i in 1:iter){ 30 | # draw from proposal distribution 31 | x_star <- proposal_dist(x_0, prop_sigma) 32 | 33 | # calculate ratio of conditional posterior densities 34 | r_num <- do.call(log_post_density, c(list(x_star), list(...)) ) 35 | r_denom <- do.call(log_post_density, c(list(x_0), list(...)) ) 36 | r <- exp(r_num - r_denom) 37 | rmin<-min(r,1) 38 | if(is.na(rmin)) browser() 39 | # accept / reject proposal 40 | if(rbinom(1,1,rmin)==1){ 41 | x_0<-x_star 42 | } 43 | } 44 | 45 | res<-list(x_0 = x_0, accept_prob = rmin ) 46 | return(res) 47 | } -------------------------------------------------------------------------------- /BayesianSurvival/StanWeibullAFT.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Weibull AFT Model in Stan" 3 | author: "Arman Oganisian" 4 | date: "3/9/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE) 10 | library(rstan) 11 | library(survival) 12 | ``` 13 | 14 | ```{r simulate_data, } 15 | set.seed(1) 16 | 17 | n <- 1000 18 | A <- rbinom(n, 1, .5) 19 | 20 | X <- model.matrix(~ A) 21 | 22 | true_beta <- (1/2)*matrix(c(-1/3, 2), ncol=1) 23 | true_mu <- X %*% true_beta 24 | 25 | true_sigma <- 1 26 | 27 | true_alpha <- 1/true_sigma 28 | true_lambda <- exp(-1*true_mu*true_alpha) 29 | 30 | # simulate censoring and survival times 31 | survt = rweibull(n, shape=true_alpha, scale = true_lambda) 32 | cent = rweibull(n, shape=true_alpha, scale = true_lambda) 33 | 34 | ## observed data: 35 | #censoring indicator 36 | delta <- cent < survt 37 | survt[delta==1] <- cent[delta==1] # censor survival time. 
38 | 39 | # count number of missing/censored survival times 40 | n_miss <- sum(delta) 41 | 42 | d_list <- list(N_m = n_miss, N_o = n - n_miss, P=2, # number of betas 43 | # data for censored subjects 44 | y_m=survt[delta==1], X_m=X[delta==1,], 45 | # data for uncensored subjects 46 | y_o=survt[delta==0], X_o=X[delta==0,]) 47 | ``` 48 | 49 | 50 | ```{stan specify_stan_mod, output.var="weibull_mod"} 51 | data { 52 | int P; // number of beta parameters 53 | 54 | // data for censored subjects 55 | int N_m; 56 | matrix[N_m,P] X_m; 57 | vector[N_m] y_m; 58 | 59 | // data for observed subjects 60 | int N_o; 61 | matrix[N_o,P] X_o; 62 | real y_o[N_o]; 63 | } 64 | 65 | parameters { 66 | vector[P] beta; 67 | real alpha; // Weibull Shape 68 | } 69 | 70 | transformed parameters{ 71 | // model Weibull rate as function of covariates 72 | vector[N_m] lambda_m; 73 | vector[N_o] lambda_o; 74 | 75 | // standard weibull AFT re-parameterization 76 | lambda_m = exp((X_m*beta)*alpha); 77 | lambda_o = exp((X_o*beta)*alpha); 78 | } 79 | 80 | model { 81 | beta ~ normal(0, 100); 82 | alpha ~ exponential(1); 83 | 84 | // evaluate likelihood for censored and uncensored subjects 85 | target += weibull_lpdf(y_o | alpha, lambda_o); 86 | target += weibull_lccdf(y_m | alpha, lambda_m); 87 | } 88 | 89 | 90 | // generate posterior quantities of interest 91 | generated quantities{ 92 | vector[1000] post_pred_trt; 93 | vector[1000] post_pred_pbo; 94 | real lambda_trt; 95 | real lambda_pbo; 96 | real hazard_ratio; 97 | 98 | // generate hazard ratio 99 | lambda_trt = exp((beta[1] + beta[2])*alpha ) ; 100 | lambda_pbo = exp((beta[1])*alpha ) ; 101 | 102 | hazard_ratio = exp(beta[2]*alpha ) ; 103 | 104 | // generate survival times (for plotting survival curves) 105 | for(i in 1:1000){ 106 | post_pred_trt[i] = weibull_rng(alpha, lambda_trt); 107 | post_pred_pbo[i] = weibull_rng(alpha, lambda_pbo); 108 | } 109 | } 110 | 111 | ``` 112 | 113 | ```{r run_stan_mod, } 114 | 115 | weibull_fit <- 
sampling(weibull_mod, 116 | data = d_list, 117 | chains = 1, iter=20000, warmup=15000, 118 | pars= c('hazard_ratio','post_pred_trt','post_pred_pbo')) 119 | 120 | post_draws<-extract(weibull_fit) 121 | ``` 122 | 123 | ```{r plot_hazard_ratio, } 124 | hist(post_draws$hazard_ratio, 125 | xlab='Hazard Ratio', main='Hazard Ratio Posterior Distribution') 126 | abline(v=.367, col='red') 127 | 128 | mean(post_draws$hazard_ratio) 129 | quantile(post_draws$hazard_ratio, probs = c(.025, .975)) 130 | ``` 131 | 132 | 133 | ```{r plot_survival,} 134 | library(survival) 135 | 136 | plot(survfit(Surv(survt, 1-delta) ~ A ), col=c('black','blue'), 137 | xlab='Time',ylab='Survival Probability') 138 | 139 | for(i in 1:5000){ 140 | trt_ecdf <- ecdf(post_draws$post_pred_trt[i,]) 141 | curve(1 - trt_ecdf(x), from = 0, to=4, add=T, col='gray') 142 | 143 | pbo_ecdf <- ecdf(post_draws$post_pred_pbo[i,]) 144 | curve(1 - pbo_ecdf(x), from = 0, to=4, add=T, col='lightblue') 145 | } 146 | 147 | lines(survfit(Surv(survt, 1-delta) ~ A ), col=c('black','blue'), add=T ) 148 | 149 | legend('topright', 150 | legend = c('KM Curve and Intervals (TRT)', 151 | 'Posterior Survival Draws (TRT)', 152 | 'KM Curve and Intervals (PBO)', 153 | 'Posterior Survival Draws (PBO)'), 154 | col=c('black','gray','blue','lightblue'), 155 | lty=c(1,0,1,0), pch=c(NA,15,NA,15), bty='n') 156 | ``` 157 | 158 | -------------------------------------------------------------------------------- /BayesianSurvival/weibull_nocovar.R: -------------------------------------------------------------------------------- 1 | # sources: 2 | # https://cran.r-project.org/web/packages/SurvRegCensCov/vignettes/weibull.pdf 3 | 4 | library(survival) 5 | library(truncdist) 6 | 7 | setwd("/Users/aoganisi/Box Sync/Research/Analyses/BNP_CE/code/") 8 | source("FunctionSourceCode.r") 9 | ################################################################################ 10 | ### 0 - Simulate Data 11 | 
################################################################################ 12 | set.seed(1) 13 | 14 | n <- 1000 15 | A <- rbinom(n, 1, .5) 16 | 17 | X <- model.matrix(~ A) 18 | 19 | true_beta <- (1/2)*matrix(c(-1/3, 2), ncol=1) 20 | true_mu <- X %*% true_beta 21 | 22 | true_sigma <- 1 23 | 24 | true_alpha <- 1/true_sigma 25 | true_lambda <- exp(-1*true_mu*true_alpha) 26 | 27 | hist(rweibull(n, shape=true_alpha, scale = true_lambda), breaks=100) 28 | 29 | # simulate censoring and survival times 30 | survt = rweibull(n, shape=true_alpha, scale = true_lambda) 31 | cent = rweibull(n, shape=true_alpha, scale = true_lambda) 32 | 33 | ## observed data: 34 | #censoring indicator 35 | delta <- cent < survt 36 | survt[delta==1] <- cent[delta==1] # censor survival time. 37 | 38 | # survt_all will combine observed and imputed survival times. 39 | survt_all <- survt 40 | 41 | # count number of missing/censored survival times 42 | n_miss <- sum(delta) 43 | row_miss <- c(1:n)[delta] # index for which rows are censored 44 | 45 | ################################################################################ 46 | ### 1 - Run Augmented Sampler Accounting for Censoring 47 | ################################################################################ 48 | iter <- 10000 # number of gibbs iterations 49 | burnin <- 9000 # burn-in iterations 50 | 51 | # shells for storing parameters 52 | hazard_ratio <- numeric(iter - burnin) 53 | 54 | # initial values 55 | beta_shell <- matrix(c(0,0), ncol=1) 56 | lalpha_shell <- c(0) 57 | 58 | prop_covar <- diag(c(.01,.01)) 59 | 60 | # plot stratified Kaplan-Meier 61 | par(mfrow=c(1,1)) 62 | plot(survfit(Surv(survt, 1-delta) ~ A),conf.int = F, col=c('blue','red'), 63 | xlab=c('Time'),ylab='Survival Probability', 64 | main = 'Data augmentation with all subjects') 65 | 66 | for(i in 2:iter){ 67 | ## sample from posterior of parameters, 68 | ## conditional on observed and missing survival times 69 | 70 | # metrop_hastings() is a custom function 
for generating a draw 71 | # from conditional posterior of beta: log_post_beta 72 | beta_shell <- metrop_hastings(x_0 = beta_shell, 73 | iter = 1, 74 | log_post_density = log_post_beta, 75 | prop_sigma = prop_covar, 76 | X=X, survt=survt_all, 77 | log_alpha=lalpha_shell )$x_0 78 | 79 | # sample from conditional posterior of alpha: log_post_alpha 80 | lalpha_shell <- metrop_hastings(x_0 = lalpha_shell, 81 | iter = 1, 82 | log_post_density = log_post_alpha, 83 | prop_sigma = matrix(.001), 84 | X=X, survt=survt_all, 85 | beta=beta_shell)$x_0 86 | 87 | ## sample from conditional posterior of missing survival times 88 | mu_curr <- X %*% beta_shell 89 | alpha_curr <- exp(lalpha_shell) 90 | 91 | for(m in row_miss){ 92 | lambda_curr <- exp(-1*mu_curr[m]*alpha_curr) 93 | 94 | survt_all[m] <- rtrunc(1, spec = 'weibull', 95 | a = survt[m], 96 | shape = alpha_curr, 97 | scale = lambda_curr) 98 | } 99 | 100 | if(i>burnin){ 101 | # plot 500 posterior survival curve draws for treated and placebo 102 | mu_trt <- sum(beta_shell) 103 | mu_pbo <- beta_shell[1] 104 | 105 | post_draw <- rweibull(n, shape = alpha_curr, scale = exp(-1*mu_trt*alpha_curr) ) 106 | post_ecdf <- ecdf(post_draw) 107 | curve(1-post_ecdf(x), add=T, from=0, to=15, col='lightblue') 108 | 109 | post_draw <- rweibull(n, shape = alpha_curr, scale = exp(-1*mu_pbo*alpha_curr) ) 110 | post_ecdf <- ecdf(post_draw) 111 | curve(1-post_ecdf(x), add=T, from=0, to=15, col='lightgray') 112 | 113 | # store hazard ratio 114 | hazard_ratio[i-burnin] <- exp(-beta_shell[2]*alpha_curr) 115 | } 116 | 117 | } 118 | 119 | # overlay KM curve and plot legend 120 | lines(survfit(Surv(survt, 1-delta) ~ A),conf.int = T, col=c('black','blue')) 121 | legend('topright', 122 | legend = c('KM Curve and Intervals (TRT)', 123 | 'Posterior Survival Draws (TRT)', 124 | 'KM Curve and Intervals (PBO)', 125 | 'Posterior Survival Draws (PBO)'), 126 | col=c('black','gray','blue','lightblue'), 127 | lty=c(1,0,1,0), pch=c(NA,15,NA,15), bty='n') 128 | 129 | 
plot(hazard_ratio, type='l') 130 | abline(h=exp(-true_beta[2]*true_alpha), col='red') 131 | 132 | hist(hazard_ratio) 133 | abline(v=exp(-true_beta[2]*true_alpha), col='red') 134 | 135 | -------------------------------------------------------------------------------- /LogisticReg/.Rhistory: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | library(LaplacesDemon) 3 | library(invgamma) 4 | library(MASS) 5 | library(profvis) 6 | ################################################################################ 7 | ### 0 - Simulate Data 8 | ################################################################################ 9 | set.seed(10) 10 | N<-1000 11 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 12 | d$age_1<-ifelse(d$age_group==1,1,0) 13 | d$age_2<-ifelse(d$age_group==2,1,0) 14 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 15 | d$y<-rbinom(n = N, size = 1, 16 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 17 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 18 | Y<-matrix(d$y, ncol=1) # outcome vector 19 | p<-ncol(X) 20 | ################################################################################ 21 | ### 1 - functions to sample from conditional posterior distributions 22 | ################################################################################ 23 | # unnormalized log posterior of beta vector 24 | log_cond_post_beta<-function(beta, phi, lambda, X, Y){ 25 | # calculate likelihood 26 | lik<-0 27 | for(i in 1:length(Y)){ 28 | xb <- X[i,] %*% beta 29 | xb<-ifelse(xb>10, 10, ifelse(xb< (-10),-10, xb)) 30 | p_i<-invlogit(xb) 31 | lik<-lik + Y[i]*log(p_i) + (1 - Y[i])*log(1 - p_i) 32 | } 33 | # calculate prior 34 | pr <- -.5 * (1/phi)*( t(beta - lambda)%*%(beta - lambda) ) 35 | log_cond_post <- lik + pr 36 | return(log_cond_post) 37 | } 38 | # use Metropolis Hastings algorithm to sample from cond. post. 
of beta 39 | rcond_post_beta_mh<-function(beta_0, phi, lambda, X, Y, mh_trials,jump_v){ 40 | for(i in 1:mh_trials){ 41 | # draw from proposal distribution 42 | beta_c <- mvrnorm(1,beta_0,Sigma = jump_v*diag(p)) 43 | # calculate ratio of conditional posterior densities 44 | r_num <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 45 | r_denom <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 46 | r <- exp(r_num - r_denom) 47 | rmin<-min(r,1) 48 | # accept or reject proposal 49 | accept<-0 50 | if(rmin>=1){ 51 | beta_0<-beta_c 52 | accept<-1 53 | }else{ 54 | if(rbinom(1,1,rmin)==1){ 55 | beta_0<-beta_c 56 | accept<-1 57 | } 58 | } 59 | } 60 | return(c(new_beta=beta_0, accept=accept) ) 61 | } 62 | ################################################################################ 63 | ### 2 - Run Gibbs Sampler 64 | ################################################################################ 65 | ### Gibbs Sampler 66 | # true hyperparameter values for betas 67 | lambda<-c(0,0,0,0) 68 | phi<-10000 # 69 | # shell for storing results 70 | gibbs_iter<-2000 + 1 71 | gibbs_res<-matrix(nrow=gibbs_iter, ncol=p+2) 72 | # initialize 73 | gibbs_res[1,1:p]<-c(0,0,0,0) 74 | View(gibbs_res) 75 | gibbs_res[1,1:p]<-c(0,0,0,0) 76 | for(i in 2:gibbs_iter){ 77 | # sample from posterior of phi 78 | # sample from posterior of beta vector ( using MH ) 79 | mh_draw <- rcond_post_beta_mh(gibbs_res[i-1,1:p], phi, 80 | lambda, X, Y, mh_trials=1, jump_v=.01) 81 | # store results 82 | gibbs_res[i,1:p] <- mh_draw[1:p] 83 | gibbs_res[i,p+2] <- mh_draw[p+1] 84 | } 85 | ################################################################################ 86 | ### 3 - Plot Results 87 | ################################################################################ 88 | par(mfrow=c(2,2)) 89 | plot(gibbs_res[,1],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 90 | main='Intercept') 91 | abline(h=-1,col='red') 92 | plot(gibbs_res[,2],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 93 
| main='Age1') 94 | abline(h=.7,col='red') 95 | plot(gibbs_res[,3],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 96 | main='Age2') 97 | abline(h=1.1,col='red') 98 | plot(gibbs_res[,4],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 99 | main='Treatment') 100 | abline(h=1.1,col='red') 101 | View(gibbs_res) 102 | View(gibbs_res) 103 | mean(gibbs_res[,6]) 104 | View(gibbs_res) 105 | mean(gibbs_res[,6], na.rm=T) 106 | -------------------------------------------------------------------------------- /LogisticReg/Logistic_GibbsMHcombo.r: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | 3 | library(LaplacesDemon) 4 | library(invgamma) 5 | library(MASS) 6 | library(profvis) 7 | 8 | ################################################################################ 9 | ### 0 - Simulate Data 10 | ################################################################################ 11 | set.seed(10) 12 | N<-1000 13 | 14 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 15 | d$age_1<-ifelse(d$age_group==1,1,0) 16 | d$age_2<-ifelse(d$age_group==2,1,0) 17 | 18 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 19 | 20 | d$y<-rbinom(n = N, size = 1, 21 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 22 | 23 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 24 | Y<-matrix(d$y, ncol=1) # outcome vector 25 | 26 | p<-ncol(X) 27 | 28 | ################################################################################ 29 | ### 1 - functions to sample from conditional posterior distributions 30 | ################################################################################ 31 | 32 | ### sample from conditional posterior of phi - conjugate! 
33 | rcond_post_phi<-function(beta, alpha, gamma, lambda, p){ 34 | 35 | post_alpha<-alpha + p/2 36 | post_gamma<-gamma + .5*t(beta - lambda)%*%(beta - lambda) 37 | draw<-invgamma::rinvgamma(n = 1, shape = post_alpha, rate = post_gamma) 38 | 39 | return(draw) 40 | } 41 | 42 | # unnormalized log posterior of beta vector 43 | log_cond_post_beta<-function(beta, phi, lambda, X, Y){ 44 | # calculate likelihood 45 | lik<-0 46 | for(i in 1:length(Y)){ 47 | xb <- X[i,] %*% beta 48 | xb<-ifelse(xb>10, 10, ifelse(xb< (-10),-10, xb)) 49 | 50 | p_i<-invlogit(xb) 51 | 52 | lik<-lik + Y[i]*log(p_i) + (1 - Y[i])*log(1 - p_i) 53 | } 54 | 55 | # calculate prior 56 | pr <- -.5 * (1/phi)*( t(beta - lambda)%*%(beta - lambda) ) 57 | 58 | log_cond_post <- lik + pr 59 | return(log_cond_post) 60 | } 61 | 62 | # use Metropolis Hastings algorithm to sample from cond. post. of beta 63 | rcond_post_beta_mh<-function(beta_0, phi, lambda, X, Y, mh_trials,jump_v){ 64 | 65 | for(i in 1:mh_trials){ 66 | # draw from proposal distribution 67 | beta_c <- mvrnorm(1,beta_0,Sigma = jump_v*diag(p)) 68 | 69 | # calculate ratio of conditional posterior densities 70 | r_num <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 71 | r_denom <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 72 | r <- exp(r_num - r_denom) 73 | rmin<-min(r,1) 74 | 75 | # accept or reject proposal 76 | accept<-0 77 | if(rmin>=1){ 78 | beta_0<-beta_c 79 | accept<-1 80 | }else{ 81 | if(rbinom(1,1,rmin)==1){ 82 | beta_0<-beta_c 83 | accept<-1 84 | } 85 | } 86 | } 87 | 88 | return(c(new_beta=beta_0, accept=accept) ) 89 | } 90 | 91 | ################################################################################ 92 | ### 2 - Run Gibbs Sampler 93 | ################################################################################ 94 | 95 | ### Gibbs Sampler 96 | # true hyperparameter values for phi 97 | alpha<-5 98 | gamma<-2 99 | 100 | # true hyperparameter values for betas 101 | lambda<-c(0,0,0,0) 102 | phi<-10000 # initialize 103 | 104 | 
# shell for storing results 105 | gibbs_iter<-2000 + 1 106 | gibbs_res<-matrix(nrow=gibbs_iter, ncol=p+2) 107 | 108 | # initialize 109 | gibbs_res[1,1:p]<-c(0,0,0,0) 110 | 111 | profvis(expr = { 112 | for(i in 2:gibbs_iter){ 113 | # sample from posterior of phi 114 | gibbs_res[i,p+1] <- rcond_post_phi(gibbs_res[i-1,1:p], 115 | alpha, gamma, lambda, p) 116 | # sample from posterior of beta vector ( using MH ) 117 | mh_draw <- rcond_post_beta_mh(gibbs_res[i-1,1:p], gibbs_res[i,p+1], 118 | lambda, X, Y, mh_trials=5, jump_v=.01) 119 | 120 | # store results 121 | gibbs_res[i,1:p] <- mh_draw[1:p] 122 | gibbs_res[i,p+2] <- mh_draw[p+1] 123 | } 124 | }) 125 | 126 | ################################################################################ 127 | ### 3 - Plot Results 128 | ################################################################################ 129 | 130 | par(mfrow=c(2,2)) 131 | plot(gibbs_res[,1],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 132 | main='Intercept') 133 | abline(h=-1,col='red') 134 | plot(gibbs_res[,2],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 135 | main='Age1') 136 | abline(h=.7,col='red') 137 | plot(gibbs_res[,3],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 138 | main='Age2') 139 | abline(h=1.1,col='red') 140 | plot(gibbs_res[,4],type='l',xlab='MCMC Iterations',ylab=c('Coefficient Draw'), 141 | main='Treatment') 142 | abline(h=1.1,col='red') 143 | 144 | # calculate posterior means and credible intervals 145 | post_burn_trim<-gibbs_res[seq(1000,gibbs_iter,100),] 146 | colMeans(post_burn_trim) 147 | apply(post_burn_trim, 2, quantile, p=c(.025,.975)) 148 | -------------------------------------------------------------------------------- /LogisticReg/LogitMCMCChains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/LogisticReg/LogitMCMCChains.png 
--------------------------------------------------------------------------------
/LogisticReg/profilevis1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/LogisticReg/profilevis1.png
--------------------------------------------------------------------------------
/LogisticReg/profvis2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/LogisticReg/profvis2.png
--------------------------------------------------------------------------------
/MH_with_caching/HelperFunctions.R:
--------------------------------------------------------------------------------
 1 | ## Author: Arman Oganisian
 2 | 
 3 | # Unnormalized log conditional posterior of the beta vector.
 4 | # Returns a single scalar: log-likelihood + log-prior (up to a constant).
 5 | log_cond_post_beta<-function(beta, phi, lambda, X, Y){
 6 |   # calculate likelihood; clamp the linear predictor so invlogit never
 7 |   # returns exactly 0 or 1 (which would make the log-density -Inf)
 8 |   xb <- X %*% beta
 9 |   xb <- ifelse(xb>10, 10, ifelse(xb< (-10),-10, xb))
10 |   p <- invlogit(xb)
11 | 
12 |   # BUG FIX: dbinom()/dnorm() with log=T return VECTORS of per-element
13 |   # log densities. The joint log posterior is their SUM. Previously
14 |   # lik + pr silently recycled a length-p vector against a length-N one
15 |   # and returned a length-N vector, so the downstream MH acceptance
16 |   # probability min(r, 1) was taken over elementwise ratios -- not a
17 |   # valid Metropolis-Hastings ratio. (The BayesianSurvival helper file
18 |   # sums these terms, confirming the intended contract.)
19 |   lik <- sum(dbinom(x = Y, size = 1, prob = p, log = T))
20 | 
21 |   # calculate prior: independent N(lambda, phi) priors on each coefficient
22 |   pr <- sum(dnorm(x = beta, mean = lambda, sd = sqrt(phi), log = T))
23 | 
24 |   log_cond_post <- lik + pr
25 |   return(log_cond_post)
26 | }
27 | 
28 | # use Metropolis Hastings algorithm to sample from cond. post.
of beta 20 | mh_vanilla <- function(beta_0, phi, lambda, X, Y, mh_trials,jump_v, p){ 21 | 22 | accept <- 0 23 | post_draws <- matrix(data = NA, nrow = mh_trials, ncol = length(beta_0)) 24 | jump_cov <- jump_v*diag(p) 25 | 26 | for(i in 1:mh_trials){ 27 | # draw from proposal distribution 28 | beta_c <- mvrnorm(1,beta_0,Sigma = jump_cov) 29 | 30 | # calculate ratio of conditional posterior densities 31 | r_num <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 32 | r_denom <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 33 | r <- exp(r_num - r_denom) 34 | rmin<-min(r,1) 35 | 36 | # accept or reject proposal 37 | if(rmin>=1){ 38 | beta_0 <- beta_c 39 | accept <- accept+1 40 | }else if(rbinom(1,1,rmin)==1){ 41 | beta_0 <- beta_c 42 | accept <- accept+1 43 | } 44 | 45 | post_draws[i, ] <- beta_0 46 | } 47 | 48 | return(list(post_draws=post_draws, accept=accept/mh_trials) ) 49 | } 50 | 51 | mh_cache <- function(beta_0, phi, lambda, X, Y, mh_trials,jump_v, p){ 52 | 53 | accept <- 0 54 | post_draws <- matrix(data = NA, nrow = mh_trials, ncol = length(beta_0)) 55 | jump_cov <- jump_v*diag(p) 56 | 57 | eval_curr <- log_cond_post_beta(beta_0, phi, lambda, X, Y ) 58 | 59 | for(i in 1:mh_trials){ 60 | # draw from proposal distribution 61 | beta_c <- mvrnorm(n = 1, mu = beta_0, Sigma = jump_cov) 62 | 63 | # calculate ratio of conditional posterior densities 64 | eval_prop <- log_cond_post_beta(beta_c, phi, lambda, X, Y ) 65 | r <- exp(eval_prop - eval_curr) 66 | 67 | rmin<-min(r,1) 68 | 69 | # accept or reject proposal 70 | if(rmin>=1){ 71 | beta_0 <- beta_c 72 | accept <- accept+1 73 | eval_curr <- eval_prop 74 | }else if(rbinom(1,1,rmin)==1){ 75 | beta_0 <- beta_c 76 | accept <- accept+1 77 | eval_curr <- eval_prop 78 | } 79 | 80 | post_draws[i, ] <- beta_0 81 | } 82 | 83 | return(list(post_draws=post_draws, accept=accept/mh_trials) ) 84 | } -------------------------------------------------------------------------------- /MH_with_caching/LogisticMHcache.r: 
-------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | 3 | library(microbenchmark) 4 | library(LaplacesDemon) 5 | library(MASS) 6 | source("HelperFunctions.R") 7 | 8 | set.seed(10) 9 | 10 | 11 | ################################################################################ 12 | ### 0 - Simulate Data 13 | ################################################################################ 14 | # hyper-parameters and true values 15 | lambda<-c(0,0,0,0) 16 | phi<-10 17 | true_beta <- matrix(c(0,2,1,-2),ncol=1) 18 | N<-50000 19 | 20 | # simulate covariates 21 | X1 <- rnorm(N) 22 | X2 <- rnorm(N) 23 | X3 <- rnorm(N) 24 | X <- model.matrix(~ X1 + X2 + X3) 25 | 26 | # simulate outcome 27 | Y <- rbinom(n = N, size = 1, prob = invlogit( X %*% true_beta ) ) 28 | 29 | ################################################################################ 30 | ### 1 - Run Benchmark 31 | ################################################################################ 32 | 33 | bench<-microbenchmark( 34 | # Run Vanialla Metropolis 35 | MH_vanilla = mh_vanilla(beta_0 = c(0,0,0,0), # initial value 36 | p=4, # number of parameters 37 | phi = phi, lambda = lambda, #hyperparameters 38 | X = X, Y = Y, # Data 39 | #iterations and proposal variance 40 | mh_trials=1000, jump_v=.2 ), 41 | # Run Metropolis with Cache 42 | MH_cache = mh_cache(beta_0 = c(0,0,0,0), 43 | phi = phi, lambda = lambda, X = X, Y = Y, 44 | mh_trials=1000, jump_v=.2, p=4), 45 | times = 10) 46 | bench 47 | 48 | ################################################################################ 49 | ### 2 - Plot Chains 50 | ################################################################################ 51 | 52 | ## could do a better job at proposal tuning...but not the point of this post. 
set.seed(1)
# long caching run used for the trace plots below
MH_cache <- mh_cache(beta_0 = c(0,0,0,0),
                     phi = 100, lambda = lambda, X = X, Y = Y,
                     mh_trials = 20000, jump_v = .02, p = 4)

# set.seed(1)
# MH_vanilla <- mh_vanilla(beta_0 = c(0,0,0,0),
#                          phi = 100, lambda = lambda, X = X, Y = Y,
#                          mh_trials = 2000, jump_v = .7, p = 4)


# trace plots for the four coefficients; red lines mark the true values
par(mfrow = c(2, 2))
plot(MH_cache$post_draws[, 1], type = 'l')
abline(h = 0, col = 'red')
plot(MH_cache$post_draws[, 2], type = 'l')
abline(h = 2, col = 'red')
plot(MH_cache$post_draws[, 3], type = 'l')
abline(h = 1, col = 'red')
plot(MH_cache$post_draws[, 4], type = 'l')
abline(h = -2, col = 'red')

################################################################################
# File: MultipleLinearReg/multiplelinearreg.r
################################################################################

################################################################################
###### 0 - Packages and Simulate Data
################################################################################
library(mvtnorm)
library(invgamma)
library(ggplot2)
library(dplyr)
library(tidyr)
library(xtable)
set.seed(200)

n <- 50  # number of observations
# model matrix: intercept plus three continuous covariates
x <- cbind(1, rnorm(n, 0, 1), rnorm(n, 5, 10), rnorm(n, 100, 10))

# true beta coefficients
tb <- c(1000, 50, -50, 10)

# true error variance (phi)
tphi <- 10000
I <- diag(1, n, n)  # identity matrix used for covariance matrix

# simulate outcome for regression
y <- t(rmvnorm(1, x %*% tb, tphi * I))

# simulate many outcomes...used later for asymptotic evaluations
# 1000 replicate outcome vectors from the same design; used in section 3 to
# evaluate bias and coverage of the posterior-median estimator
y_list <- replicate(1000, t(rmvnorm(1, x %*% tb, tphi * I)), simplify = FALSE)

################################################################################
###### 1 - Run Blocked Gibbs Sampler
################################################################################

# Blocked Gibbs sampler for Bayesian multiple linear regression.
#
# Args:
#   y      - n x 1 outcome vector (matrix).
#   x      - n x p design matrix (first column the intercept).
#   iter   - total number of Gibbs iterations.
#   burnin - iteration at which to start keeping draws.
#   trim   - thinning interval applied after burn-in.
# Returns a long-format data.frame with columns param, draw, iter holding the
# retained posterior draws of B0..B(p-1) and phi.
# NOTE(review): reads `n` (sample size) from the enclosing environment.
block_gibbs <- function(y, x, iter, burnin, trim){
  # initialize gibbs
  p_dim <- ncol(x)                  # number of coefficients
                                    # (beta shell was hard-coded to 4 columns;
                                    # generalized to any design width)
  xprimex_inv <- solve(t(x) %*% x)  # calculate once for repeated use in sampler
  phi <- numeric(iter)              # shell for phi
  b <- matrix(nrow = iter, ncol = p_dim)  # shell for betas
  phi[1] <- 6                       # initial phi value to start sampler

  # phi hyperparameters (inverse-gamma shape a, rate g)
  a <- .5
  g <- 10000

  # gibbs sampling
  for(i in 2:iter ){
    # beta | phi, y ~ MVN(OLS estimate, phi * (X'X)^-1)
    b[i,] <- rmvnorm(n = 1,
                     mean = ((xprimex_inv %*% t(x)) %*% y),
                     sigma = phi[i-1] * xprimex_inv )

    # phi | beta, y ~ Inverse-Gamma(n/2 + a, .5*SSR + g)
    resid <- y - x %*% t(t(b[i,]))
    phi[i] <- rinvgamma(n = 1,
                        shape = (n/2 + a),
                        rate = .5 * (t(resid) %*% resid) + g)
  }

  # apply burnin and trimming
  keep_draws <- seq(burnin, iter, trim)
  phi <- phi[keep_draws]
  b <- b[keep_draws,]

  # format and output in long form for ggplot/dplyr downstream
  joint_post <- data.frame(b = b, phi = phi)
  colnames(joint_post)[1:p_dim] <- paste0('B', 0:(p_dim - 1))

  joint_post_long <- gather(joint_post, keep_draws) %>%
    rename(param = keep_draws, draw = value) %>%
    mutate(iter = rep(keep_draws, ncol(joint_post)))

  return(joint_post_long)
}

# run gibbs sampler with specified parameters
post_dist <- block_gibbs(y = y, x = x, iter = 500000, burnin = 100000, trim = 50)

################################################################################
###### 2 - Summarize and Visualize Posterior Distributions
################################################################################

# posterior summary statistics per parameter (merged onto the draws below)
post_sum_stats <- post_dist %>%
  group_by(param) %>%
  summarise(median = median(draw),
            lwr = quantile(draw, .025),
            upr = quantile(draw, .975)) %>%
  mutate(true_vals = c(tb, tphi))

# merge on summary statistics
post_dist <- post_dist %>%
  left_join(post_sum_stats, by = 'param')

# plot MCMC Chains
ggplot(post_dist, aes(x = iter, y = draw)) +
  geom_line() +
  geom_hline(aes(yintercept = true_vals, col = 'red'), show.legend = FALSE) +
  facet_grid(param ~ ., scale = 'free_y', switch = 'y') +
  theme_bw() +
  xlab('Gibbs Sample Iteration') + ylab('MCMC Chains') +
  ggtitle('Gibbs Sampler MCMC Chains by Parameter')

# plot Posterior Distributions
ggplot(post_dist, aes(x = draw)) +
  geom_histogram(aes(x = draw), bins = 50) +
  geom_vline(aes(xintercept = true_vals, col = 'red'), show.legend = FALSE) +
  facet_grid(. ~ param, scale = 'free_x', switch = 'y') +
  theme_bw() +
  xlab('Posterior Distributions') + ylab('Count') +
  ggtitle('Posterior Distributions of Parameters (true values in red)')

################################################################################
###### 3 - Assess Bias and Coverage
################################################################################

# run the estimation 1000 times to get 1000 posterior medians and CIs
bayes_res <- lapply(y_list, block_gibbs, x = x, iter = 1000, burnin = 100, trim = 1)

# Summarise one replicate's posterior: median and central 95% interval per
# parameter, with the true values attached for comparison.
calc_sumstats <- function(post_dist){
  post_sum_stats <- post_dist %>%
    group_by(param) %>%
    summarise(post_median = median(draw),
              lwr = quantile(draw, .025),
              upr = quantile(draw, .975)) %>%
    mutate(true_vals = c(tb, tphi))
  return(post_sum_stats)
}


all_sum_stats <- lapply(bayes_res, calc_sumstats)
all_sum_stats_stack <- bind_rows(all_sum_stats) %>%
  arrange(param) %>%
  rename(est = post_median)

eval_sum <- all_sum_stats_stack %>%
  # BUG FIX: the coverage indicator was garbled ("true_valslwr"); the 95%
  # credible interval covers the truth exactly when lwr < true_vals < upr.
  mutate(covered = ifelse(true_vals < upr & true_vals > lwr, 1, 0)) %>%
  group_by(param) %>%
summarise(est_var=var(est), 136 | est_mean=mean(est), 137 | bias=mean(est-true_vals), 138 | true_val=mean(true_vals), 139 | coverage=mean(covered)) %>% 140 | mutate(perc_bias=(bias/true_val)*100) 141 | 142 | # format table 143 | eval_sum<-eval_sum[,c(1,5,3,2,4,7,6)] 144 | names(eval_sum)<-c('Parameter','True Value','Estimator Mean', 145 | 'Estimator Variance','Bias','Percent Bias (of truth)', 146 | 'Coverage of 95% CI') 147 | 148 | print.xtable(xtable(eval_sum, 149 | caption = 'Estimator Evaluation'), 150 | caption.placement = 'top', 151 | include.rownames = F) 152 | -------------------------------------------------------------------------------- /PartialPooling/.Rhistory: -------------------------------------------------------------------------------- 1 | beta_prior_var*Imat 2 | Imat 3 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 4 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 5 | interv_var = 'trt', interv_val = 1) 6 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 7 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 8 | interv_var = 'trt', interv_val = 1) 9 | Imat 10 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 11 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 12 | interv_var = 'trt', interv_val = 1) 13 | DPglm_res$param_shell[[i]]$param_z[, class, drop=F] 14 | xz_vec 15 | xz_vec %*% DPglm_res$param_shell[[i]]$param_z[, class, drop=F] 16 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 17 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 18 | interv_var = 'trt', interv_val = 1) 19 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 20 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 21 | interv_var = 'trt', interv_val = 1) 22 | source('~/Box 
Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 23 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 24 | interv_var = 'trt', interv_val = 1) 25 | xz_vec 26 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 27 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 28 | interv_var = 'trt', interv_val = 1) 29 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 30 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 31 | interv_var = 'trt', interv_val = 1) 32 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 33 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 34 | interv_var = 'trt', interv_val = 1) 35 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 36 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 37 | interv_var = 'trt', interv_val = 1) 38 | gamma_new 39 | t(gamma_new) 40 | beta_new 41 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 42 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 43 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 44 | interv_var = 'trt', interv_val = 1) 45 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 46 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 47 | interv_var = 'trt', interv_val = 1) 48 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 49 | set.seed(1) 50 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 51 | interv_var = 'trt', interv_val = 1) 52 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 53 | set.seed(1) 54 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 55 | interv_var = 'trt', interv_val = 1) 56 | source('~/Box 
Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 57 | set.seed(1) 58 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 59 | interv_var = 'trt', interv_val = 1) 60 | DPglm_res$param_shell[[i]]$param_z[, class, drop=F] 61 | class 62 | DPglm_res$param_shell[[i]]$param_z 63 | pvec 64 | table(c_shell[,1]) 65 | DPglm_res$param_shell[[1]]$param_z 66 | DPglm_res$param_shell[[2]]$param_z 67 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 68 | set.seed(1) 69 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 70 | interv_var = 'trt', interv_val = 1) 71 | warnings() 72 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 73 | set.seed(1) 74 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 75 | interv_var = 'trt', interv_val = 1) 76 | x_vec 77 | interv_var 78 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 79 | set.seed(1) 80 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 81 | interv_var = 'trt', interv_val = 1) 82 | xz_vec 83 | x_vec 84 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 85 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 86 | set.seed(1) 87 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 88 | interv_var = 'trt', interv_val = 1) 89 | x_vec 90 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 91 | set.seed(1) 92 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 93 | interv_var = 'trt', interv_val = 1) 94 | x_vec 95 | 2:(nparams-1) 96 | nparams 97 | x_vec 98 | nparams 99 | nparams-1 100 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 101 | set.seed(1) 102 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 103 | interv_var = 'trt', interv_val = 1) 104 | x_vec 105 | source('~/Box 
Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 106 | set.seed(1) 107 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 108 | interv_var = 'trt', interv_val = 1) 109 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 110 | set.seed(1) 111 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 112 | interv_var = 'trt', interv_val = 1) 113 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 114 | set.seed(1) 115 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 116 | interv_var = 'trt', interv_val = 1) 117 | source('~/Box Sync/Research/Analyses/ZeroInf_BNP_reg/code/G_comp_Full_SourceCode.R') 118 | set.seed(1) 119 | EY_1 <- bnp_standardization(DPmix_res = DPglm_res, iter=1000, 120 | interv_var = 'trt', interv_val = 1) 121 | par(mfrow=c(3,1)) 122 | hist(EY_1,breaks=60) 123 | abline(v=EY_1_true, lwd=2, col='red') 124 | par(mfrow=c(1,1)) 125 | hist(EY_1,breaks=60) 126 | plot(EY_1, type='l') 127 | EY_1_true <- true_standard(interv_var = 'trt', interv_val = 1, 128 | iter=10000000, covar_names = c('age', 'trt')) 129 | EY_0_true <- true_standard(interv_var = 'trt', interv_val = 0, 130 | iter=10000000, covar_names = c('age', 'trt')) 131 | delta <- EY_1_true - EY_0_true 132 | delta 133 | abline(h = EY_1_true, col='red', lwd=3) 134 | library(nimble) 135 | setwd("/Users/aoganisi/Dropbox/Stable Markets/BayesianTutorials/PartialPooling") 136 | ################################################################################ 137 | ##### Simulate Data ##### 138 | ################################################################################ 139 | set.seed(10) 140 | d_A<-rbinom(n = 5, size = 1, prob = .2) 141 | d_B<-rbinom(n = 100, size = 1, prob = .3) 142 | d_C<-rbinom(n = 5, size = 1, prob = .45) 143 | d_D<-rbinom(n = 100, size = 1, prob = .7) 144 | d_E<-rbinom(n = 5, size = 1, prob = .8) 145 | approve <- c(d_A, d_B, d_C, d_D, d_E) 146 | industry <- 
as.factor(c(rep('A', 5), rep('B', 100), rep('C', 5), 147 | rep('D', 100), rep('E', 5) )) 148 | mod_mat <- model.matrix(lm(approve ~ industry)) 149 | d_list <- list(X = mod_mat, 150 | approve = approve) 151 | p <- ncol(mod_mat) 152 | n <- nrow(mod_mat) 153 | ################################################################################ 154 | ##### Bayesian Esimtate ##### 155 | ################################################################################ 156 | code <- nimbleCode({ 157 | for(i in 1:p){ 158 | beta[i] ~ dnorm(0, 2) 159 | } 160 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 161 | for(i in 1:n) { 162 | approve[i] ~ dbern(prob = eta[i] ) 163 | } 164 | p_approve[1] <- beta[1] 165 | p_approve[2] <- beta[1] + beta[2] 166 | p_approve[3] <- beta[1] + beta[3] 167 | p_approve[4] <- beta[1] + beta[4] 168 | p_approve[5] <- beta[1] + beta[5] 169 | }) 170 | merge_model <- nimbleModel(code=code, 171 | constants=list(p=p, n=n), 172 | inits = list(beta=c(0,0,0,0,0)), 173 | data=d_list) 174 | spec <- configureMCMC(merge_model) 175 | spec$addSampler(type = 'RW_block', target ='beta') 176 | spec$monitors <- c('p_approve') 177 | mcmc <- buildMCMC(spec) 178 | Cmodel <- compileNimble(merge_model) 179 | Cmcmc <- compileNimble(mcmc, project = merge_model) 180 | Cmcmc$run(10000) 181 | samples <- as.matrix(Cmcmc$mvSamples) 182 | summary(samples) 183 | ### no pooling 184 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 185 | vcov(no_pool_res) 186 | ?run 187 | compiled_mcmc 188 | compiled_mcmc 189 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 190 | compiled_mcmc 191 | compiled_mcmc$trace 192 | compiled_mcmc$thin 193 | spec <- configureMCMC(merge_model) 194 | spec$controlDefaults 195 | summary(compiled_mcmc) 196 | summary(compiled_mcmc$run(1000)) 197 | compiled_mcmc$mvSamples2 198 | mcmc <- buildMCMC(spec,burnin=1000) 199 | compiled_model <- compileNimble(merge_model) 200 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 201 
| compiled_mcmc$run(10000) 202 | samples <- as.matrix(compiled_mcmc$mvSamples) 203 | summary(samples) 204 | ?buildMCMC 205 | ?configureMCMC 206 | merge_model <- nimbleModel(code=code, 207 | constants=list(p=p, n=n), 208 | inits = list(beta=c(0,0,0,0,0)), 209 | data=d_list) 210 | spec <- configureMCMC(merge_model) 211 | spec$set 212 | spec$addSampler(type = 'RW_block', target ='beta', 213 | control = list(targetNodes='beta', 214 | adaptive = TRUE )) 215 | spec$monitors <- c('p_approve') 216 | mcmc <- buildMCMC(spec,) 217 | compiled_model <- compileNimble(merge_model) 218 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 219 | compiled_mcmc$run(10000) 220 | samples <- as.matrix(compiled_mcmc$mvSamples) 221 | summary(samples) 222 | merge_model <- nimbleModel(code=code, 223 | constants=list(p=p, n=n), 224 | inits = list(beta=c(0,0,0,0,0)), 225 | data=d_list) 226 | spec <- configureMCMC(merge_model) 227 | spec$set 228 | spec$addSampler(type = 'RW_block', target ='beta', 229 | control = list(targetNodes='beta', 230 | adaptive = TRUE )) 231 | spec$monitors <- c('p_approve') 232 | mcmc <- buildMCMC(spec) 233 | compiled_model <- compileNimble(merge_model) 234 | samples <- as.matrix(compiled_mcmc$mvSamples) 235 | summary(samples) 236 | code <- nimbleCode({ 237 | for(i in 1:p){ 238 | beta[i] ~ dnorm(0, 2) 239 | } 240 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 241 | for(i in 1:n) { 242 | approve[i] ~ dbern(prob = eta[i] ) 243 | } 244 | p_approve[1] <- expit(beta[1]) 245 | p_approve[2] <- expit(beta[1] + beta[2]) 246 | p_approve[3] <- expit(beta[1] + beta[3]) 247 | p_approve[4] <- expit(beta[1] + beta[4]) 248 | p_approve[5] <- expit(beta[1] + beta[5]) 249 | }) 250 | merge_model <- nimbleModel(code=code, 251 | constants=list(p=p, n=n), 252 | inits = list(beta=c(0,0,0,0,0)), 253 | data=d_list) 254 | spec <- configureMCMC(merge_model) 255 | spec$addSampler(type = 'RW_block', target ='beta', 256 | control = list(targetNodes='beta', 257 | adaptive = TRUE )) 258 | 
spec$monitors <- c('p_approve') 259 | mcmc <- buildMCMC(spec) 260 | compiled_model <- compileNimble(merge_model) 261 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 262 | compiled_mcmc$run(10000) 263 | samples <- as.matrix(compiled_mcmc$mvSamples) 264 | summary(samples) 265 | partial_pool_p <- colMeans(samples) 266 | ### no pooling 267 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 268 | # compute probabilities of merger for each industry separately. 269 | no_pooled_p <- predict(no_pool_res, 270 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 271 | type = 'response') 272 | ### complete pooling 273 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 274 | # compute probability of merger across all idustries, pooled. 275 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 276 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 277 | xlab='Industry', ylab='Probability of Merger') 278 | axis(1, at = 1:5, labels = paste0(partial_pool_p$industry, " (n =",c(5,10,20,10,5),')' ) ) 279 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 280 | points(1:5, no_pooled_p, pch=20, col='black') 281 | abline(h=pooled_p, lty=2) 282 | legend('bottomright', 283 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 284 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 285 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 286 | xlab='Industry', ylab='Probability of Merger') 287 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,10,20,10,5),')' ) ) 288 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 289 | points(1:5, no_pooled_p, pch=20, col='black') 290 | abline(h=pooled_p, lty=2) 291 | legend('bottomright', 292 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 293 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 294 | plot(partial_pool_p, pch=20, 
col='red', ylim=c(0,1), axes=F, 295 | xlab='Industry', ylab='Probability of Merger') 296 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 297 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 298 | points(1:5, no_pooled_p, pch=20, col='black') 299 | abline(h=pooled_p, lty=2) 300 | legend('bottomright', 301 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 302 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 303 | code <- nimbleCode({ 304 | for(i in 1:p){ 305 | beta[i] ~ dnorm(0, 3) 306 | } 307 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 308 | for(i in 1:n) { 309 | approve[i] ~ dbern(prob = eta[i] ) 310 | } 311 | p_approve[1] <- expit(beta[1]) 312 | p_approve[2] <- expit(beta[1] + beta[2]) 313 | p_approve[3] <- expit(beta[1] + beta[3]) 314 | p_approve[4] <- expit(beta[1] + beta[4]) 315 | p_approve[5] <- expit(beta[1] + beta[5]) 316 | }) 317 | merge_model <- nimbleModel(code=code, 318 | constants=list(p=p, n=n), 319 | inits = list(beta=c(0,0,0,0,0)), 320 | data=d_list) 321 | spec <- configureMCMC(merge_model) 322 | spec$addSampler(type = 'RW_block', target ='beta', 323 | control = list(targetNodes='beta', 324 | adaptive = TRUE )) 325 | spec$monitors <- c('p_approve') 326 | mcmc <- buildMCMC(spec) 327 | compiled_model <- compileNimble(merge_model) 328 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 329 | compiled_mcmc$run(10000) 330 | samples <- as.matrix(compiled_mcmc$mvSamples) 331 | summary(samples) 332 | partial_pool_p <- colMeans(samples) 333 | ################################################################################ 334 | ##### Frequentist Estimates ##### 335 | ################################################################################ 336 | ### no pooling 337 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 338 | # compute probabilities of merger for each industry separately. 
339 | no_pooled_p <- predict(no_pool_res, 340 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 341 | type = 'response') 342 | ### complete pooling 343 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 344 | # compute probability of merger across all idustries, pooled. 345 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 346 | ################################################################################ 347 | ##### Visualize Results ##### 348 | ################################################################################ 349 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 350 | xlab='Industry', ylab='Probability of Merger') 351 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 352 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 353 | points(1:5, no_pooled_p, pch=20, col='black') 354 | abline(h=pooled_p, lty=2) 355 | legend('bottomright', 356 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 357 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 358 | partial_pool_p <- colMeans(samples[5000:10000,]) 359 | ################################################################################ 360 | ##### Frequentist Estimates ##### 361 | ################################################################################ 362 | ### no pooling 363 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 364 | # compute probabilities of merger for each industry separately. 365 | no_pooled_p <- predict(no_pool_res, 366 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 367 | type = 'response') 368 | ### complete pooling 369 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 370 | # compute probability of merger across all idustries, pooled. 
371 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 372 | ################################################################################ 373 | ##### Visualize Results ##### 374 | ################################################################################ 375 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 376 | xlab='Industry', ylab='Probability of Merger') 377 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 378 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 379 | points(1:5, no_pooled_p, pch=20, col='black') 380 | abline(h=pooled_p, lty=2) 381 | legend('bottomright', 382 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 383 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 384 | library(nimble) 385 | setwd("/Users/aoganisi/Dropbox/Stable Markets/BayesianTutorials/PartialPooling") 386 | ################################################################################ 387 | ##### Simulate Data ##### 388 | ################################################################################ 389 | set.seed(10) 390 | d_A<-rbinom(n = 10, size = 1, prob = .2) 391 | d_B<-rbinom(n = 100, size = 1, prob = .3) 392 | d_C<-rbinom(n = 10, size = 1, prob = .45) 393 | d_D<-rbinom(n = 100, size = 1, prob = .7) 394 | d_E<-rbinom(n = 10, size = 1, prob = .8) 395 | approve <- c(d_A, d_B, d_C, d_D, d_E) 396 | industry <- as.factor(c(rep('A', 10), rep('B', 100), rep('C', 10), 397 | rep('D', 100), rep('E', 10) )) 398 | mod_mat <- model.matrix(lm(approve ~ industry)) 399 | d_list <- list(X = mod_mat, 400 | approve = approve) 401 | p <- ncol(mod_mat) 402 | n <- nrow(mod_mat) 403 | ################################################################################ 404 | ##### Bayesian Esimtate ##### 405 | ################################################################################ 406 | code <- nimbleCode({ 407 | for(i in 1:p){ 408 | beta[i] ~ dnorm(0, 
3) 409 | } 410 | logit(eta[1:n]) <- X[1:n,1:p] %*% beta[1:p] 411 | for(i in 1:n) { 412 | approve[i] ~ dbern(prob = eta[i] ) 413 | } 414 | p_approve[1] <- expit(beta[1]) 415 | p_approve[2] <- expit(beta[1] + beta[2]) 416 | p_approve[3] <- expit(beta[1] + beta[3]) 417 | p_approve[4] <- expit(beta[1] + beta[4]) 418 | p_approve[5] <- expit(beta[1] + beta[5]) 419 | }) 420 | merge_model <- nimbleModel(code=code, 421 | constants=list(p=p, n=n), 422 | inits = list(beta=c(0,0,0,0,0)), 423 | data=d_list) 424 | spec <- configureMCMC(merge_model) 425 | spec$addSampler(type = 'RW_block', target ='beta', 426 | control = list(targetNodes='beta', 427 | adaptive = TRUE )) 428 | spec$monitors <- c('p_approve') 429 | mcmc <- buildMCMC(spec) 430 | compiled_model <- compileNimble(merge_model) 431 | compiled_mcmc <- compileNimble(mcmc, project = merge_model) 432 | compiled_mcmc$run(10000) 433 | samples <- as.matrix(compiled_mcmc$mvSamples) 434 | summary(samples) 435 | partial_pool_p <- colMeans(samples[5000:10000,]) 436 | ################################################################################ 437 | ##### Frequentist Estimates ##### 438 | ################################################################################ 439 | ### no pooling 440 | no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit')) 441 | # compute probabilities of merger for each industry separately. 442 | no_pooled_p <- predict(no_pool_res, 443 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 444 | type = 'response') 445 | ### complete pooling 446 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 447 | # compute probability of merger across all idustries, pooled. 
448 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 449 | ################################################################################ 450 | ##### Visualize Results ##### 451 | ################################################################################ 452 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 453 | xlab='Industry', ylab='Probability of Merger') 454 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(5,100,5,100,5),')' ) ) 455 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 456 | points(1:5, no_pooled_p, pch=20, col='black') 457 | abline(h=pooled_p, lty=2) 458 | legend('bottomright', 459 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 460 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 461 | d_A 462 | d_B 463 | d_C 464 | d_E 465 | sum(d_E) 466 | getwd() 467 | png(filename = 'PartialPool.png') 468 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 469 | xlab='Industry', ylab='Probability of Merger') 470 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(10,100,10,100,10),')' ) ) 471 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 472 | points(1:5, no_pooled_p, pch=20, col='black') 473 | abline(h=pooled_p, lty=2) 474 | legend('bottomright', 475 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 476 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 477 | dev.off() 478 | knitr::opts_chunk$set(echo = TRUE) 479 | head(data.frame(approve, industry)) 480 | data.frame(approve, industry)[5:15,] 481 | data.frame(approve, industry)[5:20,] 482 | data.frame(approve, industry)[5:30,] 483 | data.frame(approve, industry)[1:30,] 484 | data.frame(approve, industry)[30:40,] 485 | data.frame(approve, industry)[30:50,] 486 | data.frame(approve, industry)[110:120,] 487 | data.frame(approve, industry)[105:120,] 488 | library(dplyr) 489 | library(dplyr) 490 | library(tidyr) 491 | # 
glimpse of the data 492 | data.frame(approve, industry)[105:120,] 493 | data.frame(approve,industry) %>% 494 | group_by(industry) %>% 495 | summarise(mean(approve)) 496 | data.frame(approve,industry) %>% 497 | group_by(industry) %>% 498 | summarise(mean(approve), n()) 499 | data.frame(approve,industry) %>% 500 | group_by(industry) %>% 501 | summarise(mean(approve), n_mergers_started=n()) 502 | data.frame(approve,industry) %>% 503 | group_by(industry) %>% 504 | summarise(succes_rate=mean(approve), n_mergers_started=n()) 505 | mean(approve) 506 | knitr::opts_chunk$set(echo = T, warning = F, error = F, message = F) 507 | unpooled <- data.frame(approve,industry) %>% 508 | group_by(industry) %>% 509 | summarise(success_rate=mean(approve), n_mergers_started=n()) 510 | unpooled 511 | sessionInfo() 512 | getwd() 513 | -------------------------------------------------------------------------------- /PartialPooling/PartialPool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/PartialPooling/PartialPool.png -------------------------------------------------------------------------------- /PartialPooling/PartialPool.r: -------------------------------------------------------------------------------- 1 | library(nimble) 2 | setwd("/Users/aoganisi/Dropbox/Stable Markets/BayesianTutorials/PartialPooling") 3 | 4 | ################################################################################ 5 | ##### Simulate Data ##### 6 | ################################################################################ 7 | set.seed(10) 8 | 9 | d_A<-rbinom(n = 10, size = 1, prob = .2) 10 | d_B<-rbinom(n = 100, size = 1, prob = .3) 11 | d_C<-rbinom(n = 10, size = 1, prob = .45) 12 | d_D<-rbinom(n = 100, size = 1, prob = .7) 13 | d_E<-rbinom(n = 10, size = 1, prob = .8) 14 | 15 | approve <- c(d_A, d_B, d_C, d_D, d_E) 16 | industry <- as.factor(c(rep('A', 
10), rep('B', 100), rep('C', 10),
                        rep('D', 100), rep('E', 10)))

# design matrix with industry dummies (industry A is the reference level)
mod_mat <- model.matrix(lm(approve ~ industry))

d_list <- list(X = mod_mat,
               approve = approve)

p <- ncol(mod_mat)
n <- nrow(mod_mat)

################################################################################
#####                        Bayesian Estimate                             #####
################################################################################

code <- nimbleCode({

  # normal priors on the logit-scale coefficients
  for(i in 1:p){
    beta[i] ~ dnorm(0, 3)
  }

  # linear predictor on the logit scale
  logit(eta[1:n]) <- X[1:n, 1:p] %*% beta[1:p]

  # Bernoulli likelihood for each merger outcome
  for(i in 1:n) {
    approve[i] ~ dbern(prob = eta[i])
  }

  # derived quantities: approval probability for each of the five industries
  p_approve[1] <- expit(beta[1])
  p_approve[2] <- expit(beta[1] + beta[2])
  p_approve[3] <- expit(beta[1] + beta[3])
  p_approve[4] <- expit(beta[1] + beta[4])
  p_approve[5] <- expit(beta[1] + beta[5])

})

merge_model <- nimbleModel(code = code,
                           constants = list(p = p, n = n),
                           inits = list(beta = c(0, 0, 0, 0, 0)),
                           data = d_list)

# adaptive block random-walk sampler over all coefficients jointly,
# monitoring only the derived approval probabilities
spec <- configureMCMC(merge_model)
spec$addSampler(type = 'RW_block', target = 'beta',
                control = list(targetNodes = 'beta',
                               adaptive = TRUE))
spec$monitors <- c('p_approve')



mcmc <- buildMCMC(spec)

compiled_model <- compileNimble(merge_model)
compiled_mcmc <- compileNimble(mcmc, project = merge_model)

compiled_mcmc$run(10000)

samples <- as.matrix(compiled_mcmc$mvSamples)
summary(samples)

# posterior means, discarding the first half of the chain as burn-in
partial_pool_p <- colMeans(samples[5000:10000, ])

################################################################################
#####                       Frequentist Estimates                          #####
################################################################################

### no pooling
no_pool_res <- glm(approve ~ industry, family = binomial(link = 'logit'))

# compute probabilities of merger for each industry separately.
86 | no_pooled_p <- predict(no_pool_res, 87 | newdata = data.frame(industry=as.factor(c('A','B','C','D','E') )), 88 | type = 'response') 89 | 90 | ### complete pooling 91 | pool_res <- glm(approve ~ 1, family = binomial(link = 'logit')) 92 | 93 | # compute probability of merger across all idustries, pooled. 94 | pooled_p <- exp(pool_res$coefficients)/(1 + exp(pool_res$coefficients)) 95 | 96 | 97 | ################################################################################ 98 | ##### Visualize Results ##### 99 | ################################################################################ 100 | 101 | png(filename = 'PartialPool.png') 102 | plot(partial_pool_p, pch=20, col='red', ylim=c(0,1), axes=F, 103 | xlab='Industry', ylab='Probability of Merger') 104 | axis(1, at = 1:5, labels = paste0(unique(industry), " (n =",c(10,100,10,100,10),')' ) ) 105 | axis(2, at = seq(0,1,.2), labels= seq(0,1,.2) ) 106 | 107 | points(1:5, no_pooled_p, pch=20, col='black') 108 | abline(h=pooled_p, lty=2) 109 | 110 | legend('bottomright', 111 | legend = c('Pooled Estimate','Stratified Estimates', 'Bayesian Estimate'), 112 | lty = c(2,NA,NA), col=c('black','black','red'), pch=c(NA, 20,20), bty='n') 113 | dev.off() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bayesian Tutorials 2 | This repo hosts code behind the series of posts that walk through MCMC implementations for various classes of Bayesian models. I wrote these in grad school as I was teaching myself Bayesian computation. I never felt like I truly understood something until I could implement it from scratch. The code is probably all very inefficient and perhaps even wrong, but is a kind of diary of my "from scratch" journey learning Bayes. 3 | 4 | In order of publication: 5 | 1. 
[Bayesian Simple Linear Regression with Gibbs](https://stablemarkets.wordpress.com/2017/08/07/bayesian-simple-linear-regression-with-gibbs-sampling-in-r/) 6 | 2. [Blocked Gibbs for Bayesian Multivariate Linear Regression](https://stablemarkets.wordpress.com/2017/09/05/blocked-gibbs-sampling-in-r-for-bayesian-multiple-linear-regression/) 7 | 3. [Metropolis Hastings-in-Gibbs Sampler for Bayesian Logistic Regression](https://stablemarkets.wordpress.com/2017/11/07/metropolis-in-gibbs-sampling-and-runtime-analysis-with-profviz/) 8 | 4. [Using Rcpp to speed up Metropolis-Hastings](https://stablemarkets.wordpress.com/2018/03/16/speeding-up-metropolis-hastings-with-rcpp/) 9 | 5. [Bayesian Inference with Backfitting MCMC](https://stablemarkets.wordpress.com/2018/05/03/bayesian-inference-with-backfitting-mcmc/) 10 | 6. [Efficient MCMC with Caching](https://stablemarkets.wordpress.com/2019/03/02/efficient-mcmc-with-caching/) 11 | -------------------------------------------------------------------------------- /RcppBoost/.Rhistory: -------------------------------------------------------------------------------- 1 | for(i in 2:iter){ 2 | beta_0 <- beta_shell[i-1, ] 3 | # draw from proposal distribution 4 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 5 | # calculate ratio of conditional posterior densities 6 | r_num <- log_posterior(beta_c, X, Y ) 7 | r_denom <- log_posterior(beta_0, X, Y ) 8 | # calculate acceptance probability 9 | r <- exp(r_num - r_denom) 10 | rmin<-min(r,1) 11 | # accept or reject proposal 12 | if( rbinom(1,1,rmin) == 1 ){ 13 | beta_shell[i, ] <- beta_c 14 | }else{ 15 | beta_shell[i, ] <- beta_0 16 | } 17 | accept_shell[i] <- rmin 18 | } 19 | colnames(beta_shell) <- colnames(X) 20 | colnames(beta_shell)[1] <- 'intercept' 21 | return(list(beta_shell, accept_shell) ) 22 | } 23 | # Metropolis-Hastings Sampler using log_post(), 24 | # which is the log posterior coded in C++. 
25 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 26 | # create shells 27 | p <- ncol(X) 28 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 29 | accept_shell <- numeric(length = iter) 30 | # starting values 31 | beta_shell[1,] <- rep(10, p) 32 | for(i in 2:iter){ 33 | beta_0 <- beta_shell[i-1, ] 34 | # draw from proposal distribution 35 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 36 | # calculate ratio of conditional posterior densities 37 | r_num <- log_post(beta_c, Y, X ) 38 | r_denom <- log_post(beta_0, Y, X ) 39 | # calculate acceptance probability 40 | r <- exp(r_num - r_denom) 41 | rmin<-min(r,1) 42 | # accept or reject proposal 43 | if( rbinom(1,1,rmin) == 1 ){ 44 | beta_shell[i, ] <- beta_c 45 | }else{ 46 | beta_shell[i, ] <- beta_0 47 | } 48 | accept_shell[i] <- rmin 49 | } 50 | colnames(beta_shell) <- colnames(X) 51 | colnames(beta_shell)[1] <- 'intercept' 52 | return(list(beta_shell, accept_shell) ) 53 | } 54 | ################################################################################ 55 | ### 2 - Test the Samplers 56 | ################################################################################ 57 | burnin <- 1000 58 | p <- ncol(X) 59 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 60 | par(mfrow=c(2,2)) 61 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 62 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 63 | abline(h= -1, col='red') 64 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 65 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 66 | abline(h= .7, col='red') 67 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 68 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 69 | abline(h= 1.1, col='red') 70 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 71 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 72 | abline(h= 1.1, col='red') 73 | par(mfrow=c(1,1)) 74 | plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 75 | xlab='MH Iteration', 
ylab='Cumulative Average Acceptance Rate', 76 | main='Acceptance Rate Over Sampling Run') 77 | abline(h= 1.1, col='red') 78 | burnin <- 1000 79 | p <- ncol(X) 80 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = 50000, jump_v = .03) 81 | #Author: Arman Oganisian 82 | library(LaplacesDemon) 83 | library(invgamma) 84 | library(MASS) 85 | library(tidyr) 86 | library(dplyr) 87 | library(ggplot2) 88 | library(microbenchmark) 89 | library(Rcpp) 90 | sourceCpp('log_post.cpp') 91 | ################################################################################ 92 | ### 0 - Simulate Data 93 | ################################################################################ 94 | set.seed(10) 95 | sim_dat <- function(N){ 96 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 97 | d$age_1<-ifelse(d$age_group==1,1,0) 98 | d$age_2<-ifelse(d$age_group==2,1,0) 99 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 100 | d$y<-rbinom(n = N, size = 1, 101 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 102 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 103 | Y<-matrix(d$y, ncol=1) # outcome vector 104 | return(list(X=X, Y=Y)) 105 | } 106 | d <- sim_dat(N=1000) 107 | X <- d$X 108 | Y <- d$Y 109 | ################################################################################ 110 | ### 1 - functions to sample from conditional posterior distributions 111 | ################################################################################ 112 | # unnormalized log posterior of beta vector 113 | log_posterior<-function(beta, X, Y){ 114 | # calculate likelihood 115 | xb <- X %*% beta 116 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 117 | p_i <- invlogit(xb) 118 | lik <- sum(dbern(Y, p_i, log = T)) 119 | # calculate prior 120 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 121 | log_cond_post <- lik + pr 122 | return(log_cond_post) 123 | } 124 | # Metropolis-Hastings Sampler using log_posterior(), 125 | # 
which is the log posterior coded in R. 126 | sample_mh<-function(X, Y, iter, jump_v){ 127 | # create shells 128 | p <- ncol(X) 129 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 130 | accept_shell <- numeric(length = iter) 131 | # starting values 132 | beta_shell[1,] <- rep(10, p) 133 | for(i in 2:iter){ 134 | beta_0 <- beta_shell[i-1, ] 135 | # draw from proposal distribution 136 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 137 | # calculate ratio of conditional posterior densities 138 | r_num <- log_posterior(beta_c, X, Y ) 139 | r_denom <- log_posterior(beta_0, X, Y ) 140 | # calculate acceptance probability 141 | r <- exp(r_num - r_denom) 142 | rmin<-min(r,1) 143 | # accept or reject proposal 144 | if( rbinom(1,1,rmin) == 1 ){ 145 | beta_shell[i, ] <- beta_c 146 | }else{ 147 | beta_shell[i, ] <- beta_0 148 | } 149 | accept_shell[i] <- rmin 150 | } 151 | colnames(beta_shell) <- colnames(X) 152 | colnames(beta_shell)[1] <- 'intercept' 153 | return(list(beta_shell, accept_shell) ) 154 | } 155 | # Metropolis-Hastings Sampler using log_post(), 156 | # which is the log posterior coded in C++. 
157 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 158 | # create shells 159 | p <- ncol(X) 160 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 161 | accept_shell <- numeric(length = iter) 162 | # starting values 163 | beta_shell[1,] <- rep(10, p) 164 | for(i in 2:iter){ 165 | beta_0 <- beta_shell[i-1, ] 166 | # draw from proposal distribution 167 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 168 | # calculate ratio of conditional posterior densities 169 | r_num <- log_post(beta_c, Y, X ) 170 | r_denom <- log_post(beta_0, Y, X ) 171 | # calculate acceptance probability 172 | r <- exp(r_num - r_denom) 173 | rmin<-min(r,1) 174 | # accept or reject proposal 175 | if( rbinom(1,1,rmin) == 1 ){ 176 | beta_shell[i, ] <- beta_c 177 | }else{ 178 | beta_shell[i, ] <- beta_0 179 | } 180 | accept_shell[i] <- rmin 181 | } 182 | colnames(beta_shell) <- colnames(X) 183 | colnames(beta_shell)[1] <- 'intercept' 184 | return(list(beta_shell, accept_shell) ) 185 | } 186 | ################################################################################ 187 | ### 2 - Test the Samplers 188 | ################################################################################ 189 | burnin <- 1000 190 | p <- ncol(X) 191 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = 50000, jump_v = .03) 192 | par(mfrow=c(2,2)) 193 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 194 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 195 | abline(h= -1, col='red') 196 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 197 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 198 | abline(h= .7, col='red') 199 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 200 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 201 | abline(h= 1.1, col='red') 202 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 203 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 204 | abline(h= 1.1, col='red') 205 | par(mfrow=c(1,1)) 206 | 
plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 207 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 208 | main='Acceptance Rate Over Sampling Run') 209 | abline(h= 1.1, col='red') 210 | #Author: Arman Oganisian 211 | library(LaplacesDemon) 212 | library(invgamma) 213 | library(MASS) 214 | library(tidyr) 215 | library(dplyr) 216 | library(ggplot2) 217 | library(microbenchmark) 218 | library(Rcpp) 219 | sourceCpp('log_post.cpp') 220 | ################################################################################ 221 | ### 0 - Simulate Data 222 | ################################################################################ 223 | set.seed(10) 224 | sim_dat <- function(N){ 225 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 226 | d$age_1<-ifelse(d$age_group==1,1,0) 227 | d$age_2<-ifelse(d$age_group==2,1,0) 228 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 229 | d$y<-rbinom(n = N, size = 1, 230 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 231 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 232 | Y<-matrix(d$y, ncol=1) # outcome vector 233 | return(list(X=X, Y=Y)) 234 | } 235 | d <- sim_dat(N=1000) 236 | X <- d$X 237 | Y <- d$Y 238 | ################################################################################ 239 | ### 1 - functions to sample from conditional posterior distributions 240 | ################################################################################ 241 | # unnormalized log posterior of beta vector 242 | log_posterior<-function(beta, X, Y){ 243 | # calculate likelihood 244 | xb <- X %*% beta 245 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 246 | p_i <- invlogit(xb) 247 | lik <- sum(dbern(Y, p_i, log = T)) 248 | # calculate prior 249 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 250 | log_cond_post <- lik + pr 251 | return(log_cond_post) 252 | } 253 | # Metropolis-Hastings Sampler using log_posterior(), 254 | # which is 
the log posterior coded in R. 255 | sample_mh<-function(X, Y, iter, jump_v){ 256 | # create shells 257 | p <- ncol(X) 258 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 259 | accept_shell <- numeric(length = iter) 260 | # starting values 261 | beta_shell[1,] <- rep(10, p) 262 | for(i in 2:iter){ 263 | beta_0 <- beta_shell[i-1, ] 264 | # draw from proposal distribution 265 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 266 | # calculate ratio of conditional posterior densities 267 | r_num <- log_posterior(beta_c, X, Y ) 268 | r_denom <- log_posterior(beta_0, X, Y ) 269 | # calculate acceptance probability 270 | r <- exp(r_num - r_denom) 271 | rmin<-min(r,1) 272 | # accept or reject proposal 273 | if( rbinom(1,1,rmin) == 1 ){ 274 | beta_shell[i, ] <- beta_c 275 | }else{ 276 | beta_shell[i, ] <- beta_0 277 | } 278 | accept_shell[i] <- rmin 279 | } 280 | colnames(beta_shell) <- colnames(X) 281 | colnames(beta_shell)[1] <- 'intercept' 282 | return(list(beta_shell, accept_shell) ) 283 | } 284 | # Metropolis-Hastings Sampler using log_post(), 285 | # which is the log posterior coded in C++. 
286 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 287 | # create shells 288 | p <- ncol(X) 289 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 290 | accept_shell <- numeric(length = iter) 291 | # starting values 292 | beta_shell[1,] <- rep(10, p) 293 | for(i in 2:iter){ 294 | beta_0 <- beta_shell[i-1, ] 295 | # draw from proposal distribution 296 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 297 | # calculate ratio of conditional posterior densities 298 | r_num <- log_post(beta_c, Y, X ) 299 | r_denom <- log_post(beta_0, Y, X ) 300 | # calculate acceptance probability 301 | r <- exp(r_num - r_denom) 302 | rmin<-min(r,1) 303 | # accept or reject proposal 304 | if( rbinom(1,1,rmin) == 1 ){ 305 | beta_shell[i, ] <- beta_c 306 | }else{ 307 | beta_shell[i, ] <- beta_0 308 | } 309 | accept_shell[i] <- rmin 310 | } 311 | colnames(beta_shell) <- colnames(X) 312 | colnames(beta_shell)[1] <- 'intercept' 313 | return(list(beta_shell, accept_shell) ) 314 | } 315 | ################################################################################ 316 | ### 2 - Test the Samplers 317 | ################################################################################ 318 | burnin <- 1000 319 | iter <- 100000 320 | p <- ncol(X) 321 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 322 | par(mfrow=c(2,2)) 323 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 324 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 325 | abline(h= -1, col='red') 326 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 327 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 328 | abline(h= .7, col='red') 329 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 330 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 331 | abline(h= 1.1, col='red') 332 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 333 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 334 | abline(h= 1.1, col='red') 335 | par(mfrow=c(1,1)) 336 | 
plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 337 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 338 | main='Acceptance Rate Over Sampling Run') 339 | abline(h= 1.1, col='red') 340 | ################################################################################ 341 | ### 3 - Benchmarks 342 | ################################################################################ 343 | iter <- 10000 344 | ss <- c(100, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000) 345 | rel_time <-numeric(length = length(ss)) 346 | for(i in 1:length(ss) ){ 347 | d <- sim_dat(N = ss[i]) 348 | X <- d$X 349 | Y <- d$Y 350 | bench<-microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .03), 351 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .03), 352 | times = 2) 353 | bench_sum <- summary(bench) 354 | r_time <- bench_sum$mean[bench_sum$expr=='R_MH'] 355 | rcpp_time <- bench_sum$mean[bench_sum$expr=='Cpp_MH'] 356 | rel_time[i] <- r_time/rcpp_time 357 | } 358 | plot(ss, rel_time, type='l') 359 | #Author: Arman Oganisian 360 | library(LaplacesDemon) 361 | library(invgamma) 362 | library(MASS) 363 | library(tidyr) 364 | library(dplyr) 365 | library(ggplot2) 366 | library(microbenchmark) 367 | library(Rcpp) 368 | sourceCpp('log_post.cpp') 369 | ################################################################################ 370 | ### 0 - Simulate Data 371 | ################################################################################ 372 | set.seed(10) 373 | sim_dat <- function(N){ 374 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 375 | d$age_1<-ifelse(d$age_group==1,1,0) 376 | d$age_2<-ifelse(d$age_group==2,1,0) 377 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 378 | d$y<-rbinom(n = N, size = 1, 379 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 380 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 381 | Y<-matrix(d$y, ncol=1) # outcome vector 382 | return(list(X=X, Y=Y)) 383 | } 
384 | d <- sim_dat(N=1000) 385 | X <- d$X 386 | Y <- d$Y 387 | ################################################################################ 388 | ### 1 - functions to sample from conditional posterior distributions 389 | ################################################################################ 390 | # unnormalized log posterior of beta vector 391 | log_posterior<-function(beta, X, Y){ 392 | # calculate likelihood 393 | xb <- X %*% beta 394 | xb <- ifelse(xb>10, 10, ifelse( xb< (-10) ,-10, xb)) 395 | p_i <- invlogit(xb) 396 | lik <- sum(dbern(Y, p_i, log = T)) 397 | # calculate prior 398 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 399 | log_cond_post <- lik + pr 400 | return(log_cond_post) 401 | } 402 | # Metropolis-Hastings Sampler using log_posterior(), 403 | # which is the log posterior coded in R. 404 | sample_mh<-function(X, Y, iter, jump_v){ 405 | # create shells 406 | p <- ncol(X) 407 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 408 | accept_shell <- numeric(length = iter) 409 | # starting values 410 | beta_shell[1,] <- rep(10, p) 411 | for(i in 2:iter){ 412 | beta_0 <- beta_shell[i-1, ] 413 | # draw from proposal distribution 414 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 415 | # calculate ratio of conditional posterior densities 416 | r_num <- log_posterior(beta_c, X, Y ) 417 | r_denom <- log_posterior(beta_0, X, Y ) 418 | # calculate acceptance probability 419 | r <- exp(r_num - r_denom) 420 | rmin<-min(r,1) 421 | # accept or reject proposal 422 | if( rbinom(1,1,rmin) == 1 ){ 423 | beta_shell[i, ] <- beta_c 424 | }else{ 425 | beta_shell[i, ] <- beta_0 426 | } 427 | accept_shell[i] <- rmin 428 | } 429 | colnames(beta_shell) <- colnames(X) 430 | colnames(beta_shell)[1] <- 'intercept' 431 | return(list(beta_shell, accept_shell) ) 432 | } 433 | # Metropolis-Hastings Sampler using log_post(), 434 | # which is the log posterior coded in C++. 
435 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 436 | # create shells 437 | p <- ncol(X) 438 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 439 | accept_shell <- numeric(length = iter) 440 | # starting values 441 | beta_shell[1,] <- rep(10, p) 442 | for(i in 2:iter){ 443 | beta_0 <- beta_shell[i-1, ] 444 | # draw from proposal distribution 445 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 446 | # calculate ratio of conditional posterior densities 447 | r_num <- log_post(beta_c, Y, X ) 448 | r_denom <- log_post(beta_0, Y, X ) 449 | # calculate acceptance probability 450 | r <- exp(r_num - r_denom) 451 | rmin<-min(r,1) 452 | # accept or reject proposal 453 | if( rbinom(1,1,rmin) == 1 ){ 454 | beta_shell[i, ] <- beta_c 455 | }else{ 456 | beta_shell[i, ] <- beta_0 457 | } 458 | accept_shell[i] <- rmin 459 | } 460 | colnames(beta_shell) <- colnames(X) 461 | colnames(beta_shell)[1] <- 'intercept' 462 | return(list(beta_shell, accept_shell) ) 463 | } 464 | ################################################################################ 465 | ### 2 - Test the Samplers 466 | ################################################################################ 467 | burnin <- 1000 468 | iter <- 100000 469 | p <- ncol(X) 470 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 471 | par(mfrow=c(2,2)) 472 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 473 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 474 | abline(h= -1, col='red') 475 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 476 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 477 | abline(h= .7, col='red') 478 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 479 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 480 | abline(h= 1.1, col='red') 481 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 482 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 483 | abline(h= 1.1, col='red') 484 | par(mfrow=c(1,1)) 485 | 
plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 486 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 487 | main='Acceptance Rate Over Sampling Run') 488 | abline(h= 1.1, col='red') 489 | ################################################################################ 490 | ### 3 - Benchmarks 491 | ################################################################################ 492 | iter <- 10000 493 | ss <- c(100, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000) 494 | rel_time <-numeric(length = length(ss)) 495 | for(i in 1:length(ss) ){ 496 | d <- sim_dat(N = ss[i]) 497 | X <- d$X 498 | Y <- d$Y 499 | bench<-microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .03), 500 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .03), 501 | times = 2) 502 | bench_sum <- summary(bench) 503 | r_time <- bench_sum$mean[bench_sum$expr=='R_MH'] 504 | rcpp_time <- bench_sum$mean[bench_sum$expr=='Cpp_MH'] 505 | rel_time[i] <- r_time/rcpp_time 506 | } 507 | ################################################################################ 508 | ### 4 - Plot Results 509 | ################################################################################ 510 | plot(ss, rel_time, type='l') 511 | plot(ss, rel_time, type='l', 512 | xlab='Data Sample Size', ylab='Relative Runtime (R v. 
Rcpp)') 513 | -------------------------------------------------------------------------------- /RcppBoost/AcceptProb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/RcppBoost/AcceptProb.png -------------------------------------------------------------------------------- /RcppBoost/Logisitc_MH_Rcpp.r: -------------------------------------------------------------------------------- 1 | #Author: Arman Oganisian 2 | library(LaplacesDemon) 3 | library(invgamma) 4 | library(MASS) 5 | library(tidyr) 6 | library(dplyr) 7 | library(ggplot2) 8 | library(microbenchmark) 9 | library(Rcpp) 10 | 11 | sourceCpp('log_post.cpp') 12 | 13 | ################################################################################ 14 | ### 0 - Simulate Data 15 | ################################################################################ 16 | set.seed(10) 17 | 18 | sim_dat <- function(N){ 19 | d<-data.frame(age_group=sample(x = c(0,1,2), size = N, replace = T)) 20 | d$age_1<-ifelse(d$age_group==1,1,0) 21 | d$age_2<-ifelse(d$age_group==2,1,0) 22 | 23 | d$trt<-rbinom(n = N, size = 1,prob = invlogit(0 + 2*d$age_1 + - 2*d$age_2)) 24 | 25 | d$y<-rbinom(n = N, size = 1, 26 | prob = invlogit(-1 + .7*d$age_1 + 1.1*d$age_2 + 1.1*d$trt)) 27 | 28 | X<-as.matrix(cbind(1,d[,2:4])) # model matrix 29 | Y<-matrix(d$y, ncol=1) # outcome vector 30 | return(list(X=X, Y=Y)) 31 | } 32 | 33 | d <- sim_dat(N=1000) 34 | X <- d$X 35 | Y <- d$Y 36 | 37 | ################################################################################ 38 | ### 1 - functions to sample from conditional posterior distributions 39 | ################################################################################ 40 | 41 | # unnormalized log posterior of beta vector 42 | log_posterior<-function(beta, X, Y){ 43 | 44 | # calculate likelihood 45 | xb <- X %*% beta 46 | xb <- ifelse(xb>10, 10, 
ifelse( xb< (-10) ,-10, xb)) 47 | p_i <- invlogit(xb) 48 | 49 | lik <- sum(dbern(Y, p_i, log = T)) 50 | 51 | # calculate prior 52 | pr <- dmvn(x = beta, mu = rep(0,p), Sigma = (1000^2)*diag(p), log = T) 53 | 54 | log_cond_post <- lik + pr 55 | return(log_cond_post) 56 | } 57 | 58 | # Metropolis-Hastings Sampler using log_posterior(), 59 | # which is the log posterior coded in R. 60 | sample_mh<-function(X, Y, iter, jump_v){ 61 | 62 | # create shells 63 | p <- ncol(X) 64 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 65 | accept_shell <- numeric(length = iter) 66 | 67 | # starting values 68 | beta_shell[1,] <- rep(10, p) 69 | 70 | for(i in 2:iter){ 71 | beta_0 <- beta_shell[i-1, ] 72 | 73 | # draw from proposal distribution 74 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 75 | 76 | # calculate ratio of conditional posterior densities 77 | r_num <- log_posterior(beta_c, X, Y ) 78 | r_denom <- log_posterior(beta_0, X, Y ) 79 | 80 | # calculate acceptance probability 81 | r <- exp(r_num - r_denom) 82 | rmin<-min(r,1) 83 | 84 | # accept or reject proposal 85 | if( rbinom(1,1,rmin) == 1 ){ 86 | beta_shell[i, ] <- beta_c 87 | }else{ 88 | beta_shell[i, ] <- beta_0 89 | } 90 | accept_shell[i] <- rmin 91 | 92 | } 93 | colnames(beta_shell) <- colnames(X) 94 | colnames(beta_shell)[1] <- 'intercept' 95 | return(list(beta_shell, accept_shell) ) 96 | } 97 | 98 | # Metropolis-Hastings Sampler using log_post(), 99 | # which is the log posterior coded in C++. 
100 | 101 | sample_mh_cpp <-function(X, Y, iter, jump_v){ 102 | # create shells 103 | p <- ncol(X) 104 | beta_shell <- matrix(NA, nrow = iter, ncol = p) 105 | accept_shell <- numeric(length = iter) 106 | 107 | # starting values 108 | beta_shell[1,] <- rep(10, p) 109 | 110 | for(i in 2:iter){ 111 | beta_0 <- beta_shell[i-1, ] 112 | 113 | # draw from proposal distribution 114 | beta_c <- mvrnorm(n = 1, beta_0, Sigma = jump_v*diag(p)) 115 | 116 | # calculate ratio of conditional posterior densities 117 | r_num <- log_post(beta_c, Y, X ) 118 | r_denom <- log_post(beta_0, Y, X ) 119 | 120 | # calculate acceptance probability 121 | r <- exp(r_num - r_denom) 122 | rmin<-min(r,1) 123 | 124 | # accept or reject proposal 125 | if( rbinom(1,1,rmin) == 1 ){ 126 | beta_shell[i, ] <- beta_c 127 | }else{ 128 | beta_shell[i, ] <- beta_0 129 | } 130 | accept_shell[i] <- rmin 131 | 132 | } 133 | colnames(beta_shell) <- colnames(X) 134 | colnames(beta_shell)[1] <- 'intercept' 135 | return(list(beta_shell, accept_shell) ) 136 | } 137 | 138 | ################################################################################ 139 | ### 2 - Test the Samplers 140 | ################################################################################ 141 | 142 | burnin <- 1000 143 | iter <- 100000 144 | p <- ncol(X) 145 | 146 | res_mh_cpp <- sample_mh_cpp(X, Y, iter = iter, jump_v = .03) 147 | 148 | par(mfrow=c(2,2)) 149 | plot(res_mh_cpp[[1]][burnin:iter,'intercept'], type='l', 150 | xlab='MH Iteration', ylab='Posterior Draw', main='Intercept') 151 | abline(h= -1, col='red') 152 | plot(res_mh_cpp[[1]][burnin:iter,'age_1'], type='l', 153 | xlab='MH Iteration', ylab='Posterior Draw', main='age1') 154 | abline(h= .7, col='red') 155 | plot(res_mh_cpp[[1]][burnin:iter,'age_2'], type='l', 156 | xlab='MH Iteration', ylab='Posterior Draw', main='age2') 157 | abline(h= 1.1, col='red') 158 | plot(res_mh_cpp[[1]][burnin:iter,'trt'], type='l', 159 | xlab='MH Iteration', ylab='Posterior Draw', main='trt') 160 
| abline(h= 1.1, col='red') 161 | 162 | par(mfrow=c(1,1)) 163 | plot(cumsum(res_mh_cpp[[2]])/1:iter, type='l', 164 | xlab='MH Iteration', ylab='Cumulative Average Acceptance Rate', 165 | main='Acceptance Rate Over Sampling Run') 166 | abline(h= 1.1, col='red') 167 | 168 | ################################################################################ 169 | ### 3 - Benchmarks 170 | ################################################################################ 171 | iter <- 10000 172 | 173 | ss <- c(100, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000) 174 | 175 | rel_time <-numeric(length = length(ss)) 176 | for(i in 1:length(ss) ){ 177 | d <- sim_dat(N = ss[i]) 178 | X <- d$X 179 | Y <- d$Y 180 | 181 | bench<-microbenchmark(R_MH = sample_mh(X, Y, iter = iter, jump_v = .03), 182 | Cpp_MH = sample_mh_cpp(X, Y, iter = iter, jump_v = .03), 183 | times = 2) 184 | bench_sum <- summary(bench) 185 | r_time <- bench_sum$mean[bench_sum$expr=='R_MH'] 186 | rcpp_time <- bench_sum$mean[bench_sum$expr=='Cpp_MH'] 187 | rel_time[i] <- r_time/rcpp_time 188 | } 189 | 190 | plot(ss, rel_time, type='l', 191 | xlab='Data Sample Size', ylab='Relative Runtime (R v. 
Rcpp)') 192 | 193 | 194 | -------------------------------------------------------------------------------- /RcppBoost/MCMCchains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/RcppBoost/MCMCchains.png -------------------------------------------------------------------------------- /RcppBoost/Runtime.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/stablemarkets/BayesianTutorials/30f5db9f13215f72728924ab1bcbf8b5da085f72/RcppBoost/Runtime.png -------------------------------------------------------------------------------- /RcppBoost/log_post.cpp: -------------------------------------------------------------------------------- 1 | // [[Rcpp::depends(RcppArmadillo)]] 2 | #include 3 | using namespace Rcpp; 4 | 5 | arma::vec invlogit(arma::vec x){ 6 | arma::vec y = exp(x)/(1 + exp(x)); 7 | return(y); 8 | } 9 | 10 | // [[Rcpp::export]] 11 | double berntest(double y, double n, double p, int lg ){ 12 | return R::dbinom(y, n, p, lg); 13 | } 14 | 15 | // [[Rcpp::export]] 16 | double log_post(arma::vec b, arma::vec Y, arma::mat X) { 17 | int n = X.n_rows; 18 | int p = X.n_cols; 19 | arma::vec xb; 20 | arma::vec pi; 21 | double lik; 22 | double pr; 23 | double log_posterior; 24 | 25 | // linear predictor 26 | xb = X*b; 27 | 28 | // correct for numerical issues 29 | for( int i=0; i 10 ){ 31 | xb[i] = 10; 32 | }else if( xb[i] < -10 ){ 33 | xb[i] = -10; 34 | }else{ 35 | xb[i] = xb[i]; 36 | } 37 | } 38 | 39 | // apply inverse link function 40 | pi = invlogit(xb); 41 | 42 | // compute log likelihood contribution of each observation 43 | // and sum them 44 | lik = 0; 45 | for(int i=0; i