├── .gitignore ├── 2depgps.Rmd ├── 2indgps.Rmd ├── LICENSE ├── README.md ├── compcorr.Rmd ├── coverage.Rmd ├── data ├── beh_sim.RData ├── compcorr_nsim.RData ├── compcorr_nsim_0004.RData ├── compcorr_nsim_0005.RData ├── compcorr_nsim_0506.RData ├── coverage50.RData ├── ex_g1_h0.RData ├── ex_g1_h0_t_samp_dist.RData ├── gh_coverage.RData ├── gh_t_sampdist.RData ├── onesamp_coverage.RData ├── onesamp_coverage2.RData ├── onesamp_coverage_n10.RData ├── onesamp_coverage_nboot200.RData ├── onesamp_coverage_nboot2000.RData ├── onesamp_stability.RData ├── ptb_params.RData ├── ptb_tboot.RData ├── twosamp_coverage.RData └── twosamp_fp.RData ├── docs ├── 2depgps.md ├── 2depgps_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-6-1.png │ │ └── unnamed-chunk-8-1.png ├── 2indgps.md ├── 2indgps_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-7-1.png │ │ └── unnamed-chunk-8-1.png ├── compcorr.md ├── compcorr_files │ └── figure-gfm │ │ ├── define-populations-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-20-1.png │ │ ├── unnamed-chunk-21-1.png │ │ ├── unnamed-chunk-22-1.png │ │ ├── unnamed-chunk-24-1.png │ │ ├── unnamed-chunk-25-1.png │ │ ├── unnamed-chunk-28-1.png │ │ ├── unnamed-chunk-29-1.png │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-30-1.png │ │ ├── unnamed-chunk-32-1.png │ │ ├── unnamed-chunk-33-1.png │ │ ├── unnamed-chunk-34-1.png │ │ ├── unnamed-chunk-37-1.png │ │ ├── unnamed-chunk-38-1.png │ │ ├── unnamed-chunk-42-1.png │ │ ├── unnamed-chunk-43-1.png │ │ └── unnamed-chunk-6-1.png ├── coverage.md ├── coverage_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-16-1.png │ │ ├── unnamed-chunk-18-1.png │ │ ├── unnamed-chunk-20-1.png │ │ ├── unnamed-chunk-21-1.png │ │ ├── unnamed-chunk-22-1.png │ │ ├── unnamed-chunk-24-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-8-1.png │ │ └── unnamed-chunk-9-1.png ├── notrobust.md ├── notrobust_files │ └── figure-gfm │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-5-1.png │ │ └── unnamed-chunk-6-1.png ├── pb.md ├── pb_files │ └── figure-gfm │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-16-1.png │ │ ├── unnamed-chunk-20-1.png │ │ ├── unnamed-chunk-24-1.png │ │ ├── unnamed-chunk-6-1.png │ │ └── unnamed-chunk-7-1.png ├── pc.md ├── pc_files │ └── figure-gfm │ │ ├── unnamed-chunk-3-1.png │ │ └── unnamed-chunk-4-1.png ├── ptb.md ├── ptb_files │ └── figure-gfm │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-17-1.png │ │ ├── unnamed-chunk-18-1.png │ │ ├── unnamed-chunk-19-1.png │ │ ├── unnamed-chunk-20-1.png │ │ ├── unnamed-chunk-21-1.png │ │ ├── unnamed-chunk-22-1.png │ │ ├── unnamed-chunk-25-1.png │ │ ├── unnamed-chunk-26-1.png │ │ ├── unnamed-chunk-29-1.png │ │ ├── unnamed-chunk-30-1.png │ │ ├── unnamed-chunk-31-1.png │ │ ├── unnamed-chunk-32-1.png │ │ ├── unnamed-chunk-35-1.png │ │ └── unnamed-chunk-36-1.png ├── sampdist.md └── sampdist_files │ └── figure-gfm │ ├── unnamed-chunk-11-1.png │ ├── unnamed-chunk-12-1.png │ ├── unnamed-chunk-13-1.png │ ├── unnamed-chunk-14-1.png │ ├── unnamed-chunk-16-1.png │ ├── unnamed-chunk-17-1.png │ ├── unnamed-chunk-18-1.png │ ├── unnamed-chunk-19-1.png │ ├── unnamed-chunk-21-1.png │ ├── unnamed-chunk-22-1.png │ ├── unnamed-chunk-23-1.png │ ├── unnamed-chunk-24-1.png │ ├── unnamed-chunk-26-1.png │ ├── unnamed-chunk-27-1.png │ ├── unnamed-chunk-28-1.png │ ├── unnamed-chunk-29-1.png │ ├── unnamed-chunk-30-1.png │ ├── unnamed-chunk-31-1.png │ ├── unnamed-chunk-32-1.png │ ├── unnamed-chunk-33-1.png │ ├── unnamed-chunk-34-1.png │ ├── unnamed-chunk-35-1.png │ ├── unnamed-chunk-36-1.png │ ├── unnamed-chunk-37-1.png │ ├── unnamed-chunk-4-1.png │ ├── unnamed-chunk-6-1.png │ ├── unnamed-chunk-7-1.png │ └── unnamed-chunk-8-1.png ├── functions ├── Rallfun-v35.txt ├── Rallfun-v40.txt ├── corfun.txt ├── functions.txt ├── gengh.txt ├── ghpdf.txt └── theme_gar.txt ├── notrobust.Rmd ├── pb.Rmd ├── pc.Rmd ├── ptb.Rmd └── sampdist.Rmd /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 36 | rsconnect/ 37 | -------------------------------------------------------------------------------- /2depgps.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Compare two dependent groups using hierarchical bootstrap sampling" 3 | author: "Guillaume A. Rousselet" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: 7 | fig_caption: no 8 | number_sections: no 9 | toc: yes 10 | toc_depth: 2 11 | # github_document: 12 | # html_preview: yes 13 | # toc: yes 14 | # toc_depth: 2 15 | --- 16 | 17 | # Dependencies 18 | ```{r, message=FALSE, warning=FALSE} 19 | library(tibble) 20 | library(ggplot2) 21 | source("./functions/theme_gar.txt") 22 | source("./functions/functions.txt") 23 | # source("./functions/Rallfun-v40.txt") 24 | library(beepr) 25 | # install.packages("devtools") 26 | # devtools::install_github("GRousselet/rogme") 27 | library(rogme) 28 | # library(cowplot) 29 | # devtools::install_github("zeehio/facetscales") 30 | library(facetscales) # set different scales for each facet 31 | ``` 32 | 33 | ```{r} 34 | sessionInfo() 35 | ``` 36 | 37 | Rand Wilcox's functions for two dependent groups 38 | ```{r, eval = FALSE} 39 | # percentile bootstrap using any estimator 40 | # default to difference scores - dif=TRUE 41 | bootdpci(x,y,est=median) 42 | # to compare two variances 43 | comdvar(x,y,alpha=0.05) 44 | 45 | # To use these functions: 46 | # source("./functions/Rallfun-v40.txt") 47 | ``` 48 | 49 | For two dependent groups, an effective approach is to make inferences about the one-sample distribution of the pairwise differences. This case was already covered in the `pb` notebook. Instead, here we consider a hierarchical situation, in which each participant was tested in 2 conditions, with a large number of trials per condition. In this situation, we can build confidence intervals for each participant and at the group level. For each participant, we use the approach for 2 independent groups described in the `2indgps` notebook. At the group level, we apply a hierarchical bootstrap: following the data sampling, first we sample participants with replacement, second for each bootstrap participant, we sample trials with replacements. 50 | 51 | We use data from the [French Lexicon Project](https://sites.google.com/site/frenchlexicon/results). 52 | Click on "French Lexicon Project trial-level results with R scripts.zip". 53 | The `.RData` dataset was created by applying the script `getflprtdata.Rmd` available on [GitHub](https://github.com/GRousselet/rogme/tree/master/data-raw). 54 | 55 | ```{r} 56 | # get data - tibble = `flp` 57 | flp <- rogme::flp 58 | # columns = 59 | #1 = participant 60 | #2 = rt 61 | #3 = acc = accuracy 0/1 62 | #4 = condition = word/non-word 63 | ``` 64 | 65 | N = `r length(unique(flp$participant))` participants. 66 | 67 | # Illustrate random sample of 20 participants * 200 trials 68 | 69 | ## Create data frame 70 | ```{r} 71 | set.seed(666) 72 | p.list <- as.numeric(as.character(unique(flp$participant))) 73 | np <- 20 74 | nt <- 200 75 | bootid <- sample(p.list, np, replace = FALSE) 76 | resmat <- array(NA, dim = c(2,nt,np)) 77 | for(P in 1:np){ 78 | resmat[1,,P] <- sample(flp$rt[flp$participant %in% bootid[P] & flp$condition == "word"], 79 | nt, replace = TRUE) 80 | resmat[2,,P] <- sample(flp$rt[flp$participant %in% bootid[P] & flp$condition == "non-word"], 81 | nt, replace = TRUE) 82 | } 83 | ``` 84 | 85 | ## Make figure 86 | ```{r, fig.height = 4, fig.width = 10} 87 | 88 | df <- tibble(rt = c(as.vector(resmat[1,,]), 89 | as.vector(resmat[2,,])), 90 | condition = factor(rep(c("Word", "Non-Word"), nt*np)), 91 | participant = factor(rep(rep(1:np, each = nt),2)) 92 | ) 93 | 94 | df$condition <- as.character(df$condition) 95 | df$condition <- factor(df$condition, levels=unique(df$condition)) 96 | 97 | p <- ggplot(df, aes(x = rt)) + theme_gar + 98 | geom_line(stat = "density", aes(colour=participant), size = 0.5) + 99 | scale_color_viridis_d(begin = 0.1) + 100 | scale_x_continuous(breaks=seq(0,2000,250), 101 | minor_breaks = waiver(), 102 | expand = c(0, 0)) + 103 | coord_cartesian(xlim = c(0, 2000)) + 104 | theme(plot.title = element_text(size=22), 105 | axis.title.x = element_text(size = 18), 106 | axis.text = element_text(size = 16, colour = "black"), 107 | axis.text.y = element_blank(), 108 | axis.ticks.y = element_blank(), 109 | axis.title.y = element_text(size = 18), 110 | legend.text = element_text(size = 16), 111 | legend.title = element_text(size = 18), 112 | legend.key.width = unit(1.5,"cm"), 113 | legend.position = "none",#c(0.75,0.8), 114 | strip.text.y = element_text(size = 18, face = "bold", angle = 0)) + 115 | # legend.position = c(0.25,0.9)) + 116 | labs(x = "Reaction times (ms)", y = "Density") + 117 | facet_grid(. ~ condition) 118 | p 119 | p.20 <- p 120 | ``` 121 | 122 | # Group 20% trimmed mean 123 | Because there is skewness at the two levels of analysis, we need to consider our estimators carefully at each level. Here for simplicity we compute the 20% trimmed mean for each participant and condition, take the difference between conditions, and finally compute the 20% trimmed mean of these differences. Many other options could be considered, some of which are covered in an other tutorial ([Rousselet & Wilcox, 2019](https://psyarxiv.com/3y54r/)). 124 | 125 | ## Compute trimmed means 126 | ```{r} 127 | # get data: mean RT for every participant 128 | tmres <- apply(resmat, c(1,3), mean, trim = 0.2) 129 | ``` 130 | 131 | ## Illustrate distributions 132 | 133 | 20% trimmed means for the Word and Non-Word conditions and their differences. 134 | ```{r, fig.height = 3, fig.width = 10} 135 | set.seed(21) # reproducible jitter 136 | df <- tibble(x = c(tmres[1,],tmres[2,],tmres[2,]-tmres[1,]), 137 | condition = factor(rep(c("Word", "Non-Word", "Difference"), each = np)), 138 | y = rep(1, 3 * np)) 139 | 140 | df$condition <- as.character(df$condition) 141 | df$condition <- factor(df$condition, levels=unique(df$condition)) 142 | 143 | scales_x <- list( 144 | `Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)), 145 | `Non-Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)), 146 | `Difference` = scale_x_continuous(limits=c(-100,400), breaks=seq(-100,500,100)) 147 | ) 148 | 149 | # group trimmed means 150 | df.tm <- tibble(y = rep(0.9, 3), 151 | yend = rep(1.1, 3), 152 | x = c(mean(tmres[1,], trim = 0.2), 153 | mean(tmres[2,], trim = 0.2), 154 | mean(tmres[2,]-tmres[1,], trim = 0.2)), 155 | xend = x, 156 | condition = factor(c("Word", "Non-Word", "Difference")) 157 | ) 158 | 159 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar + 160 | geom_jitter(height = .05, alpha = 0.5, 161 | size = 3, shape = 21, fill = "grey", colour = "black") + 162 | theme(axis.ticks.y = element_blank(), 163 | axis.text.y = element_blank(), 164 | axis.title.y = element_blank()) + 165 | scale_y_continuous(breaks = 1) + 166 | # Group trimmed means 167 | geom_segment(data = df.tm, 168 | aes(y = y, yend = yend, x = x, xend = xend), 169 | size = 2, lineend = 'round') + 170 | labs(x = "Group trimmed means (ms)") + 171 | facet_grid_sc(cols = vars(condition), scales = list( x = scales_x)) 172 | 173 | p.scat <- p 174 | p 175 | ``` 176 | 177 | # Group median 178 | 179 | ## Compute medians 180 | ```{r} 181 | # get data: mean RT for every participant 182 | mdres <- apply(resmat, c(1,3), median) 183 | ``` 184 | 185 | ## Illustrate distributions 186 | 187 | 20% medians for the Word and Non-Word conditions and their differences. 188 | ```{r, fig.height = 3, fig.width = 10} 189 | set.seed(21) # reproducible jitter 190 | df <- tibble(x = c(mdres[1,],mdres[2,],mdres[2,]-mdres[1,]), 191 | condition = factor(rep(c("Word", "Non-Word", "Difference"), each = np)), 192 | y = rep(1, 3 * np)) 193 | 194 | df$condition <- as.character(df$condition) 195 | df$condition <- factor(df$condition, levels=unique(df$condition)) 196 | 197 | scales_x <- list( 198 | `Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)), 199 | `Non-Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)), 200 | `Difference` = scale_x_continuous(limits=c(-101,400), breaks=seq(-100,500,100)) 201 | ) 202 | 203 | # Group medians 204 | df.tm <- tibble(y = rep(0.9, 3), 205 | yend = rep(1.1, 3), 206 | x = c(median(mdres[1,]), 207 | median(mdres[2,]), 208 | median(mdres[2,]-mdres[1,])), 209 | xend = x, 210 | condition = factor(c("Word", "Non-Word", "Difference")) 211 | ) 212 | 213 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar + 214 | geom_jitter(height = .05, alpha = 0.5, 215 | size = 3, shape = 21, fill = "grey", colour = "black") + 216 | theme(axis.ticks.y = element_blank(), 217 | axis.text.y = element_blank(), 218 | axis.title.y = element_blank()) + 219 | scale_y_continuous(breaks = 1) + 220 | # Group medians 221 | geom_segment(data = df.tm, 222 | aes(y = y, yend = yend, x = x, xend = xend), 223 | size = 2, lineend = 'round') + 224 | labs(x = "Group medians (ms)") + 225 | facet_grid_sc(cols = vars(condition), scales = list( x = scales_x)) 226 | 227 | p.scat.md <- p 228 | p 229 | ``` 230 | 231 | # Hierarchical bootstrap: 20% trimmed means 232 | 233 | We perform the standard bootstrap and the hierarchical version on the same data for comparison. 234 | ```{r} 235 | set.seed(21) 236 | 237 | nboot <- 5000 238 | np <- 20 239 | nt <- 200 240 | 241 | # sample participants with replacement 242 | bootid <- matrix(sample(np, np*nboot, replace = TRUE), nrow = nboot) 243 | 244 | bootres.level1 <- matrix(NA, nrow = nboot, 3) 245 | bootres.hierar <- matrix(NA, nrow = nboot, 3) 246 | 247 | for(B in 1:nboot){ 248 | # sample participants with replacement 249 | # level 1 resampling 250 | bootsamp1 <- resmat[,,bootid[B,]] 251 | bootres.level1[B,1:2] <- apply(tmres[,bootid[B,]], 1, mean, trim = 0.2) 252 | bootres.level1[B,3] <- mean(tmres[2,bootid[B,]] - tmres[1,bootid[B,]], trim = 0.2) 253 | for(P in 1:np){ # for each bootstrap participant 254 | if(B==1 & P==1){bootsamp2 <- bootsamp1} 255 | bootsamp2[1,,P] <- sample(bootsamp1[1,,P], nt, replace = TRUE) 256 | bootsamp2[2,,P] <- sample(bootsamp1[2,,P], nt, replace = TRUE) 257 | } 258 | bootres.hierar[B,1] <- mean(apply(bootsamp2[1,,], 2, mean, trim = 0.2), trim = 0.2) 259 | bootres.hierar[B,2] <- mean(apply(bootsamp2[2,,], 2, mean, trim = 0.2), trim = 0.2) 260 | bootres.hierar[B,3] <- mean(apply(bootsamp2[2,,], 2, mean, trim = 0.2) - apply(bootsamp2[1,,], 2, mean, trim = 0.2), trim = 0.2) 261 | } 262 | ``` 263 | 264 | ## Illustrate results 265 | We illustrate the group confidence intervals for each condition and for the pairwise differences. 266 | ```{r, fig.height = 4, fig.width = 10, warning=FALSE} 267 | # compute confidence intervals -------------------------- 268 | alpha <- 0.05 269 | probs <- c(alpha/2, 1-alpha/2) 270 | 271 | ci.hierar <- matrix(NA, nrow = 2, ncol = 3) 272 | ci.hierar[,1] <- quantile(bootres.hierar[,1], type = 6, probs = probs) 273 | ci.hierar[,2] <- quantile(bootres.hierar[,2], type = 6, probs = probs) 274 | ci.hierar[,3] <- quantile(bootres.hierar[,3], type = 6, probs = probs) 275 | 276 | ci.level1 <- matrix(NA, nrow = 2, ncol = 3) 277 | ci.level1[,1] <- quantile(bootres.level1[,1], type = 6, probs = probs) 278 | ci.level1[,2] <- quantile(bootres.level1[,2], type = 6, probs = probs) 279 | ci.level1[,3] <- quantile(bootres.level1[,3], type = 6, probs = probs) 280 | 281 | # make tibbles -------------------------- 282 | df <- tibble(x = as.vector(bootres.hierar), 283 | cond = factor(rep(c("Word", "Non-Word", "Difference"), each = nboot))) 284 | 285 | df$cond <- keeporder(df$cond) 286 | 287 | # group trimmed means 288 | df.tm <- tibble(tm = c(mean(tmres[1,], trim = 0.2), 289 | mean(tmres[2,], trim = 0.2), 290 | mean(tmres[2,]-tmres[1,], trim = 0.2)), 291 | cond = factor(c("Word", "Non-Word", "Difference"))) 292 | 293 | # confidence intervals 294 | df.ci <- tibble(x = c(ci.hierar[1,]), 295 | xend = c(ci.hierar[2,]), 296 | y = rep(0, 3), 297 | yend = rep(0, 3), 298 | cond = factor(c("Word", "Non-Word", "Difference"))) 299 | 300 | scales_x <- list(`Word` = scale_x_continuous(limits = c(600, 1100)), 301 | `Non-Word` = scale_x_continuous(limits = c(600, 1100)), 302 | `Difference` = scale_x_continuous(limits = c(0, 200))) 303 | 304 | # make figure -------------------------- 305 | p <- ggplot(df, aes(x = x)) + theme_gar + 306 | # density 307 | geom_line(stat = "density", size = 1) + 308 | # sample trimmed means: vertical line + label 309 | geom_vline(data = df.tm, aes(xintercept = tm)) + 310 | # confidence interval ---------------------- 311 | geom_segment(data = df.ci, 312 | aes(x = x, xend = xend, y = y, yend = yend, group = cond), 313 | lineend = "round", size = 2, colour = "black") + 314 | labs(x = "Bootstrap group trimmed means (ms)", 315 | y = "Density") + 316 | theme(axis.text.y = element_blank(), 317 | axis.ticks.y = element_blank()) + 318 | facet_grid_sc(cols = vars(cond), scales = list(x = scales_x)) 319 | p.boot <- p 320 | p 321 | ``` 322 | ## Illustrate results (new version) 323 | New version with the hierarchical and non-hierarchical confidence intervals superimposed. 324 | ```{r, fig.height = 4, fig.width = 10, warning=FALSE} 325 | # compute confidence intervals -------------------------- 326 | alpha <- 0.05 327 | probs <- c(alpha/2, 1-alpha/2) 328 | 329 | ci.hierar <- matrix(NA, nrow = 2, ncol = 3) 330 | ci.hierar[,1] <- quantile(bootres.hierar[,1], type = 6, probs = probs) 331 | ci.hierar[,2] <- quantile(bootres.hierar[,2], type = 6, probs = probs) 332 | ci.hierar[,3] <- quantile(bootres.hierar[,3], type = 6, probs = probs) 333 | 334 | ci.level1 <- matrix(NA, nrow = 2, ncol = 3) 335 | ci.level1[,1] <- quantile(bootres.level1[,1], type = 6, probs = probs) 336 | ci.level1[,2] <- quantile(bootres.level1[,2], type = 6, probs = probs) 337 | ci.level1[,3] <- quantile(bootres.level1[,3], type = 6, probs = probs) 338 | 339 | # make tibbles -------------------------- 340 | df <- tibble(x = c(as.vector(bootres.hierar), as.vector(bootres.level1)), 341 | cond = factor(rep(rep(c("Word", "Non-Word", "Difference"), each = nboot),2)), 342 | type = factor(rep(c("Hierarchical","Non-hierarchical"), each = nboot * 3)) 343 | ) 344 | 345 | df$cond <- keeporder(df$cond) 346 | 347 | # group trimmed means 348 | df.tm <- tibble(tm = rep(c(mean(tmres[1,], trim = 0.2), 349 | mean(tmres[2,], trim = 0.2), 350 | mean(tmres[2,] - tmres[1,], trim = 0.2)),2), 351 | cond = factor(rep(c("Word", "Non-Word", "Difference"),2)), 352 | type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3)) 353 | ) 354 | 355 | # confidence intervals 356 | df.ci <- tibble(x = c(ci.hierar[1,], ci.level1[1,]), 357 | xend = c(ci.hierar[2,], ci.level1[2,]), 358 | y = c(rep(0, 3),rep(0.0008, 3)), 359 | yend = c(rep(0, 3),rep(0.0008, 3)), 360 | cond = factor(rep(c("Word", "Non-Word", "Difference"),2)), 361 | type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3)) 362 | ) 363 | 364 | scales_x <- list(`Word` = scale_x_continuous(limits = c(600, 1100)), 365 | `Non-Word` = scale_x_continuous(limits = c(600, 1100)), 366 | `Difference` = scale_x_continuous(limits = c(0, 200))) 367 | 368 | # make figure -------------------------- 369 | p <- ggplot(df, aes(x = x, colour = type)) + theme_gar + 370 | # density 371 | geom_line(stat = "density", size = 1) + 372 | theme(legend.position = "bottom", 373 | axis.text.y = element_blank(), 374 | axis.ticks.y = element_blank()) + 375 | # sample trimmed means: vertical line + label 376 | geom_vline(data = df.tm, aes(xintercept = tm)) + 377 | # confidence interval ---------------------- 378 | geom_segment(data = df.ci, 379 | aes(x = x, xend = xend, y = y, yend = yend, 380 | group = cond, colour = type), 381 | lineend = "round", size = 2) + 382 | scale_colour_manual(values = c("black", "#E69F00")) + 383 | labs(x = "Bootstrap group trimmed means (ms)", 384 | y = "Density") + 385 | facet_grid_sc(cols = vars(cond), scales = list(x = scales_x)) 386 | p.boot2 <- p 387 | p 388 | ``` 389 | 390 | H = hierarchical confidence interval 391 | NH = non-hierarchical confidence interval 392 | 393 | # Hierarchical bootstrap: median 394 | Previous example shows very similar results for the two types of bootstrap procedures. Because the median can lead to choppy bootstrap distributions, especially in the presence of duplicate values, the hierarchical procedure might help smooth the bootstrap distribution because of the variability introduced by random sampling of trials. 395 | 396 | ```{r} 397 | set.seed(21) 398 | 399 | nboot <- 5000 400 | np <- 20 401 | nt <- 200 402 | 403 | # sample participants with replacement 404 | bootid <- matrix(sample(np, np*nboot, replace = TRUE), nrow = nboot) 405 | 406 | bootres.level1 <- matrix(NA, nrow = nboot, 3) 407 | bootres.hierar <- matrix(NA, nrow = nboot, 3) 408 | 409 | for(B in 1:nboot){ 410 | # sample participants with replacement 411 | # level 1 resampling 412 | bootsamp1 <- resmat[,,bootid[B,]] 413 | bootres.level1[B,1:2] <- apply(mdres[,bootid[B,]], 1, median) 414 | bootres.level1[B,3] <- median(mdres[2,bootid[B,]] - mdres[1,bootid[B,]]) 415 | for(P in 1:np){ # for each bootstrap participant 416 | if(B==1 & P==1){bootsamp2 <- bootsamp1} 417 | bootsamp2[1,,P] <- sample(bootsamp1[1,,P], nt, replace = TRUE) 418 | bootsamp2[2,,P] <- sample(bootsamp1[2,,P], nt, replace = TRUE) 419 | } 420 | bootres.hierar[B,1] <- median(apply(bootsamp2[1,,], 2, median)) 421 | bootres.hierar[B,2] <- median(apply(bootsamp2[2,,], 2, median)) 422 | bootres.hierar[B,3] <- median(apply(bootsamp2[2,,], 2, median) - apply(bootsamp2[1,,], 2, median)) 423 | } 424 | ``` 425 | 426 | ## Illustrate results 427 | We illustrate the group confidence intervals for each condition and for the pairwise differences. 428 | ```{r, fig.height = 4, fig.width = 10, warning=FALSE} 429 | # compute confidence intervals -------------------------- 430 | alpha <- 0.05 431 | probs <- c(alpha/2, 1-alpha/2) 432 | 433 | ci.hierar <- matrix(NA, nrow = 2, ncol = 3) 434 | ci.hierar[,1] <- quantile(bootres.hierar[,1], type = 6, probs = probs) 435 | ci.hierar[,2] <- quantile(bootres.hierar[,2], type = 6, probs = probs) 436 | ci.hierar[,3] <- quantile(bootres.hierar[,3], type = 6, probs = probs) 437 | 438 | ci.level1 <- matrix(NA, nrow = 2, ncol = 3) 439 | ci.level1[,1] <- quantile(bootres.level1[,1], type = 6, probs = probs) 440 | ci.level1[,2] <- quantile(bootres.level1[,2], type = 6, probs = probs) 441 | ci.level1[,3] <- quantile(bootres.level1[,3], type = 6, probs = probs) 442 | 443 | # make tibbles -------------------------- 444 | df <- tibble(x = c(as.vector(bootres.hierar), as.vector(bootres.level1)), 445 | cond = factor(rep(rep(c("Word", "Non-Word", "Difference"), each = nboot),2)), 446 | type = factor(rep(c("Hierarchical","Non-hierarchical"), each = nboot * 3)) 447 | ) 448 | 449 | df$cond <- keeporder(df$cond) 450 | 451 | # Group medians 452 | df.tm <- tibble(tm = rep(c(median(mdres[1,]), 453 | median(mdres[2,]), 454 | median(mdres[2,] - mdres[1,])),2), 455 | cond = factor(rep(c("Word", "Non-Word", "Difference"),2)), 456 | type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3)) 457 | ) 458 | 459 | # confidence intervals 460 | df.ci <- tibble(x = c(ci.hierar[1,], ci.level1[1,]), 461 | xend = c(ci.hierar[2,], ci.level1[2,]), 462 | y = c(rep(0, 3),rep(0.001, 3)), 463 | yend = c(rep(0, 3),rep(0.001, 3)), 464 | cond = factor(rep(c("Word", "Non-Word", "Difference"),2)), 465 | type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3)) 466 | ) 467 | 468 | scales_x <- list(`Word` = scale_x_continuous(limits = c(600, 1100)), 469 | `Non-Word` = scale_x_continuous(limits = c(600, 1100)), 470 | `Difference` = scale_x_continuous(limits = c(0, 200))) 471 | 472 | # make figure -------------------------- 473 | p <- ggplot(df, aes(x = x, colour = type)) + theme_gar + 474 | # density 475 | geom_line(stat = "density", size = 1) + 476 | theme(legend.position = "bottom", 477 | axis.text.y = element_blank(), 478 | axis.ticks.y = element_blank()) + 479 | # sample trimmed means: vertical line + label 480 | geom_vline(data = df.tm, aes(xintercept = tm)) + 481 | # confidence interval ---------------------- 482 | geom_segment(data = df.ci, 483 | aes(x = x, xend = xend, y = y, yend = yend, 484 | group = cond, colour = type), 485 | lineend = "round", size = 2) + 486 | scale_colour_manual(values = c("black", "#E69F00")) + 487 | labs(x = "Bootstrap group medians (ms)", 488 | y = "Density") + 489 | # facet_grid(cols = vars(cond), scales = "free") 490 | facet_grid_sc(cols = vars(cond), scales = list(x = scales_x)) 491 | p.boot.md <- p 492 | p 493 | ``` 494 | 495 | The distributions of the bootstrap medians are very irregular. Bootstrapping at two levels (trials and participants) leads to much smoother distributions and different confidence intervals. 496 | 497 | # Summary figure 498 | ```{r, eval = FALSE} 499 | legend <- cowplot::get_legend(p.boot2) 500 | 501 | cowplot::plot_grid(p.20, 502 | p.scat, 503 | p.boot2 + theme(legend.position = "none"), 504 | p.scat.md, 505 | p.boot.md + theme(legend.position = "none"), 506 | legend, 507 | labels = c("A", "B", "C", "D", "E", NA), 508 | ncol = 1, 509 | nrow = 6, 510 | rel_heights = c(1, 1/2.5, 1, 1/2.5, 1, 1/5), 511 | align = "v", 512 | axis = "l", 513 | label_size = 20, 514 | hjust = -0.5, 515 | scale=.95) 516 | 517 | # save figure 518 | ggsave(filename=('./figures/figure_hpb.pdf'),width=13,height=15) 519 | ggsave(filename=('./figures/figure12.pdf'),width=13,height=15) 520 | ``` 521 | 522 | -------------------------------------------------------------------------------- /2indgps.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Compare two independent groups" 3 | author: "Guillaume A. Rousselet" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: 7 | fig_caption: no 8 | number_sections: no 9 | toc: yes 10 | toc_depth: 2 11 | # github_document: 12 | # html_preview: yes 13 | # toc: yes 14 | # toc_depth: 2 15 | --- 16 | 17 | # Dependencies 18 | ```{r, message=FALSE, warning=FALSE} 19 | library(tibble) 20 | library(ggplot2) 21 | # library(cowplot) 22 | source("./functions/theme_gar.txt") 23 | source("./functions/Rallfun-v40.txt") 24 | source("./functions/functions.txt") 25 | ``` 26 | 27 | ```{r} 28 | sessionInfo() 29 | ``` 30 | 31 | Wilcox's functions for two independent groups 32 | ```{r, eval = FALSE} 33 | # to compare any estimators 34 | pb2gen(x, y, alpha=0.05, nboot=1000, est=median, SEED=FALSE) 35 | # same as pb2gen but only to compare medians: 36 | medpb2() 37 | # same as pb2gen but only to compare trimmed means: 38 | trimpb2() 39 | # to compare variances: 40 | comvar2() 41 | # For robust measures of scale, use pb2gen 42 | ``` 43 | 44 | # Illustrate populations 45 | 46 | We sample from 2 populations that differ in skewness, leading to large differences in the tails. 47 | 48 | ```{r, fig.height=4} 49 | meanlog1 <- 0 50 | meanlog2 <- 0.2 51 | sdlog1 <- 0.5 52 | sdlog2 <- 0.7 53 | 54 | x <- seq(0, 5, 0.1) 55 | 56 | p <- ggplot(as.tibble(x), aes(x = x)) + theme_gar + 57 | stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0, 0.5)"), 58 | fun = dlnorm, args = list(meanlog = meanlog1, sdlog = sdlog1)) + 59 | stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0.2, 0.7)"), 60 | fun = dlnorm, args = list(meanlog = meanlog2, sdlog = sdlog2)) + 61 | labs(x = "Values", y = "Density") + 62 | scale_colour_manual(name = "", values = c("Lognormal(0, 0.5)" = "orange", 63 | "Lognormal(0.2, 0.7)" = "purple")) + 64 | theme(legend.position = c(.6, .8), 65 | axis.text.y=element_blank(), #remove y axis labels 66 | axis.ticks.y=element_blank()) + 67 | guides(colour = guide_legend(override.aes = list(size = 3))) 68 | p.pop <- p 69 | p 70 | ``` 71 | 72 | ## Population normalise difference 73 | 74 | We use samples of 100,000 values to define the population normalise difference between 3rd quartiles. 75 | 76 | ```{r} 77 | set.seed(21) 78 | normdiff(q3(rlnorm(100000, meanlog = meanlog1, sdlog = sdlog1)), q3(rlnorm(100000, meanlog = meanlog2, sdlog = sdlog2))) 79 | ``` 80 | 81 | # Generate samples 82 | 83 | We get 2 relatively large samples from 2 populations. 84 | 85 | ```{r} 86 | set.seed(21) 87 | n1 <- 50 88 | n2 <- 60 89 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1) 90 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2) 91 | ``` 92 | 93 | Instead of the traditional measures of central tendency, like the mean or the median, here we're going to estimate the 3rd quartile of the marginal distributions. This can be justified by an interest in differences among slower responses. To illustrate that with the bootstrap we can build confidence intervals for any quantity, the group estimations will be on a normalised difference of quartiles: $effect = (q3_1-q3_2)/(q3_1+q3_2)$. 94 | 95 | # Illustrate samples 96 | ```{r, fig.height=2} 97 | set.seed(21) # for reproducible jitter 98 | # raw data 99 | df <- tibble(val = c(samp1, samp2), 100 | y = rep(1, n1+n2), 101 | gp = factor(c(rep("Group 1",n1),rep("Group 2",n2))) 102 | ) 103 | 104 | df.q2 <- tibble(y = rep(0.9,2), 105 | yend = rep(1.1,2), 106 | x = c(q2(samp1),q2(samp2)), 107 | xend = x, 108 | gp = factor(c("Group 1","Group 2")) 109 | ) 110 | 111 | df.q3 <- tibble(y = rep(0.9,2), 112 | yend = rep(1.1,2), 113 | x = c(q3(samp1),q3(samp2)), 114 | xend = x, 115 | gp = factor(c("Group 1","Group 2")) 116 | ) 117 | 118 | p <- ggplot(data = df, aes(x = val, y = y)) + theme_gar + 119 | # scatterplots 120 | geom_jitter(height = .05, alpha = 0.5, 121 | size = 3, shape = 21, fill = "grey", colour = "black") + 122 | theme(axis.ticks.y = element_blank(), 123 | axis.text.y = element_blank(), 124 | axis.title.y = element_blank(), 125 | panel.grid.minor.x = element_blank()) + 126 | scale_y_continuous(breaks = 1) + 127 | # 3rd quartile 128 | geom_segment(data = df.q3, aes(y = y, yend = yend, 129 | x = x, xend = xend), 130 | size = 2, lineend = 'round', colour = "grey") + 131 | # median 132 | geom_segment(data = df.q2, aes(y = y, yend = yend, 133 | x = x, xend = xend), 134 | size = 0.5, lineend = 'round') + 135 | labs(x = "Values") + 136 | # ggtitle("Random sample") + 137 | facet_grid(cols = vars(gp)) 138 | p 139 | pA1 <- p 140 | ``` 141 | 142 | Thick vertical line = 3rd quartile. 143 | Thin vertical line = 2nd quartile (median). 144 | 145 | ## Sample difference 146 | 147 | ### Illustrate results 148 | ```{r, fig.height=2} 149 | nb <- 20 150 | 151 | df <- tibble(bootid = 1, 152 | res = normdiff(q3(samp1), q3(samp2)) 153 | ) 154 | 155 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 156 | theme_gar + 157 | geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)), 158 | size = 2, colour = "grey") + 159 | geom_vline(xintercept = 0, size = 0.75, linetype = "dashed") + 160 | coord_cartesian(xlim = c(-0.4, 0.4)) + 161 | scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) + 162 | labs(x = "Normalised difference") + 163 | theme(panel.grid.minor = element_blank(), 164 | axis.title.y = element_blank()) + 165 | ggtitle("(Group 1 - Group 2) / (Group 1 + Group 2)") + 166 | theme(plot.title = element_text(size = 16)) 167 | pA2 <- p 168 | p 169 | ``` 170 | population normalised difference = `r round(normdiff(q3(samp1),q3(samp2)), digits = 2)` 171 | 172 | Thick grey vertical line = difference between 3rd quartiles. 173 | Vertical dashed line = zero reference line. 174 | 175 | # Generate bootstrap samples & confidence intervals 176 | 177 | We use 5,000 bootstrap samples. 178 | 179 | ```{r} 180 | set.seed(666) 181 | nboot <- 5000 182 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot) 183 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot) 184 | ``` 185 | 186 | ## Illustrate 20 bootstrap samples 187 | 188 | For each sample we superimpose the 3rd quartile---short vertical black line. 189 | Disks = bootstrap observations. 190 | Long vertical grey lines mark the sample 3rd quartiles. 191 | 192 | ```{r, fig.height=6} 193 | nb <- 20 194 | df <- tibble(res = c(as.vector(bootsamp1[1:nb,]),as.vector(bootsamp2[1:nb,])), 195 | bootid = c(rep(1:nb, each = n1),rep(1:nb, each = n2)), 196 | gp = factor(c(rep("Group 1",nb*n1),rep("Group 2",nb*n2)))) 197 | 198 | df2 <- tibble(bootid = rep(1:nb,2), 199 | res = c(apply(bootsamp1[1:nb,],1,q3),apply(bootsamp2[1:nb,],1,q3)), 200 | gp = factor(rep(c("Group 1", "Group 2"), each = nb)) 201 | ) 202 | 203 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar + 204 | # 3rd quartile 205 | geom_vline(data = df.q3, aes(group = gp, xintercept = x), size = 2, colour = "grey") + 206 | # scatterplots 207 | geom_point(alpha = 0.3, position = position_jitter(height=0.1)) + 208 | labs(x = "Values", y = "Bootstrap samples") + 209 | geom_segment(data = df2, aes(x = res, xend = res, 210 | y = bootid - 0.4, yend = bootid + 0.4), 211 | size = 1.5) + 212 | theme(panel.grid.minor = element_blank()) + 213 | scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) + 214 | facet_grid(cols = vars(gp)) 215 | pB1 <- p 216 | p 217 | ``` 218 | 219 | ## Illustrate bootstrap normalised differences 220 | 221 | ### Illustrate results 222 | ```{r, fig.height=6} 223 | nb <- 20 224 | 225 | df <- tibble(bootid = 1:nb, 226 | res = normdiff(apply(bootsamp1[1:nb,],1,q3), apply(bootsamp2[1:nb,],1,q3)) 227 | ) 228 | 229 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 230 | theme_gar + 231 | geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)), size = 2, colour = "grey") + 232 | geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") + 233 | coord_cartesian(xlim = c(-0.4, 0.4)) + 234 | scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) + 235 | labs(x = "Normalised differences", y = "Bootstrap samples") + 236 | geom_segment(size = 1.5) + 237 | theme(panel.grid.minor = element_blank()) + 238 | scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) 239 | pB2 <- p 240 | p 241 | ``` 242 | 243 | # Bootstrap distributions 244 | 245 | ## Compute confidence intervals 246 | ```{r} 247 | alpha <- 0.05 248 | boot1.q3 <- apply(bootsamp1,1,q3) 249 | boot2.q3 <- apply(bootsamp2,1,q3) 250 | ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2)) 251 | ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2)) 252 | bootdiff <- normdiff(boot1.q3, boot2.q3) 253 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 254 | ``` 255 | 256 | ## Bootstrap distributions of group quantiles 257 | ```{r, fig.height=4} 258 | 259 | df <- tibble(x = c(boot1.q3, boot2.q3), 260 | gp = factor(c(rep("Group 1",nboot),rep("Group 2",nboot)))) 261 | 262 | # df.q3 <- tibble(q3 = c(q3(samp1), q3(samp2)), 263 | # group = factor(c("Group 1", "Group 2"))) 264 | 265 | df.ci <- tibble(x = c(ci1[1], ci2[1]), 266 | xend = c(ci1[2], ci2[2]), 267 | y = c(0, 0), 268 | yend = c(0, 0), 269 | gp = factor(c("Group 1", "Group 2"))) 270 | 271 | p <- ggplot(df, aes(x = x)) + theme_gar + 272 | # sample q3: vertical line + label 273 | geom_vline(data = df.q3, aes(xintercept = x, group = gp), size = 2, colour = "grey") + 274 | # density 275 | geom_line(stat = "density", size = 1) + 276 | # confidence interval ---------------------- 277 | geom_segment(data = df.ci, 278 | aes(x = x, xend = xend, y = y, yend = yend), 279 | lineend = "round", size = 2, colour = "black") + 280 | labs(x = "Bootstrap quantiles", 281 | y = "Density") + 282 | theme(axis.text.y=element_blank(), #remove y axis labels 283 | axis.ticks.y=element_blank()) + 284 | facet_grid(cols = vars(gp)) 285 | p 286 | pC1 <- p 287 | ``` 288 | 289 | ## Group normalised differences 290 | ```{r, fig.height=4} 291 | samp.nd <- normdiff(q3(samp1), q3(samp2)) 292 | 293 | df <- tibble(x = bootdiff) 294 | 295 | p <- ggplot(df, aes(x = x)) + theme_gar + 296 | # sample 3rd quartile: vertical line 297 | geom_vline(xintercept = samp.nd, linetype = 'solid', size = 2, colour = "grey") + 298 | geom_line(stat = "density", size = 1) + 299 | labs(x = "Bootstrap differences", 300 | y = "Density") + 301 | theme(axis.text.y=element_blank(), #remove y axis labels 302 | axis.ticks.y=element_blank()) + 303 | # confidence interval ---------------------- 304 | geom_segment(x = ci.diff[1], xend = ci.diff[2], 305 | y = 0, yend = 0, 306 | lineend = "round", size = 2, colour = "black") + 307 | # zero reference line 308 | geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") + 309 | ggtitle(paste0(round(samp.nd,digits = 3), 310 | " [", 311 | round(ci.diff[1],digits = 3), 312 | ", ", 313 | round(ci.diff[2],digits = 3) 314 | ,"]")) 315 | p 316 | pC2 <- p 317 | ``` 318 | 319 | # Make summary figure 320 | ```{r, eval = FALSE} 321 | 322 | pvoid <- ggplot() + theme_void() # empty plot for blank space 323 | 324 | # pA <- cowplot::plot_grid(pA1, pA2, 325 | # labels = NA, 326 | # ncol = 2, 327 | # nrow = 1, 328 | # rel_widths = c(2, 1), 329 | # label_size = 20, 330 | # hjust = -0.5, 331 | # scale=.95) 332 | # 333 | # pB <- cowplot::plot_grid(pB1, pB2, 334 | # labels = NA, 335 | # ncol = 2, 336 | # nrow = 1, 337 | # rel_widths = c(2, 1), 338 | # label_size = 20, 339 | # hjust = -0.5, 340 | # scale=.95) 341 | # 342 | # pC <- cowplot::plot_grid(pC1, pC2, 343 | # labels = NA, 344 | # ncol = 2, 345 | # nrow = 1, 346 | # rel_widths = c(2, 1), 347 | # label_size = 20, 348 | # hjust = -0.5, 349 | # scale=.95) 350 | 351 | p.LEFT <- cowplot::plot_grid(p.pop, pA1, pB1, pC1, 352 | labels = c("A", "B", "C", "D"), 353 | ncol = 1, 354 | nrow = 4, 355 | rel_heights = c(2, 1, 3, 2), 356 | label_size = 20, 357 | align = "v", 358 | axis = "l", 359 | hjust = -0.5, 360 | scale=.95) 361 | 362 | p.RIGHT <- cowplot::plot_grid(pvoid, pA2, pB2, pC2, 363 | labels = NA, 364 | ncol = 1, 365 | nrow = 4, 366 | rel_heights = c(2, 1, 3, 2), 367 | label_size = 20, 368 | align = "v", 369 | axis = "l", 370 | hjust = -0.5, 371 | scale=.95) 372 | 373 | cowplot::plot_grid(p.LEFT, p.RIGHT, 374 | labels = NA, 375 | ncol = 2, 376 | nrow = 1, 377 | rel_widths = c(2, 1), 378 | label_size = 20, 379 | align = "h", 380 | axis = "b", 381 | hjust = -0.5, 382 | scale=.95) 383 | 384 | # save figure 385 | ggsave(filename=('./figures/figure_2indgps.pdf'),width=17,height=15) 386 | ggsave(filename=('./figures/figure9.pdf'),width=17,height=15) 387 | ``` 388 | 389 | # Confidence interval with n = 100 390 | 391 | ```{r} 392 | set.seed(21) 393 | n1 <- 100 394 | n2 <- 100 395 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1) 396 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2) 397 | nboot <- 5000 398 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot) 399 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot) 400 | alpha <- 0.05 401 | boot1.q3 <- apply(bootsamp1,1,q3) 402 | boot2.q3 <- apply(bootsamp2,1,q3) 403 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2)) 404 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2)) 405 | bootdiff <- normdiff(boot1.q3, boot2.q3) 406 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 407 | round(ci.diff, digits = 2) 408 | ``` 409 | 410 | # Confidence interval with n = 200 411 | 412 | ```{r} 413 | set.seed(21) 414 | n1 <- 200 415 | n2 <- 200 416 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1) 417 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2) 418 | nboot <- 5000 419 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot) 420 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot) 421 | alpha <- 0.05 422 | boot1.q3 <- apply(bootsamp1,1,q3) 423 | boot2.q3 <- apply(bootsamp2,1,q3) 424 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2)) 425 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2)) 426 | bootdiff <- normdiff(boot1.q3, boot2.q3) 427 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 428 | round(ci.diff, digits = 2) 429 | ``` 430 | 431 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Guillaume Rousselet 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Reproducibility package for the article: 2 | 3 | **An introduction to the bootstrap: a versatile method to make inferences by using data-driven simulations** 4 | Rousselet G.A., Pernet C.R., Wilcox R.R. 5 | [*Meta-Psychology*](https://open.lnu.se/index.php/metapsychology/article/view/2058) 6 | 7 | [[OSF repository](https://osf.io/8b4t5/)] [[GitHub repository](https://github.com/GRousselet/bootstrap)] [[PsyArXiv Preprint](https://psyarxiv.com/h8ft7)] 8 | 9 | The repository contains all of the [R](https://www.r-project.org/) code used in the article. The code is best seen by running the RMarkdown notebooks, within [RStudio](https://www.rstudio.com/). 10 | 11 | The code is released under the [MIT license](https://opensource.org/licenses/MIT). Copyright 2019-2022, Guillaume A. Rousselet. 12 | 13 | The figures are released under the [CC-BY 4.0 license](https://creativecommons.org/licenses/by/4.0/legalcode). Copyright 2019-2022, Rousselet, Pernet & Wilcox. 14 | 15 | # Content 16 | |folder|description| 17 | |-----|-----| 18 | |`code`|R `.Rmd` files to run simulations and create figures| 19 | |`notebooks`|pdf versions of the code, with embedded figures| 20 | |`data`|simulation results needed to run the code| 21 | |`figures`|all the figures used in the article, in pdf format (only available on the [OSF](https://osf.io/8b4t5/) version of the repo)| 22 | |`functions`|extra R functions defined in text files| 23 | |`docs`|Github html versions of the notebooks (only available on the [GitHub](https://github.com/GRousselet/bootstrap) version of the repo)| 24 | 25 | # Notebooks 26 | 27 | The notebooks contain code to reproduce the figures and analyses presented in the article. They also contain extra resources, figures and analyses. 28 | 29 | |Notebook|Description|Figures| 30 | |-----|-----|-----| 31 | |[pb](/docs/pb.md)|Description of the percentile bootstrap|Figure 1| 32 | |[pc](/docs/pc.md)|Percent correct example|Figure 2| 33 | |[sampdist](/docs/sampdist.md)|Illustrate bootstrap sampling distributions|Figures 3-4| 34 | |[coverage](/docs/coverage.md)|Simulations of the coverage, width and power of one-sample confidence intervals|Figures 5, 7, 8| 35 | |[notrobust](/docs/notrobust.md)|On its own, the bootstrap does not guarantee robustness|Figure 6| 36 | |[2indgps](/docs/2indgps.md)|Compare 2 independent groups|Figure 9| 37 | |[compcorr](/docs/compcorr.md)|Comparison of correlation coefficients|Figures 10-11| 38 | |[2depgps](/docs/2depgps.md)|Illustrate hierarchical bootstrap sampling|Figure 12| 39 | |[ptb](/docs/ptb.md)|Percentile-t bootstrap technique|Figures 13-14| 40 | 41 | # Direct links to the PDF version of the figures on the OSF 42 | 43 | [Figure 1](https://osf.io/scp2j/) 44 | 45 | [Figure 2](https://osf.io/t7pkw/) 46 | 47 | [Figure 3](https://osf.io/t2qg3/) 48 | 49 | [Figure 4](https://osf.io/a4ybr/) 50 | 51 | [Figure 5](https://osf.io/r8mns/) 52 | 53 | [Figure 6](https://osf.io/hk7cn/) 54 | 55 | [Figure 7](https://osf.io/6wvuh/) 56 | 57 | [Figure 8](https://osf.io/6wakn/) 58 | 59 | [Figure 9](https://osf.io/j9b8n/) 60 | 61 | [Figure 10](https://osf.io/fdgx6/) 62 | 63 | [Figure 11](https://osf.io/wt9sy/) 64 | 65 | [Figure 12](https://osf.io/8fjuv/) 66 | 67 | [Figure 13](https://osf.io/wph78/) 68 | 69 | [Figure 14](https://osf.io/wqvay/) 70 | 71 | # R packages needed 72 | If you want to run the code in RStudio, you will need to install a few packages. 73 | 74 | To reproduce the figures only, you can install the required packages by typing this in the console: 75 | 76 | `install.packages(c("ggplot2", "tibble"))` 77 | 78 | Or you can navigate in the GUI to Tools > Install Packages... 79 | 80 | To install `rogme` and `facetscales`, first you might need to install `devtools`: 81 | 82 | `install.packages("devtools")` 83 | 84 | then: 85 | 86 | `devtools::install_github("GRousselet/rogme")` 87 | 88 | `devtools::install_github("zeehio/facetscales")` 89 | 90 | To reproduce the summary figures, you also need `cowplot` to combine panels: 91 | 92 | `install.packages("cowplot")` 93 | 94 | Finally, if you decide to run the simulations, you will need `beepr` to get a little auditory reward: 95 | 96 | `install.packages("beepr")` 97 | 98 | # Additional R functions 99 | Here we highlight a few R functions relevant to the tutorial. Most of them are listed in the RMarkdown notebooks, with example syntax. Each notebook will install the appropriate functions for you; otherwise, in the console you can type `source(file.choose())` and select the relevant .txt file. 100 | 101 | ## Robust estimation and hypothesis testing 102 | To get all the statistical functions from Rand Wilcox, select the [Rallfun-v40.txt](https://github.com/GRousselet/articles/blob/master/bootstrap/functions/Rallfun-v40.txt) file. See details on this [webpage](https://dornsife.usc.edu/labs/rwilcox/software/). The full description of the functions is available in the book [Introduction to Robust Estimation and Hypothesis Testing](https://books.google.co.uk/books/about/Introduction_to_Robust_Estimation_and_Hy.html?id=8f8nBb4__EYC&printsec=frontcover&source=kp_read_button&redir_esc=y#v=onepage&q&f=false). Here are some of the functions used or mentioned in the notebooks. 103 | 104 | ### One-sample 105 | |Name|Description| 106 | |-----|-----| 107 | |`onesampb`|one-sample percentile bootstrap for any estimator| 108 | |`sint`|parametric inference on the median| 109 | |`trimci`|one-sample test on trimmed means| 110 | |`trimpb`|percentile bootstrap inferences on trimmed means| 111 | |`trimcibt`|bootstrap-t on trimmed means| 112 | |`hdpb`|percentile bootstrap inferences on the Harrell-Davis quantile estimator| 113 | 114 | ### Two independent groups 115 | |Name|Description| 116 | |-----|-----| 117 | |`yuen`|t-test on trimmed means| 118 | |`yuenbt`|bootstrap-t on trimmed means| 119 | |`pb2gen`|percentile bootstrap to compare any estimators| 120 | |`medpb2`|same as pb2gen but only to compare medians| 121 | |`trimpb2`|same as pb2gen but only to compare trimmed means| 122 | |`comvar2`|parametric test to compare variances| 123 | 124 | ### Two dependent groups 125 | |Name|Description| 126 | |-----|-----| 127 | |`yuend`|t-test on dependent trimmed means| 128 | |`ydbt`|bootstrap-t on trimmed means| 129 | |`comdvar`|parametric test of variances| 130 | |`bootdpci`|percentile bootstrap using any estimator| 131 | 132 | ### Correlations 133 | |Name|Description| 134 | |-----|-----| 135 | |`pcorb`|percentile bootstrap confidence interval for Pearson's correlation| 136 | |`corb`|percentile bootstrap confidence interval for any robust correlation| 137 | |`wincor`|winsorised correlation| 138 | |`pbcor`|percentage bend correlation| 139 | |`mscor`|skipped correlations using Pearson's or Spearman's correlation| 140 | 141 | #### Compare independent correlations 142 | |Name|Description| 143 | |-----|-----| 144 | |`twopcor`|percentile bootstrap comparison of two independent Pearson's correlations| 145 | |`twocor`|percentile bootstrap comparison of two independent robust correlations| 146 | 147 | #### Compare dependent correlations 148 | |Name|Description| 149 | |-----|-----| 150 | |`TWOpov`|compare two overlapping Pearson's correlations| 151 | |`twoDcorR`|compare two overlapping robust correlations| 152 | |`TWOpNOV`|compare two non-overlapping Pearson's correlations| 153 | |`twoDNOV`|compare two non-overlapping robust correlations| 154 | 155 | ## Other custom functions 156 | [functions.txt](https://github.com/GRousselet/articles/blob/master/bootstrap/functions/functions.txt) and [theme_gar.txt](https://github.com/GRousselet/articles/blob/master/bootstrap/functions/theme_gar.txt) contain custom code to set some ggplot2 parameters and to compute a few things. Other custom functions are defined in the notebooks. 157 | 158 | # Extra resources 159 | 160 | ## R packages for bootstrap inferences 161 | - [`boot`](https://www.statmethods.net/advstats/bootstrapping.html) 162 | - [`resample`](https://cran.r-project.org/web/packages/resample/index.html) 163 | - [`bootstrap`](https://cran.r-project.org/web/packages/bootstrap/index.html) 164 | - [`WRS2`](https://cran.r-project.org/web/packages/WRS2/index.html) 165 | 166 | ## Interactive demo 167 | [Frequentist inference: confidence interval & bootstrap](https://seeing-theory.brown.edu/frequentist-inference/index.html#section2) 168 | 169 | 170 | ## Books 171 | Suggested books on bootstrap methods, robust statistics and simulations. 172 | 173 | [An Introduction to the Bootstrap](https://books.google.co.uk/books?id=gLlpIUxRntoC&printsec=frontcover&dq=bootstrap+efron&hl=en&sa=X&ved=0ahUKEwjiv676orHiAhVIRxUIHYkgAckQ6AEIKDAA#v=onepage&q=bootstrap%20efron&f=false) 174 | 175 | [Robust Statistics](https://books.google.co.uk/books?id=yAmZsxWSEWgC&dq=mathematical+foundations+of+robust+statistics&hl=en&sa=X&ved=0ahUKEwj3tP6Oo7HiAhU_UhUIHftBCnQQ6AEILjAB) 176 | 177 | [Introduction to Robust Estimation and Hypothesis Testing](https://books.google.co.uk/books/about/Introduction_to_Robust_Estimation_and_Hy.html?id=8f8nBb4__EYC&printsec=frontcover&source=kp_read_button&redir_esc=y#v=onepage&q&f=false) 178 | 179 | [Computer Age Statistical Inference](https://books.google.co.uk/books?id=Sj1yDAAAQBAJ&printsec=frontcover&dq=computer+inference+efron&hl=en&sa=X&ved=0ahUKEwiXy7vjorHiAhU4SxUIHUm7A3kQ6AEIKDAA#v=onepage&q=computer%20inference%20efron&f=false) 180 | 181 | [Statistics: Unlocking the Power of Data](https://books.google.co.uk/books?id=EpBEDwAAQBAJ&printsec=frontcover&dq=Statistics:+Unlocking+the+Power+of+Data&hl=en&sa=X&ved=0ahUKEwil1unSorHiAhUPTxUIHfCwBjEQ6AEIKDAA#v=onepage&q=Statistics%3A%20Unlocking%20the%20Power%20of%20Data&f=false) 182 | 183 | [Introduction to Statistical Investigations](https://books.google.co.uk/books?id=FsvVwQEACAAJ&dq=Introduction+to+Statistical+Investigations&hl=en&sa=X&ved=0ahUKEwigt_2worHiAhUySBUIHcZqCnAQ6AEIKDAA) 184 | 185 | [Mathematical Statistics with Resampling and R](https://books.google.co.uk/books?id=_2hvDwAAQBAJ&printsec=frontcover&dq=Mathematical+Statistics+with+Resampling+and+R&hl=en&sa=X&ved=0ahUKEwiEj_-gorHiAhXkQhUIHRauC-IQ6AEILzAB#v=onepage&q=Mathematical%20Statistics%20with%20Resampling%20and%20R&f=false) 186 | -------------------------------------------------------------------------------- /data/beh_sim.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/beh_sim.RData -------------------------------------------------------------------------------- /data/compcorr_nsim.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim.RData -------------------------------------------------------------------------------- /data/compcorr_nsim_0004.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim_0004.RData -------------------------------------------------------------------------------- /data/compcorr_nsim_0005.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim_0005.RData -------------------------------------------------------------------------------- /data/compcorr_nsim_0506.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim_0506.RData -------------------------------------------------------------------------------- /data/coverage50.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/coverage50.RData -------------------------------------------------------------------------------- /data/ex_g1_h0.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ex_g1_h0.RData -------------------------------------------------------------------------------- /data/ex_g1_h0_t_samp_dist.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ex_g1_h0_t_samp_dist.RData -------------------------------------------------------------------------------- /data/gh_coverage.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/gh_coverage.RData -------------------------------------------------------------------------------- /data/gh_t_sampdist.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/gh_t_sampdist.RData -------------------------------------------------------------------------------- /data/onesamp_coverage.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage.RData -------------------------------------------------------------------------------- /data/onesamp_coverage2.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage2.RData -------------------------------------------------------------------------------- /data/onesamp_coverage_n10.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage_n10.RData -------------------------------------------------------------------------------- /data/onesamp_coverage_nboot200.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage_nboot200.RData -------------------------------------------------------------------------------- /data/onesamp_coverage_nboot2000.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage_nboot2000.RData -------------------------------------------------------------------------------- /data/onesamp_stability.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_stability.RData -------------------------------------------------------------------------------- /data/ptb_params.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ptb_params.RData -------------------------------------------------------------------------------- /data/ptb_tboot.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ptb_tboot.RData -------------------------------------------------------------------------------- /data/twosamp_coverage.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/twosamp_coverage.RData -------------------------------------------------------------------------------- /data/twosamp_fp.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/twosamp_fp.RData -------------------------------------------------------------------------------- /docs/2depgps_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/2depgps_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/2depgps_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/2depgps_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/2depgps_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/2depgps_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/2indgps.md: -------------------------------------------------------------------------------- 1 | Compare two independent groups 2 | ================ 3 | Guillaume A. Rousselet 4 | 2022-06-13 5 | 6 | # Dependencies 7 | 8 | ``` r 9 | library(tibble) 10 | library(ggplot2) 11 | # library(cowplot) 12 | source("./functions/theme_gar.txt") 13 | source("./functions/Rallfun-v40.txt") 14 | source("./functions/functions.txt") 15 | ``` 16 | 17 | ``` r 18 | sessionInfo() 19 | ``` 20 | 21 | ## R version 4.2.0 (2022-04-22) 22 | ## Platform: x86_64-apple-darwin17.0 (64-bit) 23 | ## Running under: macOS Catalina 10.15.7 24 | ## 25 | ## Matrix products: default 26 | ## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib 27 | ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib 28 | ## 29 | ## locale: 30 | ## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8 31 | ## 32 | ## attached base packages: 33 | ## [1] stats graphics grDevices utils datasets methods base 34 | ## 35 | ## other attached packages: 36 | ## [1] ggplot2_3.3.6 tibble_3.1.7 37 | ## 38 | ## loaded via a namespace (and not attached): 39 | ## [1] rstudioapi_0.13 knitr_1.39 magrittr_2.0.3 tidyselect_1.1.2 40 | ## [5] munsell_0.5.0 colorspace_2.0-3 R6_2.5.1 rlang_1.0.2 41 | ## [9] fastmap_1.1.0 fansi_1.0.3 dplyr_1.0.9 stringr_1.4.0 42 | ## [13] tools_4.2.0 grid_4.2.0 gtable_0.3.0 xfun_0.31 43 | ## [17] utf8_1.2.2 cli_3.3.0 withr_2.5.0 htmltools_0.5.2 44 | ## [21] ellipsis_0.3.2 yaml_2.3.5 digest_0.6.29 lifecycle_1.0.1 45 | ## [25] crayon_1.5.1 purrr_0.3.4 vctrs_0.4.1 glue_1.6.2 46 | ## [29] evaluate_0.15 rmarkdown_2.14 stringi_1.7.6 compiler_4.2.0 47 | ## [33] pillar_1.7.0 generics_0.1.2 scales_1.2.0 pkgconfig_2.0.3 48 | 49 | Wilcox’s functions for two independent groups 50 | 51 | ``` r 52 | # to compare any estimators 53 | pb2gen(x, y, alpha=0.05, nboot=1000, est=median, SEED=FALSE) 54 | # same as pb2gen but only to compare medians: 55 | medpb2() 56 | # same as pb2gen but only to compare trimmed means: 57 | trimpb2() 58 | # to compare variances: 59 | comvar2() 60 | # For robust measures of scale, use pb2gen 61 | ``` 62 | 63 | # Illustrate populations 64 | 65 | We sample from 2 populations that differ in skewness, leading to large 66 | differences in the tails. 67 | 68 | ``` r 69 | meanlog1 <- 0 70 | meanlog2 <- 0.2 71 | sdlog1 <- 0.5 72 | sdlog2 <- 0.7 73 | 74 | x <- seq(0, 5, 0.1) 75 | 76 | p <- ggplot(as.tibble(x), aes(x = x)) + theme_gar + 77 | stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0, 0.5)"), 78 | fun = dlnorm, args = list(meanlog = meanlog1, sdlog = sdlog1)) + 79 | stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0.2, 0.7)"), 80 | fun = dlnorm, args = list(meanlog = meanlog2, sdlog = sdlog2)) + 81 | labs(x = "Values", y = "Density") + 82 | scale_colour_manual(name = "", values = c("Lognormal(0, 0.5)" = "orange", 83 | "Lognormal(0.2, 0.7)" = "purple")) + 84 | theme(legend.position = c(.6, .8), 85 | axis.text.y=element_blank(), #remove y axis labels 86 | axis.ticks.y=element_blank()) + 87 | guides(colour = guide_legend(override.aes = list(size = 3))) 88 | ``` 89 | 90 | ## Warning: `as.tibble()` was deprecated in tibble 2.0.0. 91 | ## Please use `as_tibble()` instead. 92 | ## The signature and semantics have changed, see `?as_tibble`. 93 | ## This warning is displayed once every 8 hours. 94 | ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated. 95 | 96 | ``` r 97 | p.pop <- p 98 | p 99 | ``` 100 | 101 | ![](2indgps_files/figure-gfm/unnamed-chunk-4-1.png) 102 | 103 | ## Population normalise difference 104 | 105 | We use samples of 100,000 values to define the population normalise 106 | difference between 3rd quartiles. 107 | 108 | ``` r 109 | set.seed(21) 110 | normdiff(q3(rlnorm(100000, meanlog = meanlog1, sdlog = sdlog1)), q3(rlnorm(100000, meanlog = meanlog2, sdlog = sdlog2))) 111 | ``` 112 | 113 | ## [1] -0.1666533 114 | 115 | # Generate samples 116 | 117 | We get 2 relatively large samples from 2 populations. 118 | 119 | ``` r 120 | set.seed(21) 121 | n1 <- 50 122 | n2 <- 60 123 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1) 124 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2) 125 | ``` 126 | 127 | Instead of the traditional measures of central tendency, like the mean 128 | or the median, here we’re going to estimate the 3rd quartile of the 129 | marginal distributions. This can be justified by an interest in 130 | differences among slower responses. To illustrate that with the 131 | bootstrap we can build confidence intervals for any quantity, the group 132 | estimations will be on a normalised difference of quartiles: 133 | ![effect = (q3_1-q3_2)/(q3_1+q3_2)](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;effect%20%3D%20%28q3_1-q3_2%29%2F%28q3_1%2Bq3_2%29 "effect = (q3_1-q3_2)/(q3_1+q3_2)"). 134 | 135 | # Illustrate samples 136 | 137 | ``` r 138 | set.seed(21) # for reproducible jitter 139 | # raw data 140 | df <- tibble(val = c(samp1, samp2), 141 | y = rep(1, n1+n2), 142 | gp = factor(c(rep("Group 1",n1),rep("Group 2",n2))) 143 | ) 144 | 145 | df.q2 <- tibble(y = rep(0.9,2), 146 | yend = rep(1.1,2), 147 | x = c(q2(samp1),q2(samp2)), 148 | xend = x, 149 | gp = factor(c("Group 1","Group 2")) 150 | ) 151 | 152 | df.q3 <- tibble(y = rep(0.9,2), 153 | yend = rep(1.1,2), 154 | x = c(q3(samp1),q3(samp2)), 155 | xend = x, 156 | gp = factor(c("Group 1","Group 2")) 157 | ) 158 | 159 | p <- ggplot(data = df, aes(x = val, y = y)) + theme_gar + 160 | # scatterplots 161 | geom_jitter(height = .05, alpha = 0.5, 162 | size = 3, shape = 21, fill = "grey", colour = "black") + 163 | theme(axis.ticks.y = element_blank(), 164 | axis.text.y = element_blank(), 165 | axis.title.y = element_blank(), 166 | panel.grid.minor.x = element_blank()) + 167 | scale_y_continuous(breaks = 1) + 168 | # 3rd quartile 169 | geom_segment(data = df.q3, aes(y = y, yend = yend, 170 | x = x, xend = xend), 171 | size = 2, lineend = 'round', colour = "grey") + 172 | # median 173 | geom_segment(data = df.q2, aes(y = y, yend = yend, 174 | x = x, xend = xend), 175 | size = 0.5, lineend = 'round') + 176 | labs(x = "Values") + 177 | # ggtitle("Random sample") + 178 | facet_grid(cols = vars(gp)) 179 | p 180 | ``` 181 | 182 | ![](2indgps_files/figure-gfm/unnamed-chunk-7-1.png) 183 | 184 | ``` r 185 | pA1 <- p 186 | ``` 187 | 188 | Thick vertical line = 3rd quartile. Thin vertical line = 2nd quartile 189 | (median). 190 | 191 | ## Sample difference 192 | 193 | ### Illustrate results 194 | 195 | ``` r 196 | nb <- 20 197 | 198 | df <- tibble(bootid = 1, 199 | res = normdiff(q3(samp1), q3(samp2)) 200 | ) 201 | 202 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 203 | theme_gar + 204 | geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)), 205 | size = 2, colour = "grey") + 206 | geom_vline(xintercept = 0, size = 0.75, linetype = "dashed") + 207 | coord_cartesian(xlim = c(-0.4, 0.4)) + 208 | scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) + 209 | labs(x = "Normalised difference") + 210 | theme(panel.grid.minor = element_blank(), 211 | axis.title.y = element_blank()) + 212 | ggtitle("(Group 1 - Group 2) / (Group 1 + Group 2)") + 213 | theme(plot.title = element_text(size = 16)) 214 | pA2 <- p 215 | p 216 | ``` 217 | 218 | ![](2indgps_files/figure-gfm/unnamed-chunk-8-1.png) population 219 | normalised difference = -0.15 220 | 221 | Thick grey vertical line = difference between 3rd quartiles. Vertical 222 | dashed line = zero reference line. 223 | 224 | # Generate bootstrap samples & confidence intervals 225 | 226 | We use 5,000 bootstrap samples. 227 | 228 | ``` r 229 | set.seed(666) 230 | nboot <- 5000 231 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot) 232 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot) 233 | ``` 234 | 235 | ## Illustrate 20 bootstrap samples 236 | 237 | For each sample we superimpose the 3rd quartile—short vertical black 238 | line. Disks = bootstrap observations. Long vertical grey lines mark the 239 | sample 3rd quartiles. 240 | 241 | ``` r 242 | nb <- 20 243 | df <- tibble(res = c(as.vector(bootsamp1[1:nb,]),as.vector(bootsamp2[1:nb,])), 244 | bootid = c(rep(1:nb, each = n1),rep(1:nb, each = n2)), 245 | gp = factor(c(rep("Group 1",nb*n1),rep("Group 2",nb*n2)))) 246 | 247 | df2 <- tibble(bootid = rep(1:nb,2), 248 | res = c(apply(bootsamp1[1:nb,],1,q3),apply(bootsamp2[1:nb,],1,q3)), 249 | gp = factor(rep(c("Group 1", "Group 2"), each = nb)) 250 | ) 251 | 252 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar + 253 | # 3rd quartile 254 | geom_vline(data = df.q3, aes(group = gp, xintercept = x), size = 2, colour = "grey") + 255 | # scatterplots 256 | geom_point(alpha = 0.3, position = position_jitter(height=0.1)) + 257 | labs(x = "Values", y = "Bootstrap samples") + 258 | geom_segment(data = df2, aes(x = res, xend = res, 259 | y = bootid - 0.4, yend = bootid + 0.4), 260 | size = 1.5) + 261 | theme(panel.grid.minor = element_blank()) + 262 | scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) + 263 | facet_grid(cols = vars(gp)) 264 | ``` 265 | 266 | ## Warning: `expand_scale()` is deprecated; use `expansion()` instead. 267 | 268 | ``` r 269 | pB1 <- p 270 | p 271 | ``` 272 | 273 | ![](2indgps_files/figure-gfm/unnamed-chunk-10-1.png) 274 | 275 | ## Illustrate bootstrap normalised differences 276 | 277 | ### Illustrate results 278 | 279 | ``` r 280 | nb <- 20 281 | 282 | df <- tibble(bootid = 1:nb, 283 | res = normdiff(apply(bootsamp1[1:nb,],1,q3), apply(bootsamp2[1:nb,],1,q3)) 284 | ) 285 | 286 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 287 | theme_gar + 288 | geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)), size = 2, colour = "grey") + 289 | geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") + 290 | coord_cartesian(xlim = c(-0.4, 0.4)) + 291 | scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) + 292 | labs(x = "Normalised differences", y = "Bootstrap samples") + 293 | geom_segment(size = 1.5) + 294 | theme(panel.grid.minor = element_blank()) + 295 | scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) 296 | ``` 297 | 298 | ## Warning: `expand_scale()` is deprecated; use `expansion()` instead. 299 | 300 | ``` r 301 | pB2 <- p 302 | p 303 | ``` 304 | 305 | ![](2indgps_files/figure-gfm/unnamed-chunk-11-1.png) 306 | 307 | # Bootstrap distributions 308 | 309 | ## Compute confidence intervals 310 | 311 | ``` r 312 | alpha <- 0.05 313 | boot1.q3 <- apply(bootsamp1,1,q3) 314 | boot2.q3 <- apply(bootsamp2,1,q3) 315 | ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2)) 316 | ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2)) 317 | bootdiff <- normdiff(boot1.q3, boot2.q3) 318 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 319 | ``` 320 | 321 | ## Bootstrap distributions of group quantiles 322 | 323 | ``` r 324 | df <- tibble(x = c(boot1.q3, boot2.q3), 325 | gp = factor(c(rep("Group 1",nboot),rep("Group 2",nboot)))) 326 | 327 | # df.q3 <- tibble(q3 = c(q3(samp1), q3(samp2)), 328 | # group = factor(c("Group 1", "Group 2"))) 329 | 330 | df.ci <- tibble(x = c(ci1[1], ci2[1]), 331 | xend = c(ci1[2], ci2[2]), 332 | y = c(0, 0), 333 | yend = c(0, 0), 334 | gp = factor(c("Group 1", "Group 2"))) 335 | 336 | p <- ggplot(df, aes(x = x)) + theme_gar + 337 | # sample q3: vertical line + label 338 | geom_vline(data = df.q3, aes(xintercept = x, group = gp), size = 2, colour = "grey") + 339 | # density 340 | geom_line(stat = "density", size = 1) + 341 | # confidence interval ---------------------- 342 | geom_segment(data = df.ci, 343 | aes(x = x, xend = xend, y = y, yend = yend), 344 | lineend = "round", size = 2, colour = "black") + 345 | labs(x = "Bootstrap quantiles", 346 | y = "Density") + 347 | theme(axis.text.y=element_blank(), #remove y axis labels 348 | axis.ticks.y=element_blank()) + 349 | facet_grid(cols = vars(gp)) 350 | p 351 | ``` 352 | 353 | ![](2indgps_files/figure-gfm/unnamed-chunk-13-1.png) 354 | 355 | ``` r 356 | pC1 <- p 357 | ``` 358 | 359 | ## Group normalised differences 360 | 361 | ``` r 362 | samp.nd <- normdiff(q3(samp1), q3(samp2)) 363 | 364 | df <- tibble(x = bootdiff) 365 | 366 | p <- ggplot(df, aes(x = x)) + theme_gar + 367 | # sample 3rd quartile: vertical line 368 | geom_vline(xintercept = samp.nd, linetype = 'solid', size = 2, colour = "grey") + 369 | geom_line(stat = "density", size = 1) + 370 | labs(x = "Bootstrap differences", 371 | y = "Density") + 372 | theme(axis.text.y=element_blank(), #remove y axis labels 373 | axis.ticks.y=element_blank()) + 374 | # confidence interval ---------------------- 375 | geom_segment(x = ci.diff[1], xend = ci.diff[2], 376 | y = 0, yend = 0, 377 | lineend = "round", size = 2, colour = "black") + 378 | # zero reference line 379 | geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") + 380 | ggtitle(paste0(round(samp.nd,digits = 3), 381 | " [", 382 | round(ci.diff[1],digits = 3), 383 | ", ", 384 | round(ci.diff[2],digits = 3) 385 | ,"]")) 386 | p 387 | ``` 388 | 389 | ![](2indgps_files/figure-gfm/unnamed-chunk-14-1.png) 390 | 391 | ``` r 392 | pC2 <- p 393 | ``` 394 | 395 | # Make summary figure 396 | 397 | ``` r 398 | pvoid <- ggplot() + theme_void() # empty plot for blank space 399 | 400 | # pA <- cowplot::plot_grid(pA1, pA2, 401 | # labels = NA, 402 | # ncol = 2, 403 | # nrow = 1, 404 | # rel_widths = c(2, 1), 405 | # label_size = 20, 406 | # hjust = -0.5, 407 | # scale=.95) 408 | # 409 | # pB <- cowplot::plot_grid(pB1, pB2, 410 | # labels = NA, 411 | # ncol = 2, 412 | # nrow = 1, 413 | # rel_widths = c(2, 1), 414 | # label_size = 20, 415 | # hjust = -0.5, 416 | # scale=.95) 417 | # 418 | # pC <- cowplot::plot_grid(pC1, pC2, 419 | # labels = NA, 420 | # ncol = 2, 421 | # nrow = 1, 422 | # rel_widths = c(2, 1), 423 | # label_size = 20, 424 | # hjust = -0.5, 425 | # scale=.95) 426 | 427 | p.LEFT <- cowplot::plot_grid(p.pop, pA1, pB1, pC1, 428 | labels = c("A", "B", "C", "D"), 429 | ncol = 1, 430 | nrow = 4, 431 | rel_heights = c(2, 1, 3, 2), 432 | label_size = 20, 433 | align = "v", 434 | axis = "l", 435 | hjust = -0.5, 436 | scale=.95) 437 | 438 | p.RIGHT <- cowplot::plot_grid(pvoid, pA2, pB2, pC2, 439 | labels = NA, 440 | ncol = 1, 441 | nrow = 4, 442 | rel_heights = c(2, 1, 3, 2), 443 | label_size = 20, 444 | align = "v", 445 | axis = "l", 446 | hjust = -0.5, 447 | scale=.95) 448 | 449 | cowplot::plot_grid(p.LEFT, p.RIGHT, 450 | labels = NA, 451 | ncol = 2, 452 | nrow = 1, 453 | rel_widths = c(2, 1), 454 | label_size = 20, 455 | align = "h", 456 | axis = "b", 457 | hjust = -0.5, 458 | scale=.95) 459 | 460 | # save figure 461 | ggsave(filename=('./figures/figure_2indgps.pdf'),width=17,height=15) 462 | ggsave(filename=('./figures/figure9.pdf'),width=17,height=15) 463 | ``` 464 | 465 | # Confidence interval with n = 100 466 | 467 | ``` r 468 | set.seed(21) 469 | n1 <- 100 470 | n2 <- 100 471 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1) 472 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2) 473 | nboot <- 5000 474 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot) 475 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot) 476 | alpha <- 0.05 477 | boot1.q3 <- apply(bootsamp1,1,q3) 478 | boot2.q3 <- apply(bootsamp2,1,q3) 479 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2)) 480 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2)) 481 | bootdiff <- normdiff(boot1.q3, boot2.q3) 482 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 483 | round(ci.diff, digits = 2) 484 | ``` 485 | 486 | ## 2.5% 97.5% 487 | ## -0.21 0.01 488 | 489 | # Confidence interval with n = 200 490 | 491 | ``` r 492 | set.seed(21) 493 | n1 <- 200 494 | n2 <- 200 495 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1) 496 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2) 497 | nboot <- 5000 498 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot) 499 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot) 500 | alpha <- 0.05 501 | boot1.q3 <- apply(bootsamp1,1,q3) 502 | boot2.q3 <- apply(bootsamp2,1,q3) 503 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2)) 504 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2)) 505 | bootdiff <- normdiff(boot1.q3, boot2.q3) 506 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 507 | round(ci.diff, digits = 2) 508 | ``` 509 | 510 | ## 2.5% 97.5% 511 | ## -0.22 -0.11 512 | -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/2indgps_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/define-populations-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/define-populations-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-29-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-29-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-30-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-32-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-32-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-33-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-33-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-34-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-34-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-37-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-37-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-38-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-38-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-42-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-42-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-43-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-43-1.png -------------------------------------------------------------------------------- /docs/compcorr_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/coverage_files/figure-gfm/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/notrobust.md: -------------------------------------------------------------------------------- 1 | The bootstrap is not robust 2 | ================ 3 | Guillaume A. Rousselet 4 | 2022-06-13 5 | 6 | # Dependencies 7 | 8 | ``` r 9 | library(tibble) 10 | library(ggplot2) 11 | # library(cowplot) 12 | source("./functions/Rallfun-v40.txt") 13 | source("./functions/theme_gar.txt") 14 | ``` 15 | 16 | The bootstrap is sometimes described as a robust technique. In itself, 17 | it is not robust. A simple example can illustrate this lack of 18 | robustness: percentile bootstrap confidence intervals for the mean are 19 | not robust, because the mean is not a robust estimator of central 20 | tendency. 21 | 22 | # Generate data and compute confidence intervals 23 | 24 | ``` r 25 | set.seed(21) 26 | n <- 10 27 | samp <- rnorm(n, 8, 2) 28 | samp <- c(samp, 17) 29 | samp_mat <- matrix(NA, nrow = 12, ncol = 7) 30 | ci_mean_t <- matrix(NA, nrow = 2, ncol = 7) 31 | mean_res <- vector(mode = "numeric", length = 7) 32 | median_res <- vector(mode = "numeric", length = 7) 33 | ci_mean_pb <- matrix(NA, nrow = 2, ncol = 7) 34 | ci_median_pb <- matrix(NA, nrow = 2, ncol = 7) 35 | ci_median_param <- matrix(NA, nrow = 2, ncol = 7) 36 | for(C in 1:7){ 37 | todo <- c(samp, 17+C^2) 38 | samp_mat[,C] <- todo 39 | mean_res[C] <- mean(todo) 40 | median_res[C] <- median(todo) 41 | ci_mean_t[,C] <- t.test(todo)$conf.int 42 | ci_mean_pb[,C] <- onesampb(todo, mean)$ci # default to nboot = 2000 43 | ci_median_pb[,C] <- onesampb(todo, median)$ci 44 | ci_median_param[,C] <- sint(todo) # parametric method for the median 45 | } 46 | ``` 47 | 48 | # Illustrate results: mean + standard CI 49 | 50 | ``` r 51 | set.seed(777) # for reproducible jitter 52 | # raw data 53 | df <- tibble(res = as.vector(samp_mat), 54 | cond = factor(rep(1:7, each = 12))) 55 | # mean + confidence intervals 56 | df2 <- tibble(res = mean_res, 57 | cond = factor(1:7), 58 | ci_low = ci_mean_t[1,], 59 | ci_up = ci_mean_t[2,]) 60 | 61 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 62 | # scatterplots 63 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 64 | geom_hline(yintercept = ci_mean_t[1,1], linetype = 'dashed') + 65 | geom_hline(yintercept = ci_mean_t[2,1], linetype = 'dashed') + 66 | # confidence intervals 67 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 68 | width=.05, size=0.75) + 69 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 70 | theme(panel.grid.minor.x = element_blank()) + 71 | labs(x = "Conditions", y = "Values") + 72 | ggtitle("Mean: standard CI") 73 | p 74 | ``` 75 | 76 | ![](notrobust_files/figure-gfm/unnamed-chunk-3-1.png) 77 | 78 | ``` r 79 | pA <- p 80 | ``` 81 | 82 | # Illustrate results: mean + boot CI 83 | 84 | ``` r 85 | set.seed(777) # for reproducible jitter 86 | # raw data 87 | df <- tibble(res = as.vector(samp_mat), 88 | cond = factor(rep(1:7, each = 12))) 89 | # mean + confidence intervals 90 | df2 <- tibble(res = mean_res, 91 | cond = factor(1:7), 92 | ci_low = ci_mean_pb[1,], 93 | ci_up = ci_mean_pb[2,]) 94 | 95 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 96 | # scatterplots 97 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 98 | geom_hline(yintercept = ci_mean_pb[1,1], linetype = 'dashed') + 99 | geom_hline(yintercept = ci_mean_pb[2,1], linetype = 'dashed') + 100 | # confidence intervals 101 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 102 | width=.05, size=0.75) + 103 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 104 | theme(panel.grid.minor.x = element_blank()) + 105 | labs(x = "Conditions", y = "Values") + 106 | ggtitle("Mean: bootstrap CI") 107 | p 108 | ``` 109 | 110 | ![](notrobust_files/figure-gfm/unnamed-chunk-4-1.png) 111 | 112 | ``` r 113 | pB <- p 114 | ``` 115 | 116 | # Illustrate results: median + boot CI 117 | 118 | ``` r 119 | set.seed(777) # for reproducible jitter 120 | # raw data 121 | df <- tibble(res = as.vector(samp_mat), 122 | cond = factor(rep(1:7, each = 12))) 123 | # mean + confidence intervals 124 | df2 <- tibble(res = median_res, 125 | cond = factor(1:7), 126 | ci_low = ci_median_pb[1,], 127 | ci_up = ci_median_pb[2,]) 128 | 129 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 130 | # scatterplots 131 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 132 | geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') + 133 | geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') + 134 | # confidence intervals 135 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 136 | width=.05, size=0.75) + 137 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 138 | theme(panel.grid.minor.x = element_blank()) + 139 | labs(x = "Conditions", y = "Values") + 140 | ggtitle("Median: bootstrap CI") 141 | p 142 | ``` 143 | 144 | ![](notrobust_files/figure-gfm/unnamed-chunk-5-1.png) 145 | 146 | ``` r 147 | pC <- p 148 | ``` 149 | 150 | # Illustrate results: median + parametric CI 151 | 152 | ``` r 153 | set.seed(777) # for reproducible jitter 154 | # raw data 155 | df <- tibble(res = as.vector(samp_mat), 156 | cond = factor(rep(1:7, each = 12))) 157 | # mean + confidence intervals 158 | df2 <- tibble(res = median_res, 159 | cond = factor(1:7), 160 | ci_low = ci_median_param[1,], 161 | ci_up = ci_median_param[2,]) 162 | 163 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 164 | # scatterplots 165 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 166 | geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') + 167 | geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') + 168 | # confidence intervals 169 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 170 | width=.05, size=0.75) + 171 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 172 | theme(panel.grid.minor.x = element_blank()) + 173 | labs(x = "Conditions", y = "Values") + 174 | ggtitle("Median: parametric CI") 175 | p 176 | ``` 177 | 178 | ![](notrobust_files/figure-gfm/unnamed-chunk-6-1.png) 179 | 180 | ``` r 181 | # pC <- p 182 | ``` 183 | 184 | # Summary figure 185 | 186 | ``` r 187 | cowplot::plot_grid(pA, pB, pC, 188 | labels = c("A", "B", "C"), 189 | ncol = 3, 190 | nrow = 1, 191 | rel_widths = c(1, 1, 1), 192 | label_size = 20, 193 | hjust = -0.5, 194 | scale=.95, 195 | align = "h") 196 | 197 | # save figure 198 | ggsave(filename=('./figures/figure_notrobust.pdf'),width=10,height=5) 199 | ggsave(filename=('./figures/figure6.pdf'),width=10,height=5) 200 | ``` 201 | -------------------------------------------------------------------------------- /docs/notrobust_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/notrobust_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/notrobust_files/figure-gfm/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/notrobust_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/pb.md: -------------------------------------------------------------------------------- 1 | Percentile bootstrap 2 | ================ 3 | Guillaume A. Rousselet 4 | 2022-06-13 5 | 6 | # Dependencies 7 | 8 | ``` r 9 | library(tibble) 10 | library(ggplot2) 11 | # library(cowplot) 12 | # library(HDInterval) 13 | source("./functions/theme_gar.txt") 14 | ``` 15 | 16 | ``` r 17 | sessionInfo() 18 | ``` 19 | 20 | ## R version 4.2.0 (2022-04-22) 21 | ## Platform: x86_64-apple-darwin17.0 (64-bit) 22 | ## Running under: macOS Catalina 10.15.7 23 | ## 24 | ## Matrix products: default 25 | ## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib 26 | ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib 27 | ## 28 | ## locale: 29 | ## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8 30 | ## 31 | ## attached base packages: 32 | ## [1] stats graphics grDevices utils datasets methods base 33 | ## 34 | ## other attached packages: 35 | ## [1] ggplot2_3.3.6 tibble_3.1.7 36 | ## 37 | ## loaded via a namespace (and not attached): 38 | ## [1] rstudioapi_0.13 knitr_1.39 magrittr_2.0.3 tidyselect_1.1.2 39 | ## [5] munsell_0.5.0 colorspace_2.0-3 R6_2.5.1 rlang_1.0.2 40 | ## [9] fastmap_1.1.0 fansi_1.0.3 dplyr_1.0.9 stringr_1.4.0 41 | ## [13] tools_4.2.0 grid_4.2.0 gtable_0.3.0 xfun_0.31 42 | ## [17] utf8_1.2.2 cli_3.3.0 withr_2.5.0 htmltools_0.5.2 43 | ## [21] ellipsis_0.3.2 yaml_2.3.5 digest_0.6.29 lifecycle_1.0.1 44 | ## [25] crayon_1.5.1 purrr_0.3.4 vctrs_0.4.1 glue_1.6.2 45 | ## [29] evaluate_0.15 rmarkdown_2.14 stringi_1.7.6 compiler_4.2.0 46 | ## [33] pillar_1.7.0 generics_0.1.2 scales_1.2.0 pkgconfig_2.0.3 47 | 48 | # Bootstrap implementation 49 | 50 | Let’s look at how the bootstrap is implemented in the one-sample case. 51 | See an interactive demo 52 | [here](https://seeing-theory.brown.edu/frequentist-inference/index.html#section2). 53 | 54 | ## Sampling with replacement 55 | 56 | Test the `sample()` function. Let say our sample is a sequence of 57 | integers. We sample with replacement from that sequence of numbers. 58 | Execute chunk several times to see what happens. 59 | 60 | ``` r 61 | n <- 10 # sample size 62 | samp <- 1:n 63 | boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement 64 | boot.samp 65 | ``` 66 | 67 | ## [1] 3 10 4 6 9 9 1 10 5 8 68 | 69 | Generate 3 bootstrap samples for the article: 70 | 71 | ``` r 72 | set.seed(21) # reproducible example 73 | n <- 6 74 | samp <- 1:n 75 | nboot <- 3 76 | matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot) 77 | ``` 78 | 79 | ## [,1] [,2] [,3] [,4] [,5] [,6] 80 | ## [1,] 1 2 3 6 3 3 81 | ## [2,] 3 5 4 6 6 4 82 | ## [3,] 1 3 2 6 2 5 83 | 84 | ## Loop 85 | 86 | ``` r 87 | set.seed(21) # reproducible results 88 | n <- 20 # sample size 89 | samp <- rnorm(n) # get normal sample 90 | nboot <- 1000 # number of bootstrap samples 91 | 92 | # declare vector of results 93 | boot.m <- vector(mode = "numeric", length = nboot) # save means 94 | boot.tm <- vector(mode = "numeric", length = nboot) # save trimmed means 95 | boot.md <- vector(mode = "numeric", length = nboot) # save medians 96 | 97 | for(B in 1:nboot){ 98 | boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement 99 | boot.m[B] <- mean(boot.samp) 100 | boot.tm[B] <- mean(boot.samp, trim = 0.2) 101 | boot.md[B] <- median(boot.samp) 102 | } 103 | 104 | samp.m <- mean(samp) 105 | samp.tm <- mean(samp, trim = 0.2) 106 | samp.md <- median(samp) 107 | samp.m 108 | ``` 109 | 110 | ## [1] 0.1363416 111 | 112 | ``` r 113 | samp.tm 114 | ``` 115 | 116 | ## [1] 0.1835497 117 | 118 | ``` r 119 | samp.md 120 | ``` 121 | 122 | ## [1] 0.3028401 123 | 124 | ### Plot original results 125 | 126 | ``` r 127 | set.seed(1) 128 | df <- tibble(cond = factor(rep(1,n)), 129 | res = samp) 130 | ggplot(df, aes(x = cond, y = res)) + theme_linedraw() + 131 | geom_jitter(width = 0.1, alpha = 0.3) + 132 | theme(axis.text.x = element_blank(), 133 | axis.ticks = element_blank()) + 134 | scale_x_discrete(name ="") + 135 | # stat_summary(fun.y=median, geom="line") 136 | geom_segment(aes(x = 0.9, y = samp.m, xend = 1.1, yend = samp.m)) + 137 | geom_segment(aes(x = 0.9, y = samp.md, xend = 1.1, yend = samp.md), colour = "orange") + 138 | geom_segment(aes(x = 0.9, y = samp.tm, xend = 1.1, yend = samp.tm), colour = "blue") + 139 | annotate("text", x = 1.185, y = samp.m-0.1, label = 'bold("Mean")', size = 4, parse = TRUE) + 140 | annotate("text", x = 1.2, y = samp.md+0.1, label = 'bold("Median")', size = 4, colour = "orange", parse = TRUE) + 141 | annotate("text", x = 0.75, y = samp.tm, label = 'bold("Trimmed\nmean")', size = 4, colour = "blue", parse = TRUE) 142 | ``` 143 | 144 | ![](pb_files/figure-gfm/unnamed-chunk-6-1.png) 145 | 146 | ### Plot distributions of bootstrap estimates 147 | 148 | The distribution of bootstrap medians is multi-modal and very different 149 | from that of the mean and the 20% trimmed mean. To compute confidence 150 | intervals for the median in the one-sample case, it is recommended to 151 | use the parametric approach implemented in the function `sint()` 152 | available in the file *./functions/Rallfun-v40.txt* in this repository. 153 | 154 | ``` r 155 | df <- tibble(res = c(boot.m, boot.tm, boot.md), 156 | est = factor(c(rep("Mean",nboot), rep("Trimmed mean",nboot), rep("Median",nboot))) 157 | ) 158 | ggplot(df, aes(x = res, colour = est)) + theme_gar + 159 | geom_line(aes(y = ..density..), stat = "density", size = 1) + 160 | labs(x = "Bootstrap estimates", y = "Density") + 161 | theme(legend.position = "bottom", 162 | axis.text.y = element_blank(), 163 | axis.ticks.y = element_blank()) 164 | ``` 165 | 166 | ![](pb_files/figure-gfm/unnamed-chunk-7-1.png) 167 | 168 | ``` r 169 | # ggtitle("Boostrap samples") 170 | ``` 171 | 172 | ## Matrix 173 | 174 | ``` r 175 | set.seed(21) # reproducible results 176 | n <- 20 # sample size 177 | samp <- rnorm(n) # get normal sample 178 | nboot <- 1000 # number of bootstrap samples 179 | # sample with replacement + reoganise into a matrix 180 | boot.samp <- matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot) 181 | boot.m <- apply(boot.samp, 1, mean) 182 | boot.md <- apply(boot.samp, 1, median) 183 | ``` 184 | 185 | ## Functions 186 | 187 | Examples of R packages for bootstrap inferences: 188 | 189 | - [`boot`](https://www.statmethods.net/advstats/bootstrapping.html) 190 | 191 | - [`resample`](https://cran.r-project.org/web/packages/resample/index.html) 192 | 193 | - [`bootstrap`](https://cran.r-project.org/web/packages/bootstrap/index.html) 194 | 195 | - [`WRS2`](https://cran.r-project.org/web/packages/WRS2/index.html) 196 | 197 | Functions from [Rand 198 | Wilcox](https://dornsife.usc.edu/labs/rwilcox/software/). 199 | 200 | ``` r 201 | # TO USE THE FUNCTIONS, FIRST USE THE SOURCE COMMAND: 202 | # source('./functions/Rallfun-v40.txt') 203 | 204 | set.seed(1) # reproducible results 205 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0) 206 | # est = estimator, could be var, mad, to use a trimmed mean, add argument trim = 0.2 207 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0, trim = 0.1) 208 | # nv = null value for NHST 209 | # always set SEED to FALSE otherwise the function always returns the same results for a given input. 210 | # the only way to really understand the code is to look at it: edit(onesampb) 211 | 212 | # for inferences on trimmed means only: 213 | trimpb() 214 | 215 | # for inferences on the Harrell-Davis quantile estimator (default q=0.5 = median): 216 | hdpb() 217 | ``` 218 | 219 | # Generate sample from lognormal distribution 220 | 221 | ## Illustrate population 222 | 223 | Lognormal distribution from the which the sample is taken. 224 | 225 | ``` r 226 | x <- seq(0, 7, 0.001) 227 | y <- dlnorm(x) 228 | 229 | df <- tibble(x = x, y = y) 230 | 231 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar + 232 | geom_line(size = 1.5, colour = "orange") + 233 | labs(x = "Values", y = "Density") + 234 | theme(axis.text.y = element_blank(), 235 | axis.ticks.y = element_blank()) 236 | p 237 | ``` 238 | 239 | ![](pb_files/figure-gfm/unnamed-chunk-10-1.png) 240 | 241 | ## Get sample 242 | 243 | ``` r 244 | set.seed(21) # reproducible example 245 | n <- 30 # sample size 246 | meanlog <- 0 247 | sdlog <- 1 248 | samp <- rlnorm(n, meanlog = meanlog, sdlog = sdlog) # random sample 249 | ``` 250 | 251 | ## Illustrate sample 252 | 253 | ``` r 254 | set.seed(21) # for reproducible jitter 255 | # raw data 256 | df <- tibble(pc = samp, 257 | cond = rep(1, n)) 258 | 259 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar + 260 | # scatterplots 261 | geom_jitter(width = .05, alpha = 0.5, 262 | size = 3, shape = 21, fill = "grey", colour = "black") + 263 | theme(axis.ticks.x = element_blank(), 264 | axis.text.x = element_blank(), 265 | axis.title.x = element_blank()) + 266 | scale_x_continuous(breaks = 1) + 267 | # mean 268 | geom_segment(aes(x = 0.9, xend = 1.1, 269 | y = mean(samp), yend = mean(samp))) + 270 | # median 271 | geom_segment(aes(x = 0.9, xend = 1.1, 272 | y = median(samp), yend = median(samp)), 273 | linetype = 'longdash', lineend = 'round') + 274 | theme(panel.grid.minor.x = element_blank()) + 275 | labs(y = "Values") 276 | p 277 | ``` 278 | 279 | ![](pb_files/figure-gfm/unnamed-chunk-12-1.png) 280 | 281 | ``` r 282 | pA <- p 283 | ``` 284 | 285 | Sample mean = 1.61 286 | 287 | # Standard confidence interval 288 | 289 | ## T-value: define function 290 | 291 | ``` r 292 | # mean minus null value divided by SEM 293 | tval <- function(x,nv){ 294 | tval <- (mean(x) - nv) / sqrt(var(x)/length(x)) 295 | tval 296 | } 297 | ``` 298 | 299 | ## P value 300 | 301 | Let say our hypothesis is mu = 2. 302 | 303 | First, let’s use the function: 304 | 305 | ``` r 306 | mu <- 2 # null hypothesis 307 | t.test(samp, mu = mu)$p.value 308 | ``` 309 | 310 | ## [1] 0.2667699 311 | 312 | Then check the formula: 313 | 314 | ``` r 315 | dof <- length(samp)-1 # degrees of freedom 316 | 2 * pt(abs(tval(samp, mu)), dof, lower.tail = FALSE) 317 | ``` 318 | 319 | ## [1] 0.2667699 320 | 321 | ## Illustrate theoretical *T* distribution 322 | 323 | ``` r 324 | alpha <- 0.05 325 | 326 | p <- ggplot(data.frame(x = c(-5, 5)), aes(x)) + theme_gar + 327 | labs(y = "Density") + 328 | theme(axis.text = element_text(size = 14), 329 | axis.title = element_text(size = 16), 330 | plot.title = element_text(size=20), 331 | axis.text.y = element_blank(), 332 | axis.ticks.y = element_blank()) + 333 | labs(x = "T values") + 334 | ggtitle(substitute(paste(italic(T)," distribution with ",dof," degrees of freedom"), list(dof=dof))) + 335 | # area under the curve -> p value 336 | # https://christianburkhart.de/blog/area_under_the_curve/ 337 | stat_function(fun = dt, 338 | geom = "area", 339 | xlim = c(-5, tval(samp, mu)), 340 | alpha = .2, 341 | fill = "red", 342 | args = list(df = dof)) + 343 | # theoretical cut-off t value for alpha = 0.05 344 | geom_segment(x = qt(alpha/2, dof), 345 | xend = qt(alpha/2, dof), 346 | y = 0, 347 | yend = dt(qt(alpha/2, dof), dof), 348 | size = 1, 349 | colour = "red") + 350 | annotate(geom = "label", x = -2.7, y = 0.07, size = 7, colour = "red", 351 | label = expression(paste("T"[crit]))) + # italic("t") 352 | # observed t value 353 | # geom_vline(xintercept = abs(tval(samp, mu))) 354 | geom_segment(x = tval(samp, mu), 355 | xend = tval(samp, mu), 356 | y = 0, 357 | yend = dt(tval(samp, mu), dof), 358 | size = 1, 359 | colour = "black", 360 | linetype = "dashed") + 361 | annotate(geom = "label", x = -1.8, y = 0.22, size = 7, colour = "black", 362 | label = expression(paste("T"[obs]))) + # paste("obs.", italic("t"))) 363 | # Plot density function 364 | stat_function(fun = dt, args = list(df = dof), 365 | size = 1) + 366 | # P value 367 | geom_segment(x = -2.8, xend = -1.5, 368 | y = 0.15, yend = 0.07, 369 | arrow = arrow(type = "closed", 370 | length = unit(0.25, "cm")), 371 | colour = "grey30", 372 | size = 1) + 373 | annotate(geom = "label", x = -3, y = 0.15, size = 7, 374 | colour = "white", fill = "red", fontface = "bold", 375 | label = "P value / 2") + 376 | annotate(geom = "label", x = 2.8, y = 0.35, size = 7, 377 | colour = "white", fill = "black", fontface = "bold", 378 | label = expression(paste(bold("CI = m \u00B1 T"[crit]), 379 | bold(" * SEM")))) 380 | # ggsave(filename = './theor_t.pdf') 381 | pB <- p 382 | p 383 | ``` 384 | 385 | ![](pb_files/figure-gfm/unnamed-chunk-16-1.png) 386 | 387 | ## Compute confidence interval 388 | 389 | ### Using built-in function 390 | 391 | ``` r 392 | ci.t <- t.test(samp, mu = mu)$conf.int 393 | ci.t 394 | ``` 395 | 396 | ## [1] 0.9037929 2.3149338 397 | ## attr(,"conf.level") 398 | ## [1] 0.95 399 | 400 | ### Formula 401 | 402 | ``` r 403 | alpha <- 0.05 # alpha level 404 | df <- n-1 # degrees of freedom 405 | samp.m <- mean(samp) # sample mean 406 | sem <- sd(samp) / sqrt(n) # sample estimate of the standard error of the mean 407 | ci <- vector("numeric",2) 408 | ci[1] <- samp.m - qt(1-alpha/2, df) * sem # lower bound of CI 409 | ci[2] <- samp.m + qt(1-alpha/2, df) * sem # upper bound of CI 410 | ci 411 | ``` 412 | 413 | ## [1] 0.9037929 2.3149338 414 | 415 | # Generate bootstrap samples 416 | 417 | ``` r 418 | set.seed(666) 419 | nboot <- 5000 420 | bootsamp <- matrix(sample(samp, nboot * n, replace = TRUE), nrow = nboot) 421 | ``` 422 | 423 | ## Illustrate a few bootstrap samples 424 | 425 | For each sample we superimpose the (bootstrap) median. 426 | 427 | ``` r 428 | nb <- 20 429 | df <- tibble(res = as.vector(bootsamp[1:nb,]), 430 | bootid = rep(1:nb, each = n)) 431 | 432 | df2 <- tibble(bootid = 1:nb, 433 | res = apply(bootsamp[1:nb,],1,mean)) 434 | 435 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar + 436 | geom_point(alpha = 0.3, position = position_jitter(height=0.1)) + 437 | labs(x = "Values", y = "Bootstrap samples") + 438 | geom_segment(data = df2, aes(x = res, xend = res, 439 | y = bootid - 0.4, yend = bootid + 0.4), 440 | size = 1.5) + 441 | theme(panel.grid.minor = element_blank()) + 442 | scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) 443 | ``` 444 | 445 | ## Warning: `expand_scale()` is deprecated; use `expansion()` instead. 446 | 447 | ``` r 448 | pC <- p 449 | p 450 | ``` 451 | 452 | ![](pb_files/figure-gfm/unnamed-chunk-20-1.png) 453 | 454 | ## Illustrate bootstrap sampling distribution of the mean 455 | 456 | Compute confidence interval and other quantities. 457 | 458 | Here and in the rest of the tutorial, we compute bootstrap CI using 459 | `quantile(type = 6)` of the bootstrap distribution. This is recommended 460 | in this article: 461 | 462 | Hesterberg, Tim C. “What Teachers Should Know About the Bootstrap: 463 | Resampling in the Undergraduate Statistics Curriculum.” The American 464 | Statistician 69, no. 4 (October 2, 2015): 371–86. 465 | . 466 | 467 | Rand Wilcox uses a different approach. See for instance 468 | `help(onesampb)`, in which the bounds of the CI are defined using the 469 | bootstrap distribution `bvec`, `alpha` (say = 0.05) and `nboot` (say = 470 | 1,000 bootstrap samples): 471 | 472 | `low <- round((alpha/2)*nboot)` `up <- nboot-low` `low <- low+1` 473 | `ci_lower_bound <- bvec[low]` `ci_upper_bound <- bvec[up]` 474 | 475 | In practice it is unclear if these choices make any difference. What we 476 | know is that with `nboot` large enough, the choice of quantile method 477 | should make virtually no difference. 478 | 479 | ``` r 480 | # bootstrap means 481 | bootm <- apply(bootsamp, 1, mean) 482 | # confidence interval 483 | bootci <- quantile(bootm, probs = c(0.025, 0.975), type = 6) 484 | # bootstrap estimation of the standard error 485 | bootsamp.sd <- sd(bootsamp) 486 | # P value 487 | pv <- mean(bootm < mu) # + .5*mean(bootsamp==mu) 488 | pv <- 2 * min(c(pv, 1-pv)) 489 | ``` 490 | 491 | Bootstrap 95% CI = \[1.02, 2.38\] 492 | Bootstrap estimate of the SEM = 1.86 493 | Bootstrap *P* value (hypothesis = 2) = 0.276 494 | 495 | Alternatively, we could compute a highest density interval (HDI): 496 | 497 | ``` r 498 | require(HDInterval) 499 | boothdi <- HDInterval::hdi(bootm) 500 | ``` 501 | 502 | We illustrate the distribution of the bootstrap samples, from which we 503 | derive four elements: 504 | 505 | - confidence/compatibility interval 506 | 507 | - p value 508 | 509 | - bootstrap estimate of the standard error (SE) 510 | 511 | - bootstrap estimate of bias 512 | 513 | The distribution is also our best estimate of the shape the sampling 514 | distribution of the median, given the data and our model. 515 | 516 | ### Make data frame 517 | 518 | ``` r 519 | df <- as_tibble(with(density(bootm),data.frame(x,y))) 520 | 521 | df.pv <- tibble(x = df$x[df$x>mu], 522 | y = df$y[df$x>mu]) 523 | ``` 524 | 525 | ### Figure 526 | 527 | ``` r 528 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar + 529 | # geom_line(stat = "density") + 530 | labs(x = "Bootstrap means", 531 | y = "Density") + 532 | theme(axis.text.y = element_blank(), 533 | axis.ticks.y = element_blank()) + 534 | # P value 535 | geom_area(data = df.pv, 536 | aes(x = x, y = y), 537 | fill = "red", alpha = .2) + 538 | # density 539 | geom_line(data = df, size = 1) + 540 | # Null value 541 | geom_segment(x = mu, 542 | xend = mu, 543 | y = 0, 544 | yend = df$y[which.min(abs(df$x-mu))], 545 | size = 1, 546 | colour = "black", 547 | linetype = "dashed") + 548 | # confidence interval ---------------------- 549 | geom_segment(x = bootci[1], xend = bootci[2], 550 | y = 0, yend = 0, 551 | lineend = "round", size = 2, colour = "black") + 552 | annotate(geom = "label", x = bootci[1]+0.15, y = 0.07, size = 5, 553 | colour = "white", fill = "black", fontface = "bold", 554 | label = paste("L = ", round(bootci[1], digits = 2))) + 555 | annotate(geom = "label", x = bootci[2]-0.15, y = 0.07, size = 5, 556 | colour = "white", fill = "black", fontface = "bold", 557 | label = paste("U = ", round(bootci[2], digits = 2))) + 558 | # sample mean: vertical line + label 559 | geom_vline(xintercept = mean(samp), 560 | linetype = 'solid') + 561 | annotate(geom = "label", x = 2.3, y = 1.2, size = 7, 562 | colour = "white", fill = "black", fontface = "bold", 563 | label = paste("Sample mean = ", round(samp.m, digits = 2))) + 564 | # vertical line marking bootstrap mean 565 | # geom_vline(xintercept = mean(bootsamp), 566 | # linetype = 'solid') 567 | # SEM label + segment 568 | annotate(geom = "label", x = 2.5, y = 0.7, size = 7, 569 | colour = "white", fill = "grey", fontface = "bold", 570 | label = paste("SD =",round(bootsamp.sd, digits = 2),"= SEM")) + 571 | geom_segment(x = 1.25, 572 | xend = 1.85, 573 | y = 0.7, yend = 0.7, 574 | arrow = arrow(type = "closed", 575 | length = unit(0.25, "cm"), 576 | ends = "both"), 577 | colour = "grey", size = 1) + 578 | # P value 579 | geom_segment(x = 2.6, xend = 2.1, y = 0.4, yend = 0.2, 580 | arrow = arrow(type = "closed", 581 | length = unit(0.25, "cm")), 582 | colour = "grey30", size = 1) + 583 | annotate(geom = "label", x = 2.6, y = 0.4, size = 7, 584 | colour = "white", fill = "red", fontface = "bold", 585 | label = "P value / 2") 586 | 587 | p 588 | ``` 589 | 590 | ![](pb_files/figure-gfm/unnamed-chunk-24-1.png) 591 | 592 | ``` r 593 | pD <- p 594 | ``` 595 | 596 | # Summary figure 597 | 598 | ``` r 599 | p1 <- cowplot::plot_grid(pA, pB, 600 | labels = c("A", "B"), 601 | ncol = 2, 602 | nrow = 1, 603 | rel_widths = c(1, 4), 604 | label_size = 20, 605 | hjust = -0.5, 606 | scale=.95, 607 | align = "h") 608 | 609 | p2 <- cowplot::plot_grid(pC, pD, 610 | labels = c("C", "D"), 611 | ncol = 2, 612 | nrow = 1, 613 | rel_widths = c(2, 3), 614 | label_size = 20, 615 | hjust = -0.5, 616 | scale=.95, 617 | align = "h") 618 | 619 | cowplot::plot_grid(p1, p2, 620 | labels = c("", ""), 621 | ncol = 1, 622 | nrow = 2, 623 | rel_heights = c(1, 1), 624 | label_size = 20, 625 | hjust = -0.5, 626 | scale=.95, 627 | align = "h") 628 | 629 | # save figure 630 | ggsave(filename=('./figures/figure_pb.pdf'),width=15,height=15) 631 | ggsave(filename=('./figures/figure1.pdf'),width=15,height=15) 632 | ``` 633 | -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/pb_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/pc.md: -------------------------------------------------------------------------------- 1 | Percentage correct example 2 | ================ 3 | Guillaume A. Rousselet 4 | 2022-06-13 5 | 6 | # Dependencies 7 | 8 | ``` r 9 | library(tibble) 10 | library(ggplot2) 11 | # library(cowplot) 12 | source("./functions/theme_gar.txt") 13 | source("./functions/Rallfun-v40.txt") 14 | ``` 15 | 16 | After taking the mean across trials, correct/incorrect data are often 17 | wrongly assumed to be normally distributed. Instead, these data are 18 | always positive and bounded, and are better modelled using a beta 19 | distribution (Kruschke, 2014). Similarly, standard methods are 20 | inappropriate for ordinal data, like Likert scale results, which are too 21 | often treated as metric (Bürkner & Vuorre, 2019; Liddell & Kruschke, 22 | 2018). 23 | 24 | # Make data and compute confidence intervals 25 | 26 | ``` r 27 | set.seed(6666) 28 | n <- 15 29 | norm_samp <- rnorm(n, mean = 75, sd = 10) 30 | skew_samp <- rbeta(n, 10, 0.2) * 100 31 | skew_samp2 <- sort(skew_samp) 32 | skew_samp2[1] <- 60 33 | # hist(rbeta(10000,10,0.2)*100, 50) 34 | 35 | norm_stci <- t.test(norm_samp, conf.level = 0.95)$conf.int 36 | skew_stci <- t.test(skew_samp, conf.level = 0.95)$conf.int 37 | skew_stci2 <- t.test(skew_samp2, conf.level = 0.95)$conf.int 38 | 39 | norm_pbci <- onesampb(norm_samp, est = mean)$ci 40 | skew_pbci <- onesampb(skew_samp, est = mean)$ci 41 | skew_pbci2 <- onesampb(skew_samp2, est = mean)$ci 42 | ``` 43 | 44 | # Illustration: standard confidence interval 45 | 46 | ``` r 47 | set.seed(21) # for reproducible jitter 48 | # raw data 49 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2), 50 | cond = rep(1:3, each = n)) 51 | # mean + confidence intervals 52 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)), 53 | cond = 1:3, 54 | ci_low = c(norm_stci[1], skew_stci[1], skew_stci2[1]), 55 | ci_up = c(norm_stci[2], skew_stci[2], skew_stci2[2])) 56 | 57 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar + 58 | # scatterplots 59 | geom_jitter(width = .1, alpha = 0.5, 60 | size = 3, shape = 21, fill = "grey", colour = "black") + 61 | scale_x_continuous(breaks = c(1,2,3)) + 62 | # confidence intervals 63 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 64 | width=.05, size=0.75) + 65 | geom_point(data = df2, aes(x=cond, y=pc), size=3) + 66 | theme(panel.grid.minor.x = element_blank()) + 67 | labs(x = "Conditions", y = "Percent correct") + 68 | ggtitle("Standard confidence intervals") 69 | p 70 | ``` 71 | 72 | ![](pc_files/figure-gfm/unnamed-chunk-3-1.png) 73 | 74 | ``` r 75 | pA <- p 76 | ``` 77 | 78 | Sample means: 79 | 80 | - Condition 1 = 73.6 81 | 82 | - Condition 2 = 96.9 83 | 84 | - Condition 3 = 95.2 85 | 86 | # Illustration: bootstrap confidence interval 87 | 88 | ``` r 89 | set.seed(21) # for reproducible jitter 90 | # raw data 91 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2), 92 | cond = rep(1:3, each = n)) 93 | # mean + confidence intervals 94 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)), 95 | cond = 1:3, 96 | ci_low = c(norm_pbci[1], skew_pbci[1], skew_pbci2[1]), 97 | ci_up = c(norm_pbci[2], skew_pbci[2], skew_pbci2[2])) 98 | 99 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar + 100 | # scatterplots 101 | geom_jitter(width = .1, alpha = 0.5, 102 | size = 3, shape = 21, fill = "grey", colour = "black") + 103 | scale_x_continuous(breaks = c(1,2,3)) + 104 | # confidence intervals 105 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 106 | width=.05, size=0.75) + 107 | geom_point(data = df2, aes(x=cond, y=pc), size=3) + 108 | theme(panel.grid.minor.x = element_blank()) + 109 | labs(x = "Conditions", y = "Percent correct") + 110 | ggtitle("Bootstrap confidence intervals") 111 | p 112 | ``` 113 | 114 | ![](pc_files/figure-gfm/unnamed-chunk-4-1.png) 115 | 116 | ``` r 117 | pB <- p 118 | ``` 119 | 120 | # Summary figure 121 | 122 | ``` r 123 | cowplot::plot_grid(pA, pB, 124 | labels = c("A", "B"), 125 | ncol = 2, 126 | nrow = 1, 127 | rel_widths = c(1, 1), 128 | label_size = 20, 129 | hjust = -0.5, 130 | scale=.95, 131 | align = "h") 132 | 133 | # save figure 134 | ggsave(filename=('./figures/figure_pc.pdf'),width=10,height=5) 135 | ggsave(filename=('./figures/figure2.pdf'),width=10,height=5) 136 | ``` 137 | 138 | # References 139 | 140 | Bürkner, Paul-Christian, and Matti Vuorre. “Ordinal Regression Models in 141 | Psychology: A Tutorial.” Advances in Methods and Practices in 142 | Psychological Science 2, no. 1 (March 1, 2019): 77–101. 143 | . 144 | 145 | Kruschke, John. Doing Bayesian Data Analysis - 2nd Edition. Academic 146 | Press, 2014. 147 | . 148 | 149 | Liddell, Torrin M., and John K. Kruschke. “Analyzing Ordinal Data with 150 | Metric Models: What Could Possibly Go Wrong?” Journal of Experimental 151 | Social Psychology 79 (November 1, 2018): 328–48. 152 | . 153 | -------------------------------------------------------------------------------- /docs/pc_files/figure-gfm/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pc_files/figure-gfm/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/pc_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pc_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-29-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-29-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-30-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-31-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-31-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-32-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-32-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /docs/ptb_files/figure-gfm/unnamed-chunk-36-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-36-1.png -------------------------------------------------------------------------------- /docs/sampdist.md: -------------------------------------------------------------------------------- 1 | Bootstrap sampling distributions 2 | ================ 3 | Guillaume A. Rousselet 4 | 2022-06-13 5 | 6 | # Dependencies 7 | 8 | ``` r 9 | library(tibble) 10 | library(ggplot2) 11 | # library(cowplot) 12 | source("./functions/theme_gar.txt") 13 | # source("./functions/Rallfun-v40.txt") 14 | ``` 15 | 16 | ``` r 17 | sessionInfo() 18 | ``` 19 | 20 | ## R version 4.2.0 (2022-04-22) 21 | ## Platform: x86_64-apple-darwin17.0 (64-bit) 22 | ## Running under: macOS Catalina 10.15.7 23 | ## 24 | ## Matrix products: default 25 | ## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib 26 | ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib 27 | ## 28 | ## locale: 29 | ## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8 30 | ## 31 | ## attached base packages: 32 | ## [1] stats graphics grDevices utils datasets methods base 33 | ## 34 | ## other attached packages: 35 | ## [1] ggplot2_3.3.6 tibble_3.1.7 36 | ## 37 | ## loaded via a namespace (and not attached): 38 | ## [1] rstudioapi_0.13 knitr_1.39 magrittr_2.0.3 tidyselect_1.1.2 39 | ## [5] munsell_0.5.0 colorspace_2.0-3 R6_2.5.1 rlang_1.0.2 40 | ## [9] fastmap_1.1.0 fansi_1.0.3 dplyr_1.0.9 stringr_1.4.0 41 | ## [13] tools_4.2.0 grid_4.2.0 gtable_0.3.0 xfun_0.31 42 | ## [17] utf8_1.2.2 cli_3.3.0 withr_2.5.0 htmltools_0.5.2 43 | ## [21] ellipsis_0.3.2 yaml_2.3.5 digest_0.6.29 lifecycle_1.0.1 44 | ## [25] crayon_1.5.1 purrr_0.3.4 vctrs_0.4.1 glue_1.6.2 45 | ## [29] evaluate_0.15 rmarkdown_2.14 stringi_1.7.6 compiler_4.2.0 46 | ## [33] pillar_1.7.0 generics_0.1.2 scales_1.2.0 pkgconfig_2.0.3 47 | 48 | # Example 1: samples from lognormal distribution 49 | 50 | We sample from a lognormal distribution, because, you know, life is 51 | [lognormal](https://stat.ethz.ch/~stahel/lognormal/lnboard/brochure.html). 52 | 53 | ## Illustrate population 54 | 55 | For comparison, show normal distribution with the same mean and sd as 56 | the default lognormal (meanlog = 0, sdlog = 1). The [mean of the 57 | lognormal 58 | distribution](https://en.wikipedia.org/wiki/Log-normal_distribution) 59 | with meanlog = 0 and sdlog = 1 is `exp(meanlog + sdlog^2/2)` = exp(1/2). 60 | The variance is `(exp(sdlog^2)-1) * exp(2*meanlog + sdlog^2)` = 61 | (exp(1)-1) \* exp(1). 62 | 63 | ### Check that the two distributions have the same mean and variance. 64 | 65 | ``` r 66 | set.seed(777) 67 | pop1 <- rnorm(1000000, 68 | mean = exp(1/2), 69 | sd = sqrt((exp(1)-1) * exp(1)) 70 | ) 71 | #mean(pop1) 72 | #var(pop1) 73 | 74 | pop2 <- rlnorm(1000000, meanlog = 0, sdlog = 1) 75 | #mean(pop2) 76 | #var(pop2) 77 | 78 | pop.tm <- mean(pop2, trim = 0.2) # save population trimmed mean for figure 79 | ``` 80 | 81 | Normal population: 82 | 83 | - Mean = 1.65 84 | 85 | - Variance = 4.68 86 | 87 | Lognormal population: 88 | 89 | - Mean = 1.65 90 | 91 | - Variance = 4.64 92 | 93 | ### Make figure 94 | 95 | ``` r 96 | x.n <- seq(-5, 7, 0.001) 97 | x.ln <- seq(0, 7, 0.001) 98 | 99 | y.n <- dnorm(x.n, mean = exp(1/2), sd = sqrt((exp(1)-1) * exp(1))) 100 | y.ln <- dlnorm(x.ln) 101 | 102 | df <- tibble(x = c(x.n, x.ln), 103 | y = c(y.n, y.ln), 104 | dist = c(rep("Normal", length(x.n)), rep("Lognormal", length(x.ln))) 105 | ) 106 | 107 | p <- ggplot(df, aes(x = x, y = y, colour = dist)) + theme_gar + 108 | geom_line(size = 1.5) + 109 | labs(x = "Values", y = "Density") + 110 | scale_colour_manual(name = "", values = c("Lognormal" = "orange", 111 | "Normal" = "purple")) + 112 | theme(legend.position = c(0.15, 0.86), 113 | axis.text.y = element_blank(), 114 | axis.ticks.y = element_blank()) 115 | p 116 | ``` 117 | 118 | ![](sampdist_files/figure-gfm/unnamed-chunk-4-1.png) 119 | 120 | ### Save figure 121 | 122 | ``` r 123 | # save figure 124 | ggsave(filename=('./figures/figure_lognormal_pop.pdf'),width=7,height=5) 125 | ggsave(filename=('./figures/figure3.pdf'),width=7,height=5) 126 | ``` 127 | 128 | ## Illustrate sampling distributions 129 | 130 | We draw 50,000 samples for different sizes from our population. Each of 131 | these samples is like an experiment. For each experiment, we compute a 132 | statistics of interest, here the 20% trimmed mean. In each plot, the 133 | vertical dashed line marks the population 20% trimmed mean that we are 134 | trying to estimate. 135 | 136 | ### n = 20 137 | 138 | ``` r 139 | # set.seed(777) 140 | n <- 20 # sample size 141 | nsamp <- 50000 # number of samples (experiments) 142 | # sampling distribution of the mean 143 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2) 144 | v <- enframe(dist.samp, name = NULL) 145 | 146 | p <- ggplot(v, aes(x = value)) + theme_gar + 147 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 148 | geom_line(stat = 'density', size = 1) + 149 | theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"), 150 | axis.text.y = element_blank(), 151 | axis.ticks.y = element_blank()) + 152 | labs(x = "Sample trimmed means", y = "Density") + 153 | ggtitle("Sampling distribution: n = 20") + 154 | coord_cartesian(xlim = c(0, 4)) 155 | p.sampdist1 <- p 156 | p 157 | ``` 158 | 159 | ![](sampdist_files/figure-gfm/unnamed-chunk-6-1.png) 160 | 161 | ### n = 30 162 | 163 | ``` r 164 | # set.seed(777) 165 | n <- 30 # sample size 166 | nsamp <- 50000 # number of samples (experiments) 167 | # sampling distribution of the mean 168 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2) 169 | v <- enframe(dist.samp, name = NULL) 170 | 171 | p <- ggplot(v, aes(x = value)) + theme_gar + 172 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 173 | geom_line(stat = 'density', size = 1) + 174 | theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"), 175 | axis.text.y = element_blank(), 176 | axis.ticks.y = element_blank()) + 177 | labs(x = "Sample trimmed means", y = "Density") + 178 | coord_cartesian(xlim = c(0, 4)) + 179 | ggtitle("Sampling distribution: n = 30") 180 | p.sampdist2 <- p 181 | p 182 | ``` 183 | 184 | ![](sampdist_files/figure-gfm/unnamed-chunk-7-1.png) 185 | 186 | ### n = 50 187 | 188 | ``` r 189 | # set.seed(777) 190 | n <- 50 # sample size 191 | nsamp <- 50000 # number of samples (experiments) 192 | # sampling distribution of the mean 193 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2) 194 | v <- enframe(dist.samp, name = NULL) 195 | 196 | p <- ggplot(v, aes(x = value)) + theme_gar + 197 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 198 | geom_line(stat = 'density', size = 1) + 199 | theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"), 200 | axis.text.y = element_blank(), 201 | axis.ticks.y = element_blank()) + 202 | labs(x = "Sample trimmed means", y = "Density") + 203 | coord_cartesian(xlim = c(0, 4)) + 204 | ggtitle("Sampling distribution: n = 50") 205 | p.sampdist3 <- p 206 | p 207 | ``` 208 | 209 | ![](sampdist_files/figure-gfm/unnamed-chunk-8-1.png) 210 | 211 | ## Illustrate 4 samples 212 | 213 | In each plot, the vertical dashed line marks the population 20% trimmed 214 | mean, whereas the vertical continuous line marks the sample 20% trimmed 215 | mean. 216 | 217 | ### Define function for horizontal jitter plot 218 | 219 | ``` r 220 | samp_jitter <- function(df, samp.tm){ 221 | p <- ggplot(data = df, aes(x = res, y = cond)) + theme_gar + 222 | # scatterplots 223 | geom_jitter(height = .05, alpha = 0.5, 224 | size = 3, shape = 21, fill = "grey", colour = "black") + 225 | theme(axis.ticks.y = element_blank(), 226 | axis.text.y = element_blank(), 227 | axis.title = element_blank(), 228 | panel.grid.minor.x = element_blank(), 229 | plot.margin = unit(c(0, 0, 0, 0), "cm")) + 230 | scale_x_continuous(breaks = seq(0,12,2)) + 231 | scale_y_continuous(breaks = 1) + 232 | coord_cartesian(xlim = c(0, 11)) + 233 | # sample trimmed mean 234 | geom_segment(aes(x = samp.tm, xend = samp.tm, 235 | y = 0.9, yend = 1.1)) + 236 | # population trimmed mean 237 | geom_segment(aes(x = pop.tm, xend = pop.tm, 238 | y = 0.9, yend = 1.1), 239 | linetype = 'longdash', lineend = 'round', colour = "black") + 240 | labs(x = "Values") 241 | p 242 | } 243 | ``` 244 | 245 | ### n = 20 246 | 247 | Generate all samples 248 | 249 | ``` r 250 | # set.seed(777) 251 | n <- 20 # sample size 252 | samp20 <- matrix(rlnorm(n*4), nrow = 4) # get sample 253 | samp20.tm <- apply(samp20, 1, mean, trim = 0.2) 254 | ``` 255 | 256 | Illustrate sample 1 257 | 258 | ``` r 259 | set.seed(21) # reproducible jitter 260 | S <- 1 261 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 262 | p <- samp_jitter(df, samp20.tm[S]) 263 | p.samp20_1 <- p 264 | p 265 | ``` 266 | 267 | ![](sampdist_files/figure-gfm/unnamed-chunk-11-1.png) 268 | 269 | Illustrate sample 2 270 | 271 | ``` r 272 | set.seed(21) # reproducible jitter 273 | S <- 2 274 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 275 | p <- samp_jitter(df, samp20.tm[S]) 276 | p.samp20_2 <- p 277 | p 278 | ``` 279 | 280 | ![](sampdist_files/figure-gfm/unnamed-chunk-12-1.png) 281 | 282 | Illustrate sample 3 283 | 284 | ``` r 285 | set.seed(21) # reproducible jitter 286 | S <- 3 287 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 288 | p <- samp_jitter(df, samp20.tm[S]) 289 | p.samp20_3 <- p 290 | p 291 | ``` 292 | 293 | ![](sampdist_files/figure-gfm/unnamed-chunk-13-1.png) 294 | 295 | Illustrate sample 4 296 | 297 | ``` r 298 | set.seed(21) # reproducible jitter 299 | S <- 4 300 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 301 | p <- samp_jitter(df, samp20.tm[S]) 302 | p.samp20_4 <- p 303 | p 304 | ``` 305 | 306 | ![](sampdist_files/figure-gfm/unnamed-chunk-14-1.png) 307 | 308 | ### n = 30 309 | 310 | Generate all samples 311 | 312 | ``` r 313 | # set.seed(777) 314 | n <- 30 # sample size 315 | samp30 <- matrix(rlnorm(n*4), nrow = 4) # get sample 316 | samp30.tm <- apply(samp30, 1, mean, trim = 0.2) 317 | ``` 318 | 319 | Illustrate sample 1 320 | 321 | ``` r 322 | set.seed(21) # reproducible jitter 323 | S <- 1 324 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 325 | p <- samp_jitter(df, samp30.tm[S]) 326 | p.samp30_1 <- p 327 | p 328 | ``` 329 | 330 | ![](sampdist_files/figure-gfm/unnamed-chunk-16-1.png) 331 | 332 | Illustrate sample 2 333 | 334 | ``` r 335 | set.seed(21) # reproducible jitter 336 | S <- 2 337 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 338 | p <- samp_jitter(df, samp30.tm[S]) 339 | p.samp30_2 <- p 340 | p 341 | ``` 342 | 343 | ![](sampdist_files/figure-gfm/unnamed-chunk-17-1.png) 344 | 345 | Illustrate sample 3 346 | 347 | ``` r 348 | set.seed(21) # reproducible jitter 349 | S <- 3 350 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 351 | p <- samp_jitter(df, samp30.tm[S]) 352 | p.samp30_3 <- p 353 | p 354 | ``` 355 | 356 | ![](sampdist_files/figure-gfm/unnamed-chunk-18-1.png) 357 | 358 | Illustrate sample 4 359 | 360 | ``` r 361 | set.seed(21) # reproducible jitter 362 | S <- 4 363 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 364 | p <- samp_jitter(df, samp30.tm[S]) 365 | p.samp30_4 <- p 366 | p 367 | ``` 368 | 369 | ![](sampdist_files/figure-gfm/unnamed-chunk-19-1.png) 370 | 371 | ### n = 50 372 | 373 | Generate all samples 374 | 375 | ``` r 376 | # set.seed(777) 377 | n <- 50 # sample size 378 | samp50 <- matrix(rlnorm(n*4), nrow = 4) # get sample 379 | samp50.tm <- apply(samp50, 1, mean, trim = 0.2) 380 | ``` 381 | 382 | Illustrate sample 1 383 | 384 | ``` r 385 | set.seed(21) # reproducible jitter 386 | S <- 1 387 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 388 | p <- samp_jitter(df, samp50.tm[S]) 389 | p.samp50_1 <- p 390 | p 391 | ``` 392 | 393 | ![](sampdist_files/figure-gfm/unnamed-chunk-21-1.png) 394 | 395 | Illustrate sample 2 396 | 397 | ``` r 398 | set.seed(21) # reproducible jitter 399 | S <- 2 400 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 401 | p <- samp_jitter(df, samp50.tm[S]) 402 | p.samp50_2 <- p 403 | p 404 | ``` 405 | 406 | ![](sampdist_files/figure-gfm/unnamed-chunk-22-1.png) 407 | 408 | Illustrate sample 3 409 | 410 | ``` r 411 | set.seed(21) # reproducible jitter 412 | S <- 3 413 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 414 | p <- samp_jitter(df, samp50.tm[S]) 415 | p.samp50_3 <- p 416 | p 417 | ``` 418 | 419 | ![](sampdist_files/figure-gfm/unnamed-chunk-23-1.png) 420 | 421 | Illustrate sample 4 422 | 423 | ``` r 424 | set.seed(21) # reproducible jitter 425 | S <- 4 426 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 427 | p <- samp_jitter(df, samp50.tm[S]) 428 | p.samp50_4 <- p 429 | p 430 | ``` 431 | 432 | ![](sampdist_files/figure-gfm/unnamed-chunk-24-1.png) 433 | 434 | ## Illustrate 4 bootstrap sampling distributions 435 | 436 | We estimate the sampling distributions from the samples. 437 | 438 | ### Define density plot function 439 | 440 | ``` r 441 | dens_plot <- function(dist.samp, samp.tm){ 442 | v <- enframe(dist.samp, name = NULL) 443 | p <- ggplot(v, aes(x = value)) + theme_gar + 444 | geom_vline(xintercept = samp.tm, linetype = 'solid', colour = "black") + 445 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 446 | geom_line(stat = 'density', size = 1) + 447 | theme(axis.text.y = element_blank(), 448 | axis.title.y = element_blank(), 449 | axis.ticks.y = element_blank(), 450 | plot.margin = unit(c(0, 0, 0.5, 0), "cm")) + 451 | labs(x = "Bootstrap estimates") + 452 | # ggtitle("Sampling distribution: n = 20") + 453 | coord_cartesian(xlim = c(0, 4)) 454 | p 455 | } 456 | ``` 457 | 458 | ### n = 20 459 | 460 | Sample 1 461 | 462 | ``` r 463 | # set.seed(777) 464 | S <- 1 465 | n <- 20 466 | nboot <- 5000 # number of bootstrap samples 467 | # bootstrap distribution 468 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 469 | p <- dens_plot(boot.samp, samp20.tm[S]) 470 | p.boot20_1 <- p 471 | p 472 | ``` 473 | 474 | ![](sampdist_files/figure-gfm/unnamed-chunk-26-1.png) 475 | 476 | Sample 2 477 | 478 | ``` r 479 | # set.seed(777) 480 | S <- 2 481 | n <- 20 482 | nboot <- 5000 # number of bootstrap samples 483 | # bootstrap distribution 484 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 485 | p <- dens_plot(boot.samp, samp20.tm[S]) 486 | p.boot20_2 <- p 487 | p 488 | ``` 489 | 490 | ![](sampdist_files/figure-gfm/unnamed-chunk-27-1.png) 491 | 492 | Sample 3 493 | 494 | ``` r 495 | # set.seed(777) 496 | S <- 3 497 | n <- 20 498 | nboot <- 5000 # number of bootstrap samples 499 | # bootstrap distribution 500 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 501 | p <- dens_plot(boot.samp, samp20.tm[S]) 502 | p.boot20_3 <- p 503 | p 504 | ``` 505 | 506 | ![](sampdist_files/figure-gfm/unnamed-chunk-28-1.png) 507 | 508 | Sample 4 509 | 510 | ``` r 511 | # set.seed(777) 512 | S <- 4 513 | n <- 20 514 | nboot <- 5000 # number of bootstrap samples 515 | # bootstrap distribution 516 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 517 | p <- dens_plot(boot.samp, samp20.tm[S]) 518 | p.boot20_4 <- p 519 | p 520 | ``` 521 | 522 | ![](sampdist_files/figure-gfm/unnamed-chunk-29-1.png) 523 | 524 | ### n = 30 525 | 526 | Sample 1 527 | 528 | ``` r 529 | # set.seed(777) 530 | S <- 1 531 | n <- 30 532 | nboot <- 5000 # number of bootstrap samples 533 | # bootstrap distribution 534 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 535 | p <- dens_plot(boot.samp, samp30.tm[S]) 536 | p.boot30_1 <- p 537 | p 538 | ``` 539 | 540 | ![](sampdist_files/figure-gfm/unnamed-chunk-30-1.png) 541 | 542 | Sample 2 543 | 544 | ``` r 545 | # set.seed(777) 546 | S <- 2 547 | nboot <- 5000 # number of bootstrap samples 548 | # bootstrap distribution 549 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 550 | p <- dens_plot(boot.samp, samp30.tm[S]) 551 | p.boot30_2 <- p 552 | p 553 | ``` 554 | 555 | ![](sampdist_files/figure-gfm/unnamed-chunk-31-1.png) 556 | 557 | Sample 3 558 | 559 | ``` r 560 | # set.seed(777) 561 | S <- 3 562 | nboot <- 5000 # number of bootstrap samples 563 | # bootstrap distribution 564 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 565 | p <- dens_plot(boot.samp, samp30.tm[S]) 566 | p.boot30_3 <- p 567 | p 568 | ``` 569 | 570 | ![](sampdist_files/figure-gfm/unnamed-chunk-32-1.png) 571 | 572 | Sample 4 573 | 574 | ``` r 575 | # set.seed(777) 576 | S <- 4 577 | nboot <- 5000 # number of bootstrap samples 578 | # bootstrap distribution 579 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 580 | p <- dens_plot(boot.samp, samp30.tm[S]) 581 | p.boot30_4 <- p 582 | p 583 | ``` 584 | 585 | ![](sampdist_files/figure-gfm/unnamed-chunk-33-1.png) 586 | 587 | ### n = 50 588 | 589 | Sample 1 590 | 591 | ``` r 592 | # set.seed(777) 593 | S <- 1 594 | n <- 50 595 | nboot <- 5000 # number of bootstrap samples 596 | # bootstrap distribution 597 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 598 | p <- dens_plot(boot.samp, samp50.tm[S]) 599 | p.boot50_1 <- p 600 | p 601 | ``` 602 | 603 | ![](sampdist_files/figure-gfm/unnamed-chunk-34-1.png) 604 | 605 | Sample 2 606 | 607 | ``` r 608 | # set.seed(777) 609 | S <- 2 610 | nboot <- 5000 # number of bootstrap samples 611 | # bootstrap distribution 612 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 613 | p <- dens_plot(boot.samp, samp50.tm[S]) 614 | p.boot50_2 <- p 615 | p 616 | ``` 617 | 618 | ![](sampdist_files/figure-gfm/unnamed-chunk-35-1.png) 619 | 620 | Sample 3 621 | 622 | ``` r 623 | # set.seed(777) 624 | S <- 3 625 | nboot <- 5000 # number of bootstrap samples 626 | # bootstrap distribution 627 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 628 | p <- dens_plot(boot.samp, samp50.tm[S]) 629 | p.boot50_3 <- p 630 | p 631 | ``` 632 | 633 | ![](sampdist_files/figure-gfm/unnamed-chunk-36-1.png) 634 | 635 | Sample 4 636 | 637 | ``` r 638 | # set.seed(777) 639 | S <- 4 640 | nboot <- 5000 # number of bootstrap samples 641 | # bootstrap distribution 642 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 643 | p <- dens_plot(boot.samp, samp50.tm[S]) 644 | p.boot50_4 <- p 645 | p 646 | ``` 647 | 648 | ![](sampdist_files/figure-gfm/unnamed-chunk-37-1.png) 649 | 650 | # Summary figure 651 | 652 | ``` r 653 | # ------------------------------------------------- 654 | p2.1 <- cowplot::plot_grid(p.samp20_1, p.boot20_1 + theme(axis.title = element_blank()), 655 | labels = c("1", "B"), 656 | label_size = 18, 657 | hjust = c(-0.5, 2.2), 658 | vjust = c(1.5, -4.3), 659 | ncol = 1, nrow = 2, 660 | rel_heights = c(1.5, 3)) 661 | 662 | p2.2 <- cowplot::plot_grid(p.samp20_2, p.boot20_2 + theme(axis.title = element_blank()), 663 | labels = c("2"), 664 | label_size = 18, 665 | ncol = 1, nrow = 2, 666 | rel_heights = c(1.5, 3)) 667 | 668 | p2.3 <- cowplot::plot_grid(p.samp20_3, p.boot20_3 + theme(axis.title = element_blank()), 669 | labels = c("3"), 670 | label_size = 18, 671 | ncol = 1, nrow = 2, 672 | rel_heights = c(1.5, 3)) 673 | 674 | p2.4 <- cowplot::plot_grid(p.samp20_4, p.boot20_4, 675 | labels = c("4"), 676 | label_size = 18, 677 | ncol = 1, nrow = 2, 678 | rel_heights = c(1.5, 3)) 679 | 680 | p2 <- cowplot::plot_grid(p.sampdist1, 681 | p2.1, p2.2, p2.3, p2.4, 682 | labels = c("A"), 683 | hjust = 0.7, 684 | label_size = 18, 685 | ncol = 1, 686 | nrow = 5, 687 | align = "v", 688 | axis = "l", 689 | rel_heights = c(1, 1, 1, 1, 1)) 690 | 691 | # ------------------------------------------------- 692 | p3.1 <- cowplot::plot_grid(p.samp30_1, p.boot30_1 + theme(axis.title = element_blank()), 693 | labels = c(""), 694 | ncol = 1, nrow = 2, 695 | rel_heights = c(1.5, 3)) 696 | 697 | p3.2 <- cowplot::plot_grid(p.samp30_2, p.boot30_2 + theme(axis.title = element_blank()), 698 | labels = c(""), 699 | ncol = 1, nrow = 2, 700 | rel_heights = c(1.5, 3)) 701 | 702 | p3.3 <- cowplot::plot_grid(p.samp30_3, p.boot30_3 + theme(axis.title = element_blank()), 703 | labels = c(""), 704 | ncol = 1, nrow = 2, 705 | rel_heights = c(1.5, 3)) 706 | 707 | p3.4 <- cowplot::plot_grid(p.samp30_4, p.boot30_4, 708 | labels = c(""), 709 | ncol = 1, nrow = 2, 710 | rel_heights = c(1.5, 3)) 711 | 712 | p3 <- cowplot::plot_grid(p.sampdist2, 713 | p3.1, p3.2, p3.3, p3.4, 714 | labels = c(""), 715 | ncol = 1, 716 | nrow = 5, 717 | align = "v", 718 | axis = "l", 719 | rel_heights = c(1, 1, 1, 1, 1)) 720 | 721 | # ------------------------------------------------- 722 | p4.1 <- cowplot::plot_grid(p.samp50_1, p.boot50_1 + theme(axis.title = element_blank()), 723 | labels = c(""), 724 | ncol = 1, nrow = 2, 725 | rel_heights = c(1.5, 3)) 726 | 727 | p4.2 <- cowplot::plot_grid(p.samp50_2, p.boot50_2 + theme(axis.title = element_blank()), 728 | labels = c(""), 729 | ncol = 1, nrow = 2, 730 | rel_heights = c(1.5, 3)) 731 | 732 | p4.3 <- cowplot::plot_grid(p.samp50_3, p.boot50_3 + theme(axis.title = element_blank()), 733 | labels = c(""), 734 | ncol = 1, nrow = 2, 735 | rel_heights = c(1.5, 3)) 736 | 737 | p4.4 <- cowplot::plot_grid(p.samp50_4, p.boot50_4, 738 | labels = c(""), 739 | ncol = 1, nrow = 2, 740 | rel_heights = c(1.5, 3)) 741 | 742 | p4 <- cowplot::plot_grid(p.sampdist3, 743 | p4.1, p4.2, p4.3, p4.4, 744 | labels = c(""), 745 | ncol = 1, 746 | nrow = 5, 747 | align = "v", 748 | axis = "l", 749 | rel_heights = c(1, 1, 1, 1, 1)) 750 | 751 | cowplot::plot_grid(p2, p3, p4, 752 | labels = c(""), 753 | ncol = 3, 754 | nrow = 1, 755 | rel_widths = c(1, 1, 1), 756 | label_size = 20, 757 | hjust = -0.5, 758 | scale=.95, 759 | align = "h") 760 | 761 | # save figure 762 | ggsave(filename=('./figures/figure_sampdist_lognormal.pdf'),width=20,height=15) 763 | ggsave(filename=('./figures/figure4.pdf'),width=20,height=15) 764 | ``` 765 | -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-22-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-22-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-24-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-24-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-26-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-26-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-29-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-29-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-30-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-30-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-31-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-31-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-32-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-32-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-33-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-33-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-34-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-34-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-35-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-35-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-36-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-36-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-37-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-37-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/sampdist_files/figure-gfm/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /functions/corfun.txt: -------------------------------------------------------------------------------- 1 | # Modify spear() to return only correlation coefficient 2 | spearsim <- function(x,y){ 3 | # Compute Spearman's rho 4 | corv <- cor(rank(x),rank(y)) 5 | corv 6 | } 7 | 8 | # modify corbsub() accordingly 9 | corbsubsim <- function(isub,x,y,corfun,...){ 10 | # 11 | # Compute correlation for x[isub] and y[isub] 12 | # isub is a vector of length n, 13 | # a bootstrap sample from the sequence of integers 14 | # 1, 2, 3, ..., n 15 | # 16 | # This function is used by other functions when computing 17 | # bootstrap estimates. 18 | # 19 | # corfun is some correlation function already stored in R 20 | # 21 | corbsub <- corfun(x[isub],y[isub],...) 22 | corbsub 23 | } 24 | -------------------------------------------------------------------------------- /functions/functions.txt: -------------------------------------------------------------------------------- 1 | # Define coverage function 2 | cover <- function(ci, pop){ 3 | out <- ci[1] <= pop & ci[2] >= pop 4 | out 5 | } 6 | 7 | # Define power function 8 | powerfun <- function(ci, mu=0){ 9 | out <- ci[1] > mu | ci[2] < mu 10 | out 11 | } 12 | 13 | # Define quantile functions 14 | q1 <- function(x){ 15 | out <- hd(x, q = 0.25) 16 | } 17 | 18 | 19 | q2 <- function(x){ 20 | out <- hd(x, q = 0.5) 21 | } 22 | 23 | q3 <- function(x){ 24 | out <- hd(x, q = 0.75) 25 | } 26 | 27 | keeporder <- function(x){ 28 | x <- as.character(x) 29 | x <- factor(x, levels=unique(x)) 30 | x 31 | } 32 | 33 | # normalised difference 34 | normdiff <- function(x,y){ 35 | out <- (x-y) / (x+y) 36 | out 37 | } 38 | -------------------------------------------------------------------------------- /functions/gengh.txt: -------------------------------------------------------------------------------- 1 | # Combine RGenData::GenDataPopulation and ghdist to create a function that generates 2 | # non-normal bivariate populations with a known correlation (among the 3 options offered by the `cor()` function) and marginal g&h distributions. 3 | 4 | # ------------------------------------------ 5 | # Code from Rallfun-v35.txt from Rand Wilcox 6 | # ------------------------------------------ 7 | 8 | # Original 9 | # http://dornsife.usc.edu/labs/rwilcox/software/ 10 | 11 | # Generate n observations from a *g-and-h* distribution 12 | ghdist <- function(n,g=0,h=0){ 13 | x<-rnorm(n) 14 | if (g>0){ 15 | ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g 16 | } 17 | if(g==0)ghdist<-x*exp(h*x^2/2) 18 | ghdist 19 | } 20 | 21 | # -------------------------------------------------------------- 22 | # Code adapted from RGenData::GenDataPopulation from John Ruscio 23 | # -------------------------------------------------------------- 24 | 25 | # Reference 26 | # Ruscio, J. & Kaczetow, W. (2008) 27 | # Simulating Multivariate Nonnormal Data Using an Iterative Algorithm. 28 | # Multivariate Behav Res, 43, 355-381. 29 | # https://www.ncbi.nlm.nih.gov/pubmed/26741201 30 | 31 | # License: MIT 32 | # Copyright <2018> 33 | 34 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 35 | # 36 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 37 | # 38 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 39 | 40 | # Original 41 | # https://github.com/cran/RGenData/blob/master/R/EFAGenData.R 42 | 43 | # Simulate multivariate g-and-h data using an iterative algorithm 44 | # 45 | # Args: 46 | # n.cases : Number of observations for each variable - default 1000 47 | # n.variables : Number of variables - default 2 48 | # g : g parameter of the g-and-h distribution - default 0 49 | # h : h parameter of the g-and-h distribution - default 0 50 | # rho : Target correlation between variables or covariance matrix - default 0 51 | # corr.type : Type of correlation - default "pearson", alternative "spearman" 52 | # g & h parameters can also be vectors of length n.variables 53 | # 54 | # Returns: 55 | # data : Population of data - matrix n.cases rows by n.variables columns 56 | # 57 | gengh <- function(n.cases = 1000, n.variables = 2, 58 | g = 0, h = 0, rho = 0, 59 | corr.type = "pearson"){ 60 | 61 | if(is.null(dim(rho))) { # create correlation matrix or already provided? 62 | target.corr <- diag(n.variables) 63 | target.corr[upper.tri(target.corr)] <- rho 64 | target.corr[lower.tri(target.corr)] <- rho 65 | } else if(dim(rho)[1] == n.variables & dim(rho)[2] == n.variables) { 66 | target.corr <- rho 67 | } 68 | 69 | # Change default to number of variables, as suggested by John Ruscio 70 | # n.factors <- 0 # Number of factors (scalar) 71 | n.factors <- n.variables # Number of factors (scalar) 72 | max.trials <- 5 # Maximum number of trials (scalar) 73 | initial.multiplier <- 1 # Value of initial multiplier (scalar) 74 | 75 | # generate g-and-h data 76 | if(length(g)==1) g <- rep(g, n.variables) 77 | if(length(h)==1) h <- rep(h, n.variables) 78 | distributions <- matrix(NA, nrow = n.cases, ncol = n.variables) 79 | for (V in 1:n.variables){ 80 | distributions[,V] <- sort(ghdist(n.cases, g=g[V], h=h[V])) 81 | } 82 | 83 | data <- matrix(0, nrow = n.cases, ncol = n.variables) 84 | iteration <- 0 85 | best.rmsr <- 1 86 | trials.without.improvement <- 0 87 | intermediate.corr <- target.corr 88 | 89 | # If number of latent factors was not specified, determine it 90 | if (n.factors == 0){ 91 | Eigenvalues.Observed <- eigen(intermediate.corr)$values 92 | Eigenvalues.Random <- matrix(0, nrow = 100, ncol = n.variables) 93 | Random.Data <- matrix(0, nrow = n.cases, ncol = n.variables) 94 | for (i in 1:100){ 95 | for (j in 1:n.variables){ 96 | Random.Data[,j] <- sample(distributions[,j], size = n.cases, replace = TRUE) 97 | } 98 | Eigenvalues.Random[i,] <- eigen(cor(Random.Data))$values 99 | } 100 | Eigenvalues.Random <- apply(Eigenvalues.Random, 2, mean) # calculate mean eigenvalue for each factor 101 | n.factors <- max(1, sum(Eigenvalues.Observed > Eigenvalues.Random)) 102 | } 103 | 104 | shared.comp <- matrix(rnorm(n.cases * n.factors, 0, 1), nrow = n.cases, 105 | ncol = n.factors) 106 | unique.comp <- matrix(rnorm(n.cases * n.variables, 0, 1), nrow = n.cases, 107 | ncol = n.variables) 108 | shared.load <- matrix(0, nrow = n.variables, ncol = n.factors) 109 | unique.load <- matrix(0, nrow = n.variables, ncol = 1) 110 | while (trials.without.improvement < max.trials) { 111 | iteration <- iteration + 1 112 | factor.analysis <- FactorAnalysis(intermediate.corr, corr.matrix = TRUE, 113 | max.iteration = 50, n.factors, corr.type) 114 | if (n.factors == 1) { 115 | shared.load[, 1] <- factor.analysis$loadings 116 | } else { 117 | for (i in 1:n.factors) 118 | shared.load[, i] <- factor.analysis$loadings[, i] 119 | } 120 | shared.load[shared.load > 1] <- 1 121 | shared.load[shared.load < -1] <- -1 122 | if (shared.load[1, 1] < 0) 123 | shared.load <- shared.load * -1 124 | for (i in 1:n.variables) 125 | if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { 126 | unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) 127 | } else { 128 | unique.load[i, 1] <- 0 129 | } 130 | unique.load <- sqrt(unique.load) 131 | for (i in 1:n.variables) 132 | data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * 133 | unique.load[i, 1] 134 | for (i in 1:n.variables) { 135 | data <- data[sort.list(data[, i]), ] 136 | data[, i] <- distributions[, i] 137 | } 138 | reproduced.corr <- cor(data, method = corr.type) 139 | residual.corr <- target.corr - reproduced.corr 140 | rmsr <- sqrt(sum(residual.corr[lower.tri(residual.corr)] * 141 | residual.corr[lower.tri(residual.corr)]) / 142 | (.5 * (n.variables * n.variables - n.variables))) 143 | if (rmsr < best.rmsr) { 144 | best.rmsr <- rmsr 145 | best.corr <- intermediate.corr 146 | best.res <- residual.corr 147 | intermediate.corr <- intermediate.corr + initial.multiplier * 148 | residual.corr 149 | trials.without.improvement <- 0 150 | } else { 151 | trials.without.improvement <- trials.without.improvement + 1 152 | current.multiplier <- initial.multiplier * 153 | .5 ^ trials.without.improvement 154 | intermediate.corr <- best.corr + current.multiplier * best.res 155 | } 156 | } 157 | 158 | factor.analysis <- FactorAnalysis(best.corr, corr.matrix = TRUE, 159 | max.iteration = 50, n.factors, 160 | corr.type) 161 | if (n.factors == 1) { 162 | shared.load[, 1] <- factor.analysis$loadings 163 | } else { 164 | for (i in 1:n.factors) 165 | shared.load[, i] <- factor.analysis$loadings[, i] 166 | } 167 | shared.load[shared.load > 1] <- 1 168 | shared.load[shared.load < -1] <- -1 169 | if (shared.load[1, 1] < 0) 170 | shared.load <- shared.load * -1 171 | for (i in 1:n.variables) 172 | if (sum(shared.load[i, ] * shared.load[i, ]) < 1) { 173 | unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ])) 174 | } else { 175 | unique.load[i, 1] <- 0 176 | } 177 | unique.load <- sqrt(unique.load) 178 | for (i in 1:n.variables) 179 | data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] * 180 | unique.load[i, 1] 181 | data <- apply(data, 2, scale) # standardizes each variable in the matrix 182 | for (i in 1:n.variables) { 183 | data <- data[sort.list(data[, i]), ] 184 | data[, i] <- distributions[, i] 185 | } 186 | data 187 | } 188 | 189 | ################################################################################ 190 | FactorAnalysis <- function(data, corr.matrix = FALSE, max.iteration = 50, 191 | n.factors = 0, corr.type = "pearson") { 192 | # Analyzes comparison data with known factorial structures 193 | # 194 | # Args: 195 | # data : Matrix to store the simulated data. 196 | # corr.matrix : Correlation matrix (default is FALSE) 197 | # max.iteration : Maximum number of iterations (scalar, default is 50). 198 | # n.factors : Number of factors (scalar, default is 0). 199 | # corr.type : Type of correlation (character, default is "pearson", 200 | # user can also call "spearman"). 201 | # 202 | # Returns: 203 | # $loadings : Factor loadings (vector, if one factor. matrix, if multiple 204 | # factors) 205 | # $factors : Number of factors (scalar). 206 | # 207 | data <- as.matrix(data) 208 | n.variables <- dim(data)[2] 209 | if (n.factors == 0) { 210 | n.factors <- n.variables 211 | determine <- TRUE 212 | } else { 213 | determine <- FALSE 214 | } 215 | if (!corr.matrix) { 216 | corr.matrix <- cor(data, method = corr.type) 217 | } else { 218 | corr.matrix <- data 219 | } 220 | criterion <- .001 221 | old.h2 <- rep(99, n.variables) 222 | h2 <- rep(0, n.variables) 223 | change <- 1 224 | iteration <- 0 225 | factor.loadings <- matrix(nrow = n.variables, ncol = n.factors) 226 | while ((change >= criterion) & (iteration < max.iteration)) { 227 | iteration <- iteration + 1 228 | eigenvalue <- eigen(corr.matrix) 229 | l <- sqrt(eigenvalue$values[1:n.factors]) 230 | for (i in 1:n.factors) 231 | factor.loadings[, i] <- eigenvalue$vectors[, i] * l[i] 232 | for (i in 1:n.variables) 233 | h2[i] <- sum(factor.loadings[i, ] * factor.loadings[i, ]) 234 | change <- max(abs(old.h2 - h2)) 235 | old.h2 <- h2 236 | diag(corr.matrix) <- h2 237 | } 238 | if (determine) n.factors <- sum(eigenvalue$values > 1) 239 | return(list(loadings = factor.loadings[, 1:n.factors], 240 | factors = n.factors)) 241 | } 242 | -------------------------------------------------------------------------------- /functions/ghpdf.txt: -------------------------------------------------------------------------------- 1 | # Code from Yuan Yan 2 | # Reference: 3 | # Yan, Yuan, and Marc G. Genton. ‘The Tukey G-and-h Distribution’. Significance 16, no. 3 (2019): 12–13. https://doi.org/10.1111/j.1740-9713.2019.01273.x. 4 | # 5 | 6 | gh_trans <- function(z,g=0.2,h=0.1){ 7 | if(g==0) 8 | x<-z*exp(h*z^2/2) 9 | else 10 | x<-1/g*(exp(g*z)-1)*exp(h*(z^2)/2) 11 | return(x) 12 | } 13 | 14 | #pdf: density function 15 | den_tukey <- function(y,g=0.2,h=0.1,xi=0,omega=1,u=0,sig=1){ 16 | y=(y-xi)/omega 17 | z=tukey_inv(y,g,h) 18 | # for(i in 1:length(y)){ 19 | # # z[i] <- uniroot(f,c(-10,10))$root 20 | # z[i]<-nleqslv(0,fn=function(x){tukey(x,g,h)-y[i]})$x 21 | # } 22 | dnorm(z,mean=u,sd=sig)/dtukey(z,g,h)/omega 23 | } 24 | 25 | #inverse TGH transformation 26 | tukey_inv <- function(y,g=0.2,h=0.1){ 27 | if(g==0 & h==0) 28 | z=y 29 | else if(g==0) 30 | z<-sign(y)*sqrt(lambert_W0(h*y^2)/h) #better & 143 times faster 31 | else if(h==0){ 32 | # if(g>0) 33 | # y=y[y>-1/g] 34 | # else 35 | # y=y[y<-1/g] 36 | z<-log(g*y+1)/g 37 | } 38 | else{ 39 | z=numeric(length(y)) 40 | for(i in 1:length(y)){ 41 | # z[i] <- uniroot(f,c(-10,10))$root 42 | z[i]<-nleqslv(0,fn=function(x){gh_trans(x,g,h)-y[i]})$x 43 | } 44 | } 45 | return(z) 46 | } 47 | 48 | dtukey <- function(z,g=0.2,h=0.1){ 49 | if(g==0) 50 | x<-exp(h*z^2/2)*(1+h*z^2) 51 | else 52 | x<-exp(g*z+h*z^2/2)+h*z/g*(exp(g*z)-1)*exp(h*(z^2)/2) 53 | return(x) 54 | } 55 | -------------------------------------------------------------------------------- /functions/theme_gar.txt: -------------------------------------------------------------------------------- 1 | theme_gar <- theme_bw() + 2 | theme(plot.title = element_text(size = 20), 3 | axis.title = element_text(size = 16), 4 | axis.text = element_text(size = 14, colour="black"), 5 | legend.key.width = unit(1.5,"cm"), 6 | legend.text = element_text(size = 16), 7 | legend.title = element_text(size = 18), 8 | strip.text = element_text(size=20, face="bold", colour="white"), 9 | strip.background = element_rect(colour="black", fill="grey40")) -------------------------------------------------------------------------------- /notrobust.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "The bootstrap is not robust" 3 | author: "Guillaume A. Rousselet" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: 7 | fig_caption: no 8 | number_sections: no 9 | toc: yes 10 | toc_depth: 2 11 | # github_document: 12 | # html_preview: yes 13 | # toc: yes 14 | # toc_depth: 2 15 | --- 16 | 17 | # Dependencies 18 | ```{r, message=FALSE, warning=FALSE} 19 | library(tibble) 20 | library(ggplot2) 21 | # library(cowplot) 22 | source("./functions/Rallfun-v40.txt") 23 | source("./functions/theme_gar.txt") 24 | ``` 25 | 26 | The bootstrap is sometimes described as a robust technique. 27 | In itself, it is not robust. A simple example can illustrate this lack of robustness: percentile bootstrap confidence intervals for the mean are not robust, because the mean is not a robust estimator of central tendency. 28 | 29 | # Generate data and compute confidence intervals 30 | ```{r} 31 | set.seed(21) 32 | n <- 10 33 | samp <- rnorm(n, 8, 2) 34 | samp <- c(samp, 17) 35 | samp_mat <- matrix(NA, nrow = 12, ncol = 7) 36 | ci_mean_t <- matrix(NA, nrow = 2, ncol = 7) 37 | mean_res <- vector(mode = "numeric", length = 7) 38 | median_res <- vector(mode = "numeric", length = 7) 39 | ci_mean_pb <- matrix(NA, nrow = 2, ncol = 7) 40 | ci_median_pb <- matrix(NA, nrow = 2, ncol = 7) 41 | ci_median_param <- matrix(NA, nrow = 2, ncol = 7) 42 | for(C in 1:7){ 43 | todo <- c(samp, 17+C^2) 44 | samp_mat[,C] <- todo 45 | mean_res[C] <- mean(todo) 46 | median_res[C] <- median(todo) 47 | ci_mean_t[,C] <- t.test(todo)$conf.int 48 | ci_mean_pb[,C] <- onesampb(todo, mean)$ci # default to nboot = 2000 49 | ci_median_pb[,C] <- onesampb(todo, median)$ci 50 | ci_median_param[,C] <- sint(todo) # parametric method for the median 51 | } 52 | ``` 53 | 54 | # Illustrate results: mean + standard CI 55 | ```{r} 56 | set.seed(777) # for reproducible jitter 57 | # raw data 58 | df <- tibble(res = as.vector(samp_mat), 59 | cond = factor(rep(1:7, each = 12))) 60 | # mean + confidence intervals 61 | df2 <- tibble(res = mean_res, 62 | cond = factor(1:7), 63 | ci_low = ci_mean_t[1,], 64 | ci_up = ci_mean_t[2,]) 65 | 66 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 67 | # scatterplots 68 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 69 | geom_hline(yintercept = ci_mean_t[1,1], linetype = 'dashed') + 70 | geom_hline(yintercept = ci_mean_t[2,1], linetype = 'dashed') + 71 | # confidence intervals 72 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 73 | width=.05, size=0.75) + 74 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 75 | theme(panel.grid.minor.x = element_blank()) + 76 | labs(x = "Conditions", y = "Values") + 77 | ggtitle("Mean: standard CI") 78 | p 79 | pA <- p 80 | ``` 81 | 82 | # Illustrate results: mean + boot CI 83 | ```{r} 84 | set.seed(777) # for reproducible jitter 85 | # raw data 86 | df <- tibble(res = as.vector(samp_mat), 87 | cond = factor(rep(1:7, each = 12))) 88 | # mean + confidence intervals 89 | df2 <- tibble(res = mean_res, 90 | cond = factor(1:7), 91 | ci_low = ci_mean_pb[1,], 92 | ci_up = ci_mean_pb[2,]) 93 | 94 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 95 | # scatterplots 96 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 97 | geom_hline(yintercept = ci_mean_pb[1,1], linetype = 'dashed') + 98 | geom_hline(yintercept = ci_mean_pb[2,1], linetype = 'dashed') + 99 | # confidence intervals 100 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 101 | width=.05, size=0.75) + 102 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 103 | theme(panel.grid.minor.x = element_blank()) + 104 | labs(x = "Conditions", y = "Values") + 105 | ggtitle("Mean: bootstrap CI") 106 | p 107 | pB <- p 108 | ``` 109 | 110 | # Illustrate results: median + boot CI 111 | 112 | ```{r} 113 | set.seed(777) # for reproducible jitter 114 | # raw data 115 | df <- tibble(res = as.vector(samp_mat), 116 | cond = factor(rep(1:7, each = 12))) 117 | # mean + confidence intervals 118 | df2 <- tibble(res = median_res, 119 | cond = factor(1:7), 120 | ci_low = ci_median_pb[1,], 121 | ci_up = ci_median_pb[2,]) 122 | 123 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 124 | # scatterplots 125 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 126 | geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') + 127 | geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') + 128 | # confidence intervals 129 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 130 | width=.05, size=0.75) + 131 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 132 | theme(panel.grid.minor.x = element_blank()) + 133 | labs(x = "Conditions", y = "Values") + 134 | ggtitle("Median: bootstrap CI") 135 | p 136 | pC <- p 137 | ``` 138 | 139 | # Illustrate results: median + parametric CI 140 | ```{r} 141 | set.seed(777) # for reproducible jitter 142 | # raw data 143 | df <- tibble(res = as.vector(samp_mat), 144 | cond = factor(rep(1:7, each = 12))) 145 | # mean + confidence intervals 146 | df2 <- tibble(res = median_res, 147 | cond = factor(1:7), 148 | ci_low = ci_median_param[1,], 149 | ci_up = ci_median_param[2,]) 150 | 151 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar + 152 | # scatterplots 153 | geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) + 154 | geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') + 155 | geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') + 156 | # confidence intervals 157 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 158 | width=.05, size=0.75) + 159 | geom_point(data = df2, aes(x=cond, y=res), size=3) + 160 | theme(panel.grid.minor.x = element_blank()) + 161 | labs(x = "Conditions", y = "Values") + 162 | ggtitle("Median: parametric CI") 163 | p 164 | # pC <- p 165 | ``` 166 | 167 | # Summary figure 168 | ```{r, eval = FALSE} 169 | cowplot::plot_grid(pA, pB, pC, 170 | labels = c("A", "B", "C"), 171 | ncol = 3, 172 | nrow = 1, 173 | rel_widths = c(1, 1, 1), 174 | label_size = 20, 175 | hjust = -0.5, 176 | scale=.95, 177 | align = "h") 178 | 179 | # save figure 180 | ggsave(filename=('./figures/figure_notrobust.pdf'),width=10,height=5) 181 | ggsave(filename=('./figures/figure6.pdf'),width=10,height=5) 182 | ``` -------------------------------------------------------------------------------- /pb.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Percentile bootstrap" 3 | author: "Guillaume A. Rousselet" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: 7 | fig_caption: no 8 | number_sections: no 9 | toc: yes 10 | toc_depth: 2 11 | # github_document: 12 | # html_preview: yes 13 | # toc: yes 14 | # toc_depth: 2 15 | --- 16 | 17 | # Dependencies 18 | ```{r, message=FALSE, warning=FALSE} 19 | library(tibble) 20 | library(ggplot2) 21 | # library(cowplot) 22 | # library(HDInterval) 23 | source("./functions/theme_gar.txt") 24 | ``` 25 | 26 | ```{r} 27 | sessionInfo() 28 | ``` 29 | 30 | # Bootstrap implementation 31 | 32 | Let's look at how the bootstrap is implemented in the one-sample case. 33 | See an interactive demo [here](https://seeing-theory.brown.edu/frequentist-inference/index.html#section2). 34 | 35 | ## Sampling with replacement 36 | 37 | Test the `sample()` function. 38 | Let say our sample is a sequence of integers. 39 | We sample with replacement from that sequence of numbers. 40 | Execute chunk several times to see what happens. 41 | ```{r} 42 | n <- 10 # sample size 43 | samp <- 1:n 44 | boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement 45 | boot.samp 46 | ``` 47 | 48 | Generate 3 bootstrap samples for the article: 49 | ```{r} 50 | set.seed(21) # reproducible example 51 | n <- 6 52 | samp <- 1:n 53 | nboot <- 3 54 | matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot) 55 | ``` 56 | 57 | ## Loop 58 | ```{r} 59 | set.seed(21) # reproducible results 60 | n <- 20 # sample size 61 | samp <- rnorm(n) # get normal sample 62 | nboot <- 1000 # number of bootstrap samples 63 | 64 | # declare vector of results 65 | boot.m <- vector(mode = "numeric", length = nboot) # save means 66 | boot.tm <- vector(mode = "numeric", length = nboot) # save trimmed means 67 | boot.md <- vector(mode = "numeric", length = nboot) # save medians 68 | 69 | for(B in 1:nboot){ 70 | boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement 71 | boot.m[B] <- mean(boot.samp) 72 | boot.tm[B] <- mean(boot.samp, trim = 0.2) 73 | boot.md[B] <- median(boot.samp) 74 | } 75 | 76 | samp.m <- mean(samp) 77 | samp.tm <- mean(samp, trim = 0.2) 78 | samp.md <- median(samp) 79 | samp.m 80 | samp.tm 81 | samp.md 82 | ``` 83 | 84 | ### Plot original results 85 | ```{r, fig.width=3} 86 | set.seed(1) 87 | df <- tibble(cond = factor(rep(1,n)), 88 | res = samp) 89 | ggplot(df, aes(x = cond, y = res)) + theme_linedraw() + 90 | geom_jitter(width = 0.1, alpha = 0.3) + 91 | theme(axis.text.x = element_blank(), 92 | axis.ticks = element_blank()) + 93 | scale_x_discrete(name ="") + 94 | # stat_summary(fun.y=median, geom="line") 95 | geom_segment(aes(x = 0.9, y = samp.m, xend = 1.1, yend = samp.m)) + 96 | geom_segment(aes(x = 0.9, y = samp.md, xend = 1.1, yend = samp.md), colour = "orange") + 97 | geom_segment(aes(x = 0.9, y = samp.tm, xend = 1.1, yend = samp.tm), colour = "blue") + 98 | annotate("text", x = 1.185, y = samp.m-0.1, label = 'bold("Mean")', size = 4, parse = TRUE) + 99 | annotate("text", x = 1.2, y = samp.md+0.1, label = 'bold("Median")', size = 4, colour = "orange", parse = TRUE) + 100 | annotate("text", x = 0.75, y = samp.tm, label = 'bold("Trimmed\nmean")', size = 4, colour = "blue", parse = TRUE) 101 | ``` 102 | 103 | ### Plot distributions of bootstrap estimates 104 | 105 | The distribution of bootstrap medians is multi-modal and very different from that of the mean and the 20% trimmed mean. To compute confidence intervals for the median in the one-sample case, it is recommended to use the parametric approach implemented in the function `sint()` available in the file *./functions/Rallfun-v40.txt* in this repository. 106 | 107 | ```{r} 108 | df <- tibble(res = c(boot.m, boot.tm, boot.md), 109 | est = factor(c(rep("Mean",nboot), rep("Trimmed mean",nboot), rep("Median",nboot))) 110 | ) 111 | ggplot(df, aes(x = res, colour = est)) + theme_gar + 112 | geom_line(aes(y = ..density..), stat = "density", size = 1) + 113 | labs(x = "Bootstrap estimates", y = "Density") + 114 | theme(legend.position = "bottom", 115 | axis.text.y = element_blank(), 116 | axis.ticks.y = element_blank()) 117 | # ggtitle("Boostrap samples") 118 | ``` 119 | 120 | ## Matrix 121 | ```{r} 122 | set.seed(21) # reproducible results 123 | n <- 20 # sample size 124 | samp <- rnorm(n) # get normal sample 125 | nboot <- 1000 # number of bootstrap samples 126 | # sample with replacement + reoganise into a matrix 127 | boot.samp <- matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot) 128 | boot.m <- apply(boot.samp, 1, mean) 129 | boot.md <- apply(boot.samp, 1, median) 130 | ``` 131 | 132 | ## Functions 133 | 134 | Examples of R packages for bootstrap inferences: 135 | 136 | - [`boot`](https://www.statmethods.net/advstats/bootstrapping.html) 137 | 138 | - [`resample`](https://cran.r-project.org/web/packages/resample/index.html) 139 | 140 | - [`bootstrap`](https://cran.r-project.org/web/packages/bootstrap/index.html) 141 | 142 | - [`WRS2`](https://cran.r-project.org/web/packages/WRS2/index.html) 143 | 144 | Functions from [Rand Wilcox](https://dornsife.usc.edu/labs/rwilcox/software/). 145 | 146 | ```{r, eval = FALSE} 147 | # TO USE THE FUNCTIONS, FIRST USE THE SOURCE COMMAND: 148 | # source('./functions/Rallfun-v40.txt') 149 | 150 | set.seed(1) # reproducible results 151 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0) 152 | # est = estimator, could be var, mad, to use a trimmed mean, add argument trim = 0.2 153 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0, trim = 0.1) 154 | # nv = null value for NHST 155 | # always set SEED to FALSE otherwise the function always returns the same results for a given input. 156 | # the only way to really understand the code is to look at it: edit(onesampb) 157 | 158 | # for inferences on trimmed means only: 159 | trimpb() 160 | 161 | # for inferences on the Harrell-Davis quantile estimator (default q=0.5 = median): 162 | hdpb() 163 | ``` 164 | 165 | # Generate sample from lognormal distribution 166 | 167 | ## Illustrate population 168 | 169 | Lognormal distribution from the which the sample is taken. 170 | ```{r, fig.height=3} 171 | x <- seq(0, 7, 0.001) 172 | y <- dlnorm(x) 173 | 174 | df <- tibble(x = x, y = y) 175 | 176 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar + 177 | geom_line(size = 1.5, colour = "orange") + 178 | labs(x = "Values", y = "Density") + 179 | theme(axis.text.y = element_blank(), 180 | axis.ticks.y = element_blank()) 181 | p 182 | ``` 183 | 184 | ## Get sample 185 | ```{r} 186 | set.seed(21) # reproducible example 187 | n <- 30 # sample size 188 | meanlog <- 0 189 | sdlog <- 1 190 | samp <- rlnorm(n, meanlog = meanlog, sdlog = sdlog) # random sample 191 | ``` 192 | 193 | ## Illustrate sample 194 | ```{r, fig.width=2} 195 | set.seed(21) # for reproducible jitter 196 | # raw data 197 | df <- tibble(pc = samp, 198 | cond = rep(1, n)) 199 | 200 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar + 201 | # scatterplots 202 | geom_jitter(width = .05, alpha = 0.5, 203 | size = 3, shape = 21, fill = "grey", colour = "black") + 204 | theme(axis.ticks.x = element_blank(), 205 | axis.text.x = element_blank(), 206 | axis.title.x = element_blank()) + 207 | scale_x_continuous(breaks = 1) + 208 | # mean 209 | geom_segment(aes(x = 0.9, xend = 1.1, 210 | y = mean(samp), yend = mean(samp))) + 211 | # median 212 | geom_segment(aes(x = 0.9, xend = 1.1, 213 | y = median(samp), yend = median(samp)), 214 | linetype = 'longdash', lineend = 'round') + 215 | theme(panel.grid.minor.x = element_blank()) + 216 | labs(y = "Values") 217 | p 218 | pA <- p 219 | ``` 220 | 221 | Sample mean = `r round(mean(samp), digits=2)` 222 | 223 | # Standard confidence interval 224 | 225 | ## T-value: define function 226 | ```{r} 227 | # mean minus null value divided by SEM 228 | tval <- function(x,nv){ 229 | tval <- (mean(x) - nv) / sqrt(var(x)/length(x)) 230 | tval 231 | } 232 | ``` 233 | 234 | ## P value 235 | Let say our hypothesis is mu = 2. 236 | 237 | First, let's use the function: 238 | ```{r} 239 | mu <- 2 # null hypothesis 240 | t.test(samp, mu = mu)$p.value 241 | ``` 242 | 243 | Then check the formula: 244 | ```{r, warning=FALSE} 245 | dof <- length(samp)-1 # degrees of freedom 246 | 2 * pt(abs(tval(samp, mu)), dof, lower.tail = FALSE) 247 | ``` 248 | 249 | ## Illustrate theoretical *T* distribution 250 | ```{r, warning = FALSE} 251 | alpha <- 0.05 252 | 253 | p <- ggplot(data.frame(x = c(-5, 5)), aes(x)) + theme_gar + 254 | labs(y = "Density") + 255 | theme(axis.text = element_text(size = 14), 256 | axis.title = element_text(size = 16), 257 | plot.title = element_text(size=20), 258 | axis.text.y = element_blank(), 259 | axis.ticks.y = element_blank()) + 260 | labs(x = "T values") + 261 | ggtitle(substitute(paste(italic(T)," distribution with ",dof," degrees of freedom"), list(dof=dof))) + 262 | # area under the curve -> p value 263 | # https://christianburkhart.de/blog/area_under_the_curve/ 264 | stat_function(fun = dt, 265 | geom = "area", 266 | xlim = c(-5, tval(samp, mu)), 267 | alpha = .2, 268 | fill = "red", 269 | args = list(df = dof)) + 270 | # theoretical cut-off t value for alpha = 0.05 271 | geom_segment(x = qt(alpha/2, dof), 272 | xend = qt(alpha/2, dof), 273 | y = 0, 274 | yend = dt(qt(alpha/2, dof), dof), 275 | size = 1, 276 | colour = "red") + 277 | annotate(geom = "label", x = -2.7, y = 0.07, size = 7, colour = "red", 278 | label = expression(paste("T"[crit]))) + # italic("t") 279 | # observed t value 280 | # geom_vline(xintercept = abs(tval(samp, mu))) 281 | geom_segment(x = tval(samp, mu), 282 | xend = tval(samp, mu), 283 | y = 0, 284 | yend = dt(tval(samp, mu), dof), 285 | size = 1, 286 | colour = "black", 287 | linetype = "dashed") + 288 | annotate(geom = "label", x = -1.8, y = 0.22, size = 7, colour = "black", 289 | label = expression(paste("T"[obs]))) + # paste("obs.", italic("t"))) 290 | # Plot density function 291 | stat_function(fun = dt, args = list(df = dof), 292 | size = 1) + 293 | # P value 294 | geom_segment(x = -2.8, xend = -1.5, 295 | y = 0.15, yend = 0.07, 296 | arrow = arrow(type = "closed", 297 | length = unit(0.25, "cm")), 298 | colour = "grey30", 299 | size = 1) + 300 | annotate(geom = "label", x = -3, y = 0.15, size = 7, 301 | colour = "white", fill = "red", fontface = "bold", 302 | label = "P value / 2") + 303 | annotate(geom = "label", x = 2.8, y = 0.35, size = 7, 304 | colour = "white", fill = "black", fontface = "bold", 305 | label = expression(paste(bold("CI = m \u00B1 T"[crit]), 306 | bold(" * SEM")))) 307 | # ggsave(filename = './theor_t.pdf') 308 | pB <- p 309 | p 310 | ``` 311 | 312 | ## Compute confidence interval 313 | 314 | ### Using built-in function 315 | ```{r} 316 | ci.t <- t.test(samp, mu = mu)$conf.int 317 | ci.t 318 | ``` 319 | 320 | ### Formula 321 | ```{r} 322 | alpha <- 0.05 # alpha level 323 | df <- n-1 # degrees of freedom 324 | samp.m <- mean(samp) # sample mean 325 | sem <- sd(samp) / sqrt(n) # sample estimate of the standard error of the mean 326 | ci <- vector("numeric",2) 327 | ci[1] <- samp.m - qt(1-alpha/2, df) * sem # lower bound of CI 328 | ci[2] <- samp.m + qt(1-alpha/2, df) * sem # upper bound of CI 329 | ci 330 | ``` 331 | 332 | # Generate bootstrap samples 333 | ```{r} 334 | set.seed(666) 335 | nboot <- 5000 336 | bootsamp <- matrix(sample(samp, nboot * n, replace = TRUE), nrow = nboot) 337 | ``` 338 | 339 | ## Illustrate a few bootstrap samples 340 | 341 | For each sample we superimpose the (bootstrap) median. 342 | 343 | ```{r, fig.height=6} 344 | nb <- 20 345 | df <- tibble(res = as.vector(bootsamp[1:nb,]), 346 | bootid = rep(1:nb, each = n)) 347 | 348 | df2 <- tibble(bootid = 1:nb, 349 | res = apply(bootsamp[1:nb,],1,mean)) 350 | 351 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar + 352 | geom_point(alpha = 0.3, position = position_jitter(height=0.1)) + 353 | labs(x = "Values", y = "Bootstrap samples") + 354 | geom_segment(data = df2, aes(x = res, xend = res, 355 | y = bootid - 0.4, yend = bootid + 0.4), 356 | size = 1.5) + 357 | theme(panel.grid.minor = element_blank()) + 358 | scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) 359 | pC <- p 360 | p 361 | ``` 362 | 363 | ## Illustrate bootstrap sampling distribution of the mean 364 | 365 | Compute confidence interval and other quantities. 366 | 367 | Here and in the rest of the tutorial, we compute bootstrap CI using `quantile(type = 6)` of the bootstrap distribution. This is recommended in this article: 368 | 369 | Hesterberg, Tim C. “What Teachers Should Know About the Bootstrap: Resampling in the Undergraduate Statistics Curriculum.” The American Statistician 69, no. 4 (October 2, 2015): 371–86. https://doi.org/10.1080/00031305.2015.1089789. 370 | 371 | Rand Wilcox uses a different approach. See for instance `help(onesampb)`, in which the bounds of the CI are defined using the bootstrap distribution `bvec`, `alpha` (say = 0.05) and `nboot` (say = 1,000 bootstrap samples): 372 | 373 | `low <- round((alpha/2)*nboot)` 374 | `up <- nboot-low` 375 | `low <- low+1` 376 | `ci_lower_bound <- bvec[low]` 377 | `ci_upper_bound <- bvec[up]` 378 | 379 | In practice it is unclear if these choices make any difference. What we know is that with `nboot` large enough, the choice of quantile method should make virtually no difference. 380 | 381 | ```{r} 382 | # bootstrap means 383 | bootm <- apply(bootsamp, 1, mean) 384 | # confidence interval 385 | bootci <- quantile(bootm, probs = c(0.025, 0.975), type = 6) 386 | # bootstrap estimation of the standard error 387 | bootsamp.sd <- sd(bootsamp) 388 | # P value 389 | pv <- mean(bootm < mu) # + .5*mean(bootsamp==mu) 390 | pv <- 2 * min(c(pv, 1-pv)) 391 | ``` 392 | 393 | Bootstrap 95% CI = [`r round(bootci[1], digits=2)`, `r round(bootci[2], digits=2)`] 394 | Bootstrap estimate of the SEM = `r round(bootsamp.sd, digits=2)` 395 | Bootstrap *P* value (hypothesis = 2) = `r pv` 396 | 397 | Alternatively, we could compute a highest density interval (HDI): 398 | ```{r, eval = FALSE} 399 | require(HDInterval) 400 | boothdi <- HDInterval::hdi(bootm) 401 | ``` 402 | 403 | We illustrate the distribution of the bootstrap samples, from which we derive four elements: 404 | 405 | - confidence/compatibility interval 406 | 407 | - p value 408 | 409 | - bootstrap estimate of the standard error (SE) 410 | 411 | - bootstrap estimate of bias 412 | 413 | The distribution is also our best estimate of the shape the sampling distribution of the median, given the data and our model. 414 | 415 | ### Make data frame 416 | ```{r} 417 | df <- as_tibble(with(density(bootm),data.frame(x,y))) 418 | 419 | df.pv <- tibble(x = df$x[df$x>mu], 420 | y = df$y[df$x>mu]) 421 | ``` 422 | 423 | ### Figure 424 | ```{r} 425 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar + 426 | # geom_line(stat = "density") + 427 | labs(x = "Bootstrap means", 428 | y = "Density") + 429 | theme(axis.text.y = element_blank(), 430 | axis.ticks.y = element_blank()) + 431 | # P value 432 | geom_area(data = df.pv, 433 | aes(x = x, y = y), 434 | fill = "red", alpha = .2) + 435 | # density 436 | geom_line(data = df, size = 1) + 437 | # Null value 438 | geom_segment(x = mu, 439 | xend = mu, 440 | y = 0, 441 | yend = df$y[which.min(abs(df$x-mu))], 442 | size = 1, 443 | colour = "black", 444 | linetype = "dashed") + 445 | # confidence interval ---------------------- 446 | geom_segment(x = bootci[1], xend = bootci[2], 447 | y = 0, yend = 0, 448 | lineend = "round", size = 2, colour = "black") + 449 | annotate(geom = "label", x = bootci[1]+0.15, y = 0.07, size = 5, 450 | colour = "white", fill = "black", fontface = "bold", 451 | label = paste("L = ", round(bootci[1], digits = 2))) + 452 | annotate(geom = "label", x = bootci[2]-0.15, y = 0.07, size = 5, 453 | colour = "white", fill = "black", fontface = "bold", 454 | label = paste("U = ", round(bootci[2], digits = 2))) + 455 | # sample mean: vertical line + label 456 | geom_vline(xintercept = mean(samp), 457 | linetype = 'solid') + 458 | annotate(geom = "label", x = 2.3, y = 1.2, size = 7, 459 | colour = "white", fill = "black", fontface = "bold", 460 | label = paste("Sample mean = ", round(samp.m, digits = 2))) + 461 | # vertical line marking bootstrap mean 462 | # geom_vline(xintercept = mean(bootsamp), 463 | # linetype = 'solid') 464 | # SEM label + segment 465 | annotate(geom = "label", x = 2.5, y = 0.7, size = 7, 466 | colour = "white", fill = "grey", fontface = "bold", 467 | label = paste("SD =",round(bootsamp.sd, digits = 2),"= SEM")) + 468 | geom_segment(x = 1.25, 469 | xend = 1.85, 470 | y = 0.7, yend = 0.7, 471 | arrow = arrow(type = "closed", 472 | length = unit(0.25, "cm"), 473 | ends = "both"), 474 | colour = "grey", size = 1) + 475 | # P value 476 | geom_segment(x = 2.6, xend = 2.1, y = 0.4, yend = 0.2, 477 | arrow = arrow(type = "closed", 478 | length = unit(0.25, "cm")), 479 | colour = "grey30", size = 1) + 480 | annotate(geom = "label", x = 2.6, y = 0.4, size = 7, 481 | colour = "white", fill = "red", fontface = "bold", 482 | label = "P value / 2") 483 | 484 | p 485 | pD <- p 486 | ``` 487 | 488 | # Summary figure 489 | ```{r, eval = FALSE} 490 | p1 <- cowplot::plot_grid(pA, pB, 491 | labels = c("A", "B"), 492 | ncol = 2, 493 | nrow = 1, 494 | rel_widths = c(1, 4), 495 | label_size = 20, 496 | hjust = -0.5, 497 | scale=.95, 498 | align = "h") 499 | 500 | p2 <- cowplot::plot_grid(pC, pD, 501 | labels = c("C", "D"), 502 | ncol = 2, 503 | nrow = 1, 504 | rel_widths = c(2, 3), 505 | label_size = 20, 506 | hjust = -0.5, 507 | scale=.95, 508 | align = "h") 509 | 510 | cowplot::plot_grid(p1, p2, 511 | labels = c("", ""), 512 | ncol = 1, 513 | nrow = 2, 514 | rel_heights = c(1, 1), 515 | label_size = 20, 516 | hjust = -0.5, 517 | scale=.95, 518 | align = "h") 519 | 520 | # save figure 521 | ggsave(filename=('./figures/figure_pb.pdf'),width=15,height=15) 522 | ggsave(filename=('./figures/figure1.pdf'),width=15,height=15) 523 | ``` -------------------------------------------------------------------------------- /pc.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Percentage correct example" 3 | author: "Guillaume A. Rousselet" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: 7 | fig_caption: no 8 | number_sections: no 9 | toc: yes 10 | toc_depth: 2 11 | # github_document: 12 | # html_preview: yes 13 | # toc: yes 14 | # toc_depth: 2 15 | --- 16 | 17 | # Dependencies 18 | ```{r, message=FALSE, warning=FALSE} 19 | library(tibble) 20 | library(ggplot2) 21 | # library(cowplot) 22 | source("./functions/theme_gar.txt") 23 | source("./functions/Rallfun-v40.txt") 24 | ``` 25 | 26 | After taking the mean across trials, correct/incorrect data are often wrongly assumed to be normally distributed. Instead, these data are always positive and bounded, and are better modelled using a beta distribution (Kruschke, 2014). Similarly, standard methods are inappropriate for ordinal data, like Likert scale results, which are too often treated as metric (Bürkner & Vuorre, 2019; Liddell & Kruschke, 2018). 27 | 28 | # Make data and compute confidence intervals 29 | ```{r} 30 | set.seed(6666) 31 | n <- 15 32 | norm_samp <- rnorm(n, mean = 75, sd = 10) 33 | skew_samp <- rbeta(n, 10, 0.2) * 100 34 | skew_samp2 <- sort(skew_samp) 35 | skew_samp2[1] <- 60 36 | # hist(rbeta(10000,10,0.2)*100, 50) 37 | 38 | norm_stci <- t.test(norm_samp, conf.level = 0.95)$conf.int 39 | skew_stci <- t.test(skew_samp, conf.level = 0.95)$conf.int 40 | skew_stci2 <- t.test(skew_samp2, conf.level = 0.95)$conf.int 41 | 42 | norm_pbci <- onesampb(norm_samp, est = mean)$ci 43 | skew_pbci <- onesampb(skew_samp, est = mean)$ci 44 | skew_pbci2 <- onesampb(skew_samp2, est = mean)$ci 45 | ``` 46 | 47 | # Illustration: standard confidence interval 48 | ```{r} 49 | set.seed(21) # for reproducible jitter 50 | # raw data 51 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2), 52 | cond = rep(1:3, each = n)) 53 | # mean + confidence intervals 54 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)), 55 | cond = 1:3, 56 | ci_low = c(norm_stci[1], skew_stci[1], skew_stci2[1]), 57 | ci_up = c(norm_stci[2], skew_stci[2], skew_stci2[2])) 58 | 59 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar + 60 | # scatterplots 61 | geom_jitter(width = .1, alpha = 0.5, 62 | size = 3, shape = 21, fill = "grey", colour = "black") + 63 | scale_x_continuous(breaks = c(1,2,3)) + 64 | # confidence intervals 65 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 66 | width=.05, size=0.75) + 67 | geom_point(data = df2, aes(x=cond, y=pc), size=3) + 68 | theme(panel.grid.minor.x = element_blank()) + 69 | labs(x = "Conditions", y = "Percent correct") + 70 | ggtitle("Standard confidence intervals") 71 | p 72 | pA <- p 73 | ``` 74 | 75 | Sample means: 76 | 77 | - Condition 1 = `r round(mean(norm_samp), digits=1)` 78 | 79 | - Condition 2 = `r round(mean(skew_samp), digits=1)` 80 | 81 | - Condition 3 = `r round(mean(skew_samp2), digits=1)` 82 | 83 | # Illustration: bootstrap confidence interval 84 | ```{r} 85 | set.seed(21) # for reproducible jitter 86 | # raw data 87 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2), 88 | cond = rep(1:3, each = n)) 89 | # mean + confidence intervals 90 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)), 91 | cond = 1:3, 92 | ci_low = c(norm_pbci[1], skew_pbci[1], skew_pbci2[1]), 93 | ci_up = c(norm_pbci[2], skew_pbci[2], skew_pbci2[2])) 94 | 95 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar + 96 | # scatterplots 97 | geom_jitter(width = .1, alpha = 0.5, 98 | size = 3, shape = 21, fill = "grey", colour = "black") + 99 | scale_x_continuous(breaks = c(1,2,3)) + 100 | # confidence intervals 101 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 102 | width=.05, size=0.75) + 103 | geom_point(data = df2, aes(x=cond, y=pc), size=3) + 104 | theme(panel.grid.minor.x = element_blank()) + 105 | labs(x = "Conditions", y = "Percent correct") + 106 | ggtitle("Bootstrap confidence intervals") 107 | p 108 | pB <- p 109 | ``` 110 | 111 | # Summary figure 112 | ```{r, eval = FALSE} 113 | cowplot::plot_grid(pA, pB, 114 | labels = c("A", "B"), 115 | ncol = 2, 116 | nrow = 1, 117 | rel_widths = c(1, 1), 118 | label_size = 20, 119 | hjust = -0.5, 120 | scale=.95, 121 | align = "h") 122 | 123 | # save figure 124 | ggsave(filename=('./figures/figure_pc.pdf'),width=10,height=5) 125 | ggsave(filename=('./figures/figure2.pdf'),width=10,height=5) 126 | ``` 127 | 128 | # References 129 | Bürkner, Paul-Christian, and Matti Vuorre. “Ordinal Regression Models in Psychology: A Tutorial.” Advances in Methods and Practices in Psychological Science 2, no. 1 (March 1, 2019): 77–101. https://doi.org/10.1177/2515245918823199. 130 | 131 | Kruschke, John. Doing Bayesian Data Analysis - 2nd Edition. Academic Press, 2014. https://www.elsevier.com/books/doing-bayesian-data-analysis/kruschke/978-0-12-405888-0. 132 | 133 | Liddell, Torrin M., and John K. Kruschke. “Analyzing Ordinal Data with Metric Models: What Could Possibly Go Wrong?” Journal of Experimental Social Psychology 79 (November 1, 2018): 328–48. https://doi.org/10.1016/j.jesp.2018.08.009. 134 | -------------------------------------------------------------------------------- /sampdist.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bootstrap sampling distributions" 3 | author: "Guillaume A. Rousselet" 4 | date: "`r Sys.Date()`" 5 | output: 6 | pdf_document: 7 | fig_caption: no 8 | number_sections: no 9 | toc: yes 10 | toc_depth: 2 11 | # github_document: 12 | # html_preview: yes 13 | # toc: yes 14 | # toc_depth: 2 15 | --- 16 | 17 | # Dependencies 18 | ```{r, message=FALSE, warning=FALSE} 19 | library(tibble) 20 | library(ggplot2) 21 | # library(cowplot) 22 | source("./functions/theme_gar.txt") 23 | # source("./functions/Rallfun-v40.txt") 24 | ``` 25 | 26 | ```{r} 27 | sessionInfo() 28 | ``` 29 | 30 | # Example 1: samples from lognormal distribution 31 | 32 | We sample from a lognormal distribution, because, you know, life is [lognormal](https://stat.ethz.ch/~stahel/lognormal/lnboard/brochure.html). 33 | 34 | ## Illustrate population 35 | For comparison, show normal distribution with the same mean and sd as the default lognormal (meanlog = 0, sdlog = 1). The [mean of the lognormal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution) with meanlog = 0 and sdlog = 1 is `exp(meanlog + sdlog^2/2)` = exp(1/2). 36 | The variance is `(exp(sdlog^2)-1) * exp(2*meanlog + sdlog^2)` = (exp(1)-1) * exp(1). 37 | 38 | ### Check that the two distributions have the same mean and variance. 39 | ```{r} 40 | set.seed(777) 41 | pop1 <- rnorm(1000000, 42 | mean = exp(1/2), 43 | sd = sqrt((exp(1)-1) * exp(1)) 44 | ) 45 | #mean(pop1) 46 | #var(pop1) 47 | 48 | pop2 <- rlnorm(1000000, meanlog = 0, sdlog = 1) 49 | #mean(pop2) 50 | #var(pop2) 51 | 52 | pop.tm <- mean(pop2, trim = 0.2) # save population trimmed mean for figure 53 | ``` 54 | Normal population: 55 | 56 | - Mean = `r round(mean(pop1), digits=2)` 57 | 58 | - Variance = `r round(var(pop1), digits=2)` 59 | 60 | Lognormal population: 61 | 62 | - Mean = `r round(mean(pop2), digits=2)` 63 | 64 | - Variance = `r round(var(pop2), digits=2)` 65 | 66 | 67 | ### Make figure 68 | ```{r, fig.height=4} 69 | x.n <- seq(-5, 7, 0.001) 70 | x.ln <- seq(0, 7, 0.001) 71 | 72 | y.n <- dnorm(x.n, mean = exp(1/2), sd = sqrt((exp(1)-1) * exp(1))) 73 | y.ln <- dlnorm(x.ln) 74 | 75 | df <- tibble(x = c(x.n, x.ln), 76 | y = c(y.n, y.ln), 77 | dist = c(rep("Normal", length(x.n)), rep("Lognormal", length(x.ln))) 78 | ) 79 | 80 | p <- ggplot(df, aes(x = x, y = y, colour = dist)) + theme_gar + 81 | geom_line(size = 1.5) + 82 | labs(x = "Values", y = "Density") + 83 | scale_colour_manual(name = "", values = c("Lognormal" = "orange", 84 | "Normal" = "purple")) + 85 | theme(legend.position = c(0.15, 0.86), 86 | axis.text.y = element_blank(), 87 | axis.ticks.y = element_blank()) 88 | p 89 | ``` 90 | 91 | ### Save figure 92 | ```{r, eval = FALSE} 93 | # save figure 94 | ggsave(filename=('./figures/figure_lognormal_pop.pdf'),width=7,height=5) 95 | ggsave(filename=('./figures/figure3.pdf'),width=7,height=5) 96 | ``` 97 | 98 | ## Illustrate sampling distributions 99 | We draw 50,000 samples for different sizes from our population. 100 | Each of these samples is like an experiment. For each experiment, we compute a statistics of interest, here the 20% trimmed mean. In each plot, the vertical dashed line marks the population 20% trimmed mean that we are trying to estimate. 101 | 102 | ### n = 20 103 | ```{r, fig.height=3} 104 | # set.seed(777) 105 | n <- 20 # sample size 106 | nsamp <- 50000 # number of samples (experiments) 107 | # sampling distribution of the mean 108 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2) 109 | v <- enframe(dist.samp, name = NULL) 110 | 111 | p <- ggplot(v, aes(x = value)) + theme_gar + 112 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 113 | geom_line(stat = 'density', size = 1) + 114 | theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"), 115 | axis.text.y = element_blank(), 116 | axis.ticks.y = element_blank()) + 117 | labs(x = "Sample trimmed means", y = "Density") + 118 | ggtitle("Sampling distribution: n = 20") + 119 | coord_cartesian(xlim = c(0, 4)) 120 | p.sampdist1 <- p 121 | p 122 | ``` 123 | 124 | ### n = 30 125 | ```{r, fig.height=3} 126 | # set.seed(777) 127 | n <- 30 # sample size 128 | nsamp <- 50000 # number of samples (experiments) 129 | # sampling distribution of the mean 130 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2) 131 | v <- enframe(dist.samp, name = NULL) 132 | 133 | p <- ggplot(v, aes(x = value)) + theme_gar + 134 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 135 | geom_line(stat = 'density', size = 1) + 136 | theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"), 137 | axis.text.y = element_blank(), 138 | axis.ticks.y = element_blank()) + 139 | labs(x = "Sample trimmed means", y = "Density") + 140 | coord_cartesian(xlim = c(0, 4)) + 141 | ggtitle("Sampling distribution: n = 30") 142 | p.sampdist2 <- p 143 | p 144 | ``` 145 | 146 | ### n = 50 147 | ```{r, fig.height=3} 148 | # set.seed(777) 149 | n <- 50 # sample size 150 | nsamp <- 50000 # number of samples (experiments) 151 | # sampling distribution of the mean 152 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2) 153 | v <- enframe(dist.samp, name = NULL) 154 | 155 | p <- ggplot(v, aes(x = value)) + theme_gar + 156 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 157 | geom_line(stat = 'density', size = 1) + 158 | theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"), 159 | axis.text.y = element_blank(), 160 | axis.ticks.y = element_blank()) + 161 | labs(x = "Sample trimmed means", y = "Density") + 162 | coord_cartesian(xlim = c(0, 4)) + 163 | ggtitle("Sampling distribution: n = 50") 164 | p.sampdist3 <- p 165 | p 166 | ``` 167 | 168 | ## Illustrate 4 samples 169 | 170 | In each plot, the vertical dashed line marks the population 20% trimmed mean, whereas the vertical continuous line marks the sample 20% trimmed mean. 171 | 172 | ### Define function for horizontal jitter plot 173 | ```{r} 174 | samp_jitter <- function(df, samp.tm){ 175 | p <- ggplot(data = df, aes(x = res, y = cond)) + theme_gar + 176 | # scatterplots 177 | geom_jitter(height = .05, alpha = 0.5, 178 | size = 3, shape = 21, fill = "grey", colour = "black") + 179 | theme(axis.ticks.y = element_blank(), 180 | axis.text.y = element_blank(), 181 | axis.title = element_blank(), 182 | panel.grid.minor.x = element_blank(), 183 | plot.margin = unit(c(0, 0, 0, 0), "cm")) + 184 | scale_x_continuous(breaks = seq(0,12,2)) + 185 | scale_y_continuous(breaks = 1) + 186 | coord_cartesian(xlim = c(0, 11)) + 187 | # sample trimmed mean 188 | geom_segment(aes(x = samp.tm, xend = samp.tm, 189 | y = 0.9, yend = 1.1)) + 190 | # population trimmed mean 191 | geom_segment(aes(x = pop.tm, xend = pop.tm, 192 | y = 0.9, yend = 1.1), 193 | linetype = 'longdash', lineend = 'round', colour = "black") + 194 | labs(x = "Values") 195 | p 196 | } 197 | ``` 198 | 199 | ### n = 20 200 | 201 | Generate all samples 202 | ```{r} 203 | # set.seed(777) 204 | n <- 20 # sample size 205 | samp20 <- matrix(rlnorm(n*4), nrow = 4) # get sample 206 | samp20.tm <- apply(samp20, 1, mean, trim = 0.2) 207 | ``` 208 | 209 | Illustrate sample 1 210 | ```{r, fig.height=1} 211 | set.seed(21) # reproducible jitter 212 | S <- 1 213 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 214 | p <- samp_jitter(df, samp20.tm[S]) 215 | p.samp20_1 <- p 216 | p 217 | ``` 218 | 219 | Illustrate sample 2 220 | ```{r, fig.height=1} 221 | set.seed(21) # reproducible jitter 222 | S <- 2 223 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 224 | p <- samp_jitter(df, samp20.tm[S]) 225 | p.samp20_2 <- p 226 | p 227 | ``` 228 | 229 | Illustrate sample 3 230 | ```{r, fig.height=1} 231 | set.seed(21) # reproducible jitter 232 | S <- 3 233 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 234 | p <- samp_jitter(df, samp20.tm[S]) 235 | p.samp20_3 <- p 236 | p 237 | ``` 238 | 239 | Illustrate sample 4 240 | ```{r, fig.height=1} 241 | set.seed(21) # reproducible jitter 242 | S <- 4 243 | df <- tibble(res = samp20[S,], cond = rep(1, n)) 244 | p <- samp_jitter(df, samp20.tm[S]) 245 | p.samp20_4 <- p 246 | p 247 | ``` 248 | 249 | ### n = 30 250 | 251 | Generate all samples 252 | ```{r} 253 | # set.seed(777) 254 | n <- 30 # sample size 255 | samp30 <- matrix(rlnorm(n*4), nrow = 4) # get sample 256 | samp30.tm <- apply(samp30, 1, mean, trim = 0.2) 257 | ``` 258 | 259 | Illustrate sample 1 260 | ```{r, fig.height=1} 261 | set.seed(21) # reproducible jitter 262 | S <- 1 263 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 264 | p <- samp_jitter(df, samp30.tm[S]) 265 | p.samp30_1 <- p 266 | p 267 | ``` 268 | 269 | Illustrate sample 2 270 | ```{r, fig.height=1} 271 | set.seed(21) # reproducible jitter 272 | S <- 2 273 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 274 | p <- samp_jitter(df, samp30.tm[S]) 275 | p.samp30_2 <- p 276 | p 277 | ``` 278 | 279 | Illustrate sample 3 280 | ```{r, fig.height=1} 281 | set.seed(21) # reproducible jitter 282 | S <- 3 283 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 284 | p <- samp_jitter(df, samp30.tm[S]) 285 | p.samp30_3 <- p 286 | p 287 | ``` 288 | 289 | Illustrate sample 4 290 | ```{r, fig.height=1} 291 | set.seed(21) # reproducible jitter 292 | S <- 4 293 | df <- tibble(res = samp30[S,], cond = rep(1, n)) 294 | p <- samp_jitter(df, samp30.tm[S]) 295 | p.samp30_4 <- p 296 | p 297 | ``` 298 | 299 | ### n = 50 300 | 301 | Generate all samples 302 | ```{r} 303 | # set.seed(777) 304 | n <- 50 # sample size 305 | samp50 <- matrix(rlnorm(n*4), nrow = 4) # get sample 306 | samp50.tm <- apply(samp50, 1, mean, trim = 0.2) 307 | ``` 308 | 309 | Illustrate sample 1 310 | ```{r, fig.height=1} 311 | set.seed(21) # reproducible jitter 312 | S <- 1 313 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 314 | p <- samp_jitter(df, samp50.tm[S]) 315 | p.samp50_1 <- p 316 | p 317 | ``` 318 | 319 | Illustrate sample 2 320 | ```{r, fig.height=1} 321 | set.seed(21) # reproducible jitter 322 | S <- 2 323 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 324 | p <- samp_jitter(df, samp50.tm[S]) 325 | p.samp50_2 <- p 326 | p 327 | ``` 328 | 329 | Illustrate sample 3 330 | ```{r, fig.height=1} 331 | set.seed(21) # reproducible jitter 332 | S <- 3 333 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 334 | p <- samp_jitter(df, samp50.tm[S]) 335 | p.samp50_3 <- p 336 | p 337 | ``` 338 | 339 | Illustrate sample 4 340 | ```{r, fig.height=1} 341 | set.seed(21) # reproducible jitter 342 | S <- 4 343 | df <- tibble(res = samp50[S,], cond = rep(1, n)) 344 | p <- samp_jitter(df, samp50.tm[S]) 345 | p.samp50_4 <- p 346 | p 347 | ``` 348 | 349 | ## Illustrate 4 bootstrap sampling distributions 350 | 351 | We estimate the sampling distributions from the samples. 352 | 353 | ### Define density plot function 354 | ```{r} 355 | dens_plot <- function(dist.samp, samp.tm){ 356 | v <- enframe(dist.samp, name = NULL) 357 | p <- ggplot(v, aes(x = value)) + theme_gar + 358 | geom_vline(xintercept = samp.tm, linetype = 'solid', colour = "black") + 359 | geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") + 360 | geom_line(stat = 'density', size = 1) + 361 | theme(axis.text.y = element_blank(), 362 | axis.title.y = element_blank(), 363 | axis.ticks.y = element_blank(), 364 | plot.margin = unit(c(0, 0, 0.5, 0), "cm")) + 365 | labs(x = "Bootstrap estimates") + 366 | # ggtitle("Sampling distribution: n = 20") + 367 | coord_cartesian(xlim = c(0, 4)) 368 | p 369 | } 370 | ``` 371 | 372 | ### n = 20 373 | 374 | Sample 1 375 | ```{r, fig.height=3} 376 | # set.seed(777) 377 | S <- 1 378 | n <- 20 379 | nboot <- 5000 # number of bootstrap samples 380 | # bootstrap distribution 381 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 382 | p <- dens_plot(boot.samp, samp20.tm[S]) 383 | p.boot20_1 <- p 384 | p 385 | ``` 386 | 387 | Sample 2 388 | ```{r, fig.height=3} 389 | # set.seed(777) 390 | S <- 2 391 | n <- 20 392 | nboot <- 5000 # number of bootstrap samples 393 | # bootstrap distribution 394 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 395 | p <- dens_plot(boot.samp, samp20.tm[S]) 396 | p.boot20_2 <- p 397 | p 398 | ``` 399 | 400 | Sample 3 401 | ```{r, fig.height=3} 402 | # set.seed(777) 403 | S <- 3 404 | n <- 20 405 | nboot <- 5000 # number of bootstrap samples 406 | # bootstrap distribution 407 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 408 | p <- dens_plot(boot.samp, samp20.tm[S]) 409 | p.boot20_3 <- p 410 | p 411 | ``` 412 | 413 | Sample 4 414 | ```{r, fig.height=3} 415 | # set.seed(777) 416 | S <- 4 417 | n <- 20 418 | nboot <- 5000 # number of bootstrap samples 419 | # bootstrap distribution 420 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 421 | p <- dens_plot(boot.samp, samp20.tm[S]) 422 | p.boot20_4 <- p 423 | p 424 | ``` 425 | 426 | ### n = 30 427 | 428 | Sample 1 429 | ```{r, fig.height=3} 430 | # set.seed(777) 431 | S <- 1 432 | n <- 30 433 | nboot <- 5000 # number of bootstrap samples 434 | # bootstrap distribution 435 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 436 | p <- dens_plot(boot.samp, samp30.tm[S]) 437 | p.boot30_1 <- p 438 | p 439 | ``` 440 | 441 | Sample 2 442 | ```{r, fig.height=3} 443 | # set.seed(777) 444 | S <- 2 445 | nboot <- 5000 # number of bootstrap samples 446 | # bootstrap distribution 447 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 448 | p <- dens_plot(boot.samp, samp30.tm[S]) 449 | p.boot30_2 <- p 450 | p 451 | ``` 452 | 453 | Sample 3 454 | ```{r, fig.height=3} 455 | # set.seed(777) 456 | S <- 3 457 | nboot <- 5000 # number of bootstrap samples 458 | # bootstrap distribution 459 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 460 | p <- dens_plot(boot.samp, samp30.tm[S]) 461 | p.boot30_3 <- p 462 | p 463 | ``` 464 | 465 | Sample 4 466 | ```{r, fig.height=3} 467 | # set.seed(777) 468 | S <- 4 469 | nboot <- 5000 # number of bootstrap samples 470 | # bootstrap distribution 471 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 472 | p <- dens_plot(boot.samp, samp30.tm[S]) 473 | p.boot30_4 <- p 474 | p 475 | ``` 476 | 477 | ### n = 50 478 | 479 | Sample 1 480 | ```{r, fig.height=3} 481 | # set.seed(777) 482 | S <- 1 483 | n <- 50 484 | nboot <- 5000 # number of bootstrap samples 485 | # bootstrap distribution 486 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 487 | p <- dens_plot(boot.samp, samp50.tm[S]) 488 | p.boot50_1 <- p 489 | p 490 | ``` 491 | 492 | Sample 2 493 | ```{r, fig.height=3} 494 | # set.seed(777) 495 | S <- 2 496 | nboot <- 5000 # number of bootstrap samples 497 | # bootstrap distribution 498 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 499 | p <- dens_plot(boot.samp, samp50.tm[S]) 500 | p.boot50_2 <- p 501 | p 502 | ``` 503 | 504 | Sample 3 505 | ```{r, fig.height=3} 506 | # set.seed(777) 507 | S <- 3 508 | nboot <- 5000 # number of bootstrap samples 509 | # bootstrap distribution 510 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 511 | p <- dens_plot(boot.samp, samp50.tm[S]) 512 | p.boot50_3 <- p 513 | p 514 | ``` 515 | 516 | Sample 4 517 | ```{r, fig.height=3} 518 | # set.seed(777) 519 | S <- 4 520 | nboot <- 5000 # number of bootstrap samples 521 | # bootstrap distribution 522 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2) 523 | p <- dens_plot(boot.samp, samp50.tm[S]) 524 | p.boot50_4 <- p 525 | p 526 | ``` 527 | 528 | # Summary figure 529 | ```{r, eval = FALSE} 530 | 531 | # ------------------------------------------------- 532 | p2.1 <- cowplot::plot_grid(p.samp20_1, p.boot20_1 + theme(axis.title = element_blank()), 533 | labels = c("1", "B"), 534 | label_size = 18, 535 | hjust = c(-0.5, 2.2), 536 | vjust = c(1.5, -4.3), 537 | ncol = 1, nrow = 2, 538 | rel_heights = c(1.5, 3)) 539 | 540 | p2.2 <- cowplot::plot_grid(p.samp20_2, p.boot20_2 + theme(axis.title = element_blank()), 541 | labels = c("2"), 542 | label_size = 18, 543 | ncol = 1, nrow = 2, 544 | rel_heights = c(1.5, 3)) 545 | 546 | p2.3 <- cowplot::plot_grid(p.samp20_3, p.boot20_3 + theme(axis.title = element_blank()), 547 | labels = c("3"), 548 | label_size = 18, 549 | ncol = 1, nrow = 2, 550 | rel_heights = c(1.5, 3)) 551 | 552 | p2.4 <- cowplot::plot_grid(p.samp20_4, p.boot20_4, 553 | labels = c("4"), 554 | label_size = 18, 555 | ncol = 1, nrow = 2, 556 | rel_heights = c(1.5, 3)) 557 | 558 | p2 <- cowplot::plot_grid(p.sampdist1, 559 | p2.1, p2.2, p2.3, p2.4, 560 | labels = c("A"), 561 | hjust = 0.7, 562 | label_size = 18, 563 | ncol = 1, 564 | nrow = 5, 565 | align = "v", 566 | axis = "l", 567 | rel_heights = c(1, 1, 1, 1, 1)) 568 | 569 | # ------------------------------------------------- 570 | p3.1 <- cowplot::plot_grid(p.samp30_1, p.boot30_1 + theme(axis.title = element_blank()), 571 | labels = c(""), 572 | ncol = 1, nrow = 2, 573 | rel_heights = c(1.5, 3)) 574 | 575 | p3.2 <- cowplot::plot_grid(p.samp30_2, p.boot30_2 + theme(axis.title = element_blank()), 576 | labels = c(""), 577 | ncol = 1, nrow = 2, 578 | rel_heights = c(1.5, 3)) 579 | 580 | p3.3 <- cowplot::plot_grid(p.samp30_3, p.boot30_3 + theme(axis.title = element_blank()), 581 | labels = c(""), 582 | ncol = 1, nrow = 2, 583 | rel_heights = c(1.5, 3)) 584 | 585 | p3.4 <- cowplot::plot_grid(p.samp30_4, p.boot30_4, 586 | labels = c(""), 587 | ncol = 1, nrow = 2, 588 | rel_heights = c(1.5, 3)) 589 | 590 | p3 <- cowplot::plot_grid(p.sampdist2, 591 | p3.1, p3.2, p3.3, p3.4, 592 | labels = c(""), 593 | ncol = 1, 594 | nrow = 5, 595 | align = "v", 596 | axis = "l", 597 | rel_heights = c(1, 1, 1, 1, 1)) 598 | 599 | # ------------------------------------------------- 600 | p4.1 <- cowplot::plot_grid(p.samp50_1, p.boot50_1 + theme(axis.title = element_blank()), 601 | labels = c(""), 602 | ncol = 1, nrow = 2, 603 | rel_heights = c(1.5, 3)) 604 | 605 | p4.2 <- cowplot::plot_grid(p.samp50_2, p.boot50_2 + theme(axis.title = element_blank()), 606 | labels = c(""), 607 | ncol = 1, nrow = 2, 608 | rel_heights = c(1.5, 3)) 609 | 610 | p4.3 <- cowplot::plot_grid(p.samp50_3, p.boot50_3 + theme(axis.title = element_blank()), 611 | labels = c(""), 612 | ncol = 1, nrow = 2, 613 | rel_heights = c(1.5, 3)) 614 | 615 | p4.4 <- cowplot::plot_grid(p.samp50_4, p.boot50_4, 616 | labels = c(""), 617 | ncol = 1, nrow = 2, 618 | rel_heights = c(1.5, 3)) 619 | 620 | p4 <- cowplot::plot_grid(p.sampdist3, 621 | p4.1, p4.2, p4.3, p4.4, 622 | labels = c(""), 623 | ncol = 1, 624 | nrow = 5, 625 | align = "v", 626 | axis = "l", 627 | rel_heights = c(1, 1, 1, 1, 1)) 628 | 629 | cowplot::plot_grid(p2, p3, p4, 630 | labels = c(""), 631 | ncol = 3, 632 | nrow = 1, 633 | rel_widths = c(1, 1, 1), 634 | label_size = 20, 635 | hjust = -0.5, 636 | scale=.95, 637 | align = "h") 638 | 639 | # save figure 640 | ggsave(filename=('./figures/figure_sampdist_lognormal.pdf'),width=20,height=15) 641 | ggsave(filename=('./figures/figure4.pdf'),width=20,height=15) 642 | ``` 643 | --------------------------------------------------------------------------------