├── .gitignore
├── 2depgps.Rmd
├── 2indgps.Rmd
├── LICENSE
├── README.md
├── compcorr.Rmd
├── coverage.Rmd
├── data
    ├── beh_sim.RData
    ├── compcorr_nsim.RData
    ├── compcorr_nsim_0004.RData
    ├── compcorr_nsim_0005.RData
    ├── compcorr_nsim_0506.RData
    ├── coverage50.RData
    ├── ex_g1_h0.RData
    ├── ex_g1_h0_t_samp_dist.RData
    ├── gh_coverage.RData
    ├── gh_t_sampdist.RData
    ├── onesamp_coverage.RData
    ├── onesamp_coverage2.RData
    ├── onesamp_coverage_n10.RData
    ├── onesamp_coverage_nboot200.RData
    ├── onesamp_coverage_nboot2000.RData
    ├── onesamp_stability.RData
    ├── ptb_params.RData
    ├── ptb_tboot.RData
    ├── twosamp_coverage.RData
    └── twosamp_fp.RData
├── docs
    ├── 2depgps.md
    ├── 2depgps_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   └── unnamed-chunk-8-1.png
    ├── 2indgps.md
    ├── 2indgps_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-14-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   └── unnamed-chunk-8-1.png
    ├── compcorr.md
    ├── compcorr_files
    │   └── figure-gfm
    │   │   ├── define-populations-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-20-1.png
    │   │   ├── unnamed-chunk-21-1.png
    │   │   ├── unnamed-chunk-22-1.png
    │   │   ├── unnamed-chunk-24-1.png
    │   │   ├── unnamed-chunk-25-1.png
    │   │   ├── unnamed-chunk-28-1.png
    │   │   ├── unnamed-chunk-29-1.png
    │   │   ├── unnamed-chunk-3-1.png
    │   │   ├── unnamed-chunk-30-1.png
    │   │   ├── unnamed-chunk-32-1.png
    │   │   ├── unnamed-chunk-33-1.png
    │   │   ├── unnamed-chunk-34-1.png
    │   │   ├── unnamed-chunk-37-1.png
    │   │   ├── unnamed-chunk-38-1.png
    │   │   ├── unnamed-chunk-42-1.png
    │   │   ├── unnamed-chunk-43-1.png
    │   │   └── unnamed-chunk-6-1.png
    ├── coverage.md
    ├── coverage_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-14-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-16-1.png
    │   │   ├── unnamed-chunk-18-1.png
    │   │   ├── unnamed-chunk-20-1.png
    │   │   ├── unnamed-chunk-21-1.png
    │   │   ├── unnamed-chunk-22-1.png
    │   │   ├── unnamed-chunk-24-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   └── unnamed-chunk-9-1.png
    ├── notrobust.md
    ├── notrobust_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-3-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-5-1.png
    │   │   └── unnamed-chunk-6-1.png
    ├── pb.md
    ├── pb_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-16-1.png
    │   │   ├── unnamed-chunk-20-1.png
    │   │   ├── unnamed-chunk-24-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   └── unnamed-chunk-7-1.png
    ├── pc.md
    ├── pc_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-3-1.png
    │   │   └── unnamed-chunk-4-1.png
    ├── ptb.md
    ├── ptb_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-15-1.png
    │   │   ├── unnamed-chunk-17-1.png
    │   │   ├── unnamed-chunk-18-1.png
    │   │   ├── unnamed-chunk-19-1.png
    │   │   ├── unnamed-chunk-20-1.png
    │   │   ├── unnamed-chunk-21-1.png
    │   │   ├── unnamed-chunk-22-1.png
    │   │   ├── unnamed-chunk-25-1.png
    │   │   ├── unnamed-chunk-26-1.png
    │   │   ├── unnamed-chunk-29-1.png
    │   │   ├── unnamed-chunk-30-1.png
    │   │   ├── unnamed-chunk-31-1.png
    │   │   ├── unnamed-chunk-32-1.png
    │   │   ├── unnamed-chunk-35-1.png
    │   │   └── unnamed-chunk-36-1.png
    ├── sampdist.md
    └── sampdist_files
    │   └── figure-gfm
    │       ├── unnamed-chunk-11-1.png
    │       ├── unnamed-chunk-12-1.png
    │       ├── unnamed-chunk-13-1.png
    │       ├── unnamed-chunk-14-1.png
    │       ├── unnamed-chunk-16-1.png
    │       ├── unnamed-chunk-17-1.png
    │       ├── unnamed-chunk-18-1.png
    │       ├── unnamed-chunk-19-1.png
    │       ├── unnamed-chunk-21-1.png
    │       ├── unnamed-chunk-22-1.png
    │       ├── unnamed-chunk-23-1.png
    │       ├── unnamed-chunk-24-1.png
    │       ├── unnamed-chunk-26-1.png
    │       ├── unnamed-chunk-27-1.png
    │       ├── unnamed-chunk-28-1.png
    │       ├── unnamed-chunk-29-1.png
    │       ├── unnamed-chunk-30-1.png
    │       ├── unnamed-chunk-31-1.png
    │       ├── unnamed-chunk-32-1.png
    │       ├── unnamed-chunk-33-1.png
    │       ├── unnamed-chunk-34-1.png
    │       ├── unnamed-chunk-35-1.png
    │       ├── unnamed-chunk-36-1.png
    │       ├── unnamed-chunk-37-1.png
    │       ├── unnamed-chunk-4-1.png
    │       ├── unnamed-chunk-6-1.png
    │       ├── unnamed-chunk-7-1.png
    │       └── unnamed-chunk-8-1.png
├── functions
    ├── Rallfun-v35.txt
    ├── Rallfun-v40.txt
    ├── corfun.txt
    ├── functions.txt
    ├── gengh.txt
    ├── ghpdf.txt
    └── theme_gar.txt
├── notrobust.Rmd
├── pb.Rmd
├── pc.Rmd
├── ptb.Rmd
└── sampdist.Rmd


/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # Example code in package build process
 9 | *-Ex.R
10 | 
11 | # Output files from R CMD build
12 | /*.tar.gz
13 | 
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 | 
17 | # RStudio files
18 | .Rproj.user/
19 | 
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 | 
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 | 
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 | 
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 | 
35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
36 | rsconnect/
37 | 


--------------------------------------------------------------------------------
/2depgps.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Compare two dependent groups using hierarchical bootstrap sampling"
  3 | author: "Guillaume A. Rousselet"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document:
  7 |     fig_caption: no
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 2
 11 |     # github_document:
 12 |     # html_preview: yes
 13 |     # toc: yes
 14 |     # toc_depth: 2
 15 | ---
 16 | 
 17 | # Dependencies
 18 | ```{r, message=FALSE, warning=FALSE}
 19 | library(tibble)
 20 | library(ggplot2)
 21 | source("./functions/theme_gar.txt")
 22 | source("./functions/functions.txt")
 23 | # source("./functions/Rallfun-v40.txt")
 24 | library(beepr)
 25 | # install.packages("devtools")
 26 | # devtools::install_github("GRousselet/rogme")
 27 | library(rogme)
 28 | # library(cowplot)
 29 | # devtools::install_github("zeehio/facetscales")
 30 | library(facetscales) # set different scales for each facet
 31 | ```
 32 | 
 33 | ```{r}
 34 | sessionInfo()
 35 | ```
 36 | 
 37 | Rand Wilcox's functions for two dependent groups
 38 | ```{r, eval = FALSE}
 39 | # percentile bootstrap using any estimator
 40 | # default to difference scores - dif=TRUE
 41 | bootdpci(x,y,est=median)
 42 | # to compare two variances
 43 | comdvar(x,y,alpha=0.05)
 44 | 
 45 | # To use these functions:
 46 | # source("./functions/Rallfun-v40.txt")
 47 | ```
 48 | 
 49 | For two dependent groups, an effective approach is to make inferences about the one-sample distribution of the pairwise differences. This case was already covered in the `pb` notebook. Instead, here we consider a hierarchical situation, in which each participant was tested in 2 conditions, with a large number of trials per condition. In this situation, we can build confidence intervals for each participant and at the group level. For each participant, we use the approach for 2 independent groups described in the `2indgps` notebook. At the group level, we apply a hierarchical bootstrap: following the data sampling, first we sample participants with replacement, second for each bootstrap participant, we sample trials with replacements. 
 50 | 
 51 | We use data from the [French Lexicon Project](https://sites.google.com/site/frenchlexicon/results).
 52 | Click on "French Lexicon Project trial-level results with R scripts.zip".
 53 | The `.RData` dataset was created by applying the script `getflprtdata.Rmd` available on [GitHub](https://github.com/GRousselet/rogme/tree/master/data-raw).
 54 | 
 55 | ```{r}
 56 | # get data - tibble = `flp`
 57 | flp <- rogme::flp
 58 | # columns =
 59 | #1 = participant
 60 | #2 = rt
 61 | #3 = acc = accuracy 0/1
 62 | #4 = condition = word/non-word
 63 | ```
 64 | 
 65 | N = `r length(unique(flp$participant))` participants.
 66 | 
 67 | # Illustrate random sample of 20 participants * 200 trials
 68 | 
 69 | ## Create data frame
 70 | ```{r}
 71 | set.seed(666)
 72 | p.list <- as.numeric(as.character(unique(flp$participant)))
 73 | np <- 20
 74 | nt <- 200
 75 | bootid <- sample(p.list, np, replace = FALSE)
 76 | resmat <- array(NA, dim = c(2,nt,np))
 77 | for(P in 1:np){
 78 |   resmat[1,,P] <- sample(flp$rt[flp$participant %in% bootid[P] & flp$condition == "word"],
 79 |                     nt, replace = TRUE)
 80 |     resmat[2,,P] <- sample(flp$rt[flp$participant %in% bootid[P] & flp$condition == "non-word"],
 81 |                     nt, replace = TRUE)
 82 | }
 83 | ```
 84 | 
 85 | ## Make figure
 86 | ```{r, fig.height = 4, fig.width = 10}
 87 | 
 88 | df <- tibble(rt = c(as.vector(resmat[1,,]),
 89 |                     as.vector(resmat[2,,])),
 90 |              condition = factor(rep(c("Word", "Non-Word"), nt*np)),
 91 |              participant = factor(rep(rep(1:np, each = nt),2))
 92 |              )
 93 | 
 94 | df$condition <- as.character(df$condition)
 95 | df$condition <- factor(df$condition, levels=unique(df$condition))
 96 | 
 97 | p <- ggplot(df, aes(x = rt)) + theme_gar +
 98 |   geom_line(stat = "density", aes(colour=participant), size = 0.5) + 
 99 |   scale_color_viridis_d(begin = 0.1) + 
100 |   scale_x_continuous(breaks=seq(0,2000,250), 
101 |                      minor_breaks = waiver(),
102 |                      expand = c(0, 0)) +
103 |   coord_cartesian(xlim = c(0, 2000)) +
104 |   theme(plot.title = element_text(size=22),
105 |         axis.title.x = element_text(size = 18),
106 |         axis.text = element_text(size = 16, colour = "black"),
107 |         axis.text.y = element_blank(),
108 |         axis.ticks.y = element_blank(),
109 |         axis.title.y = element_text(size = 18),
110 |         legend.text = element_text(size = 16),
111 |         legend.title = element_text(size = 18),
112 |         legend.key.width = unit(1.5,"cm"),
113 |         legend.position = "none",#c(0.75,0.8),
114 |         strip.text.y = element_text(size = 18, face = "bold", angle = 0)) +
115 |         # legend.position = c(0.25,0.9)) +
116 |   labs(x = "Reaction times (ms)", y = "Density") +
117 |   facet_grid(. ~ condition)
118 | p
119 | p.20 <- p
120 | ```
121 | 
122 | # Group 20% trimmed mean
123 | Because there is skewness at the two levels of analysis, we need to consider our estimators carefully at each level. Here for simplicity we compute the 20% trimmed mean for each participant and condition, take the difference between conditions, and finally compute the 20% trimmed mean of these differences. Many other options could be considered, some of which are covered in an other tutorial ([Rousselet & Wilcox, 2019](https://psyarxiv.com/3y54r/)).
124 | 
125 | ## Compute trimmed means
126 | ```{r}
127 | # get data: mean RT for every participant
128 | tmres <- apply(resmat, c(1,3), mean, trim = 0.2)
129 | ```
130 | 
131 | ## Illustrate distributions
132 | 
133 | 20% trimmed means for the Word and Non-Word conditions and their differences.
134 | ```{r, fig.height = 3, fig.width = 10}
135 | set.seed(21) # reproducible jitter
136 | df <- tibble(x = c(tmres[1,],tmres[2,],tmres[2,]-tmres[1,]),
137 |               condition = factor(rep(c("Word", "Non-Word", "Difference"), each = np)),
138 |               y = rep(1, 3 * np))
139 | 
140 | df$condition <- as.character(df$condition)
141 | df$condition <- factor(df$condition, levels=unique(df$condition))
142 | 
143 | scales_x <- list(
144 |   `Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)),
145 |   `Non-Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)),
146 |   `Difference` = scale_x_continuous(limits=c(-100,400), breaks=seq(-100,500,100))
147 | )
148 | 
149 | # group trimmed means
150 | df.tm <- tibble(y = rep(0.9, 3), 
151 |                 yend = rep(1.1, 3),
152 |                 x = c(mean(tmres[1,], trim = 0.2),
153 |                       mean(tmres[2,], trim = 0.2),
154 |                       mean(tmres[2,]-tmres[1,], trim = 0.2)), 
155 |                 xend = x,
156 |                 condition = factor(c("Word", "Non-Word", "Difference"))
157 |                 )
158 | 
159 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar +
160 |   geom_jitter(height = .05, alpha = 0.5, 
161 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
162 |   theme(axis.ticks.y = element_blank(),
163 |         axis.text.y = element_blank(),
164 |         axis.title.y = element_blank()) +
165 |   scale_y_continuous(breaks = 1) +
166 |   # Group trimmed means
167 |   geom_segment(data = df.tm, 
168 |                aes(y = y, yend = yend, x = x, xend = xend),
169 |                    size = 2, lineend = 'round') +
170 |   labs(x = "Group trimmed means (ms)") +
171 |   facet_grid_sc(cols = vars(condition), scales = list( x = scales_x))
172 | 
173 | p.scat <- p
174 | p
175 | ```
176 | 
177 | # Group median
178 | 
179 | ## Compute medians
180 | ```{r}
181 | # get data: mean RT for every participant
182 | mdres <- apply(resmat, c(1,3), median)
183 | ```
184 | 
185 | ## Illustrate distributions
186 | 
187 | 20% medians for the Word and Non-Word conditions and their differences.
188 | ```{r, fig.height = 3, fig.width = 10}
189 | set.seed(21) # reproducible jitter
190 | df <- tibble(x = c(mdres[1,],mdres[2,],mdres[2,]-mdres[1,]),
191 |               condition = factor(rep(c("Word", "Non-Word", "Difference"), each = np)),
192 |               y = rep(1, 3 * np))
193 | 
194 | df$condition <- as.character(df$condition)
195 | df$condition <- factor(df$condition, levels=unique(df$condition))
196 | 
197 | scales_x <- list(
198 |   `Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)),
199 |   `Non-Word` = scale_x_continuous(limits=c(400,1400), breaks=seq(0,1600,200)),
200 |   `Difference` = scale_x_continuous(limits=c(-101,400), breaks=seq(-100,500,100))
201 | )
202 | 
203 | # Group medians
204 | df.tm <- tibble(y = rep(0.9, 3), 
205 |                 yend = rep(1.1, 3),
206 |                 x = c(median(mdres[1,]),
207 |                       median(mdres[2,]),
208 |                       median(mdres[2,]-mdres[1,])), 
209 |                 xend = x,
210 |                 condition = factor(c("Word", "Non-Word", "Difference"))
211 |                 )
212 | 
213 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar +
214 |   geom_jitter(height = .05, alpha = 0.5, 
215 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
216 |   theme(axis.ticks.y = element_blank(),
217 |         axis.text.y = element_blank(),
218 |         axis.title.y = element_blank()) +
219 |   scale_y_continuous(breaks = 1) +
220 |   # Group medians
221 |   geom_segment(data = df.tm, 
222 |                aes(y = y, yend = yend, x = x, xend = xend),
223 |                    size = 2, lineend = 'round') +
224 |   labs(x = "Group medians (ms)") +
225 |   facet_grid_sc(cols = vars(condition), scales = list( x = scales_x))
226 | 
227 | p.scat.md <- p
228 | p
229 | ```
230 | 
231 | # Hierarchical bootstrap: 20% trimmed means
232 | 
233 | We perform the standard bootstrap and the hierarchical version on the same data for comparison. 
234 | ```{r}
235 | set.seed(21)
236 | 
237 | nboot <- 5000
238 | np <- 20
239 | nt <- 200
240 | 
241 | # sample participants with replacement
242 | bootid <- matrix(sample(np, np*nboot, replace = TRUE), nrow = nboot)
243 | 
244 | bootres.level1 <- matrix(NA, nrow = nboot, 3)
245 | bootres.hierar <- matrix(NA, nrow = nboot, 3)
246 | 
247 | for(B in 1:nboot){
248 | # sample participants with replacement
249 |   # level 1 resampling
250 |   bootsamp1 <- resmat[,,bootid[B,]]
251 |   bootres.level1[B,1:2] <- apply(tmres[,bootid[B,]], 1, mean, trim = 0.2)
252 |   bootres.level1[B,3] <- mean(tmres[2,bootid[B,]] - tmres[1,bootid[B,]], trim = 0.2)
253 |   for(P in 1:np){ # for each bootstrap participant
254 |     if(B==1 & P==1){bootsamp2 <- bootsamp1}
255 |     bootsamp2[1,,P] <- sample(bootsamp1[1,,P], nt, replace = TRUE)
256 |     bootsamp2[2,,P] <- sample(bootsamp1[2,,P], nt, replace = TRUE)
257 |   }
258 |   bootres.hierar[B,1] <- mean(apply(bootsamp2[1,,], 2, mean, trim = 0.2), trim = 0.2)
259 |   bootres.hierar[B,2] <- mean(apply(bootsamp2[2,,], 2, mean, trim = 0.2), trim = 0.2)
260 |   bootres.hierar[B,3] <- mean(apply(bootsamp2[2,,], 2, mean, trim = 0.2) -   apply(bootsamp2[1,,], 2, mean, trim = 0.2), trim = 0.2)
261 | }
262 | ```
263 | 
264 | ## Illustrate results
265 | We illustrate the group confidence intervals for each condition and for the pairwise differences. 
266 | ```{r, fig.height = 4, fig.width = 10, warning=FALSE}
267 | # compute confidence intervals --------------------------
268 | alpha <- 0.05
269 | probs <- c(alpha/2, 1-alpha/2)
270 | 
271 | ci.hierar <- matrix(NA, nrow = 2, ncol = 3)
272 | ci.hierar[,1] <- quantile(bootres.hierar[,1], type = 6, probs = probs)
273 | ci.hierar[,2] <- quantile(bootres.hierar[,2], type = 6, probs = probs)
274 | ci.hierar[,3] <- quantile(bootres.hierar[,3], type = 6, probs = probs)
275 | 
276 | ci.level1 <- matrix(NA, nrow = 2, ncol = 3)
277 | ci.level1[,1] <- quantile(bootres.level1[,1], type = 6, probs = probs)
278 | ci.level1[,2] <- quantile(bootres.level1[,2], type = 6, probs = probs)
279 | ci.level1[,3] <- quantile(bootres.level1[,3], type = 6, probs = probs)
280 | 
281 | # make tibbles --------------------------
282 | df <- tibble(x = as.vector(bootres.hierar),
283 |              cond = factor(rep(c("Word", "Non-Word", "Difference"), each = nboot)))
284 | 
285 | df$cond <- keeporder(df$cond)
286 | 
287 | # group trimmed means
288 | df.tm <- tibble(tm = c(mean(tmres[1,], trim = 0.2),
289 |                       mean(tmres[2,], trim = 0.2),
290 |                       mean(tmres[2,]-tmres[1,], trim = 0.2)),
291 |                  cond = factor(c("Word", "Non-Word", "Difference")))
292 | 
293 | # confidence intervals
294 | df.ci <- tibble(x = c(ci.hierar[1,]),
295 |                 xend = c(ci.hierar[2,]),
296 |                 y = rep(0, 3),
297 |                 yend = rep(0, 3),
298 |                 cond = factor(c("Word", "Non-Word", "Difference")))
299 |   
300 | scales_x <- list(`Word` = scale_x_continuous(limits = c(600, 1100)),
301 |                  `Non-Word` = scale_x_continuous(limits = c(600, 1100)),
302 |                  `Difference` = scale_x_continuous(limits = c(0, 200)))
303 | 
304 | # make figure --------------------------
305 | p <- ggplot(df, aes(x = x)) + theme_gar +
306 |   # density
307 |   geom_line(stat = "density", size = 1) +
308 |   # sample trimmed means: vertical line + label
309 |   geom_vline(data = df.tm, aes(xintercept = tm)) +
310 |   # confidence interval ----------------------
311 |   geom_segment(data = df.ci,
312 |                aes(x = x, xend = xend, y = y, yend = yend, group = cond),
313 |                lineend = "round", size = 2, colour = "black") +
314 |   labs(x = "Bootstrap group trimmed means (ms)",
315 |        y = "Density") +
316 |   theme(axis.text.y = element_blank(),
317 |         axis.ticks.y = element_blank()) +
318 |   facet_grid_sc(cols = vars(cond), scales = list(x = scales_x))
319 | p.boot <- p
320 | p
321 | ```
322 | ## Illustrate results (new version)
323 | New version with the hierarchical and non-hierarchical confidence intervals superimposed.
324 | ```{r, fig.height = 4, fig.width = 10, warning=FALSE}
325 | # compute confidence intervals --------------------------
326 | alpha <- 0.05
327 | probs <- c(alpha/2, 1-alpha/2)
328 | 
329 | ci.hierar <- matrix(NA, nrow = 2, ncol = 3)
330 | ci.hierar[,1] <- quantile(bootres.hierar[,1], type = 6, probs = probs)
331 | ci.hierar[,2] <- quantile(bootres.hierar[,2], type = 6, probs = probs)
332 | ci.hierar[,3] <- quantile(bootres.hierar[,3], type = 6, probs = probs)
333 | 
334 | ci.level1 <- matrix(NA, nrow = 2, ncol = 3)
335 | ci.level1[,1] <- quantile(bootres.level1[,1], type = 6, probs = probs)
336 | ci.level1[,2] <- quantile(bootres.level1[,2], type = 6, probs = probs)
337 | ci.level1[,3] <- quantile(bootres.level1[,3], type = 6, probs = probs)
338 | 
339 | # make tibbles --------------------------
340 | df <- tibble(x = c(as.vector(bootres.hierar), as.vector(bootres.level1)),
341 |              cond = factor(rep(rep(c("Word", "Non-Word", "Difference"), each = nboot),2)),
342 |              type = factor(rep(c("Hierarchical","Non-hierarchical"), each = nboot * 3))
343 |              )
344 | 
345 | df$cond <- keeporder(df$cond)
346 | 
347 | # group trimmed means
348 | df.tm <- tibble(tm = rep(c(mean(tmres[1,], trim = 0.2),
349 |                       mean(tmres[2,], trim = 0.2),
350 |                       mean(tmres[2,] - tmres[1,], trim = 0.2)),2),
351 |                  cond = factor(rep(c("Word", "Non-Word", "Difference"),2)),
352 |                 type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3))
353 |                 )
354 | 
355 | # confidence intervals
356 | df.ci <- tibble(x = c(ci.hierar[1,], ci.level1[1,]),
357 |                 xend = c(ci.hierar[2,], ci.level1[2,]),
358 |                 y = c(rep(0, 3),rep(0.0008, 3)),
359 |                 yend = c(rep(0, 3),rep(0.0008, 3)),
360 |                 cond = factor(rep(c("Word", "Non-Word", "Difference"),2)),
361 |                 type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3))
362 |                 )
363 |   
364 | scales_x <- list(`Word` = scale_x_continuous(limits = c(600, 1100)),
365 |                  `Non-Word` = scale_x_continuous(limits = c(600, 1100)),
366 |                  `Difference` = scale_x_continuous(limits = c(0, 200)))
367 | 
368 | # make figure --------------------------
369 | p <- ggplot(df, aes(x = x, colour = type)) + theme_gar +
370 |   # density
371 |   geom_line(stat = "density", size = 1) +
372 |   theme(legend.position = "bottom",
373 |         axis.text.y = element_blank(),
374 |         axis.ticks.y = element_blank()) +
375 |   # sample trimmed means: vertical line + label
376 |   geom_vline(data = df.tm, aes(xintercept = tm)) +
377 |   # confidence interval ----------------------
378 |   geom_segment(data = df.ci,
379 |                aes(x = x, xend = xend, y = y, yend = yend, 
380 |                    group = cond, colour = type),
381 |                lineend = "round", size = 2) +
382 |   scale_colour_manual(values = c("black", "#E69F00")) + 
383 |   labs(x = "Bootstrap group trimmed means (ms)",
384 |        y = "Density") +
385 |   facet_grid_sc(cols = vars(cond), scales = list(x = scales_x))
386 | p.boot2 <- p
387 | p
388 | ```
389 | 
390 | H = hierarchical confidence interval
391 | NH = non-hierarchical confidence interval
392 | 
393 | # Hierarchical bootstrap: median
394 | Previous example shows very similar results for the two types of bootstrap procedures. Because the median can lead to choppy bootstrap distributions, especially in the presence of duplicate values, the hierarchical procedure might help smooth the bootstrap distribution because of the variability introduced by random sampling of trials.
395 | 
396 | ```{r}
397 | set.seed(21)
398 | 
399 | nboot <- 5000
400 | np <- 20
401 | nt <- 200
402 | 
403 | # sample participants with replacement
404 | bootid <- matrix(sample(np, np*nboot, replace = TRUE), nrow = nboot)
405 | 
406 | bootres.level1 <- matrix(NA, nrow = nboot, 3)
407 | bootres.hierar <- matrix(NA, nrow = nboot, 3)
408 | 
409 | for(B in 1:nboot){
410 | # sample participants with replacement
411 |   # level 1 resampling
412 |   bootsamp1 <- resmat[,,bootid[B,]]
413 |   bootres.level1[B,1:2] <- apply(mdres[,bootid[B,]], 1, median)
414 |   bootres.level1[B,3] <- median(mdres[2,bootid[B,]] - mdres[1,bootid[B,]])
415 |   for(P in 1:np){ # for each bootstrap participant
416 |     if(B==1 & P==1){bootsamp2 <- bootsamp1}
417 |     bootsamp2[1,,P] <- sample(bootsamp1[1,,P], nt, replace = TRUE)
418 |     bootsamp2[2,,P] <- sample(bootsamp1[2,,P], nt, replace = TRUE)
419 |   }
420 |   bootres.hierar[B,1] <- median(apply(bootsamp2[1,,], 2, median))
421 |   bootres.hierar[B,2] <- median(apply(bootsamp2[2,,], 2, median))
422 |   bootres.hierar[B,3] <- median(apply(bootsamp2[2,,], 2, median) -   apply(bootsamp2[1,,], 2, median))
423 | }
424 | ```
425 | 
426 | ## Illustrate results
427 | We illustrate the group confidence intervals for each condition and for the pairwise differences. 
428 | ```{r, fig.height = 4, fig.width = 10, warning=FALSE}
429 | # compute confidence intervals --------------------------
430 | alpha <- 0.05
431 | probs <- c(alpha/2, 1-alpha/2)
432 | 
433 | ci.hierar <- matrix(NA, nrow = 2, ncol = 3)
434 | ci.hierar[,1] <- quantile(bootres.hierar[,1], type = 6, probs = probs)
435 | ci.hierar[,2] <- quantile(bootres.hierar[,2], type = 6, probs = probs)
436 | ci.hierar[,3] <- quantile(bootres.hierar[,3], type = 6, probs = probs)
437 | 
438 | ci.level1 <- matrix(NA, nrow = 2, ncol = 3)
439 | ci.level1[,1] <- quantile(bootres.level1[,1], type = 6, probs = probs)
440 | ci.level1[,2] <- quantile(bootres.level1[,2], type = 6, probs = probs)
441 | ci.level1[,3] <- quantile(bootres.level1[,3], type = 6, probs = probs)
442 | 
443 | # make tibbles --------------------------
444 | df <- tibble(x = c(as.vector(bootres.hierar), as.vector(bootres.level1)),
445 |              cond = factor(rep(rep(c("Word", "Non-Word", "Difference"), each = nboot),2)),
446 |              type = factor(rep(c("Hierarchical","Non-hierarchical"), each = nboot * 3))
447 |              )
448 | 
449 | df$cond <- keeporder(df$cond)
450 | 
451 | # Group medians
452 | df.tm <- tibble(tm = rep(c(median(mdres[1,]),
453 |                       median(mdres[2,]),
454 |                       median(mdres[2,] - mdres[1,])),2),
455 |                  cond = factor(rep(c("Word", "Non-Word", "Difference"),2)),
456 |                 type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3))
457 |                 )
458 | 
459 | # confidence intervals
460 | df.ci <- tibble(x = c(ci.hierar[1,], ci.level1[1,]),
461 |                 xend = c(ci.hierar[2,], ci.level1[2,]),
462 |                 y = c(rep(0, 3),rep(0.001, 3)),
463 |                 yend = c(rep(0, 3),rep(0.001, 3)),
464 |                 cond = factor(rep(c("Word", "Non-Word", "Difference"),2)),
465 |                 type = factor(rep(c("Hierarchical","Non-hierarchical"), each = 3))
466 |                 )
467 |   
468 | scales_x <- list(`Word` = scale_x_continuous(limits = c(600, 1100)),
469 |                  `Non-Word` = scale_x_continuous(limits = c(600, 1100)),
470 |                  `Difference` = scale_x_continuous(limits = c(0, 200)))
471 | 
472 | # make figure --------------------------
473 | p <- ggplot(df, aes(x = x, colour = type)) + theme_gar +
474 |   # density
475 |   geom_line(stat = "density", size = 1) +
476 |   theme(legend.position = "bottom",
477 |         axis.text.y = element_blank(),
478 |         axis.ticks.y = element_blank()) +
479 |   # sample trimmed means: vertical line + label
480 |   geom_vline(data = df.tm, aes(xintercept = tm)) +
481 |   # confidence interval ----------------------
482 |   geom_segment(data = df.ci,
483 |                aes(x = x, xend = xend, y = y, yend = yend, 
484 |                    group = cond, colour = type),
485 |                lineend = "round", size = 2) +
486 |   scale_colour_manual(values = c("black", "#E69F00")) + 
487 |   labs(x = "Bootstrap group medians (ms)",
488 |        y = "Density") +
489 |   # facet_grid(cols = vars(cond), scales = "free")
490 |   facet_grid_sc(cols = vars(cond), scales = list(x = scales_x))
491 | p.boot.md <- p
492 | p
493 | ```
494 | 
495 | The distributions of the bootstrap medians are very irregular. Bootstrapping at two levels (trials and participants) leads to much smoother distributions and different confidence intervals.
496 | 
497 | # Summary figure
498 | ```{r, eval = FALSE}
499 | legend <- cowplot::get_legend(p.boot2)
500 | 
501 | cowplot::plot_grid(p.20, 
502 |                    p.scat, 
503 |                    p.boot2 + theme(legend.position = "none"), 
504 |                    p.scat.md,
505 |                    p.boot.md + theme(legend.position = "none"),
506 |                    legend,
507 |                     labels = c("A", "B", "C", "D", "E", NA),
508 |                     ncol = 1,
509 |                     nrow = 6,
510 |                     rel_heights =  c(1, 1/2.5, 1, 1/2.5, 1, 1/5),
511 |                     align = "v",
512 |                     axis = "l",
513 |                     label_size = 20, 
514 |                     hjust = -0.5, 
515 |                     scale=.95)
516 | 
517 | # save figure
518 | ggsave(filename=('./figures/figure_hpb.pdf'),width=13,height=15)
519 | ggsave(filename=('./figures/figure12.pdf'),width=13,height=15)
520 | ```
521 | 
522 | 


--------------------------------------------------------------------------------
/2indgps.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Compare two independent groups"
  3 | author: "Guillaume A. Rousselet"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document:
  7 |     fig_caption: no
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 2
 11 |     # github_document:
 12 |     # html_preview: yes
 13 |     # toc: yes
 14 |     # toc_depth: 2
 15 | ---
 16 | 
 17 | # Dependencies
 18 | ```{r, message=FALSE, warning=FALSE}
 19 | library(tibble)
 20 | library(ggplot2)
 21 | # library(cowplot)
 22 | source("./functions/theme_gar.txt")
 23 | source("./functions/Rallfun-v40.txt")
 24 | source("./functions/functions.txt")
 25 | ```
 26 | 
 27 | ```{r}
 28 | sessionInfo()
 29 | ```
 30 | 
 31 | Wilcox's functions for two independent groups
 32 | ```{r, eval = FALSE}
 33 | # to compare any estimators
 34 | pb2gen(x, y, alpha=0.05, nboot=1000, est=median, SEED=FALSE)
 35 | # same as pb2gen but only to compare medians:
 36 | medpb2()
 37 | # same as pb2gen but only to compare trimmed means:
 38 | trimpb2()
 39 | # to compare variances:
 40 | comvar2()
 41 | # For robust measures of scale, use pb2gen
 42 | ```
 43 | 
 44 | # Illustrate populations
 45 | 
 46 | We sample from 2 populations that differ in skewness, leading to large differences in the tails.
 47 | 
 48 | ```{r, fig.height=4}
 49 | meanlog1 <- 0
 50 | meanlog2 <- 0.2
 51 | sdlog1 <- 0.5
 52 | sdlog2 <- 0.7
 53 | 
 54 | x <- seq(0, 5, 0.1)
 55 | 
 56 | p <- ggplot(as.tibble(x), aes(x = x)) + theme_gar +
 57 |   stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0, 0.5)"),
 58 |                 fun = dlnorm, args = list(meanlog = meanlog1, sdlog = sdlog1)) +
 59 |   stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0.2, 0.7)"),
 60 |                 fun = dlnorm, args = list(meanlog = meanlog2, sdlog = sdlog2)) +
 61 |   labs(x = "Values", y = "Density") +
 62 |   scale_colour_manual(name = "", values = c("Lognormal(0, 0.5)" = "orange",
 63 |                                             "Lognormal(0.2, 0.7)" = "purple")) +
 64 |   theme(legend.position = c(.6, .8),
 65 |         axis.text.y=element_blank(),  #remove y axis labels
 66 |         axis.ticks.y=element_blank()) +
 67 |   guides(colour = guide_legend(override.aes = list(size = 3)))
 68 | p.pop <- p
 69 | p
 70 | ```
 71 | 
 72 | ## Population normalise difference
 73 | 
 74 | We use samples of 100,000 values to define the population normalise difference between 3rd quartiles.
 75 | 
 76 | ```{r}
 77 | set.seed(21)
 78 | normdiff(q3(rlnorm(100000, meanlog = meanlog1, sdlog = sdlog1)), q3(rlnorm(100000, meanlog = meanlog2, sdlog = sdlog2)))
 79 | ```
 80 | 
 81 | # Generate samples
 82 | 
 83 | We get 2 relatively large samples from 2 populations.
 84 | 
 85 | ```{r}
 86 | set.seed(21)
 87 | n1 <- 50
 88 | n2 <- 60
 89 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1)
 90 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2)
 91 | ```
 92 | 
 93 | Instead of the traditional measures of central tendency, like the mean or the median, here we're going to estimate the 3rd quartile of the marginal distributions. This can be justified by an interest in differences among slower responses. To illustrate that with the bootstrap we can build confidence intervals for any quantity, the group estimations will be on a normalised difference of quartiles: $effect = (q3_1-q3_2)/(q3_1+q3_2)$.
 94 | 
 95 | # Illustrate samples
 96 | ```{r, fig.height=2}
 97 | set.seed(21) # for reproducible jitter
 98 | # raw data
 99 | df <- tibble(val = c(samp1, samp2),
100 |              y = rep(1, n1+n2),
101 |              gp = factor(c(rep("Group 1",n1),rep("Group 2",n2)))
102 |              )
103 | 
104 | df.q2 <- tibble(y = rep(0.9,2), 
105 |                 yend = rep(1.1,2),
106 |                 x = c(q2(samp1),q2(samp2)), 
107 |                 xend = x,
108 |                 gp = factor(c("Group 1","Group 2"))
109 |                 )
110 | 
111 | df.q3 <- tibble(y = rep(0.9,2), 
112 |                 yend = rep(1.1,2),
113 |                 x = c(q3(samp1),q3(samp2)), 
114 |                 xend = x,
115 |                 gp = factor(c("Group 1","Group 2"))
116 |                 )
117 | 
118 | p <- ggplot(data = df, aes(x = val, y = y)) + theme_gar +
119 |   # scatterplots
120 |   geom_jitter(height = .05, alpha = 0.5, 
121 |               size = 3, shape = 21, fill = "grey", colour = "black") +
122 |   theme(axis.ticks.y = element_blank(),
123 |     axis.text.y = element_blank(),
124 |     axis.title.y = element_blank(),
125 |     panel.grid.minor.x = element_blank()) +
126 |   scale_y_continuous(breaks = 1) +
127 |   # 3rd quartile
128 |   geom_segment(data = df.q3, aes(y = y, yend = yend,
129 |     x = x, xend = xend),
130 |     size = 2, lineend = 'round', colour = "grey") +
131 |   # median
132 |   geom_segment(data = df.q2, aes(y = y, yend = yend,
133 |     x = x, xend = xend),
134 |     size = 0.5, lineend = 'round') +
135 |   labs(x = "Values") + 
136 |   # ggtitle("Random sample") +
137 |   facet_grid(cols = vars(gp))
138 | p
139 | pA1 <- p
140 | ```
141 | 
142 | Thick vertical line = 3rd quartile.
143 | Thin vertical line = 2nd quartile (median).
144 | 
145 | ## Sample difference
146 | 
147 | ### Illustrate results
148 | ```{r, fig.height=2}
149 | nb <- 20
150 | 
151 | df <- tibble(bootid = 1,
152 |              res = normdiff(q3(samp1), q3(samp2))
153 |               )
154 | 
155 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 
156 |   theme_gar +
157 |   geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)),
158 |              size = 2, colour = "grey") +
159 |   geom_vline(xintercept = 0, size = 0.75, linetype = "dashed") +
160 |   coord_cartesian(xlim = c(-0.4, 0.4)) +
161 |   scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
162 |   labs(x = "Normalised difference") +
163 |   theme(panel.grid.minor = element_blank(),
164 |         axis.title.y = element_blank()) +
165 |   ggtitle("(Group 1 - Group 2) / (Group 1 + Group 2)") +
166 |   theme(plot.title = element_text(size = 16))
167 | pA2 <- p
168 | p
169 | ```
170 | population normalised difference = `r round(normdiff(q3(samp1),q3(samp2)), digits = 2)`
171 | 
172 | Thick grey vertical line = difference between 3rd quartiles.
173 | Vertical dashed line = zero reference line.
174 | 
175 | # Generate bootstrap samples & confidence intervals
176 | 
177 | We use 5,000 bootstrap samples.
178 | 
179 | ```{r}
180 | set.seed(666)
181 | nboot <- 5000
182 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot)
183 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot)
184 | ```
185 | 
186 | ## Illustrate 20 bootstrap samples
187 | 
188 | For each sample we superimpose the 3rd quartile---short vertical black line.
189 | Disks = bootstrap observations.
190 | Long vertical grey lines mark the sample 3rd quartiles.
191 | 
192 | ```{r, fig.height=6}
193 | nb <- 20
194 | df <- tibble(res = c(as.vector(bootsamp1[1:nb,]),as.vector(bootsamp2[1:nb,])),
195 |              bootid = c(rep(1:nb, each = n1),rep(1:nb, each = n2)),
196 |              gp = factor(c(rep("Group 1",nb*n1),rep("Group 2",nb*n2))))
197 | 
198 | df2 <- tibble(bootid = rep(1:nb,2),
199 |               res = c(apply(bootsamp1[1:nb,],1,q3),apply(bootsamp2[1:nb,],1,q3)),
200 |               gp = factor(rep(c("Group 1", "Group 2"), each = nb))
201 |               )
202 | 
203 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar +
204 |   # 3rd quartile
205 |   geom_vline(data = df.q3, aes(group = gp, xintercept = x), size = 2, colour = "grey") +
206 |   # scatterplots
207 |   geom_point(alpha = 0.3, position = position_jitter(height=0.1)) +
208 |   labs(x = "Values", y = "Bootstrap samples") +
209 |   geom_segment(data = df2, aes(x = res, xend = res,
210 |                                y = bootid - 0.4, yend = bootid + 0.4),
211 |                size = 1.5) +
212 |   theme(panel.grid.minor = element_blank()) +
213 |   scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) +
214 |   facet_grid(cols = vars(gp))
215 | pB1 <- p
216 | p
217 | ```
218 | 
219 | ## Illustrate bootstrap normalised differences
220 | 
221 | ### Illustrate results
222 | ```{r, fig.height=6}
223 | nb <- 20
224 | 
225 | df <- tibble(bootid = 1:nb,
226 |              res = normdiff(apply(bootsamp1[1:nb,],1,q3), apply(bootsamp2[1:nb,],1,q3))
227 |               )
228 | 
229 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 
230 |   theme_gar +
231 |   geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)), size = 2, colour = "grey") +
232 |    geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") +
233 |   coord_cartesian(xlim = c(-0.4, 0.4)) +
234 |   scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
235 |   labs(x = "Normalised differences", y = "Bootstrap samples") +
236 |   geom_segment(size = 1.5) +
237 |   theme(panel.grid.minor = element_blank()) +
238 |   scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01)))
239 | pB2 <- p
240 | p
241 | ```
242 | 
243 | # Bootstrap distributions
244 | 
245 | ## Compute confidence intervals
246 | ```{r}
247 | alpha <- 0.05
248 | boot1.q3 <- apply(bootsamp1,1,q3)
249 | boot2.q3 <- apply(bootsamp2,1,q3)
250 | ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2))
251 | ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2))
252 | bootdiff <- normdiff(boot1.q3, boot2.q3)
253 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 
254 | ```
255 | 
256 | ## Bootstrap distributions of group quantiles
257 | ```{r, fig.height=4}
258 | 
259 | df <- tibble(x = c(boot1.q3, boot2.q3),
260 |              gp = factor(c(rep("Group 1",nboot),rep("Group 2",nboot))))
261 | 
262 | # df.q3 <- tibble(q3 = c(q3(samp1), q3(samp2)),
263 | #                  group = factor(c("Group 1", "Group 2")))
264 | 
265 | df.ci <- tibble(x = c(ci1[1], ci2[1]),
266 |                 xend = c(ci1[2], ci2[2]),
267 |                 y = c(0, 0),
268 |                 yend = c(0, 0),
269 |                 gp = factor(c("Group 1", "Group 2")))
270 |   
271 | p <- ggplot(df, aes(x = x)) + theme_gar +
272 |   # sample q3: vertical line + label
273 |   geom_vline(data = df.q3, aes(xintercept = x, group = gp), size = 2, colour = "grey") +
274 |   # density
275 |   geom_line(stat = "density", size = 1) +
276 |   # confidence interval ----------------------
277 |   geom_segment(data = df.ci,
278 |                aes(x = x, xend = xend, y = y, yend = yend),
279 |                lineend = "round", size = 2, colour = "black") +
280 |   labs(x = "Bootstrap quantiles",
281 |        y = "Density") +
282 |   theme(axis.text.y=element_blank(),  #remove y axis labels
283 |         axis.ticks.y=element_blank()) +
284 |   facet_grid(cols = vars(gp))
285 | p
286 | pC1 <- p
287 | ```
288 | 
289 | ## Group normalised differences
290 | ```{r, fig.height=4}
291 | samp.nd <- normdiff(q3(samp1), q3(samp2))
292 | 
293 | df <- tibble(x = bootdiff)
294 | 
295 | p <- ggplot(df, aes(x = x)) + theme_gar +
296 |   # sample 3rd quartile: vertical line
297 |   geom_vline(xintercept = samp.nd, linetype = 'solid', size = 2, colour = "grey") +
298 |       geom_line(stat = "density", size = 1) +
299 |   labs(x = "Bootstrap differences",
300 |        y = "Density") +
301 |   theme(axis.text.y=element_blank(),  #remove y axis labels
302 |         axis.ticks.y=element_blank()) +
303 |   # confidence interval ----------------------
304 |   geom_segment(x = ci.diff[1], xend = ci.diff[2],
305 |                y = 0, yend = 0,
306 |                lineend = "round", size = 2, colour = "black") +
307 |   # zero reference line
308 |   geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") +
309 |   ggtitle(paste0(round(samp.nd,digits = 3),
310 |                  " [",
311 |                  round(ci.diff[1],digits = 3),
312 |                  ", ",
313 |                  round(ci.diff[2],digits = 3)
314 |                  ,"]"))
315 | p
316 | pC2 <- p
317 | ```
318 | 
319 | # Make summary figure
320 | ```{r, eval = FALSE}
321 | 
322 | pvoid <- ggplot() + theme_void() # empty plot for blank space
323 | 
324 | # pA <- cowplot::plot_grid(pA1, pA2,
325 | #                     labels = NA,
326 | #                     ncol = 2,
327 | #                     nrow = 1,
328 | #                     rel_widths = c(2, 1), 
329 | #                     label_size = 20, 
330 | #                     hjust = -0.5, 
331 | #                     scale=.95)
332 | # 
333 | # pB <- cowplot::plot_grid(pB1, pB2,
334 | #                     labels = NA,
335 | #                     ncol = 2,
336 | #                     nrow = 1,
337 | #                     rel_widths = c(2, 1), 
338 | #                     label_size = 20, 
339 | #                     hjust = -0.5, 
340 | #                     scale=.95)
341 | # 
342 | # pC <- cowplot::plot_grid(pC1, pC2,
343 | #                     labels = NA,
344 | #                     ncol = 2,
345 | #                     nrow = 1,
346 | #                     rel_widths = c(2, 1), 
347 | #                     label_size = 20, 
348 | #                     hjust = -0.5, 
349 | #                     scale=.95)
350 | 
351 | p.LEFT <- cowplot::plot_grid(p.pop, pA1, pB1, pC1,
352 |                     labels = c("A", "B", "C", "D"),
353 |                     ncol = 1,
354 |                     nrow = 4,
355 |                     rel_heights = c(2, 1, 3, 2),
356 |                     label_size = 20,
357 |                     align = "v",
358 |                     axis = "l",
359 |                     hjust = -0.5,
360 |                     scale=.95)
361 | 
362 | p.RIGHT <- cowplot::plot_grid(pvoid, pA2, pB2, pC2,
363 |                     labels = NA,
364 |                     ncol = 1,
365 |                     nrow = 4,
366 |                     rel_heights = c(2, 1, 3, 2),
367 |                     label_size = 20,
368 |                     align = "v",
369 |                     axis = "l",
370 |                     hjust = -0.5,
371 |                     scale=.95)
372 | 
373 | cowplot::plot_grid(p.LEFT, p.RIGHT,
374 |                     labels = NA,
375 |                     ncol = 2,
376 |                     nrow = 1,
377 |                     rel_widths = c(2, 1),
378 |                     label_size = 20,
379 |                     align = "h",
380 |                     axis = "b",
381 |                     hjust = -0.5,
382 |                     scale=.95)
383 | 
384 | # save figure
385 | ggsave(filename=('./figures/figure_2indgps.pdf'),width=17,height=15)
386 | ggsave(filename=('./figures/figure9.pdf'),width=17,height=15)
387 | ```
388 | 
389 | # Confidence interval with n = 100
390 | 
391 | ```{r}
392 | set.seed(21)
393 | n1 <- 100
394 | n2 <- 100
395 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1)
396 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2)
397 | nboot <- 5000
398 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot)
399 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot)
400 | alpha <- 0.05
401 | boot1.q3 <- apply(bootsamp1,1,q3)
402 | boot2.q3 <- apply(bootsamp2,1,q3)
403 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2))
404 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2))
405 | bootdiff <- normdiff(boot1.q3, boot2.q3)
406 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 
407 | round(ci.diff, digits = 2)
408 | ```
409 | 
410 | # Confidence interval with n = 200
411 | 
412 | ```{r}
413 | set.seed(21)
414 | n1 <- 200
415 | n2 <- 200
416 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1)
417 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2)
418 | nboot <- 5000
419 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot)
420 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot)
421 | alpha <- 0.05
422 | boot1.q3 <- apply(bootsamp1,1,q3)
423 | boot2.q3 <- apply(bootsamp2,1,q3)
424 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2))
425 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2))
426 | bootdiff <- normdiff(boot1.q3, boot2.q3)
427 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 
428 | round(ci.diff, digits = 2)
429 | ```
430 | 
431 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Guillaume Rousselet
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | Reproducibility package for the article:
  2 | 
  3 | **An introduction to the bootstrap: a versatile method to make inferences by using data-driven simulations**
  4 | Rousselet G.A., Pernet C.R., Wilcox R.R.
  5 | [*Meta-Psychology*](https://open.lnu.se/index.php/metapsychology/article/view/2058)
  6 | 
  7 | [[OSF repository](https://osf.io/8b4t5/)] [[GitHub repository](https://github.com/GRousselet/bootstrap)] [[PsyArXiv Preprint](https://psyarxiv.com/h8ft7)]
  8 | 
  9 | The repository contains all of the [R](https://www.r-project.org/) code  used in the article. The code is best seen by running the RMarkdown notebooks, within [RStudio](https://www.rstudio.com/).
 10 | 
 11 | The code is released under the [MIT license](https://opensource.org/licenses/MIT). Copyright 2019-2022, Guillaume A. Rousselet.
 12 | 
 13 | The figures are released under the [CC-BY 4.0 license](https://creativecommons.org/licenses/by/4.0/legalcode). Copyright 2019-2022, Rousselet, Pernet & Wilcox.
 14 | 
 15 | # Content
 16 | |folder|description|
 17 | |-----|-----|
 18 | |`code`|R `.Rmd` files to run simulations and create figures|
 19 | |`notebooks`|pdf versions of the code, with embedded figures|
 20 | |`data`|simulation results needed to run the code|
 21 | |`figures`|all the figures used in the article, in pdf format (only available on the [OSF](https://osf.io/8b4t5/) version of the repo)|
 22 | |`functions`|extra R functions defined in text files|
 23 | |`docs`|Github html versions of the notebooks (only available on the [GitHub](https://github.com/GRousselet/bootstrap) version of the repo)|
 24 | 
 25 | # Notebooks
 26 | 
 27 | The notebooks contain code to reproduce the figures and analyses presented in the article. They also contain extra resources, figures and analyses.
 28 | 
 29 | |Notebook|Description|Figures|
 30 | |-----|-----|-----|
 31 | |[pb](/docs/pb.md)|Description of the percentile bootstrap|Figure 1|
 32 | |[pc](/docs/pc.md)|Percent correct example|Figure 2|
 33 | |[sampdist](/docs/sampdist.md)|Illustrate bootstrap sampling distributions|Figures 3-4|
 34 | |[coverage](/docs/coverage.md)|Simulations of the coverage, width and power of one-sample confidence intervals|Figures 5, 7, 8|
 35 | |[notrobust](/docs/notrobust.md)|On its own, the bootstrap does not guarantee robustness|Figure 6|
 36 | |[2indgps](/docs/2indgps.md)|Compare 2 independent groups|Figure 9|
 37 | |[compcorr](/docs/compcorr.md)|Comparison of correlation coefficients|Figures 10-11|
 38 | |[2depgps](/docs/2depgps.md)|Illustrate hierarchical bootstrap sampling|Figure 12|
 39 | |[ptb](/docs/ptb.md)|Percentile-t bootstrap technique|Figures 13-14|
 40 | 
 41 | # Direct links to the PDF version of the figures on the OSF
 42 | 
 43 | [Figure 1](https://osf.io/scp2j/)
 44 | 
 45 | [Figure 2](https://osf.io/t7pkw/)
 46 | 
 47 | [Figure 3](https://osf.io/t2qg3/)
 48 | 
 49 | [Figure 4](https://osf.io/a4ybr/)
 50 | 
 51 | [Figure 5](https://osf.io/r8mns/)
 52 | 
 53 | [Figure 6](https://osf.io/hk7cn/)
 54 | 
 55 | [Figure 7](https://osf.io/6wvuh/)
 56 | 
 57 | [Figure 8](https://osf.io/6wakn/)
 58 | 
 59 | [Figure 9](https://osf.io/j9b8n/)
 60 | 
 61 | [Figure 10](https://osf.io/fdgx6/)
 62 | 
 63 | [Figure 11](https://osf.io/wt9sy/)
 64 | 
 65 | [Figure 12](https://osf.io/8fjuv/)
 66 | 
 67 | [Figure 13](https://osf.io/wph78/)
 68 | 
 69 | [Figure 14](https://osf.io/wqvay/)
 70 | 
 71 | # R packages needed
 72 | If you want to run the code in RStudio, you will need to install a few packages.
 73 | 
 74 | To reproduce the figures only, you can install the required packages by typing this in the console:
 75 | 
 76 | `install.packages(c("ggplot2", "tibble"))`
 77 | 
 78 | Or you can navigate in the GUI to Tools > Install Packages...
 79 | 
 80 | To install `rogme` and `facetscales`, first you might need to install `devtools`:
 81 | 
 82 | `install.packages("devtools")`
 83 | 
 84 | then:
 85 | 
 86 | `devtools::install_github("GRousselet/rogme")`
 87 | 
 88 | `devtools::install_github("zeehio/facetscales")`
 89 | 
 90 | To reproduce the summary figures, you also need `cowplot` to combine panels:
 91 | 
 92 | `install.packages("cowplot")`
 93 | 
 94 | Finally, if you decide to run the simulations, you will need `beepr` to get a little auditory reward:
 95 | 
 96 | `install.packages("beepr")`
 97 | 
 98 | # Additional R functions
 99 | Here we highlight a few R functions relevant to the tutorial. Most of them are listed in the RMarkdown notebooks, with example syntax. Each notebook will install the appropriate functions for you; otherwise, in the console you can type `source(file.choose())` and select the relevant .txt file.
100 | 
101 | ## Robust estimation and hypothesis testing
102 | To get all the statistical functions from Rand Wilcox, select the [Rallfun-v40.txt](https://github.com/GRousselet/articles/blob/master/bootstrap/functions/Rallfun-v40.txt) file. See details on this [webpage](https://dornsife.usc.edu/labs/rwilcox/software/). The full description of the functions is available in the book [Introduction to Robust Estimation and Hypothesis Testing](https://books.google.co.uk/books/about/Introduction_to_Robust_Estimation_and_Hy.html?id=8f8nBb4__EYC&printsec=frontcover&source=kp_read_button&redir_esc=y#v=onepage&q&f=false). Here are some of the functions used or mentioned in the notebooks.
103 | 
104 | ### One-sample
105 | |Name|Description|
106 | |-----|-----|
107 | |`onesampb`|one-sample percentile bootstrap for any estimator|
108 | |`sint`|parametric inference on the median|
109 | |`trimci`|one-sample test on trimmed means|
110 | |`trimpb`|percentile bootstrap inferences on trimmed means|
111 | |`trimcibt`|bootstrap-t on trimmed means|
112 | |`hdpb`|percentile bootstrap inferences on the Harrell-Davis quantile estimator|
113 | 
114 | ### Two independent groups
115 | |Name|Description|
116 | |-----|-----|
117 | |`yuen`|t-test on trimmed means|
118 | |`yuenbt`|bootstrap-t on trimmed means|
119 | |`pb2gen`|percentile bootstrap to compare any estimators|
120 | |`medpb2`|same as pb2gen but only to compare medians|
121 | |`trimpb2`|same as pb2gen but only to compare trimmed means|
122 | |`comvar2`|parametric test to compare variances|
123 | 
124 | ### Two dependent groups
125 | |Name|Description|
126 | |-----|-----|
127 | |`yuend`|t-test on dependent trimmed means|
128 | |`ydbt`|bootstrap-t on trimmed means|
129 | |`comdvar`|parametric test of variances|
130 | |`bootdpci`|percentile bootstrap using any estimator|
131 | 
132 | ### Correlations
133 | |Name|Description|
134 | |-----|-----|
135 | |`pcorb`|percentile bootstrap confidence interval for Pearson's correlation|
136 | |`corb`|percentile bootstrap confidence interval for any robust correlation|
137 | |`wincor`|winsorised correlation|
138 | |`pbcor`|percentage bend correlation|
139 | |`mscor`|skipped correlations using Pearson's or Spearman's correlation|
140 | 
141 | #### Compare independent correlations
142 | |Name|Description|
143 | |-----|-----|
144 | |`twopcor`|percentile bootstrap comparison of two independent Pearson's correlations|
145 | |`twocor`|percentile bootstrap comparison of two independent robust correlations|
146 | 
147 | #### Compare dependent correlations
148 | |Name|Description|
149 | |-----|-----|
150 | |`TWOpov`|compare two overlapping Pearson's correlations|
151 | |`twoDcorR`|compare two overlapping robust correlations|
152 | |`TWOpNOV`|compare two non-overlapping Pearson's correlations|
153 | |`twoDNOV`|compare two non-overlapping robust correlations|
154 | 
155 | ## Other custom functions
156 | [functions.txt](https://github.com/GRousselet/articles/blob/master/bootstrap/functions/functions.txt) and [theme_gar.txt](https://github.com/GRousselet/articles/blob/master/bootstrap/functions/theme_gar.txt) contain custom code to set some ggplot2 parameters and to compute a few things. Other custom functions are defined in the notebooks.
157 | 
158 | # Extra resources
159 | 
160 | ## R packages for bootstrap inferences
161 | - [`boot`](https://www.statmethods.net/advstats/bootstrapping.html)
162 | - [`resample`](https://cran.r-project.org/web/packages/resample/index.html)
163 | - [`bootstrap`](https://cran.r-project.org/web/packages/bootstrap/index.html)
164 | - [`WRS2`](https://cran.r-project.org/web/packages/WRS2/index.html)
165 | 
166 | ## Interactive demo
167 | [Frequentist inference: confidence interval & bootstrap](https://seeing-theory.brown.edu/frequentist-inference/index.html#section2)
168 | 
169 | 
170 | ## Books
171 | Suggested books on bootstrap methods, robust statistics and simulations.
172 | 
173 | [An Introduction to the Bootstrap](https://books.google.co.uk/books?id=gLlpIUxRntoC&printsec=frontcover&dq=bootstrap+efron&hl=en&sa=X&ved=0ahUKEwjiv676orHiAhVIRxUIHYkgAckQ6AEIKDAA#v=onepage&q=bootstrap%20efron&f=false)
174 | 
175 | [Robust Statistics](https://books.google.co.uk/books?id=yAmZsxWSEWgC&dq=mathematical+foundations+of+robust+statistics&hl=en&sa=X&ved=0ahUKEwj3tP6Oo7HiAhU_UhUIHftBCnQQ6AEILjAB)
176 | 
177 | [Introduction to Robust Estimation and Hypothesis Testing](https://books.google.co.uk/books/about/Introduction_to_Robust_Estimation_and_Hy.html?id=8f8nBb4__EYC&printsec=frontcover&source=kp_read_button&redir_esc=y#v=onepage&q&f=false)
178 | 
179 | [Computer Age Statistical Inference](https://books.google.co.uk/books?id=Sj1yDAAAQBAJ&printsec=frontcover&dq=computer+inference+efron&hl=en&sa=X&ved=0ahUKEwiXy7vjorHiAhU4SxUIHUm7A3kQ6AEIKDAA#v=onepage&q=computer%20inference%20efron&f=false)
180 | 
181 | [Statistics: Unlocking the Power of Data](https://books.google.co.uk/books?id=EpBEDwAAQBAJ&printsec=frontcover&dq=Statistics:+Unlocking+the+Power+of+Data&hl=en&sa=X&ved=0ahUKEwil1unSorHiAhUPTxUIHfCwBjEQ6AEIKDAA#v=onepage&q=Statistics%3A%20Unlocking%20the%20Power%20of%20Data&f=false)
182 | 
183 | [Introduction to Statistical Investigations](https://books.google.co.uk/books?id=FsvVwQEACAAJ&dq=Introduction+to+Statistical+Investigations&hl=en&sa=X&ved=0ahUKEwigt_2worHiAhUySBUIHcZqCnAQ6AEIKDAA)
184 | 
185 | [Mathematical Statistics with Resampling and R](https://books.google.co.uk/books?id=_2hvDwAAQBAJ&printsec=frontcover&dq=Mathematical+Statistics+with+Resampling+and+R&hl=en&sa=X&ved=0ahUKEwiEj_-gorHiAhXkQhUIHRauC-IQ6AEILzAB#v=onepage&q=Mathematical%20Statistics%20with%20Resampling%20and%20R&f=false)
186 | 


--------------------------------------------------------------------------------
/data/beh_sim.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/beh_sim.RData


--------------------------------------------------------------------------------
/data/compcorr_nsim.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim.RData


--------------------------------------------------------------------------------
/data/compcorr_nsim_0004.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim_0004.RData


--------------------------------------------------------------------------------
/data/compcorr_nsim_0005.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim_0005.RData


--------------------------------------------------------------------------------
/data/compcorr_nsim_0506.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/compcorr_nsim_0506.RData


--------------------------------------------------------------------------------
/data/coverage50.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/coverage50.RData


--------------------------------------------------------------------------------
/data/ex_g1_h0.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ex_g1_h0.RData


--------------------------------------------------------------------------------
/data/ex_g1_h0_t_samp_dist.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ex_g1_h0_t_samp_dist.RData


--------------------------------------------------------------------------------
/data/gh_coverage.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/gh_coverage.RData


--------------------------------------------------------------------------------
/data/gh_t_sampdist.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/gh_t_sampdist.RData


--------------------------------------------------------------------------------
/data/onesamp_coverage.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage.RData


--------------------------------------------------------------------------------
/data/onesamp_coverage2.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage2.RData


--------------------------------------------------------------------------------
/data/onesamp_coverage_n10.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage_n10.RData


--------------------------------------------------------------------------------
/data/onesamp_coverage_nboot200.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage_nboot200.RData


--------------------------------------------------------------------------------
/data/onesamp_coverage_nboot2000.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_coverage_nboot2000.RData


--------------------------------------------------------------------------------
/data/onesamp_stability.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/onesamp_stability.RData


--------------------------------------------------------------------------------
/data/ptb_params.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ptb_params.RData


--------------------------------------------------------------------------------
/data/ptb_tboot.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/ptb_tboot.RData


--------------------------------------------------------------------------------
/data/twosamp_coverage.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/twosamp_coverage.RData


--------------------------------------------------------------------------------
/data/twosamp_fp.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/data/twosamp_fp.RData


--------------------------------------------------------------------------------
/docs/2depgps_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/docs/2depgps_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/docs/2depgps_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/docs/2depgps_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/docs/2depgps_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/docs/2depgps_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2depgps_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/docs/2indgps.md:
--------------------------------------------------------------------------------
  1 | Compare two independent groups
  2 | ================
  3 | Guillaume A. Rousselet
  4 | 2022-06-13
  5 | 
  6 | # Dependencies
  7 | 
  8 | ``` r
  9 | library(tibble)
 10 | library(ggplot2)
 11 | # library(cowplot)
 12 | source("./functions/theme_gar.txt")
 13 | source("./functions/Rallfun-v40.txt")
 14 | source("./functions/functions.txt")
 15 | ```
 16 | 
 17 | ``` r
 18 | sessionInfo()
 19 | ```
 20 | 
 21 |     ## R version 4.2.0 (2022-04-22)
 22 |     ## Platform: x86_64-apple-darwin17.0 (64-bit)
 23 |     ## Running under: macOS Catalina 10.15.7
 24 |     ## 
 25 |     ## Matrix products: default
 26 |     ## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
 27 |     ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
 28 |     ## 
 29 |     ## locale:
 30 |     ## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
 31 |     ## 
 32 |     ## attached base packages:
 33 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 34 |     ## 
 35 |     ## other attached packages:
 36 |     ## [1] ggplot2_3.3.6 tibble_3.1.7 
 37 |     ## 
 38 |     ## loaded via a namespace (and not attached):
 39 |     ##  [1] rstudioapi_0.13  knitr_1.39       magrittr_2.0.3   tidyselect_1.1.2
 40 |     ##  [5] munsell_0.5.0    colorspace_2.0-3 R6_2.5.1         rlang_1.0.2     
 41 |     ##  [9] fastmap_1.1.0    fansi_1.0.3      dplyr_1.0.9      stringr_1.4.0   
 42 |     ## [13] tools_4.2.0      grid_4.2.0       gtable_0.3.0     xfun_0.31       
 43 |     ## [17] utf8_1.2.2       cli_3.3.0        withr_2.5.0      htmltools_0.5.2 
 44 |     ## [21] ellipsis_0.3.2   yaml_2.3.5       digest_0.6.29    lifecycle_1.0.1 
 45 |     ## [25] crayon_1.5.1     purrr_0.3.4      vctrs_0.4.1      glue_1.6.2      
 46 |     ## [29] evaluate_0.15    rmarkdown_2.14   stringi_1.7.6    compiler_4.2.0  
 47 |     ## [33] pillar_1.7.0     generics_0.1.2   scales_1.2.0     pkgconfig_2.0.3
 48 | 
 49 | Wilcox’s functions for two independent groups
 50 | 
 51 | ``` r
 52 | # to compare any estimators
 53 | pb2gen(x, y, alpha=0.05, nboot=1000, est=median, SEED=FALSE)
 54 | # same as pb2gen but only to compare medians:
 55 | medpb2()
 56 | # same as pb2gen but only to compare trimmed means:
 57 | trimpb2()
 58 | # to compare variances:
 59 | comvar2()
 60 | # For robust measures of scale, use pb2gen
 61 | ```
 62 | 
 63 | # Illustrate populations
 64 | 
 65 | We sample from 2 populations that differ in skewness, leading to large
 66 | differences in the tails.
 67 | 
 68 | ``` r
 69 | meanlog1 <- 0
 70 | meanlog2 <- 0.2
 71 | sdlog1 <- 0.5
 72 | sdlog2 <- 0.7
 73 | 
 74 | x <- seq(0, 5, 0.1)
 75 | 
 76 | p <- ggplot(as.tibble(x), aes(x = x)) + theme_gar +
 77 |   stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0, 0.5)"),
 78 |                 fun = dlnorm, args = list(meanlog = meanlog1, sdlog = sdlog1)) +
 79 |   stat_function(geom = "line", size = 1.5, aes(colour = "Lognormal(0.2, 0.7)"),
 80 |                 fun = dlnorm, args = list(meanlog = meanlog2, sdlog = sdlog2)) +
 81 |   labs(x = "Values", y = "Density") +
 82 |   scale_colour_manual(name = "", values = c("Lognormal(0, 0.5)" = "orange",
 83 |                                             "Lognormal(0.2, 0.7)" = "purple")) +
 84 |   theme(legend.position = c(.6, .8),
 85 |         axis.text.y=element_blank(),  #remove y axis labels
 86 |         axis.ticks.y=element_blank()) +
 87 |   guides(colour = guide_legend(override.aes = list(size = 3)))
 88 | ```
 89 | 
 90 |     ## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
 91 |     ## Please use `as_tibble()` instead.
 92 |     ## The signature and semantics have changed, see `?as_tibble`.
 93 |     ## This warning is displayed once every 8 hours.
 94 |     ## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
 95 | 
 96 | ``` r
 97 | p.pop <- p
 98 | p
 99 | ```
100 | 
101 | ![](2indgps_files/figure-gfm/unnamed-chunk-4-1.png)<!-- -->
102 | 
103 | ## Population normalise difference
104 | 
105 | We use samples of 100,000 values to define the population normalise
106 | difference between 3rd quartiles.
107 | 
108 | ``` r
109 | set.seed(21)
110 | normdiff(q3(rlnorm(100000, meanlog = meanlog1, sdlog = sdlog1)), q3(rlnorm(100000, meanlog = meanlog2, sdlog = sdlog2)))
111 | ```
112 | 
113 |     ## [1] -0.1666533
114 | 
115 | # Generate samples
116 | 
117 | We get 2 relatively large samples from 2 populations.
118 | 
119 | ``` r
120 | set.seed(21)
121 | n1 <- 50
122 | n2 <- 60
123 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1)
124 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2)
125 | ```
126 | 
127 | Instead of the traditional measures of central tendency, like the mean
128 | or the median, here we’re going to estimate the 3rd quartile of the
129 | marginal distributions. This can be justified by an interest in
130 | differences among slower responses. To illustrate that with the
131 | bootstrap we can build confidence intervals for any quantity, the group
132 | estimations will be on a normalised difference of quartiles:
133 | ![effect = (q3_1-q3_2)/(q3_1+q3_2)](https://latex.codecogs.com/png.image?%5Cdpi%7B110%7D&space;%5Cbg_white&space;effect%20%3D%20%28q3_1-q3_2%29%2F%28q3_1%2Bq3_2%29 "effect = (q3_1-q3_2)/(q3_1+q3_2)").
134 | 
135 | # Illustrate samples
136 | 
137 | ``` r
138 | set.seed(21) # for reproducible jitter
139 | # raw data
140 | df <- tibble(val = c(samp1, samp2),
141 |              y = rep(1, n1+n2),
142 |              gp = factor(c(rep("Group 1",n1),rep("Group 2",n2)))
143 |              )
144 | 
145 | df.q2 <- tibble(y = rep(0.9,2), 
146 |                 yend = rep(1.1,2),
147 |                 x = c(q2(samp1),q2(samp2)), 
148 |                 xend = x,
149 |                 gp = factor(c("Group 1","Group 2"))
150 |                 )
151 | 
152 | df.q3 <- tibble(y = rep(0.9,2), 
153 |                 yend = rep(1.1,2),
154 |                 x = c(q3(samp1),q3(samp2)), 
155 |                 xend = x,
156 |                 gp = factor(c("Group 1","Group 2"))
157 |                 )
158 | 
159 | p <- ggplot(data = df, aes(x = val, y = y)) + theme_gar +
160 |   # scatterplots
161 |   geom_jitter(height = .05, alpha = 0.5, 
162 |               size = 3, shape = 21, fill = "grey", colour = "black") +
163 |   theme(axis.ticks.y = element_blank(),
164 |     axis.text.y = element_blank(),
165 |     axis.title.y = element_blank(),
166 |     panel.grid.minor.x = element_blank()) +
167 |   scale_y_continuous(breaks = 1) +
168 |   # 3rd quartile
169 |   geom_segment(data = df.q3, aes(y = y, yend = yend,
170 |     x = x, xend = xend),
171 |     size = 2, lineend = 'round', colour = "grey") +
172 |   # median
173 |   geom_segment(data = df.q2, aes(y = y, yend = yend,
174 |     x = x, xend = xend),
175 |     size = 0.5, lineend = 'round') +
176 |   labs(x = "Values") + 
177 |   # ggtitle("Random sample") +
178 |   facet_grid(cols = vars(gp))
179 | p
180 | ```
181 | 
182 | ![](2indgps_files/figure-gfm/unnamed-chunk-7-1.png)<!-- -->
183 | 
184 | ``` r
185 | pA1 <- p
186 | ```
187 | 
188 | Thick vertical line = 3rd quartile. Thin vertical line = 2nd quartile
189 | (median).
190 | 
191 | ## Sample difference
192 | 
193 | ### Illustrate results
194 | 
195 | ``` r
196 | nb <- 20
197 | 
198 | df <- tibble(bootid = 1,
199 |              res = normdiff(q3(samp1), q3(samp2))
200 |               )
201 | 
202 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 
203 |   theme_gar +
204 |   geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)),
205 |              size = 2, colour = "grey") +
206 |   geom_vline(xintercept = 0, size = 0.75, linetype = "dashed") +
207 |   coord_cartesian(xlim = c(-0.4, 0.4)) +
208 |   scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
209 |   labs(x = "Normalised difference") +
210 |   theme(panel.grid.minor = element_blank(),
211 |         axis.title.y = element_blank()) +
212 |   ggtitle("(Group 1 - Group 2) / (Group 1 + Group 2)") +
213 |   theme(plot.title = element_text(size = 16))
214 | pA2 <- p
215 | p
216 | ```
217 | 
218 | ![](2indgps_files/figure-gfm/unnamed-chunk-8-1.png)<!-- --> population
219 | normalised difference = -0.15
220 | 
221 | Thick grey vertical line = difference between 3rd quartiles. Vertical
222 | dashed line = zero reference line.
223 | 
224 | # Generate bootstrap samples & confidence intervals
225 | 
226 | We use 5,000 bootstrap samples.
227 | 
228 | ``` r
229 | set.seed(666)
230 | nboot <- 5000
231 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot)
232 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot)
233 | ```
234 | 
235 | ## Illustrate 20 bootstrap samples
236 | 
237 | For each sample we superimpose the 3rd quartile—short vertical black
238 | line. Disks = bootstrap observations. Long vertical grey lines mark the
239 | sample 3rd quartiles.
240 | 
241 | ``` r
242 | nb <- 20
243 | df <- tibble(res = c(as.vector(bootsamp1[1:nb,]),as.vector(bootsamp2[1:nb,])),
244 |              bootid = c(rep(1:nb, each = n1),rep(1:nb, each = n2)),
245 |              gp = factor(c(rep("Group 1",nb*n1),rep("Group 2",nb*n2))))
246 | 
247 | df2 <- tibble(bootid = rep(1:nb,2),
248 |               res = c(apply(bootsamp1[1:nb,],1,q3),apply(bootsamp2[1:nb,],1,q3)),
249 |               gp = factor(rep(c("Group 1", "Group 2"), each = nb))
250 |               )
251 | 
252 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar +
253 |   # 3rd quartile
254 |   geom_vline(data = df.q3, aes(group = gp, xintercept = x), size = 2, colour = "grey") +
255 |   # scatterplots
256 |   geom_point(alpha = 0.3, position = position_jitter(height=0.1)) +
257 |   labs(x = "Values", y = "Bootstrap samples") +
258 |   geom_segment(data = df2, aes(x = res, xend = res,
259 |                                y = bootid - 0.4, yend = bootid + 0.4),
260 |                size = 1.5) +
261 |   theme(panel.grid.minor = element_blank()) +
262 |   scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01))) +
263 |   facet_grid(cols = vars(gp))
264 | ```
265 | 
266 |     ## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
267 | 
268 | ``` r
269 | pB1 <- p
270 | p
271 | ```
272 | 
273 | ![](2indgps_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
274 | 
275 | ## Illustrate bootstrap normalised differences
276 | 
277 | ### Illustrate results
278 | 
279 | ``` r
280 | nb <- 20
281 | 
282 | df <- tibble(bootid = 1:nb,
283 |              res = normdiff(apply(bootsamp1[1:nb,],1,q3), apply(bootsamp2[1:nb,],1,q3))
284 |               )
285 | 
286 | p <- ggplot(df, aes(x = res, xend = res, y = bootid - 0.4, yend = bootid + 0.4)) + 
287 |   theme_gar +
288 |   geom_vline(xintercept = normdiff(q3(samp1),q3(samp2)), size = 2, colour = "grey") +
289 |    geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") +
290 |   coord_cartesian(xlim = c(-0.4, 0.4)) +
291 |   scale_x_continuous(breaks = seq(-0.5, 0.5, 0.1)) +
292 |   labs(x = "Normalised differences", y = "Bootstrap samples") +
293 |   geom_segment(size = 1.5) +
294 |   theme(panel.grid.minor = element_blank()) +
295 |   scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01)))
296 | ```
297 | 
298 |     ## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
299 | 
300 | ``` r
301 | pB2 <- p
302 | p
303 | ```
304 | 
305 | ![](2indgps_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
306 | 
307 | # Bootstrap distributions
308 | 
309 | ## Compute confidence intervals
310 | 
311 | ``` r
312 | alpha <- 0.05
313 | boot1.q3 <- apply(bootsamp1,1,q3)
314 | boot2.q3 <- apply(bootsamp2,1,q3)
315 | ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2))
316 | ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2))
317 | bootdiff <- normdiff(boot1.q3, boot2.q3)
318 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 
319 | ```
320 | 
321 | ## Bootstrap distributions of group quantiles
322 | 
323 | ``` r
324 | df <- tibble(x = c(boot1.q3, boot2.q3),
325 |              gp = factor(c(rep("Group 1",nboot),rep("Group 2",nboot))))
326 | 
327 | # df.q3 <- tibble(q3 = c(q3(samp1), q3(samp2)),
328 | #                  group = factor(c("Group 1", "Group 2")))
329 | 
330 | df.ci <- tibble(x = c(ci1[1], ci2[1]),
331 |                 xend = c(ci1[2], ci2[2]),
332 |                 y = c(0, 0),
333 |                 yend = c(0, 0),
334 |                 gp = factor(c("Group 1", "Group 2")))
335 |   
336 | p <- ggplot(df, aes(x = x)) + theme_gar +
337 |   # sample q3: vertical line + label
338 |   geom_vline(data = df.q3, aes(xintercept = x, group = gp), size = 2, colour = "grey") +
339 |   # density
340 |   geom_line(stat = "density", size = 1) +
341 |   # confidence interval ----------------------
342 |   geom_segment(data = df.ci,
343 |                aes(x = x, xend = xend, y = y, yend = yend),
344 |                lineend = "round", size = 2, colour = "black") +
345 |   labs(x = "Bootstrap quantiles",
346 |        y = "Density") +
347 |   theme(axis.text.y=element_blank(),  #remove y axis labels
348 |         axis.ticks.y=element_blank()) +
349 |   facet_grid(cols = vars(gp))
350 | p
351 | ```
352 | 
353 | ![](2indgps_files/figure-gfm/unnamed-chunk-13-1.png)<!-- -->
354 | 
355 | ``` r
356 | pC1 <- p
357 | ```
358 | 
359 | ## Group normalised differences
360 | 
361 | ``` r
362 | samp.nd <- normdiff(q3(samp1), q3(samp2))
363 | 
364 | df <- tibble(x = bootdiff)
365 | 
366 | p <- ggplot(df, aes(x = x)) + theme_gar +
367 |   # sample 3rd quartile: vertical line
368 |   geom_vline(xintercept = samp.nd, linetype = 'solid', size = 2, colour = "grey") +
369 |       geom_line(stat = "density", size = 1) +
370 |   labs(x = "Bootstrap differences",
371 |        y = "Density") +
372 |   theme(axis.text.y=element_blank(),  #remove y axis labels
373 |         axis.ticks.y=element_blank()) +
374 |   # confidence interval ----------------------
375 |   geom_segment(x = ci.diff[1], xend = ci.diff[2],
376 |                y = 0, yend = 0,
377 |                lineend = "round", size = 2, colour = "black") +
378 |   # zero reference line
379 |   geom_vline(xintercept = 0, size = 0.5, linetype = "longdash") +
380 |   ggtitle(paste0(round(samp.nd,digits = 3),
381 |                  " [",
382 |                  round(ci.diff[1],digits = 3),
383 |                  ", ",
384 |                  round(ci.diff[2],digits = 3)
385 |                  ,"]"))
386 | p
387 | ```
388 | 
389 | ![](2indgps_files/figure-gfm/unnamed-chunk-14-1.png)<!-- -->
390 | 
391 | ``` r
392 | pC2 <- p
393 | ```
394 | 
395 | # Make summary figure
396 | 
397 | ``` r
398 | pvoid <- ggplot() + theme_void() # empty plot for blank space
399 | 
400 | # pA <- cowplot::plot_grid(pA1, pA2,
401 | #                     labels = NA,
402 | #                     ncol = 2,
403 | #                     nrow = 1,
404 | #                     rel_widths = c(2, 1), 
405 | #                     label_size = 20, 
406 | #                     hjust = -0.5, 
407 | #                     scale=.95)
408 | # 
409 | # pB <- cowplot::plot_grid(pB1, pB2,
410 | #                     labels = NA,
411 | #                     ncol = 2,
412 | #                     nrow = 1,
413 | #                     rel_widths = c(2, 1), 
414 | #                     label_size = 20, 
415 | #                     hjust = -0.5, 
416 | #                     scale=.95)
417 | # 
418 | # pC <- cowplot::plot_grid(pC1, pC2,
419 | #                     labels = NA,
420 | #                     ncol = 2,
421 | #                     nrow = 1,
422 | #                     rel_widths = c(2, 1), 
423 | #                     label_size = 20, 
424 | #                     hjust = -0.5, 
425 | #                     scale=.95)
426 | 
427 | p.LEFT <- cowplot::plot_grid(p.pop, pA1, pB1, pC1,
428 |                     labels = c("A", "B", "C", "D"),
429 |                     ncol = 1,
430 |                     nrow = 4,
431 |                     rel_heights = c(2, 1, 3, 2),
432 |                     label_size = 20,
433 |                     align = "v",
434 |                     axis = "l",
435 |                     hjust = -0.5,
436 |                     scale=.95)
437 | 
438 | p.RIGHT <- cowplot::plot_grid(pvoid, pA2, pB2, pC2,
439 |                     labels = NA,
440 |                     ncol = 1,
441 |                     nrow = 4,
442 |                     rel_heights = c(2, 1, 3, 2),
443 |                     label_size = 20,
444 |                     align = "v",
445 |                     axis = "l",
446 |                     hjust = -0.5,
447 |                     scale=.95)
448 | 
449 | cowplot::plot_grid(p.LEFT, p.RIGHT,
450 |                     labels = NA,
451 |                     ncol = 2,
452 |                     nrow = 1,
453 |                     rel_widths = c(2, 1),
454 |                     label_size = 20,
455 |                     align = "h",
456 |                     axis = "b",
457 |                     hjust = -0.5,
458 |                     scale=.95)
459 | 
460 | # save figure
461 | ggsave(filename=('./figures/figure_2indgps.pdf'),width=17,height=15)
462 | ggsave(filename=('./figures/figure9.pdf'),width=17,height=15)
463 | ```
464 | 
465 | # Confidence interval with n = 100
466 | 
467 | ``` r
468 | set.seed(21)
469 | n1 <- 100
470 | n2 <- 100
471 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1)
472 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2)
473 | nboot <- 5000
474 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot)
475 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot)
476 | alpha <- 0.05
477 | boot1.q3 <- apply(bootsamp1,1,q3)
478 | boot2.q3 <- apply(bootsamp2,1,q3)
479 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2))
480 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2))
481 | bootdiff <- normdiff(boot1.q3, boot2.q3)
482 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 
483 | round(ci.diff, digits = 2)
484 | ```
485 | 
486 |     ##  2.5% 97.5% 
487 |     ## -0.21  0.01
488 | 
489 | # Confidence interval with n = 200
490 | 
491 | ``` r
492 | set.seed(21)
493 | n1 <- 200
494 | n2 <- 200
495 | samp1 <- rlnorm(n1, meanlog = meanlog1, sdlog = sdlog1)
496 | samp2 <- rlnorm(n2, meanlog = meanlog2, sdlog = sdlog2)
497 | nboot <- 5000
498 | bootsamp1 <- matrix(sample(samp1, nboot * n1, replace = TRUE), nrow = nboot)
499 | bootsamp2 <- matrix(sample(samp2, nboot * n2, replace = TRUE), nrow = nboot)
500 | alpha <- 0.05
501 | boot1.q3 <- apply(bootsamp1,1,q3)
502 | boot2.q3 <- apply(bootsamp2,1,q3)
503 | # ci1 <- quantile(boot1.q3, probs = c(alpha/2, 1-alpha/2))
504 | # ci2 <- quantile(boot2.q3, probs = c(alpha/2, 1-alpha/2))
505 | bootdiff <- normdiff(boot1.q3, boot2.q3)
506 | ci.diff <- quantile(bootdiff, probs = c(alpha/2, 1-alpha/2)) 
507 | round(ci.diff, digits = 2)
508 | ```
509 | 
510 |     ##  2.5% 97.5% 
511 |     ## -0.22 -0.11
512 | 


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/docs/2indgps_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/2indgps_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/define-populations-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/define-populations-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-28-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-28-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-29-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-29-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-30-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-30-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-32-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-32-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-33-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-33-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-34-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-34-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-37-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-37-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-38-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-38-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-42-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-42-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-43-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-43-1.png


--------------------------------------------------------------------------------
/docs/compcorr_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/compcorr_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/docs/coverage_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/coverage_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/docs/notrobust.md:
--------------------------------------------------------------------------------
  1 | The bootstrap is not robust
  2 | ================
  3 | Guillaume A. Rousselet
  4 | 2022-06-13
  5 | 
  6 | # Dependencies
  7 | 
  8 | ``` r
  9 | library(tibble)
 10 | library(ggplot2)
 11 | # library(cowplot)
 12 | source("./functions/Rallfun-v40.txt")
 13 | source("./functions/theme_gar.txt")
 14 | ```
 15 | 
 16 | The bootstrap is sometimes described as a robust technique. In itself,
 17 | it is not robust. A simple example can illustrate this lack of
 18 | robustness: percentile bootstrap confidence intervals for the mean are
 19 | not robust, because the mean is not a robust estimator of central
 20 | tendency.
 21 | 
 22 | # Generate data and compute confidence intervals
 23 | 
 24 | ``` r
 25 | set.seed(21)
 26 | n <- 10
 27 | samp <- rnorm(n, 8, 2) 
 28 | samp <- c(samp, 17)
 29 | samp_mat <- matrix(NA, nrow = 12, ncol = 7)
 30 | ci_mean_t <- matrix(NA, nrow = 2, ncol = 7)
 31 | mean_res <- vector(mode = "numeric", length = 7)
 32 | median_res <- vector(mode = "numeric", length = 7)
 33 | ci_mean_pb <- matrix(NA, nrow = 2, ncol = 7)
 34 | ci_median_pb <- matrix(NA, nrow = 2, ncol = 7)
 35 | ci_median_param <- matrix(NA, nrow = 2, ncol = 7)
 36 | for(C in 1:7){
 37 |   todo <- c(samp, 17+C^2)
 38 |   samp_mat[,C] <- todo
 39 |   mean_res[C] <- mean(todo)
 40 |   median_res[C] <- median(todo)
 41 |   ci_mean_t[,C] <- t.test(todo)$conf.int
 42 |   ci_mean_pb[,C] <- onesampb(todo, mean)$ci # default to nboot = 2000
 43 |   ci_median_pb[,C] <- onesampb(todo, median)$ci
 44 |   ci_median_param[,C] <- sint(todo) # parametric method for the median
 45 | }
 46 | ```
 47 | 
 48 | # Illustrate results: mean + standard CI
 49 | 
 50 | ``` r
 51 | set.seed(777) # for reproducible jitter
 52 | # raw data
 53 | df <- tibble(res = as.vector(samp_mat),
 54 |              cond = factor(rep(1:7, each = 12)))
 55 | # mean + confidence intervals
 56 | df2 <- tibble(res = mean_res,
 57 |               cond = factor(1:7),
 58 |               ci_low = ci_mean_t[1,],
 59 |               ci_up = ci_mean_t[2,])
 60 | 
 61 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
 62 |    # scatterplots
 63 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
 64 |   geom_hline(yintercept = ci_mean_t[1,1], linetype = 'dashed') +
 65 |   geom_hline(yintercept = ci_mean_t[2,1], linetype = 'dashed') +
 66 |     # confidence intervals
 67 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
 68 |               width=.05, size=0.75) + 
 69 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
 70 |    theme(panel.grid.minor.x = element_blank()) +
 71 |   labs(x = "Conditions", y = "Values") +
 72 |   ggtitle("Mean: standard CI") 
 73 | p
 74 | ```
 75 | 
 76 | ![](notrobust_files/figure-gfm/unnamed-chunk-3-1.png)<!-- -->
 77 | 
 78 | ``` r
 79 | pA <- p
 80 | ```
 81 | 
 82 | # Illustrate results: mean + boot CI
 83 | 
 84 | ``` r
 85 | set.seed(777) # for reproducible jitter
 86 | # raw data
 87 | df <- tibble(res = as.vector(samp_mat),
 88 |              cond = factor(rep(1:7, each = 12)))
 89 | # mean + confidence intervals
 90 | df2 <- tibble(res = mean_res,
 91 |               cond = factor(1:7),
 92 |               ci_low = ci_mean_pb[1,],
 93 |               ci_up = ci_mean_pb[2,])
 94 | 
 95 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
 96 |    # scatterplots
 97 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
 98 |   geom_hline(yintercept = ci_mean_pb[1,1], linetype = 'dashed') +
 99 |   geom_hline(yintercept = ci_mean_pb[2,1], linetype = 'dashed') +
100 |     # confidence intervals
101 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
102 |               width=.05, size=0.75) + 
103 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
104 |    theme(panel.grid.minor.x = element_blank()) +
105 |   labs(x = "Conditions", y = "Values") +
106 |   ggtitle("Mean: bootstrap CI") 
107 | p
108 | ```
109 | 
110 | ![](notrobust_files/figure-gfm/unnamed-chunk-4-1.png)<!-- -->
111 | 
112 | ``` r
113 | pB <- p
114 | ```
115 | 
116 | # Illustrate results: median + boot CI
117 | 
118 | ``` r
119 | set.seed(777) # for reproducible jitter
120 | # raw data
121 | df <- tibble(res = as.vector(samp_mat),
122 |              cond = factor(rep(1:7, each = 12)))
123 | # mean + confidence intervals
124 | df2 <- tibble(res = median_res,
125 |               cond = factor(1:7),
126 |               ci_low = ci_median_pb[1,],
127 |               ci_up = ci_median_pb[2,])
128 | 
129 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
130 |    # scatterplots
131 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
132 |   geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') +
133 |   geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') +
134 |     # confidence intervals
135 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
136 |               width=.05, size=0.75) + 
137 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
138 |    theme(panel.grid.minor.x = element_blank()) +
139 |   labs(x = "Conditions", y = "Values") +
140 |   ggtitle("Median: bootstrap CI") 
141 | p
142 | ```
143 | 
144 | ![](notrobust_files/figure-gfm/unnamed-chunk-5-1.png)<!-- -->
145 | 
146 | ``` r
147 | pC <- p
148 | ```
149 | 
150 | # Illustrate results: median + parametric CI
151 | 
152 | ``` r
153 | set.seed(777) # for reproducible jitter
154 | # raw data
155 | df <- tibble(res = as.vector(samp_mat),
156 |              cond = factor(rep(1:7, each = 12)))
157 | # mean + confidence intervals
158 | df2 <- tibble(res = median_res,
159 |               cond = factor(1:7),
160 |               ci_low = ci_median_param[1,],
161 |               ci_up = ci_median_param[2,])
162 | 
163 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
164 |    # scatterplots
165 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
166 |   geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') +
167 |   geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') +
168 |     # confidence intervals
169 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
170 |               width=.05, size=0.75) + 
171 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
172 |    theme(panel.grid.minor.x = element_blank()) +
173 |   labs(x = "Conditions", y = "Values") +
174 |   ggtitle("Median: parametric CI") 
175 | p
176 | ```
177 | 
178 | ![](notrobust_files/figure-gfm/unnamed-chunk-6-1.png)<!-- -->
179 | 
180 | ``` r
181 | # pC <- p
182 | ```
183 | 
184 | # Summary figure
185 | 
186 | ``` r
187 | cowplot::plot_grid(pA, pB, pC,
188 |                     labels = c("A", "B", "C"),
189 |                     ncol = 3,
190 |                     nrow = 1,
191 |                     rel_widths = c(1, 1, 1), 
192 |                     label_size = 20, 
193 |                     hjust = -0.5, 
194 |                     scale=.95,
195 |                     align = "h")
196 | 
197 | # save figure
198 | ggsave(filename=('./figures/figure_notrobust.pdf'),width=10,height=5)
199 | ggsave(filename=('./figures/figure6.pdf'),width=10,height=5)
200 | ```
201 | 


--------------------------------------------------------------------------------
/docs/notrobust_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/docs/notrobust_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/docs/notrobust_files/figure-gfm/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/docs/notrobust_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/notrobust_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/docs/pb.md:
--------------------------------------------------------------------------------
  1 | Percentile bootstrap
  2 | ================
  3 | Guillaume A. Rousselet
  4 | 2022-06-13
  5 | 
  6 | # Dependencies
  7 | 
  8 | ``` r
  9 | library(tibble)
 10 | library(ggplot2)
 11 | # library(cowplot)
 12 | # library(HDInterval)
 13 | source("./functions/theme_gar.txt")
 14 | ```
 15 | 
 16 | ``` r
 17 | sessionInfo()
 18 | ```
 19 | 
 20 |     ## R version 4.2.0 (2022-04-22)
 21 |     ## Platform: x86_64-apple-darwin17.0 (64-bit)
 22 |     ## Running under: macOS Catalina 10.15.7
 23 |     ## 
 24 |     ## Matrix products: default
 25 |     ## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
 26 |     ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
 27 |     ## 
 28 |     ## locale:
 29 |     ## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
 30 |     ## 
 31 |     ## attached base packages:
 32 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 33 |     ## 
 34 |     ## other attached packages:
 35 |     ## [1] ggplot2_3.3.6 tibble_3.1.7 
 36 |     ## 
 37 |     ## loaded via a namespace (and not attached):
 38 |     ##  [1] rstudioapi_0.13  knitr_1.39       magrittr_2.0.3   tidyselect_1.1.2
 39 |     ##  [5] munsell_0.5.0    colorspace_2.0-3 R6_2.5.1         rlang_1.0.2     
 40 |     ##  [9] fastmap_1.1.0    fansi_1.0.3      dplyr_1.0.9      stringr_1.4.0   
 41 |     ## [13] tools_4.2.0      grid_4.2.0       gtable_0.3.0     xfun_0.31       
 42 |     ## [17] utf8_1.2.2       cli_3.3.0        withr_2.5.0      htmltools_0.5.2 
 43 |     ## [21] ellipsis_0.3.2   yaml_2.3.5       digest_0.6.29    lifecycle_1.0.1 
 44 |     ## [25] crayon_1.5.1     purrr_0.3.4      vctrs_0.4.1      glue_1.6.2      
 45 |     ## [29] evaluate_0.15    rmarkdown_2.14   stringi_1.7.6    compiler_4.2.0  
 46 |     ## [33] pillar_1.7.0     generics_0.1.2   scales_1.2.0     pkgconfig_2.0.3
 47 | 
 48 | # Bootstrap implementation
 49 | 
 50 | Let’s look at how the bootstrap is implemented in the one-sample case.
 51 | See an interactive demo
 52 | [here](https://seeing-theory.brown.edu/frequentist-inference/index.html#section2).
 53 | 
 54 | ## Sampling with replacement
 55 | 
 56 | Test the `sample()` function. Let say our sample is a sequence of
 57 | integers. We sample with replacement from that sequence of numbers.
 58 | Execute chunk several times to see what happens.
 59 | 
 60 | ``` r
 61 | n <- 10 # sample size
 62 | samp <- 1:n
 63 | boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement
 64 | boot.samp
 65 | ```
 66 | 
 67 |     ##  [1]  3 10  4  6  9  9  1 10  5  8
 68 | 
 69 | Generate 3 bootstrap samples for the article:
 70 | 
 71 | ``` r
 72 | set.seed(21) # reproducible example
 73 | n <- 6
 74 | samp <- 1:n
 75 | nboot <- 3
 76 | matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot)
 77 | ```
 78 | 
 79 |     ##      [,1] [,2] [,3] [,4] [,5] [,6]
 80 |     ## [1,]    1    2    3    6    3    3
 81 |     ## [2,]    3    5    4    6    6    4
 82 |     ## [3,]    1    3    2    6    2    5
 83 | 
 84 | ## Loop
 85 | 
 86 | ``` r
 87 | set.seed(21) # reproducible results
 88 | n <- 20 # sample size
 89 | samp <- rnorm(n) # get normal sample
 90 | nboot <- 1000 # number of bootstrap samples
 91 | 
 92 | # declare vector of results
 93 | boot.m <- vector(mode = "numeric", length = nboot) # save means
 94 | boot.tm <- vector(mode = "numeric", length = nboot) # save trimmed means
 95 | boot.md <- vector(mode = "numeric", length = nboot) # save medians
 96 | 
 97 | for(B in 1:nboot){
 98 |   boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement
 99 |   boot.m[B] <- mean(boot.samp)
100 |   boot.tm[B] <- mean(boot.samp, trim = 0.2)
101 |   boot.md[B] <- median(boot.samp)
102 | }
103 | 
104 | samp.m <- mean(samp)
105 | samp.tm <- mean(samp, trim = 0.2)
106 | samp.md <- median(samp)
107 | samp.m
108 | ```
109 | 
110 |     ## [1] 0.1363416
111 | 
112 | ``` r
113 | samp.tm
114 | ```
115 | 
116 |     ## [1] 0.1835497
117 | 
118 | ``` r
119 | samp.md
120 | ```
121 | 
122 |     ## [1] 0.3028401
123 | 
124 | ### Plot original results
125 | 
126 | ``` r
127 | set.seed(1)
128 | df <- tibble(cond = factor(rep(1,n)),
129 |              res = samp) 
130 | ggplot(df, aes(x = cond, y = res)) + theme_linedraw() + 
131 |   geom_jitter(width = 0.1, alpha = 0.3) +
132 |   theme(axis.text.x = element_blank(),
133 |     axis.ticks = element_blank()) +
134 |   scale_x_discrete(name ="") +
135 |   # stat_summary(fun.y=median, geom="line")
136 |   geom_segment(aes(x = 0.9, y = samp.m, xend = 1.1, yend = samp.m)) +
137 |   geom_segment(aes(x = 0.9, y = samp.md, xend = 1.1, yend = samp.md), colour = "orange") +
138 |   geom_segment(aes(x = 0.9, y = samp.tm, xend = 1.1, yend = samp.tm), colour = "blue") +
139 |   annotate("text", x = 1.185, y = samp.m-0.1, label = 'bold("Mean")', size = 4, parse = TRUE) +
140 |   annotate("text", x = 1.2, y = samp.md+0.1, label = 'bold("Median")', size = 4, colour = "orange", parse = TRUE) +
141 |   annotate("text", x = 0.75, y = samp.tm, label = 'bold("Trimmed\nmean")', size = 4, colour = "blue", parse = TRUE)
142 | ```
143 | 
144 | ![](pb_files/figure-gfm/unnamed-chunk-6-1.png)<!-- -->
145 | 
146 | ### Plot distributions of bootstrap estimates
147 | 
148 | The distribution of bootstrap medians is multi-modal and very different
149 | from that of the mean and the 20% trimmed mean. To compute confidence
150 | intervals for the median in the one-sample case, it is recommended to
151 | use the parametric approach implemented in the function `sint()`
152 | available in the file *./functions/Rallfun-v40.txt* in this repository.
153 | 
154 | ``` r
155 | df <- tibble(res = c(boot.m, boot.tm, boot.md),
156 |              est = factor(c(rep("Mean",nboot), rep("Trimmed mean",nboot), rep("Median",nboot)))
157 |              )
158 | ggplot(df, aes(x = res, colour = est)) + theme_gar +
159 |   geom_line(aes(y = ..density..), stat = "density", size = 1) +
160 |   labs(x = "Bootstrap estimates", y = "Density") + 
161 |   theme(legend.position = "bottom",
162 |         axis.text.y = element_blank(),
163 |         axis.ticks.y = element_blank())
164 | ```
165 | 
166 | ![](pb_files/figure-gfm/unnamed-chunk-7-1.png)<!-- -->
167 | 
168 | ``` r
169 |   # ggtitle("Boostrap samples")
170 | ```
171 | 
172 | ## Matrix
173 | 
174 | ``` r
175 | set.seed(21) # reproducible results
176 | n <- 20 # sample size
177 | samp <- rnorm(n) # get normal sample
178 | nboot <- 1000 # number of bootstrap samples
179 | # sample with replacement + reoganise into a matrix
180 | boot.samp <- matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot)
181 | boot.m <- apply(boot.samp, 1, mean)
182 | boot.md <- apply(boot.samp, 1, median)
183 | ```
184 | 
185 | ## Functions
186 | 
187 | Examples of R packages for bootstrap inferences:
188 | 
189 | -   [`boot`](https://www.statmethods.net/advstats/bootstrapping.html)
190 | 
191 | -   [`resample`](https://cran.r-project.org/web/packages/resample/index.html)
192 | 
193 | -   [`bootstrap`](https://cran.r-project.org/web/packages/bootstrap/index.html)
194 | 
195 | -   [`WRS2`](https://cran.r-project.org/web/packages/WRS2/index.html)
196 | 
197 | Functions from [Rand
198 | Wilcox](https://dornsife.usc.edu/labs/rwilcox/software/).
199 | 
200 | ``` r
201 | # TO USE THE FUNCTIONS, FIRST USE THE SOURCE COMMAND:
202 | # source('./functions/Rallfun-v40.txt')
203 | 
204 | set.seed(1) # reproducible results
205 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0)
206 | # est  = estimator, could be var, mad, to use a trimmed mean, add argument trim = 0.2
207 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0, trim = 0.1)
208 | # nv = null value for NHST
209 | # always set SEED to FALSE otherwise the function always returns the same results for a given input.
210 | # the only way to really understand the code is to look at it: edit(onesampb)
211 | 
212 | # for inferences on trimmed means only:
213 | trimpb()
214 | 
215 | # for inferences on the Harrell-Davis quantile estimator (default q=0.5 = median):
216 | hdpb()
217 | ```
218 | 
219 | # Generate sample from lognormal distribution
220 | 
221 | ## Illustrate population
222 | 
223 | Lognormal distribution from the which the sample is taken.
224 | 
225 | ``` r
226 | x <- seq(0, 7, 0.001)
227 | y <- dlnorm(x)
228 | 
229 | df <- tibble(x = x, y = y) 
230 | 
231 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar +
232 |   geom_line(size = 1.5, colour = "orange") +
233 |   labs(x = "Values", y = "Density") +
234 |   theme(axis.text.y = element_blank(),
235 |         axis.ticks.y = element_blank())
236 | p
237 | ```
238 | 
239 | ![](pb_files/figure-gfm/unnamed-chunk-10-1.png)<!-- -->
240 | 
241 | ## Get sample
242 | 
243 | ``` r
244 | set.seed(21) # reproducible example
245 | n <- 30 # sample size
246 | meanlog <- 0
247 | sdlog <- 1
248 | samp <- rlnorm(n, meanlog = meanlog, sdlog = sdlog) # random sample
249 | ```
250 | 
251 | ## Illustrate sample
252 | 
253 | ``` r
254 | set.seed(21) # for reproducible jitter
255 | # raw data
256 | df <- tibble(pc = samp,
257 |              cond = rep(1, n))
258 | 
259 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar +
260 |   # scatterplots
261 |     geom_jitter(width = .05, alpha = 0.5, 
262 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
263 |   theme(axis.ticks.x = element_blank(),
264 |         axis.text.x = element_blank(),
265 |         axis.title.x = element_blank()) +
266 |   scale_x_continuous(breaks = 1) +
267 |   # mean
268 |   geom_segment(aes(x = 0.9, xend = 1.1,
269 |                    y = mean(samp), yend = mean(samp))) +
270 |   # median
271 |   geom_segment(aes(x = 0.9, xend = 1.1,
272 |                    y = median(samp), yend = median(samp)),
273 |                linetype = 'longdash', lineend = 'round') +
274 |   theme(panel.grid.minor.x = element_blank()) +
275 |   labs(y = "Values")
276 | p
277 | ```
278 | 
279 | ![](pb_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->
280 | 
281 | ``` r
282 | pA <- p
283 | ```
284 | 
285 | Sample mean = 1.61
286 | 
287 | # Standard confidence interval
288 | 
289 | ## T-value: define function
290 | 
291 | ``` r
292 | # mean minus null value divided by SEM
293 | tval <- function(x,nv){
294 |   tval <- (mean(x) - nv) / sqrt(var(x)/length(x))
295 |   tval
296 | }
297 | ```
298 | 
299 | ## P value
300 | 
301 | Let say our hypothesis is mu = 2.
302 | 
303 | First, let’s use the function:
304 | 
305 | ``` r
306 | mu <- 2 # null hypothesis
307 | t.test(samp, mu = mu)$p.value
308 | ```
309 | 
310 |     ## [1] 0.2667699
311 | 
312 | Then check the formula:
313 | 
314 | ``` r
315 | dof <- length(samp)-1 # degrees of freedom
316 | 2 * pt(abs(tval(samp, mu)), dof, lower.tail = FALSE)
317 | ```
318 | 
319 |     ## [1] 0.2667699
320 | 
321 | ## Illustrate theoretical *T* distribution
322 | 
323 | ``` r
324 | alpha <- 0.05
325 | 
326 | p <- ggplot(data.frame(x = c(-5, 5)), aes(x)) + theme_gar +
327 |   labs(y = "Density") +
328 |   theme(axis.text = element_text(size = 14),
329 |         axis.title = element_text(size = 16),
330 |         plot.title = element_text(size=20),
331 |         axis.text.y = element_blank(),
332 |         axis.ticks.y = element_blank()) +
333 |   labs(x = "T values") +
334 |   ggtitle(substitute(paste(italic(T)," distribution with ",dof," degrees of freedom"), list(dof=dof))) +
335 |   # area under the curve -> p value
336 |   # https://christianburkhart.de/blog/area_under_the_curve/
337 |   stat_function(fun = dt,
338 |                 geom = "area",
339 |                 xlim = c(-5, tval(samp, mu)),
340 |                 alpha = .2,
341 |                 fill = "red",
342 |                 args = list(df = dof)) +
343 |   # theoretical cut-off t value for alpha = 0.05
344 |   geom_segment(x = qt(alpha/2, dof),
345 |                xend = qt(alpha/2, dof),
346 |                y = 0,
347 |                yend = dt(qt(alpha/2, dof), dof),
348 |                size = 1,
349 |                colour = "red") +
350 |     annotate(geom = "label", x = -2.7, y = 0.07, size  = 7, colour = "red",
351 |              label = expression(paste("T"[crit]))) + # italic("t")
352 |   # observed t value
353 |   # geom_vline(xintercept = abs(tval(samp, mu)))
354 |   geom_segment(x = tval(samp, mu),
355 |                xend = tval(samp, mu),
356 |                y = 0,
357 |                yend = dt(tval(samp, mu), dof),
358 |                size = 1,
359 |                colour = "black",
360 |                linetype = "dashed") +
361 |     annotate(geom = "label", x = -1.8, y = 0.22, size  = 7, colour = "black",
362 |              label = expression(paste("T"[obs]))) + # paste("obs.", italic("t")))
363 |   # Plot density function
364 |   stat_function(fun = dt, args = list(df = dof),
365 |                 size = 1) +
366 |   # P value
367 |   geom_segment(x = -2.8, xend = -1.5,
368 |                y = 0.15, yend = 0.07,
369 |                arrow = arrow(type = "closed", 
370 |                              length = unit(0.25, "cm")),
371 |                colour = "grey30",
372 |                size = 1) +
373 |   annotate(geom = "label", x = -3, y = 0.15, size  = 7, 
374 |              colour = "white", fill = "red", fontface = "bold",
375 |              label = "P value / 2") +
376 |   annotate(geom = "label", x = 2.8, y = 0.35, size = 7,
377 |              colour = "white", fill = "black", fontface = "bold",
378 |              label = expression(paste(bold("CI = m \u00B1 T"[crit]),
379 |                                       bold(" * SEM"))))
380 | # ggsave(filename = './theor_t.pdf')
381 | pB <- p
382 | p
383 | ```
384 | 
385 | ![](pb_files/figure-gfm/unnamed-chunk-16-1.png)<!-- -->
386 | 
387 | ## Compute confidence interval
388 | 
389 | ### Using built-in function
390 | 
391 | ``` r
392 | ci.t <- t.test(samp, mu = mu)$conf.int
393 | ci.t
394 | ```
395 | 
396 |     ## [1] 0.9037929 2.3149338
397 |     ## attr(,"conf.level")
398 |     ## [1] 0.95
399 | 
400 | ### Formula
401 | 
402 | ``` r
403 | alpha <- 0.05 # alpha level
404 | df <- n-1 # degrees of freedom
405 | samp.m <- mean(samp) # sample mean
406 | sem <- sd(samp) / sqrt(n) # sample estimate of the standard error of the mean
407 | ci <- vector("numeric",2)
408 | ci[1] <- samp.m - qt(1-alpha/2, df) * sem # lower bound of CI
409 | ci[2] <- samp.m + qt(1-alpha/2, df) * sem # upper bound of CI
410 | ci
411 | ```
412 | 
413 |     ## [1] 0.9037929 2.3149338
414 | 
415 | # Generate bootstrap samples
416 | 
417 | ``` r
418 | set.seed(666)
419 | nboot <- 5000
420 | bootsamp <- matrix(sample(samp, nboot * n, replace = TRUE), nrow = nboot)
421 | ```
422 | 
423 | ## Illustrate a few bootstrap samples
424 | 
425 | For each sample we superimpose the (bootstrap) median.
426 | 
427 | ``` r
428 | nb <- 20
429 | df <- tibble(res = as.vector(bootsamp[1:nb,]),
430 |              bootid = rep(1:nb, each = n))
431 | 
432 | df2 <- tibble(bootid = 1:nb,
433 |               res = apply(bootsamp[1:nb,],1,mean))
434 | 
435 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar +
436 |   geom_point(alpha = 0.3, position = position_jitter(height=0.1)) +
437 |   labs(x = "Values", y = "Bootstrap samples") +
438 |   geom_segment(data = df2, aes(x = res, xend = res,
439 |                                y = bootid - 0.4, yend = bootid + 0.4),
440 |                size = 1.5) +
441 |   theme(panel.grid.minor = element_blank()) +
442 |   scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01)))
443 | ```
444 | 
445 |     ## Warning: `expand_scale()` is deprecated; use `expansion()` instead.
446 | 
447 | ``` r
448 | pC <- p
449 | p
450 | ```
451 | 
452 | ![](pb_files/figure-gfm/unnamed-chunk-20-1.png)<!-- -->
453 | 
454 | ## Illustrate bootstrap sampling distribution of the mean
455 | 
456 | Compute confidence interval and other quantities.
457 | 
458 | Here and in the rest of the tutorial, we compute bootstrap CI using
459 | `quantile(type = 6)` of the bootstrap distribution. This is recommended
460 | in this article:
461 | 
462 | Hesterberg, Tim C. “What Teachers Should Know About the Bootstrap:
463 | Resampling in the Undergraduate Statistics Curriculum.” The American
464 | Statistician 69, no. 4 (October 2, 2015): 371–86.
465 | <https://doi.org/10.1080/00031305.2015.1089789>.
466 | 
467 | Rand Wilcox uses a different approach. See for instance
468 | `help(onesampb)`, in which the bounds of the CI are defined using the
469 | bootstrap distribution `bvec`, `alpha` (say = 0.05) and `nboot` (say =
470 | 1,000 bootstrap samples):
471 | 
472 | `low <- round((alpha/2)*nboot)` `up <- nboot-low` `low <- low+1`
473 | `ci_lower_bound <- bvec[low]` `ci_upper_bound <- bvec[up]`
474 | 
475 | In practice it is unclear if these choices make any difference. What we
476 | know is that with `nboot` large enough, the choice of quantile method
477 | should make virtually no difference.
478 | 
479 | ``` r
480 | # bootstrap means
481 | bootm <- apply(bootsamp, 1, mean)
482 | # confidence interval
483 | bootci <- quantile(bootm, probs = c(0.025, 0.975), type = 6)
484 | # bootstrap estimation of the standard error
485 | bootsamp.sd <- sd(bootsamp)
486 | # P value
487 | pv <- mean(bootm < mu) # + .5*mean(bootsamp==mu)
488 | pv <- 2 * min(c(pv, 1-pv))
489 | ```
490 | 
491 | Bootstrap 95% CI = \[1.02, 2.38\]  
492 | Bootstrap estimate of the SEM = 1.86  
493 | Bootstrap *P* value (hypothesis = 2) = 0.276
494 | 
495 | Alternatively, we could compute a highest density interval (HDI):
496 | 
497 | ``` r
498 | require(HDInterval)
499 | boothdi <- HDInterval::hdi(bootm)
500 | ```
501 | 
502 | We illustrate the distribution of the bootstrap samples, from which we
503 | derive four elements:
504 | 
505 | -   confidence/compatibility interval
506 | 
507 | -   p value
508 | 
509 | -   bootstrap estimate of the standard error (SE)
510 | 
511 | -   bootstrap estimate of bias
512 | 
513 | The distribution is also our best estimate of the shape the sampling
514 | distribution of the median, given the data and our model.
515 | 
516 | ### Make data frame
517 | 
518 | ``` r
519 | df <- as_tibble(with(density(bootm),data.frame(x,y)))
520 | 
521 | df.pv <- tibble(x = df$x[df$x>mu],
522 |                 y = df$y[df$x>mu])
523 | ```
524 | 
525 | ### Figure
526 | 
527 | ``` r
528 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar +
529 |       # geom_line(stat = "density") +
530 |   labs(x = "Bootstrap means",
531 |        y = "Density") +
532 |   theme(axis.text.y = element_blank(),
533 |         axis.ticks.y = element_blank()) +
534 |   # P value
535 |   geom_area(data = df.pv,
536 |             aes(x = x, y = y),
537 |             fill = "red", alpha = .2) +
538 |   # density
539 |   geom_line(data = df, size = 1) +
540 |   # Null value
541 |   geom_segment(x = mu,
542 |                xend = mu,
543 |                y = 0,
544 |                yend = df$y[which.min(abs(df$x-mu))],
545 |                size = 1,
546 |                colour = "black",
547 |                linetype = "dashed") +
548 |   # confidence interval ----------------------
549 |   geom_segment(x = bootci[1], xend = bootci[2],
550 |                y = 0, yend = 0,
551 |                lineend = "round", size = 2, colour = "black") +
552 |   annotate(geom = "label", x = bootci[1]+0.15, y = 0.07, size = 5,
553 |              colour = "white", fill = "black", fontface = "bold",
554 |              label = paste("L = ", round(bootci[1], digits = 2))) +
555 |   annotate(geom = "label", x = bootci[2]-0.15, y = 0.07, size = 5,
556 |              colour = "white", fill = "black", fontface = "bold",
557 |              label = paste("U = ", round(bootci[2], digits = 2))) +
558 |   # sample mean: vertical line + label
559 |   geom_vline(xintercept = mean(samp),
560 |              linetype = 'solid') +
561 |   annotate(geom = "label", x = 2.3, y = 1.2, size = 7,
562 |              colour = "white", fill = "black", fontface = "bold",
563 |              label = paste("Sample mean = ", round(samp.m, digits = 2))) +
564 |   # vertical line marking bootstrap mean
565 |   # geom_vline(xintercept = mean(bootsamp),
566 |   #            linetype = 'solid')
567 |   # SEM label + segment
568 |   annotate(geom = "label", x = 2.5, y = 0.7, size  = 7, 
569 |              colour = "white", fill = "grey", fontface = "bold",
570 |              label = paste("SD =",round(bootsamp.sd, digits = 2),"= SEM")) + 
571 |   geom_segment(x = 1.25, 
572 |                xend = 1.85,
573 |                y = 0.7, yend = 0.7,
574 |                arrow = arrow(type = "closed", 
575 |                              length = unit(0.25, "cm"),
576 |                              ends = "both"),
577 |                colour = "grey", size = 1) +
578 |   # P value
579 |   geom_segment(x = 2.6, xend = 2.1, y = 0.4, yend = 0.2,
580 |                arrow = arrow(type = "closed", 
581 |                              length = unit(0.25, "cm")),
582 |                colour = "grey30", size = 1) +
583 |   annotate(geom = "label", x = 2.6, y = 0.4, size  = 7, 
584 |              colour = "white", fill = "red", fontface = "bold",
585 |              label = "P value / 2")
586 |   
587 | p
588 | ```
589 | 
590 | ![](pb_files/figure-gfm/unnamed-chunk-24-1.png)<!-- -->
591 | 
592 | ``` r
593 | pD <- p
594 | ```
595 | 
596 | # Summary figure
597 | 
598 | ``` r
599 | p1 <- cowplot::plot_grid(pA, pB,
600 |                     labels = c("A", "B"),
601 |                     ncol = 2,
602 |                     nrow = 1,
603 |                     rel_widths = c(1, 4), 
604 |                     label_size = 20, 
605 |                     hjust = -0.5, 
606 |                     scale=.95,
607 |                     align = "h")
608 | 
609 | p2 <- cowplot::plot_grid(pC, pD,
610 |                     labels = c("C", "D"),
611 |                     ncol = 2,
612 |                     nrow = 1,
613 |                     rel_widths = c(2, 3), 
614 |                     label_size = 20, 
615 |                     hjust = -0.5, 
616 |                     scale=.95,
617 |                     align = "h")
618 | 
619 | cowplot::plot_grid(p1, p2,
620 |                     labels = c("", ""),
621 |                     ncol = 1,
622 |                     nrow = 2,
623 |                     rel_heights = c(1, 1), 
624 |                     label_size = 20, 
625 |                     hjust = -0.5, 
626 |                     scale=.95,
627 |                     align = "h")
628 | 
629 | # save figure
630 | ggsave(filename=('./figures/figure_pb.pdf'),width=15,height=15)
631 | ggsave(filename=('./figures/figure1.pdf'),width=15,height=15)
632 | ```
633 | 


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/docs/pb_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pb_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/docs/pc.md:
--------------------------------------------------------------------------------
  1 | Percentage correct example
  2 | ================
  3 | Guillaume A. Rousselet
  4 | 2022-06-13
  5 | 
  6 | # Dependencies
  7 | 
  8 | ``` r
  9 | library(tibble)
 10 | library(ggplot2)
 11 | # library(cowplot)
 12 | source("./functions/theme_gar.txt")
 13 | source("./functions/Rallfun-v40.txt")
 14 | ```
 15 | 
 16 | After taking the mean across trials, correct/incorrect data are often
 17 | wrongly assumed to be normally distributed. Instead, these data are
 18 | always positive and bounded, and are better modelled using a beta
 19 | distribution (Kruschke, 2014). Similarly, standard methods are
 20 | inappropriate for ordinal data, like Likert scale results, which are too
 21 | often treated as metric (Bürkner & Vuorre, 2019; Liddell & Kruschke,
 22 | 2018).
 23 | 
 24 | # Make data and compute confidence intervals
 25 | 
 26 | ``` r
 27 | set.seed(6666)
 28 | n <- 15
 29 | norm_samp <- rnorm(n, mean = 75, sd = 10)
 30 | skew_samp <- rbeta(n, 10, 0.2) * 100
 31 | skew_samp2 <- sort(skew_samp)
 32 | skew_samp2[1] <- 60
 33 | # hist(rbeta(10000,10,0.2)*100, 50)
 34 | 
 35 | norm_stci <- t.test(norm_samp, conf.level = 0.95)$conf.int
 36 | skew_stci <- t.test(skew_samp, conf.level = 0.95)$conf.int
 37 | skew_stci2 <- t.test(skew_samp2, conf.level = 0.95)$conf.int
 38 | 
 39 | norm_pbci <- onesampb(norm_samp, est = mean)$ci
 40 | skew_pbci <- onesampb(skew_samp, est = mean)$ci
 41 | skew_pbci2 <- onesampb(skew_samp2, est = mean)$ci
 42 | ```
 43 | 
 44 | # Illustration: standard confidence interval
 45 | 
 46 | ``` r
 47 | set.seed(21) # for reproducible jitter
 48 | # raw data
 49 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2),
 50 |              cond = rep(1:3, each = n))
 51 | # mean + confidence intervals
 52 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)),
 53 |               cond = 1:3,
 54 |               ci_low = c(norm_stci[1], skew_stci[1], skew_stci2[1]),
 55 |               ci_up = c(norm_stci[2], skew_stci[2], skew_stci2[2]))
 56 | 
 57 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar +
 58 |   # scatterplots
 59 |     geom_jitter(width = .1, alpha = 0.5, 
 60 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
 61 |   scale_x_continuous(breaks = c(1,2,3)) +
 62 |   # confidence intervals
 63 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
 64 |               width=.05, size=0.75) + 
 65 |   geom_point(data = df2, aes(x=cond, y=pc), size=3) +
 66 |    theme(panel.grid.minor.x = element_blank()) +
 67 |   labs(x = "Conditions", y = "Percent correct") +
 68 |   ggtitle("Standard confidence intervals") 
 69 | p
 70 | ```
 71 | 
 72 | ![](pc_files/figure-gfm/unnamed-chunk-3-1.png)<!-- -->
 73 | 
 74 | ``` r
 75 | pA <- p
 76 | ```
 77 | 
 78 | Sample means:
 79 | 
 80 | -   Condition 1 = 73.6
 81 | 
 82 | -   Condition 2 = 96.9
 83 | 
 84 | -   Condition 3 = 95.2
 85 | 
 86 | # Illustration: bootstrap confidence interval
 87 | 
 88 | ``` r
 89 | set.seed(21) # for reproducible jitter
 90 | # raw data
 91 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2),
 92 |              cond = rep(1:3, each = n))
 93 | # mean + confidence intervals
 94 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)),
 95 |               cond = 1:3,
 96 |               ci_low = c(norm_pbci[1], skew_pbci[1], skew_pbci2[1]),
 97 |               ci_up = c(norm_pbci[2], skew_pbci[2], skew_pbci2[2]))
 98 | 
 99 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar +
100 |   # scatterplots
101 |     geom_jitter(width = .1, alpha = 0.5, 
102 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
103 |   scale_x_continuous(breaks = c(1,2,3)) +
104 |   # confidence intervals
105 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
106 |               width=.05, size=0.75) + 
107 |   geom_point(data = df2, aes(x=cond, y=pc), size=3) +
108 |    theme(panel.grid.minor.x = element_blank()) +
109 |   labs(x = "Conditions", y = "Percent correct") +
110 |   ggtitle("Bootstrap confidence intervals") 
111 | p
112 | ```
113 | 
114 | ![](pc_files/figure-gfm/unnamed-chunk-4-1.png)<!-- -->
115 | 
116 | ``` r
117 | pB <- p
118 | ```
119 | 
120 | # Summary figure
121 | 
122 | ``` r
123 | cowplot::plot_grid(pA, pB,
124 |                     labels = c("A", "B"),
125 |                     ncol = 2,
126 |                     nrow = 1,
127 |                     rel_widths = c(1, 1), 
128 |                     label_size = 20, 
129 |                     hjust = -0.5, 
130 |                     scale=.95,
131 |                     align = "h")
132 | 
133 | # save figure
134 | ggsave(filename=('./figures/figure_pc.pdf'),width=10,height=5)
135 | ggsave(filename=('./figures/figure2.pdf'),width=10,height=5)
136 | ```
137 | 
138 | # References
139 | 
140 | Bürkner, Paul-Christian, and Matti Vuorre. “Ordinal Regression Models in
141 | Psychology: A Tutorial.” Advances in Methods and Practices in
142 | Psychological Science 2, no. 1 (March 1, 2019): 77–101.
143 | <https://doi.org/10.1177/2515245918823199>.
144 | 
145 | Kruschke, John. Doing Bayesian Data Analysis - 2nd Edition. Academic
146 | Press, 2014.
147 | <https://www.elsevier.com/books/doing-bayesian-data-analysis/kruschke/978-0-12-405888-0>.
148 | 
149 | Liddell, Torrin M., and John K. Kruschke. “Analyzing Ordinal Data with
150 | Metric Models: What Could Possibly Go Wrong?” Journal of Experimental
151 | Social Psychology 79 (November 1, 2018): 328–48.
152 | <https://doi.org/10.1016/j.jesp.2018.08.009>.
153 | 


--------------------------------------------------------------------------------
/docs/pc_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pc_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/docs/pc_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/pc_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-15-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-15-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-20-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-20-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-25-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-25-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-29-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-29-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-30-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-30-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-31-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-31-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-32-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-32-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-35-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-35-1.png


--------------------------------------------------------------------------------
/docs/ptb_files/figure-gfm/unnamed-chunk-36-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/ptb_files/figure-gfm/unnamed-chunk-36-1.png


--------------------------------------------------------------------------------
/docs/sampdist.md:
--------------------------------------------------------------------------------
  1 | Bootstrap sampling distributions
  2 | ================
  3 | Guillaume A. Rousselet
  4 | 2022-06-13
  5 | 
  6 | # Dependencies
  7 | 
  8 | ``` r
  9 | library(tibble)
 10 | library(ggplot2)
 11 | # library(cowplot)
 12 | source("./functions/theme_gar.txt")
 13 | # source("./functions/Rallfun-v40.txt")
 14 | ```
 15 | 
 16 | ``` r
 17 | sessionInfo()
 18 | ```
 19 | 
 20 |     ## R version 4.2.0 (2022-04-22)
 21 |     ## Platform: x86_64-apple-darwin17.0 (64-bit)
 22 |     ## Running under: macOS Catalina 10.15.7
 23 |     ## 
 24 |     ## Matrix products: default
 25 |     ## BLAS:   /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
 26 |     ## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
 27 |     ## 
 28 |     ## locale:
 29 |     ## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
 30 |     ## 
 31 |     ## attached base packages:
 32 |     ## [1] stats     graphics  grDevices utils     datasets  methods   base     
 33 |     ## 
 34 |     ## other attached packages:
 35 |     ## [1] ggplot2_3.3.6 tibble_3.1.7 
 36 |     ## 
 37 |     ## loaded via a namespace (and not attached):
 38 |     ##  [1] rstudioapi_0.13  knitr_1.39       magrittr_2.0.3   tidyselect_1.1.2
 39 |     ##  [5] munsell_0.5.0    colorspace_2.0-3 R6_2.5.1         rlang_1.0.2     
 40 |     ##  [9] fastmap_1.1.0    fansi_1.0.3      dplyr_1.0.9      stringr_1.4.0   
 41 |     ## [13] tools_4.2.0      grid_4.2.0       gtable_0.3.0     xfun_0.31       
 42 |     ## [17] utf8_1.2.2       cli_3.3.0        withr_2.5.0      htmltools_0.5.2 
 43 |     ## [21] ellipsis_0.3.2   yaml_2.3.5       digest_0.6.29    lifecycle_1.0.1 
 44 |     ## [25] crayon_1.5.1     purrr_0.3.4      vctrs_0.4.1      glue_1.6.2      
 45 |     ## [29] evaluate_0.15    rmarkdown_2.14   stringi_1.7.6    compiler_4.2.0  
 46 |     ## [33] pillar_1.7.0     generics_0.1.2   scales_1.2.0     pkgconfig_2.0.3
 47 | 
 48 | # Example 1: samples from lognormal distribution
 49 | 
 50 | We sample from a lognormal distribution, because, you know, life is
 51 | [lognormal](https://stat.ethz.ch/~stahel/lognormal/lnboard/brochure.html).
 52 | 
 53 | ## Illustrate population
 54 | 
 55 | For comparison, show normal distribution with the same mean and sd as
 56 | the default lognormal (meanlog = 0, sdlog = 1). The [mean of the
 57 | lognormal
 58 | distribution](https://en.wikipedia.org/wiki/Log-normal_distribution)
 59 | with meanlog = 0 and sdlog = 1 is `exp(meanlog + sdlog^2/2)` = exp(1/2).
 60 | The variance is `(exp(sdlog^2)-1) * exp(2*meanlog + sdlog^2)` =
 61 | (exp(1)-1) \* exp(1).
 62 | 
 63 | ### Check that the two distributions have the same mean and variance.
 64 | 
 65 | ``` r
 66 | set.seed(777)
 67 | pop1 <- rnorm(1000000, 
 68 |              mean = exp(1/2), 
 69 |              sd = sqrt((exp(1)-1) * exp(1))
 70 |              )
 71 | #mean(pop1)
 72 | #var(pop1)
 73 | 
 74 | pop2 <- rlnorm(1000000, meanlog = 0, sdlog = 1)
 75 | #mean(pop2)
 76 | #var(pop2)
 77 | 
 78 | pop.tm <- mean(pop2, trim = 0.2) # save population trimmed mean for figure
 79 | ```
 80 | 
 81 | Normal population:
 82 | 
 83 | -   Mean = 1.65
 84 | 
 85 | -   Variance = 4.68
 86 | 
 87 | Lognormal population:
 88 | 
 89 | -   Mean = 1.65
 90 | 
 91 | -   Variance = 4.64
 92 | 
 93 | ### Make figure
 94 | 
 95 | ``` r
 96 | x.n <- seq(-5, 7, 0.001)
 97 | x.ln <- seq(0, 7, 0.001)
 98 | 
 99 | y.n <- dnorm(x.n, mean = exp(1/2), sd = sqrt((exp(1)-1) * exp(1)))
100 | y.ln <- dlnorm(x.ln)
101 | 
102 | df <- tibble(x = c(x.n, x.ln),
103 |              y = c(y.n, y.ln),
104 |              dist = c(rep("Normal", length(x.n)), rep("Lognormal", length(x.ln)))
105 |              ) 
106 | 
107 | p <- ggplot(df, aes(x = x, y = y, colour = dist)) + theme_gar +
108 |   geom_line(size = 1.5) +
109 |   labs(x = "Values", y = "Density") +
110 |   scale_colour_manual(name = "", values = c("Lognormal" = "orange",
111 |                                             "Normal" = "purple")) +
112 |   theme(legend.position = c(0.15, 0.86),
113 |         axis.text.y = element_blank(),
114 |         axis.ticks.y = element_blank())
115 | p
116 | ```
117 | 
118 | ![](sampdist_files/figure-gfm/unnamed-chunk-4-1.png)<!-- -->
119 | 
120 | ### Save figure
121 | 
122 | ``` r
123 | # save figure
124 | ggsave(filename=('./figures/figure_lognormal_pop.pdf'),width=7,height=5)
125 | ggsave(filename=('./figures/figure3.pdf'),width=7,height=5)
126 | ```
127 | 
128 | ## Illustrate sampling distributions
129 | 
130 | We draw 50,000 samples for different sizes from our population. Each of
131 | these samples is like an experiment. For each experiment, we compute a
132 | statistics of interest, here the 20% trimmed mean. In each plot, the
133 | vertical dashed line marks the population 20% trimmed mean that we are
134 | trying to estimate.
135 | 
136 | ### n = 20
137 | 
138 | ``` r
139 | # set.seed(777)
140 | n <- 20 # sample size
141 | nsamp <- 50000 # number of samples (experiments)
142 | # sampling distribution of the mean
143 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2)
144 | v <- enframe(dist.samp, name = NULL) 
145 | 
146 | p <- ggplot(v, aes(x = value)) + theme_gar +
147 |         geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
148 |         geom_line(stat = 'density', size = 1) +  
149 |   theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"),
150 |         axis.text.y = element_blank(),
151 |         axis.ticks.y = element_blank()) +
152 |   labs(x = "Sample trimmed means", y = "Density") +
153 |   ggtitle("Sampling distribution: n = 20") +
154 |   coord_cartesian(xlim = c(0, 4))
155 | p.sampdist1 <- p 
156 | p
157 | ```
158 | 
159 | ![](sampdist_files/figure-gfm/unnamed-chunk-6-1.png)<!-- -->
160 | 
161 | ### n = 30
162 | 
163 | ``` r
164 | # set.seed(777)
165 | n <- 30 # sample size
166 | nsamp <- 50000 # number of samples (experiments)
167 | # sampling distribution of the mean
168 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2)
169 | v <- enframe(dist.samp, name = NULL) 
170 | 
171 | p <- ggplot(v, aes(x = value)) + theme_gar +
172 |         geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
173 |         geom_line(stat = 'density', size = 1) +
174 |   theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"),
175 |         axis.text.y = element_blank(),
176 |         axis.ticks.y = element_blank()) +
177 |    labs(x = "Sample trimmed means", y = "Density") +
178 |   coord_cartesian(xlim = c(0, 4)) +   
179 |   ggtitle("Sampling distribution: n = 30")
180 | p.sampdist2 <- p 
181 | p
182 | ```
183 | 
184 | ![](sampdist_files/figure-gfm/unnamed-chunk-7-1.png)<!-- -->
185 | 
186 | ### n = 50
187 | 
188 | ``` r
189 | # set.seed(777)
190 | n <- 50 # sample size
191 | nsamp <- 50000 # number of samples (experiments)
192 | # sampling distribution of the mean
193 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2)
194 | v <- enframe(dist.samp, name = NULL) 
195 | 
196 | p <- ggplot(v, aes(x = value)) + theme_gar +
197 |         geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
198 |         geom_line(stat = 'density', size = 1) +
199 |   theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"),
200 |         axis.text.y = element_blank(),
201 |         axis.ticks.y = element_blank()) +
202 |    labs(x = "Sample trimmed means", y = "Density") +
203 |   coord_cartesian(xlim = c(0, 4)) +  
204 |   ggtitle("Sampling distribution: n = 50")
205 | p.sampdist3 <- p 
206 | p
207 | ```
208 | 
209 | ![](sampdist_files/figure-gfm/unnamed-chunk-8-1.png)<!-- -->
210 | 
211 | ## Illustrate 4 samples
212 | 
213 | In each plot, the vertical dashed line marks the population 20% trimmed
214 | mean, whereas the vertical continuous line marks the sample 20% trimmed
215 | mean.
216 | 
217 | ### Define function for horizontal jitter plot
218 | 
219 | ``` r
220 | samp_jitter <- function(df, samp.tm){
221 | p <- ggplot(data = df, aes(x = res, y = cond)) + theme_gar +
222 |   # scatterplots
223 |     geom_jitter(height = .05, alpha = 0.5, 
224 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
225 |   theme(axis.ticks.y = element_blank(),
226 |         axis.text.y = element_blank(),
227 |         axis.title = element_blank(),
228 |         panel.grid.minor.x = element_blank(),
229 |         plot.margin = unit(c(0, 0, 0, 0), "cm")) +
230 |   scale_x_continuous(breaks = seq(0,12,2)) +
231 |   scale_y_continuous(breaks = 1) +
232 |   coord_cartesian(xlim = c(0, 11)) +
233 |   # sample trimmed mean
234 |   geom_segment(aes(x = samp.tm, xend = samp.tm,
235 |                    y = 0.9, yend = 1.1)) +
236 |   # population trimmed mean
237 |   geom_segment(aes(x = pop.tm, xend = pop.tm,
238 |                    y = 0.9, yend = 1.1),
239 |                linetype = 'longdash', lineend = 'round', colour = "black") +
240 |   labs(x = "Values")
241 | p
242 | }
243 | ```
244 | 
245 | ### n = 20
246 | 
247 | Generate all samples
248 | 
249 | ``` r
250 | # set.seed(777)
251 | n <- 20 # sample size 
252 | samp20 <- matrix(rlnorm(n*4), nrow = 4) # get sample 
253 | samp20.tm <- apply(samp20, 1, mean, trim = 0.2)
254 | ```
255 | 
256 | Illustrate sample 1
257 | 
258 | ``` r
259 | set.seed(21) # reproducible jitter
260 | S <- 1
261 | df <- tibble(res = samp20[S,], cond = rep(1, n))
262 | p <- samp_jitter(df, samp20.tm[S])
263 | p.samp20_1 <- p
264 | p
265 | ```
266 | 
267 | ![](sampdist_files/figure-gfm/unnamed-chunk-11-1.png)<!-- -->
268 | 
269 | Illustrate sample 2
270 | 
271 | ``` r
272 | set.seed(21) # reproducible jitter
273 | S <- 2
274 | df <- tibble(res = samp20[S,], cond = rep(1, n))
275 | p <- samp_jitter(df, samp20.tm[S])
276 | p.samp20_2 <- p
277 | p
278 | ```
279 | 
280 | ![](sampdist_files/figure-gfm/unnamed-chunk-12-1.png)<!-- -->
281 | 
282 | Illustrate sample 3
283 | 
284 | ``` r
285 | set.seed(21) # reproducible jitter
286 | S <- 3
287 | df <- tibble(res = samp20[S,], cond = rep(1, n))
288 | p <- samp_jitter(df, samp20.tm[S])
289 | p.samp20_3 <- p
290 | p
291 | ```
292 | 
293 | ![](sampdist_files/figure-gfm/unnamed-chunk-13-1.png)<!-- -->
294 | 
295 | Illustrate sample 4
296 | 
297 | ``` r
298 | set.seed(21) # reproducible jitter
299 | S <- 4
300 | df <- tibble(res = samp20[S,], cond = rep(1, n))
301 | p <- samp_jitter(df, samp20.tm[S])
302 | p.samp20_4 <- p
303 | p
304 | ```
305 | 
306 | ![](sampdist_files/figure-gfm/unnamed-chunk-14-1.png)<!-- -->
307 | 
308 | ### n = 30
309 | 
310 | Generate all samples
311 | 
312 | ``` r
313 | # set.seed(777)
314 | n <- 30 # sample size 
315 | samp30 <- matrix(rlnorm(n*4), nrow = 4) # get sample 
316 | samp30.tm <- apply(samp30, 1, mean, trim = 0.2)
317 | ```
318 | 
319 | Illustrate sample 1
320 | 
321 | ``` r
322 | set.seed(21) # reproducible jitter
323 | S <- 1
324 | df <- tibble(res = samp30[S,], cond = rep(1, n))
325 | p <- samp_jitter(df, samp30.tm[S])
326 | p.samp30_1 <- p
327 | p
328 | ```
329 | 
330 | ![](sampdist_files/figure-gfm/unnamed-chunk-16-1.png)<!-- -->
331 | 
332 | Illustrate sample 2
333 | 
334 | ``` r
335 | set.seed(21) # reproducible jitter
336 | S <- 2
337 | df <- tibble(res = samp30[S,], cond = rep(1, n))
338 | p <- samp_jitter(df, samp30.tm[S])
339 | p.samp30_2 <- p
340 | p
341 | ```
342 | 
343 | ![](sampdist_files/figure-gfm/unnamed-chunk-17-1.png)<!-- -->
344 | 
345 | Illustrate sample 3
346 | 
347 | ``` r
348 | set.seed(21) # reproducible jitter
349 | S <- 3
350 | df <- tibble(res = samp30[S,], cond = rep(1, n))
351 | p <- samp_jitter(df, samp30.tm[S])
352 | p.samp30_3 <- p
353 | p
354 | ```
355 | 
356 | ![](sampdist_files/figure-gfm/unnamed-chunk-18-1.png)<!-- -->
357 | 
358 | Illustrate sample 4
359 | 
360 | ``` r
361 | set.seed(21) # reproducible jitter
362 | S <- 4
363 | df <- tibble(res = samp30[S,], cond = rep(1, n))
364 | p <- samp_jitter(df, samp30.tm[S])
365 | p.samp30_4 <- p
366 | p
367 | ```
368 | 
369 | ![](sampdist_files/figure-gfm/unnamed-chunk-19-1.png)<!-- -->
370 | 
371 | ### n = 50
372 | 
373 | Generate all samples
374 | 
375 | ``` r
376 | # set.seed(777)
377 | n <- 50 # sample size 
378 | samp50 <- matrix(rlnorm(n*4), nrow = 4) # get sample 
379 | samp50.tm <- apply(samp50, 1, mean, trim = 0.2)
380 | ```
381 | 
382 | Illustrate sample 1
383 | 
384 | ``` r
385 | set.seed(21) # reproducible jitter
386 | S <- 1
387 | df <- tibble(res = samp50[S,], cond = rep(1, n))
388 | p <- samp_jitter(df, samp50.tm[S])
389 | p.samp50_1 <- p
390 | p
391 | ```
392 | 
393 | ![](sampdist_files/figure-gfm/unnamed-chunk-21-1.png)<!-- -->
394 | 
395 | Illustrate sample 2
396 | 
397 | ``` r
398 | set.seed(21) # reproducible jitter
399 | S <- 2
400 | df <- tibble(res = samp50[S,], cond = rep(1, n))
401 | p <- samp_jitter(df, samp50.tm[S])
402 | p.samp50_2 <- p
403 | p
404 | ```
405 | 
406 | ![](sampdist_files/figure-gfm/unnamed-chunk-22-1.png)<!-- -->
407 | 
408 | Illustrate sample 3
409 | 
410 | ``` r
411 | set.seed(21) # reproducible jitter
412 | S <- 3
413 | df <- tibble(res = samp50[S,], cond = rep(1, n))
414 | p <- samp_jitter(df, samp50.tm[S])
415 | p.samp50_3 <- p
416 | p
417 | ```
418 | 
419 | ![](sampdist_files/figure-gfm/unnamed-chunk-23-1.png)<!-- -->
420 | 
421 | Illustrate sample 4
422 | 
423 | ``` r
424 | set.seed(21) # reproducible jitter
425 | S <- 4
426 | df <- tibble(res = samp50[S,], cond = rep(1, n))
427 | p <- samp_jitter(df, samp50.tm[S])
428 | p.samp50_4 <- p
429 | p
430 | ```
431 | 
432 | ![](sampdist_files/figure-gfm/unnamed-chunk-24-1.png)<!-- -->
433 | 
434 | ## Illustrate 4 bootstrap sampling distributions
435 | 
436 | We estimate the sampling distributions from the samples.
437 | 
438 | ### Define density plot function
439 | 
440 | ``` r
441 | dens_plot <- function(dist.samp, samp.tm){
442 |   v <- enframe(dist.samp, name = NULL) 
443 | p <- ggplot(v, aes(x = value)) + theme_gar +
444 |   geom_vline(xintercept = samp.tm, linetype = 'solid', colour = "black") +      
445 |   geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
446 |         geom_line(stat = 'density', size = 1) +   
447 |   theme(axis.text.y = element_blank(),
448 |         axis.title.y = element_blank(),
449 |         axis.ticks.y = element_blank(),
450 |         plot.margin = unit(c(0, 0, 0.5, 0), "cm")) +
451 |   labs(x = "Bootstrap estimates") +
452 |   # ggtitle("Sampling distribution: n = 20") +
453 |   coord_cartesian(xlim = c(0, 4))
454 | p
455 | }
456 | ```
457 | 
458 | ### n = 20
459 | 
460 | Sample 1
461 | 
462 | ``` r
463 | # set.seed(777)
464 | S <- 1
465 | n <- 20
466 | nboot <- 5000 # number of bootstrap samples
467 | # bootstrap distribution
468 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
469 | p <- dens_plot(boot.samp, samp20.tm[S])
470 | p.boot20_1 <- p
471 | p
472 | ```
473 | 
474 | ![](sampdist_files/figure-gfm/unnamed-chunk-26-1.png)<!-- -->
475 | 
476 | Sample 2
477 | 
478 | ``` r
479 | # set.seed(777)
480 | S <- 2
481 | n <- 20
482 | nboot <- 5000 # number of bootstrap samples
483 | # bootstrap distribution
484 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
485 | p <- dens_plot(boot.samp, samp20.tm[S])
486 | p.boot20_2 <- p
487 | p
488 | ```
489 | 
490 | ![](sampdist_files/figure-gfm/unnamed-chunk-27-1.png)<!-- -->
491 | 
492 | Sample 3
493 | 
494 | ``` r
495 | # set.seed(777)
496 | S <- 3
497 | n <- 20
498 | nboot <- 5000 # number of bootstrap samples
499 | # bootstrap distribution
500 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
501 | p <- dens_plot(boot.samp, samp20.tm[S])
502 | p.boot20_3 <- p
503 | p
504 | ```
505 | 
506 | ![](sampdist_files/figure-gfm/unnamed-chunk-28-1.png)<!-- -->
507 | 
508 | Sample 4
509 | 
510 | ``` r
511 | # set.seed(777)
512 | S <- 4
513 | n <- 20
514 | nboot <- 5000 # number of bootstrap samples
515 | # bootstrap distribution
516 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
517 | p <- dens_plot(boot.samp, samp20.tm[S])
518 | p.boot20_4 <- p
519 | p
520 | ```
521 | 
522 | ![](sampdist_files/figure-gfm/unnamed-chunk-29-1.png)<!-- -->
523 | 
524 | ### n = 30
525 | 
526 | Sample 1
527 | 
528 | ``` r
529 | # set.seed(777)
530 | S <- 1
531 | n <- 30
532 | nboot <- 5000 # number of bootstrap samples
533 | # bootstrap distribution
534 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
535 | p <- dens_plot(boot.samp, samp30.tm[S])
536 | p.boot30_1 <- p
537 | p
538 | ```
539 | 
540 | ![](sampdist_files/figure-gfm/unnamed-chunk-30-1.png)<!-- -->
541 | 
542 | Sample 2
543 | 
544 | ``` r
545 | # set.seed(777)
546 | S <- 2
547 | nboot <- 5000 # number of bootstrap samples
548 | # bootstrap distribution
549 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
550 | p <- dens_plot(boot.samp, samp30.tm[S])
551 | p.boot30_2 <- p
552 | p
553 | ```
554 | 
555 | ![](sampdist_files/figure-gfm/unnamed-chunk-31-1.png)<!-- -->
556 | 
557 | Sample 3
558 | 
559 | ``` r
560 | # set.seed(777)
561 | S <- 3
562 | nboot <- 5000 # number of bootstrap samples
563 | # bootstrap distribution
564 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
565 | p <- dens_plot(boot.samp, samp30.tm[S])
566 | p.boot30_3 <- p
567 | p
568 | ```
569 | 
570 | ![](sampdist_files/figure-gfm/unnamed-chunk-32-1.png)<!-- -->
571 | 
572 | Sample 4
573 | 
574 | ``` r
575 | # set.seed(777)
576 | S <- 4
577 | nboot <- 5000 # number of bootstrap samples
578 | # bootstrap distribution
579 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
580 | p <- dens_plot(boot.samp, samp30.tm[S])
581 | p.boot30_4 <- p
582 | p
583 | ```
584 | 
585 | ![](sampdist_files/figure-gfm/unnamed-chunk-33-1.png)<!-- -->
586 | 
587 | ### n = 50
588 | 
589 | Sample 1
590 | 
591 | ``` r
592 | # set.seed(777)
593 | S <- 1
594 | n <- 50
595 | nboot <- 5000 # number of bootstrap samples
596 | # bootstrap distribution
597 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
598 | p <- dens_plot(boot.samp, samp50.tm[S])
599 | p.boot50_1 <- p
600 | p
601 | ```
602 | 
603 | ![](sampdist_files/figure-gfm/unnamed-chunk-34-1.png)<!-- -->
604 | 
605 | Sample 2
606 | 
607 | ``` r
608 | # set.seed(777)
609 | S <- 2
610 | nboot <- 5000 # number of bootstrap samples
611 | # bootstrap distribution
612 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
613 | p <- dens_plot(boot.samp, samp50.tm[S])
614 | p.boot50_2 <- p
615 | p
616 | ```
617 | 
618 | ![](sampdist_files/figure-gfm/unnamed-chunk-35-1.png)<!-- -->
619 | 
620 | Sample 3
621 | 
622 | ``` r
623 | # set.seed(777)
624 | S <- 3
625 | nboot <- 5000 # number of bootstrap samples
626 | # bootstrap distribution
627 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
628 | p <- dens_plot(boot.samp, samp50.tm[S])
629 | p.boot50_3 <- p
630 | p
631 | ```
632 | 
633 | ![](sampdist_files/figure-gfm/unnamed-chunk-36-1.png)<!-- -->
634 | 
635 | Sample 4
636 | 
637 | ``` r
638 | # set.seed(777)
639 | S <- 4
640 | nboot <- 5000 # number of bootstrap samples
641 | # bootstrap distribution
642 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
643 | p <- dens_plot(boot.samp, samp50.tm[S])
644 | p.boot50_4 <- p
645 | p
646 | ```
647 | 
648 | ![](sampdist_files/figure-gfm/unnamed-chunk-37-1.png)<!-- -->
649 | 
650 | # Summary figure
651 | 
652 | ``` r
653 | # -------------------------------------------------
654 | p2.1 <- cowplot::plot_grid(p.samp20_1, p.boot20_1 + theme(axis.title = element_blank()),
655 |                     labels = c("1", "B"),
656 |                     label_size = 18,
657 |                     hjust = c(-0.5, 2.2),
658 |                     vjust = c(1.5, -4.3),
659 |                     ncol = 1, nrow = 2,
660 |                     rel_heights = c(1.5, 3))
661 | 
662 | p2.2 <- cowplot::plot_grid(p.samp20_2, p.boot20_2 + theme(axis.title = element_blank()),
663 |                     labels = c("2"),
664 |                     label_size = 18,
665 |                     ncol = 1, nrow = 2,
666 |                     rel_heights = c(1.5, 3))
667 | 
668 | p2.3 <- cowplot::plot_grid(p.samp20_3, p.boot20_3 + theme(axis.title = element_blank()),
669 |                     labels = c("3"),
670 |                     label_size = 18,
671 |                     ncol = 1, nrow = 2,
672 |                     rel_heights = c(1.5, 3))
673 | 
674 | p2.4 <- cowplot::plot_grid(p.samp20_4, p.boot20_4,
675 |                     labels = c("4"),
676 |                     label_size = 18,
677 |                     ncol = 1, nrow = 2,
678 |                     rel_heights = c(1.5, 3))
679 | 
680 | p2 <- cowplot::plot_grid(p.sampdist1,
681 |                          p2.1, p2.2, p2.3, p2.4,
682 |                     labels = c("A"),
683 |                     hjust = 0.7,
684 |                     label_size = 18,
685 |                     ncol = 1, 
686 |                     nrow = 5,
687 |                     align = "v",
688 |                     axis = "l",
689 |                     rel_heights = c(1, 1, 1, 1, 1))
690 | 
691 | # -------------------------------------------------
692 | p3.1 <- cowplot::plot_grid(p.samp30_1, p.boot30_1 + theme(axis.title = element_blank()),
693 |                     labels = c(""),
694 |                     ncol = 1, nrow = 2,
695 |                     rel_heights = c(1.5, 3))
696 | 
697 | p3.2 <- cowplot::plot_grid(p.samp30_2, p.boot30_2 + theme(axis.title = element_blank()),
698 |                     labels = c(""),
699 |                     ncol = 1, nrow = 2,
700 |                     rel_heights = c(1.5, 3))
701 | 
702 | p3.3 <- cowplot::plot_grid(p.samp30_3, p.boot30_3 + theme(axis.title = element_blank()),
703 |                     labels = c(""),
704 |                     ncol = 1, nrow = 2,
705 |                     rel_heights = c(1.5, 3))
706 | 
707 | p3.4 <- cowplot::plot_grid(p.samp30_4, p.boot30_4,
708 |                     labels = c(""),
709 |                     ncol = 1, nrow = 2,
710 |                     rel_heights = c(1.5, 3))
711 | 
712 | p3 <- cowplot::plot_grid(p.sampdist2,
713 |                          p3.1, p3.2, p3.3, p3.4,
714 |                     labels = c(""),
715 |                     ncol = 1,
716 |                     nrow = 5,
717 |                     align = "v",
718 |                     axis = "l",
719 |                     rel_heights = c(1, 1, 1, 1, 1))
720 | 
721 | # -------------------------------------------------
722 | p4.1 <- cowplot::plot_grid(p.samp50_1, p.boot50_1 + theme(axis.title = element_blank()),
723 |                     labels = c(""),
724 |                     ncol = 1, nrow = 2,
725 |                     rel_heights = c(1.5, 3))
726 | 
727 | p4.2 <- cowplot::plot_grid(p.samp50_2, p.boot50_2 + theme(axis.title = element_blank()),
728 |                     labels = c(""),
729 |                     ncol = 1, nrow = 2,
730 |                     rel_heights = c(1.5, 3))
731 | 
732 | p4.3 <- cowplot::plot_grid(p.samp50_3, p.boot50_3 + theme(axis.title = element_blank()),
733 |                     labels = c(""),
734 |                     ncol = 1, nrow = 2,
735 |                     rel_heights = c(1.5, 3))
736 | 
737 | p4.4 <- cowplot::plot_grid(p.samp50_4, p.boot50_4,
738 |                     labels = c(""),
739 |                     ncol = 1, nrow = 2,
740 |                     rel_heights = c(1.5, 3))
741 | 
742 | p4 <- cowplot::plot_grid(p.sampdist3,
743 |                          p4.1, p4.2, p4.3, p4.4,
744 |                     labels = c(""),
745 |                     ncol = 1,
746 |                     nrow = 5,
747 |                     align = "v",
748 |                     axis = "l",
749 |                     rel_heights = c(1, 1, 1, 1, 1))
750 | 
751 | cowplot::plot_grid(p2, p3, p4,
752 |                     labels = c(""),
753 |                     ncol = 3,
754 |                     nrow = 1,
755 |                     rel_widths = c(1, 1, 1), 
756 |                     label_size = 20, 
757 |                     hjust = -0.5, 
758 |                     scale=.95,
759 |                     align = "h")
760 | 
761 | # save figure
762 | ggsave(filename=('./figures/figure_sampdist_lognormal.pdf'),width=20,height=15)
763 | ggsave(filename=('./figures/figure4.pdf'),width=20,height=15)
764 | ```
765 | 


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-14-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-14-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-16-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-16-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-17-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-17-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-18-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-18-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-19-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-19-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-21-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-21-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-22-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-22-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-23-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-23-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-24-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-24-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-26-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-26-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-27-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-27-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-28-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-28-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-29-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-29-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-30-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-30-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-31-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-31-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-32-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-32-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-33-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-33-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-34-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-34-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-35-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-35-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-36-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-36-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-37-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-37-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/docs/sampdist_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/GRousselet/bootstrap/73901f263c822c9c7fc00d8e0997d13f7034dd7e/docs/sampdist_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/functions/corfun.txt:
--------------------------------------------------------------------------------
 1 | # Modify spear() to return only correlation coefficient
 2 | spearsim <- function(x,y){
 3 | # Compute Spearman's rho
 4 | corv <- cor(rank(x),rank(y))
 5 | corv
 6 | }
 7 | 
 8 | # modify corbsub() accordingly
 9 | corbsubsim <- function(isub,x,y,corfun,...){
10 | #
11 | #  Compute correlation for x[isub] and y[isub]
12 | #  isub is a vector of length n,
13 | #  a bootstrap sample from the sequence of integers
14 | #  1, 2, 3, ..., n
15 | #
16 | #  This function is used by other functions when computing
17 | #  bootstrap estimates.
18 | #
19 | #  corfun is some correlation function already stored in R
20 | #
21 | corbsub <- corfun(x[isub],y[isub],...)
22 | corbsub
23 | }
24 | 


--------------------------------------------------------------------------------
/functions/functions.txt:
--------------------------------------------------------------------------------
 1 | # Define coverage function
 2 | cover <- function(ci, pop){
 3 |   out <- ci[1] <= pop & ci[2] >= pop
 4 |     out
 5 | }
 6 | 
 7 | # Define power function
 8 | powerfun <- function(ci, mu=0){
 9 |   out <- ci[1] > mu | ci[2] < mu
10 |     out
11 | }
12 | 
13 | # Define quantile functions
14 | q1 <- function(x){
15 |   out <- hd(x, q = 0.25)
16 | }
17 | 
18 | 
19 | q2 <- function(x){
20 |   out <- hd(x, q = 0.5)
21 | }
22 | 
23 | q3 <- function(x){
24 |   out <- hd(x, q = 0.75)
25 | }
26 | 
27 | keeporder <- function(x){
28 | x <- as.character(x)
29 | x <- factor(x, levels=unique(x))
30 | x
31 | }
32 | 
33 | # normalised difference
34 | normdiff <- function(x,y){
35 |   out <- (x-y) / (x+y)
36 |   out
37 | }
38 | 


--------------------------------------------------------------------------------
/functions/gengh.txt:
--------------------------------------------------------------------------------
  1 | # Combine RGenData::GenDataPopulation and ghdist to create a function that generates
  2 | # non-normal bivariate populations with a known correlation (among the 3 options offered by the `cor()` function) and marginal g&h distributions.
  3 | 
  4 | # ------------------------------------------
  5 | # Code from Rallfun-v35.txt from Rand Wilcox
  6 | # ------------------------------------------
  7 | 
  8 | # Original
  9 | # http://dornsife.usc.edu/labs/rwilcox/software/
 10 | 
 11 | # Generate n observations from a *g-and-h* distribution
 12 | ghdist <- function(n,g=0,h=0){
 13 |   x<-rnorm(n)
 14 |   if (g>0){
 15 |     ghdist<-(exp(g*x)-1)*exp(h*x^2/2)/g
 16 |   }
 17 |   if(g==0)ghdist<-x*exp(h*x^2/2)
 18 |   ghdist
 19 | }
 20 | 
 21 | # --------------------------------------------------------------
 22 | # Code adapted from RGenData::GenDataPopulation from John Ruscio
 23 | # --------------------------------------------------------------
 24 | 
 25 | # Reference
 26 | # Ruscio, J. & Kaczetow, W. (2008) 
 27 | # Simulating Multivariate Nonnormal Data Using an Iterative Algorithm. 
 28 | # Multivariate Behav Res, 43, 355-381.
 29 | # https://www.ncbi.nlm.nih.gov/pubmed/26741201
 30 | 
 31 | # License: MIT
 32 | # Copyright <2018> <John Ruscio>
 33 | 
 34 | # Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 35 | # 
 36 | # The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 37 | # 
 38 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 39 | 
 40 | # Original
 41 | # https://github.com/cran/RGenData/blob/master/R/EFAGenData.R
 42 | 
 43 | # Simulate multivariate g-and-h data using an iterative algorithm
 44 | #
 45 | # Args:
 46 | #   n.cases            : Number of observations for each variable - default 1000
 47 | #   n.variables        : Number of variables - default 2
 48 | #   g                  : g parameter of the g-and-h distribution - default 0
 49 | #   h                  : h parameter of the g-and-h distribution - default 0
 50 | #   rho                : Target correlation between variables or covariance matrix - default 0
 51 | #   corr.type          : Type of correlation - default "pearson", alternative "spearman"
 52 | # g & h parameters can also be vectors of length n.variables
 53 | #
 54 | # Returns:
 55 | #   data               : Population of data - matrix n.cases rows by n.variables columns
 56 | #
 57 | gengh <- function(n.cases = 1000, n.variables = 2, 
 58 |                   g = 0, h = 0, rho = 0, 
 59 |                   corr.type = "pearson"){
 60 |   
 61 | if(is.null(dim(rho))) { # create correlation matrix or already provided?
 62 |     target.corr <- diag(n.variables)
 63 |     target.corr[upper.tri(target.corr)] <- rho
 64 |     target.corr[lower.tri(target.corr)] <- rho
 65 |     } else if(dim(rho)[1] == n.variables & dim(rho)[2] == n.variables) {
 66 |     target.corr <- rho
 67 | }
 68 | 
 69 | # Change default to number of variables, as suggested by John Ruscio
 70 | # n.factors <- 0 # Number of factors (scalar)
 71 | n.factors <- n.variables # Number of factors (scalar)
 72 | max.trials <- 5 # Maximum number of trials (scalar)
 73 | initial.multiplier <- 1 # Value of initial multiplier (scalar)
 74 | 
 75 | # generate g-and-h data
 76 | if(length(g)==1) g <- rep(g, n.variables)
 77 | if(length(h)==1) h <- rep(h, n.variables)
 78 | distributions <- matrix(NA, nrow = n.cases, ncol = n.variables)
 79 |   for (V in 1:n.variables){
 80 |     distributions[,V] <- sort(ghdist(n.cases, g=g[V], h=h[V]))
 81 |   }
 82 | 
 83 | data <- matrix(0, nrow = n.cases, ncol = n.variables)
 84 | iteration <- 0
 85 | best.rmsr <- 1
 86 | trials.without.improvement <- 0
 87 | intermediate.corr <- target.corr
 88 | 
 89 | # If number of latent factors was not specified, determine it
 90 | if (n.factors == 0){
 91 |   Eigenvalues.Observed <- eigen(intermediate.corr)$values 
 92 |   Eigenvalues.Random <- matrix(0, nrow = 100, ncol = n.variables) 
 93 |   Random.Data <- matrix(0, nrow = n.cases, ncol = n.variables)
 94 |   for (i in 1:100){
 95 |     for (j in 1:n.variables){
 96 |       Random.Data[,j] <- sample(distributions[,j], size = n.cases, replace = TRUE)
 97 |     }
 98 |       Eigenvalues.Random[i,] <- eigen(cor(Random.Data))$values
 99 |   }
100 |   Eigenvalues.Random <- apply(Eigenvalues.Random, 2, mean) # calculate mean eigenvalue for each factor 
101 |   n.factors <- max(1, sum(Eigenvalues.Observed > Eigenvalues.Random))
102 | }
103 | 
104 | shared.comp <- matrix(rnorm(n.cases * n.factors, 0, 1), nrow = n.cases,
105 |                       ncol = n.factors)
106 | unique.comp <- matrix(rnorm(n.cases * n.variables, 0, 1), nrow = n.cases,
107 |                       ncol = n.variables)
108 | shared.load <- matrix(0, nrow = n.variables, ncol = n.factors)
109 | unique.load <- matrix(0, nrow = n.variables, ncol = 1)
110 | while (trials.without.improvement < max.trials) {
111 |   iteration <- iteration + 1
112 |   factor.analysis <- FactorAnalysis(intermediate.corr, corr.matrix = TRUE,
113 |                                     max.iteration = 50, n.factors, corr.type)
114 |   if (n.factors == 1) {
115 |     shared.load[, 1] <- factor.analysis$loadings
116 |   } else {
117 |     for (i in 1:n.factors)
118 |       shared.load[, i] <- factor.analysis$loadings[, i]
119 |   }
120 |   shared.load[shared.load > 1] <- 1
121 |   shared.load[shared.load < -1] <- -1
122 |   if (shared.load[1, 1] < 0)
123 |     shared.load <- shared.load * -1
124 |   for (i in 1:n.variables)
125 |     if (sum(shared.load[i, ] * shared.load[i, ]) < 1) {
126 |       unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ]))
127 |     } else {
128 |       unique.load[i, 1] <- 0
129 |     }
130 |   unique.load <- sqrt(unique.load)
131 |   for (i in 1:n.variables)
132 |     data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] *
133 |     unique.load[i, 1]
134 |   for (i in 1:n.variables) {
135 |     data <- data[sort.list(data[, i]), ]
136 |     data[, i] <- distributions[, i]
137 |   }
138 |   reproduced.corr <- cor(data, method = corr.type)
139 |   residual.corr <- target.corr - reproduced.corr
140 |   rmsr <- sqrt(sum(residual.corr[lower.tri(residual.corr)] *
141 |                      residual.corr[lower.tri(residual.corr)]) /
142 |                  (.5 * (n.variables * n.variables - n.variables)))
143 |   if (rmsr < best.rmsr) {
144 |     best.rmsr <- rmsr
145 |     best.corr <- intermediate.corr
146 |     best.res <- residual.corr
147 |     intermediate.corr <- intermediate.corr + initial.multiplier *
148 |       residual.corr
149 |     trials.without.improvement <- 0
150 |   } else {
151 |     trials.without.improvement <- trials.without.improvement + 1
152 |     current.multiplier <- initial.multiplier *
153 |       .5 ^ trials.without.improvement
154 |     intermediate.corr <- best.corr + current.multiplier * best.res
155 |   }
156 | }
157 | 
158 | factor.analysis <- FactorAnalysis(best.corr, corr.matrix = TRUE,
159 |                                   max.iteration = 50, n.factors,
160 |                                   corr.type)
161 | if (n.factors == 1) {
162 |   shared.load[, 1] <- factor.analysis$loadings
163 | } else {
164 |   for (i in 1:n.factors)
165 |     shared.load[, i] <- factor.analysis$loadings[, i]
166 | }
167 | shared.load[shared.load > 1] <- 1
168 | shared.load[shared.load < -1] <- -1
169 | if (shared.load[1, 1] < 0)
170 |   shared.load <- shared.load * -1
171 | for (i in 1:n.variables)
172 |   if (sum(shared.load[i, ] * shared.load[i, ]) < 1) {
173 |     unique.load[i, 1] <- (1 - sum(shared.load[i, ] * shared.load[i, ]))
174 |   } else {
175 |     unique.load[i, 1] <- 0
176 |   }
177 | unique.load <- sqrt(unique.load)
178 | for (i in 1:n.variables)
179 |   data[, i] <- (shared.comp %*% t(shared.load))[, i] + unique.comp[, i] *
180 |   unique.load[i, 1]
181 | data <- apply(data, 2, scale) # standardizes each variable in the matrix
182 | for (i in 1:n.variables) {
183 |   data <- data[sort.list(data[, i]), ]
184 |   data[, i] <- distributions[, i]
185 | }
186 | data
187 | }
188 | 
189 | ################################################################################
190 | FactorAnalysis <- function(data, corr.matrix = FALSE, max.iteration = 50,
191 |                             n.factors = 0, corr.type = "pearson") {
192 | # Analyzes comparison data with known factorial structures
193 | #
194 | # Args:
195 | #   data          : Matrix to store the simulated data.
196 | #   corr.matrix   : Correlation matrix (default is FALSE)
197 | #   max.iteration : Maximum number of iterations (scalar, default is 50).
198 | #   n.factors     : Number of factors (scalar, default is 0).
199 | #   corr.type     : Type of correlation (character, default is "pearson",
200 | #                   user can also call "spearman").
201 | #
202 | # Returns:
203 | #   $loadings : Factor loadings (vector, if one factor. matrix, if multiple
204 | #               factors)
205 | #   $factors  : Number of factors (scalar).
206 | #
207 |   data <- as.matrix(data)
208 |   n.variables <- dim(data)[2]
209 |   if (n.factors == 0) {
210 |     n.factors <- n.variables
211 |     determine <- TRUE
212 |   } else {
213 |     determine <- FALSE
214 |   }
215 |   if (!corr.matrix) {
216 |     corr.matrix <- cor(data, method = corr.type)
217 |   } else {
218 |     corr.matrix <- data
219 |   }
220 |   criterion <- .001
221 |   old.h2 <- rep(99, n.variables)
222 |   h2 <- rep(0, n.variables)
223 |   change <- 1
224 |   iteration <- 0
225 |   factor.loadings <- matrix(nrow = n.variables, ncol = n.factors)
226 |   while ((change >= criterion) & (iteration < max.iteration)) {
227 |     iteration <- iteration + 1
228 |     eigenvalue <- eigen(corr.matrix)
229 |     l <- sqrt(eigenvalue$values[1:n.factors])
230 |     for (i in 1:n.factors)
231 |       factor.loadings[, i] <- eigenvalue$vectors[, i] * l[i]
232 |     for (i in 1:n.variables)
233 |       h2[i] <- sum(factor.loadings[i, ] * factor.loadings[i, ])
234 |     change <- max(abs(old.h2 - h2))
235 |     old.h2 <- h2
236 |     diag(corr.matrix) <- h2
237 |   }
238 |   if (determine) n.factors <- sum(eigenvalue$values > 1)
239 |   return(list(loadings = factor.loadings[, 1:n.factors],
240 |               factors = n.factors))
241 | }
242 | 


--------------------------------------------------------------------------------
/functions/ghpdf.txt:
--------------------------------------------------------------------------------
 1 | # Code from Yuan Yan <yuan.yan@dal.ca>
 2 | # Reference:
 3 | # Yan, Yuan, and Marc G. Genton. ‘The Tukey G-and-h Distribution’. Significance 16, no. 3 (2019): 12–13. https://doi.org/10.1111/j.1740-9713.2019.01273.x.
 4 | # 
 5 | 
 6 | gh_trans <- function(z,g=0.2,h=0.1){
 7 |   if(g==0)
 8 |     x<-z*exp(h*z^2/2)
 9 |   else
10 |     x<-1/g*(exp(g*z)-1)*exp(h*(z^2)/2)
11 |   return(x)
12 | }
13 | 
14 | #pdf: density function
15 | den_tukey <- function(y,g=0.2,h=0.1,xi=0,omega=1,u=0,sig=1){
16 |   y=(y-xi)/omega
17 |   z=tukey_inv(y,g,h)
18 | #   for(i in 1:length(y)){
19 | #     # z[i] <- uniroot(f,c(-10,10))$root
20 | #     z[i]<-nleqslv(0,fn=function(x){tukey(x,g,h)-y[i]})$x
21 | #   }
22 |   dnorm(z,mean=u,sd=sig)/dtukey(z,g,h)/omega
23 | }
24 | 
25 | #inverse TGH transformation
26 | tukey_inv <- function(y,g=0.2,h=0.1){
27 |   if(g==0 & h==0)
28 |     z=y
29 |   else if(g==0)
30 |     z<-sign(y)*sqrt(lambert_W0(h*y^2)/h) #better & 143 times faster
31 |   else if(h==0){
32 |     # if(g>0)
33 |     #   y=y[y>-1/g]
34 |     # else
35 |     #   y=y[y<-1/g]
36 |     z<-log(g*y+1)/g
37 |   }
38 |   else{
39 |     z=numeric(length(y))
40 |     for(i in 1:length(y)){
41 |       # z[i] <- uniroot(f,c(-10,10))$root
42 |       z[i]<-nleqslv(0,fn=function(x){gh_trans(x,g,h)-y[i]})$x
43 |     }
44 |   }
45 |   return(z)
46 | }
47 | 
48 | dtukey <- function(z,g=0.2,h=0.1){
49 |   if(g==0)
50 |     x<-exp(h*z^2/2)*(1+h*z^2)
51 |   else
52 |     x<-exp(g*z+h*z^2/2)+h*z/g*(exp(g*z)-1)*exp(h*(z^2)/2)
53 |   return(x)
54 | }
55 | 


--------------------------------------------------------------------------------
/functions/theme_gar.txt:
--------------------------------------------------------------------------------
1 | theme_gar <- theme_bw() + 
2 |   theme(plot.title = element_text(size = 20),
3 |         axis.title = element_text(size = 16),
4 |         axis.text = element_text(size = 14, colour="black"),
5 |         legend.key.width = unit(1.5,"cm"),
6 |         legend.text = element_text(size = 16),
7 |         legend.title = element_text(size = 18),
8 |         strip.text = element_text(size=20, face="bold", colour="white"),
9 |         strip.background = element_rect(colour="black", fill="grey40"))


--------------------------------------------------------------------------------
/notrobust.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "The bootstrap is not robust"
  3 | author: "Guillaume A. Rousselet"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document:
  7 |     fig_caption: no
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 2
 11 |     # github_document:
 12 |     # html_preview: yes
 13 |     # toc: yes
 14 |     # toc_depth: 2
 15 | ---
 16 | 
 17 | # Dependencies
 18 | ```{r, message=FALSE, warning=FALSE}
 19 | library(tibble)
 20 | library(ggplot2)
 21 | # library(cowplot)
 22 | source("./functions/Rallfun-v40.txt")
 23 | source("./functions/theme_gar.txt")
 24 | ```
 25 | 
 26 | The bootstrap is sometimes described as a robust technique. 
 27 | In itself, it is not robust. A simple example can illustrate this lack of robustness: percentile bootstrap confidence intervals for the mean are not robust, because the mean is not a robust estimator of central tendency.  
 28 | 
 29 | # Generate data and compute confidence intervals
 30 | ```{r}
 31 | set.seed(21)
 32 | n <- 10
 33 | samp <- rnorm(n, 8, 2) 
 34 | samp <- c(samp, 17)
 35 | samp_mat <- matrix(NA, nrow = 12, ncol = 7)
 36 | ci_mean_t <- matrix(NA, nrow = 2, ncol = 7)
 37 | mean_res <- vector(mode = "numeric", length = 7)
 38 | median_res <- vector(mode = "numeric", length = 7)
 39 | ci_mean_pb <- matrix(NA, nrow = 2, ncol = 7)
 40 | ci_median_pb <- matrix(NA, nrow = 2, ncol = 7)
 41 | ci_median_param <- matrix(NA, nrow = 2, ncol = 7)
 42 | for(C in 1:7){
 43 |   todo <- c(samp, 17+C^2)
 44 |   samp_mat[,C] <- todo
 45 |   mean_res[C] <- mean(todo)
 46 |   median_res[C] <- median(todo)
 47 |   ci_mean_t[,C] <- t.test(todo)$conf.int
 48 |   ci_mean_pb[,C] <- onesampb(todo, mean)$ci # default to nboot = 2000
 49 |   ci_median_pb[,C] <- onesampb(todo, median)$ci
 50 |   ci_median_param[,C] <- sint(todo) # parametric method for the median
 51 | }
 52 | ```
 53 | 
 54 | # Illustrate results: mean + standard CI
 55 | ```{r}
 56 | set.seed(777) # for reproducible jitter
 57 | # raw data
 58 | df <- tibble(res = as.vector(samp_mat),
 59 |              cond = factor(rep(1:7, each = 12)))
 60 | # mean + confidence intervals
 61 | df2 <- tibble(res = mean_res,
 62 |               cond = factor(1:7),
 63 |               ci_low = ci_mean_t[1,],
 64 |               ci_up = ci_mean_t[2,])
 65 | 
 66 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
 67 |    # scatterplots
 68 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
 69 |   geom_hline(yintercept = ci_mean_t[1,1], linetype = 'dashed') +
 70 |   geom_hline(yintercept = ci_mean_t[2,1], linetype = 'dashed') +
 71 |     # confidence intervals
 72 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
 73 |               width=.05, size=0.75) + 
 74 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
 75 |    theme(panel.grid.minor.x = element_blank()) +
 76 |   labs(x = "Conditions", y = "Values") +
 77 |   ggtitle("Mean: standard CI") 
 78 | p
 79 | pA <- p
 80 | ```
 81 | 
 82 | # Illustrate results: mean + boot CI
 83 | ```{r}
 84 | set.seed(777) # for reproducible jitter
 85 | # raw data
 86 | df <- tibble(res = as.vector(samp_mat),
 87 |              cond = factor(rep(1:7, each = 12)))
 88 | # mean + confidence intervals
 89 | df2 <- tibble(res = mean_res,
 90 |               cond = factor(1:7),
 91 |               ci_low = ci_mean_pb[1,],
 92 |               ci_up = ci_mean_pb[2,])
 93 | 
 94 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
 95 |    # scatterplots
 96 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
 97 |   geom_hline(yintercept = ci_mean_pb[1,1], linetype = 'dashed') +
 98 |   geom_hline(yintercept = ci_mean_pb[2,1], linetype = 'dashed') +
 99 |     # confidence intervals
100 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
101 |               width=.05, size=0.75) + 
102 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
103 |    theme(panel.grid.minor.x = element_blank()) +
104 |   labs(x = "Conditions", y = "Values") +
105 |   ggtitle("Mean: bootstrap CI") 
106 | p
107 | pB <- p
108 | ```
109 | 
110 | # Illustrate results: median + boot CI
111 | 
112 | ```{r}
113 | set.seed(777) # for reproducible jitter
114 | # raw data
115 | df <- tibble(res = as.vector(samp_mat),
116 |              cond = factor(rep(1:7, each = 12)))
117 | # mean + confidence intervals
118 | df2 <- tibble(res = median_res,
119 |               cond = factor(1:7),
120 |               ci_low = ci_median_pb[1,],
121 |               ci_up = ci_median_pb[2,])
122 | 
123 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
124 |    # scatterplots
125 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
126 |   geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') +
127 |   geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') +
128 |     # confidence intervals
129 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
130 |               width=.05, size=0.75) + 
131 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
132 |    theme(panel.grid.minor.x = element_blank()) +
133 |   labs(x = "Conditions", y = "Values") +
134 |   ggtitle("Median: bootstrap CI") 
135 | p
136 | pC <- p
137 | ```
138 | 
139 | # Illustrate results: median + parametric CI
140 | ```{r}
141 | set.seed(777) # for reproducible jitter
142 | # raw data
143 | df <- tibble(res = as.vector(samp_mat),
144 |              cond = factor(rep(1:7, each = 12)))
145 | # mean + confidence intervals
146 | df2 <- tibble(res = median_res,
147 |               cond = factor(1:7),
148 |               ci_low = ci_median_param[1,],
149 |               ci_up = ci_median_param[2,])
150 | 
151 | p <- ggplot(df, aes(x = cond, y = res)) + theme_gar +
152 |    # scatterplots
153 |   geom_jitter(shape = 21, width = .1, colour = 'black', fill = 'grey', size = 2, alpha = 0.5) +
154 |   geom_hline(yintercept = ci_median_pb[1,1], linetype = 'dashed') +
155 |   geom_hline(yintercept = ci_median_pb[2,1], linetype = 'dashed') +
156 |     # confidence intervals
157 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
158 |               width=.05, size=0.75) + 
159 |   geom_point(data = df2, aes(x=cond, y=res), size=3) +
160 |    theme(panel.grid.minor.x = element_blank()) +
161 |   labs(x = "Conditions", y = "Values") +
162 |   ggtitle("Median: parametric CI") 
163 | p
164 | # pC <- p
165 | ```
166 | 
167 | # Summary figure
168 | ```{r, eval = FALSE}
169 | cowplot::plot_grid(pA, pB, pC,
170 |                     labels = c("A", "B", "C"),
171 |                     ncol = 3,
172 |                     nrow = 1,
173 |                     rel_widths = c(1, 1, 1), 
174 |                     label_size = 20, 
175 |                     hjust = -0.5, 
176 |                     scale=.95,
177 |                     align = "h")
178 | 
179 | # save figure
180 | ggsave(filename=('./figures/figure_notrobust.pdf'),width=10,height=5)
181 | ggsave(filename=('./figures/figure6.pdf'),width=10,height=5)
182 | ```


--------------------------------------------------------------------------------
/pb.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Percentile bootstrap"
  3 | author: "Guillaume A. Rousselet"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document:
  7 |     fig_caption: no
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 2
 11 |     # github_document:
 12 |     # html_preview: yes
 13 |     # toc: yes
 14 |     # toc_depth: 2
 15 | ---
 16 | 
 17 | # Dependencies
 18 | ```{r, message=FALSE, warning=FALSE}
 19 | library(tibble)
 20 | library(ggplot2)
 21 | # library(cowplot)
 22 | # library(HDInterval)
 23 | source("./functions/theme_gar.txt")
 24 | ```
 25 | 
 26 | ```{r}
 27 | sessionInfo()
 28 | ```
 29 | 
 30 | # Bootstrap implementation
 31 | 
 32 | Let's look at how the bootstrap is implemented in the one-sample case.
 33 | See an interactive demo [here](https://seeing-theory.brown.edu/frequentist-inference/index.html#section2).
 34 | 
 35 | ## Sampling with replacement
 36 | 
 37 | Test the `sample()` function.
 38 | Let say our sample is a sequence of integers.
 39 | We sample with replacement from that sequence of numbers.
 40 | Execute chunk several times to see what happens.
 41 | ```{r}
 42 | n <- 10 # sample size
 43 | samp <- 1:n
 44 | boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement
 45 | boot.samp
 46 | ```
 47 | 
 48 | Generate 3 bootstrap samples for the article:
 49 | ```{r}
 50 | set.seed(21) # reproducible example
 51 | n <- 6
 52 | samp <- 1:n
 53 | nboot <- 3
 54 | matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot)
 55 | ```
 56 | 
 57 | ## Loop
 58 | ```{r}
 59 | set.seed(21) # reproducible results
 60 | n <- 20 # sample size
 61 | samp <- rnorm(n) # get normal sample
 62 | nboot <- 1000 # number of bootstrap samples
 63 | 
 64 | # declare vector of results
 65 | boot.m <- vector(mode = "numeric", length = nboot) # save means
 66 | boot.tm <- vector(mode = "numeric", length = nboot) # save trimmed means
 67 | boot.md <- vector(mode = "numeric", length = nboot) # save medians
 68 | 
 69 | for(B in 1:nboot){
 70 |   boot.samp <- sample(samp, n, replace = TRUE) # sample with replacement
 71 |   boot.m[B] <- mean(boot.samp)
 72 |   boot.tm[B] <- mean(boot.samp, trim = 0.2)
 73 |   boot.md[B] <- median(boot.samp)
 74 | }
 75 | 
 76 | samp.m <- mean(samp)
 77 | samp.tm <- mean(samp, trim = 0.2)
 78 | samp.md <- median(samp)
 79 | samp.m
 80 | samp.tm
 81 | samp.md
 82 | ```
 83 | 
 84 | ### Plot original results
 85 | ```{r, fig.width=3}
 86 | set.seed(1)
 87 | df <- tibble(cond = factor(rep(1,n)),
 88 |              res = samp) 
 89 | ggplot(df, aes(x = cond, y = res)) + theme_linedraw() + 
 90 |   geom_jitter(width = 0.1, alpha = 0.3) +
 91 |   theme(axis.text.x = element_blank(),
 92 |     axis.ticks = element_blank()) +
 93 |   scale_x_discrete(name ="") +
 94 |   # stat_summary(fun.y=median, geom="line")
 95 |   geom_segment(aes(x = 0.9, y = samp.m, xend = 1.1, yend = samp.m)) +
 96 |   geom_segment(aes(x = 0.9, y = samp.md, xend = 1.1, yend = samp.md), colour = "orange") +
 97 |   geom_segment(aes(x = 0.9, y = samp.tm, xend = 1.1, yend = samp.tm), colour = "blue") +
 98 |   annotate("text", x = 1.185, y = samp.m-0.1, label = 'bold("Mean")', size = 4, parse = TRUE) +
 99 |   annotate("text", x = 1.2, y = samp.md+0.1, label = 'bold("Median")', size = 4, colour = "orange", parse = TRUE) +
100 |   annotate("text", x = 0.75, y = samp.tm, label = 'bold("Trimmed\nmean")', size = 4, colour = "blue", parse = TRUE)
101 | ```
102 | 
103 | ### Plot distributions of bootstrap estimates
104 | 
105 | The distribution of bootstrap medians is multi-modal and very different from that of the mean and the 20% trimmed mean. To compute confidence intervals for the median in the one-sample case, it is recommended to use the parametric approach implemented in the function `sint()` available in the file *./functions/Rallfun-v40.txt* in this repository.
106 | 
107 | ```{r}
108 | df <- tibble(res = c(boot.m, boot.tm, boot.md),
109 |              est = factor(c(rep("Mean",nboot), rep("Trimmed mean",nboot), rep("Median",nboot)))
110 |              )
111 | ggplot(df, aes(x = res, colour = est)) + theme_gar +
112 |   geom_line(aes(y = ..density..), stat = "density", size = 1) +
113 |   labs(x = "Bootstrap estimates", y = "Density") + 
114 |   theme(legend.position = "bottom",
115 |         axis.text.y = element_blank(),
116 |         axis.ticks.y = element_blank())
117 |   # ggtitle("Boostrap samples")
118 | ```
119 | 
120 | ## Matrix
121 | ```{r}
122 | set.seed(21) # reproducible results
123 | n <- 20 # sample size
124 | samp <- rnorm(n) # get normal sample
125 | nboot <- 1000 # number of bootstrap samples
126 | # sample with replacement + reoganise into a matrix
127 | boot.samp <- matrix(sample(samp, n*nboot, replace = TRUE), nrow = nboot)
128 | boot.m <- apply(boot.samp, 1, mean)
129 | boot.md <- apply(boot.samp, 1, median)
130 | ```
131 | 
132 | ## Functions
133 | 
134 | Examples of R packages for bootstrap inferences:  
135 | 
136 | - [`boot`](https://www.statmethods.net/advstats/bootstrapping.html)  
137 | 
138 | - [`resample`](https://cran.r-project.org/web/packages/resample/index.html)  
139 | 
140 | - [`bootstrap`](https://cran.r-project.org/web/packages/bootstrap/index.html)  
141 | 
142 | - [`WRS2`](https://cran.r-project.org/web/packages/WRS2/index.html)  
143 | 
144 | Functions from [Rand Wilcox](https://dornsife.usc.edu/labs/rwilcox/software/).
145 | 
146 | ```{r, eval = FALSE}
147 | # TO USE THE FUNCTIONS, FIRST USE THE SOURCE COMMAND:
148 | # source('./functions/Rallfun-v40.txt')
149 | 
150 | set.seed(1) # reproducible results
151 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0)
152 | # est  = estimator, could be var, mad, to use a trimmed mean, add argument trim = 0.2
153 | onesampb(samp, est=mean, alpha=0.1, nboot=1000, SEED = FALSE, nv = 0, trim = 0.1)
154 | # nv = null value for NHST
155 | # always set SEED to FALSE otherwise the function always returns the same results for a given input.
156 | # the only way to really understand the code is to look at it: edit(onesampb)
157 | 
158 | # for inferences on trimmed means only:
159 | trimpb()
160 | 
161 | # for inferences on the Harrell-Davis quantile estimator (default q=0.5 = median):
162 | hdpb()
163 | ```
164 | 
165 | # Generate sample from lognormal distribution
166 | 
167 | ## Illustrate population
168 | 
169 | Lognormal distribution from the which the sample is taken.
170 | ```{r, fig.height=3}
171 | x <- seq(0, 7, 0.001)
172 | y <- dlnorm(x)
173 | 
174 | df <- tibble(x = x, y = y) 
175 | 
176 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar +
177 |   geom_line(size = 1.5, colour = "orange") +
178 |   labs(x = "Values", y = "Density") +
179 |   theme(axis.text.y = element_blank(),
180 |         axis.ticks.y = element_blank())
181 | p
182 | ```
183 | 
184 | ## Get sample
185 | ```{r}
186 | set.seed(21) # reproducible example
187 | n <- 30 # sample size
188 | meanlog <- 0
189 | sdlog <- 1
190 | samp <- rlnorm(n, meanlog = meanlog, sdlog = sdlog) # random sample
191 | ```
192 | 
193 | ## Illustrate sample
194 | ```{r, fig.width=2}
195 | set.seed(21) # for reproducible jitter
196 | # raw data
197 | df <- tibble(pc = samp,
198 |              cond = rep(1, n))
199 | 
200 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar +
201 |   # scatterplots
202 |     geom_jitter(width = .05, alpha = 0.5, 
203 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
204 |   theme(axis.ticks.x = element_blank(),
205 |         axis.text.x = element_blank(),
206 |         axis.title.x = element_blank()) +
207 |   scale_x_continuous(breaks = 1) +
208 |   # mean
209 |   geom_segment(aes(x = 0.9, xend = 1.1,
210 |                    y = mean(samp), yend = mean(samp))) +
211 |   # median
212 |   geom_segment(aes(x = 0.9, xend = 1.1,
213 |                    y = median(samp), yend = median(samp)),
214 |                linetype = 'longdash', lineend = 'round') +
215 |   theme(panel.grid.minor.x = element_blank()) +
216 |   labs(y = "Values")
217 | p
218 | pA <- p
219 | ```
220 | 
221 | Sample mean = `r round(mean(samp), digits=2)`  
222 | 
223 | # Standard confidence interval
224 | 
225 | ## T-value: define function
226 | ```{r}
227 | # mean minus null value divided by SEM
228 | tval <- function(x,nv){
229 |   tval <- (mean(x) - nv) / sqrt(var(x)/length(x))
230 |   tval
231 | }
232 | ```
233 | 
234 | ## P value
235 | Let say our hypothesis is mu = 2.
236 | 
237 | First, let's use the function:
238 | ```{r}
239 | mu <- 2 # null hypothesis
240 | t.test(samp, mu = mu)$p.value
241 | ```
242 | 
243 | Then check the formula:
244 | ```{r, warning=FALSE}
245 | dof <- length(samp)-1 # degrees of freedom
246 | 2 * pt(abs(tval(samp, mu)), dof, lower.tail = FALSE)
247 | ```
248 | 
249 | ## Illustrate theoretical *T* distribution
250 | ```{r, warning = FALSE}
251 | alpha <- 0.05
252 | 
253 | p <- ggplot(data.frame(x = c(-5, 5)), aes(x)) + theme_gar +
254 |   labs(y = "Density") +
255 |   theme(axis.text = element_text(size = 14),
256 |         axis.title = element_text(size = 16),
257 |         plot.title = element_text(size=20),
258 |         axis.text.y = element_blank(),
259 |         axis.ticks.y = element_blank()) +
260 |   labs(x = "T values") +
261 |   ggtitle(substitute(paste(italic(T)," distribution with ",dof," degrees of freedom"), list(dof=dof))) +
262 |   # area under the curve -> p value
263 |   # https://christianburkhart.de/blog/area_under_the_curve/
264 |   stat_function(fun = dt,
265 |                 geom = "area",
266 |                 xlim = c(-5, tval(samp, mu)),
267 |                 alpha = .2,
268 |                 fill = "red",
269 |                 args = list(df = dof)) +
270 |   # theoretical cut-off t value for alpha = 0.05
271 |   geom_segment(x = qt(alpha/2, dof),
272 |                xend = qt(alpha/2, dof),
273 |                y = 0,
274 |                yend = dt(qt(alpha/2, dof), dof),
275 |                size = 1,
276 |                colour = "red") +
277 |     annotate(geom = "label", x = -2.7, y = 0.07, size  = 7, colour = "red",
278 |              label = expression(paste("T"[crit]))) + # italic("t")
279 |   # observed t value
280 |   # geom_vline(xintercept = abs(tval(samp, mu)))
281 |   geom_segment(x = tval(samp, mu),
282 |                xend = tval(samp, mu),
283 |                y = 0,
284 |                yend = dt(tval(samp, mu), dof),
285 |                size = 1,
286 |                colour = "black",
287 |                linetype = "dashed") +
288 |     annotate(geom = "label", x = -1.8, y = 0.22, size  = 7, colour = "black",
289 |              label = expression(paste("T"[obs]))) + # paste("obs.", italic("t")))
290 |   # Plot density function
291 |   stat_function(fun = dt, args = list(df = dof),
292 |                 size = 1) +
293 |   # P value
294 |   geom_segment(x = -2.8, xend = -1.5,
295 |                y = 0.15, yend = 0.07,
296 |                arrow = arrow(type = "closed", 
297 |                              length = unit(0.25, "cm")),
298 |                colour = "grey30",
299 |                size = 1) +
300 |   annotate(geom = "label", x = -3, y = 0.15, size  = 7, 
301 |              colour = "white", fill = "red", fontface = "bold",
302 |              label = "P value / 2") +
303 |   annotate(geom = "label", x = 2.8, y = 0.35, size = 7,
304 |              colour = "white", fill = "black", fontface = "bold",
305 |              label = expression(paste(bold("CI = m \u00B1 T"[crit]),
306 |                                       bold(" * SEM"))))
307 | # ggsave(filename = './theor_t.pdf')
308 | pB <- p
309 | p
310 | ```
311 | 
312 | ## Compute confidence interval
313 | 
314 | ### Using built-in function
315 | ```{r}
316 | ci.t <- t.test(samp, mu = mu)$conf.int
317 | ci.t
318 | ```
319 | 
320 | ### Formula
321 | ```{r}
322 | alpha <- 0.05 # alpha level
323 | df <- n-1 # degrees of freedom
324 | samp.m <- mean(samp) # sample mean
325 | sem <- sd(samp) / sqrt(n) # sample estimate of the standard error of the mean
326 | ci <- vector("numeric",2)
327 | ci[1] <- samp.m - qt(1-alpha/2, df) * sem # lower bound of CI
328 | ci[2] <- samp.m + qt(1-alpha/2, df) * sem # upper bound of CI
329 | ci
330 | ```
331 | 
332 | # Generate bootstrap samples
333 | ```{r}
334 | set.seed(666)
335 | nboot <- 5000
336 | bootsamp <- matrix(sample(samp, nboot * n, replace = TRUE), nrow = nboot)
337 | ```
338 | 
339 | ## Illustrate a few bootstrap samples
340 | 
341 | For each sample we superimpose the (bootstrap) median.
342 | 
343 | ```{r, fig.height=6}
344 | nb <- 20
345 | df <- tibble(res = as.vector(bootsamp[1:nb,]),
346 |              bootid = rep(1:nb, each = n))
347 | 
348 | df2 <- tibble(bootid = 1:nb,
349 |               res = apply(bootsamp[1:nb,],1,mean))
350 | 
351 | p <- ggplot(df, aes(y = bootid, x = res)) + theme_gar +
352 |   geom_point(alpha = 0.3, position = position_jitter(height=0.1)) +
353 |   labs(x = "Values", y = "Bootstrap samples") +
354 |   geom_segment(data = df2, aes(x = res, xend = res,
355 |                                y = bootid - 0.4, yend = bootid + 0.4),
356 |                size = 1.5) +
357 |   theme(panel.grid.minor = element_blank()) +
358 |   scale_y_continuous(breaks = seq(1, 20, 1), expand = expand_scale(mult = c(0.01, 0.01)))
359 | pC <- p
360 | p
361 | ```
362 | 
363 | ## Illustrate bootstrap sampling distribution of the mean
364 | 
365 | Compute confidence interval and other quantities.
366 | 
367 | Here and in the rest of the tutorial, we compute bootstrap CI using `quantile(type = 6)` of the bootstrap distribution. This is recommended in this article:
368 | 
369 | Hesterberg, Tim C. “What Teachers Should Know About the Bootstrap: Resampling in the Undergraduate Statistics Curriculum.” The American Statistician 69, no. 4 (October 2, 2015): 371–86. https://doi.org/10.1080/00031305.2015.1089789.
370 | 
371 | Rand Wilcox uses a different approach. See for instance `help(onesampb)`, in which the bounds of the CI are defined using the bootstrap distribution `bvec`, `alpha` (say = 0.05) and `nboot` (say = 1,000 bootstrap samples):
372 | 
373 | `low <- round((alpha/2)*nboot)`
374 | `up <- nboot-low`
375 | `low <- low+1`
376 | `ci_lower_bound <- bvec[low]`
377 | `ci_upper_bound <- bvec[up]`
378 | 
379 | In practice it is unclear if these choices make any difference. What we know is that with `nboot` large enough, the choice of quantile method should make virtually no difference.
380 | 
381 | ```{r}
382 | # bootstrap means
383 | bootm <- apply(bootsamp, 1, mean)
384 | # confidence interval
385 | bootci <- quantile(bootm, probs = c(0.025, 0.975), type = 6)
386 | # bootstrap estimation of the standard error
387 | bootsamp.sd <- sd(bootsamp)
388 | # P value
389 | pv <- mean(bootm < mu) # + .5*mean(bootsamp==mu)
390 | pv <- 2 * min(c(pv, 1-pv))
391 | ```
392 | 
393 | Bootstrap 95% CI = [`r round(bootci[1], digits=2)`, `r round(bootci[2], digits=2)`]  
394 | Bootstrap estimate of the SEM = `r round(bootsamp.sd, digits=2)`  
395 | Bootstrap *P* value (hypothesis = 2) = `r pv`  
396 | 
397 | Alternatively, we could compute a highest density interval (HDI):
398 | ```{r, eval = FALSE}
399 | require(HDInterval)
400 | boothdi <- HDInterval::hdi(bootm)
401 | ```
402 | 
403 | We illustrate the distribution of the bootstrap samples, from which we derive four elements:  
404 | 
405 | - confidence/compatibility interval  
406 | 
407 | - p value  
408 | 
409 | - bootstrap estimate of the standard error (SE)  
410 | 
411 | - bootstrap estimate of bias  
412 | 
413 | The distribution is also our best estimate of the shape the sampling distribution of the median, given the data and our model.
414 | 
415 | ### Make data frame
416 | ```{r}
417 | df <- as_tibble(with(density(bootm),data.frame(x,y)))
418 | 
419 | df.pv <- tibble(x = df$x[df$x>mu],
420 |                 y = df$y[df$x>mu])
421 | ```
422 | 
423 | ### Figure
424 | ```{r}
425 | p <- ggplot(df, aes(x = x, y = y)) + theme_gar +
426 |       # geom_line(stat = "density") +
427 |   labs(x = "Bootstrap means",
428 |        y = "Density") +
429 |   theme(axis.text.y = element_blank(),
430 |         axis.ticks.y = element_blank()) +
431 |   # P value
432 |   geom_area(data = df.pv,
433 |             aes(x = x, y = y),
434 |             fill = "red", alpha = .2) +
435 |   # density
436 |   geom_line(data = df, size = 1) +
437 |   # Null value
438 |   geom_segment(x = mu,
439 |                xend = mu,
440 |                y = 0,
441 |                yend = df$y[which.min(abs(df$x-mu))],
442 |                size = 1,
443 |                colour = "black",
444 |                linetype = "dashed") +
445 |   # confidence interval ----------------------
446 |   geom_segment(x = bootci[1], xend = bootci[2],
447 |                y = 0, yend = 0,
448 |                lineend = "round", size = 2, colour = "black") +
449 |   annotate(geom = "label", x = bootci[1]+0.15, y = 0.07, size = 5,
450 |              colour = "white", fill = "black", fontface = "bold",
451 |              label = paste("L = ", round(bootci[1], digits = 2))) +
452 |   annotate(geom = "label", x = bootci[2]-0.15, y = 0.07, size = 5,
453 |              colour = "white", fill = "black", fontface = "bold",
454 |              label = paste("U = ", round(bootci[2], digits = 2))) +
455 |   # sample mean: vertical line + label
456 |   geom_vline(xintercept = mean(samp),
457 |              linetype = 'solid') +
458 |   annotate(geom = "label", x = 2.3, y = 1.2, size = 7,
459 |              colour = "white", fill = "black", fontface = "bold",
460 |              label = paste("Sample mean = ", round(samp.m, digits = 2))) +
461 |   # vertical line marking bootstrap mean
462 |   # geom_vline(xintercept = mean(bootsamp),
463 |   #            linetype = 'solid')
464 |   # SEM label + segment
465 |   annotate(geom = "label", x = 2.5, y = 0.7, size  = 7, 
466 |              colour = "white", fill = "grey", fontface = "bold",
467 |              label = paste("SD =",round(bootsamp.sd, digits = 2),"= SEM")) + 
468 |   geom_segment(x = 1.25, 
469 |                xend = 1.85,
470 |                y = 0.7, yend = 0.7,
471 |                arrow = arrow(type = "closed", 
472 |                              length = unit(0.25, "cm"),
473 |                              ends = "both"),
474 |                colour = "grey", size = 1) +
475 |   # P value
476 |   geom_segment(x = 2.6, xend = 2.1, y = 0.4, yend = 0.2,
477 |                arrow = arrow(type = "closed", 
478 |                              length = unit(0.25, "cm")),
479 |                colour = "grey30", size = 1) +
480 |   annotate(geom = "label", x = 2.6, y = 0.4, size  = 7, 
481 |              colour = "white", fill = "red", fontface = "bold",
482 |              label = "P value / 2")
483 |   
484 | p
485 | pD <- p
486 | ```
487 | 
488 | # Summary figure
489 | ```{r, eval = FALSE}
490 | p1 <- cowplot::plot_grid(pA, pB,
491 |                     labels = c("A", "B"),
492 |                     ncol = 2,
493 |                     nrow = 1,
494 |                     rel_widths = c(1, 4), 
495 |                     label_size = 20, 
496 |                     hjust = -0.5, 
497 |                     scale=.95,
498 |                     align = "h")
499 | 
500 | p2 <- cowplot::plot_grid(pC, pD,
501 |                     labels = c("C", "D"),
502 |                     ncol = 2,
503 |                     nrow = 1,
504 |                     rel_widths = c(2, 3), 
505 |                     label_size = 20, 
506 |                     hjust = -0.5, 
507 |                     scale=.95,
508 |                     align = "h")
509 | 
510 | cowplot::plot_grid(p1, p2,
511 |                     labels = c("", ""),
512 |                     ncol = 1,
513 |                     nrow = 2,
514 |                     rel_heights = c(1, 1), 
515 |                     label_size = 20, 
516 |                     hjust = -0.5, 
517 |                     scale=.95,
518 |                     align = "h")
519 | 
520 | # save figure
521 | ggsave(filename=('./figures/figure_pb.pdf'),width=15,height=15)
522 | ggsave(filename=('./figures/figure1.pdf'),width=15,height=15)
523 | ```


--------------------------------------------------------------------------------
/pc.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Percentage correct example"
  3 | author: "Guillaume A. Rousselet"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document:
  7 |     fig_caption: no
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 2
 11 |     # github_document:
 12 |     # html_preview: yes
 13 |     # toc: yes
 14 |     # toc_depth: 2
 15 | ---
 16 | 
 17 | # Dependencies
 18 | ```{r, message=FALSE, warning=FALSE}
 19 | library(tibble)
 20 | library(ggplot2)
 21 | # library(cowplot)
 22 | source("./functions/theme_gar.txt")
 23 | source("./functions/Rallfun-v40.txt")
 24 | ```
 25 | 
 26 | After taking the mean across trials, correct/incorrect data are often wrongly assumed to be normally distributed. Instead, these data are always positive and bounded, and are better modelled using a beta distribution (Kruschke, 2014). Similarly, standard methods are inappropriate for ordinal data, like Likert scale results, which are too often treated as metric (Bürkner & Vuorre, 2019; Liddell & Kruschke, 2018). 
 27 | 
 28 | # Make data and compute confidence intervals
 29 | ```{r}
 30 | set.seed(6666)
 31 | n <- 15
 32 | norm_samp <- rnorm(n, mean = 75, sd = 10)
 33 | skew_samp <- rbeta(n, 10, 0.2) * 100
 34 | skew_samp2 <- sort(skew_samp)
 35 | skew_samp2[1] <- 60
 36 | # hist(rbeta(10000,10,0.2)*100, 50)
 37 | 
 38 | norm_stci <- t.test(norm_samp, conf.level = 0.95)$conf.int
 39 | skew_stci <- t.test(skew_samp, conf.level = 0.95)$conf.int
 40 | skew_stci2 <- t.test(skew_samp2, conf.level = 0.95)$conf.int
 41 | 
 42 | norm_pbci <- onesampb(norm_samp, est = mean)$ci
 43 | skew_pbci <- onesampb(skew_samp, est = mean)$ci
 44 | skew_pbci2 <- onesampb(skew_samp2, est = mean)$ci
 45 | ```
 46 | 
 47 | # Illustration: standard confidence interval
 48 | ```{r}
 49 | set.seed(21) # for reproducible jitter
 50 | # raw data
 51 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2),
 52 |              cond = rep(1:3, each = n))
 53 | # mean + confidence intervals
 54 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)),
 55 |               cond = 1:3,
 56 |               ci_low = c(norm_stci[1], skew_stci[1], skew_stci2[1]),
 57 |               ci_up = c(norm_stci[2], skew_stci[2], skew_stci2[2]))
 58 | 
 59 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar +
 60 |   # scatterplots
 61 |     geom_jitter(width = .1, alpha = 0.5, 
 62 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
 63 |   scale_x_continuous(breaks = c(1,2,3)) +
 64 |   # confidence intervals
 65 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
 66 |               width=.05, size=0.75) + 
 67 |   geom_point(data = df2, aes(x=cond, y=pc), size=3) +
 68 |    theme(panel.grid.minor.x = element_blank()) +
 69 |   labs(x = "Conditions", y = "Percent correct") +
 70 |   ggtitle("Standard confidence intervals") 
 71 | p
 72 | pA <- p
 73 | ```
 74 | 
 75 | Sample means:
 76 | 
 77 | - Condition 1 = `r round(mean(norm_samp), digits=1)`
 78 | 
 79 | - Condition 2 = `r round(mean(skew_samp), digits=1)`
 80 | 
 81 | - Condition 3 = `r round(mean(skew_samp2), digits=1)`  
 82 | 
 83 | # Illustration: bootstrap confidence interval
 84 | ```{r}
 85 | set.seed(21) # for reproducible jitter
 86 | # raw data
 87 | df <- tibble(pc = c(norm_samp, skew_samp, skew_samp2),
 88 |              cond = rep(1:3, each = n))
 89 | # mean + confidence intervals
 90 | df2 <- tibble(pc = c(mean(norm_samp), mean(skew_samp), mean(skew_samp2)),
 91 |               cond = 1:3,
 92 |               ci_low = c(norm_pbci[1], skew_pbci[1], skew_pbci2[1]),
 93 |               ci_up = c(norm_pbci[2], skew_pbci[2], skew_pbci2[2]))
 94 | 
 95 | p <- ggplot(data = df, aes(x = cond, y = pc)) + theme_gar +
 96 |   # scatterplots
 97 |     geom_jitter(width = .1, alpha = 0.5, 
 98 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
 99 |   scale_x_continuous(breaks = c(1,2,3)) +
100 |   # confidence intervals
101 | geom_errorbar(data = df2, aes(x=cond, ymin=ci_low, ymax=ci_up), 
102 |               width=.05, size=0.75) + 
103 |   geom_point(data = df2, aes(x=cond, y=pc), size=3) +
104 |    theme(panel.grid.minor.x = element_blank()) +
105 |   labs(x = "Conditions", y = "Percent correct") +
106 |   ggtitle("Bootstrap confidence intervals") 
107 | p
108 | pB <- p
109 | ```
110 | 
111 | # Summary figure
112 | ```{r, eval = FALSE}
113 | cowplot::plot_grid(pA, pB,
114 |                     labels = c("A", "B"),
115 |                     ncol = 2,
116 |                     nrow = 1,
117 |                     rel_widths = c(1, 1), 
118 |                     label_size = 20, 
119 |                     hjust = -0.5, 
120 |                     scale=.95,
121 |                     align = "h")
122 | 
123 | # save figure
124 | ggsave(filename=('./figures/figure_pc.pdf'),width=10,height=5)
125 | ggsave(filename=('./figures/figure2.pdf'),width=10,height=5)
126 | ```
127 | 
128 | # References
129 | Bürkner, Paul-Christian, and Matti Vuorre. “Ordinal Regression Models in Psychology: A Tutorial.” Advances in Methods and Practices in Psychological Science 2, no. 1 (March 1, 2019): 77–101. https://doi.org/10.1177/2515245918823199.
130 | 
131 | Kruschke, John. Doing Bayesian Data Analysis - 2nd Edition. Academic Press, 2014. https://www.elsevier.com/books/doing-bayesian-data-analysis/kruschke/978-0-12-405888-0.
132 | 
133 | Liddell, Torrin M., and John K. Kruschke. “Analyzing Ordinal Data with Metric Models: What Could Possibly Go Wrong?” Journal of Experimental Social Psychology 79 (November 1, 2018): 328–48. https://doi.org/10.1016/j.jesp.2018.08.009.
134 | 


--------------------------------------------------------------------------------
/sampdist.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Bootstrap sampling distributions"
  3 | author: "Guillaume A. Rousselet"
  4 | date: "`r Sys.Date()`"
  5 | output:
  6 |   pdf_document:
  7 |     fig_caption: no
  8 |     number_sections: no
  9 |     toc: yes
 10 |     toc_depth: 2
 11 |     # github_document:
 12 |     # html_preview: yes
 13 |     # toc: yes
 14 |     # toc_depth: 2
 15 | ---
 16 | 
 17 | # Dependencies
 18 | ```{r, message=FALSE, warning=FALSE}
 19 | library(tibble)
 20 | library(ggplot2)
 21 | # library(cowplot)
 22 | source("./functions/theme_gar.txt")
 23 | # source("./functions/Rallfun-v40.txt")
 24 | ```
 25 | 
 26 | ```{r}
 27 | sessionInfo()
 28 | ```
 29 | 
 30 | # Example 1: samples from lognormal distribution
 31 | 
 32 | We sample from a lognormal distribution, because, you know, life is [lognormal](https://stat.ethz.ch/~stahel/lognormal/lnboard/brochure.html).
 33 | 
 34 | ## Illustrate population
 35 | For comparison, show normal distribution with the same mean and sd as the default lognormal (meanlog = 0, sdlog = 1). The [mean of the lognormal distribution](https://en.wikipedia.org/wiki/Log-normal_distribution) with meanlog = 0 and sdlog = 1 is `exp(meanlog + sdlog^2/2)` = exp(1/2).
 36 | The variance is `(exp(sdlog^2)-1) * exp(2*meanlog + sdlog^2)` = (exp(1)-1) * exp(1).
 37 | 
 38 | ### Check that the two distributions have the same mean and variance.
 39 | ```{r}
 40 | set.seed(777)
 41 | pop1 <- rnorm(1000000, 
 42 |              mean = exp(1/2), 
 43 |              sd = sqrt((exp(1)-1) * exp(1))
 44 |              )
 45 | #mean(pop1)
 46 | #var(pop1)
 47 | 
 48 | pop2 <- rlnorm(1000000, meanlog = 0, sdlog = 1)
 49 | #mean(pop2)
 50 | #var(pop2)
 51 | 
 52 | pop.tm <- mean(pop2, trim = 0.2) # save population trimmed mean for figure
 53 | ```
 54 | Normal population:
 55 | 
 56 | - Mean = `r round(mean(pop1), digits=2)`
 57 | 
 58 | - Variance = `r round(var(pop1), digits=2)`
 59 | 
 60 | Lognormal population:
 61 | 
 62 | - Mean = `r round(mean(pop2), digits=2)`
 63 | 
 64 | - Variance = `r round(var(pop2), digits=2)`
 65 | 
 66 | 
 67 | ### Make figure
 68 | ```{r, fig.height=4}
 69 | x.n <- seq(-5, 7, 0.001)
 70 | x.ln <- seq(0, 7, 0.001)
 71 | 
 72 | y.n <- dnorm(x.n, mean = exp(1/2), sd = sqrt((exp(1)-1) * exp(1)))
 73 | y.ln <- dlnorm(x.ln)
 74 | 
 75 | df <- tibble(x = c(x.n, x.ln),
 76 |              y = c(y.n, y.ln),
 77 |              dist = c(rep("Normal", length(x.n)), rep("Lognormal", length(x.ln)))
 78 |              ) 
 79 | 
 80 | p <- ggplot(df, aes(x = x, y = y, colour = dist)) + theme_gar +
 81 |   geom_line(size = 1.5) +
 82 |   labs(x = "Values", y = "Density") +
 83 |   scale_colour_manual(name = "", values = c("Lognormal" = "orange",
 84 |                                             "Normal" = "purple")) +
 85 |   theme(legend.position = c(0.15, 0.86),
 86 |         axis.text.y = element_blank(),
 87 |         axis.ticks.y = element_blank())
 88 | p
 89 | ```
 90 | 
 91 | ### Save figure
 92 | ```{r, eval = FALSE}
 93 | # save figure
 94 | ggsave(filename=('./figures/figure_lognormal_pop.pdf'),width=7,height=5)
 95 | ggsave(filename=('./figures/figure3.pdf'),width=7,height=5)
 96 | ```
 97 | 
 98 | ## Illustrate sampling distributions
 99 | We draw 50,000 samples for different sizes from our population.
100 | Each of these samples is like an experiment. For each experiment, we compute a statistics of interest, here the 20% trimmed mean. In each plot, the vertical dashed line marks the population 20% trimmed mean that we are trying to estimate.
101 | 
102 | ### n = 20
103 | ```{r, fig.height=3}
104 | # set.seed(777)
105 | n <- 20 # sample size
106 | nsamp <- 50000 # number of samples (experiments)
107 | # sampling distribution of the mean
108 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2)
109 | v <- enframe(dist.samp, name = NULL) 
110 | 
111 | p <- ggplot(v, aes(x = value)) + theme_gar +
112 |         geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
113 |         geom_line(stat = 'density', size = 1) +  
114 |   theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"),
115 |         axis.text.y = element_blank(),
116 |         axis.ticks.y = element_blank()) +
117 |   labs(x = "Sample trimmed means", y = "Density") +
118 |   ggtitle("Sampling distribution: n = 20") +
119 |   coord_cartesian(xlim = c(0, 4))
120 | p.sampdist1 <- p 
121 | p
122 | ```
123 | 
124 | ### n = 30
125 | ```{r, fig.height=3}
126 | # set.seed(777)
127 | n <- 30 # sample size
128 | nsamp <- 50000 # number of samples (experiments)
129 | # sampling distribution of the mean
130 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2)
131 | v <- enframe(dist.samp, name = NULL) 
132 | 
133 | p <- ggplot(v, aes(x = value)) + theme_gar +
134 |         geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
135 |         geom_line(stat = 'density', size = 1) +
136 |   theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"),
137 |         axis.text.y = element_blank(),
138 |         axis.ticks.y = element_blank()) +
139 |    labs(x = "Sample trimmed means", y = "Density") +
140 |   coord_cartesian(xlim = c(0, 4)) +   
141 |   ggtitle("Sampling distribution: n = 30")
142 | p.sampdist2 <- p 
143 | p
144 | ```
145 | 
146 | ### n = 50
147 | ```{r, fig.height=3}
148 | # set.seed(777)
149 | n <- 50 # sample size
150 | nsamp <- 50000 # number of samples (experiments)
151 | # sampling distribution of the mean
152 | dist.samp <- apply(matrix(rlnorm(n*nsamp), nrow = nsamp), 1, mean, trim = 0.2)
153 | v <- enframe(dist.samp, name = NULL) 
154 | 
155 | p <- ggplot(v, aes(x = value)) + theme_gar +
156 |         geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
157 |         geom_line(stat = 'density', size = 1) +
158 |   theme(plot.margin = unit(c(0, 0, 0.5, 0), "cm"),
159 |         axis.text.y = element_blank(),
160 |         axis.ticks.y = element_blank()) +
161 |    labs(x = "Sample trimmed means", y = "Density") +
162 |   coord_cartesian(xlim = c(0, 4)) +  
163 |   ggtitle("Sampling distribution: n = 50")
164 | p.sampdist3 <- p 
165 | p
166 | ```
167 | 
168 | ## Illustrate 4 samples
169 | 
170 | In each plot, the vertical dashed line marks the population 20% trimmed mean, whereas the vertical continuous line marks the sample 20% trimmed mean.  
171 | 
172 | ### Define function for horizontal jitter plot
173 | ```{r}
174 | samp_jitter <- function(df, samp.tm){
175 | p <- ggplot(data = df, aes(x = res, y = cond)) + theme_gar +
176 |   # scatterplots
177 |     geom_jitter(height = .05, alpha = 0.5, 
178 |                 size = 3, shape = 21, fill = "grey", colour = "black") +
179 |   theme(axis.ticks.y = element_blank(),
180 |         axis.text.y = element_blank(),
181 |         axis.title = element_blank(),
182 |         panel.grid.minor.x = element_blank(),
183 |         plot.margin = unit(c(0, 0, 0, 0), "cm")) +
184 |   scale_x_continuous(breaks = seq(0,12,2)) +
185 |   scale_y_continuous(breaks = 1) +
186 |   coord_cartesian(xlim = c(0, 11)) +
187 |   # sample trimmed mean
188 |   geom_segment(aes(x = samp.tm, xend = samp.tm,
189 |                    y = 0.9, yend = 1.1)) +
190 |   # population trimmed mean
191 |   geom_segment(aes(x = pop.tm, xend = pop.tm,
192 |                    y = 0.9, yend = 1.1),
193 |                linetype = 'longdash', lineend = 'round', colour = "black") +
194 |   labs(x = "Values")
195 | p
196 | }
197 | ```
198 | 
199 | ### n = 20
200 | 
201 | Generate all samples
202 | ```{r}
203 | # set.seed(777)
204 | n <- 20 # sample size 
205 | samp20 <- matrix(rlnorm(n*4), nrow = 4) # get sample 
206 | samp20.tm <- apply(samp20, 1, mean, trim = 0.2)
207 | ```
208 | 
209 | Illustrate sample 1
210 | ```{r, fig.height=1}
211 | set.seed(21) # reproducible jitter
212 | S <- 1
213 | df <- tibble(res = samp20[S,], cond = rep(1, n))
214 | p <- samp_jitter(df, samp20.tm[S])
215 | p.samp20_1 <- p
216 | p
217 | ```
218 | 
219 | Illustrate sample 2
220 | ```{r, fig.height=1}
221 | set.seed(21) # reproducible jitter
222 | S <- 2
223 | df <- tibble(res = samp20[S,], cond = rep(1, n))
224 | p <- samp_jitter(df, samp20.tm[S])
225 | p.samp20_2 <- p
226 | p
227 | ```
228 | 
229 | Illustrate sample 3
230 | ```{r, fig.height=1}
231 | set.seed(21) # reproducible jitter
232 | S <- 3
233 | df <- tibble(res = samp20[S,], cond = rep(1, n))
234 | p <- samp_jitter(df, samp20.tm[S])
235 | p.samp20_3 <- p
236 | p
237 | ```
238 | 
239 | Illustrate sample 4
240 | ```{r, fig.height=1}
241 | set.seed(21) # reproducible jitter
242 | S <- 4
243 | df <- tibble(res = samp20[S,], cond = rep(1, n))
244 | p <- samp_jitter(df, samp20.tm[S])
245 | p.samp20_4 <- p
246 | p
247 | ```
248 | 
249 | ### n = 30
250 | 
251 | Generate all samples
252 | ```{r}
253 | # set.seed(777)
254 | n <- 30 # sample size 
255 | samp30 <- matrix(rlnorm(n*4), nrow = 4) # get sample 
256 | samp30.tm <- apply(samp30, 1, mean, trim = 0.2)
257 | ```
258 | 
259 | Illustrate sample 1
260 | ```{r, fig.height=1}
261 | set.seed(21) # reproducible jitter
262 | S <- 1
263 | df <- tibble(res = samp30[S,], cond = rep(1, n))
264 | p <- samp_jitter(df, samp30.tm[S])
265 | p.samp30_1 <- p
266 | p
267 | ```
268 | 
269 | Illustrate sample 2
270 | ```{r, fig.height=1}
271 | set.seed(21) # reproducible jitter
272 | S <- 2
273 | df <- tibble(res = samp30[S,], cond = rep(1, n))
274 | p <- samp_jitter(df, samp30.tm[S])
275 | p.samp30_2 <- p
276 | p
277 | ```
278 | 
279 | Illustrate sample 3
280 | ```{r, fig.height=1}
281 | set.seed(21) # reproducible jitter
282 | S <- 3
283 | df <- tibble(res = samp30[S,], cond = rep(1, n))
284 | p <- samp_jitter(df, samp30.tm[S])
285 | p.samp30_3 <- p
286 | p
287 | ```
288 | 
289 | Illustrate sample 4
290 | ```{r, fig.height=1}
291 | set.seed(21) # reproducible jitter
292 | S <- 4
293 | df <- tibble(res = samp30[S,], cond = rep(1, n))
294 | p <- samp_jitter(df, samp30.tm[S])
295 | p.samp30_4 <- p
296 | p
297 | ```
298 | 
299 | ### n = 50
300 | 
301 | Generate all samples
302 | ```{r}
303 | # set.seed(777)
304 | n <- 50 # sample size 
305 | samp50 <- matrix(rlnorm(n*4), nrow = 4) # get sample 
306 | samp50.tm <- apply(samp50, 1, mean, trim = 0.2)
307 | ```
308 | 
309 | Illustrate sample 1
310 | ```{r, fig.height=1}
311 | set.seed(21) # reproducible jitter
312 | S <- 1
313 | df <- tibble(res = samp50[S,], cond = rep(1, n))
314 | p <- samp_jitter(df, samp50.tm[S])
315 | p.samp50_1 <- p
316 | p
317 | ```
318 | 
319 | Illustrate sample 2
320 | ```{r, fig.height=1}
321 | set.seed(21) # reproducible jitter
322 | S <- 2
323 | df <- tibble(res = samp50[S,], cond = rep(1, n))
324 | p <- samp_jitter(df, samp50.tm[S])
325 | p.samp50_2 <- p
326 | p
327 | ```
328 | 
329 | Illustrate sample 3
330 | ```{r, fig.height=1}
331 | set.seed(21) # reproducible jitter
332 | S <- 3
333 | df <- tibble(res = samp50[S,], cond = rep(1, n))
334 | p <- samp_jitter(df, samp50.tm[S])
335 | p.samp50_3 <- p
336 | p
337 | ```
338 | 
339 | Illustrate sample 4
340 | ```{r, fig.height=1}
341 | set.seed(21) # reproducible jitter
342 | S <- 4
343 | df <- tibble(res = samp50[S,], cond = rep(1, n))
344 | p <- samp_jitter(df, samp50.tm[S])
345 | p.samp50_4 <- p
346 | p
347 | ```
348 | 
349 | ## Illustrate 4 bootstrap sampling distributions
350 | 
351 | We estimate the sampling distributions from the samples.
352 | 
353 | ### Define density plot function
354 | ```{r}
355 | dens_plot <- function(dist.samp, samp.tm){
356 |   v <- enframe(dist.samp, name = NULL) 
357 | p <- ggplot(v, aes(x = value)) + theme_gar +
358 |   geom_vline(xintercept = samp.tm, linetype = 'solid', colour = "black") +      
359 |   geom_vline(xintercept = pop.tm, linetype = 'longdash', colour = "black") +
360 |         geom_line(stat = 'density', size = 1) +   
361 |   theme(axis.text.y = element_blank(),
362 |         axis.title.y = element_blank(),
363 |         axis.ticks.y = element_blank(),
364 |         plot.margin = unit(c(0, 0, 0.5, 0), "cm")) +
365 |   labs(x = "Bootstrap estimates") +
366 |   # ggtitle("Sampling distribution: n = 20") +
367 |   coord_cartesian(xlim = c(0, 4))
368 | p
369 | }
370 | ```
371 | 
372 | ### n = 20
373 | 
374 | Sample 1
375 | ```{r, fig.height=3}
376 | # set.seed(777)
377 | S <- 1
378 | n <- 20
379 | nboot <- 5000 # number of bootstrap samples
380 | # bootstrap distribution
381 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
382 | p <- dens_plot(boot.samp, samp20.tm[S])
383 | p.boot20_1 <- p
384 | p
385 | ```
386 | 
387 | Sample 2
388 | ```{r, fig.height=3}
389 | # set.seed(777)
390 | S <- 2
391 | n <- 20
392 | nboot <- 5000 # number of bootstrap samples
393 | # bootstrap distribution
394 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
395 | p <- dens_plot(boot.samp, samp20.tm[S])
396 | p.boot20_2 <- p
397 | p
398 | ```
399 | 
400 | Sample 3
401 | ```{r, fig.height=3}
402 | # set.seed(777)
403 | S <- 3
404 | n <- 20
405 | nboot <- 5000 # number of bootstrap samples
406 | # bootstrap distribution
407 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
408 | p <- dens_plot(boot.samp, samp20.tm[S])
409 | p.boot20_3 <- p
410 | p
411 | ```
412 | 
413 | Sample 4
414 | ```{r, fig.height=3}
415 | # set.seed(777)
416 | S <- 4
417 | n <- 20
418 | nboot <- 5000 # number of bootstrap samples
419 | # bootstrap distribution
420 | boot.samp <- apply(matrix(sample(samp20[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
421 | p <- dens_plot(boot.samp, samp20.tm[S])
422 | p.boot20_4 <- p
423 | p
424 | ```
425 | 
426 | ### n = 30
427 | 
428 | Sample 1
429 | ```{r, fig.height=3}
430 | # set.seed(777)
431 | S <- 1
432 | n <- 30
433 | nboot <- 5000 # number of bootstrap samples
434 | # bootstrap distribution
435 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
436 | p <- dens_plot(boot.samp, samp30.tm[S])
437 | p.boot30_1 <- p
438 | p
439 | ```
440 | 
441 | Sample 2
442 | ```{r, fig.height=3}
443 | # set.seed(777)
444 | S <- 2
445 | nboot <- 5000 # number of bootstrap samples
446 | # bootstrap distribution
447 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
448 | p <- dens_plot(boot.samp, samp30.tm[S])
449 | p.boot30_2 <- p
450 | p
451 | ```
452 | 
453 | Sample 3
454 | ```{r, fig.height=3}
455 | # set.seed(777)
456 | S <- 3
457 | nboot <- 5000 # number of bootstrap samples
458 | # bootstrap distribution
459 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
460 | p <- dens_plot(boot.samp, samp30.tm[S])
461 | p.boot30_3 <- p
462 | p
463 | ```
464 | 
465 | Sample 4
466 | ```{r, fig.height=3}
467 | # set.seed(777)
468 | S <- 4
469 | nboot <- 5000 # number of bootstrap samples
470 | # bootstrap distribution
471 | boot.samp <- apply(matrix(sample(samp30[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
472 | p <- dens_plot(boot.samp, samp30.tm[S])
473 | p.boot30_4 <- p
474 | p
475 | ```
476 | 
477 | ### n = 50
478 | 
479 | Sample 1
480 | ```{r, fig.height=3}
481 | # set.seed(777)
482 | S <- 1
483 | n <- 50
484 | nboot <- 5000 # number of bootstrap samples
485 | # bootstrap distribution
486 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
487 | p <- dens_plot(boot.samp, samp50.tm[S])
488 | p.boot50_1 <- p
489 | p
490 | ```
491 | 
492 | Sample 2
493 | ```{r, fig.height=3}
494 | # set.seed(777)
495 | S <- 2
496 | nboot <- 5000 # number of bootstrap samples
497 | # bootstrap distribution
498 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
499 | p <- dens_plot(boot.samp, samp50.tm[S])
500 | p.boot50_2 <- p
501 | p
502 | ```
503 | 
504 | Sample 3
505 | ```{r, fig.height=3}
506 | # set.seed(777)
507 | S <- 3
508 | nboot <- 5000 # number of bootstrap samples
509 | # bootstrap distribution
510 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
511 | p <- dens_plot(boot.samp, samp50.tm[S])
512 | p.boot50_3 <- p
513 | p
514 | ```
515 | 
516 | Sample 4
517 | ```{r, fig.height=3}
518 | # set.seed(777)
519 | S <- 4
520 | nboot <- 5000 # number of bootstrap samples
521 | # bootstrap distribution
522 | boot.samp <- apply(matrix(sample(samp50[S,], n*nboot, replace = TRUE), nrow = nboot), 1, mean, trim = 0.2)
523 | p <- dens_plot(boot.samp, samp50.tm[S])
524 | p.boot50_4 <- p
525 | p
526 | ```
527 | 
528 | # Summary figure
529 | ```{r, eval = FALSE}
530 | 
531 | # -------------------------------------------------
532 | p2.1 <- cowplot::plot_grid(p.samp20_1, p.boot20_1 + theme(axis.title = element_blank()),
533 |                     labels = c("1", "B"),
534 |                     label_size = 18,
535 |                     hjust = c(-0.5, 2.2),
536 |                     vjust = c(1.5, -4.3),
537 |                     ncol = 1, nrow = 2,
538 |                     rel_heights = c(1.5, 3))
539 | 
540 | p2.2 <- cowplot::plot_grid(p.samp20_2, p.boot20_2 + theme(axis.title = element_blank()),
541 |                     labels = c("2"),
542 |                     label_size = 18,
543 |                     ncol = 1, nrow = 2,
544 |                     rel_heights = c(1.5, 3))
545 | 
546 | p2.3 <- cowplot::plot_grid(p.samp20_3, p.boot20_3 + theme(axis.title = element_blank()),
547 |                     labels = c("3"),
548 |                     label_size = 18,
549 |                     ncol = 1, nrow = 2,
550 |                     rel_heights = c(1.5, 3))
551 | 
552 | p2.4 <- cowplot::plot_grid(p.samp20_4, p.boot20_4,
553 |                     labels = c("4"),
554 |                     label_size = 18,
555 |                     ncol = 1, nrow = 2,
556 |                     rel_heights = c(1.5, 3))
557 | 
558 | p2 <- cowplot::plot_grid(p.sampdist1,
559 |                          p2.1, p2.2, p2.3, p2.4,
560 |                     labels = c("A"),
561 |                     hjust = 0.7,
562 |                     label_size = 18,
563 |                     ncol = 1, 
564 |                     nrow = 5,
565 |                     align = "v",
566 |                     axis = "l",
567 |                     rel_heights = c(1, 1, 1, 1, 1))
568 | 
569 | # -------------------------------------------------
570 | p3.1 <- cowplot::plot_grid(p.samp30_1, p.boot30_1 + theme(axis.title = element_blank()),
571 |                     labels = c(""),
572 |                     ncol = 1, nrow = 2,
573 |                     rel_heights = c(1.5, 3))
574 | 
575 | p3.2 <- cowplot::plot_grid(p.samp30_2, p.boot30_2 + theme(axis.title = element_blank()),
576 |                     labels = c(""),
577 |                     ncol = 1, nrow = 2,
578 |                     rel_heights = c(1.5, 3))
579 | 
580 | p3.3 <- cowplot::plot_grid(p.samp30_3, p.boot30_3 + theme(axis.title = element_blank()),
581 |                     labels = c(""),
582 |                     ncol = 1, nrow = 2,
583 |                     rel_heights = c(1.5, 3))
584 | 
585 | p3.4 <- cowplot::plot_grid(p.samp30_4, p.boot30_4,
586 |                     labels = c(""),
587 |                     ncol = 1, nrow = 2,
588 |                     rel_heights = c(1.5, 3))
589 | 
590 | p3 <- cowplot::plot_grid(p.sampdist2,
591 |                          p3.1, p3.2, p3.3, p3.4,
592 |                     labels = c(""),
593 |                     ncol = 1,
594 |                     nrow = 5,
595 |                     align = "v",
596 |                     axis = "l",
597 |                     rel_heights = c(1, 1, 1, 1, 1))
598 | 
599 | # -------------------------------------------------
600 | p4.1 <- cowplot::plot_grid(p.samp50_1, p.boot50_1 + theme(axis.title = element_blank()),
601 |                     labels = c(""),
602 |                     ncol = 1, nrow = 2,
603 |                     rel_heights = c(1.5, 3))
604 | 
605 | p4.2 <- cowplot::plot_grid(p.samp50_2, p.boot50_2 + theme(axis.title = element_blank()),
606 |                     labels = c(""),
607 |                     ncol = 1, nrow = 2,
608 |                     rel_heights = c(1.5, 3))
609 | 
610 | p4.3 <- cowplot::plot_grid(p.samp50_3, p.boot50_3 + theme(axis.title = element_blank()),
611 |                     labels = c(""),
612 |                     ncol = 1, nrow = 2,
613 |                     rel_heights = c(1.5, 3))
614 | 
615 | p4.4 <- cowplot::plot_grid(p.samp50_4, p.boot50_4,
616 |                     labels = c(""),
617 |                     ncol = 1, nrow = 2,
618 |                     rel_heights = c(1.5, 3))
619 | 
620 | p4 <- cowplot::plot_grid(p.sampdist3,
621 |                          p4.1, p4.2, p4.3, p4.4,
622 |                     labels = c(""),
623 |                     ncol = 1,
624 |                     nrow = 5,
625 |                     align = "v",
626 |                     axis = "l",
627 |                     rel_heights = c(1, 1, 1, 1, 1))
628 | 
629 | cowplot::plot_grid(p2, p3, p4,
630 |                     labels = c(""),
631 |                     ncol = 3,
632 |                     nrow = 1,
633 |                     rel_widths = c(1, 1, 1), 
634 |                     label_size = 20, 
635 |                     hjust = -0.5, 
636 |                     scale=.95,
637 |                     align = "h")
638 | 
639 | # save figure
640 | ggsave(filename=('./figures/figure_sampdist_lognormal.pdf'),width=20,height=15)
641 | ggsave(filename=('./figures/figure4.pdf'),width=20,height=15)
642 | ```
643 | 


--------------------------------------------------------------------------------