├── DESCRIPTION ├── NAMESPACE ├── R ├── slipper.R ├── slipper_ci.R ├── slipper_lm.R └── utils.R ├── README.md ├── man ├── slipper.Rd ├── slipper_.Rd ├── slipper_ci.Rd ├── slipper_ci_.Rd ├── slipper_lm.Rd └── slipper_lm_.Rd ├── slipper.Rproj └── slipper.jpg /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: slipper 2 | Title: Easy and tidy bootstrapping 3 | Version: 0.0.0.9001 4 | Authors@R: person("Jeff", "Leek", email = "jtleek@gmail.com", role = c("aut", "cre")) 5 | Description: This package makes it easy to do some common bootstrapping tasks in 6 | a tidy way. 7 | Depends: 8 | R (>= 3.3.1), 9 | tidyverse, 10 | lazyeval, 11 | broom 12 | License: MIT 13 | Encoding: UTF-8 14 | LazyData: true 15 | RoxygenNote: 6.0.1 16 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(slipper) 4 | export(slipper_) 5 | export(slipper_ci) 6 | export(slipper_ci_) 7 | export(slipper_lm) 8 | export(slipper_lm_) 9 | -------------------------------------------------------------------------------- /R/slipper.R: -------------------------------------------------------------------------------- 1 | #' Bootstrap a function that returns a single number 2 | #' 3 | #' Takes a data frame, a function that returns a single number, 4 | #' and a number of replicates and returns a data frame with the 5 | #' function calculated on the observed sample and on B bootstrap 6 | #' samples. 7 | #' 8 | #' @param df A data frame 9 | #' @param expr A an expression with a function that operates on one or more variables from the data frame. 10 | #' @param B the number of bootstrap samples to draw 11 | #' 12 | #' @return out A data frame with the values, whether they come from the observed data or the bootstrapped data. 
#'
#' @details
#' \code{expr} must evaluate to a single value on \code{df} and on every
#' bootstrap sample; an error is raised otherwise. With \code{B = 0} only
#' the observed row is returned.
#'
#' @examples
#'
#' # Bootstrap the mean of the mpg variable in the mtcars data set
#' slipper(mtcars,mean(mpg),B=100)
#'
#' # Bootstrap the mean of the mpg variable with piping
#' mtcars %>% slipper(mean(mpg),B=100)
#'
#' # Calculate a confidence interval using the quantiles method
#' mtcars %>% slipper(mean(mpg),B=100) %>%
#'   filter(type=="bootstrap") %>%
#'   summarize(ci_low = quantile(value,0.025),
#'             ci_high = quantile(value,0.975))
#'
#'
#' @export

slipper_ <- function(df, expr, B = 100) {
  # Statistic on the observed data; evaluated first so a bad expression
  # fails before any resampling is done.
  obs_val <- lazy_eval(expr, data = df)
  n_obs <- nrow(df)

  # One statistic per bootstrap sample (rows drawn with replacement).
  # lapply() over seq_len(B) yields an empty list for B = 0, which
  # unlist() turns into NULL, so only the observed row remains.
  boot_list <- lapply(seq_len(B), function(i) {
    resampled <- sample_n(df, n_obs, replace = TRUE)
    lazy_eval(expr, data = resampled)
  })

  # A longer result would be flattened and the `type` labels silently
  # recycled against it, so reject it explicitly.
  if (length(obs_val) != 1L || any(lengths(boot_list) != 1L)) {
    stop("`expr` must evaluate to a single value", call. = FALSE)
  }

  data.frame(
    type = c("observed", rep("bootstrap", B)),
    value = c(obs_val, unlist(boot_list))
  )
}

#' Bootstrap a function that returns a single number
#'
#' Takes a data frame, a function that returns a single number,
#' and a number of replicates and returns a data frame with the
#' function calculated on the observed sample and on B bootstrap
#' samples.
#'
#' @param df A data frame
#' @param expr A bare function that operates on one or more variables from the data frame.
#' @param B the number of bootstrap samples to draw
#'
#' @return out A data frame with the values, whether they come from the observed data or the bootstrapped data.
#'
#'
#' @examples
#'
#' # Bootstrap the mean of the mpg variable in the mtcars data set
#' slipper(mtcars,mean(mpg),B=100)
#'
#' # Bootstrap the mean of the mpg variable with piping
#' mtcars %>% slipper(mean(mpg),B=100)
#'
#' # Calculate a confidence interval using the quantiles method
#' mtcars %>% slipper(mean(mpg),B=100) %>%
#'   dplyr::filter(type == "bootstrap") %>%
#'   summarize(ci_low = quantile(value,0.025),
#'             ci_high = quantile(value,0.975))
#'
#'
#' @export

slipper <- function(df, expr, B = 100) {
  # Capture the bare expression unevaluated, then delegate to the
  # standard-evaluation version, which does the actual bootstrapping.
  captured_expr <- lazy(expr)
  slipper_(df, expr = captured_expr, B = B)
}

# --------------------------------------------------------------------------------
# /R/slipper_ci.R:
# --------------------------------------------------------------------------------

#' Bootstrap a function that returns a single number and
#' return a non-parametric confidence interval.
#'
#' Takes a data frame, a function that returns a single number,
#' and a number of replicates and returns the lower and upper
#' quantiles of the function calculated on B bootstrap
#' samples.
#'
#' @param df A data frame
#' @param expr An expression with a function that operates on one or more variables from the data frame.
#' @param B the number of bootstrap samples to draw (must be at least 1)
#' @param lower the lower quantile probability for the confidence interval (default 0.025, i.e. the 2.5th percentile)
#' @param upper the upper quantile probability for the confidence interval (default 0.975, i.e. the 97.5th percentile)
#'
#' @return confint A one-row data frame with columns \code{ci_low} and \code{ci_high} holding the lower bound and upper bound
#'
#'
#' @examples
#'
#' # Bootstrap the mean of the mpg variable in the mtcars data set and get a confidence
#' # interval
#' slipper_ci(mtcars,mean(mpg),B=100)
#'
#' # Bootstrap the mean of the mpg variable with piping
#' mtcars %>% slipper_ci(mean(mpg),B=100)
#'
#'
#'
#' @export

slipper_ci_ <- function(df, expr, B = 100, lower = 0.025, upper = 0.975) {
  # Quantiles of zero bootstrap values are undefined, so fail clearly.
  if (B < 1) {
    stop("`B` must be at least 1 to compute a confidence interval",
         call. = FALSE)
  }
  n_obs <- nrow(df)

  # One statistic per bootstrap sample (rows drawn with replacement).
  boot_list <- lapply(seq_len(B), function(i) {
    resampled <- sample_n(df, n_obs, replace = TRUE)
    lazy_eval(expr, data = resampled)
  })
  if (any(lengths(boot_list) != 1L)) {
    stop("`expr` must evaluate to a single value", call. = FALSE)
  }
  boot_val <- unlist(boot_list)

  # The interval uses only the bootstrap values, so take the quantiles
  # directly; this also avoids relying on which `filter` is on the
  # search path. unname() drops the "2.5%"-style names quantile() adds.
  data.frame(
    ci_low = unname(quantile(boot_val, lower)),
    ci_high = unname(quantile(boot_val, upper))
  )
}

#' Bootstrap a function that returns a single number and
#' return a non-parametric confidence interval.
#'
#' Takes a data frame, a function that returns a single number,
#' and a number of replicates and returns the lower and upper
#' quantiles of the function calculated on B bootstrap
#' samples.
#'
#' @param df A data frame
#' @param expr A bare (unquoted) expression with a function that operates on one or more variables from the data frame.
#' @param B the number of bootstrap samples to draw
#' @param lower the lower quantile probability for the confidence interval (default 0.025, i.e. the 2.5th percentile)
#' @param upper the upper quantile probability for the confidence interval (default 0.975, i.e. the 97.5th percentile)
#'
#' @return confint The value returned by \code{slipper_ci_}: the lower bound and upper bound of the interval
#'
#'
#' @examples
#'
#' # Bootstrap the mean of the mpg variable in the mtcars data set and get a confidence
#' # interval
#' slipper_ci(mtcars,mean(mpg),B=100)
#'
#' # Bootstrap the mean of the mpg variable with piping
#' mtcars %>% slipper_ci(mean(mpg),B=100)
#'
#' # Use a 90 percent interval instead of the default 95 percent one
#' slipper_ci(mtcars,mean(mpg),B=100,lower=0.05,upper=0.95)
#'
#'
#'
#' @export

slipper_ci <- function(df, expr, B = 100, lower = 0.025, upper = 0.975) {
  # Capture the bare expression and delegate. `lower` and `upper` must be
  # forwarded explicitly, otherwise the callee falls back to its own
  # defaults and the caller's requested bounds are ignored.
  slipper_ci_(df, lazy(expr), B = B, lower = lower, upper = upper)
}

# --------------------------------------------------------------------------------
# /R/slipper_lm.R:
# --------------------------------------------------------------------------------

#' Bootstrap a linear regression model
#'
#' Takes a data frame, and a model to fit to the data
#' and each bootstrap replicate. Bootstrapping is by
#' default resampling cases, but if you set boot_resid=TRUE
#' then resampling residuals will be performed. If you
#' pass a null model formula that includes a subset
#' of the variables in the full model (i.e. it is a
#' nested model) then the bootstrap statistics will
#' come from the bootstrapped null data and can be
#' used for a hypothesis test.
#'
#'
#' @param df A data frame
#' @param formula An expression for a formula to pass to the lm command
#' @param null_formula (optional) If NULL, standard bootstrapping is performed. If a nested expression for a null formula is passed the bootstrapped statistics come from the null.
#' @param B the number of bootstrap samples to draw
#' @param boot_resid If TRUE then bootstrapping residuals is performed.
#'
#' @return out A data frame with the values, whether they come from the observed data or the bootstrapped data, and the coefficient name.
#'
#' @examples
#'
#' # Standard-evaluation interface: pass the formula as a lazy object
#' slipper_lm_(mtcars, lazyeval::lazy(mpg ~ cyl), B=100)
#'
#' @export


slipper_lm_ <- function(df,
                        formula,
                        null_formula = NULL,
                        B = 100,
                        boot_resid = FALSE) {
  # The formulas do not change between replicates, so evaluate them once.
  model_formula <- lazy_eval(formula)
  # The default for `null_formula` is a plain NULL; guard it instead of
  # handing it to lazy_eval(). A lazy object wrapping NULL (what
  # slipper_lm() passes) still goes through lazy_eval() and yields NULL.
  null_model_formula <- if (is.null(null_formula)) {
    NULL
  } else {
    lazy_eval(null_formula)
  }

  boot_resid <- as.logical(boot_resid)
  if (length(boot_resid) != 1L || is.na(boot_resid)) {
    stop("`boot_resid` must be TRUE or FALSE", call. = FALSE)
  }

  # Coefficient table (term, value) of the full model fitted to `data`.
  fit_coefs <- function(data) {
    select(tidy(lm(model_formula, data = data)), term, value = estimate)
  }

  lm_fit <- lm(model_formula, data = df)
  obs_val <- select(tidy(lm_fit), term, value = estimate)
  n_obs <- nrow(df)

  if (!is.null(null_model_formula) || boot_resid) {
    # Residual bootstrap: new outcome = fitted values + resampled residuals.
    # Fitted values come from the null model when one is given (statistics
    # are then drawn under the null), otherwise from the full model.
    # Residuals always come from the full model.
    base_fit <- if (is.null(null_model_formula)) {
      lm_fit
    } else {
      lm(null_model_formula, data = df)
    }
    # NOTE: the outcome column is located by the variable name(s) on the
    # left-hand side of the formula, so a transformed response such as
    # log(y) would have values on the transformed scale written into `y`.
    outcome <- all.vars(model_formula[[2]])
    pos <- which(names(df) == outcome)
    res <- residuals(lm_fit)
    base_fitted <- base_fit$fitted.values

    boot_list <- replicate(B, {
      newdata <- df
      newdata[, pos] <- base_fitted + sample(res, replace = TRUE)
      fit_coefs(newdata)
    }, simplify = FALSE)
  } else {
    # Case bootstrap: resample whole rows with replacement.
    boot_list <- replicate(B, {
      fit_coefs(sample_n(df, n_obs, replace = TRUE))
    }, simplify = FALSE)
  }

  boot_val <- bind_rows(boot_list)
  out <- rbind(obs_val, boot_val)
  out$type <- c(rep("observed", nrow(obs_val)),
                rep("bootstrap", nrow(boot_val)))
  out
}


#' Bootstrap a linear regression model
#'
#' Takes a data frame, and a model to fit to the data
#' and each bootstrap replicate.
Bootstrapping is by 87 | #' default resampling cases, but if you set boot_resid=TRUE 88 | #' then resampling residuals will be performed. If you 89 | #' pass a null model formula that includes a subset 90 | #' of the variables in the full model (i.e. it is a 91 | #' nested model) then the bootstrap statistics will 92 | #' come from the bootstrapped null data and can be 93 | #' used for a hypothesis test. 94 | #' 95 | #' 96 | #' @param df A data frame 97 | #' @param formula A bare formula to pass to the lm command 98 | #' @param null_formula (optional) If NULL, standard bootstrapping is performed. If a bare nested null formula is passed the bootstrapped statistics come from the null. 99 | #' @param B the number of bootstrap samples to draw 100 | #' @param boot_resid If TRUE then bootstrapping residuals is performed. 101 | #' 102 | #' @return out A data frame with the values, whether they come from the observed data or the bootstrapped data, and the coefficient name. 103 | #' 104 | #' 105 | #' @export 106 | #' @examples 107 | #' 108 | #' # Bootstrap a regression model 109 | #' slipper_lm(mtcars,mpg ~ cyl,B=100) 110 | #' 111 | #' # Bootstrap a regression model with piping 112 | #' mtcars %>% slipper_lm(mpg ~ cyl,B=100) 113 | #' 114 | #' # Bootstrap residuals for a regression model 115 | #' mtcars %>% slipper_lm(mpg ~ cyl,B=100,boot_resid=TRUE) 116 | #' 117 | #' # Bootstrap confidence intervals 118 | #' mtcars %>% slipper_lm(mpg ~ cyl,B=100) %>% 119 | #' filter(type=="bootstrap",term=="cyl") %>% 120 | #' summarize(ci_low = quantile(value,0.025), 121 | #' ci_high = quantile(value,0.975)) 122 | #' 123 | #' # Bootstrap hypothesis test - here I've added one to the numerator 124 | #' # and denominator because bootstrap p-values should never be zero. 
#'
#' boot = mtcars %>% slipper_lm(mpg ~ cyl, null_formula = mpg ~ 1,B=1000) %>%
#'   filter(term=="cyl") %>%
#'   summarize(num = sum(abs(value) >= abs(value[1])),
#'             den = n(),
#'             pval = num/den)




slipper_lm <- function(df,
                       formula,
                       null_formula = NULL,
                       B = 100,
                       boot_resid = FALSE) {
  # Capture both bare formulas unevaluated, then delegate to the
  # standard-evaluation version, which fits and bootstraps the model.
  full_lazy <- lazy(formula)
  null_lazy <- lazy(null_formula)
  slipper_lm_(df, full_lazy, null_lazy, B, boot_resid)
}


# --------------------------------------------------------------------------------
# /R/utils.R:
# --------------------------------------------------------------------------------

# Case-resampling bootstrap of a linear model: draws B samples of nrow(df)
# rows with replacement, refits lm() on each, and returns the stacked
# `term` / `estimate` columns of the tidied fits.
slipper_boot <- function(df, formula, B) {
  n_rows <- nrow(df)
  fits <- replicate(B, {
    resampled <- sample_n(df, n_rows, replace = TRUE)
    boot_fit <- lm(lazy_eval(formula), data = resampled)
    select(tidy(boot_fit), term, estimate)
  }, simplify = FALSE)
  bind_rows(fits)
}

# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# Bootstrapping made easy and tidy with slipper
# =================
#
# ![](slipper.jpg)
#
# You've heard of [broom](https://cran.r-project.org/web/packages/broom/index.html) for tidying up your R functions. slipper is an R package for tidy/easy bootstrapping. There are already a bunch of good bootstrapping packages out there including [bootstrap](https://cran.r-project.org/web/packages/bootstrap/) and [boot](https://cran.r-project.org/web/packages/boot/). You can also bootstrap with [dplyr and broom](https://cran.r-project.org/web/packages/broom/vignettes/bootstrapping.html) or with [purrr and modelr](https://cran.r-project.org/web/packages/modelr).
#
# But I'm too dumb for any of those.
So slipper includes some simple, pipeable bootstrapping functions for me. 9 | 10 | ### install 11 | with `devtools`: 12 | 13 | ```r 14 | devtools::install_github('jtleek/slipper') 15 | ``` 16 | 17 | ### use 18 | 19 | There are three main functions in this package: `slipper`, `slipper_ci`, and `slipper_lm` (each also has a standard-evaluation variant whose name ends in `_`). 20 | 21 | Call `slipper` to bootstrap any function that returns 22 | a single value. 23 | 24 | ```r 25 | slipper(mtcars,mean(mpg),B=100) 26 | ``` 27 | 28 | slipper is built to work with pipes and the tidyverse too. 29 | 30 | ```r 31 | mtcars %>% slipper(mean(mpg),B=100) 32 | ``` 33 | 34 | The output is a data frame with the values of the function on the original data set and the bootstrapped replicates. You can calculate confidence intervals using `summarize`. 35 | 36 | ```r 37 | mtcars %>% slipper(mean(mpg),B=100) %>% 38 | filter(type=="bootstrap") %>% 39 | summarize(ci_low = quantile(value,0.025), 40 | ci_high = quantile(value,0.975)) 41 | ``` 42 | 43 | You can also bootstrap linear models using `slipper_lm`: just pass the data frame and the formula you want to fit on the original data and on the bootstrap samples. 44 | 45 | ```r 46 | slipper_lm(mtcars,mpg ~ cyl,B=100) 47 | ``` 48 | 49 | This is also pipeable: 50 | 51 | ```r 52 | mtcars %>% slipper_lm(mpg ~ cyl,B=100) 53 | ``` 54 | 55 | The default behavior is to bootstrap complete cases, but if you want to bootstrap residuals, set `boot_resid=TRUE`: 56 | 57 | ```r 58 | mtcars %>% slipper_lm(mpg ~ cyl,B=100,boot_resid=TRUE) 59 | ``` 60 | 61 | You can calculate bootstrap confidence intervals in the same way as you do for `slipper`. 62 | 63 | ```r 64 | mtcars %>% slipper_lm(mpg ~ cyl,B=100) %>% 65 | filter(type=="bootstrap",term=="cyl") %>% 66 | summarize(ci_low = quantile(value,0.025), 67 | ci_high = quantile(value,0.975)) 68 | ``` 69 | 70 | Finally, if you want to do a bootstrap hypothesis test you can pass a formula and a nested null formula. `formula` must include every term in `null_formula` plus the one additional term you want to test. 
71 | 72 | ```r 73 | # Bootstrap hypothesis test - 74 | # here I've added one to the numerator 75 | # and denominator because bootstrap p-values should 76 | # never be zero. 77 | 78 | mtcars %>% 79 | slipper_lm(mpg ~ cyl, null_formula = mpg ~ 1,B=1000) %>% 80 | filter(term=="cyl") %>% 81 | summarize(num = sum(abs(value) >= abs(value[1])), 82 | den = n(), 83 | pval = num/den) 84 | ``` 85 | 86 | That's basically it for now. Would love some help/pull requests/fixes as this is my first attempt at getting into the tidyverse :). 87 | -------------------------------------------------------------------------------- /man/slipper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slipper.R 3 | \name{slipper} 4 | \alias{slipper} 5 | \title{Bootstrap a function that returns a single number} 6 | \usage{ 7 | slipper(df, expr, B = 100) 8 | } 9 | \arguments{ 10 | \item{df}{A data frame} 11 | 12 | \item{expr}{A bare function that operates on one or more variables from the data frame.} 13 | 14 | \item{B}{the number of bootstrap samples to draw} 15 | } 16 | \value{ 17 | out A data frame with the values, whether they come from the observed data or the bootstrapped data. 18 | } 19 | \description{ 20 | Takes a data frame, a function that returns a single number, 21 | and a number of replicates and returns a data frame with the 22 | function calculated on the observed sample and on B bootstrap 23 | samples. 
24 | } 25 | \examples{ 26 | 27 | # Boostrap the mean of the mpg variable in the mtcars data set 28 | slipper(mtcars,mean(mpg),B=100) 29 | 30 | # Bootstrap the mean of the mpg variable with piping 31 | mtcars \%>\% slipper(mean(mpg),B=100) 32 | 33 | # Calculate a confidence interval using the quantiles method 34 | mtcars \%>\% slipper(mean(mpg),B=100) \%>\% 35 | dplyr::filter(type == "bootstrap") \%>\% 36 | summarize(ci_low = quantile(value,0.025), 37 | ci_high = quantile(value,0.975)) 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/slipper_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slipper.R 3 | \name{slipper_} 4 | \alias{slipper_} 5 | \title{Bootstrap a function that returns a single number} 6 | \usage{ 7 | slipper_(df, expr, B = 100) 8 | } 9 | \arguments{ 10 | \item{df}{A data frame} 11 | 12 | \item{expr}{A an expression with a function that operates on one or more variables from the data frame.} 13 | 14 | \item{B}{the number of bootstrap samples to draw} 15 | } 16 | \value{ 17 | out A data frame with the values, whether they come from the observed data or the bootstrapped data. 18 | } 19 | \description{ 20 | Takes a data frame, a function that returns a single number, 21 | and a number of replicates and returns a data frame with the 22 | function calculated on the observed sample and on B bootstrap 23 | samples. 
24 | } 25 | \examples{ 26 | 27 | # Boostrap the mean of the mpg variable in the mtcars data set 28 | slipper(mtcars,mean(mpg),B=100) 29 | 30 | # Bootstrap the mean of the mpg variable with piping 31 | mtcars \%>\% slipper(mean(mpg),B=100) 32 | 33 | # Calculate a confidence interval using the quantiles method 34 | mtcars \%>\% slipper(mean(mpg),B=100) \%>\% 35 | filter(type=="bootstrap") \%>\% 36 | summarize(ci_low = quantile(value,0.025), 37 | ci_high = quantile(value,0.975)) 38 | 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/slipper_ci.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slipper_ci.R 3 | \name{slipper_ci} 4 | \alias{slipper_ci} 5 | \title{Bootstrap a function that returns a single number and 6 | return a non-parametric confidence interval.} 7 | \usage{ 8 | slipper_ci(df, expr, B = 100, lower = 0.025, upper = 0.975) 9 | } 10 | \arguments{ 11 | \item{df}{A data frame} 12 | 13 | \item{expr}{A an expression with a function that operates on one or more variables from the data frame.} 14 | 15 | \item{B}{the number of bootstrap samples to draw} 16 | 17 | \item{lower}{the lower percentile for the confidence interval (default 2.5%)} 18 | 19 | \item{upper}{the upper percentile for the confidence interval (default 97.5%)} 20 | } 21 | \value{ 22 | confint A vector with the lower bound and upper bound 23 | } 24 | \description{ 25 | Takes a data frame, a function that returns a single number, 26 | and a number of replicates and returns a data frame with the 27 | function calculated on the observed sample and on B bootstrap 28 | samples. 
29 | } 30 | \examples{ 31 | 32 | # Boostrap the mean of the mpg variable in the mtcars data set and get a confidence 33 | # interval 34 | slipper_ci(mtcars,mean(mpg),B=100) 35 | 36 | # Bootstrap the mean of the mpg variable with piping 37 | mtcars \%>\% slipper_ci(mean(mpg),B=100) 38 | 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/slipper_ci_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slipper_ci.R 3 | \name{slipper_ci_} 4 | \alias{slipper_ci_} 5 | \title{Bootstrap a function that returns a single number and 6 | return a non-parametric confidence interval.} 7 | \usage{ 8 | slipper_ci_(df, expr, B = 100, lower = 0.025, upper = 0.975) 9 | } 10 | \arguments{ 11 | \item{df}{A data frame} 12 | 13 | \item{expr}{A an expression with a function that operates on one or more variables from the data frame.} 14 | 15 | \item{B}{the number of bootstrap samples to draw} 16 | 17 | \item{lower}{the lower percentile for the confidence interval (default 2.5%)} 18 | 19 | \item{upper}{the upper percentile for the confidence interval (default 97.5%)} 20 | } 21 | \value{ 22 | confint A vector with the lower bound and upper bound 23 | } 24 | \description{ 25 | Takes a data frame, a function that returns a single number, 26 | and a number of replicates and returns a data frame with the 27 | function calculated on the observed sample and on B bootstrap 28 | samples. 
29 | } 30 | \examples{ 31 | 32 | # Boostrap the mean of the mpg variable in the mtcars data set and get a confidence 33 | # interval 34 | slipper_ci(mtcars,mean(mpg),B=100) 35 | 36 | # Bootstrap the mean of the mpg variable with piping 37 | mtcars \%>\% slipper_ci(mean(mpg),B=100) 38 | 39 | 40 | 41 | } 42 | -------------------------------------------------------------------------------- /man/slipper_lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slipper_lm.R 3 | \name{slipper_lm} 4 | \alias{slipper_lm} 5 | \title{Bootstrap a linear regression model} 6 | \usage{ 7 | slipper_lm(df, formula, null_formula = NULL, B = 100, boot_resid = FALSE) 8 | } 9 | \arguments{ 10 | \item{df}{A data frame} 11 | 12 | \item{formula}{A bare formula to pass to the lm command} 13 | 14 | \item{null_formula}{(optional) If NULL, standard bootstrapping is performed. If a bare nested null formula is passed the bootstrapped statistics come from the null.} 15 | 16 | \item{B}{the number of bootstrap samples to draw} 17 | 18 | \item{boot_resid}{If TRUE then bootstrapping residuals is performed.} 19 | } 20 | \value{ 21 | out A data frame with the values, whether they come from the observed data or the bootstrapped data, and the coefficient name. 22 | } 23 | \description{ 24 | Takes a data frame, and a model to fit to the data 25 | and each bootstrap replicate. Bootstrapping is by 26 | default resampling cases, but if you set boot_resid=TRUE 27 | then resampling residuals will be performed. If you 28 | pass a null model formula that includes a subset 29 | of the variables in the full model (i.e. it is a 30 | nested model) then the bootstrap statistics will 31 | come from the bootstrapped null data and can be 32 | used for a hypothesis test. 
33 | } 34 | \examples{ 35 | 36 | # Bootstrap a regression model 37 | slipper_lm(mtcars,mpg ~ cyl,B=100) 38 | 39 | # Bootstrap a regression model with piping 40 | mtcars \%>\% slipper_lm(mpg ~ cyl,B=100) 41 | 42 | # Bootstrap residuals for a regression model 43 | mtcars \%>\% slipper_lm(mpg ~ cyl,B=100,boot_resid=TRUE) 44 | 45 | # Bootsrap confidence intervals 46 | mtcars \%>\% slipper_lm(mpg ~ cyl,B=100) \%>\% 47 | filter(type=="bootstrap",term=="cyl") \%>\% 48 | summarize(ci_low = quantile(value,0.025), 49 | ci_high = quantile(value,0.975)) 50 | 51 | # Bootstrap hypothesis test - here I've added one to the numerator 52 | # and denominator because bootstrap p-values should never be zero. 53 | 54 | boot = mtcars \%>\% slipper_lm(mpg ~ cyl, null_formula = mpg ~ 1,B=1000) \%>\% 55 | filter(term=="cyl") \%>\% 56 | summarize(num = sum(abs(value) >= abs(value[1])), 57 | den = n(), 58 | pval = num/den) 59 | } 60 | -------------------------------------------------------------------------------- /man/slipper_lm_.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/slipper_lm.R 3 | \name{slipper_lm_} 4 | \alias{slipper_lm_} 5 | \title{Bootstrap a linear regression model} 6 | \usage{ 7 | slipper_lm_(df, formula, null_formula = NULL, B = 100, boot_resid = FALSE) 8 | } 9 | \arguments{ 10 | \item{df}{A data frame} 11 | 12 | \item{formula}{A an expression for a formula to pass to the lm command} 13 | 14 | \item{null_formula}{(optional) If NULL, standard bootstrapping is performed. 
If a nested expression for a null formula is passed the bootstrapped statistics come from the null.} 15 | 16 | \item{B}{the number of bootstrap samples to draw} 17 | 18 | \item{boot_resid}{If TRUE then bootstrapping residuals is performed.} 19 | } 20 | \value{ 21 | out A data frame with the values, whether they come from the observed data or the bootstrapped data, and the coefficient name. 22 | } 23 | \description{ 24 | Takes a data frame, and a model to fit to the data 25 | and each bootstrap replicate. Bootstrapping is by 26 | default resampling cases, but if you set boot_resid=TRUE 27 | then resampling residuals will be performed. If you 28 | pass a null model formula that includes a subset 29 | of the variables in the full model (i.e. it is a 30 | nested model) then the bootstrap statistics will 31 | come from the bootstrapped null data and can be 32 | used for a hypothesis test. 33 | } 34 | \examples{ 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /slipper.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /slipper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jtleek/slipper/bd395641b14c3ad17892504fc09fb76741498ad6/slipper.jpg --------------------------------------------------------------------------------