├── .gitignore ├── README.Rmd ├── README.md ├── code └── smd-plot.R ├── exercises ├── 01-whole-game-exercises.Rmd ├── 02-dags-exercises.Rmd ├── 03-pscores-exercises.Rmd ├── 04-pscores-weighting-exercises.Rmd ├── 05-pscores-diagnostics-exercises.Rmd └── 06-outcome-model-exercises.Rmd ├── slides ├── 00-intro.Rmd ├── 00-intro.html ├── 00-intro.pdf ├── 01-causal_modeling_whole_game.Rmd ├── 01-causal_modeling_whole_game.html ├── 01-causal_modeling_whole_game.pdf ├── 01-causal_modeling_whole_game_cache │ └── html │ │ ├── __packages │ │ ├── boot_cache_011afc01abdd656081906cf473a2e4a7.RData │ │ ├── boot_cache_011afc01abdd656081906cf473a2e4a7.rdb │ │ └── boot_cache_011afc01abdd656081906cf473a2e4a7.rdx ├── 01-causal_modeling_whole_game_files │ └── figure-html │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-12-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-16-1.png │ │ ├── unnamed-chunk-19-1.png │ │ ├── unnamed-chunk-2-1.png │ │ ├── unnamed-chunk-20-1.png │ │ ├── unnamed-chunk-21-1.png │ │ ├── unnamed-chunk-27-1.png │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-5-1.png │ │ ├── unnamed-chunk-6-1.png │ │ ├── unnamed-chunk-7-1.png │ │ └── unnamed-chunk-8-1.png ├── 02-dags.Rmd ├── 02-dags.html ├── 02-dags.pdf ├── 02-dags_files │ └── figure-html │ │ ├── unnamed-chunk-10-1.png │ │ ├── unnamed-chunk-11-1.png │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-17-1.png │ │ ├── unnamed-chunk-19-1.png │ │ ├── unnamed-chunk-21-1.png │ │ ├── unnamed-chunk-5-1.png │ │ ├── unnamed-chunk-6-1.png │ │ ├── unnamed-chunk-7-1.png │ │ ├── unnamed-chunk-8-1.png │ │ └── unnamed-chunk-9-1.png ├── 03-pscores.Rmd ├── 03-pscores.html ├── 03-pscores.pdf ├── 03-pscores_files │ └── figure-html │ │ ├── unnamed-chunk-5-1.png │ │ └── unnamed-chunk-6-1.png ├── 04-pscore-weighting.Rmd ├── 04-pscore-weighting.html ├── 04-pscore-weighting.pdf ├── 04-pscore-weighting_files │ └── figure-html │ │ ├── unnamed-chunk-2-1.png │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-5-1.png │ │ ├── unnamed-chunk-6-1.png │ │ ├── unnamed-chunk-7-1.png │ │ └── unnamed-chunk-8-1.png ├── 04-using-pscores_files │ └── figure-html │ │ ├── unnamed-chunk-2-1.png │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-4-1.png │ │ ├── unnamed-chunk-5-1.png │ │ ├── unnamed-chunk-6-1.png │ │ └── unnamed-chunk-7-1.png ├── 05-pscore-diagnostics.Rmd ├── 05-pscore-diagnostics.html ├── 05-pscore-diagnostics.pdf ├── 05-pscore-diagnostics_files │ └── figure-html │ │ ├── unnamed-chunk-13-1.png │ │ ├── unnamed-chunk-14-1.png │ │ ├── unnamed-chunk-14-2.png │ │ ├── unnamed-chunk-14-3.png │ │ ├── unnamed-chunk-14-4.png │ │ ├── unnamed-chunk-15-1.png │ │ ├── unnamed-chunk-15-2.png │ │ ├── unnamed-chunk-17-1.png │ │ ├── unnamed-chunk-18-1.png │ │ ├── unnamed-chunk-18-2.png │ │ ├── unnamed-chunk-19-1.png │ │ ├── unnamed-chunk-19-2.png │ │ ├── unnamed-chunk-20-1.png │ │ ├── unnamed-chunk-21-1.png │ │ ├── unnamed-chunk-23-1.png │ │ ├── unnamed-chunk-32-1.png │ │ └── unnamed-chunk-8-1.png ├── 06-outcome-model.Rmd ├── 06-outcome-model.html ├── 06-outcome-model.pdf ├── img │ ├── conf-2.png │ ├── conf-3.png │ ├── ggdagitty.png │ ├── ggdagitty_alg.png │ ├── ggdagitty_plots.png │ ├── ldm.jpg │ ├── mb.jpg │ ├── obs-studies-2.png │ ├── obs-studies-3.png │ ├── obs-studies.png │ ├── pscores.png │ ├── randomized-2.png │ ├── randomized.png │ ├── tidy_ggdagitty.png │ ├── trt-conf.png │ └── trt.png ├── index.Rmd ├── index.html ├── libs │ ├── countdown-0.3.5 │ │ ├── countdown.css │ │ ├── countdown.js │ │ └── smb_stage_clear.mp3 │ ├── countdown │ │ ├── countdown.css │ │ ├── countdown.js │ │ └── smb_stage_clear.mp3 │ ├── header-attrs-2.3 │ │ └── header-attrs.js │ ├── header-attrs │ │ └── header-attrs.js │ ├── remark-css-0.0.1 │ │ └── default.css │ └── remark-css │ │ └── default.css └── theme.css └── user2020-causal-inference.Rproj /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | .DS_Store 6 | render_pdf.R 7 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>" 11 | ) 12 | ``` 13 | 14 | ## user2020! Causal Inference in R Workshop 15 | 16 | ### Slides 17 | * [00 Intro](https://user2020.lucymcgowan.com/00-intro.html) 18 | * [01 Whole Game](https://user2020.lucymcgowan.com/01-causal_modeling_whole_game.html) 19 | * [02 Causal Diagrams](https://user2020.lucymcgowan.com/02-dags.html) 20 | * [03 Introduction to Propensity Scores](https://user2020.lucymcgowan.com/03-pscores.html) 21 | * [04 Using Propensity Scores](https://user2020.lucymcgowan.com/04-pscore-weighting.html) 22 | * [05 Checking Propensity Scores](https://user2020.lucymcgowan.com/05-pscore-diagnostics.html) 23 | * [06 Fitting the outcome model](https://user2020.lucymcgowan.com/06-outcome-model.html) 24 | 25 | ### Installing materials locally 26 | 27 | We will be using RStudio Cloud for the workshop, but if you would like to install the required packages and course materials, we have an R package called {[useRcausal2020](https://github.com/malcolmbarrett/useRcausal2020)} to help you do that! You can install {[useRcausal2020](https://github.com/malcolmbarrett/useRcausal2020)} from GitHub with: 28 | 29 | ``` r 30 | install.packages("remotes") 31 | remotes::install_github("malcolmbarrett/useRcausal2020") 32 | ``` 33 | 34 | Once you've installed the package, install the workshop with 35 | 36 | ``` r 37 | useRcausal2020::install_workshop("path/to/your/computer") 38 | ``` 39 | 40 | Replace "path/to/your/computer" with where on your computer you want the workshop installed. 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## user2020\! Causal Inference in R Workshop 5 | 6 | ### Slides 7 | 8 | - [00 Intro](https://user2020.lucymcgowan.com/00-intro.html) 9 | - [01 Whole 10 | Game](https://user2020.lucymcgowan.com/01-causal_modeling_whole_game.html) 11 | - [02 Causal Diagrams](https://user2020.lucymcgowan.com/02-dags.html) 12 | - [03 Introduction to Propensity 13 | Scores](https://user2020.lucymcgowan.com/03-pscores.html) 14 | - [04 Using Propensity 15 | Scores](https://user2020.lucymcgowan.com/04-pscore-weighting.html) 16 | - [05 Checking Propensity 17 | Scores](https://user2020.lucymcgowan.com/05-pscore-diagnostics.html) 18 | - [06 Fitting the outcome 19 | model](https://user2020.lucymcgowan.com/06-outcome-model.html) 20 | 21 | ### Installing materials locally 22 | 23 | We will be using RStudio Cloud for the workshop, but if you would like 24 | to install the required packages and course materials, we have an R 25 | package called 26 | {[useRcausal2020](https://github.com/malcolmbarrett/useRcausal2020)} to 27 | help you do that\! You can install 28 | {[useRcausal2020](https://github.com/malcolmbarrett/useRcausal2020)} 29 | from GitHub with: 30 | 31 | ``` r 32 | install.packages("remotes") 33 | remotes::install_github("malcolmbarrett/useRcausal2020") 34 | ``` 35 | 36 | Once you’ve installed the package, install the workshop with 37 | 38 | ``` r 39 | useRcausal2020::install_workshop("path/to/your/computer") 40 | ``` 41 | 42 | Replace “path/to/your/computer” with where on your computer you want the 43 | workshop installed. 44 | -------------------------------------------------------------------------------- /code/smd-plot.R: -------------------------------------------------------------------------------- 1 | library(survey) 2 | library(tableone) 3 | library(tidyverse) 4 | library(broom) 5 | # remotes::install_github("malcolmbarrett/cidata") 6 | library(cidata) 7 | 8 | propensity_model <- glm( 9 | qsmk ~ sex + 10 | race + age + I(age^2) + education + 11 | smokeintensity + I(smokeintensity^2) + 12 | smokeyrs + I(smokeyrs^2) + exercise + active + 13 | wt71 + I(wt71^2), 14 | family = binomial(), 15 | data = nhefs_complete 16 | ) 17 | 18 | df <- propensity_model %>% 19 | augment(type.predict = "response", data = nhefs_complete) %>% 20 | mutate(wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) 21 | 22 | svy_des <- svydesign( 23 | ids = ~ 1, 24 | data = df, 25 | weights = ~ wts) 26 | 27 | smd_table_unweighted <- CreateTableOne( 28 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 29 | "exercise", "active", "wt71"), 30 | strata = "qsmk", 31 | data = df, 32 | test = FALSE) 33 | 34 | smd_table <- svyCreateTableOne( 35 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 36 | "exercise", "active", "wt71"), 37 | strata = "qsmk", 38 | data = svy_des, 39 | test = FALSE) 40 | 41 | 42 | plot_df <- data.frame( 43 | var = rownames(ExtractSmd(smd_table)), 44 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 45 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 46 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 47 | 48 | ggplot( 49 | data = plot_df, 50 | mapping = aes(x = var, y = SMD, group = Method, color = Method) 51 | ) + 52 | geom_line() + 53 | geom_point() + 54 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 55 | coord_flip() 56 | -------------------------------------------------------------------------------- /exercises/01-whole-game-exercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Causal Modeling in R: Whole Game" 3 | output: html_document 4 | --- 5 | 6 | ```{r setup} 7 | library(tidyverse) 8 | library(broom) 9 | library(rsample) 10 | library(ggdag) 11 | # remotes::install_github("malcolmbarrett/cidata") 12 | library(cidata) 13 | library(survey) 14 | library(tableone) 15 | ``` 16 | 17 | ## Causal Modeling: Whole Game 18 | 19 | In this guided exercise, we'll attempt to answer a causal question: does quitting smoking make you gain weight? Causal modeling has a special place in the history of smoking research: the studies that demonstrated that smoking causes lung cancer were observational. Thanks to other studies, we also know that, if you're already a smoker, quitting smoking reduces your risk of lung cancer. However, some have observed that former smokers tend to gain weight. Is this the result of quitting smoking, or does something else explain this effect? In the book Causal Inference by Hernán and Robins, the authors analyze this question using several causal inference techniques. 20 | 21 | To answer this question, we'll use causal inference methods to examine the relationship between quitting smoking and gaining weight. First, we'll draw our assumptions with a causal diagram (a directed acyclic graph, or DAG), which will guide our model. Then, we'll use a modeling approach called inverse probability weighting--one of many causal modeling techniques--to estimate the causal effect we're interested in. 22 | 23 | We'll use data from NHEFS to try to estimate the causal effect of quitting smoking on weight game. NHEFS is a longitudinal, observational study that has many of the variables we'll need. Take a look at `nhefs_codebook` if you want to know more about the variables in this data set. These data are included in the {cidata} package. We'll use the `nhefs_complete` data set, but we'll remove people who were lost to follow-up. 24 | 25 | ```{r} 26 | nhefs_complete_uc <- nhefs_complete %>% 27 | filter(censored == 0) 28 | nhefs_complete_uc 29 | ``` 30 | 31 | Let's look at the distribution of weight gain between the two groups. 32 | 33 | ```{r} 34 | colors <- c("#E69F00", "#56B4E9") 35 | 36 | nhefs_complete_uc %>% 37 | ggplot(aes(wt82_71, fill = factor(qsmk))) + 38 | geom_vline(xintercept = 0, color = "grey60", size = 1) + 39 | geom_density(color = "white", alpha = .75, size = .5) + 40 | scale_fill_manual(values = colors) + 41 | theme_minimal() + 42 | theme(legend.position = "bottom") + 43 | labs( 44 | x = "change in weight (lbs)", 45 | fill = "quit smoking (1 = yes)" 46 | ) 47 | ``` 48 | 49 | There's a difference--former smokers do seemed to have gained a bit more weight--but there's also a lot of variation. Let's look at the numeric summaries. 50 | 51 | ```{r} 52 | # ~2.5 lbs gained for quit vs. not quit 53 | nhefs_complete_uc %>% 54 | group_by(qsmk) %>% 55 | summarize( 56 | mean_weight_change = mean(wt82_71), 57 | sd = sd(wt82_71), 58 | .groups = "drop" 59 | ) 60 | ``` 61 | 62 | --- 63 | 64 | ```{r} 65 | # ~2.5 lbs gained for quit vs. not quit 66 | nhefs_complete_uc %>% 67 | group_by(qsmk) %>% 68 | summarize( 69 | mean_weight_change = mean(wt82_71), 70 | sd = sd(wt82_71), 71 | .groups = "drop" 72 | ) 73 | ``` 74 | 75 | Here, it looks like those who quit smoking gained, on average, 2.5 lbs. But is there something else that could explain these results? There are many factors associated with both quitting smoking and gaining weight; could one of those factors explain away the results we're seeing here? 76 | 77 | To truly answer this question, we need to specify a causal diagram based on domain knowledge. Sadly, for most circumstances, there is no data-driven approach that consistently identify confounders. Only our causal assumptions can help us identify them. Causal diagrams are a visual expression of those assumptions linked to rigorous mathematics that allow us to understand what we need to account for in our model. 78 | 79 | In R, we can visualize and analyze our DAGs with the {ggdag} package. {ggdag} uses {ggplot2} and {ggraph} to visualize diagrams and {dagitty} to analyze them. Let's set up our assumptions. The `dagify()` function takes formulas, much like `lm()` and friends, to express assumptions. We have two basic causal structures: the causes of quitting smoking and the causes of gaining weight. Here, we're assuming that the set of variables here affect both. Additionally, we're adding `qsmk` as a cause of `wt82_71`, which is our causal question; we also identify these as our outcome and exposure. Finally, we'll add some labels so the diagram is easier to understand. The result is a `dagitty` object, and we can transform it to a `tidy_dagitty` data set with `tidy_dagitty()`. 80 | 81 | ```{r} 82 | library(ggdag) 83 | # set up DAG 84 | smk_wt_dag <- dagify( 85 | # specify causes of quitting smoking and weight gain: 86 | qsmk ~ sex + race + age + education + 87 | smokeintensity + smokeyrs + exercise + active + wt71, 88 | wt82_71 ~ qsmk + sex + race + age + education + 89 | smokeintensity + smokeyrs + exercise + active + wt71, 90 | # specify causal question: 91 | exposure = "qsmk", 92 | outcome = "wt82_71", 93 | # set up labels: 94 | # here, I'll use the same variable names as the data set, but I'll label them 95 | # with clearer names 96 | labels = c( 97 | # causal question 98 | "qsmk" = "quit\nsmoking", 99 | "wt82_71" = "change in\nweight", 100 | 101 | # demographics 102 | "age" = "age", 103 | "sex" = "sex", 104 | "race" = "race", 105 | "education" = "education", 106 | 107 | # health 108 | "wt71" = "baseline\nweight", 109 | "active" = "daily\nactivity\nlevel", 110 | "exercise" = "exercise", 111 | 112 | # smoking history 113 | "smokeintensity" = "smoking\nintensity", 114 | "smokeyrs" = "yrs of\nsmoking" 115 | ) 116 | ) 117 | 118 | tidy_dagitty(smk_wt_dag) 119 | ``` 120 | 121 | Let's visualize our assumptions with `ggdag()`. 122 | 123 | ```{r} 124 | smk_wt_dag %>% 125 | ggdag(text = FALSE, use_labels = "label") 126 | ``` 127 | 128 | What do we need to control for to estimate an unbiased effect of quitting smoking on weight gain? In many DAGs, there will be many sets of variables--called adjustment sets--that will give us the right effect (assuming our DAG is correct--a big, unverifiable assumption!). `ggdag_adjustment_set()` can help you visualize them. Here, there's only one adjustment set: we need to control for everything! While we're add it, since a {ggdag} plot is just a {ggplot2} plot, let's clean it up a bit, too. 129 | 130 | ```{r} 131 | smk_wt_dag %>% 132 | ggdag_adjustment_set(text = FALSE, use_labels = "label") + 133 | theme_dag() + 134 | scale_color_manual(values = colors) + 135 | scale_fill_manual(values = colors) 136 | ``` 137 | 138 | Let's fit a model with these variables. Note that we'll fit all continuous variables with squared terms, as well, to allow them a bit of flexibility. 139 | 140 | ```{r} 141 | lm( 142 | wt82_71~ qsmk + sex + 143 | race + age + I(age^2) + education + 144 | smokeintensity + I(smokeintensity^2) + 145 | smokeyrs + I(smokeyrs^2) + exercise + active + 146 | wt71 + I(wt71^2), 147 | data = nhefs_complete_uc 148 | ) %>% 149 | tidy(conf.int = TRUE) %>% 150 | filter(term == "qsmk") 151 | ``` 152 | 153 | When we adjust for the variables in our DAG, we get an estimate of about 3.5 lbs--people who quit smoking gained about this amount of weight. However, we are trying to answer a specific causal question: how much weight would a person gain if the quit smoking vs. if the same person did not quit smoking? Let's use an inverse probability weighting model to try to estimate that effect at the population level (what if *everyone* quit smoking vs what if *no one* quit smoking). 154 | 155 | For a simple IPW model, we have two modeling steps. First, we fit a propensity score model, which predicts the probability that you received a treatment or exposure (here, that a participant quit smoking). We use this model to calculate inverse probability weights--1 / your probability of treatment. Then, in the second step, we use this weights in the outcome model, which estimates the effect of exposure on the outcome (here, the effect of quitting smoking on gaining weight). 156 | 157 | For the propensity score model, we'll use logistic regression (since quitting smoking is a binary variable). The outcome is quitting smoking, and the variables in the model are all those included in our adjustment set. Then, we'll use `augment()` from {broom} (which calls `predict()` on the inside) to calculate our weights and save it back into our data set. 158 | 159 | 160 | ```{r} 161 | propensity_model <- glm( 162 | qsmk ~ sex + 163 | race + age + I(age^2) + education + 164 | smokeintensity + I(smokeintensity^2) + 165 | smokeyrs + I(smokeyrs^2) + exercise + active + 166 | wt71 + I(wt71^2), 167 | family = binomial(), 168 | data = nhefs_complete_uc 169 | ) 170 | 171 | nhefs_complete_uc <- propensity_model %>% 172 | # predict whether quit smoking 173 | augment(type.predict = "response", data = nhefs_complete_uc) %>% 174 | # calculate inverse probability 175 | mutate(wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) 176 | 177 | nhefs_complete_uc %>% 178 | select(qsmk, .fitted, wts) 179 | ``` 180 | 181 | Let's look at the distribution of the weights. 182 | 183 | ```{r} 184 | ggplot(nhefs_complete_uc, aes(wts)) + 185 | geom_density(col = "#E69F00", fill = "#E69F0095", size = .8) + 186 | # use a log scale for the x axis 187 | scale_x_log10() + 188 | theme_minimal(base_size = 20) + 189 | xlab("Weights") 190 | ``` 191 | 192 | It looks a little skewed, particularly that there are some participants with much higher weights. There are a few techniques for dealing with this--trimming weights and stabilizing weights--but we'll keep it simple for now and just use them as is. 193 | 194 | The main goal here is to *break* the non-causal associations between quitting smoking and gaining weight--the other paths that might distort our results. In other words, if we succeed, there should be no differences in these variables between our two groups, those who quit smoking and those who didn't. This is where randomized trials shine; you can often assume that there is no baseline differences among potential confounders between your treatment groups (of course, no study is perfect, and there's a whole set of literature on dealing with this problem in randomized trials). 195 | 196 | Standardized mean differences (SMD) are a simple measurement of differences that work across variable types. In general, the closer to 0 we are, the better job we have done eliminating the non-causal relationships we drew in our DAG. Note that low SMDs for everything we adjust for does *not* mean that there is not something else that might confound our study. Unmeasured confounders or misspecified DAGs can still distort our effects, even if our SMDs look great! 197 | 198 | We'll use the {survey} and {tableone} package to calculate the SMDs, then visualize them. 199 | 200 | ```{r} 201 | svy_des <- svydesign( 202 | ids = ~ 1, 203 | data = nhefs_complete_uc, 204 | weights = ~ wts) 205 | 206 | smd_table_unweighted <- CreateTableOne( 207 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 208 | "exercise", "active", "wt71"), 209 | strata = "qsmk", 210 | data = nhefs_complete_uc, 211 | test = FALSE) 212 | 213 | smd_table <- svyCreateTableOne( 214 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 215 | "exercise", "active", "wt71"), 216 | strata = "qsmk", 217 | data = svy_des, 218 | test = FALSE) 219 | 220 | 221 | plot_df <- data.frame( 222 | var = rownames(ExtractSmd(smd_table)), 223 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 224 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 225 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 226 | 227 | ggplot( 228 | data = plot_df, 229 | mapping = aes(x = var, y = SMD, group = Method, color = Method) 230 | ) + 231 | geom_line() + 232 | geom_point() + 233 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 234 | coord_flip() + 235 | theme_minimal() 236 | 237 | ``` 238 | 239 | These look pretty good! Some variables are better than others, but weighting appears to have done a much better job eliminating these differences than an unadjusted analysis. 240 | 241 | We could do more here to analyze our assumptions, but let's move on to our second step: fitting the outcome model weighted by our inverse probabilities. Some researchers call these Marginal Structural Models, in part because the model is marginal; we only need to include our outcome (`wt82_71`) and exposure (`qsmk`). The other variables aren't in the model; they are accounted for with the IPWs! 242 | 243 | ```{r} 244 | ipw_model <- lm( 245 | wt82_71 ~ qsmk, 246 | data = nhefs_complete_uc, 247 | weights = wts # inverse probability weights 248 | ) 249 | 250 | ipw_estimate <- ipw_model %>% 251 | tidy(conf.int = TRUE) %>% 252 | filter(term == "qsmk") 253 | 254 | ipw_estimate 255 | ``` 256 | 257 | This estimate is pretty similar to what we saw before, if a little smaller. In fact, for simple causal questions, this is often the case: adjusting for confounders directly in your regression model sometimes estimates the same effect as IPWs and other causal techniques. Causal techniques are special, though, in that the use counterfactual modeling, which allows you to deal with many circumstances, such as when you have selection bias or time-dependendent confounding. 258 | 259 | But we have other problem that we need to address. While we're just using `lm()` to estimate our IPW model, it doesn't properly account for the weights. That means our standard error is too small, which will artificially narrow confidence intervals and artificially shrink p-values. There are many ways to address this, including robust estimators, but we'll use a simple approach using the bootstrap via the {rsamples} package. 260 | 261 | First, we need to wrap our model in a function so we can call it many times on our bootstrapped data. A function like this might be your instinct; however, it's not quite right. 262 | 263 | ```{r} 264 | # fit ipw model for a single bootstrap sample 265 | fit_ipw_not_quite_rightly <- function(split, ...) { 266 | # get bootstrapped data sample with `rsample::analysis()` 267 | .df <- analysis(split) 268 | 269 | # fit ipw model 270 | lm(wt82_71 ~ qsmk, data = .df, weights = wts) %>% 271 | tidy() 272 | } 273 | ``` 274 | 275 | The problem is that we need to account for the *entire* modeling process, so we need to include the first step of our analysis -- fitting the inverse probability weights. 276 | 277 | ```{r} 278 | fit_ipw <- function(split, ...) { 279 | .df <- analysis(split) 280 | 281 | # fit propensity score model 282 | propensity_model <- glm( 283 | qsmk ~ sex + 284 | race + age + I(age^2) + education + 285 | smokeintensity + I(smokeintensity^2) + 286 | smokeyrs + I(smokeyrs^2) + exercise + active + 287 | wt71 + I(wt71^2), 288 | family = binomial(), 289 | data = .df 290 | ) 291 | 292 | # calculate inverse probability weights 293 | .df <- propensity_model %>% 294 | augment(type.predict = "response", data = .df) %>% 295 | mutate(wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) 296 | 297 | # fit correctly bootsrapped ipw model 298 | lm(wt82_71 ~ qsmk, data = .df, weights = wts) %>% 299 | tidy() 300 | } 301 | ``` 302 | 303 | {rsample} makes the rest easy for us: `bootstraps()` resamples our data 1000 times, then we can use `purrr::map()` to apply our function to each resampled set (`splits`). {rsample}'s `int_*()` functions help us get confidence intervals for our estimate. 304 | 305 | ```{r} 306 | # fit ipw model to bootstrapped samples 307 | ipw_results <- bootstraps(nhefs_complete, 1000, apparent = TRUE) %>% 308 | mutate(results = map(splits, fit_ipw)) 309 | 310 | # get t-statistic-based CIs 311 | boot_estimate <- int_t(ipw_results, results) %>% 312 | filter(term == "qsmk") 313 | 314 | boot_estimate 315 | ``` 316 | 317 | Let's compare to our naive weighted model that just used a single estimate from `lm()` 318 | 319 | ```{r} 320 | bind_rows( 321 | ipw_estimate %>% 322 | select(estimate, conf.low, conf.high) %>% 323 | mutate(type = "ols"), 324 | boot_estimate %>% 325 | select(estimate = .estimate, conf.low = .lower, conf.high = .upper) %>% 326 | mutate(type = "bootstrap") 327 | ) %>% 328 | # calculate CI width to sort by it 329 | mutate(width = conf.high - conf.low) %>% 330 | arrange(width) %>% 331 | # fix the order of the model types for the plot 332 | mutate(type = fct_inorder(type)) %>% 333 | ggplot(aes(x = type, y = estimate, ymin = conf.low, ymax = conf.high)) + 334 | geom_pointrange(color = "#0172B1", size = 1, fatten = 3) + 335 | coord_flip() + 336 | theme_minimal(base_size = 20) 337 | ``` 338 | 339 | Our bootstrapped confidence intervals are wider, which is expected; remember that they were artificially narrow in the naive OLS model! 340 | 341 | So, we have a final estimate for our causal effect: on average, a person who quits smoking will gain 3.5 lbs (95% CI 2.4 lbs, 4.4 lbs) versus if they had not quit smoking. What do you think? Is this estimate reliable? Did we do a good job addressing the assumptions we need to make for a causal effect, particularly that there is no confounding? How might you criticize this model, and what would you do differently? 342 | 343 | *** 344 | 345 | # Take aways 346 | * The broad strokes for a causal analysis are: 1) identify your causal question 2) make your assumptions clear 3) check your assumptions as best you can and 4) use the right estimator for the question you're trying to ask. As scientists, we should be able to critique each of these steps, and that's a good thing! 347 | * To create marginal structural models, first fit a propensity model for the weights with the exposure as the outcome. Then, use the inverse of the predicted probabilities as weights in a model with just the outcome and exposure. 348 | * See more at https://causalinferencebookr.netlify.com 349 | -------------------------------------------------------------------------------- /exercises/02-dags-exercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Causal Diagrams in R" 3 | output: html_document 4 | --- 5 | 6 | ```{r setup} 7 | library(tidyverse) 8 | library(ggdag) 9 | library(dagitty) 10 | ``` 11 | 12 | ## Your Turn 1 13 | 14 | Descriptively, drinking coffee is associated with lung cancer. Does drinking coffee cause lung cancer? 15 | 16 | 1. Specify a DAG with `dagify()`. Write your assumption that `smoking` causes `cancer` as a formula. 17 | 2. We're going to assume that coffee does *not* cause cancer, so there's no formula for that. But we still need to declare our causal question. Specify "coffee" as the exposure and "cancer" as the outcome (both in quotations marks). 18 | 3. Plot the DAG using `ggdag()` 19 | 20 | Stretch goal: Underneath the hood, `ggdag()` calls `tidy_dagitty()` on `dagitty` objects. Check the help page for `?tidy_dagitty()` and try using one of the layouts listed in `?layout_tbl_graph_igraph()`. Experiment with different layouts. Plot with `ggdag()` 21 | 22 | Stretch goal: We are assuming that coffee does *not* cause lung cancer. In causal diagram terminology, we want to know if these two factors are *D-connected* (as in, directionally connected). Since we are assuming that there's no causal relationship, any D-connection is caused by other factors. Try `ggdag_dconnected()` to see if we coffee and lung cancer are D-connected. 23 | 24 | ```{r} 25 | coffee_cancer_dag <- ______( 26 | ______ ~ ______, 27 | smoking ~ addictive, 28 | coffee ~ addictive, 29 | exposure = "______", 30 | outcome = "______", 31 | labels = c( 32 | "coffee" = "Coffee", 33 | "cancer" = "Lung Cancer", 34 | "smoking" = "Smoking", 35 | "addictive" = "Addictive \nBehavior" 36 | ) 37 | ) 38 | 39 | ______(coffee_cancer_dag) 40 | ``` 41 | 42 | ## Your Turn 2 43 | 44 | Most {ggdag} quick plotting functions are actually wrappers for functions that let you work with the DAG data directly. 45 | 46 | 1. Call `tidy_dagitty()` on `coffee_cancer_dag` to create a tidy DAG, then pass the results to `dag_paths()`. What's different about these data? 47 | 2. Plot the open paths with `ggdag_paths()`. (Just give it `coffee_cancer_dag` rather than using `dag_paths()`; the quick plot function will do that for you.) Remember, since we assume there is *no* causal path from coffee to lung cancer, any open paths must be confounding pathways. 48 | 49 | Stretch goal: ggdags are just ggplots! You can add themes, geoms, and other {ggplot2} elements with `+` like a normal ggplot. Try adding a theme ({ggdag} has several, or you could try `theme_void()`). 50 | 51 | Stretch goal: The variable names are a little hard to read. We specified some labels earlier, so let's use them. In the tidy DAG, these are called `label`. In `ggdag_paths()`, add the argument `use_labels = "label"` and remove the node text with `text = FALSE`. 52 | 53 | ```{r} 54 | coffee_cancer_dag %>% 55 | ______() %>% 56 | ______() 57 | 58 | ______ 59 | ``` 60 | 61 | 62 | ## Your Turn 3 63 | 64 | Now that we know the open, confounding pathways (sometimes called "backdoor paths"), we need to know how to close them! First, we'll ask {ggdag} for adjustment sets, then we would need to do something in our analysis to account for at least one adjustment set (e.g. multivariable regression, weighting, or matching for the adjustment sets). 65 | 66 | 1. Use `ggdag_adjustment_set()` to visualize the adjustment sets. Add the arguments `use_labels = "label"` and `text = FALSE`. 67 | 2. Write an R formula for each adjustment set, as you might if you were fitting a model in `lm()` or `glm()` 68 | 69 | Stretch goal: Sometimes, we know a variable plays a vital role in a causal diagram but we can't measure it, or we simply don't have it in our data set. You can tell {ggdag} that a variable is unmeasured with the `latent` argument in `dagify()`. Re-run the `dagify()` call above, but set `latent = "addictive"` (meaning we can't or haven't measured this variable). Plot it with `ggdag_adjustment_set()`. What's different? Now, try setting `latent = c("addictive", "smoking")` and plotting the adjustment set. What do the results mean? 70 | 71 | ```{r} 72 | ______(______) 73 | 74 | cancer ~ ______ 75 | cancer ~ ______ 76 | ``` 77 | 78 | # Take aways 79 | 80 | * Draw your assumptions with DAGs! Use `dagify()` to specify them and `ggdag()` and friends to draw them. 81 | * The main goal for many analyses is to close backdoor (non-causal) paths. {ggdag} and {dagitty} can help you identify them. 82 | * Adjustment sets are key for closing backdoor paths. Take a reasonable set and use it in your model to get a causal effect estimate. 83 | -------------------------------------------------------------------------------- /exercises/03-pscores-exercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Propensity Scores" 3 | output: html_document 4 | --- 5 | 6 | 7 | ```{r} 8 | library(tidyverse) 9 | library(broom) 10 | library(cidata) 11 | library(ggdag) 12 | ``` 13 | 14 | Using the National Health and Nutrition Examination Survey Data (`nhefs_complete`), we are interested in the relationship between the **exposure**, `qsmk`: whether the participant quit smoking, and the **outcome**, `wt82_71`: their weight change in kilograms. 15 | 16 | Below is a proposed DAG of the relationship between 4 confounders: `sex`, `age`, `smokeyrs` and `wt71` and the exposure and outcome. 17 | 18 | _Knit this document to see the DAG or refer to the slides_. 19 | 20 | ```{r} 21 | set.seed(1234) 22 | # set up DAG 23 | smk_wt_dag <- dagify( 24 | # specify causes of quitting smoking and weight gain: 25 | qsmk ~ sex + age + smokeyrs + wt71, 26 | wt82_71 ~ sex + age + smokeyrs + wt71, 27 | # specify causal question: 28 | exposure = "qsmk", 29 | outcome = "wt82_71", 30 | # set up labels: 31 | # here, I'll use the same variable names as the data set, but I'll label them 32 | # with clearer names 33 | labels = c( 34 | # causal question 35 | "qsmk" = "quit\nsmoking (qsmk)", 36 | "wt82_71" = "change in\nweight", 37 | 38 | # demographics 39 | "age" = "age", 40 | "sex" = "sex", 41 | 42 | # health 43 | "wt71" = "baseline\nweight (wt71)", 44 | 45 | # smoking history 46 | "smokeyrs" = "yrs of\nsmoking (smokeyrs)" 47 | ) 48 | ) %>% 49 | tidy_dagitty() 50 | 51 | smk_wt_dag %>% 52 | ggdag(text = FALSE, use_labels = "label") 53 | ``` 54 | 55 | ## Your Turn 56 | 57 | _After updating the code chunks below, change `eval = TRUE` before knitting._ 58 | 59 | Fit a propensity score model for `qsmk` using the above proposed confounders. 60 | 61 | ```{r, eval = FALSE} 62 | propensity_model <- ___( 63 | ___ ~ ___, 64 | data = nhefs_complete, 65 | family = _____ 66 | ) 67 | ``` 68 | 69 | Add the propensity scores to the `nhefs_complete` data set, call this new dataset `df`. 70 | 71 | ```{r, eval = FALSE} 72 | df <- propensity_model %>% 73 | ____(type.predict = ____, data = ____) 74 | ``` 75 | 76 | 77 | Stretch Goal 1: 78 | 79 | Examine two histograms of the propensity scores, one for those that quit smoking (`qsmk == 1`) and one for those that did not (`qsmk == 0`). How do these compare? 80 | 81 | ```{r} 82 | 83 | ``` 84 | 85 | -------------------------------------------------------------------------------- /exercises/04-pscores-weighting-exercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Propensity Score Weighting" 3 | output: html_document 4 | --- 5 | 6 | 7 | ```{r} 8 | library(tidyverse) 9 | library(broom) 10 | library(cidata) 11 | ``` 12 | 13 | Using the National Health and Nutrition Examination Survey Data (`nhefs_complete`), we are interested in the relationship between the **exposure**, `qsmk`: whether the participant quit smoking, and the **outcome**, `wt82_71`: their weight change in kilograms. 14 | 15 | Below is the propensity score model you created in the previous exercise. 16 | 17 | ```{r, eval = FALSE} 18 | propensity_model <- glm( 19 | qsmk ~ age + sex + wt71 + smokeyrs, 20 | data = nhefs_complete, 21 | family = binomial() 22 | ) 23 | 24 | df <- propensity_model %>% 25 | augment(type.predict = "response", data = nhefs_complete) 26 | ``` 27 | 28 | ## Your Turn 29 | 30 | _After updating the code chunks below, change `eval = TRUE` before knitting._ 31 | 32 | Add the ATE weights to the data frame, `df` 33 | 34 | ```{r, eval = FALSE} 35 | df <- df %>% 36 | mutate(w_ate = ___) 37 | ``` 38 | 39 | 40 | Stretch Goal 1: 41 | 42 | Add ATT weights to the data frame, `df` 43 | 44 | ```{r, eval = FALSE} 45 | df <- df %>% 46 | mutate(w_att = ___) 47 | ``` 48 | 49 | Stretch Goal 2: 50 | 51 | Update the code below to examine the distribution of the weighted sample. **HINT** the part that needs to be updated is the `weight` parameter in two of the `geom_histogram()` calls. 52 | 53 | ```{r, eval = FALSE} 54 | d <- df %>% 55 | tidyr::spread(qsmk, .fitted, sep = "_p") 56 | ``` 57 | 58 | 59 | ```{r, eval = FALSE} 60 | ggplot(d) + 61 | geom_histogram(bins = 50, aes(qsmk_p1), alpha = 0.5) + 62 | geom_histogram(bins = 50, aes(qsmk_p1, weight = ____), fill = "green", alpha = 0.5) + 63 | geom_histogram(bins = 50, alpha = 0.5, aes(x = qsmk_p0, y = -..count..)) + 64 | geom_histogram(bins = 50, aes(x = qsmk_p0, weight = ____, y = -..count..), fill = "blue", alpha = 0.5) + 65 | ylab("count") + xlab("p") + 66 | geom_hline(yintercept = 0, lwd = 0.5) + 67 | scale_y_continuous(label = abs) + 68 | theme_minimal() + 69 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = 5, ymax = 100), fill = "#5DB854") + 70 | geom_text(aes(x = 0.975, y = 50), label = "trt", angle = 270, color = "white") + 71 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = -100, ymax = -5), fill = "#5154B8") + 72 | geom_text(aes(x = 0.975, y = -50), label = "control", angle = 270, color = "white") 73 | -------------------------------------------------------------------------------- /exercises/05-pscores-diagnostics-exercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Propensity Score Diagnostics" 3 | output: html_document 4 | --- 5 | 6 | 7 | ```{r} 8 | library(tidyverse) 9 | library(survey) 10 | library(tableone) 11 | library(broom) 12 | library(cidata) 13 | ``` 14 | 15 | Using the National Health and Nutrition Examination Survey Data (`nhefs_complete`), we are interested in the relationship between the **exposure**, `qsmk`: whether the participant quit smoking, and the **outcome**, `wt82_71`: their weight change in kilograms. 16 | 17 | Below is the propensity score model and weights you created in the previous exercise. 18 | 19 | ```{r, eval = FALSE} 20 | propensity_model <- glm( 21 | qsmk ~ age + sex + wt71 + smokeyrs, 22 | data = nhefs_complete, 23 | family = binomial() 24 | ) 25 | 26 | df <- propensity_model %>% 27 | augment(type.predict = "response", data = nhefs_complete) %>% 28 | mutate(w_ate = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) 29 | ``` 30 | 31 | ## Your Turn 1 32 | 33 | _After updating the code chunks below, change `eval = TRUE` before knitting._ 34 | 35 | Create the survey design object to incorporate the weights. 36 | 37 | ```{r, eval = FALSE} 38 | svy_des <- ____( 39 | ids = ~ 1, 40 | data = ___, 41 | weights = ___ 42 | ) 43 | ``` 44 | 45 | Create the **unweighted** standardized mean differences data frame 46 | 47 | ```{r, eval = FALSE} 48 | smd_table_unweighted <- ____( 49 | vars = _____, 50 | strata = _____, 51 | data = ____, 52 | test = FALSE) 53 | ``` 54 | 55 | Create the **weighted** standardized mean differences data frame 56 | 57 | ```{r, eval = FALSE} 58 | smd_table <- ____( 59 | vars = _____, 60 | strata = _____, 61 | data = ____, 62 | test = FALSE) 63 | ``` 64 | 65 | Create a data frame that merges `smd_table_unweighted` and `smd_table` and pivots it to prepare for plotting 66 | 67 | ```{r, eval = FALSE} 68 | plot_df <- data.frame( 69 | var = rownames(____), 70 | Unadjusted = _____, 71 | Weighted = _____) %>% 72 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 73 | ``` 74 | 75 | Create the Love Plot using ggplot 76 | 77 | ```{r, eval = FALSE} 78 | ggplot(data = _____, 79 | mapping = aes(x = ____, y = ____, group = ____, color = ____)) + 80 | geom_line() + 81 | geom_point() + 82 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 83 | coord_flip() 84 | ``` 85 | 86 | 87 | 88 | ## Your Turn 2 89 | 90 | Create an unweighted ECDF for `smokeyrs` by those who quit smoking and those who did not. 91 | 92 | ```{r, eval = FALSE} 93 | ggplot(df, aes(x = ____, group = ____, color = factor(____))) + 94 | ____() + 95 | scale_color_manual("Quit smoking", values = c("#5154B8", "#5DB854"), 96 | labels = c("Yes", "No")) + 97 | xlab(____) + 98 | ylab("Proportion <= x") 99 | ``` 100 | 101 | 102 | Create an weighted ECDF for `smokeyrs` by those who quit smoking and those who did not. 103 | 104 | ```{r, eval = FALSE} 105 | ecdf_1 <- df %>% 106 | filter(____) %>% 107 | arrange(____) %>% 108 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 109 | 110 | ecdf_0 <- df %>% 111 | filter(____) %>% 112 | arrange(____) %>% 113 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 114 | 115 | ggplot(ecdf_1, aes(x = _____, y = cum_pct)) + 116 | geom_line(color = "#5DB854") + 117 | geom_line(data = ecdf_0, aes(x = ____, y = cum_pct), color = "#5154B8") + 118 | xlab(____) + 119 | ylab("Proportion <= x") 120 | ``` 121 | 122 | -------------------------------------------------------------------------------- /exercises/06-outcome-model-exercises.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Outcome Model" 3 | output: html_document 4 | --- 5 | 6 | 7 | ```{r} 8 | library(tidyverse) 9 | library(survey) 10 | library(tableone) 11 | library(broom) 12 | library(cidata) 13 | library(rsample) 14 | ``` 15 | 16 | Using the National Health and Nutrition Examination Survey Data (`nhefs_complete`), we are interested in the relationship between the **exposure**, `qsmk`: whether the participant quit smoking, and the **outcome**, `wt82_71`: their weight change in kilograms. 17 | 18 | ## Your turn 19 | 20 | _After updating the code chunks below, change `eval = TRUE` before knitting._ 21 | 22 | Create a function called `ipw_fit` that fits the propensity score model from Exercise 03, incorporates the ATE weights calculated in Exercise 04, and fits a weighted outcome model. 23 | 24 | ```{r, eval = FALSE} 25 | it_ipw <- function(split, ...) { 26 | .df <-____ 27 | 28 | # fit propensity score model 29 | 30 | # calculate ATE weights 31 | 32 | # fit correctly bootsrapped ipw model 33 | lm(___ ~ ___, data = .df, weights = ___) %>% 34 | tidy() 35 | } 36 | ``` 37 | 38 | Bootstrap this result 1000 times. 39 | 40 | ```{r, eval = FALSE} 41 | ipw_results <- ____(___, 1000, apparent = TRUE) %>% 42 | mutate(results = map(splits, _____)) 43 | ``` 44 | 45 | 46 | Calculate the confidence interval 47 | 48 | ```{r, eval = FALSE} 49 | boot_estimate <- ____(____, ____) %>% 50 | filter(term == ____) 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /slides/00-intro.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Causal Inference in R: Introduction" 3 | date: "2020-07-29 (updated: `r Sys.Date()`)" 4 | output: 5 | xaringan::moon_reader: 6 | css: ["default", "theme.css"] 7 | lib_dir: libs 8 | nature: 9 | highlightStyle: github 10 | highlightLines: true 11 | countIncrementalSlides: false 12 | --- 13 | 14 | ```{r setup, include=FALSE} 15 | options(htmltools.dir.version = FALSE, tibble.max_extra_cols = 6, tibble.width = 60) 16 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320) 17 | ``` 18 | 19 | ```{css, echo = FALSE} 20 | img { 21 | height: 250px; 22 | width: 250px; 23 | border-radius: 50%; 24 | object-fit: cover; 25 | } 26 | ``` 27 | 28 | 29 | 30 | ## `> who_are_we(c("lucy", "malcolm"))` 31 | 32 | .pull-left[ 33 |
34 |
35 | ```{r, echo=FALSE} 36 | knitr::include_graphics("img/ldm.jpg") 37 | ``` 38 |
39 |         `r fontawesome::fa("globe")` [https://www.lucymcgowan.com/](https://www.lucymcgowan.com/) 40 | ] 41 | 42 | .pull-right[ 43 |
44 |
45 | ```{r, echo=FALSE} 46 | knitr::include_graphics("img/mb.jpg") 47 | ``` 48 |
49 |                `r fontawesome::fa("globe")` [https://www.malco.io/](https://www.malco.io/) 50 | 51 | ] 52 | 53 | --- 54 | 55 | class: center, inverse, middle 56 | 57 | # The three practices of analysis 58 | 59 | 1. Describe 60 | 2. Predict 61 | 3. Explain 62 | 63 | --- 64 | class: middle, center, inverse 65 | 66 | # Normal regression estimates associations. But we want *counterfactual, causal* estimates: 67 | 68 | # What would happen if *everyone* in the study were exposed to x vs if *no one* was exposed. 69 | 70 | 71 | --- 72 | 73 | class: middle, center, inverse 74 | # For causal inference, we need to make sometimes unverifiable assumptions. 75 | 76 | # Today, we'll focus on the assumption of *no confounding*. 77 | 78 | --- 79 | 80 | class: inverse, middle 81 | 82 | # Tools for causal inference 83 | 84 | 1. Causal diagrams 85 | 1. Propensity score weighting 86 | 1. Propensity score matching 87 | 88 | --- 89 | 90 | class: inverse, middle 91 | 92 | # Other tools for causal inference 93 | 94 | 1. Randomized trials 95 | 1. G-methods & friends 96 | 1. Instrumental variables & friends 97 | 98 | --- 99 | 100 | class: inverse, middle, center 101 | 102 | # Let's head to RStudio Cloud: https://bit.ly/causalcloud 103 | 104 | --- 105 | 106 | 107 | class: inverse 108 | 109 | # Resources 110 | ## [Causal Inference](https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/): Comprehensive text on causal inference. Free online. 111 | ## [The Book of Why](http://bayes.cs.ucla.edu/WHY/): Detailed, friendly intro to DAGs and causal inference. Free online. 112 | ## [Mastering 'Metrics](http://www.masteringmetrics.com/): Friendly introduction to IV-based methods 113 | -------------------------------------------------------------------------------- /slides/00-intro.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Causal Inference in R: Introduction 5 | 6 | 7 | 8 | 9 | 10 | 11 | 113 | 114 | 115 | 202 | 203 | 222 | 223 | 233 | 234 | 235 | -------------------------------------------------------------------------------- /slides/00-intro.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/00-intro.pdf -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Causal Modeling in R: Whole Game" 3 | date: "2020-07-29 (updated: `r Sys.Date()`)" 4 | output: 5 | xaringan::moon_reader: 6 | css: ["default", "theme.css"] 7 | lib_dir: libs 8 | nature: 9 | highlightStyle: github 10 | highlightLines: true 11 | countIncrementalSlides: false 12 | 13 | --- 14 | 15 | class: inverse 16 | 17 | ```{r setup, include=FALSE} 18 | options(htmltools.dir.version = FALSE, tibble.max_extra_cols = 6, tibble.width = 60) 19 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320, fig.height = 4) 20 | library(tidyverse) 21 | library(broom) 22 | library(rsample) 23 | library(ggdag) 24 | library(cidata) 25 | library(survey) 26 | library(tableone) 27 | ``` 28 | 29 | # Broad strokes 30 | 1. Specify causal question 31 | 2. Draw assumptions (causal diagram) 32 | 3. Model assumptions (propensity score) 33 | 4. Analyze propensities (diagnostics) 34 | 5. Estimate causal effects (IPW) 35 | 36 | --- 37 | 38 | class: middle, center, inverse 39 | 40 | # **Do people who quit smoking gain weight?** 41 | 42 | --- 43 | 44 | ```{r} 45 | library(cidata) 46 | nhefs_complete_uc <- nhefs_complete %>% 47 | filter(censored == 0) 48 | nhefs_complete_uc 49 | ``` 50 | 51 | --- 52 | 53 | ## Did those who quit smoking gain weight? 54 | 55 | ```{r, echo = FALSE} 56 | colors <- c("#E69F00", "#56B4E9") 57 | 58 | nhefs_complete_uc %>% 59 | ggplot(aes(wt82_71, fill = factor(qsmk))) + 60 | geom_vline(xintercept = 0, color = "grey60", size = 1) + 61 | geom_density(color = "white", alpha = .75, size = .5) + 62 | scale_fill_manual(values = colors) + 63 | theme_minimal() + 64 | theme(legend.position = "bottom") + 65 | labs( 66 | x = "change in weight (lbs)", 67 | fill = "quit smoking (1 = yes)" 68 | ) 69 | ``` 70 | 71 | --- 72 | 73 | ## Did those who quit smoking gain weight? 74 | 75 | ```{r, highlight.output = 4:5} 76 | # ~2.5 lbs gained for quit vs. not quit 77 | nhefs_complete_uc %>% 78 | group_by(qsmk) %>% 79 | summarize( 80 | mean_weight_change = mean(wt82_71), 81 | sd = sd(wt82_71), 82 | .groups = "drop" 83 | ) 84 | ``` 85 | 86 | --- 87 | 88 | class: inverse, center, middle 89 | 90 | # **draw your assumptions** 91 | 92 | --- 93 | 94 | ```{r, echo = FALSE, fig.height=5.5} 95 | library(ggdag) 96 | set.seed(1234) 97 | # set up DAG 98 | smk_wt_dag <- dagify( 99 | # specify causes of quitting smoking and weight gain: 100 | qsmk ~ sex + race + age + education + 101 | smokeintensity + smokeyrs + exercise + active + wt71, 102 | wt82_71 ~ qsmk + sex + race + age + education + 103 | smokeintensity + smokeyrs + exercise + active + wt71, 104 | # specify causal question: 105 | exposure = "qsmk", 106 | outcome = "wt82_71", 107 | # set up labels: 108 | # here, I'll use the same variable names as the data set, but I'll label them 109 | # with clearer names 110 | labels = c( 111 | # causal question 112 | "qsmk" = "quit\nsmoking", 113 | "wt82_71" = "change in\nweight", 114 | 115 | # demographics 116 | "age" = "age", 117 | "sex" = "sex", 118 | "race" = "race", 119 | "education" = "education", 120 | 121 | # health 122 | "wt71" = "baseline\nweight", 123 | "active" = "daily\nactivity\nlevel", 124 | "exercise" = "exercise", 125 | 126 | # smoking history 127 | "smokeintensity" = "smoking\nintensity", 128 | "smokeyrs" = "yrs of\nsmoking" 129 | ) 130 | ) %>% 131 | tidy_dagitty() 132 | 133 | smk_wt_dag %>% 134 | filter(name %in% c("qsmk", "wt82_71")) %>% 135 | ggdag(text = FALSE, use_labels = "label") + 136 | ylim(6, 9.5) + 137 | xlim(1.5, 5) 138 | ``` 139 | 140 | --- 141 | 142 | ```{r, echo = FALSE, fig.height=5.5} 143 | smk_wt_dag %>% 144 | ggdag(text = FALSE, use_labels = "label") + 145 | ylim(6, 9.5) + 146 | xlim(1.5, 5) 147 | ``` 148 | 149 | --- 150 | 151 | class: center, middle 152 | 153 | # What do I need to control for? 154 | 155 | --- 156 | 157 | ```{r, echo = FALSE, fig.height = 5.5} 158 | smk_wt_dag %>% 159 | ggdag_adjustment_set(text = FALSE, use_labels = "label", node_size = 8) 160 | ``` 161 | 162 | --- 163 | 164 | ## Multivariable regression: what's the association? 165 | 166 | ```{r, eval = FALSE} 167 | lm( #<< 168 | wt82_71~ qsmk + sex + #<< 169 | race + age + I(age^2) + education + #<< 170 | smokeintensity + I(smokeintensity^2) + #<< 171 | smokeyrs + I(smokeyrs^2) + exercise + active + #<< 172 | wt71 + I(wt71^2), #<< 173 | data = nhefs_complete_uc #<< 174 | ) %>% #<< 175 | tidy(conf.int = TRUE) %>% 176 | filter(term == "qsmk") 177 | ``` 178 | 179 | --- 180 | 181 | ## Multivariable regression: what's the association? 182 | 183 | ```{r, highlight.output = 4} 184 | lm( 185 | wt82_71~ qsmk + sex + 186 | race + age + I(age^2) + education + 187 | smokeintensity + I(smokeintensity^2) + 188 | smokeyrs + I(smokeyrs^2) + exercise + active + 189 | wt71 + I(wt71^2), 190 | data = nhefs_complete_uc 191 | ) %>% 192 | tidy(conf.int = TRUE) %>% 193 | filter(term == "qsmk") 194 | ``` 195 | 196 | --- 197 | 198 | class: inverse, center, middle 199 | 200 | # **model your assumptions** 201 | 202 | --- 203 | 204 | class: center, middle 205 | 206 | # counterfactual: what if everyone quit smoking vs. what if no one quit smoking 207 | 208 | --- 209 | 210 | ## Fit propensity score model 211 | 212 | ```{r} 213 | propensity_model <- glm( #<< 214 | qsmk ~ sex + #<< 215 | race + age + I(age^2) + education + 216 | smokeintensity + I(smokeintensity^2) + 217 | smokeyrs + I(smokeyrs^2) + exercise + active + 218 | wt71 + I(wt71^2), 219 | family = binomial(), 220 | data = nhefs_complete_uc 221 | ) 222 | ``` 223 | 224 | --- 225 | 226 | ## Calculate inverse probability weights 227 | 228 | ```{r} 229 | nhefs_complete_uc <- propensity_model %>% 230 | # predict whether quit smoking 231 | augment(type.predict = "response", data = nhefs_complete_uc) %>% #<< 232 | # calculate inverse probability 233 | mutate(wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) #<< 234 | ``` 235 | 236 | --- 237 | 238 | class: inverse, center, middle 239 | 240 | # **diagnose your model assumptions** 241 | 242 | --- 243 | 244 | ## What's the distribution of weights? 245 | 246 | ```{r, echo = FALSE} 247 | ggplot(nhefs_complete_uc, aes(wts)) + 248 | geom_density(col = "#E69F00", fill = "#E69F0095", size = .8) + 249 | # use a log scale for the x axis 250 | scale_x_log10() + 251 | theme_minimal(base_size = 20) + 252 | xlab("Weights") 253 | ``` 254 | 255 | 256 | --- 257 | 258 | ```{r, echo=FALSE, fig.height=5.5} 259 | svy_des <- svydesign( 260 | ids = ~ 1, 261 | data = nhefs_complete_uc, 262 | weights = ~ wts) 263 | 264 | smd_table_unweighted <- CreateTableOne( 265 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 266 | "exercise", "active", "wt71"), 267 | strata = "qsmk", 268 | data = nhefs_complete_uc, 269 | test = FALSE) 270 | 271 | smd_table <- svyCreateTableOne( 272 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 273 | "exercise", "active", "wt71"), 274 | strata = "qsmk", 275 | data = svy_des, 276 | test = FALSE) 277 | 278 | plot_df <- data.frame( 279 | var = rownames(ExtractSmd(smd_table)), 280 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 281 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 282 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 283 | ``` 284 | 285 | 286 | ```{r, echo=FALSE, fig.height=5.5} 287 | ggplot( 288 | data = plot_df %>% filter(Method == "Unadjusted"), 289 | mapping = aes(x = var, y = SMD, group = Method, color = Method) 290 | ) + 291 | geom_line() + 292 | geom_point() + 293 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 294 | coord_flip() + 295 | theme_minimal() + 296 | ylim(0, .3) 297 | ``` 298 | 299 | --- 300 | 301 | ```{r, echo=FALSE, fig.height=5.5} 302 | ggplot( 303 | data = plot_df, 304 | mapping = aes(x = var, y = SMD, group = Method, color = Method) 305 | ) + 306 | geom_line() + 307 | geom_point() + 308 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 309 | coord_flip() + 310 | theme_minimal() + 311 | scale_color_manual(values = c("grey85", "#00BFC4")) + 312 | ylim(0, .3) 313 | ``` 314 | 315 | --- 316 | class: inverse, center, middle 317 | 318 | # **estimate the causal effects** 319 | 320 | --- 321 | 322 | ## Estimate causal effect with IPW 323 | 324 | ```{r} 325 | ipw_model <- lm( #<< 326 | wt82_71 ~ qsmk, #<< 327 | data = nhefs_complete_uc, 328 | weights = wts #<< 329 | ) 330 | 331 | ipw_estimate <- ipw_model %>% 332 | tidy(conf.int = TRUE) %>% 333 | filter(term == "qsmk") 334 | ``` 335 | 336 | --- 337 | 338 | ## Estimate causal effect with IPW 339 | 340 | ```{r, highlight.output = 4} 341 | ipw_estimate 342 | ``` 343 | 344 | --- 345 | 346 | ## Let's fix our confidence intervals with the bootstrap! 347 | 348 | -- 349 | 350 | ```{r} 351 | # fit ipw model for a single bootstrap sample 352 | fit_ipw_not_quite_rightly <- function(split, ...) { 353 | # get bootstrapped data sample with `rsample::analysis()` 354 | .df <- analysis(split) 355 | 356 | # fit ipw model 357 | lm(wt82_71 ~ qsmk, data = .df, weights = wts) %>% 358 | tidy() 359 | } 360 | ``` 361 | 362 | --- 363 | 364 | ```{r} 365 | fit_ipw <- function(split, ...) { 366 | .df <- analysis(split) 367 | 368 | # fit propensity score model 369 | propensity_model <- glm( 370 | qsmk ~ sex + 371 | race + age + I(age^2) + education + 372 | smokeintensity + I(smokeintensity^2) + 373 | smokeyrs + I(smokeyrs^2) + exercise + active + 374 | wt71 + I(wt71^2), 375 | family = binomial(), 376 | data = .df 377 | ) 378 | 379 | # calculate inverse probability weights 380 | .df <- propensity_model %>% 381 | augment(type.predict = "response", data = .df) %>% 382 | mutate(wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) 383 | 384 | # fit correctly bootsrapped ipw model 385 | lm(wt82_71 ~ qsmk, data = .df, weights = wts) %>% 386 | tidy() 387 | } 388 | ``` 389 | 390 | --- 391 | 392 | ## Using {rsample} to bootstrap our causal effect 393 | 394 | -- 395 | 396 | ```{r boot_cache, cache = TRUE} 397 | # fit ipw model to bootstrapped samples 398 | ipw_results <- bootstraps(nhefs_complete, 1000, apparent = TRUE) %>% #<< 399 | mutate(results = map(splits, fit_ipw)) #<< 400 | ``` 401 | 402 | --- 403 | 404 | ## Using {rsample} to bootstrap our causal effect 405 | 406 | ```{r, eval = FALSE} 407 | # get t-statistic-based CIs 408 | boot_estimate <- int_t(ipw_results, results) %>% #<< 409 | filter(term == "qsmk") 410 | 411 | boot_estimate 412 | ``` 413 | 414 | --- 415 | 416 | ## Using {rsample} to bootstrap our causal effect 417 | 418 | ```{r, highlight.output = 4} 419 | # get t-statistic-based CIs 420 | boot_estimate <- int_t(ipw_results, results) %>% 421 | filter(term == "qsmk") 422 | 423 | boot_estimate 424 | ``` 425 | 426 | --- 427 | 428 | class: middle 429 | 430 | ```{r, echo = FALSE} 431 | bind_rows( 432 | ipw_estimate %>% 433 | select(estimate, conf.low, conf.high) %>% 434 | mutate(type = "ols"), 435 | boot_estimate %>% 436 | select(estimate = .estimate, conf.low = .lower, conf.high = .upper) %>% 437 | mutate(type = "bootstrap") 438 | ) %>% 439 | # calculate CI width to sort by it 440 | mutate(width = conf.high - conf.low) %>% 441 | arrange(width) %>% 442 | # fix the order of the model types for the plot 443 | mutate(type = fct_inorder(type)) %>% 444 | ggplot(aes(x = type, y = estimate, ymin = conf.low, ymax = conf.high)) + 445 | geom_pointrange(color = "#0172B1", size = 1, fatten = 3) + 446 | coord_flip() + 447 | theme_minimal(base_size = 20) 448 | ``` 449 | 450 | --- 451 | 452 | class: center, inverse, middle 453 | 454 | # *Our causal effect estimate: **3.5 lbs (95% CI 2.4 lbs, 4.4 lbs)*** 455 | 456 | --- 457 | 458 | class: center, inverse, middle 459 | 460 | # **Review the R Markdown file... later!** 461 | 462 | --- 463 | class: inverse, center 464 | 465 | # Resources 466 | ## [Causal Inference](https://www.hsph.harvard.edu/miguel-hernan/causal-inference-book/): Comprehensive text on causal inference. Free online. 467 | ## [Causal Inference Notebook](http://causalinferencebookr.netlify.com): R code to go along with Causal Inference 468 | ## [Bootstrap confidence intervals with {rsample}](https://rsample.tidymodels.org/articles/Applications/Intervals.html) 469 | -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Causal Modeling in R: Whole Game 5 | 6 | 7 | 8 | 9 | 10 | 11 | 389 | 390 | 391 | 478 | 479 | 498 | 499 | 509 | 510 | 511 | -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game.pdf -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_cache/html/__packages: -------------------------------------------------------------------------------- 1 | base 2 | tidyverse 3 | ggplot2 4 | tibble 5 | tidyr 6 | readr 7 | purrr 8 | dplyr 9 | stringr 10 | forcats 11 | broom 12 | rsample 13 | ggdag 14 | cidata 15 | Matrix 16 | survival 17 | survey 18 | tableone 19 | -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_cache/html/boot_cache_011afc01abdd656081906cf473a2e4a7.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_cache/html/boot_cache_011afc01abdd656081906cf473a2e4a7.RData -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_cache/html/boot_cache_011afc01abdd656081906cf473a2e4a7.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_cache/html/boot_cache_011afc01abdd656081906cf473a2e4a7.rdb -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_cache/html/boot_cache_011afc01abdd656081906cf473a2e4a7.rdx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_cache/html/boot_cache_011afc01abdd656081906cf473a2e4a7.rdx -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-27-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-27-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/01-causal_modeling_whole_game_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /slides/02-dags.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Causal Diagrams in R" 3 | date: "2020-07-29 (updated: `r Sys.Date()`)" 4 | output: 5 | xaringan::moon_reader: 6 | css: ["default", "theme.css"] 7 | lib_dir: libs 8 | nature: 9 | highlightStyle: github 10 | highlightLines: true 11 | countIncrementalSlides: false 12 | --- 13 | class: middle, center, inverse 14 | 15 | ```{r setup, include=FALSE} 16 | options(htmltools.dir.version = FALSE, tibble.max_extra_cols = 6, tibble.width = 60) 17 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320, fig.height = 4) 18 | library(tidyverse) 19 | library(ggdag) 20 | 21 | set.seed(1234) 22 | 23 | smk_wt_dag <- dagify( 24 | # specify causes of quitting smoking and weight gain: 25 | qsmk ~ sex + race + age + education + 26 | smokeintensity + smokeyrs + exercise + active + wt71, 27 | wt82_71 ~ qsmk + sex + race + age + education + 28 | smokeintensity + smokeyrs + exercise + active + wt71, 29 | # specify causal question: 30 | exposure = "qsmk", 31 | outcome = "wt82_71", 32 | # set up labels: 33 | # here, I'll use the same variable names as the data set, but I'll label them 34 | # with clearer names 35 | labels = c( 36 | # causal question 37 | "qsmk" = "quit\nsmoking", 38 | "wt82_71" = "change in\nweight", 39 | 40 | # demographics 41 | "age" = "age", 42 | "sex" = "sex", 43 | "race" = "race", 44 | "education" = "education", 45 | 46 | # health 47 | "wt71" = "baseline\nweight", 48 | "active" = "daily\nactivity\nlevel", 49 | "exercise" = "exercise", 50 | 51 | # smoking history 52 | "smokeintensity" = "smoking\nintensity", 53 | "smokeyrs" = "yrs of\nsmoking" 54 | ) 55 | ) 56 | 57 | ``` 58 | 59 | # **Draw your causal assumptions with causal directed acyclic graphs (DAGs)** 60 | 61 | --- 62 | class: inverse 63 | 64 | # The basic idea 65 | 66 | 1. Specify your causal question 67 | 1. Use domain knowledge 68 | 1. Write variables as nodes 69 | 1. Write causal pathways as arrows (edges) 70 | 71 | --- 72 | 73 | class: middle, center, inverse 74 | 75 | # **ggdag** 76 | 77 | --- 78 | 79 | ```{r, echo=FALSE, out.width="100%", out.height="100%"} 80 | knitr::include_graphics("img/ggdagitty.png") 81 | ``` 82 | 83 | --- 84 | 85 | ```{r, echo=FALSE, out.width="100%", out.height="100%"} 86 | knitr::include_graphics("img/ggdagitty_alg.png") 87 | ``` 88 | 89 | --- 90 | 91 | ```{r, echo=FALSE, out.width="100%", out.height="100%"} 92 | knitr::include_graphics("img/ggdagitty_plots.png") 93 | ``` 94 | 95 | --- 96 | 97 | ```{r, echo=FALSE, out.width="100%", out.height="100%"} 98 | knitr::include_graphics("img/tidy_ggdagitty.png") 99 | ``` 100 | 101 | --- 102 | 103 | # Step 1: Specify your DAG 104 | 105 | -- 106 | 107 | ```{r, eval = FALSE} 108 | dagify( 109 | cancer ~ smoking, 110 | coffee ~ smoking 111 | ) 112 | ``` 113 | 114 | --- 115 | 116 | # Step 1: Specify your DAG 117 | 118 | 119 | ```{r, eval = FALSE} 120 | dagify( 121 | cancer ~ smoking, #<< 122 | coffee ~ smoking 123 | ) 124 | ``` 125 | 126 | --- 127 | 128 | # Step 1: Specify your DAG 129 | 130 | 131 | ```{r, eval = FALSE} 132 | dagify( 133 | cancer ~ smoking, 134 | coffee ~ smoking #<< 135 | ) 136 | ``` 137 | 138 | --- 139 | 140 | # Step 1: Specify your DAG 141 | 142 | 143 | ```{r, eval = FALSE} 144 | dagify( 145 | cancer ~ smoking, 146 | coffee ~ smoking 147 | ) %>% ggdag() 148 | ``` 149 | 150 | --- 151 | 152 | # Step 1: Specify your DAG 153 | 154 | 155 | ```{r, echo = FALSE} 156 | dagify( 157 | cancer ~ smoking, 158 | coffee ~ smoking 159 | ) %>% ggdag() 160 | ``` 161 | 162 | --- 163 | 164 | # Step 1: Specify your DAG 165 | 166 | 167 | ```{r, eval = FALSE} 168 | dagify( 169 | cancer ~ smoking + coffee, 170 | coffee ~ smoking 171 | ) %>% ggdag() 172 | ``` 173 | 174 | --- 175 | 176 | # Step 1: Specify your DAG 177 | 178 | 179 | ```{r, echo = FALSE} 180 | dagify( 181 | cancer ~ smoking + coffee, 182 | coffee ~ smoking 183 | ) %>% ggdag() 184 | ``` 185 | 186 | --- 187 | 188 | ## Your Turn 1 (**`02-dags-exercises.Rmd`**) 189 | 190 | ### Specify a DAG with `dagify()`. Write your assumption that `smoking` causes `cancer` as a formula. 191 | ### We're going to assume that coffee does not cause cancer, so there's no formula for that. But we still need to declare our causal question. Specify "coffee" as the exposure and "cancer" as the outcome (both in quotations marks). 192 | ### Plot the DAG using `ggdag()` 193 | 194 | `r countdown::countdown(minutes = 3)` 195 | 196 | --- 197 | 198 | ## Your Turn 1 (`02-dags-exercises.Rmd`) 199 | 200 | ```{r} 201 | coffee_cancer_dag <- dagify( 202 | cancer ~ smoking, 203 | smoking ~ addictive, 204 | coffee ~ addictive, 205 | exposure = "coffee", 206 | outcome = "cancer", 207 | labels = c( 208 | "coffee" = "Coffee", 209 | "cancer" = "Lung Cancer", 210 | "smoking" = "Smoking", 211 | "addictive" = "Addictive \nBehavior" 212 | ) 213 | ) 214 | ``` 215 | 216 | --- 217 | 218 | 219 | ```{r} 220 | ggdag(coffee_cancer_dag) 221 | ``` 222 | 223 | --- 224 | 225 | # Causal effects and backdoor paths 226 | 227 | --- 228 | 229 | # Causal effects and backdoor paths 230 | ## **Ok, correlation != causation. But why not?** 231 | --- 232 | 233 | # Causal effects and backdoor paths 234 | ## ~~Ok, correlation != causation. But why not?~~ 235 | ## **We want to know if `x -> y`...** 236 | 237 | --- 238 | 239 | # Causal effects and backdoor paths 240 | ## ~~Ok, correlation != causation. But why not?~~ 241 | ## ~~We want to know if `x -> y`...~~ 242 | ## **But other paths also cause associations** 243 | 244 | --- 245 | 246 | # `ggdag_paths()` 247 | 248 | ## Identify "backdoor" paths 249 | 250 | -- 251 | 252 | ```{r, eval = FALSE} 253 | ggdag_paths(smk_wt_dag) 254 | ``` 255 | 256 | 257 | --- 258 | 259 | ```{r, echo = FALSE, fig.height=5.5} 260 | smk_wt_dag %>% 261 | dag_paths(paths_only = FALSE) %>% 262 | ggplot(aes(x = x, y = y, xend = xend, yend = yend, col = path, alpha = path)) + 263 | geom_dag_edges_link( 264 | aes( 265 | edge_alpha = path, 266 | edge_colour = path, 267 | start_cap = ggraph::circle(3, 'mm'), 268 | end_cap = ggraph::circle(3, 'mm') 269 | ) 270 | ) + 271 | geom_dag_point(size = 4) + 272 | facet_wrap(~forcats::fct_inorder(as.factor(set), ordered = TRUE)) + 273 | scale_alpha_manual( 274 | drop = FALSE, 275 | values = c("open path" = 1), 276 | na.value = .35, 277 | breaks = "open path" 278 | ) + 279 | ggraph::scale_edge_alpha_manual( 280 | drop = FALSE, 281 | values = c("open path" = 1), 282 | na.value = .35, 283 | breaks = "open path" 284 | ) + 285 | ggraph::scale_edge_colour_hue(drop = FALSE, breaks = "open path") + 286 | scale_color_hue(drop = FALSE, breaks = "open path") + 287 | expand_plot( 288 | expand_x = expansion(c(0.25, 0.25)), 289 | expand_y = expansion(c(0.1, 0.1)) 290 | ) + 291 | theme(legend.position = "none") 292 | ``` 293 | 294 | --- 295 | 296 | ## Your Turn 2 297 | 298 | ### Call `tidy_dagitty()` on `coffee_cancer_dag` to create a tidy DAG, then pass the results to `dag_paths()`. What's different about these data? 299 | ### Plot the open paths with `ggdag_paths()`. (Just give it `coffee_cancer_dag` rather than using `dag_paths()`; the quick plot function will do that for you.) Remember, since we assume there is *no* causal path from coffee to lung cancer, any open paths must be confounding pathways. 300 | 301 | `r countdown::countdown(minutes = 3)` 302 | 303 | --- 304 | 305 | ## Your Turn 2 306 | 307 | ```{r} 308 | coffee_cancer_dag %>% 309 | tidy_dagitty() %>% 310 | dag_paths() 311 | ``` 312 | 313 | --- 314 | 315 | ```{r} 316 | coffee_cancer_dag %>% 317 | ggdag_paths() 318 | ``` 319 | 320 | --- 321 | 322 | # Closing backdoor paths 323 | 324 | --- 325 | 326 | # Closing backdoor paths 327 | ## **We need to account for these open, non-causal paths** 328 | 329 | --- 330 | 331 | # Closing backdoor paths 332 | ## ~~We need to account for these open, non-causal paths~~ 333 | ## **Randomization** 334 | 335 | --- 336 | 337 | # Closing backdoor paths 338 | ## ~~We need to account for these open, non-causal paths~~ 339 | ## ~~Randomization~~ 340 | ## **Stratification, adjustment, weighting, matching, etc.** 341 | 342 | --- 343 | 344 | # Identifying adjustment sets 345 | 346 | ```{r,eval=FALSE} 347 | ggdag_adjustment_set(smk_wt_dag) 348 | ``` 349 | 350 | --- 351 | 352 | ```{r, echo=FALSE, fig.height=5.5} 353 | ggdag_adjustment_set(smk_wt_dag) 354 | ``` 355 | 356 | --- 357 | 358 | ## Your Turn 3 359 | 360 | #### Now that we know the open, confounding pathways (sometimes called "backdoor paths"), we need to know how to close them! First, we'll ask {ggdag} for adjustment sets, then we would need to do something in our analysis to account for at least one adjustment set (e.g. multivariable regression, weighting, or matching for the adjustment sets). 361 | 362 | #### Use `ggdag_adjustment_set()` to visualize the adjustment sets. Add the arguments `use_labels = "label"` and `text = FALSE`. 363 | #### Write an R formula for each adjustment set, as you might if you were fitting a model in `lm()` or `glm()` 364 | 365 | `r countdown::countdown(minutes = 3)` 366 | 367 | --- 368 | 369 | ## Your Turn 3 370 | 371 | ```{r, eval = FALSE} 372 | ggdag_adjustment_set( 373 | coffee_cancer_dag, 374 | use_labels = "label", 375 | text = FALSE 376 | ) 377 | ``` 378 | 379 | --- 380 | 381 | ```{r, echo = FALSE, fig.height=5.5} 382 | ggdag_adjustment_set( 383 | coffee_cancer_dag, 384 | use_labels = "label", 385 | text = FALSE 386 | ) 387 | ``` 388 | 389 | --- 390 | 391 | ## Your Turn 3 392 | 393 | ```{r, eval = FALSE} 394 | cancer ~ coffee + addictive 395 | cancer ~ coffee + smoking 396 | ``` 397 | 398 | --- 399 | 400 | class: inverse 401 | 402 | # Resources: ggdag vignettes 403 | ## [An Introduction to ggdag](https://ggdag.malco.io/articles/intro-to-ggdag.html) 404 | ## [An Introduction to Directed Acyclic Graphs](https://ggdag.malco.io/articles/intro-to-dags.html) 405 | ## [Common Structures of Bias](https://ggdag.malco.io/articles/bias-structures.html) 406 | -------------------------------------------------------------------------------- /slides/02-dags.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Causal Diagrams in R 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 355 | 356 | 357 | 444 | 445 | 464 | 465 | 475 | 476 | 477 | -------------------------------------------------------------------------------- /slides/02-dags.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags.pdf -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /slides/02-dags_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/02-dags_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /slides/03-pscores.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Propensity Scores" 3 | author: "Lucy D'Agostino McGowan" 4 | institute: "Wake Forest University" 5 | date: "2020-07-29 (updated: `r Sys.Date()`)" 6 | output: 7 | xaringan::moon_reader: 8 | css: ["default", "theme.css"] 9 | lib_dir: libs 10 | nature: 11 | highlightStyle: github 12 | highlightLines: true 13 | highlightSpans: true 14 | countIncrementalSlides: false 15 | --- 16 | 17 | ```{r, include = FALSE} 18 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320, fig.height = 4) 19 | ``` 20 | 21 | class: inverse 22 | 23 | ## Observational Studies 24 | 25 | **Goal**: To answer a research question 26 | 27 | ![](img/obs-studies.png) 28 | 29 | --- 30 | class: inverse 31 | 32 | ## Observational Studies 33 | 34 | **Goal**: To answer a research question 35 | 36 | ![](img/obs-studies-2.png) 37 | 38 | --- 39 | class: inverse 40 | 41 | ## ~~Observational Studies~~ 42 | ### **Randomized Controlled Trial** 43 | 44 | ![](img/randomized.png) 45 | 46 | --- 47 | class: inverse 48 | 49 | ## ~~Observational Studies~~ 50 | ### **Randomized Controlled Trial** 51 | 52 | ![](img/randomized-2.png) 53 | 54 | --- 55 | class: inverse 56 | 57 | ## Observational Studies 58 | 59 | ![](img/obs-studies-3.png) 60 | 61 | --- 62 | class: inverse 63 | 64 | ![](img/trt.png) 65 | 66 | --- 67 | class: inverse 68 | 69 | ![](img/trt-conf.png) 70 | --- 71 | class: inverse 72 | 73 | ## Confounding 74 | 75 | ![](img/conf-2.png) 76 | 77 | --- 78 | class: inverse 79 | 80 | ## Confounding 81 | 82 | ![](img/conf-3.png) 83 | 84 | --- 85 | 86 | ## Propensity scores 87 | 88 | Rosenbaum and Rubin showed in observational studies, conditioning on **propensity scores** can lead to unbiased estimates of the exposure effect 89 | 90 | 1. There are no unmeasured confounders 91 | 2. Every subject has a nonzero probability of receiving either exposure 92 | 93 | --- 94 | 95 | ## Propensity scores 96 | 97 | * Fit a **logistic regression** predicting exposure using known covariates 98 | 99 | $$Pr(exposure = 1) = \frac{1}{1+\exp(-X\beta)}$$ 100 | 101 | * Each individuals' predicted values are the **propensity scores** 102 | 103 | --- 104 | 105 | ## Propensity scores 106 | 107 | ```{r, message = FALSE, warning = FALSE} 108 | library(tidyverse) 109 | library(broom) 110 | ``` 111 | 112 | --- 113 | 114 | ## Propensity scores 115 | 116 | ```{r, eval = FALSE} 117 | glm(exposure ~ confounder_1 + confounder_2 + confounder_3 + ..., 118 | data = df, 119 | family = binomial()) 120 | ``` 121 | 122 | --- 123 | 124 | ## Propensity scores 125 | 126 | ```{r, eval = FALSE} 127 | glm(exposure ~ confounder_1 + confounder_2 + confounder_3 + ..., 128 | data = df, 129 | family = binomial()) %>% 130 | augment(type.predict = "response", data = df) 131 | ``` 132 | 133 | --- 134 | 135 | ## Propensity scores 136 | 137 | ```{r, eval = FALSE} 138 | glm(exposure ~ confounder_1 + confounder_2 + confounder_3 + ..., 139 | data = df, 140 | family = binomial()) %>% 141 | augment(type.predict = "response", data = df) #<< 142 | ``` 143 | 144 | --- 145 | class: inverse 146 | 147 | ## Propensity scores 148 | 149 | ![](img/pscores.png) 150 | 151 | --- 152 | 153 | ```{r, echo = FALSE, message = FALSE, warning = FALSE, fig.height = 5.5} 154 | library(cidata) 155 | library(ggdag) 156 | set.seed(1234) 157 | # set up DAG 158 | smk_wt_dag <- dagify( 159 | # specify causes of quitting smoking and weight gain: 160 | qsmk ~ sex + age + smokeyrs + wt71, 161 | wt82_71 ~ sex + age + smokeyrs + wt71, 162 | # specify causal question: 163 | exposure = "qsmk", 164 | outcome = "wt82_71", 165 | # set up labels: 166 | # here, I'll use the same variable names as the data set, but I'll label them 167 | # with clearer names 168 | labels = c( 169 | # causal question 170 | "qsmk" = "quit\nsmoking (qsmk)", 171 | "wt82_71" = "change in\nweight", 172 | 173 | # demographics 174 | "age" = "age", 175 | "sex" = "sex", 176 | 177 | # health 178 | "wt71" = "baseline\nweight (wt71)", 179 | 180 | # smoking history 181 | "smokeyrs" = "yrs of\nsmoking (smokeyrs)" 182 | ) 183 | ) %>% 184 | tidy_dagitty() 185 | 186 | smk_wt_dag %>% 187 | ggdag(text = FALSE, use_labels = "label") 188 | ``` 189 | 190 | --- 191 | 192 | class: inverse 193 | 194 | ## Your turn 195 | 196 | `r countdown::countdown(minutes = 5)` 197 | 198 | 1. Using the **confounders** identified in the previous DAG, fit a propensity score model for `qsmk` 199 | 2. Stretch: Create two histograms, one of the propensity scores for those that quit smoking and one for those that do not 200 | 201 | 202 | 203 | -------------------------------------------------------------------------------- /slides/03-pscores.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Propensity Scores 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 180 | 181 | 182 | 244 | 245 | 255 | 256 | 275 | 276 | 286 | 287 | 288 | -------------------------------------------------------------------------------- /slides/03-pscores.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/03-pscores.pdf -------------------------------------------------------------------------------- /slides/03-pscores_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/03-pscores_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /slides/03-pscores_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/03-pscores_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Propensity Score Weighting" 3 | author: "Lucy D'Agostino McGowan" 4 | institute: "Wake Forest University" 5 | date: "2020-07-29 (updated: `r Sys.Date()`)" 6 | output: 7 | xaringan::moon_reader: 8 | css: ["default", "theme.css"] 9 | lib_dir: libs 10 | nature: 11 | highlightStyle: github 12 | highlightLines: true 13 | highlightSpans: true 14 | countIncrementalSlides: false 15 | --- 16 | 17 | ```{r, include = FALSE} 18 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320, fig.height = 4) 19 | ``` 20 | 21 | class: inverse 22 | 23 | ## Propensity scores 24 | 25 | * Weighting 26 | * Matching 27 | * Stratification 28 | * Direct Adjustment 29 | * ... 30 | 31 | --- 32 | class: inverse 33 | 34 | ## Propensity scores 35 | 36 | * **Weighting** 37 | * Matching 38 | * Stratification 39 | * Direct Adjustment 40 | * ... 41 | 42 | --- 43 | class: inverse 44 | 45 | ## Target estimands 46 | 47 | ### Average Treatment Effect (ATE) 48 | 49 | $$\Large w_{ATE} = \frac{Z_i}{p_i} + \frac{1-Z_i}{1 - p_i}$$ 50 | 51 | --- 52 | class: inverse 53 | 54 | ## Target estimands 55 | 56 | ### Average Treatment Effect Among the Treated (ATT) 57 | $$\Large w_{ATT} = \frac{p_i Z_i}{p_i} + \frac{p_i (1-Z_i)}{1-p_i}$$ 58 | -- 59 | 60 | ### Average Treatment Effect Among the Controls (ATC) 61 | $$\Large w_{ATC} = \frac{(1-p_i)Z_i}{p_i} + \frac{(1-p_i)(1-Z_i)}{(1-p_i)}$$ 62 | 63 | --- 64 | class: inverse 65 | 66 | ## Target estimands 67 | 68 | ### Average Treatment Effect Among the Evenly Matchable (ATM) 69 | $$\Large w_{ATM} = \frac{\min \{p_i, 1-p_i\}}{z_ip_i + (1-Z_i)(1-p_i)}$$ 70 | -- 71 | 72 | ### Average Treatment Effect Among the Overlap Population 73 | $$\Large w_{ATO} = (1-p_i)Z_i + p_i(1-Z_i)$$ 74 | 75 | --- 76 | 77 | ```{r, include = FALSE} 78 | library(tidyverse) 79 | library(broom) 80 | library(cidata) 81 | propensity_model <- glm( 82 | qsmk ~ sex + 83 | race + age + I(age^2) + education + 84 | smokeintensity + I(smokeintensity^2) + 85 | smokeyrs + I(smokeyrs^2) + exercise + active + 86 | wt71 + I(wt71^2), 87 | family = binomial(), 88 | data = nhefs_complete 89 | ) 90 | 91 | df <- propensity_model %>% 92 | augment(type.predict = "response", data = nhefs_complete) %>% 93 | mutate(wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted), 94 | w_ate = (qsmk / .fitted) + 95 | ((1 - qsmk) / (1 - .fitted)), 96 | w_att = ((.fitted * qsmk) / .fitted) + 97 | ((.fitted * (1 - qsmk)) / (1 - .fitted)), 98 | w_atc = (((1 - .fitted) * qsmk) / .fitted) + 99 | (((1 - .fitted) * (1 - qsmk)) / (1 - .fitted)), 100 | w_atm = pmin(.fitted, 1 - .fitted) / 101 | (qsmk * .fitted + (1 - qsmk) * (1 - .fitted)), 102 | w_ato = (1 - .fitted) * qsmk + 103 | .fitted * (1 - qsmk) 104 | ) 105 | 106 | d <- df %>% 107 | tidyr::spread(qsmk, .fitted, sep = "_p") 108 | ``` 109 | 110 | ## Histogram of propensity scores 111 | 112 | ```{r, echo = FALSE, message = FALSE, warning = FALSE} 113 | ggplot(d) + 114 | geom_histogram(bins = 50, aes(qsmk_p1)) + 115 | geom_histogram(bins = 50, aes(x = qsmk_p0, y = -..count..)) + 116 | ylab("count") + xlab("p") + 117 | geom_hline(yintercept = 0, lwd = 0.5) + 118 | scale_y_continuous(label = abs) 119 | ``` 120 | 121 | --- 122 | 123 | ## ATE 124 | 125 | ```{r, echo = FALSE, message = FALSE, warning = FALSE} 126 | ggplot(d) + 127 | geom_histogram(bins = 50, aes(qsmk_p1), alpha = 0.5) + 128 | geom_histogram(bins = 50, aes(qsmk_p1, weight = w_ate), fill = "green", alpha = 0.5) + 129 | geom_histogram(bins = 50, alpha = 0.5, aes(x = qsmk_p0, y = -..count..)) + 130 | geom_histogram(bins = 50, aes(x = qsmk_p0, weight = w_ate, y = -..count..), fill = "blue", alpha = 0.5) + 131 | ylab("count") + xlab("p") + 132 | geom_hline(yintercept = 0, lwd = 0.5) + 133 | scale_y_continuous(label = abs) + 134 | theme_minimal() + 135 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = 5, ymax = 100), fill = "#5DB854") + 136 | geom_text(aes(x = 0.975, y = 50), label = "trt", angle = 270, color = "white") + 137 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = -100, ymax = -5), fill = "#5154B8") + 138 | geom_text(aes(x = 0.975, y = -50), label = "control", angle = 270, color = "white") 139 | ``` 140 | 141 | --- 142 | 143 | ## ATT 144 | 145 | 146 | ```{r, echo = FALSE, message = FALSE, warning = FALSE} 147 | ggplot(d) + 148 | geom_histogram(bins = 50, aes(qsmk_p1), alpha = 0.5) + 149 | geom_histogram(bins = 50, aes(qsmk_p1, weight = w_att), fill = "green", alpha = 0.5) + 150 | geom_histogram(bins = 50, alpha = 0.5, aes(x = qsmk_p0, y = -..count..)) + 151 | geom_histogram(bins = 50, aes(x = qsmk_p0, weight = w_att, y = -..count..), fill = "blue", alpha = 0.5) + 152 | ylab("count") + xlab("p") + 153 | geom_hline(yintercept = 0, lwd = 0.5) + 154 | scale_y_continuous(label = abs) + 155 | theme_minimal() + 156 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = 5, ymax = 30), fill = "#5DB854") + 157 | geom_text(aes(x = 0.975, y = 17), label = "trt", angle = 270, color = "white") + 158 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = -100, ymax = -5), fill = "#5154B8") + 159 | geom_text(aes(x = 0.975, y = -50), label = "control", angle = 270, color = "white") 160 | ``` 161 | 162 | 163 | --- 164 | 165 | ## ATC 166 | 167 | ```{r, echo = FALSE, message = FALSE, warning = FALSE} 168 | ggplot(d) + 169 | geom_histogram(bins = 50, aes(qsmk_p1), alpha = 0.5) + 170 | geom_histogram(bins = 50, aes(qsmk_p1, weight = w_atc), fill = "green", alpha = 0.5) + 171 | geom_histogram(bins = 50, alpha = 0.5, aes(x = qsmk_p0, y = -..count..)) + 172 | geom_histogram(bins = 50, aes(x = qsmk_p0, weight = w_atc, y = -..count..), fill = "blue", alpha = 0.5) + 173 | ylab("count") + xlab("p") + 174 | geom_hline(yintercept = 0, lwd = 0.5) + 175 | scale_y_continuous(label = abs) + 176 | theme_minimal() + 177 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = 5, ymax = 100), fill = "#5DB854") + 178 | geom_text(aes(x = 0.975, y = 50), label = "trt", angle = 270, color = "white") + 179 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = -100, ymax = -5), fill = "#5154B8") + 180 | geom_text(aes(x = 0.975, y = -50), label = "control", angle = 270, color = "white") 181 | ``` 182 | 183 | --- 184 | 185 | ## ATM 186 | 187 | ```{r, echo = FALSE, message = FALSE, warning = FALSE} 188 | ggplot(d) + 189 | geom_histogram(bins = 50, aes(qsmk_p1), alpha = 0.5) + 190 | geom_histogram(bins = 50, aes(qsmk_p1, weight = w_atm), fill = "green", alpha = 0.5) + 191 | geom_histogram(bins = 50, alpha = 0.5, aes(x = qsmk_p0, y = -..count..)) + 192 | geom_histogram(bins = 50, aes(x = qsmk_p0, weight = w_atm, y = -..count..), fill = "blue", alpha = 0.5) + 193 | ylab("count") + xlab("p") + 194 | geom_hline(yintercept = 0, lwd = 0.5) + 195 | scale_y_continuous(label = abs) + theme_minimal() + 196 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = 5, ymax = 30), fill = "#5DB854") + 197 | geom_text(aes(x = 0.975, y = 17), label = "trt", angle = 270, color = "white") + 198 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = -100, ymax = -5), fill = "#5154B8") + 199 | geom_text(aes(x = 0.975, y = -50), label = "control", angle = 270, color = "white") 200 | ``` 201 | 202 | --- 203 | 204 | ## ATO 205 | 206 | ```{r, echo = FALSE, message = FALSE, warning = FALSE} 207 | ggplot(d) + 208 | geom_histogram(bins = 50, aes(qsmk_p1), alpha = 0.5) + 209 | geom_histogram(bins = 50, aes(qsmk_p1, weight = w_ato), fill = "green", alpha = 0.5) + 210 | geom_histogram(bins = 50, alpha = 0.5, aes(x = qsmk_p0, y = -..count..)) + 211 | geom_histogram(bins = 50, aes(x = qsmk_p0, weight = w_ato, y = -..count..), fill = "blue", alpha = 0.5) + 212 | ylab("count") + xlab("p") + 213 | geom_hline(yintercept = 0, lwd = 0.5) + 214 | scale_y_continuous(label = abs) + 215 | theme_minimal() + 216 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = 5, ymax = 30), fill = "#5DB854") + 217 | geom_text(aes(x = 0.975, y = 17), label = "trt", angle = 270, color = "white") + 218 | geom_rect(aes(xmin = 0.95, xmax = 1, ymin = -100, ymax = -5), fill = "#5154B8") + 219 | geom_text(aes(x = 0.975, y = -50), label = "control", angle = 270, color = "white") 220 | ``` 221 | 222 | 223 | --- 224 | 225 | ## ATE in R 226 | 227 | * Average Treatment Effect (ATE) 228 | * $w_{ATE} = \frac{Z_i}{p_i} + \frac{1-Z_i}{1 - p_i}$ 229 | 230 | ```{r} 231 | df <- propensity_model %>% 232 | augment(type.predict = "response", data = nhefs_complete) %>% 233 | mutate(w_ate = (qsmk / .fitted) + ((1 - qsmk) / (1 - .fitted))) #<< 234 | ``` 235 | 236 | --- 237 | class: inverse 238 | 239 | ## Your Turn 240 | 241 | `r countdown::countdown(minutes = 5)` 242 | 243 | 1. Using the propensity scores you created in the previous exercise, add the ATE weights to your data frame `df` 244 | 245 | 2. Stretch: Using the same propensity scores, create ATT weights 246 | -------------------------------------------------------------------------------- /slides/04-pscore-weighting.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Propensity Score Weighting 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 148 | 149 | 150 | 212 | 213 | 223 | 224 | 243 | 244 | 254 | 255 | 256 | -------------------------------------------------------------------------------- /slides/04-pscore-weighting.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting.pdf -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /slides/04-pscore-weighting_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-pscore-weighting_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /slides/04-using-pscores_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-using-pscores_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /slides/04-using-pscores_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-using-pscores_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /slides/04-using-pscores_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-using-pscores_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /slides/04-using-pscores_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-using-pscores_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /slides/04-using-pscores_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-using-pscores_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /slides/04-using-pscores_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/04-using-pscores_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Propensity Score Diagnostics" 3 | author: "Lucy D'Agostino McGowan" 4 | institute: "Wake Forest University" 5 | date: "2020-07-29 (updated: `r Sys.Date()`)" 6 | output: 7 | xaringan::moon_reader: 8 | css: ["default", "theme.css"] 9 | lib_dir: libs 10 | nature: 11 | highlightStyle: github 12 | highlightLines: true 13 | highlightSpans: true 14 | countIncrementalSlides: false 15 | --- 16 | 17 | ```{r, include = FALSE} 18 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320, fig.height = 4) 19 | ``` 20 | 21 | ```{r, echo = FALSE} 22 | knitr::opts_chunk$set(eval = FALSE) 23 | ``` 24 | 25 | class: inverse 26 | 27 | ## Checking balance 28 | 29 | * Love plots (Standardized Mean Difference) 30 | * ECDF plots 31 | 32 | --- 33 | class: inverse 34 | 35 | ## Standardized Mean Difference (SMD) 36 | 37 | $$\LARGE d = \frac{\bar{x}_{treatment}-\bar{x}_{control}}{\sqrt{\frac{s^2_{treatment}+s^2_{control}}{2}}}$$ 38 | 39 | --- 40 | 41 | ## SMD in R 42 | 43 | 1

        Create a "design object" to incorporate the weights

44 | 45 | ```{r, message = FALSE, warning = FALSE} 46 | library(survey) 47 | 48 | svy_des <- svydesign( 49 | ids = ~ 1, 50 | data = df, 51 | weights = ~ wts 52 | ) 53 | ``` 54 | 55 | --- 56 | 57 | ## SMD in R 58 | 59 | 2

        Calculate the unweighted standardized mean differences

60 | 61 | 62 | ```{r, message = FALSE, warning = FALSE} 63 | library(tableone) 64 | library(tidyverse) 65 | 66 | smd_table_unweighted <- CreateTableOne( 67 | vars = c("confounder_1", "confounder_1", ...), 68 | strata = "exposure", 69 | data = df, 70 | test = FALSE) 71 | ``` 72 | 73 | --- 74 | 75 | ## SMD in R 76 | 77 | 3

        Calculate the weighted standardized mean differences

78 | 79 | ```{r} 80 | smd_table <- svyCreateTableOne( 81 | vars = c("confounder_1", "confounder_1", ...), 82 | strata = "exposure", 83 | data = svy_des, 84 | test = FALSE) 85 | ``` 86 | 87 | --- 88 | 89 | ## SMD in R 90 | 91 | 3

        Calculate the weighted standardized mean differences

92 | 93 | ```{r} 94 | smd_table <- svyCreateTableOne( #<< 95 | vars = c("confounder_1", "confounder_1", ...), 96 | strata = "exposure", 97 | data = svy_des, #<< 98 | test = FALSE) 99 | ``` 100 | 101 | --- 102 | 103 | ## SMD in R 104 | 105 | 4

        Stick these together in a data frame

106 | 107 | 108 | ```{r} 109 | plot_df <- data.frame( 110 | var = rownames(ExtractSmd(smd_table)), 111 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 112 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 113 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 114 | ``` 115 | 116 | --- 117 | 118 | ## SMD in R 119 | 120 | 4

        Stick these together in a data frame

121 | 122 | 123 | ```{r} 124 | plot_df <- data.frame( 125 | var = rownames(ExtractSmd(smd_table)), #<< 126 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 127 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 128 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 129 | 130 | rownames(EXtractSMD(smd_table)) 131 | #> [1] "confounder_1" "confounder_2" 132 | ``` 133 | 134 | --- 135 | 136 | ## SMD in R 137 | 138 | 4

        Stick these together in a data frame

139 | 140 | 141 | ```{r} 142 | plot_df <- data.frame( 143 | var = rownames(ExtractSmd(smd_table)), 144 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), #<< 145 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 146 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 147 | 148 | as.numeric(ExtractSmd(smd_table_unweighted)) 149 | #> [1] 0.160 0.177 150 | ``` 151 | 152 | --- 153 | 154 | ## SMD in R 155 | 156 | 4

        Stick these together in a data frame

157 | 158 | 159 | ```{r} 160 | plot_df <- data.frame( 161 | var = rownames(ExtractSmd(smd_table)), 162 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 163 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% #<< 164 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 165 | 166 | as.numeric(ExtractSmd(smd_table)) 167 | #> [1] 0.002 0.007 168 | ``` 169 | 170 | --- 171 | 172 | ## SMD in R 173 | 174 | 4

        Stick these together in a data frame

175 | 176 | 177 | ```{r} 178 | plot_df <- data.frame( 179 | var = rownames(ExtractSmd(smd_table)), 180 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 181 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 182 | pivot_longer(-var, names_to = "Method", values_to = "SMD") #<< 183 | 184 | ``` 185 | 186 | --- 187 | 188 | ## SMD in R 189 | 190 | 5

        Plot them! (in a Love plot!)

191 | 192 | 193 | ```{r} 194 | ggplot(data = plot_df, 195 | mapping = aes(x = var, y = SMD, group = Method, color = Method)) + 196 | geom_line() + 197 | geom_point() + 198 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 199 | coord_flip() 200 | ``` 201 | 202 | --- 203 | 204 | ## SMD in R 205 | 206 | 5

        Plot them! (in a Love plot!)

207 | 208 | 209 | ```{r} 210 | ggplot(data = plot_df, #<< 211 | mapping = aes(x = var, y = SMD, group = Method, color = Method)) + #<< 212 | geom_line() + 213 | geom_point() + 214 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 215 | coord_flip() 216 | ``` 217 | 218 | --- 219 | 220 | 221 | ## SMD in R 222 | 223 | 5

        Plot them! (in a Love plot!)

224 | 225 | 226 | ```{r} 227 | ggplot(data = plot_df, 228 | mapping = aes(x = var, y = SMD, group = Method, color = Method)) + 229 | geom_line() + #<< 230 | geom_point() + 231 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 232 | coord_flip() 233 | ``` 234 | 235 | --- 236 | 237 | 238 | ## SMD in R 239 | 240 | 5

        Plot them! (in a Love plot!)

241 | 242 | 243 | ```{r} 244 | ggplot(data = plot_df, 245 | mapping = aes(x = var, y = SMD, group = Method, color = Method)) + 246 | geom_line() + 247 | geom_point() + #<< 248 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 249 | coord_flip() 250 | ``` 251 | 252 | 253 | --- 254 | 255 | 256 | ## SMD in R 257 | 258 | 5

        Plot them! (in a Love plot!)

259 | 260 | 261 | ```{r} 262 | ggplot(data = plot_df, 263 | mapping = aes(x = var, y = SMD, group = Method, color = Method)) + 264 | geom_line() + 265 | geom_point() + 266 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + #<< 267 | coord_flip() 268 | ``` 269 | 270 | 271 | --- 272 | 273 | 274 | ## SMD in R 275 | 276 | 277 | 5

        Plot them! (in a Love plot!)

278 | 279 | ```{r} 280 | ggplot(data = plot_df, mapping = aes(x = var, y = SMD, group = Method, color = Method)) + 281 | geom_line() + 282 | geom_point() + 283 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 284 | coord_flip() #<< 285 | ``` 286 | 287 | --- 288 | 289 | ## Love plot 290 | 291 | ```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE} 292 | library(survey) 293 | library(tableone) 294 | library(tidyverse) 295 | library(broom) 296 | # remotes::install_github("malcolmbarrett/cidata") 297 | library(cidata) 298 | 299 | propensity_model <- glm( 300 | qsmk ~ sex + 301 | race + age + I(age^2) + education + 302 | smokeintensity + I(smokeintensity^2) + 303 | smokeyrs + I(smokeyrs^2) + exercise + active + 304 | wt71 + I(wt71^2), 305 | family = binomial(), 306 | data = nhefs_complete 307 | ) 308 | 309 | df <- propensity_model %>% 310 | augment(type.predict = "response", data = nhefs_complete) %>% 311 | mutate(w_ate = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted)) 312 | 313 | svy_des <- svydesign( 314 | ids = ~ 1, 315 | data = df, 316 | weights = ~ w_ate) 317 | 318 | smd_table_unweighted <- CreateTableOne( 319 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 320 | "exercise", "active", "wt71"), 321 | strata = "qsmk", 322 | data = df, 323 | test = FALSE) 324 | 325 | smd_table <- svyCreateTableOne( 326 | vars = c("sex", "race", "age", "education", "smokeintensity", "smokeyrs", 327 | "exercise", "active", "wt71"), 328 | strata = "qsmk", 329 | data = svy_des, 330 | test = FALSE) 331 | 332 | 333 | plot_df <- data.frame( 334 | var = rownames(ExtractSmd(smd_table)), 335 | Unadjusted = as.numeric(ExtractSmd(smd_table_unweighted)), 336 | Weighted = as.numeric(ExtractSmd(smd_table))) %>% 337 | pivot_longer(-var, names_to = "Method", values_to = "SMD") 338 | 339 | ggplot( 340 | data = plot_df, 341 | mapping = aes(x = var, y = SMD, group = Method, color = Method) 342 | ) + 343 | geom_line() + 344 | geom_point() + 345 | geom_hline(yintercept = 0.1, color = "black", size = 0.1) + 346 | coord_flip() 347 | ``` 348 | 349 | --- 350 | 351 | ## Your turn 1 352 | 353 | `r countdown::countdown(minutes = 7)` 354 | 355 | 1. Create a Love Plot for the propensity score weighting you created in the previous exercise 356 | 357 | --- 358 | 359 | ## ECDF 360 | 361 | For continuous variables, it can be helpful to look at the _whole_ distribution pre and post-weighting rather than a single summary measure 362 | 363 | ```{r, echo = FALSE, message = FALSE, warning = FALSE, eval = TRUE} 364 | ggplot(df, aes(x = wt71, group = qsmk, color = factor(qsmk))) + 365 | stat_ecdf() + 366 | scale_color_manual("Quit smoking", values = c("#5154B8", "#5DB854"), 367 | labels = c("Yes", "No")) + 368 | xlab("Weight in Kg in 1971") + 369 | ylab("Proportion <= x") 370 | ``` 371 | 372 | 373 | --- 374 | 375 | ## Unweighted ECDF 376 | 377 | ```{r} 378 | ggplot(df, aes(x = wt71, group = qsmk, color = factor(qsmk))) + 379 | stat_ecdf() + 380 | scale_color_manual("Quit smoking", values = c("#5154B8", "#5DB854"), 381 | labels = c("Yes", "No")) + 382 | xlab("Weight in Kg in 1971") + 383 | ylab("Proportion <= x") 384 | ``` 385 | 386 | --- 387 | 388 | ## Unweighted ECDF 389 | 390 | ```{r} 391 | ggplot(df, aes(x = wt71, group = qsmk, color = factor(qsmk))) + #<< 392 | stat_ecdf() + 393 | scale_color_manual("Quit smoking", values = c("#5154B8", "#5DB854"), 394 | labels = c("Yes", "No")) + 395 | xlab("Weight in Kg in 1971") + 396 | ylab("Proportion <= x") 397 | ``` 398 | 399 | --- 400 | 401 | ## Unweighted ECDF 402 | 403 | ```{r} 404 | ggplot(df, aes(x = wt71, group = qsmk, color = factor(qsmk))) + 405 | stat_ecdf() + #<< 406 | scale_color_manual("Quit smoking", values = c("#5154B8", "#5DB854"), 407 | labels = c("Yes", "No")) + 408 | xlab("Weight in Kg in 1971") + 409 | ylab("Proportion <= x") 410 | ``` 411 | 412 | --- 413 | 414 | ## Unweighted ECDF 415 | 416 | ```{r, echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE} 417 | ggplot(df, aes(x = wt71, group = qsmk, color = factor(qsmk))) + 418 | stat_ecdf() + 419 | scale_color_manual("Quit smoking", values = c("#5154B8", "#5DB854"), 420 | labels = c("Yes", "No")) + 421 | xlab("Weight in Kg in 1971") + 422 | ylab("Proportion <= x") 423 | ``` 424 | 425 | 426 | --- 427 | 428 | ## Weighted ECDF 429 | 430 | ```{r} 431 | ecdf_1 <- df %>% 432 | filter(qsmk == 1) %>% 433 | arrange(wt71) %>% 434 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 435 | 436 | ecdf_0 <- df %>% 437 | filter(qsmk == 0) %>% 438 | arrange(wt71) %>% 439 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 440 | 441 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 442 | geom_line( color = "#5DB854") + 443 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 444 | xlab("Weight in Kg in 1971") + 445 | ylab("Proportion <= x") 446 | ``` 447 | 448 | --- 449 | 450 | 451 | ## Weighted ECDF 452 | 453 | ```{r} 454 | ecdf_1 <- df %>% 455 | filter(qsmk == 1) %>% #<< 456 | arrange(wt71) %>% 457 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 458 | 459 | ecdf_0 <- df %>% 460 | filter(qsmk == 0) %>% 461 | arrange(wt71) %>% 462 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 463 | 464 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 465 | geom_line( color = "#5DB854") + 466 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 467 | xlab("Weight in Kg in 1971") + 468 | ylab("Proportion <= x") 469 | ``` 470 | 471 | --- 472 | 473 | 474 | ## Weighted ECDF 475 | 476 | ```{r} 477 | ecdf_1 <- df %>% 478 | filter(qsmk == 1) %>% 479 | arrange(wt71) %>% #<< 480 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 481 | 482 | ecdf_0 <- df %>% 483 | filter(qsmk == 0) %>% 484 | arrange(wt71) %>% 485 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 486 | 487 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 488 | geom_line( color = "#5DB854") + 489 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 490 | xlab("Weight in Kg in 1971") + 491 | ylab("Proportion <= x") 492 | ``` 493 | 494 | --- 495 | 496 | 497 | ## Weighted ECDF 498 | 499 | ```{r} 500 | ecdf_1 <- df %>% 501 | filter(qsmk == 1) %>% 502 | arrange(wt71) %>% 503 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) #<< 504 | 505 | ecdf_0 <- df %>% 506 | filter(qsmk == 0) %>% 507 | arrange(wt71) %>% 508 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 509 | 510 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 511 | geom_line( color = "#5DB854") + 512 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 513 | xlab("Weight in Kg in 1971") + 514 | ylab("Proportion <= x") 515 | ``` 516 | 517 | --- 518 | 519 | ## Weighted ECDF 520 | 521 | ```{r} 522 | ecdf_1 <- df %>% 523 | filter(qsmk == 1) %>% 524 | arrange(wt71) %>% 525 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 526 | 527 | ecdf_0 <- df %>% #<< 528 | filter(qsmk == 0) %>% #<< 529 | arrange(wt71) %>% #<< 530 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) #<< 531 | 532 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 533 | geom_line( color = "#5DB854") + 534 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 535 | xlab("Weight in Kg in 1971") + 536 | ylab("Proportion <= x") 537 | ``` 538 | 539 | --- 540 | 541 | ## Weighted ECDF 542 | 543 | ```{r} 544 | ecdf_1 <- df %>% 545 | filter(qsmk == 1) %>% 546 | arrange(wt71) %>% 547 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 548 | 549 | ecdf_0 <- df %>% 550 | filter(qsmk == 0) %>% 551 | arrange(wt71) %>% 552 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 553 | 554 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + #<< 555 | geom_line( color = "#5DB854") + 556 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 557 | xlab("Weight in Kg in 1971") + 558 | ylab("Proportion <= x") 559 | ``` 560 | 561 | --- 562 | 563 | 564 | ## Weighted ECDF 565 | 566 | ```{r} 567 | ecdf_1 <- df %>% 568 | filter(qsmk == 1) %>% 569 | arrange(wt71) %>% 570 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 571 | 572 | ecdf_0 <- df %>% 573 | filter(qsmk == 0) %>% 574 | arrange(wt71) %>% 575 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 576 | 577 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 578 | geom_line( color = "#5DB854") + #<< 579 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 580 | xlab("Weight in Kg in 1971") + 581 | ylab("Proportion <= x") 582 | ``` 583 | 584 | --- 585 | 586 | 587 | ## Weighted ECDF 588 | 589 | ```{r} 590 | ecdf_1 <- df %>% 591 | filter(qsmk == 1) %>% 592 | arrange(wt71) %>% 593 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 594 | 595 | ecdf_0 <- df %>% 596 | filter(qsmk == 0) %>% 597 | arrange(wt71) %>% 598 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 599 | 600 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 601 | geom_line( color = "#5DB854") + 602 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + #<< 603 | xlab("Weight in Kg in 1971") + 604 | ylab("Proportion <= x") 605 | ``` 606 | --- 607 | 608 | ## Weighted ECDF 609 | 610 | ```{r, echo = FALSE, eval = TRUE} 611 | ecdf_1 <- df %>% 612 | filter(qsmk == 1) %>% 613 | arrange(wt71) %>% 614 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 615 | 616 | ecdf_0 <- df %>% 617 | filter(qsmk == 0) %>% 618 | arrange(wt71) %>% 619 | mutate(cum_pct = cumsum(w_ate) / sum(w_ate)) 620 | 621 | ggplot(ecdf_1, aes(x = wt71, y = cum_pct)) + 622 | geom_line( color = "#5DB854") + 623 | geom_line(data = ecdf_0, aes(x = wt71, y = cum_pct), color = "#5154B8") + 624 | xlab("Weight in Kg in 1971") + 625 | ylab("Proportion <= x") 626 | ``` 627 | 628 | --- 629 | 630 | ## Your turn 2 631 | 632 | `r countdown::countdown(minutes = 7)` 633 | 634 | 1. Create an unweighted ECDF examining the `smokeyrs` confounder for those that quit smoking and those that did not 635 | 3. Create a weighted ECDF examining the `smokeyrs` confounder 636 | -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics.pdf -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-2.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-3.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-14-4.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-15-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-15-2.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-18-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-18-2.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-19-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-19-2.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-23-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-23-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-32-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-32-1.png -------------------------------------------------------------------------------- /slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/05-pscore-diagnostics_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /slides/06-outcome-model.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fitting the outcome model" 3 | author: "Lucy D'Agostino McGowan" 4 | institute: "Wake Forest University" 5 | date: "2020-07-29 (updated: `r Sys.Date()`)" 6 | output: 7 | xaringan::moon_reader: 8 | css: ["default", "theme.css"] 9 | lib_dir: libs 10 | nature: 11 | highlightStyle: github 12 | highlightLines: true 13 | highlightSpans: true 14 | countIncrementalSlides: false 15 | --- 16 | 17 | ```{r, include = FALSE} 18 | knitr::opts_chunk$set(eval = FALSE) 19 | ``` 20 | 21 | ## Outcome Model 22 | 23 | ```{r} 24 | library(broom) 25 | 26 | lm(outcome ~ exposure, data = df, weights = wts) %>% 27 | tidy() 28 | ``` 29 | 30 | -- 31 | `r emo::ji("check")` This will get us the point estimate 32 | -- 33 | 34 | `r emo::ji("x")` This will get NOT us the correct confidence intervals 35 | -- 36 | 37 | `r emo::ji("package")` {rsample} 38 | --- 39 | 40 | 1

        Create a function to run your analysis once on a sample of your data

41 | 42 | .small[ 43 | ```{r} 44 | fit_ipw <- function(split, ...) { 45 | .df <- analysis(split) 46 | 47 | # fit propensity score model 48 | propensity_model <- glm( 49 | exposure ~ confounder_1 + confounder_2 + ... 50 | family = binomial(), 51 | data = .df 52 | ) 53 | 54 | # calculate inverse probability weights 55 | .df <- propensity_model %>% 56 | augment(type.predict = "response", data = .df) %>% 57 | mutate(wts = 1 / ifelse(exposure == 0, 1 - .fitted, .fitted)) 58 | 59 | # fit correctly bootsrapped ipw model 60 | lm(outcome ~ exposure, data = .df, weights = wts) %>% 61 | tidy() 62 | } 63 | ``` 64 | ] 65 | 66 | --- 67 | 68 | 1

        Create a function to run your analysis once on a sample of your data

69 | 70 | .small[ 71 | ```{r} 72 | fit_ipw <- function(split, ...) { #<< 73 | .df <- analysis(split) #<< 74 | 75 | # fit propensity score model 76 | propensity_model <- glm( 77 | exposure ~ confounder_1 + confounder_2 + ... 78 | family = binomial(), 79 | data = .df 80 | ) 81 | 82 | # calculate inverse probability weights 83 | .df <- propensity_model %>% 84 | augment(type.predict = "response", data = .df) %>% 85 | mutate(wts = 1 / ifelse(exposure == 0, 1 - .fitted, .fitted)) 86 | 87 | # fit correctly bootsrapped ipw model 88 | lm(outcome ~ exposure, data = .df, weights = wts) %>% 89 | tidy() 90 | } 91 | ``` 92 | ] 93 | 94 | --- 95 | 96 | 1

        Create a function to run your analysis once on a sample of your data

97 | 98 | .small[ 99 | ```{r} 100 | fit_ipw <- function(split, ...) { 101 | .df <- analysis(split) 102 | 103 | # fit propensity score model #<< 104 | propensity_model <- glm( #<< 105 | exposure ~ confounder_1 + confounder_2 + ... #<< 106 | family = binomial(), #<< 107 | data = .df #<< 108 | ) #<< 109 | 110 | # calculate inverse probability weights 111 | .df <- propensity_model %>% 112 | augment(type.predict = "response", data = .df) %>% 113 | mutate(wts = 1 / ifelse(exposure == 0, 1 - .fitted, .fitted)) 114 | 115 | # fit correctly bootsrapped ipw model 116 | lm(outcome ~ exposure, data = .df, weights = wts) %>% 117 | tidy() 118 | } 119 | ``` 120 | ] 121 | 122 | --- 123 | 124 | 1

        Create a function to run your analysis once on a sample of your data

125 | 126 | .small[ 127 | ```{r} 128 | fit_ipw <- function(split, ...) { 129 | .df <- analysis(split) 130 | 131 | # fit propensity score model 132 | propensity_model <- glm( 133 | exposure ~ confounder_1 + confounder_2 + ... 134 | family = binomial(), 135 | data = .df 136 | ) 137 | 138 | # calculate inverse probability weights #<< 139 | .df <- propensity_model %>% #<< 140 | augment(type.predict = "response", data = .df) %>% #<< 141 | mutate(wts = 1 / ifelse(exposure == 0, 1 - .fitted, .fitted)) #<< 142 | 143 | # fit correctly bootsrapped ipw model 144 | lm(outcome ~ exposure, data = .df, weights = wts) %>% 145 | tidy() 146 | } 147 | ``` 148 | ] 149 | 150 | --- 151 | 152 | 1

        Create a function to run your analysis once on a sample of your data

153 | 154 | .small[ 155 | ```{r} 156 | fit_ipw <- function(split, ...) { 157 | .df <- analysis(split) 158 | 159 | # fit propensity score model 160 | propensity_model <- glm( 161 | exposure ~ confounder_1 + confounder_2 + ... 162 | family = binomial(), 163 | data = .df 164 | ) 165 | 166 | # calculate inverse probability weights 167 | .df <- propensity_model %>% 168 | augment(type.predict = "response", data = .df) %>% 169 | mutate(wts = 1 / ifelse(exposure == 0, 1 - .fitted, .fitted)) 170 | 171 | # fit correctly bootsrapped ipw model #<< 172 | lm(outcome ~ exposure, data = .df, weights = wts) %>% #<< 173 | tidy() #<< 174 | } 175 | ``` 176 | ] 177 | 178 | --- 179 | 180 | 2

        Use {rsample} to bootstrap our causal effect

181 | 182 | 183 | ```{r} 184 | library(rsample) 185 | 186 | # fit ipw model to bootstrapped samples 187 | ipw_results <- bootstraps(df, 1000, apparent = TRUE) %>% 188 | mutate(results = map(splits, fit_ipw)) 189 | ``` 190 | 191 | --- 192 | 193 | 194 | 2

        Use {rsample} to bootstrap our causal effect

195 | 196 | 197 | ```{r} 198 | library(rsample) 199 | 200 | # fit ipw model to bootstrapped samples 201 | ipw_results <- bootstraps(df, 1000, apparent = TRUE) %>% #<< 202 | mutate(results = map(splits, fit_ipw)) 203 | ``` 204 | 205 | --- 206 | 207 | 2

        Use {rsample} to bootstrap our causal effect

208 | 209 | 210 | ```{r} 211 | library(rsample) 212 | 213 | # fit ipw model to bootstrapped samples 214 | ipw_results <- bootstraps(df, 1000, apparent = TRUE) %>% 215 | mutate(results = map(splits, fit_ipw)) #<< 216 | ``` 217 | 218 | --- 219 | 220 | 3

        Pull out the causal effect

221 | 222 | 223 | ```{r, eval = FALSE} 224 | # get t-statistic-based CIs 225 | boot_estimate <- int_t(ipw_results, results) %>% #<< 226 | filter(term == "exposure") 227 | ``` 228 | 229 | --- 230 | 231 | ## Your Turn 232 | 233 | `r countdown::countdown(minutes = 7)` 234 | 235 | 1. Create a function called `ipw_fit` that fits the propensity score model and the weighted outcome model for the effect between `qsmk` and `wt82_71` 236 | 237 | 2. Using the `bootstraps()` and `int_t()` functions to estimate the final effect. 238 | 239 | -------------------------------------------------------------------------------- /slides/06-outcome-model.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Fitting the outcome model 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 260 | 261 | 262 | 324 | 325 | 335 | 336 | 355 | 356 | 366 | 367 | 368 | -------------------------------------------------------------------------------- /slides/06-outcome-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/06-outcome-model.pdf -------------------------------------------------------------------------------- /slides/img/conf-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/conf-2.png -------------------------------------------------------------------------------- /slides/img/conf-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/conf-3.png -------------------------------------------------------------------------------- /slides/img/ggdagitty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/ggdagitty.png -------------------------------------------------------------------------------- /slides/img/ggdagitty_alg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/ggdagitty_alg.png -------------------------------------------------------------------------------- /slides/img/ggdagitty_plots.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/ggdagitty_plots.png -------------------------------------------------------------------------------- /slides/img/ldm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/ldm.jpg -------------------------------------------------------------------------------- /slides/img/mb.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/mb.jpg -------------------------------------------------------------------------------- /slides/img/obs-studies-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/obs-studies-2.png -------------------------------------------------------------------------------- /slides/img/obs-studies-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/obs-studies-3.png -------------------------------------------------------------------------------- /slides/img/obs-studies.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/obs-studies.png -------------------------------------------------------------------------------- /slides/img/pscores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/pscores.png -------------------------------------------------------------------------------- /slides/img/randomized-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/randomized-2.png -------------------------------------------------------------------------------- /slides/img/randomized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/randomized.png -------------------------------------------------------------------------------- /slides/img/tidy_ggdagitty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/tidy_ggdagitty.png -------------------------------------------------------------------------------- /slides/img/trt-conf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/trt-conf.png -------------------------------------------------------------------------------- /slides/img/trt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/img/trt.png -------------------------------------------------------------------------------- /slides/index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Causal Inference in R Slides" 3 | date: "`r Sys.Date()`" 4 | output: 5 | xaringan::moon_reader: 6 | seal: false 7 | css: ["default", "theme.css"] 8 | lib_dir: libs 9 | nature: 10 | highlightStyle: github 11 | highlightLines: true 12 | countIncrementalSlides: false 13 | --- 14 | 15 | ```{r setup, include=FALSE} 16 | options(htmltools.dir.version = FALSE) 17 | knitr::opts_chunk$set(warning = FALSE, message = FALSE, fig.align = "center", dpi = 320) 18 | library(tidyverse) 19 | ``` 20 | 21 | class: inverse, center, middle 22 | 23 | ###[00-intro](00-intro.html) 24 | ###[01-causal_modeling_whole_game](01-causal_modeling_whole_game.html) 25 | ###[02-dags](02-dags.html) 26 | ###[03-pscores](03-pscores.html) 27 | ###[04-using-pscores](04-pscore-weighting.html) 28 | ###[05-pscore-diagnostics](05-pscore-diagnostics.html) 29 | ###[06-outcome-model](06-outcome-model.html) 30 | 31 | 32 | -------------------------------------------------------------------------------- /slides/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Causal Inference in R Slides 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 29 | 30 | 31 | 118 | 119 | 138 | 139 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /slides/libs/countdown-0.3.5/countdown.css: -------------------------------------------------------------------------------- 1 | .countdown { 2 | background: inherit; 3 | position: absolute; 4 | cursor: pointer; 5 | font-size: 3em; 6 | line-height: 1; 7 | border-color: #ddd; 8 | border-width: 3px; 9 | border-style: solid; 10 | border-radius: 15px; 11 | box-shadow: 0px 4px 10px 0px rgba(50, 50, 50, 0.4); 12 | -webkit-box-shadow: 0px 4px 10px 0px rgba(50, 50, 50, 0.4); 13 | margin: 0.6em; 14 | padding: 10px 15px; 15 | text-align: center; 16 | } 17 | .countdown { 18 | display: flex; 19 | align-items: center; 20 | justify-content: center; 21 | } 22 | .countdown .countdown-time { 23 | background: none; 24 | font-size: 100%; 25 | padding: 0; 26 | } 27 | .countdown-digits { 28 | color: inherit; 29 | } 30 | .countdown.running { 31 | border-color: #3C9A5F; 32 | background-color: #43AC6A; 33 | } 34 | .countdown.running .countdown-digits { 35 | color: #102B1A; 36 | } 37 | .countdown.finished { 38 | border-color: #D83A20; 39 | background-color: #F04124; 40 | } 41 | .countdown.finished .countdown-digits { 42 | color: #3C1009; 43 | } 44 | .countdown.running.warning { 45 | border-color: #CFAE24; 46 | background-color: #E6C229; 47 | } 48 | .countdown.running.warning .countdown-digits { 49 | color: #39300A; 50 | } 51 | 52 | @-webkit-keyframes blink { 53 | from {opacity: 1} 54 | 50% {opacity: 0.1} 55 | to {opacity: 1} 56 | } 57 | 58 | @keyframes blink { 59 | from {opacity: 1} 60 | 50% {opacity: 0.1} 61 | to {opacity: 1} 62 | } 63 | 64 | .countdown.running.blink-colon .countdown-digits.colon { 65 | -webkit-animation: blink 2s steps(1, end) 0s infinite; 66 | animation: blink 2s steps(1, end) 0s infinite; 67 | } 68 | -------------------------------------------------------------------------------- /slides/libs/countdown-0.3.5/countdown.js: -------------------------------------------------------------------------------- 1 | var counters = {timer: {}}; 2 | var update_timer = function(timer, force = false) { 3 | var secs = timer.value; 4 | 5 | // check if we should update timer or not 6 | noup = timer.div.className.match(/noupdate-\d+/); 7 | if (!force && noup != null) { 8 | noup = parseInt(noup[0].match(/\d+$/)); 9 | if (secs > noup * 2 && secs % noup > 0) { return; } 10 | } 11 | 12 | // should we apply or remove warning class? 13 | warnwhen = timer.div.dataset.warnwhen; 14 | if (warnwhen && warnwhen > 0) { 15 | if (secs <= warnwhen && !timer.div.classList.contains("warning")) { 16 | timer.div.classList.add("warning"); 17 | } else if (secs > warnwhen && timer.div.classList.contains("warning")) { 18 | timer.div.classList.remove("warning"); 19 | } 20 | } 21 | 22 | var mins = Math.floor(secs / 60); // 1 min = 60 secs 23 | secs -= mins * 60; 24 | 25 | // Update HTML 26 | timer.min.innerHTML = String(mins).padStart(2, 0); 27 | timer.sec.innerHTML = String(secs).padStart(2, 0); 28 | } 29 | var countdown = function (e) { 30 | target = e.target; 31 | if (target.classList.contains("countdown-digits")) { 32 | target = target.parentElement; 33 | } 34 | if (target.tagName == "CODE") { 35 | target = target.parentElement; 36 | } 37 | 38 | // Init counter 39 | if (!counters.timer.hasOwnProperty(target.id)) { 40 | counters.timer[target.id] = {}; 41 | // Set the containers 42 | counters.timer[target.id].min = target.getElementsByClassName("minutes")[0]; 43 | counters.timer[target.id].sec = target.getElementsByClassName("seconds")[0]; 44 | counters.timer[target.id].div = target; 45 | } 46 | 47 | if (!counters.timer[target.id].running) { 48 | if (!counters.timer[target.id].end) { 49 | counters.timer[target.id].end = parseInt(counters.timer[target.id].min.innerHTML) * 60; 50 | counters.timer[target.id].end += parseInt(counters.timer[target.id].sec.innerHTML); 51 | } 52 | 53 | counters.timer[target.id].value = counters.timer[target.id].end; 54 | update_timer(counters.timer[target.id]); 55 | if (counters.ticker) counters.timer[target.id].value += 1; 56 | 57 | // Start if not past end date 58 | if (counters.timer[target.id].value > 0) { 59 | base_class = target.className.replace(/\s?(running|finished)/, "") 60 | target.className = base_class + " running"; 61 | counters.timer[target.id].running = true; 62 | 63 | if (!counters.ticker) { 64 | counters.ticker = setInterval(counter_update_all, 1000); 65 | } 66 | } 67 | } else { 68 | // Bump timer value if running & clicked 69 | counters.timer[target.id].value += counter_bump_increment(counters.timer[target.id].end); 70 | update_timer(counters.timer[target.id], force = true); 71 | counters.timer[target.id].value += 1; 72 | } 73 | }; 74 | 75 | var counter_bump_increment = function(val) { 76 | if (val <= 30) { 77 | return 5; 78 | } else if (val <= 300) { 79 | return 15; 80 | } else if (val <= 3000) { 81 | return 30; 82 | } else { 83 | return 60; 84 | } 85 | } 86 | 87 | var counter_update_all = function() { 88 | // Iterate over all running timers 89 | for (var i in counters.timer) { 90 | // Stop if passed end time 91 | console.log(counters.timer[i].id) 92 | counters.timer[i].value--; 93 | if (counters.timer[i].value <= 0) { 94 | counters.timer[i].min.innerHTML = "00"; 95 | counters.timer[i].sec.innerHTML = "00"; 96 | counters.timer[i].div.className = counters.timer[i].div.className.replace("running", "finished"); 97 | counters.timer[i].running = false; 98 | } else { 99 | // Update 100 | update_timer(counters.timer[i]); 101 | 102 | // Play countdown sound if data-audio=true on container div 103 | let audio = counters.timer[i].div.dataset.audio 104 | if (audio && counters.timer[i].value == 5) { 105 | counter_play_sound(audio); 106 | } 107 | } 108 | } 109 | 110 | // If no more running timers, then clear ticker 111 | var timerIsRunning = false; 112 | for (var t in counters.timer) { 113 | timerIsRunning = timerIsRunning || counters.timer[t].running 114 | } 115 | if (!timerIsRunning) { 116 | clearInterval(counters.ticker); 117 | counters.ticker = null; 118 | } 119 | } 120 | 121 | var counter_play_sound = function(url) { 122 | if (typeof url === 'boolean') { 123 | url = 'libs/countdown/smb_stage_clear.mp3'; 124 | } 125 | sound = new Audio(url); 126 | sound.play(); 127 | } 128 | 129 | var counter_addEventListener = function() { 130 | if (!document.getElementsByClassName("countdown").length) { 131 | setTimeout(counter_addEventListener, 2); 132 | return; 133 | } 134 | var counter_divs = document.getElementsByClassName("countdown"); 135 | console.log(counter_divs); 136 | for (var i = 0; i < counter_divs.length; i++) { 137 | counter_divs[i].addEventListener("click", countdown, false); 138 | } 139 | }; 140 | 141 | counter_addEventListener(); 142 | -------------------------------------------------------------------------------- /slides/libs/countdown-0.3.5/smb_stage_clear.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/libs/countdown-0.3.5/smb_stage_clear.mp3 -------------------------------------------------------------------------------- /slides/libs/countdown/countdown.css: -------------------------------------------------------------------------------- 1 | .countdown { 2 | background: inherit; 3 | position: absolute; 4 | cursor: pointer; 5 | font-size: 3em; 6 | line-height: 1; 7 | border-color: #ddd; 8 | border-width: 3px; 9 | border-style: solid; 10 | border-radius: 15px; 11 | box-shadow: 0px 4px 10px 0px rgba(50, 50, 50, 0.4); 12 | -webkit-box-shadow: 0px 4px 10px 0px rgba(50, 50, 50, 0.4); 13 | margin: 0.6em; 14 | padding: 10px 15px; 15 | text-align: center; 16 | } 17 | .countdown { 18 | display: flex; 19 | align-items: center; 20 | justify-content: center; 21 | } 22 | .countdown .countdown-time { 23 | background: none; 24 | font-size: 100%; 25 | padding: 0; 26 | } 27 | .countdown-digits { 28 | color: inherit; 29 | } 30 | .countdown.running { 31 | border-color: #3C9A5F; 32 | background-color: #43AC6A; 33 | } 34 | .countdown.running .countdown-digits { 35 | color: #102B1A; 36 | } 37 | .countdown.finished { 38 | border-color: #D83A20; 39 | background-color: #F04124; 40 | } 41 | .countdown.finished .countdown-digits { 42 | color: #3C1009; 43 | } 44 | .countdown.running.warning { 45 | border-color: #CFAE24; 46 | background-color: #E6C229; 47 | } 48 | .countdown.running.warning .countdown-digits { 49 | color: #39300A; 50 | } 51 | 52 | @-webkit-keyframes blink { 53 | from {opacity: 1} 54 | 50% {opacity: 0.1} 55 | to {opacity: 1} 56 | } 57 | 58 | @keyframes blink { 59 | from {opacity: 1} 60 | 50% {opacity: 0.1} 61 | to {opacity: 1} 62 | } 63 | 64 | .countdown.running.blink-colon .countdown-digits.colon { 65 | -webkit-animation: blink 2s steps(1, end) 0s infinite; 66 | animation: blink 2s steps(1, end) 0s infinite; 67 | } 68 | -------------------------------------------------------------------------------- /slides/libs/countdown/countdown.js: -------------------------------------------------------------------------------- 1 | var counters = {timer: {}}; 2 | var update_timer = function(timer, force = false) { 3 | var secs = timer.value; 4 | 5 | // check if we should update timer or not 6 | noup = timer.div.className.match(/noupdate-\d+/); 7 | if (!force && noup != null) { 8 | noup = parseInt(noup[0].match(/\d+$/)); 9 | if (secs > noup * 2 && secs % noup > 0) { return; } 10 | } 11 | 12 | // should we apply or remove warning class? 13 | warnwhen = timer.div.dataset.warnwhen; 14 | if (warnwhen && warnwhen > 0) { 15 | if (secs <= warnwhen && !timer.div.classList.contains("warning")) { 16 | timer.div.classList.add("warning"); 17 | } else if (secs > warnwhen && timer.div.classList.contains("warning")) { 18 | timer.div.classList.remove("warning"); 19 | } 20 | } 21 | 22 | var mins = Math.floor(secs / 60); // 1 min = 60 secs 23 | secs -= mins * 60; 24 | 25 | // Update HTML 26 | timer.min.innerHTML = String(mins).padStart(2, 0); 27 | timer.sec.innerHTML = String(secs).padStart(2, 0); 28 | } 29 | var countdown = function (e) { 30 | target = e.target; 31 | if (target.classList.contains("countdown-digits")) { 32 | target = target.parentElement; 33 | } 34 | if (target.tagName == "CODE") { 35 | target = target.parentElement; 36 | } 37 | 38 | // Init counter 39 | if (!counters.timer.hasOwnProperty(target.id)) { 40 | counters.timer[target.id] = {}; 41 | // Set the containers 42 | counters.timer[target.id].min = target.getElementsByClassName("minutes")[0]; 43 | counters.timer[target.id].sec = target.getElementsByClassName("seconds")[0]; 44 | counters.timer[target.id].div = target; 45 | } 46 | 47 | if (!counters.timer[target.id].running) { 48 | if (!counters.timer[target.id].end) { 49 | counters.timer[target.id].end = parseInt(counters.timer[target.id].min.innerHTML) * 60; 50 | counters.timer[target.id].end += parseInt(counters.timer[target.id].sec.innerHTML); 51 | } 52 | 53 | counters.timer[target.id].value = counters.timer[target.id].end; 54 | update_timer(counters.timer[target.id]); 55 | if (counters.ticker) counters.timer[target.id].value += 1; 56 | 57 | // Start if not past end date 58 | if (counters.timer[target.id].value > 0) { 59 | base_class = target.className.replace(/\s?(running|finished)/, "") 60 | target.className = base_class + " running"; 61 | counters.timer[target.id].running = true; 62 | 63 | if (!counters.ticker) { 64 | counters.ticker = setInterval(counter_update_all, 1000); 65 | } 66 | } 67 | } else { 68 | // Bump timer value if running & clicked 69 | counters.timer[target.id].value += counter_bump_increment(counters.timer[target.id].end); 70 | update_timer(counters.timer[target.id], force = true); 71 | counters.timer[target.id].value += 1; 72 | } 73 | }; 74 | 75 | var counter_bump_increment = function(val) { 76 | if (val <= 30) { 77 | return 5; 78 | } else if (val <= 300) { 79 | return 15; 80 | } else if (val <= 3000) { 81 | return 30; 82 | } else { 83 | return 60; 84 | } 85 | } 86 | 87 | var counter_update_all = function() { 88 | // Iterate over all running timers 89 | for (var i in counters.timer) { 90 | // Stop if passed end time 91 | console.log(counters.timer[i].id) 92 | counters.timer[i].value--; 93 | if (counters.timer[i].value <= 0) { 94 | counters.timer[i].min.innerHTML = "00"; 95 | counters.timer[i].sec.innerHTML = "00"; 96 | counters.timer[i].div.className = counters.timer[i].div.className.replace("running", "finished"); 97 | counters.timer[i].running = false; 98 | } else { 99 | // Update 100 | update_timer(counters.timer[i]); 101 | 102 | // Play countdown sound if data-audio=true on container div 103 | let audio = counters.timer[i].div.dataset.audio 104 | if (audio && counters.timer[i].value == 5) { 105 | counter_play_sound(audio); 106 | } 107 | } 108 | } 109 | 110 | // If no more running timers, then clear ticker 111 | var timerIsRunning = false; 112 | for (var t in counters.timer) { 113 | timerIsRunning = timerIsRunning || counters.timer[t].running 114 | } 115 | if (!timerIsRunning) { 116 | clearInterval(counters.ticker); 117 | counters.ticker = null; 118 | } 119 | } 120 | 121 | var counter_play_sound = function(url) { 122 | if (typeof url === 'boolean') { 123 | url = 'libs/countdown/smb_stage_clear.mp3'; 124 | } 125 | sound = new Audio(url); 126 | sound.play(); 127 | } 128 | 129 | var counter_addEventListener = function() { 130 | if (!document.getElementsByClassName("countdown").length) { 131 | setTimeout(counter_addEventListener, 2); 132 | return; 133 | } 134 | var counter_divs = document.getElementsByClassName("countdown"); 135 | console.log(counter_divs); 136 | for (var i = 0; i < counter_divs.length; i++) { 137 | counter_divs[i].addEventListener("click", countdown, false); 138 | } 139 | }; 140 | 141 | counter_addEventListener(); 142 | -------------------------------------------------------------------------------- /slides/libs/countdown/smb_stage_clear.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LucyMcGowan/user2020-causal-inference/6f5d3a455267e9e3e212a0c5ff3afd3f1c7ad379/slides/libs/countdown/smb_stage_clear.mp3 -------------------------------------------------------------------------------- /slides/libs/header-attrs-2.3/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /slides/libs/header-attrs/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /slides/libs/remark-css-0.0.1/default.css: -------------------------------------------------------------------------------- 1 | a, a > code { 2 | color: rgb(249, 38, 114); 3 | text-decoration: none; 4 | } 5 | .footnote { 6 | position: absolute; 7 | bottom: 3em; 8 | padding-right: 4em; 9 | font-size: 90%; 10 | } 11 | .remark-code-line-highlighted { background-color: #ffff88; } 12 | 13 | .inverse { 14 | background-color: #272822; 15 | color: #d6d6d6; 16 | text-shadow: 0 0 20px #333; 17 | } 18 | .inverse h1, .inverse h2, .inverse h3 { 19 | color: #f3f3f3; 20 | } 21 | /* Two-column layout */ 22 | .left-column { 23 | color: #777; 24 | width: 20%; 25 | height: 92%; 26 | float: left; 27 | } 28 | .left-column h2:last-of-type, .left-column h3:last-child { 29 | color: #000; 30 | } 31 | .right-column { 32 | width: 75%; 33 | float: right; 34 | padding-top: 1em; 35 | } 36 | .pull-left { 37 | float: left; 38 | width: 47%; 39 | } 40 | .pull-right { 41 | float: right; 42 | width: 47%; 43 | } 44 | .pull-right ~ * { 45 | clear: both; 46 | } 47 | img, video, iframe { 48 | max-width: 100%; 49 | } 50 | blockquote { 51 | border-left: solid 5px lightgray; 52 | padding-left: 1em; 53 | } 54 | .remark-slide table { 55 | margin: auto; 56 | border-top: 1px solid #666; 57 | border-bottom: 1px solid #666; 58 | } 59 | .remark-slide table thead th { border-bottom: 1px solid #ddd; } 60 | th, td { padding: 5px; } 61 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { background: #eee } 62 | 63 | @page { margin: 0; } 64 | @media print { 65 | .remark-slide-scaler { 66 | width: 100% !important; 67 | height: 100% !important; 68 | transform: scale(1) !important; 69 | top: 0 !important; 70 | left: 0 !important; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /slides/libs/remark-css/default.css: -------------------------------------------------------------------------------- 1 | a, a > code { 2 | color: rgb(249, 38, 114); 3 | text-decoration: none; 4 | } 5 | .footnote { 6 | position: absolute; 7 | bottom: 3em; 8 | padding-right: 4em; 9 | font-size: 90%; 10 | } 11 | .remark-code-line-highlighted { background-color: #ffff88; } 12 | 13 | .inverse { 14 | background-color: #272822; 15 | color: #d6d6d6; 16 | text-shadow: 0 0 20px #333; 17 | } 18 | .inverse h1, .inverse h2, .inverse h3 { 19 | color: #f3f3f3; 20 | } 21 | /* Two-column layout */ 22 | .left-column { 23 | color: #777; 24 | width: 20%; 25 | height: 92%; 26 | float: left; 27 | } 28 | .left-column h2:last-of-type, .left-column h3:last-child { 29 | color: #000; 30 | } 31 | .right-column { 32 | width: 75%; 33 | float: right; 34 | padding-top: 1em; 35 | } 36 | .pull-left { 37 | float: left; 38 | width: 47%; 39 | } 40 | .pull-right { 41 | float: right; 42 | width: 47%; 43 | } 44 | .pull-right ~ * { 45 | clear: both; 46 | } 47 | img, video, iframe { 48 | max-width: 100%; 49 | } 50 | blockquote { 51 | border-left: solid 5px lightgray; 52 | padding-left: 1em; 53 | } 54 | .remark-slide table { 55 | margin: auto; 56 | border-top: 1px solid #666; 57 | border-bottom: 1px solid #666; 58 | } 59 | .remark-slide table thead th { border-bottom: 1px solid #ddd; } 60 | th, td { padding: 5px; } 61 | .remark-slide thead, .remark-slide tfoot, .remark-slide tr:nth-child(even) { background: #eee } 62 | 63 | @page { margin: 0; } 64 | @media print { 65 | .remark-slide-scaler { 66 | width: 100% !important; 67 | height: 100% !important; 68 | transform: scale(1) !important; 69 | top: 0 !important; 70 | left: 0 !important; 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /slides/theme.css: -------------------------------------------------------------------------------- 1 | @import url(https://fonts.googleapis.com/css?family=Fira+Sans:300,300i,400,400i,500,500i,700,700i|IBM+Plex+Mono:400,500); 2 | @import url(//cdn.jsdelivr.net/npm/hack-font@3.3.0/build/web/hack-subset.css); 3 | 4 | body { 5 | font-family: 'Fira Sans','Droid Serif', 'Palatino Linotype', 'Book Antiqua', Palatino, 'Microsoft YaHei', 'Songti SC', serif; 6 | } 7 | 8 | a, a > code { 9 | color: #EC99C6; 10 | text-decoration: none; 11 | } 12 | 13 | strong { 14 | color: #E69F00; 15 | } 16 | 17 | em { 18 | color: #56B4E9; 19 | font-style: normal; 20 | font-weight: bold; 21 | } 22 | 23 | del { 24 | color: #E5E5E5; 25 | text-decoration: none; 26 | font-weight: bold; 27 | } 28 | 29 | .remark-code { 30 | font-family: 'IBM Plex Mono', 'Lucida Console', Monaco, monospace; 31 | font-size: 100%; 32 | } 33 | 34 | .remark-inline-code { 35 | font-family: 'Fira Sans', 'Lucida Console', Monaco, monospace; 36 | font-weight: 400; 37 | font-size: 100%; 38 | } 39 | 40 | .remark-code-line-highlighted { 41 | background-color: #CEE9FF; 42 | font-weight: 500; 43 | } 44 | 45 | .large { font-size: 130% } 46 | .medium { font-size: 115% } 47 | .small { font-size: 70% } 48 | 49 | .remark-slide-content { 50 | color: #474747; 51 | font-weight: 300; 52 | font-weight: 300; 53 | padding: 1em 2em 1em 2em 54 | } 55 | 56 | h1 { 57 | color: #56B4E9; 58 | font-weight: 500; 59 | } 60 | 61 | h2 { 62 | font-weight: 500; 63 | } 64 | 65 | .remark-slide-number { 66 | font-size: 20px; 67 | } 68 | 69 | .title-slide .remark-slide-number { 70 | display: none; 71 | } 72 | 73 | .inverse.title-slide { 74 | background-size: cover; 75 | color: #EDEEEF; 76 | } 77 | 78 | .inverse.title-slide h1 { 79 | color: #E69F00; 80 | font-size: 72px; 81 | text-shadow: none; 82 | text-align: left; 83 | vertical-align: bottom; 84 | } 85 | .inverse.title-slide h2 { 86 | color: #56B4E9; 87 | text-shadow: none; 88 | font-size: 48px; 89 | text-align: left; 90 | font-weight: bold; 91 | } 92 | .inverse.title-slide h3 { 93 | color: #EDEEEF; 94 | text-shadow: none; 95 | font-size: 36px; 96 | text-align: left; 97 | margin-bottom: 10px; 98 | } 99 | 100 | .inverse.title-slide h4 { 101 | color: #EDEEEF; 102 | text-shadow: none; 103 | font-size: 24px; 104 | text-align: left; 105 | margin-bottom: 10px; 106 | } 107 | 108 | .inverse { 109 | background-size: cover; 110 | background-color: #23373B; 111 | color: #EDEEEF; 112 | font-weight: bold; 113 | text-shadow: none; 114 | } 115 | 116 | .inverse-ns { 117 | background-size: cover; 118 | background-color: #23373B; 119 | color: #EDEEEF; 120 | text-shadow: none; 121 | font-weight: bold; 122 | } 123 | 124 | .takeaways { 125 | padding-top: 80px; 126 | } 127 | 128 | .inverse h2, .inverse h3 { 129 | color: #EDEEEF; 130 | font-weight: 500; 131 | } 132 | 133 | .inverse del { 134 | color: #6C7B7F; 135 | } 136 | 137 | img { 138 | display: block; 139 | margin-left: auto; 140 | margin-right: auto; 141 | } 142 | 143 | ul { 144 | font-size: 48px; 145 | list-style-type: none; 146 | text-align: left; 147 | font-weight: 500; 148 | padding-top: 40px; 149 | } 150 | 151 | ul li { 152 | padding-bottom: 40px; 153 | } 154 | 155 | ol { 156 | counter-reset: my-counter; 157 | list-style: none; 158 | padding-left: 40px; 159 | font-size: 45px; 160 | font-weight: bold; 161 | text-align: left; 162 | } 163 | 164 | ol li { 165 | counter-increment: my-counter; 166 | padding-left: 40px; 167 | position: relative; 168 | font-size: 45px; 169 | margin: 20px 0; 170 | display: block; 171 | margin-block-start: 0.83em; 172 | margin-block-end: 0.83em; 173 | margin-inline-start: 0; 174 | margin-inline-end: 0; 175 | } 176 | 177 | ol li::before { 178 | content: counter(my-counter); 179 | color: #fff; 180 | font-size: 40px; 181 | font-weight: bold; 182 | position: absolute; 183 | left: -25px; 184 | line-height: 50px; 185 | width: 50px; 186 | height: 50px; 187 | top: 0; 188 | background: #56B4E9; 189 | border-radius: 50%; 190 | text-align: center; 191 | } 192 | 193 | .num { 194 | position: absolute; 195 | color: #fff; 196 | font-size: 40px; 197 | font-weight: bold; 198 | line-height: 50px; 199 | width: 50px; 200 | height: 50px; 201 | background: #56B4E9; 202 | border-radius: 50%; 203 | text-align: center; 204 | } -------------------------------------------------------------------------------- /user2020-causal-inference.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | --------------------------------------------------------------------------------