├── .gitattributes
├── .gitignore
├── README.md
├── email_instructions.txt
├── exercises
│   ├── exercise_0-pdf.Rmd
│   ├── exercise_0-pdf.pdf
│   ├── exercise_0.Rmd
│   ├── exercise_0.nb.html
│   ├── exercise_0_SOLUTION.Rmd
│   ├── exercise_0_SOLUTION.nb.html
│   ├── exercise_1-number_of_parameters.Rmd
│   ├── exercise_1-number_of_parameters.pdf
│   ├── exercise_2-pdf.Rmd
│   ├── exercise_2-pdf.pdf
│   ├── exercise_2.R
│   ├── exercise_2.Rmd
│   ├── exercise_2.nb.html
│   ├── exercise_2_SOLUTION.R
│   └── prepare_data.R
├── handout
│   ├── mixed_model_handout.Rmd
│   └── mixed_model_handout.pdf
├── part0-introduction
│   ├── .Rhistory
│   ├── figures
│   │   ├── RMarkdown-example.png
│   │   ├── ch-02-markdown-margin.png
│   │   ├── data-science.png
│   │   ├── github-workshop.png
│   │   ├── magrittr.png
│   │   ├── markdownChunk2.png
│   │   └── tidy-1.png
│   ├── introduction.R
│   ├── introduction.Rmd
│   ├── introduction.html
│   ├── libs
│   │   └── remark-css
│   │       ├── default-fonts.css
│   │       └── default.css
│   └── my-theme.css
├── part1-statistical-modeling-in-r
│   ├── .Rhistory
│   ├── cognition_cutout.png
│   ├── libs
│   │   └── remark-css
│   │       ├── default-fonts.css
│   │       └── default.css
│   ├── my-theme.css
│   ├── ssk16_dat_tutorial.rda
│   ├── statistical_modeling.R
│   ├── statistical_modeling.Rmd
│   ├── statistical_modeling.html
│   └── statistical_modeling_files
│       └── figure-html
│           ├── unnamed-chunk-10-1.svg
│           ├── unnamed-chunk-2-1.svg
│           ├── unnamed-chunk-3-1.svg
│           ├── unnamed-chunk-4-1.svg
│           ├── unnamed-chunk-5-1.svg
│           ├── unnamed-chunk-57-1.svg
│           ├── unnamed-chunk-58-1.svg
│           ├── unnamed-chunk-60-1.png
│           ├── unnamed-chunk-60-1.svg
│           ├── unnamed-chunk-62-1.png
│           ├── unnamed-chunk-7-1.svg
│           ├── unnamed-chunk-71-1.svg
│           ├── unnamed-chunk-76-1.svg
│           └── unnamed-chunk-8-1.svg
└── part2-mixed-models-in-r
    ├── .Rhistory
    ├── fitted_lmms.rda
    ├── libs
    │   └── remark-css
    │       ├── default-fonts.css
    │       └── default.css
    ├── mixed_models.R
    ├── mixed_models.Rmd
    ├── mixed_models.html
    ├── mixed_models_files
    │   └── figure-html
    │       ├── unnamed-chunk-10-1.png
    │       ├── unnamed-chunk-11-1.png
    │       ├── unnamed-chunk-12-1.png
    │       ├── unnamed-chunk-13-1.png
    │       ├── unnamed-chunk-14-1.png
    │       ├── unnamed-chunk-15-1.png
    │       ├── unnamed-chunk-16-1.png
    │       ├── unnamed-chunk-17-1.png
    │       ├── unnamed-chunk-19-1.png
    │       ├── unnamed-chunk-20-1.png
    │       ├── unnamed-chunk-21-1.png
    │       ├── unnamed-chunk-22-1.png
    │       ├── unnamed-chunk-24-1.svg
    │       ├── unnamed-chunk-27-1.png
    │       ├── unnamed-chunk-3-1.png
    │       ├── unnamed-chunk-30-1.png
    │       ├── unnamed-chunk-31-1.png
    │       ├── unnamed-chunk-32-1.png
    │       ├── unnamed-chunk-33-1.png
    │       ├── unnamed-chunk-34-1.png
    │       ├── unnamed-chunk-34-1.svg
    │       ├── unnamed-chunk-35-1.png
    │       ├── unnamed-chunk-35-1.svg
    │       ├── unnamed-chunk-38-1.png
    │       ├── unnamed-chunk-39-1.png
    │       ├── unnamed-chunk-4-1.png
    │       ├── unnamed-chunk-41-1.png
    │       ├── unnamed-chunk-42-1.png
    │       ├── unnamed-chunk-43-1.png
    │       ├── unnamed-chunk-46-1.png
    │       ├── unnamed-chunk-47-1.png
    │       ├── unnamed-chunk-48-1.png
    │       ├── unnamed-chunk-49-1.png
    │       ├── unnamed-chunk-5-1.png
    │       ├── unnamed-chunk-50-1.png
    │       ├── unnamed-chunk-6-1.png
    │       ├── unnamed-chunk-7-1.png
    │       ├── unnamed-chunk-8-1.png
    │       └── unnamed-chunk-9-1.png
    ├── my-theme.css
    ├── random_effect_types.png
    └── ssk16_dat_tutorial.rda
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 |
7 | # Standard to msysgit
8 | *.doc diff=astextplain
9 | *.DOC diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot diff=astextplain
13 | *.DOT diff=astextplain
14 | *.pdf diff=astextplain
15 | *.PDF diff=astextplain
16 | *.rtf diff=astextplain
17 | *.RTF diff=astextplain
18 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Windows image file caches
2 | Thumbs.db
3 | ehthumbs.db
4 |
5 | # Folder config file
6 | Desktop.ini
7 |
8 | # Recycle Bin used on file shares
9 | $RECYCLE.BIN/
10 |
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 |
17 | # Windows shortcuts
18 | *.lnk
19 |
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 |
24 | # OSX
25 | # =========================
26 |
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 |
31 | # Thumbnails
32 | ._*
33 |
34 | # Files that might appear in the root of a volume
35 | .DocumentRevisions-V100
36 | .fseventsd
37 | .Spotlight-V100
38 | .TemporaryItems
39 | .Trashes
40 | .VolumeIcon.icns
41 |
42 | # Directories potentially created on remote AFP share
43 | .AppleDB
44 | .AppleDesktop
45 | Network Trash Folder
46 | Temporary Items
47 | .apdisk
48 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | **NOTE: This repository is not maintained or updated any more. Please visit the [successor repository](https://github.com/singmann/mixed_model_workshop_2day) which extends this workshop to two days:** https://github.com/singmann/mixed_model_workshop_2day
3 |
4 | -------------
5 |
6 | # Statistical Modeling and Mixed Models with R
7 |
8 | This repo contains slides and exercise materials for my workshop on statistical modeling and mixed models with R. Previous instances of this workshop:
9 |
10 | - The first instance of this workshop was held as part of the [Data on the Mind 2017](http://www.dataonthemind.org/2017-workshop). Title: *Statistical Models for Dependent Data: An Introduction to Mixed Models in R*
11 | - One day workshop at the University of Freiburg in June 2018. Title: *Mixed Models in R – An Applied Introduction*
12 | - One day tutorial at CogSci 2018 in Madison (Wisconsin). Title: *Mixed Models in R – An Applied Introduction*
13 |
14 | The mixed model part of the workshop is loosely based on my chapter: [An introduction to linear mixed modeling in experimental psychology.](http://singmann.org/download/publications/singmann_kellen-introduction-mixed-models.pdf)
15 | Read the chapter to get a more comprehensive overview.
16 |
17 |
18 | The repo currently contains three `html` presentations:
19 |
20 | - [Part 0: Introduction to Modern `R`](https://htmlpreview.github.io/?https://github.com/singmann/mixed_model_workshop/blob/master/part0-introduction/introduction.html)
21 | - [Part 1: Statistical Modeling in R](https://htmlpreview.github.io/?https://github.com/singmann/mixed_model_workshop/blob/master/part1-statistical-modeling-in-r/statistical_modeling.html)
22 | - [Part 2: Mixed Models in R](https://htmlpreview.github.io/?https://github.com/singmann/mixed_model_workshop/blob/master/part2-mixed-models-in-r/mixed_models.html)
23 |
24 | In addition, the repo contains a [`pdf` handout](https://github.com/singmann/mixed_model_workshop/raw/master/handout/mixed_model_handout.pdf) providing a concise overview.
25 |
26 | ### Requirements
27 | - A recent version of `R` (currently `R 3.5.1`): `https://cran.rstudio.com/`
28 | - `R` packages necessary for the analysis (install with `install.packages("package")` at the `R` prompt): `afex` (which automatically installs the additional requirements `emmeans`, `lme4`, and `car`) as well as `psych` and `MEMSS` (for example data)
29 | - `R` packages `tidyverse` and `broom` for the exercises (we mainly need `dplyr`, `broom`, `tidyr`, `purrr`, and `ggplot2`).
30 | - `R` package `xaringan` to compile the slides.
31 | - `R` package `sjstats` for Intraclass Correlation Coefficient (ICC)
32 | - Possibly `R` packages `sjPlot` and `MuMIn` for some examples.
33 | - An HTML5-compatible browser to view the slides.
34 | - `RStudio`: https://www.rstudio.com/products/rstudio/download3/#download
35 |
36 | ### Overview
37 |
38 | In order to increase statistical power and precision, many data sets in cognitive and behavioral sciences contain more than one data point from each unit of observation (e.g., participant), often across different experimental conditions. Such *repeated measures* pose a problem for most standard statistical procedures such as ordinary least-squares regression, (between-subjects) ANOVA, or generalized linear models (e.g., logistic regression), as these procedures assume that the data points are *independent and identically distributed*. In the case of repeated measures, the independence assumption is expected to be violated. For example, observations coming from the same participant are usually correlated - they are more likely to be similar to each other than two observations coming from two different participants.
39 |
40 | The goal of this workshop is to introduce a class of statistical models that is able to account for most of the cases of non-independence that are typically encountered in cognitive science – *linear mixed-effects models* (Baayen, Davidson, & Bates, 2008), or mixed models for short. Mixed models are a generalization of ordinary regression that explicitly capture the dependency among data points via random-effects parameters. Compared to traditional analysis approaches that ignore these dependencies, mixed models provide more accurate (and generalizable) estimates of the effects, improved statistical power, and non-inflated Type I errors (e.g., Barr, Levy, Scheepers, & Tily, 2013).
41 |
42 | In recent years, mixed models have become increasingly popular. One of the main reasons for this is that a number of software packages have appeared that make it possible to estimate large classes of mixed models in a relatively convenient manner. The workshop will focus on `lme4` (Bates, Mächler, Bolker, & Walker, 2015), the gold standard for estimating mixed models in `R` (R Core Team, 2018). In addition, it will introduce the functionality of `afex` (Singmann, Bolker, Westfall, & Aust, 2017), which simplifies many aspects of using `lme4`, such as the calculation of p-values for mixed models. `afex` was specifically developed with a focus on factorial designs that are common in cognitive and behavioral sciences.
43 |
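As a minimal, purely illustrative sketch of this difference (using the `sleepstudy` data that ships with `lme4`, not the workshop data):

```r
library("lme4")
library("afex")  # mixed() wraps lmer() and adds p-values
data("sleepstudy", package = "lme4")

# plain lme4: parameter estimates, but no p-values for the fixed effects
m_lme4 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)

# afex: same model, with p-values (here via the Kenward-Roger method)
m_afex <- mixed(Reaction ~ Days + (Days | Subject), sleepstudy, method = "KR")
```
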
44 | Despite a number of high impact publications that introduce mixed models to a wide variety of audiences (e.g., Baayen et al., 2008; Judd, Westfall, & Kenny, 2012) the application of mixed models in practice is far from trivial. Applying mixed models requires a number of steps and decisions that are not necessarily part of the methodological arsenal of every researcher. The goal of the workshop is to change this and to introduce mixed models in such a way that they can be effectively used and the results communicated.
45 |
46 | The workshop is split into two main parts and one interlude. The focus of the first part is not on mixed models, but on the basic knowledge of statistical modeling in R that is necessary for competently using mixed models. The second part focuses exclusively on mixed models. It introduces the key concepts and simultaneously shows how to fit mixed models of increasing complexity. Each part will take approximately 3 hours (including breaks). The time between the two parts will be used to provide a short introduction to the `tidyverse` (Wickham & Grolemund, 2017), a modern set of tools for data science in R that are especially useful in this context.
47 |
48 | Participants of the workshop need some basic knowledge of R. For example, they should be able to read in data, select subsets of the data, and estimate a linear regression model. Participants without any R knowledge will likely not profit from the workshop.
49 |
50 | ### References
51 |
52 | - Baayen, H., Davidson, D. J., & Bates, D. (2008). Mixed-effects modeling with crossed random effects for subjects and items. *Journal of Memory and Language*, 59(4), 390–412. https://doi.org/10.1016/j.jml.2007.12.005
53 | - Bates, D., Mächler, M., Bolker, B., & Walker, S. (2015). Fitting Linear Mixed-Effects Models Using lme4. *Journal of Statistical Software*, 67(1). https://doi.org/10.18637/jss.v067.i01
54 | - Barr, D. J., Levy, R., Scheepers, C., & Tily, H. J. (2013). Random effects structure for confirmatory hypothesis testing: Keep it maximal. *Journal of Memory and Language*, 68(3), 255–278. https://doi.org/10.1016/j.jml.2012.11.001
55 | - Judd, C. M., Westfall, J., & Kenny, D. A. (2012). Treating stimuli as a random factor in social psychology: A new and comprehensive solution to a pervasive but largely ignored problem. *Journal of Personality and Social Psychology*, 103(1), 54–69. https://doi.org/10.1037/a0028347
56 | - Singmann, H., Bolker, B., Westfall, J., & Aust, F. (2017). *afex: Analysis of Factorial Experiments.* R package version 0.18-0. http://cran.r-project.org/package=afex
57 | - R Core Team. (2018). *R: A Language and Environment for Statistical Computing*. Vienna, Austria: R Foundation for Statistical Computing. http://www.R-project.org/
58 | - Wickham, H., & Grolemund, G. (2017). *R for Data Science: Import, Tidy, Transform, Visualize, and Model Data.* Sebastopol CA: O’Reilly.
59 |
60 | ---
61 |
62 | Last edited: June 2018
63 |
64 | ---
65 |
66 | All code in this repository is released under the [GPL v2 or later license](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html). All non-code materials are released under the [CC-BY-SA license](https://creativecommons.org/licenses/by-sa/4.0/).
67 |
--------------------------------------------------------------------------------
/email_instructions.txt:
--------------------------------------------------------------------------------
1 | Please bring your laptop and prepare it beforehand. This includes:
2 | - Updating both R and RStudio,
3 | - installing a few R packages,
4 | - and making sure that xaringan presentations can be produced.
5 |
6 | In the interest of spending the time of the tutorial on its content, it is important that you do this at least one or two days before the tutorial. I will not have the time to solve installation problems on the day of the tutorial. So please make sure you do this beforehand!
7 |
8 | The latest version of R is 3.5.1 and can be downloaded from: https://cran.rstudio.com/
9 | The latest version of RStudio is 1.1.453 and can be downloaded from: https://www.rstudio.com/products/rstudio/download/#download
10 |
11 | Please note that both R and RStudio need to be updated independently and older versions of R/RStudio are likely to not work properly.
12 |
13 | After updating both R and RStudio, please install the following R packages [e.g., via install.packages("package")]:
14 | afex
15 | MEMSS
16 | psych
17 | tidyverse
18 | broom
19 | xaringan
20 | sjPlot
21 |
22 | After installation of these packages, please ensure that you can produce ("knit") xaringan presentations. For this, start RStudio and create a new example presentation:
23 | - In RStudio select from the menu File -> New File -> R Markdown -> From Template -> Ninja Presentation
24 | - Save the newly created RMarkdown document somewhere (e.g., as "test.Rmd" on your Desktop)
25 | - Click on "Knit" (above the code, below the menu). Note that clicking "Knit" for the first time might prompt the installation of additional packages.
26 |
27 | If successful, clicking "Knit" should create and open the example presentation ("Presentation Ninja - with xaringan ...") as an html file (e.g., "test.html"). The file will likely be opened in an RStudio internal html viewer. Clicking "Open in Browser" will open the file in a browser.
28 |
29 | All workshop materials are available from: https://github.com/singmann/mixed_model_workshop/releases
30 | Download the corresponding zip or tar.gz archive ("Source code"). This archive contains all slides and code used at the workshop.
31 | Please note that it is possible that I will update the materials up until the workshop.
32 |
--------------------------------------------------------------------------------
/exercises/exercise_0-pdf.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Exercise 0: Introduction"
3 | author: "Henrik Singmann"
4 | date: "25 July 2018"
5 | output: pdf_document
6 | ---
7 |
8 | ## Freeman, Heathcote, Chalmers, and Hockley (2010) data
9 |
10 | The data are lexical decision and word naming latencies for 300 words and 300 nonwords from 45 participants presented in Freeman et al. (2010). The 300 items in each `stimulus` condition were selected to form a balanced $2 \times 2$ design with factors neighborhood `density` (low versus high) and `frequency` (low versus high).
11 |
12 | The `task` was a between-subjects factor: 25 participants worked on the lexical decision task and 20 participants on the naming task. After excluding erroneous responses, each participant responded to between 135 and 150 words and between 124 and 150 nonwords.
13 |
14 | - Lexical decision task: Decide whether a string of letters presented on screen is a word (e.g., house) or a non-word (e.g., huese). Response times were recorded when participants pressed the corresponding response key (i.e., word or non-word).
15 | - Naming task: Read the word presented on the screen. Response times were recorded when participants started saying the presented word.
16 |
17 |
18 | ### Design
19 |
20 | The data comes with the `afex` package, so we can load it right away. But first, we load the `tidyverse` package, because we want to use its functions throughout this exercise.
21 |
22 | ```{r, message=FALSE}
23 | library("tidyverse")
24 | data("fhch2010", package = "afex") # load
25 | fhch <- droplevels(fhch2010[ fhch2010$correct,]) # remove errors
26 | str(fhch) # structure of the data
27 | ```
28 |
29 | The columns in the data are:
30 |
31 | - `id`: participant id, `factor`
32 | - `task`: `factor` with two levels indicating which task was performed: `"naming"` or `"lexdec"`
33 | - `stimulus`: `factor` indicating whether the shown stimulus was a `"word"` or `"nonword"`
34 | - `density`: `factor` indicating the neighborhood density of presented items with two levels: `"low"` and `"high"`. Density is defined as the number of words that differ from a base word by one letter or phoneme.
35 | - `frequency`: `factor` indicating the word frequency of presented items with two levels: `"low"` (i.e., words that occur less often in natural language) and `"high"` (i.e., words that occur more often in natural language).
36 | - `length`: `factor` with 3 levels (4, 5, or 6) indicating the number of characters of presented stimuli.
37 | - `item`: `factor` with 600 levels: 300 words and 300 nonwords
38 | - `rt`: response time in seconds
39 | - `log_rt`: natural logarithm of response time in seconds
40 | - `correct`: boolean indicating whether the response in the lexical decision task was correct (incorrect responses from the naming task are not part of the data).
41 |
42 |
43 | ## Exercise 1: Calculating Simple Summary Measures
44 |
45 | For this and the following exercises use the `fhch` `data.frame` (i.e., the data after removing errors).
46 |
47 | ### Part A:
48 |
49 | Use your knowledge of `dplyr` in combination with the pipe `%>%` and take the `mean` of the `rt` column, conditional on `task`. For which task are participants on average faster?
50 |
51 | Hints:
52 |
53 | - `group_by` can be used for conditioning on one or several variables. Separate more than one variable by comma.
54 | - `summarise` can be used for aggregating multiple lines into one.
55 | - The pipe `%>%` chains calls from left to right (the keyboard shortcut for the pipe is `ctrl/cmd` + `shift` + `m`). A generic sketch of the pattern follows below.
56 | - More information: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
57 |
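A purely illustrative sketch of this `group_by()`/`summarise()` pattern, shown on `mtcars` (a built-in data set unrelated to this exercise) so that the solution is not given away:

```r
# mean mpg per number of cylinders; the exercise uses the same pattern
mtcars %>%
  group_by(cyl) %>%
  summarise(m = mean(mpg))
```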
58 |
59 | ```{r}
60 | # start with:
61 | #fhch %>% ...
62 | ```
63 |
64 | ### Part B
65 |
66 | `summarise` allows using more than one aggregation function. Extend the previous code and also calculate the standard deviation, `sd()`, per task. Does the faster task also have the lower variability in RT (i.e., a smaller sd)?
67 |
68 | ```{r}
69 | # fhch %>% ...
70 | ```
71 |
72 | ### Part C
73 |
74 | Means are quite sensitive to outliers. Therefore, please recalculate `mean` and `sd` per task, after removing some extreme outliers. Here, we define outliers as RTs below .25 seconds and above 2.5 seconds. Do we still find the same pattern?
75 |
76 | Remember, the `dplyr` verb for selecting observations (i.e., rows) is `filter`. You can combine several filter conditions simply by separating them with commas in the same call to `filter()`.
77 |
78 |
79 | ```{r}
80 | # fhch %>% ...
81 |
82 | ```
83 |
84 |
85 |
86 | ## Exercise 2: Aggregating Data by ID and Plotting
87 |
88 | The `fhch` data has multiple observations (i.e., trials) per participant and cell of the design. In a traditional analysis, for example using ANOVA, one can only have one observation per participant and cell of the design. Therefore, a common task is to aggregate the data on the level of the participant and the combinations of factors one is currently interested in.
89 |
90 |
91 | ### Part A
92 |
93 | Use the data from the `"lexdec"` task only. For this, take the `mean` of the `rt` column per participant and level of the `length` factor. Save this data in a new object `agg1`.
94 |
95 | Note that to condition on more than one variable in `group_by()`, simply separate the variables by comma.
96 |
97 |
98 | ```{r}
99 | ## write code here
100 | ```
101 |
102 |
103 | ### Part B
104 |
105 | Let us take a look at the individual-level data per length level that you just created. For this, use `ggplot` and plot the level of `length` on the x-axis and the mean RTs on the y-axis.
106 |
107 | - Try both `geom_point` and `geom_jitter()`. Which looks better?
108 | - Does this plot show any clear pattern?
109 | - Can you think of a way to make this plot more informative?
110 |
111 | ```{r}
112 | ## write code here
113 | ```
114 |
115 | ```{r}
116 | ## write code here
117 | ```
118 |
119 |
120 | ### Part C
121 |
122 | Make a plot similar to the one above, but this time also condition on the `density` factor. That is, first aggregate the data again, this time for the combination of `id`, `length`, and `density`. Then plot the data as above, but also add an aesthetic for the `density` factor. Use `color` to distinguish the different levels of `density` in the plot. Can you see a pattern in this plot? If not, have a look at `position_dodge` with `geom_point`.
123 |
124 |
125 | ```{r}
126 | ## write code here
127 | ```
128 |
129 |
130 | ## Resources
131 |
132 | - `RStudio` cheat sheets: https://www.rstudio.com/resources/cheatsheets/
133 | - `RStudio`: https://github.com/rstudio/cheatsheets/raw/master/rstudio-ide.pdf
134 | - `ggplot2`: https://github.com/rstudio/cheatsheets/raw/master/data-visualization-2.1.pdf
135 | - `dplyr` & `tidyr`: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
136 |
137 | ## References
138 |
139 | - Freeman, E., Heathcote, A., Chalmers, K., & Hockley, W. (2010). Item effects in recognition memory for words. *Journal of Memory and Language*, 62(1), 1-18. https://doi.org/10.1016/j.jml.2009.09.004
140 |
141 |
142 |
--------------------------------------------------------------------------------
/exercises/exercise_0-pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/exercises/exercise_0-pdf.pdf
--------------------------------------------------------------------------------
/exercises/exercise_0.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Exercise 0: Introduction"
3 | output: html_notebook
4 | ---
5 |
6 | ## Freeman, Heathcote, Chalmers, and Hockley (2010) data
7 |
8 | The data are lexical decision and word naming latencies for 300 words and 300 nonwords from 45 participants presented in Freeman et al. (2010). The 300 items in each `stimulus` condition were selected to form a balanced $2 \times 2$ design with factors neighborhood `density` (low versus high) and `frequency` (low versus high).
9 |
10 | The `task` was a between-subjects factor: 25 participants worked on the lexical decision task and 20 participants on the naming task. After excluding erroneous responses, each participant responded to between 135 and 150 words and between 124 and 150 nonwords.
11 |
12 | - Lexical decision task: Decide whether a string of letters presented on screen is a word (e.g., house) or a non-word (e.g., huese). Response times were recorded when participants pressed the corresponding response key (i.e., word or non-word).
13 | - Naming task: Read the word presented on the screen. Response times were recorded when participants started saying the presented word.
14 |
15 |
16 | ### Design
17 |
18 | The data comes with the `afex` package, so we can load it right away. But first, we load the `tidyverse` package, because we want to use its functions throughout this exercise.
19 |
20 | ```{r, message=FALSE}
21 | library("tidyverse")
22 | data("fhch2010", package = "afex") # load
23 | fhch <- droplevels(fhch2010[ fhch2010$correct,]) # remove errors
24 | str(fhch) # structure of the data
25 | ```
26 |
27 | The columns in the data are:
28 |
29 | - `id`: participant id, `factor`
30 | - `task`: `factor` with two levels indicating which task was performed: `"naming"` or `"lexdec"`
31 | - `stimulus`: `factor` indicating whether the shown stimulus was a `"word"` or `"nonword"`
32 | - `density`: `factor` indicating the neighborhood density of presented items with two levels: `"low"` and `"high"`. Density is defined as the number of words that differ from a base word by one letter or phoneme.
33 | - `frequency`: `factor` indicating the word frequency of presented items with two levels: `"low"` (i.e., words that occur less often in natural language) and `"high"` (i.e., words that occur more often in natural language).
34 | - `length`: `factor` with 3 levels (4, 5, or 6) indicating the number of characters of presented stimuli.
35 | - `item`: `factor` with 600 levels: 300 words and 300 nonwords
36 | - `rt`: response time in seconds
37 | - `log_rt`: natural logarithm of response time in seconds
38 | - `correct`: boolean indicating whether the response in the lexical decision task was correct (incorrect responses from the naming task are not part of the data).
39 |
40 |
41 | ## Exercise 1: Calculating Simple Summary Measures
42 |
43 | For this and the following exercises use the `fhch` `data.frame` (i.e., the data after removing errors).
44 |
45 | ### Part A:
46 |
47 | Use your knowledge of `dplyr` in combination with the pipe `%>%` and take the `mean` of the `rt` column, conditional on `task`. For which task are participants on average faster?
48 |
49 | Hints:
50 |
51 | - `group_by` can be used for conditioning on one or several variables. Separate more than one variable by comma.
52 | - `summarise` can be used for aggregating multiple lines into one.
53 | - The pipe `%>%` chains calls from left to right (the keyboard shortcut for the pipe is `ctrl/cmd` + `shift` + `m`). A generic sketch of the pattern follows below.
54 | - More information: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
55 |
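A purely illustrative sketch of this `group_by()`/`summarise()` pattern, shown on `mtcars` (a built-in data set unrelated to this exercise) so that the solution is not given away:

```r
# mean mpg per number of cylinders; the exercise uses the same pattern
mtcars %>%
  group_by(cyl) %>%
  summarise(m = mean(mpg))
```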
56 |
57 | ```{r}
58 | # start with:
59 | #fhch %>% ...
60 | ```
61 |
62 | ### Part B
63 |
64 | `summarise` allows using more than one aggregation function. Extend the previous code and also calculate the standard deviation, `sd()`, per task. Does the faster task also have the lower variability in RT (i.e., a smaller sd)?
65 |
66 | ```{r}
67 | # fhch %>% ...
68 | ```
69 |
70 | ### Part C
71 |
72 | Means are quite sensitive to outliers. Therefore, please recalculate `mean` and `sd` per task, after removing some extreme outliers. Here, we define outliers as RTs below .25 seconds and above 2.5 seconds. Do we still find the same pattern?
73 |
74 | Remember, the `dplyr` verb for selecting observations (i.e., rows) is `filter`. You can combine several filter conditions simply by separating them with commas in the same call to `filter()`.
75 |
76 |
77 | ```{r}
78 | # fhch %>% ...
79 |
80 | ```
81 |
82 |
83 | ## Exercise 2: Aggregating Data by ID and Plotting
84 |
85 | The `fhch` data has multiple observations (i.e., trials) per participant and cell of the design. In a traditional analysis, for example using ANOVA, one can only have one observation per participant and cell of the design. Therefore, a common task is to aggregate the data on the level of the participant and the combinations of factors one is currently interested in.
86 |
87 |
88 | ### Part A
89 |
90 | Use the data from the `"lexdec"` task only. For this, take the `mean` of the `rt` column per participant and level of the `length` factor. Save this data in a new object `agg1`.
91 |
92 | Note that to condition on more than one variable in `group_by()`, simply separate the variables by comma.
93 |
94 |
95 | ```{r}
96 | ## write code here
97 | ```
98 |
99 |
100 | ### Part B
101 |
102 | Let us take a look at the individual-level data per length level that you just created. For this, use `ggplot` and plot the level of `length` on the x-axis and the mean RTs on the y-axis.
103 |
104 | - Try both `geom_point` and `geom_jitter()`. Which looks better?
105 | - Does this plot show any clear pattern?
106 | - Can you think of a way to make this plot more informative?
107 |
108 | ```{r}
109 | ## write code here
110 | ```
111 |
112 | ```{r}
113 | ## write code here
114 | ```
115 |
116 |
117 | ### Part C
118 |
119 | Make a plot similar to the one above, but this time also condition on the `density` factor. That is, first aggregate the data again, this time for the combination of `id`, `length`, and `density`. Then plot the data as above, but also add an aesthetic for the `density` factor. Use `color` to distinguish the different levels of `density` in the plot. Can you see a pattern in this plot? If not, have a look at `position_dodge` with `geom_point`.
120 |
121 |
122 | ```{r}
123 | ## write code here
124 | ```
125 |
126 |
127 | ## Resources
128 |
129 | - `RStudio` cheat sheets: https://www.rstudio.com/resources/cheatsheets/
130 | - `RStudio`: https://github.com/rstudio/cheatsheets/raw/master/rstudio-ide.pdf
131 | - `ggplot2`: https://github.com/rstudio/cheatsheets/raw/master/data-visualization-2.1.pdf
132 | - `dplyr` & `tidyr`: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
133 |
134 | ## References
135 |
136 | - Freeman, E., Heathcote, A., Chalmers, K., & Hockley, W. (2010). Item effects in recognition memory for words. *Journal of Memory and Language*, 62(1), 1-18. https://doi.org/10.1016/j.jml.2009.09.004
137 |
138 |
139 |
--------------------------------------------------------------------------------
/exercises/exercise_0_SOLUTION.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Exercise 0: Introduction"
3 | output: html_notebook
4 | ---
5 |
6 | ## Freeman, Heathcote, Chalmers, and Hockley (2010) data
7 |
8 | The data are lexical decision and word naming latencies for 300 words and 300 nonwords from 45 participants presented in Freeman et al. (2010). The 300 items in each `stimulus` condition were selected to form a balanced $2 \times 2$ design with factors neighborhood `density` (low versus high) and `frequency` (low versus high).
9 |
10 | The `task` was a between-subjects factor: 25 participants worked on the lexical decision task and 20 participants on the naming task. After excluding erroneous responses, each participant responded to between 135 and 150 words and between 124 and 150 nonwords.
11 |
12 | - Lexical decision task: Decide whether a string of letters presented on screen is a word (e.g., house) or a non-word (e.g., huese). Response times were recorded when participants pressed the corresponding response key (i.e., word or non-word).
13 | - Naming task: Read the word presented on the screen. Response times were recorded when participants started saying the presented word.
14 |
15 |
16 | ### Design
17 |
18 | The data comes with the `afex` package, so we can load it right away. But first, we load the `tidyverse` package, because we want to use its functions throughout this exercise.
19 |
20 | ```{r, message=FALSE}
21 | library("tidyverse")
22 | data("fhch2010", package = "afex") # load
23 | fhch <- droplevels(fhch2010[ fhch2010$correct,]) # remove errors
24 | str(fhch) # structure of the data
25 | library("tidyverse")
26 | ```
27 |
28 | The columns in the data are:
29 |
30 | - `id`: participant id, `factor`
31 | - `task`: `factor` with two levels indicating which task was performed: `"naming"` or `"lexdec"`
32 | - `stimulus`: `factor` indicating whether the shown stimulus was a `"word"` or `"nonword"`
33 | - `density`: `factor` indicating the neighborhood density of presented items with two levels: `"low"` and `"high"`. Density is defined as the number of words that differ from a base word by one letter or phoneme.
34 | - `frequency`: `factor` indicating the word frequency of presented items with two levels: `"low"` (i.e., words that occur less often in natural language) and `"high"` (i.e., words that occur more often in natural language).
35 | - `length`: `factor` with 3 levels (4, 5, or 6) indicating the number of characters of presented stimuli.
36 | - `item`: `factor` with 600 levels: 300 words and 300 nonwords
37 | - `rt`: response time in seconds
38 | - `log_rt`: natural logarithm of response time in seconds
39 | - `correct`: boolean indicating whether the response in the lexical decision task was correct (incorrect responses from the naming task are not part of the data).
40 |
41 |
42 | ## Exercise 1: Calculating Simple Summary Measures
43 |
44 | For this and the following exercises use the `fhch` `data.frame` (i.e., the data after removing errors).
45 |
46 | ### Part A:
47 |
48 | Use your knowledge of `dplyr` in combination with the pipe `%>%` and take the `mean` of the `rt` column, conditional on `task`. For which task are participants on average faster?
49 |
50 | Hints:
51 |
52 | - `group_by` can be used for conditioning on one or several variables. Separate more than one variable by comma.
53 | - `summarise` can be used for aggregating multiple lines into one.
54 | - The pipe `%>%` chains calls from left to right (the keyboard shortcut for the pipe is `ctrl/cmd` + `shift` + `m`).
55 | - More information: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
56 |
57 |
58 | ```{r}
59 | fhch %>%
60 | group_by(task) %>%
61 | summarise(m = mean(rt))
62 | ```
63 |
64 | ### Part B
65 |
66 | `summarise` allows using more than one aggregation function. Extend the previous code and also calculate the standard deviation, `sd()`, per task. Does the faster task also have the lower variability in RT (i.e., a smaller sd)?
67 |
68 |
69 | ```{r}
70 | fhch %>%
71 | group_by(task) %>%
72 | summarise(m = mean(rt),
73 | sd = sd(rt))
74 | ```
75 |
76 | ### Part C
77 |
78 | Means are quite sensitive to outliers. Therefore, please recalculate `mean` and `sd` per task, after removing some extreme outliers. Here, we define outliers as RTs below .25 seconds and above 2.5 seconds. Do we still find the same pattern?
79 |
80 | Remember, the `dplyr` verb for selecting observations (i.e., rows) is `filter`. You can combine several filter conditions simply by separating them with commas in the same call to `filter()`.
81 |
82 |
83 | ```{r}
84 | fhch %>%
85 | filter(rt > 0.25, rt < 2.5) %>%
86 | group_by(task) %>%
87 | summarise(m = mean(rt),
88 | sd = sd(rt))
89 |
90 | ```
91 |
92 |
93 | ## Exercise 2: Aggregating Data by ID and Plotting
94 |
95 | The `fhch` data has multiple observations (i.e., trials) per participant and cell of the design. In a traditional analysis, for example using ANOVA, one can only have one observation per participant and cell of the design. Therefore, a common task is to aggregate the data on the level of the participant and the combinations of factors one is currently interested in.
96 |
97 |
98 | ### Part A
99 |
100 | Use the data from the `"lexdec"` task only. For this, take the `mean` of the `rt` column per participant and level of the `length` factor. Save this data in a new object `agg1`.
101 |
102 | Note that to condition on more than one variable in `group_by()`, simply separate the variables by comma.
103 |
104 |
105 | ```{r}
106 | agg1 <- fhch %>%
107 | filter(task == "lexdec") %>%
108 | group_by(id, length) %>%
109 | summarise(mrt = mean(rt))
110 | ```
111 |
112 |
113 | ### Part B
114 |
115 | Let us take a look at the individual-level data per length level that you just created. For this, use `ggplot` and plot the level of `length` on the x-axis and the mean RTs on the y-axis.
116 |
117 | - Try both `geom_point` and `geom_jitter()`. Which looks better?
118 | - Does this plot show any clear pattern?
119 | - Can you think of a way to make this plot more informative?
120 |
121 | ```{r}
122 | ggplot(agg1, aes(x = length, y = mrt)) +
123 | geom_jitter()
124 | ```
125 |
126 | ```{r}
127 | ggplot(agg1, aes(x = length, y = mrt)) +
128 | geom_point(alpha = 0.2) +
129 | geom_violin(fill = "transparent") +
130 | stat_summary(color = "red") +
131 | theme_bw()
132 | ```
133 |
134 |
135 | ### Part C
136 |
137 | Make a plot similar to the one above, but this time also condition on the `density` factor. That is, first aggregate the data again, this time for the combination of `id`, `length`, and `density`. Then plot the data as above, but also add an aesthetic for the `density` factor. Use `color` to distinguish the different levels of `density` in the plot. Can you see a pattern in this plot? If not, have a look at `position_dodge` with `geom_point`.
138 |
139 |
140 | ```{r}
141 | agg2 <- fhch %>%
142 | filter(task == "lexdec") %>%
143 | group_by(id, length, density) %>%
144 | summarise(mrt = mean(rt))
145 | ggplot(agg2, aes(x = length, y = mrt, color = density, group = density)) +
146 | geom_point(position = position_dodge(0.25)) +
147 | stat_summary(position = position_dodge(0.25))
148 |
149 | ```
150 |
151 | ```{r}
152 | ggplot(agg2, aes(x = length, y = mrt, color = density, group = density)) +
153 | geom_point(position = position_dodge(0.25), alpha = 0.5) +
154 | stat_summary(position = position_dodge(0.25)) +
155 | theme_light()
156 |
157 | ```
158 |
159 |
160 | ## Resources
161 |
162 | - `RStudio` cheat sheets: https://www.rstudio.com/resources/cheatsheets/
163 | - `RStudio`: https://github.com/rstudio/cheatsheets/raw/master/rstudio-ide.pdf
164 | - `ggplot2`: https://github.com/rstudio/cheatsheets/raw/master/data-visualization-2.1.pdf
165 | - `dplyr` & `tidyr`: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
166 |
167 | ## References
168 |
169 | - Freeman, E., Heathcote, A., Chalmers, K., & Hockley, W. (2010). Item effects in recognition memory for words. *Journal of Memory and Language*, 62(1), 1-18. https://doi.org/10.1016/j.jml.2009.09.004
170 |
171 |
172 |
--------------------------------------------------------------------------------
/exercises/exercise_1-number_of_parameters.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Exercises I: Statistical Modeling in R"
3 | author: "Henrik Singmann"
4 | date: "25 July 2018"
5 | output: pdf_document
6 | ---
7 |
8 |
9 | ```{r setup, include=FALSE}
10 | require(psych)
11 | data(sat.act)
12 | sat.act$gender <- factor(sat.act$gender, 1:2, labels = c("male", "female"))
13 | sat.act$education <- factor(sat.act$education)
14 | sat.act <- na.omit(sat.act)
15 | ```
16 |
17 | # Formula Interface for Statistical Models: `~`
18 |
19 | - Allows symbolic specification of statistical model, e.g. linear models: `lm(ACT ~ SATQ, sat.act)`
20 | - Everything to the left of `~` is the dependent variable.
21 | - Independent variables are to the right of the `~`:
22 |
23 | | Formula | | Interpretation |
24 | | ------------------------|---|----------------------------------|
25 | | `~ x` or `~1+x` || Intercept and main effect of `x` |
26 | | ` ~ x-1` or `~0 + x` || Only main effect of `x` and no intercept (questionable) |
27 | | `~ x+y` || Main effects of `x` and `y`|
28 | | `~ x:y` || Interaction between `x` and `y` (and no main effect) |
29 | | `~ x*y` or `~ x+y+x:y` || Main effects and interaction between `x` and `y` |
30 |
31 |
32 | # Continuous Variables: How many Parameters in each Model?
33 |
34 | ```{r, eval=FALSE}
35 | lm(ACT ~ SATQ_c + SATV_c, sat.act) # a
36 | lm(ACT ~ SATQ_c : SATV_c, sat.act) # b
37 | lm(ACT ~ 0 + SATQ_c:SATV_c, sat.act) # c
38 | lm(ACT ~ SATQ_c*SATV_c, sat.act) # d
39 | lm(ACT ~ 0+SATQ_c*SATV_c, sat.act) # e
40 | ```
41 |
42 | # Categorical Variables: How many Parameters in each Model?
43 |
44 | ```{r, eval=FALSE}
45 | lm(ACT ~ gender, sat.act) # a
46 | lm(ACT ~ 0+gender, sat.act) # b
47 | lm(ACT ~ gender+education, sat.act) # c
48 | lm(ACT ~ 0+gender+education, sat.act) # d
49 | lm(ACT ~ gender:education, sat.act) # e
50 | lm(ACT ~ 0+gender:education, sat.act) # f
51 | lm(ACT ~ gender*education, sat.act) # g
52 | lm(ACT ~ 0+gender*education, sat.act) # h
53 | lm(ACT ~ gender+gender:education, sat.act) # i
54 | ```
55 |
56 | ```{r}
57 | levels(sat.act$gender) ## 2
58 | levels(sat.act$education) ## 6
59 | ```
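
One way to check your answers once you have made a guess (a sketch; `coef()` returns one entry per parameter, including `NA` entries for aliased parameters):

```r
# e.g., for model (a) of the categorical case:
length(coef(lm(ACT ~ gender, sat.act)))
```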
60 |
--------------------------------------------------------------------------------
/exercises/exercise_1-number_of_parameters.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/exercises/exercise_1-number_of_parameters.pdf
--------------------------------------------------------------------------------
/exercises/exercise_2-pdf.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Exercise 2: Identifying Random Effects-Structure"
3 | author: "Henrik Singmann"
4 | date: "25 July 2018"
5 | output: pdf_document
6 | ---
7 |
8 | # Exercise 2: Identifying the Random-Effects Structure
9 |
10 | Your task is to identify the *maximal random-effects structure justified by the design* (Barr, Levy, Scheepers, & Tily, 2013) for one data set and implement this structure in `lme4::lmer` syntax.
11 |
12 |
13 | # Freeman, Heathcote, Chalmers, and Hockley (2010)
14 |
15 | Lexical decision and word naming latencies for 300 words and 300 nonwords presented in Freeman, Heathcote, Chalmers, and Hockley (2010). The study had one between-subjects factor, `task` with two levels (`"naming"` or `"lexdec"`), and four within-subjects factors: `stimulus` type with two levels (`"word"` or `"nonword"`), word `density` and word `frequency` each with two levels (`"low"` and `"high"`), and stimulus `length` with three levels (`4`, `5`, and `6`).
16 |
17 | The data comes with `afex` as `fhch2010`:
18 | ```{r}
19 | data("fhch2010", package = "afex")
20 | str(fhch2010)
21 | ```
22 |
23 | What is the maximal random-effects structure justified by the design for this data set for the dependent variable `log_rt`?
24 |
25 | ```{r, eval=FALSE}
26 | mixed(log_rt ~ ...)
27 |
28 | ```
29 |
30 |
31 | ## References
32 | - Barr, D. J., Levy, R., Scheepers, C., & Tily, H. J. (2013). Random effects structure for confirmatory hypothesis testing: Keep it maximal. *Journal of Memory and Language*, 68(3), 255-278. https://doi.org/10.1016/j.jml.2012.11.001
33 | - Freeman, E., Heathcote, A., Chalmers, K., & Hockley, W. (2010). Item effects in recognition memory for words. *Journal of Memory and Language*, 62(1), 1-18. https://doi.org/10.1016/j.jml.2009.09.004
34 |
--------------------------------------------------------------------------------
/exercises/exercise_2-pdf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/exercises/exercise_2-pdf.pdf
--------------------------------------------------------------------------------
/exercises/exercise_2.R:
--------------------------------------------------------------------------------
1 | ## ------------------------------------------------------------------------
2 | data("fhch2010", package = "afex")
3 | str(fhch2010)
4 |
5 | ## ---- eval=FALSE---------------------------------------------------------
6 | ## mixed(log_rt ~ ...)
7 | ##
8 |
9 |
--------------------------------------------------------------------------------
/exercises/exercise_2.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Exercise 2: Identifying Random Effects-Structure"
3 | output: html_notebook
4 | ---
5 |
6 | # Exercise 2: Identifying the Random-Effects Structure
7 |
8 | Your task is to identify the *maximal random-effects structure justified by the design* (Barr, Levy, Scheepers, & Tily, 2013) for one data set and implement this structure in `lme4::lmer` syntax.
9 |
10 |
11 | # Freeman, Heathcote, Chalmers, and Hockley (2010)
12 |
13 | Lexical decision and word naming latencies for 300 words and 300 nonwords presented in Freeman, Heathcote, Chalmers, and Hockley (2010). The study had one between-subjects factor, `task` with two levels (`"naming"` or `"lexdec"`), and four within-subjects factors: `stimulus` type with two levels (`"word"` or `"nonword"`), word `density` and word `frequency` each with two levels (`"low"` and `"high"`), and stimulus `length` with three levels (`4`, `5`, and `6`).
14 |
15 | The data comes with `afex` as `fhch2010`:
16 | ```{r}
17 | data("fhch2010", package = "afex")
18 | str(fhch2010)
19 | ```
20 |
21 | What is the maximal random-effects structure justified by the design for this data set for the dependent variable `log_rt`?
22 |
23 | ```{r, eval=FALSE}
24 | mixed(log_rt ~ ...)
25 |
26 | ```
27 |
28 |
29 | ## References
30 | - Barr, D. J., Levy, R., Scheepers, C., & Tily, H. J. (2013). Random effects structure for confirmatory hypothesis testing: Keep it maximal. *Journal of Memory and Language*, 68(3), 255-278. https://doi.org/10.1016/j.jml.2012.11.001
30 | - Freeman, E., Heathcote, A., Chalmers, K., & Hockley, W. (2010). Item effects in recognition memory for words. *Journal of Memory and Language*, 62(1), 1-18. https://doi.org/10.1016/j.jml.2009.09.004
32 |
33 |
34 |
--------------------------------------------------------------------------------
/exercises/exercise_2_SOLUTION.R:
--------------------------------------------------------------------------------
1 | ## ------------------------------------------------------------------------
2 | data("fhch2010", package = "afex")
3 | str(fhch2010)
4 | # 'data.frame': 13222 obs. of 10 variables:
5 | # $ id : Factor w/ 45 levels "N1","N12","N13",..: 1 1 1 1 1 1 1 1 1 1 ...
6 | # $ task : Factor w/ 2 levels "naming","lexdec": 1 1 1 1 1 1 1 1 1 1 ...
7 | # $ stimulus : Factor w/ 2 levels "word","nonword": 1 1 1 2 2 1 2 2 1 2 ...
8 | # $ density : Factor w/ 2 levels "low","high": 2 1 1 2 1 2 1 1 1 1 ...
9 | # $ frequency: Factor w/ 2 levels "low","high": 1 2 2 2 2 2 1 2 1 2 ...
10 | # $ length : Factor w/ 3 levels "4","5","6": 3 3 2 2 1 1 3 2 1 3 ...
11 | # $ item : Factor w/ 600 levels "abide","acts",..: 363 121 202 525 580 135 42 368 227 141 ...
12 | # $ rt : num 1.091 0.876 0.71 1.21 0.843 ...
13 | # $ log_rt : num 0.0871 -0.1324 -0.3425 0.1906 -0.1708 ...
14 | # $ correct : logi TRUE TRUE TRUE TRUE TRUE TRUE ...
15 |
16 | ## ---- eval=FALSE---------------------------------------------------------
17 | ## mixed(log_rt ~ ...)
18 | ##
19 |
20 | library("afex")  # provides mixed(); not loaded above
21 | m_fhch <- mixed(log_rt ~ task*stimulus*density*frequency*length +
22 |                   (stimulus*density*frequency*length||id) +
23 |                   (task||item), fhch2010,
24 |                 method = "S", expand_re = TRUE)
25 | 
--------------------------------------------------------------------------------
/exercises/prepare_data.R:
--------------------------------------------------------------------------------
1 | # This file prepares the data as described in https://osf.io/j4swp/
2 | # With a few additional additions
3 |
4 | # you might need to set the correct working directory via the menu:
5 | # Session - Set Working Directory - To Source File Location
6 |
7 | load("ssk16_dat_online.rda") # data comes in 4 data frames per
8 | # dw_1$group <-"P(if,then)"
9 | # dw_2$group <-"Acc(if,then)"
10 | # dw_3$group <-"P(Even)"
11 | # dw_4$group <-"Acc(Even)"
12 |
13 | dw_1$dv_question <- "probability"
14 | dw_2$dv_question <- "acceptability"
15 | dw_3$dv_question <- "probability"
16 | dw_4$dv_question <- "acceptability"
17 |
18 | dw_1$conditional <- "indicative"
19 | dw_2$conditional <- "indicative"
20 | dw_3$conditional <- "concessive"
21 | dw_4$conditional <- "concessive"
22 |
23 | dw_1$lfdn <- factor(paste(as.character(dw_1$lfdn), "P(if,then)", sep ="_"))
24 | dw_2$lfdn <- factor(paste(as.character(dw_2$lfdn), "Acc(if,then)", sep ="_"))
25 | dw_3$lfdn <- factor(paste(as.character(dw_3$lfdn), "P(Even)", sep ="_"))
26 | dw_4$lfdn <- factor(paste(as.character(dw_4$lfdn), "Acc(Even)", sep ="_"))
27 |
28 | names(dw_1)[names(dw_1) == 'P'] <- 'DV'
29 | names(dw_2)[names(dw_2) == 'ACC'] <- 'DV'
30 | names(dw_3)[names(dw_3) == 'PEven'] <- 'DV'
31 | names(dw_4)[names(dw_4) == 'ACCEven'] <- 'DV'
32 |
33 | dw <- rbind(dw_1, dw_2, dw_3, dw_4)
34 |
35 | # center IVs and DV at midpoint of scale
36 | dat <- within(dw, {
37 | c_given_a <- (CgivenA-50)/100
38 | dv <- (DV-50)/100
39 | #group <- factor(group, levels = c("P(if,then)", "Acc(if,then)", "P(Even)", "Acc(Even)"))
40 | dv_question <- factor(dv_question, levels = c("probability", "acceptability"))
41 | conditional <- factor(conditional, levels = c("indicative", "concessive"))
42 | })
43 |
44 | dat$AC <- NULL
45 | dat$conclusion <- NULL
46 |
47 | dat <- droplevels(dat[ dat$conditional == "indicative", ])
48 | dat$conditional <- NULL
49 | dat$type <- NULL
50 |
51 | dat <- dplyr::rename(dat, p_id = lfdn, i_id = le_nr)
52 | length(levels(dat$p_id))
53 |
54 | save(dat, file="ssk16_dat_preapred.rda")
55 |
56 | dat <- droplevels(dat[ dat$dv_question == "probability", ])
57 | dat$dv_question <- NULL
58 |
59 | save(dat, file="ssk16_dat_preapred_ex1.rda")
60 |
61 | ### latest preparation (July 2018)
62 |
63 | library("tidyverse")
64 |
65 | dat <- dat %>%
66 | rename(B_given_A = CgivenA,
67 | if_A_then_B = DV,
68 | B_given_A_c = c_given_a,
69 | if_A_then_B_c = dv) %>%
70 | select(p_id, i_id, B_given_A, B_given_A_c, if_A_then_B, if_A_then_B_c, rel_cond)
71 |
72 | save(dat, file = "ssk16_dat_tutorial.rda")
--------------------------------------------------------------------------------
/handout/mixed_model_handout.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Mixed Models in R - A Practical Introduction"
3 | author: "Henrik Singmann"
4 | date: "November 2018"
5 | output: pdf_document
6 | ---
7 |
8 | ```{r setup, include=FALSE}
9 | knitr::opts_chunk$set(echo = TRUE)
10 | ```
11 |
12 | ### Overview: Statistical Models in R
13 |
14 | 1. Identify the probability distribution of the data (more correctly: of the conditional distribution of the response)
15 | 2. Make sure variables are of correct type via `str()`
16 | 3. Set appropriate contrasts (orthogonal contrasts if model includes interaction): `afex::set_sum_contrasts()`
17 | 4. Describe statistical model using `formula`
18 | 5. Fit model: pass `formula` and `data.frame` to corresponding modeling function (e.g., `lm()`, `glm()`)
19 | 6. Check model fit (e.g., inspect residuals)
20 | 7. Test terms (i.e., main effects and interactions): Pass fitted model to `car::Anova()`
21 | 8. Follow-up tests:
22 | - Estimated marginal means: Pass fitted model to `emmeans::emmeans()`
23 | - Specify specific contrasts on estimated marginal means (e.g., `contrast()`, `pairs()`)
24 |
25 | - `afex` combines fitting (5.) and testing (7.):
26 | - ANOVAs: `afex::aov_car()`, `afex::aov_ez()`, or `afex::aov_4()`
27 |     - (Generalized) linear mixed-effects models: `afex::mixed()` (a sketch of the full modeling sequence follows after this list)
28 |
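A minimal sketch of this sequence, using the `sat.act` data from the `psych` package (the same data as in the exercises; output omitted):

```r
library("psych")    # provides sat.act
library("afex")     # set_sum_contrasts()
library("car")      # Anova()
library("emmeans")  # emmeans()

data(sat.act)
sat.act$gender <- factor(sat.act$gender, 1:2, labels = c("male", "female"))
str(sat.act)                          # step 2: check variable types
set_sum_contrasts()                   # step 3: sum-to-zero contrasts
m <- lm(ACT ~ gender * age, sat.act)  # steps 4 + 5: formula, then fit
plot(m, which = 1)                    # step 6: residuals vs. fitted values
Anova(m, type = 3)                    # step 7: test model terms
emmeans(m, "gender")                  # step 8: estimated marginal means
```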
29 |
30 | ### `R` Formula Interface for Statistical Models: `~`
31 |
32 | - `R` `formula` interface allows symbolic specification of statistical models, e.g. linear models:
33 | `lm(y ~ x, data)`
34 | - Dependent variable(s) left of `~` (can be multivariate or missing), independent variables right of `~`:
35 |
36 | | Formula | | Interpretation |
37 | | ------------------------|---|----------------------------------|
38 | | `~ x` or `~1+x` || Intercept and main effect of `x` |
39 | | ` ~ x-1` or `~0 + x` || Only main effect of `x` and no intercept (questionable) |
40 | | `~ x+y` || Main effects of `x` and `y`|
41 | | `~ x:y` || Interaction between `x` and `y` (and no main effect) |
42 | | `~ x*y` or `~ x+y+x:y` || Main effects and interaction between `x` and `y` |
43 |
44 |
45 | - **Formulas behave differently for continuous and categorical covariates!!**
46 | + Always use `str(data)` before fitting: `int` & `num` is continuous, `Factor` or `character` is categorical.
47 | + Categorical/nominal variables have to be `factor`s. Create via `factor()`.
48 |
49 | - Categorical variables are transformed into numerical variables using contrast functions (via `model.matrix()`; see Cohen et al., 2002)
50 | + **If models include interactions, orthogonal contrasts (e.g., `contr.sum`) in which the intercept corresponds to the (unweighted) grand mean should be used**: `afex::set_sum_contrasts()`
51 | + Dummy/treatment contrasts (`R` default) lead to simple effects for lower order effects.
52 |     + For linear models: Coding only affects the interpretation of parameters/tests, not the overall model fit (see the sketch below).
53 |
54 | - For models with only numerical covariates, suppressing intercept works as expected.
55 | - For models with categorical covariates, suppressing intercept or other lower-order effects often leads to very surprising results (and should generally be avoided).
56 |
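A small sketch of the coding point above (assuming `sat.act` prepared as in the earlier sketch):

```r
# treatment coding (R default) vs. sum-to-zero coding:
# parameter meanings differ, model fit is identical
m_treat <- lm(ACT ~ gender, sat.act)
m_sum   <- lm(ACT ~ gender, sat.act,
              contrasts = list(gender = "contr.sum"))
coef(m_treat)   # intercept = mean of the reference level
coef(m_sum)     # intercept = unweighted grand mean of the group means
logLik(m_treat) # identical to logLik(m_sum)
```
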
57 | ### Tests of Model Terms/Effects with `car::Anova()`
58 | - `car::Anova(model, type = 3)` general solution for testing effects.
59 | - Type II and III tests equivalent for balanced designs (i.e., equal group sizes) and highest-order effect.
60 | - Type III tests require orthogonal contrasts (e.g.,`contr.sum`); recommended:
61 | + For experimental designs in which imbalance is completely random and not structural,
62 | + Complete cross-over interactions (i.e., main effects in presence of interaction) possible.
63 | - Type II are more appropriate if imbalance is structural (i.e., observational data).
64 |
65 | ### Follow-Up Tests
66 | - Choice of follow-up test after significant interactions based on research questions
67 | - Simple effects (e.g., main effect of one factor conditional on other factor[s])
68 | - Comparison of specific cell means
69 | - Two approaches for follow-up tests:
70 | - Model based using `emmeans` (assumes assumptions hold and uses shared error term)
71 |   - Splitting data and running separate models for each split (assumes assumptions do not hold, uses separate error terms)
72 | - When splitting data or using `emmeans::test()`, adjustment for multiple testing needs to be done by hand; e.g., pass $p$-values to `p.adjust()`
73 |
74 |
75 |
76 | ### Follow-up Tests with `emmeans` (Formerly `lsmeans`)
77 | - `emmeans(model, c("factor"))` (or `emmeans(model, ~factor)`) produces estimated marginal means (or least-squares means for linear regression) for model terms (e.g., `emmeans(m6, c("education", "gender"))`; see the sketch below).
78 | - Additional functions allow specifying contrasts/follow-up tests on the means, e.g.:
79 | + `pairs()` tests all pairwise comparisons among means.
80 |     + `contrast()` allows defining arbitrary contrasts on marginal means.
81 | + `test(..., joint = TRUE)` for joint tests (e.g., simple effects if using `by`).
82 | + For more examples see vignettes: https://cran.r-project.org/package=emmeans
83 |
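A short sketch of such a follow-up (`m6` stands in for a fitted model from the slides that contains `education` and `gender`):

```r
em <- emmeans(m6, c("education", "gender"))
pairs(em, adjust = "holm")         # all pairwise comparisons, Holm-adjusted
contrast(em, method = "pairwise")  # same comparisons via contrast()
```
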
84 | ### ANOVAs with afex
85 |
86 | - `afex` ANOVA functions require column with participant ID:
87 | + `afex::aov_car()` allows specification of ANOVA using `aov`-like formula. Specification of participant id in `Error()` term. For example:
88 | `aov_car(dv ~ between_factor + Error(id/within_factor), data)`
89 | + `afex::aov_4()` allows specification of ANOVA using `lme4`-like formula. Specification of participant id in random term. For example:
90 | `aov_4(dv ~ between_factor + (within_factor|id), data)`
91 | + `afex::aov_ez()` allows specification of ANOVA using characters. For example:
92 | `aov_ez("id", "dv", data, between = "between_factor", within = "within_factor")`
93 | - All `afex` ANOVA functions return the same results (they only differ in how the model is specified)
94 |
95 | ### Repeated-Measures, IID Assumption, & Pooling
96 |
97 | - Ordinary linear regression, between-subjects ANOVA, and basically all standard statistical models share one assumption: Data points are *independent and identically distributed* (*iid*).
98 | + Independence assumption refers to residuals: After taking structure of model (i.e., parameters) into account, probability of a data point having a specific value is independent of all other data points.
99 | + Identical distribution: All observations sampled from same distribution.
100 | - For repeated measures, the independence assumption is often violated, which can have dramatic consequences for the significance tests of the model (e.g., increased or decreased Type I errors).
101 | - Three ways to deal with repeated-measures:
102 | 1. *Complete pooling*: Ignore dependency in data (often not appropriate, results likely biased)
103 |   2. *No pooling*: Two-step procedure: (1) Separate the data based on the factor producing the dependency and fit a separate statistical model to each subset. (2) Analyze the distribution of the estimates from step 1. (Prone to overfitting, which decreases the precision of parameter estimates; estimation error accumulates in step 2; combining and analyzing the individual estimates can be non-trivial if interest is in more than one parameter.)
104 |   3. *Partial pooling*: Analyse the data jointly while taking the dependency into account (gold standard, e.g., mixed models). All three approaches are sketched below.
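A hedged sketch of the three approaches (data frame `dat` with `dv`, covariate `x`, and participant `id` is hypothetical):

```r
library("lme4")
m_complete <- lm(dv ~ x, data = dat)               # complete pooling: ignores id
m_none     <- lmList(dv ~ x | id, data = dat)      # no pooling: one model per id
m_partial  <- lmer(dv ~ x + (x | id), data = dat)  # partial pooling: mixed model
```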
105 |
106 |
107 |
108 | ### Mixed Models
109 |
110 | - Mixed models extend regular regression models via *random-effects parameters* that account for dependencies among related data points.
111 | - __Fixed Effects__
112 | - Overall or *population-level average* effect of specific model term (i.e., main effect, interaction, parameter) on dependent variable
113 | - Independent of stochastic variability controlled for by random effects
114 | - Hypothesis tests on fixed effect interpreted as hypothesis tests for terms in standard ANOVA or regression model
115 | - Possible to test specific hypotheses among factor levels (e.g., planned contrasts)
116 | - *Fixed-effects parameters*: Overall effect of specific model term on dependent variable
117 | - __Random Effects__
118 |   - *Random-effects grouping factors*: Categorical variables that capture random or stochastic variability (e.g., participants, items, groups, or other hierarchical structures).
119 | - In experimental settings, random-effects grouping factors often part of design one wants to generalize over.
120 |   - Random effects factor out idiosyncrasies of the sample, thereby providing a more general estimate of the fixed effects of interest.
121 | - *Random-effects parameters*:
122 | + Provide each level of random-effects grouping factor with idiosyncratic parameter set.
123 |     + Zero-centered offsets/displacements for each level of the random-effects grouping factor
124 |     + Added to a specific fixed-effects parameter
125 |     + Assumed to follow a normal distribution, which provides _hierarchical shrinkage_ and thereby avoids over-fitting
126 |     + Should be added to each parameter that varies within the levels of a random-effects grouping factor (i.e., the factor is *crossed* with the random-effects grouping factor)
127 |     + Note: Random-effects parameters (i.e., random slopes) can only be added to a parameter if there exist multiple data points (i.e., replications) for each combination of a level of the random-effects grouping factor and the parameter (e.g., each cell of the corresponding factor or design)
128 |
129 |
130 | ### Random-Effects Parameters in `lme4`/`afex`
131 |
132 | | Formula | Interpretation |
133 | | ------------------------|----------------------------------|
134 | | `(1|s)` | random intercepts for `s` (i.e., by-`s` random intercepts) |
135 | | `(1|s) + (1|i)` | by-`s` and by-`i` (i.e., crossed) random intercepts |
136 | | `(a|s)` or `(1+a|s)` | by-`s` random intercepts and by-`s` random slopes for `a` plus their correlation|
137 | | `(a*b|s)` | by-`s` random intercepts and by-`s` random slopes for `a`, `b`, and the `a:b` interaction plus correlations among the by-`s` random effects parameters |
138 | | `(0+a|s)` | by-`s` random slopes for `a` and no random intercept |
139 | | `(a||s)` | by-`s` random intercepts and by-`s` random slopes for `a`, but no correlation (expands to: `(0+a|s) + (1|s)`) |
140 | *Note.* Suppressing the correlation parameters via `||` works only for numerical covariates in `lmer` and not for factors. `afex` provides the functionality to suppress the correlations also among factors if the argument `expand_re = TRUE` is set in the call to `mixed()` (see also the function `lmer_alt()`).
141 |
142 | Examples:
143 | `mixed(dv ~ within_s_factor * within_i_factor + (within_s_factor|s) + (within_i_factor|i), data, method = "S")`
144 | `mixed(dv ~ within_s_factor + (within_s_factor||s), data, method = "S", expand_re = TRUE)`
145 |
146 | ### Crossed Versus Nested Factors
147 |
148 | - Factor `A` is **crossed** with factor `B` if multiple levels of `A` appear within multiple levels of `B`. Note that this definition allows for missing values (i.e., it does not need to hold that all levels of `A` appear in all levels of `B`). For example:
149 | - Levels `a1`, `a2`, ... of `A` appear in `b1` of `B` and in `b2` of `B`, etc.
150 | - A within-subject factor (e.g., `congruency`) is crossed with the `participant` factor.
151 | - If each participant responds to a random subset of items and each item is responded to by several participants, `participant` and `item` are crossed.
152 |
153 |
154 | - Factor `A` is **nested** within factor `B` if some levels of `A` appear only within specific levels of factor `B`. For example:
155 | - Levels `a1`, `a2`, and `a3` of `A` appear only in `b1` of `B` and `a4`, `a5`, and `a6` of `A` appear only in `b2` of `B`
156 | - Participants are nested in a between-subjects factor (e.g., `group`), because each level of `participant` only provides data for one level of the factor.
157 | - If student can be member of one class only and several classes were observed, factor `student` is nested within factor `class`.
158 |
159 |
160 | - Both dependency structures are dealt with in the same conceptual manner, via independent random-effects parameters. Specifically, both need independent random-effects terms in the model formula. For example:
161 |   - For `students` nested within `class`, where each student has a unique label (i.e., `student` id 1 is assigned to exactly one student and not to different students in different classes), at least:
162 | `... + (1|student) + (1|class)`
163 |   - If an additional factor `A` is crossed with `class`, but not with `student` (e.g., some students in each class receive treatment `a1`, some others `a2`), by-class random slopes need to be added (see also the sketch below):
164 | `... + (1|student) + (A|class)`
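If student labels are reused across classes (e.g., both classes contain a student labeled `s1`), unique IDs should be created first; a hedged sketch (all object names hypothetical):

```r
dat$student <- interaction(dat$class, dat$student, drop = TRUE)  # unique per class
m <- lme4::lmer(dv ~ A + (1 | student) + (A | class), data = dat)
```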
165 |
166 |
167 | ### Hypothesis-Tests for Mixed Models
168 |
169 | - `lme4::lmer` does not include *p*-values.
170 | - `afex::mixed` provides four different methods:
171 |   1. Kenward-Roger (`method="KR"`, default): Provides the best protection against anti-conservative results, but requires a lot of RAM for complicated random-effects structures.
172 |   2. Satterthwaite (`method="S"`): Similar to KR, but requires less RAM.
173 |   3. Parametric bootstrap (`method="PB"`): Simulation-based, can take a lot of time (can be sped up using parallel computation).
174 |   4. Likelihood-ratio tests (`method="LRT"`): Provides the weakest protection against anti-conservative results. Can be used if all else fails or if all random-effects grouping factors have many levels (e.g., over 50).
175 | - `afex::mixed` uses orthogonal contrasts by default; these are necessary for categorical variables in interactions (see the sketch below).
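A hedged sketch of fitting and testing such a model with `afex::mixed` (data frame `dat`, factor `a`, and participant `id` are hypothetical):

```r
library("afex")
m <- mixed(dv ~ a + (a | id), data = dat, method = "S")  # Satterthwaite tests
nice(m)  # ANOVA-style table of the fixed effects with p-values
```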
176 |
177 | ### Random-Effects Structure
178 |
179 | - Omitting random-effects parameters for model terms which vary within the levels of a random-effects grouping factor and for which random variability exists leads to non-iid residuals (i.e., $\epsilon$) and anti-conservative results (e.g., Barr, Levy, Scheepers, & Tily, 2013).
180 | - Safeguard is *maximal model justified by the design*.
181 | - If the maximal model is overparameterized, contains degenerate estimates, and/or produces singular fits, its power may be reduced and a reduced model may be considered (Bates et al., 2015; Matuschek et al., 2017); however, reducing the model introduces an unknown risk of anti-conservativity and should be done with caution.
182 | - Steps for running a mixed model analysis:
183 | 1. Identify desired fixed-effects structure
184 | 2. Identify random-effects grouping factors
185 | 3. Identify *maximal model justified by the design*:
186 |     - Which factors/terms vary within levels of (i.e., are crossed with) each random-effects grouping factor?
187 | - Are there replicates within factor levels (or parameters/coefficients) for levels of random-effects grouping factor?
188 | 4. Choose method for calculating *p*-values and fit maximal model
189 | 5. Iteratively reduce random-effects structure until all degenerate/zero-variance random-effects parameters are removed.
190 | - If the maximal model shows critical convergence warnings, reducing the random-effects structure is probably indicated, even though this introduces an unknown risk of anti-conservativity (see the sketch after this list):
191 | - Start by removing the correlation among random-effects parameters
192 | - Remove random-effects parameters for highest-order effects with lowest variance
193 | - It can sometimes help to try different optimizers
194 | - Compare *p*-values/fixed-effects estimates across models (*p*-values from degenerate/minimal models are not reliable)
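A hedged sketch of such a reduction sequence, assuming a recent `afex` version in which the fitted `lme4` model is stored in `$full_model` (all data and object names hypothetical):

```r
m_max <- mixed(dv ~ a + (a | id), data = dat, method = "S")  # maximal model
m_nc  <- mixed(dv ~ a + (a || id), data = dat, method = "S",
               expand_re = TRUE)    # first remove the correlation parameter
lme4::VarCorr(m_nc$full_model)      # inspect remaining variance estimates
m_red <- mixed(dv ~ a + (1 | id), data = dat, method = "S")  # then drop zero-variance slopes
```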
195 |
196 | ### GLMMs: Mixed-models with Alternative Distributional Assumptions
197 |
198 | - Not all data can be reasonably described by a normal distribution.
199 | - Generalized-linear mixed models (GLMMs; e.g., Jaeger, 2008) allow for other distributions. For example:
200 | - Binomial distribution: Repeated-measures logistic regression
201 | - Poisson distribution for count data
202 | - Gamma distribution for non-negative data (e.g., RTs)
203 | - GLMMs require specification of the conditional distribution of the response (`family`) and link function.
204 | - Link function determines how values on the untransformed (linear-predictor) scale are mapped onto the response scale.
205 | - Specification of the random-effects structure is conceptually identical to LMMs.
206 | - GLMMs only allow two methods for hypothesis testing: `"LRT"` or `"PB"`.
207 | - Inspection of residuals/model fit is more important for GLMMs than for LMMs: R package [`DHARMa`](https://cran.r-project.org/package=DHARMa) (see the sketch below)
208 | - Fit with `lme4::glmer` or `afex::mixed`, both require `family` argument (e.g., `family = binomial`):
209 |   `mixed(prop ~ a * b + (a|s) + (b|i), data, weights = data$n, family = binomial, method = "LRT")` (Note: `data$n * data$prop` must produce integers, i.e., the number of successes.)
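For the residual checks, a hedged `DHARMa` sketch (model and data names hypothetical):

```r
library("DHARMa")
g <- lme4::glmer(cbind(succ, n - succ) ~ a + (1 | s), data = dat,
                 family = binomial)        # successes/failures response
res <- simulateResiduals(fittedModel = g)  # simulation-based residuals
plot(res)                                  # QQ and residual-vs-predicted plots
```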
210 |
--------------------------------------------------------------------------------
/handout/mixed_model_handout.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/handout/mixed_model_handout.pdf
--------------------------------------------------------------------------------
/part0-introduction/.Rhistory:
--------------------------------------------------------------------------------
1 | options(htmltools.dir.version = FALSE)
2 | # see: https://github.com/yihui/xaringan
3 | # install.packages("xaringan")
4 | # see:
5 | # https://github.com/yihui/xaringan/wiki
6 | # https://github.com/gnab/remark/wiki/Markdown
7 | options(width=110)
8 | options(digits = 4)
9 | require(psych)
10 | data(sat.act)
11 | sat.act$gender <- factor(sat.act$gender, 1:2, labels = c("male", "female"))
12 | sat.act$education <- factor(sat.act$education)
13 | summary(sat.act) # alternatively: psych::describe(sat.act)
14 | sat.act <- na.omit(sat.act)
15 | par(mfrow=c(1,2))
16 | plot(sat.act$SATV, sat.act$ACT)
17 | plot(sat.act$SATQ, sat.act$ACT)
18 | m1 <- lm(ACT ~ SATQ, sat.act)
19 | summary(m1)
20 | coef(m1)
21 | plot(sat.act$SATV, sat.act$ACT)
22 | abline(m1)
23 | sat.act$SATQ_c <- sat.act$SATQ - mean(sat.act$SATQ, na.rm = TRUE)
24 | sat.act$SATV_c <- sat.act$SATV - mean(sat.act$SATV)
25 | m2 <- lm(ACT ~ SATQ_c, sat.act)
26 | summary(m2)
27 | coef(m2)
28 | plot(sat.act$SATV_c, sat.act$ACT)
29 | abline(m2)
30 | plot(ACT ~ SATV, sat.act)
31 | plot(ACT ~ SATV_c, sat.act)
32 | ?formula
33 | cbind(rnorm(10), rnorm(10))
34 | cbind(rnorm(10), rnorm(10), rnorm(10))
35 | cbind(rnorm(10), rnorm(10), rnorm(9))
36 |
--------------------------------------------------------------------------------
/part0-introduction/figures/RMarkdown-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/RMarkdown-example.png
--------------------------------------------------------------------------------
/part0-introduction/figures/ch-02-markdown-margin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/ch-02-markdown-margin.png
--------------------------------------------------------------------------------
/part0-introduction/figures/data-science.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/data-science.png
--------------------------------------------------------------------------------
/part0-introduction/figures/github-workshop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/github-workshop.png
--------------------------------------------------------------------------------
/part0-introduction/figures/magrittr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/magrittr.png
--------------------------------------------------------------------------------
/part0-introduction/figures/markdownChunk2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/markdownChunk2.png
--------------------------------------------------------------------------------
/part0-introduction/figures/tidy-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/singmann/mixed_model_workshop/cb1ba0d82579103e246402c25015bc99fa27fbeb/part0-introduction/figures/tidy-1.png
--------------------------------------------------------------------------------
/part0-introduction/introduction.R:
--------------------------------------------------------------------------------
1 | ## ----setup, include=FALSE------------------------------------------------
2 | options(htmltools.dir.version = FALSE)
3 | # see: https://github.com/yihui/xaringan
4 | # install.packages("xaringan")
5 | # see:
6 | # https://github.com/yihui/xaringan/wiki
7 | # https://github.com/gnab/remark/wiki/Markdown
8 | options(width=110)
9 | options(digits = 4)
10 |
11 | ## ---- eval = FALSE-------------------------------------------------------
12 | ## ---
13 | ## title: "My Title"
14 | ## author: "Henrik Singmann"
15 | ## date: "`r format(Sys.time(), '%d %B, %Y')`"
16 | ## output:
17 | ## html_document:
18 | ## toc: TRUE
19 | ## toc_float: true
20 | ## theme: paper
21 | ## highlight: espresso
22 | ## ---
23 |
24 | ## ---- echo=FALSE---------------------------------------------------------
25 | 1 + 1
26 |
27 | ## ---- eval=FALSE---------------------------------------------------------
28 | ## iris
29 |
30 | ## ---- eval=TRUE, echo=FALSE----------------------------------------------
31 | options(width = 50)
32 | iris[1:5, 1:3] # [...]
33 |
34 | ## ---- eval=TRUE----------------------------------------------------------
35 | iris$Spec
36 |
37 | ## ---- eval=TRUE----------------------------------------------------------
38 | library("tibble")
39 | iris2 <- as_tibble(iris)
40 | iris2
41 | iris2$Spec
42 |
43 | ## ---- eval=FALSE---------------------------------------------------------
44 | ## x %>% f
45 | ## x %>% f(y)
46 | ## x %>% f %>% g %>% h
47 | ##
48 | ## x %>% f(y, .)
49 | ## x %>% f(y, z = .)
50 |
51 | ## ---- eval=FALSE---------------------------------------------------------
52 | ## f(x)
53 | ## f(x, y)
54 | ## h(g(f(x)))
55 | ##
56 | ## f(y, x)
57 | ## f(y, z = x)
58 |
59 | ## ---- eval=FALSE---------------------------------------------------------
60 | ## library(magrittr)
61 | ## iris2$Sepal.Length %>%
62 | ## mean
63 |
64 | ## ---- message=FALSE------------------------------------------------------
65 | library("dplyr")
66 | iris2 %>%
67 | filter(Species == "setosa") %>%
68 | summarise(mean(Sepal.Length))
69 |
70 | ## ------------------------------------------------------------------------
71 | iris2 %>%
72 | group_by(Species) %>%
73 | summarise(mean_l = mean(Sepal.Length),
74 | max_l = max(Sepal.Length),
75 | min_l = min(Sepal.Length),
76 | sd_l = sd(Sepal.Length))
77 |
78 | ## ---- eval=FALSE---------------------------------------------------------
79 | ## library("ggplot2")
80 | ## ggplot(iris2, aes(x = Petal.Width, y = Petal.Length)) +
81 | ## geom_point()
82 |
83 | ## ---- eval=FALSE---------------------------------------------------------
84 | ## ggplot(iris2, aes(x = Petal.Width, y = Petal.Length, color = Species)) +
85 | ## geom_point()
86 |
87 | ## ---- eval=FALSE---------------------------------------------------------
88 | ## ggplot(iris2, aes(x = Species, y = Petal.Length)) +
89 | ## geom_jitter(width = 0.2) +
90 | ## geom_boxplot(fill = "transparent") +
91 | ## theme_bw()
92 |
93 |
--------------------------------------------------------------------------------
/part0-introduction/introduction.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Mixed Models in R"
3 | subtitle: "An Applied Introduction"
4 | author: "Henrik Singmann (University of Zurich) Twitter: @HenrikSingmann"
5 | date: "July 2018"
6 | output:
7 | xaringan::moon_reader:
8 | css: ["default", "default-fonts", "my-theme.css"]
9 | lib_dir: libs
10 | nature:
11 | highlightStyle: github
12 | highlightLines: true
13 | countIncrementalSlides: false
14 | ratio: '16:9'
15 | ---
16 |
17 |
18 |
19 | ```{r setup, include=FALSE}
20 | options(htmltools.dir.version = FALSE)
21 | # see: https://github.com/yihui/xaringan
22 | # install.packages("xaringan")
23 | # see:
24 | # https://github.com/yihui/xaringan/wiki
25 | # https://github.com/gnab/remark/wiki/Markdown
26 | options(width=110)
27 | options(digits = 4)
28 | ```
29 |
30 |
31 | class: inline-grey
32 | # Outline
33 |
34 | 1. Introduction: Modern `R`
35 | 2. Statistical Modeling in `R`
36 | 3. Dealing with repeated-measures (pooling)
37 | 4. Mixed models
38 |
39 | ---
40 | class: small
41 |
42 | ### Research and Statistics
43 |
44 | - *Substantive research questions*
45 | 1. Negative cognitive distortions sustain depressive symptoms.
46 | 2. Interference and not decay is the main source of forgetting in memory.
47 | 3. Inhibition is a specific and general mental ability, like IQ.
48 |
49 | --
50 |
51 | - *Operationalization and measurement*
52 | 1. Educating patients how to escape their negative thoughts should reduce depressive symptoms.
53 | 2. Control independently time of delay and amount of new information.
54 | 3. Ability to suppress distracting information should be related across tasks. For example, Stroop performance and flanker performance.
55 |
56 | --
57 |
58 |
59 | - Substantive questions cannot be directly addressed via empirical means (e.g., [Duhem-Quine thesis](https://en.wikipedia.org/wiki/Duhem%E2%80%93Quine_thesis)).
60 | - Researchers use empirical observations (data) for making arguments about research questions.
61 | - Appropriate *research methods* (e.g., experimental design, reliability, validity, reproducibility) help in making better (i.e., more convincing) arguments.
62 | - *Data visualization* and *statistics* are important tools for making good arguments about data:
63 | - A statistic cannot prove nor disprove a substantive research question or empirical hypothesis: *statistical arguments need context (e.g., data visualization).* [this is why AIC/BIC/WAIC/... often sucks]
64 | - Some statistical arguments are better, some are worse, and some have essentially no evidential value.
65 |   - *Statistics is not a ritual* (e.g., [Gigerenzer, 2018](https://doi.org/10.1177/2515245918771329)). Instead, statistics is a toolkit; researchers have to select the right tool for each job.
66 | --
67 | - "There are no routine statistical questions, only questionable statistical routines." (David Cox)
68 | - "The combination of some data and an aching desire for an answer does not ensure that a reasonable answer can be extracted from a given body of data." (John Tukey)
69 |
70 |
71 | ---
72 |
73 | ### Process and Tools: `tidyverse` and `RMarkdown`
74 |
75 | 
76 |
77 | Conceptual model of data analysis (source: [Wickham & Grolemund (2017): R for Data Science](http://r4ds.had.co.nz))
78 |
79 |
80 | --
81 |
82 | - `tidyverse`: Selection of packages curated/developed by `RStudio`:
83 | - [`readr`](https://readr.tidyverse.org/): Reading data in, the `RStudio` way.
84 | - Data wrangling with [`tibble`](http://tibble.tidyverse.org/), [`magrittr`](http://magrittr.tidyverse.org/), [`tidyr`](http://tidyr.tidyverse.org/), and [`dplyr`](http://dplyr.tidyverse.org/): Coherent set of functions for tidying, transforming, and working with rectangular data. Supersedes many base `R` functions and makes common problems easy.
85 | - [`ggplot2`](http://ggplot2.tidyverse.org/): System for data visualization.
86 | - [`purrr`](http://purrr.tidyverse.org/) and [`broom`](https://broom.tidyverse.org/): Advanced modeling with the `tidyverse`.
87 |
88 | --
89 |
90 | - `RMarkdown` "authoring framework for data science".
91 |
92 | ---
93 |
94 | # `RMarkdown`
95 |
96 |
97 | - Context requires combination of a narrative/prose with data visualization and statistical results.
98 | - `RMarkdown` "authoring framework for data science".
99 | - Single document, `.Rmd` file, combines text, pictures, and `R` code.
100 | - Render document: Runs code and combines text, pictures, code, and output (i.e., text output and plots) into nicely formatted result:
101 | - `html` file
102 | - `pdf` or `Word` file
103 | - presentation (like this one)
104 | - blog or other website (`blogdown`), books (`bookdown`), interactive tutorials (`learnr`), [...](https://www.rstudio.com/resources/videos/r-markdown-eight-ways/)
105 |
106 | --
107 |
108 | - `RMarkdown` is efficient, easy to use, ensures reproducibility, and
109 | - is ideal for communicating results with collaborators or PIs,
110 |     - can be used for writing preregistrations with [`prereg`](https://cran.r-project.org/package=prereg),
111 | - and even for writing papers (i.e., [`papaja`](https://github.com/crsh/papaja)).
112 |
113 | --
114 |
115 |
116 | - *Warning:* If you send an `RMarkdown` `html` report, it needs to be downloaded before figures are visible (e.g., opening it directly from `gmail` does not show plots)!
117 |
118 | ---
119 | class:inline-grey, small
120 |
121 | ### `RMarkdown` - First Steps
122 |
123 | - Create new `RMarkdown` document: `File` -> `New File` -> `R Markdown...`
124 | - Enter title and your name -> Keep `html` selected -> `Ok`
125 | - `Save` file somewhere (e.g., `test.Rmd` in `Downloads`) -> `Knit` creates and opens `html` document
126 |
127 |
128 | ---
129 |
130 | ### `RMarkdown` Document Example ([source](http://rstudio-pubs-static.s3.amazonaws.com/202429_acbbe794b27f4dffaac6047d1b6d5aa0.html))
131 |
132 | 
133 |
134 | ---
135 | class:inline-grey, small
136 |
137 | ### `RMarkdown` - YAML Header
138 |
139 |
140 | ```{r, eval = FALSE}
141 | ---
142 | title: "My Title"
143 | author: "Henrik Singmann"
144 | date: "`r format(Sys.time(), '%d %B, %Y')`"
145 | output:
146 | html_document:
147 | toc: TRUE
148 | toc_float: true
149 | theme: paper
150 | highlight: espresso
151 | ---
152 | ```
153 |
154 | - `YAML` Stands for "YAML Ain't Markup Language"
155 | - This is where you set options for your overall document, for example:
156 | - [output format](https://rmarkdown.rstudio.com/formats.html) (`html_document`, `pdf_document`, `word_document`, `github_document`, ...)
157 | - add and format table of content
158 | - appearance (also add custom `css`)
159 | - see [`RMarkdown` cheat sheet](https://github.com/rstudio/cheatsheets/raw/master/rmarkdown-2.0.pdf) or https://rmarkdown.rstudio.com/html_document_format.html
160 |
161 | ---
162 | class: small
163 |
164 | ### Text Formatting
165 |
166 | .pull-left[
167 | 
168 |
169 | `[link](www.rstudio.com)` -> [link](www.rstudio.com)
170 |
171 |
172 | (source: http://socviz.co/gettingstarted.html#work-in-plain-text-using-rmarkdown)
173 | ]
174 |
175 |
176 |
177 | ---
178 | class: small
179 |
180 | 
181 |
182 | ---
183 |
184 | ### Code Chunks
185 |
186 | ````
187 | ```{r chunk_name, echo=FALSE}`r ''`
188 | 1 + 1
189 | ```
190 | ````
191 |
192 | ```{r, echo=FALSE}
193 | 1 + 1
194 | ```
195 |
196 | - Run a code chunk with `Ctrl`/`Cmd` + `Shift` + `Enter`
197 |
198 | Important chunk options:
199 | - `echo`: Display code in output document (default = `TRUE`)
200 | - `eval`: Run code in chunk (default = `TRUE`)
201 | - `include`: Include chunk and output in doc after running (default = `TRUE`)
202 | - `fig.height` and `fig.width`: Dimensions of plots in inches
203 | - `error`: Display error messages in doc (`TRUE`) or stop render when errors occur (`FALSE`) (default = `FALSE`)
204 | - `warning`: display code warnings in document (default = `TRUE`)
205 | - `results`: How to format results:
206 | - default = `'markup'`
207 | - `'asis'` - pass through results
208 | - `'hide'` - do not display results
209 | - `'hold'` - put all results below all code
210 | - `cache`: cache results for future knits (default = `FALSE`)
211 |
212 | --
213 |
214 | Try replacing `summary(cars)` with `str(cars)`
215 |
216 | ---
217 | class: small, inline-grey
218 |
219 | - visit: [`https://github.com/singmann/mixed_model_workshop/releases`](https://github.com/singmann/mixed_model_workshop/releases)
220 | - Download `Source code (zip)` (or `Source code (tar.gz) `)
221 |
222 |
223 |
224 |
225 |
226 | ---
227 | class: inline-grey
228 |
229 | ## Workshop Materials
230 |
231 | - `zip` Archive contains all materials (e.g., slides, code, exercises) of the current workshop
232 | - Extract `zip` archive if necessary
233 | - All slides are built using `RMarkdown` and `xaringan` package.
234 | - `part0-introduction` materials for introduction session (these slides).
235 | - `part1-statistical-modeling-in-r` materials for statistical modeling session.
236 | - `part2-mixed-models-in-r` materials for mixed models session.
237 | - In each folder:
238 | - `.Rmd` file is the `RMarkdown` containing text and code for the slides.
239 | - `.R` file only contains the code for the slides and no text.
240 |     - You can follow the presentation using either file. Don't forget:
241 | - Run a code chunk (i.e., grey block) with `Ctrl`/`Cmd` + `Shift` + `Enter`
242 | - Run a single line of code with `Ctrl`/`Cmd` + `Enter`
243 | - `.html` is the full presentation you are seeing. After opening, press `h` for help.
244 | - `exercises` contains some exercises.
245 | - `handout` contains the handout (also includes the `RMarkdown` file)
246 |
247 | ---
248 | class: center, middle, inverse
249 |
250 | # `tidyverse`
251 |
252 | ---
253 | class: small
254 |
255 | .pull-left[
256 | ### `tibble`
257 |
258 | - "**tibble** or `tbl_df` is a modern reimagining of the data.frame, keeping what time has proven to be effective, and throwing out what is not."
259 | - Dramatically enhanced `print` method.
260 | - Does not change `strings` to `factors`.
261 | - Complains when a variable is missing (i.e., no partial matching).
262 | - Allows list columns (with nice printing).
263 |
264 | ```{r, eval=FALSE}
265 | iris
266 | ```
267 |
268 | ```{r, eval=TRUE, echo=FALSE}
269 | options(width = 50)
270 | iris[1:5, 1:3] # [...]
271 | ```
272 |
273 | [...]
274 |
275 | ```{r, eval=TRUE}
276 | iris$Spec
277 | ```
278 | ]
279 |
280 | --
281 |
282 | .pull-right[
283 | ```{r, eval=TRUE}
284 | library("tibble")
285 | iris2 <- as_tibble(iris)
286 | iris2
287 | iris2$Spec
288 | ```
289 |
290 | ]
291 |
292 | ---
293 | class:inline-grey
294 |
295 | ## `magrittr`
296 |
297 | - Pipe operator `%>%` makes code more readable:
298 | - structuring sequences of data operations left-to-right (as opposed to from the inside and out)
299 | - avoiding nested function calls,
300 | - minimizing need for local variables and function definitions.
301 | - Add pipe with `Ctrl`/`Cmd` +`Shift` + `m`
302 |
303 | .pull-left[
304 | ### Pipe
305 |
306 | ```{r, eval=FALSE}
307 | x %>% f
308 | x %>% f(y)
309 | x %>% f %>% g %>% h
310 |
311 | x %>% f(y, .)
312 | x %>% f(y, z = .)
313 | ```
314 |
315 | ]
316 |
317 | .pull-right[
318 | ### Base R
319 | ```{r, eval=FALSE}
320 | f(x)
321 | f(x, y)
322 | h(g(f(x)))
323 |
324 | f(y, x)
325 | f(y, z = x)
326 | ```
327 |
328 | ]
329 |
330 | --
331 |
332 | Try it out:
333 | ```{r, eval=FALSE}
334 | library(magrittr)
335 | iris2$Sepal.Length %>%
336 | mean
337 | ```
338 |
339 | ---
340 | class: small
341 |
342 | ### Tidy Data (`tidyr`)
343 |
344 | *"Tidy datasets are all alike, but every messy dataset is messy in its own way." -- Hadley Wickham*
345 |
346 | 1. Put each data set in a `tibble`.
347 | 2. Put each variable in a column.
348 | 1. Each variable must have its own column.
349 | 2. Each observation must have its own row.
350 | 3. Each value must have its own cell.
351 | 
352 | --
353 |
354 | - For psychologists: Transform wide into long data. See also:
355 | - Wickham, H. (2014). Tidy data. *The Journal of Statistical Software*, 59(10). http://www.jstatsoft.org/v59/i10
356 | - Wickham, H., & Grolemund, G. (2017). R for Data Science (ch. 12). http://r4ds.had.co.nz/tidy-data.html
357 |
358 | ---
359 |
360 | ### `dplyr`
361 |
362 | - grammar of data manipulation, providing a consistent set of verbs that help you solve the most common data manipulation challenges:
363 | - `mutate()` adds new variables that are functions of existing variables
364 | - `select()` picks variables based on their names.
365 | - `filter()` picks cases based on their values.
366 | - `summarise()` reduces multiple values down to a single summary.
367 | - `arrange()` changes the ordering of the rows.
368 | - All combine naturally with `group_by()` which allows performing any operation "by group".
369 |
370 | --
371 |
372 | .pull-left[
373 | ```{r, message=FALSE}
374 | library("dplyr")
375 | iris2 %>%
376 | filter(Species == "setosa") %>%
377 | summarise(mean(Sepal.Length))
378 | ```
379 |
380 | ]
381 |
382 | --
383 |
384 | .pull-right[
385 |
386 | ```{r}
387 | iris2 %>%
388 | group_by(Species) %>%
389 | summarise(mean_l = mean(Sepal.Length),
390 | max_l = max(Sepal.Length),
391 | min_l = min(Sepal.Length),
392 | sd_l = sd(Sepal.Length))
393 | ```
394 | ]
395 |
396 |
397 |
398 | ---
399 |
400 | ### `ggplot2`
401 |
402 | - System for declaratively creating graphics, based on "The Grammar of Graphics" by Leland Wilkinson
403 | - "You provide data, tell `ggplot2` how to map variables to aesthetics, what graphical primitives to use, and it takes care of the details."
404 | - `ggplot()` is the basic function which takes the data.
405 | - `aes()` is used for mapping aesthetics.
406 | - `geom_...` tells which primitive to use.
407 |
408 | ```{r, eval=FALSE}
409 | library("ggplot2")
410 | ggplot(iris2, aes(x = Petal.Width, y = Petal.Length)) +
411 | geom_point()
412 | ```
413 | --
414 |
415 | ```{r, eval=FALSE}
416 | ggplot(iris2, aes(x = Petal.Width, y = Petal.Length, color = Species)) +
417 | geom_point()
418 | ```
419 | --
420 |
421 |
422 | ```{r, eval=FALSE}
423 | ggplot(iris2, aes(x = Species, y = Petal.Length)) +
424 | geom_jitter(width = 0.2) +
425 | geom_boxplot(fill = "transparent") +
426 | theme_bw()
427 | ```
428 | --
429 |
430 | - Learning `ggplot2`:
431 | - R for Data Science, http://r4ds.had.co.nz/, ch. 3 and ch. 28
432 | - `ggplot2` cheat sheet: https://github.com/rstudio/cheatsheets/raw/master/data-visualization-2.1.pdf
433 |
434 | ---
435 |
436 | ## Summary
437 |
438 | 
439 |
440 | - Data analysis and statistics are iterative processes.
441 | - Goal of statistics is to support arguments connecting empirical data and substantive research questions.
442 |
443 | ### `tidyverse`
444 | - Selection of packages providing a unified approach and syntax to common data analysis problems.
445 | - To learn more about the `tidyverse` check out the free "R for Data Science" book by Wickham & Grolemund: http://r4ds.had.co.nz/
446 |
447 | ### `RMarkdown`
448 | - Allows combining prose, code, and output into one nicely formatted document.
449 | - Great for communicating results and ensuring reproducibility.
450 |
451 | ---
452 | class: inline-grey
453 |
454 | ### Exercise
455 |
456 | - Open `exercises/exercise_0.Rmd` (or the exercise handout or `exercises/exercise_0.nb.html` for a nicer format of the instruction).
457 | - Follow the text and try to solve a few small tasks that help you get comfortable with the `tidyverse` (without looking at the solution).
458 | - Main goal is for you to get comfortable with `dplyr` and `ggplot2` syntax.
459 | - Exercise uses response time data from Freeman, Heathcote, Chalmers, and Hockley (2010).
460 | - Participants did a lexical-decision task or a naming task.
461 |
462 | The exercise uses yet another type of `RMarkdown` document, `html_notebook` instead of `html_document`:
463 | - `html_document`: "knitting" runs all code in a new `R` process from beginning to end (which ensures reproducibility).
464 | - In contrast, a `html_notebook`
465 | - uses current `R` process (i.e., state of `Console`), similar to [`Jupyter`](http://jupyter.org/) (does *NOT* ensure reproducibility),
466 |     - allows you to `Preview` the current state of the document as an `html` file,
467 | - potentially better for initial analysis or situations involving expensive calculations,
468 | - can be transformed into `html_document` by simply changing the YAML header.
469 |
470 | Remember:
471 | - Run a code chunk (i.e., grey block) with `Ctrl`/`Cmd` + `Shift` + `Enter`
472 | - Run a single line of code with `Ctrl`/`Cmd` + `Enter`
473 |
474 | ---
475 |
476 | ### Links
477 | - `RStudio` cheat sheets: https://www.rstudio.com/resources/cheatsheets/
478 | - `RStudio`: https://github.com/rstudio/cheatsheets/raw/master/rstudio-ide.pdf
479 | - `RMarkdown`: https://github.com/rstudio/cheatsheets/raw/master/rmarkdown-2.0.pdf
480 | - `ggplot2`: https://github.com/rstudio/cheatsheets/raw/master/data-visualization-2.1.pdf
481 | - `dplyr` & `tidyr`: https://github.com/rstudio/cheatsheets/raw/master/data-transformation.pdf
482 |
483 | - Introduction to Open Data Science: http://ohi-science.org/data-science-training/
484 | - R for Data Science: http://r4ds.had.co.nz/
485 | - Data Visualization: A practical introduction: http://socviz.co/
486 | - Exploratory Data Analysis with R: https://bookdown.org/rdpeng/exdata/
487 | - The Art of Data Science: https://bookdown.org/rdpeng/artofdatascience/
488 |
489 |
490 |
--------------------------------------------------------------------------------
/part0-introduction/introduction.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |