├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── README.md ├── data ├── HusainEtAlexpt1data.txt ├── cholesky_sim.Rda ├── factorialFit.Rda ├── fixEfFit.Rda ├── gibsonwu2012data.txt ├── gibsonwu2012datarepeat.txt ├── matrixFit.Rda ├── pp.Rda ├── ranIntFit.Rda └── ranIntSlpFit.Rda ├── doc ├── SorensenEtAl.Rnw ├── SorensenEtAl.bib └── SorensenEtAl.pdf ├── man └── BayesLMMTutorial-package.Rd └── vignettes ├── BayesianLMMs.Rmd ├── BayesianLMMs.html ├── factorialDesign ├── factorialDesign.Rnw └── factorialDesign.pdf ├── factorialModel.stan ├── fixEf.stan ├── matrixModel.stan ├── pp.stan ├── ranInt.stan ├── ranIntSlp.stan └── ranIntSlpNoCor.stan /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: BayesLMMTutorial 2 | Type: Package 3 | Title: Tutorial on fitting Bayesian Linear Mixed Models using Stan 4 | Version: 1.0 5 | Date: 2015-06-12 6 | Authors@R: c(person("Shravan", "Vasishth", 7 | role=c("aut","cre"), 8 | email="vasishth@uni-potsdam.de")) 9 | Maintainer: Shravan Vasishth 10 | Description: This package provides the code accompanying the paper by Sorensen and Vasishth on fitting linear mixed models using Stan. We provide an example for a two-condition repeated measures design, a 2x2 repeated measures factorial design, and some other more complex designs. 11 | Depends: R (>= 3.1.0), rstan(>=2.6) 12 | Suggests: lme4(>= 1.1), knitr 13 | BugReports: https://github.com/vasishth/BayesLMMTutorial/issues 14 | VignetteBuilder: knitr 15 | License: MIT 16 | Encoding: UTF-8 17 | LazyData: yes -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Shravan Vasishth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | exportPattern("^[[:alpha:]]+") 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BayesLMMTutorial 2 | Tutorial files to accompany Sorensen, Hohenstein, and Vasishth paper: http://www.ling.uni-potsdam.de/~vasishth/statistics/BayesLMMs.html 3 | 4 | This paper is in press with the journal [Quantitative Methods for Psychology](http://www.tqmp.org/). 5 | 6 | Try out the code in the Rmd file under vignettes to run the code in the paper. 7 | 8 | For queries about this github package please contact Shravan Vasishth. -------------------------------------------------------------------------------- /data/cholesky_sim.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/cholesky_sim.Rda -------------------------------------------------------------------------------- /data/factorialFit.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/factorialFit.Rda -------------------------------------------------------------------------------- /data/fixEfFit.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/fixEfFit.Rda -------------------------------------------------------------------------------- /data/gibsonwu2012datarepeat.txt: -------------------------------------------------------------------------------- 1 | "subj" "item" "condition" "pos" "rt" "region" 2 | "9" "1m1" 15 "obj-ext" "8" 832 "head noun" 3 | "20" "1m1" 8 "subj-ext" "8" 2131 "head noun" 4 | "33" "1m1" 11 "obj-ext" "8" 553 "head noun" 5 | "46" "1m1" 10 "subj-ext" "8" 1091 "head noun" 6 | "62" "1m1" 16 "subj-ext" "8" 598 "head noun" 7 | "75" "1m1" 14 "subj-ext" "8" 645 "head noun" 8 | "88" "1m1" 3 "obj-ext" "8" 445 "head noun" 9 | "104" "1m1" 1 "obj-ext" "8" 574 "head noun" 10 | "119" "1m1" 9 "obj-ext" "8" 542 "head noun" 11 | "135" "1m1" 4 "subj-ext" "8" 455 "head noun" 12 | "152" "1m1" 13 "obj-ext" "8" 569 "head noun" 13 | "165" "1m1" 2 "subj-ext" "8" 554 "head noun" 14 | "180" "1m1" 5 "obj-ext" "8" 515 "head noun" 15 | "192" "1m1" 7 "obj-ext" "8" 346 "head noun" 16 | "204" "1m1" 6 "subj-ext" "8" 457 "head noun" 17 | "216" "1m10" 4 "obj-ext" "8" 426 "head noun" 18 | "233" "1m10" 6 "obj-ext" "8" 262 "head noun" 19 | "254" "1m10" 13 "subj-ext" "8" 314 "head noun" 20 | "267" "1m10" 2 "obj-ext" "8" 435 "head noun" 21 | "285" "1m10" 11 "subj-ext" "8" 185 "head noun" 22 | "298" "1m10" 7 "subj-ext" "8" 253 "head noun" 23 | "310" "1m10" 9 "subj-ext" "8" 236 "head noun" 24 | "326" "1m10" 8 "obj-ext" "8" 215 "head noun" 25 | "339" "1m10" 1 "subj-ext" "8" 199 "head noun" 26 | "354" "1m10" 5 "subj-ext" "8" 219 "head noun" 27 | "366" "1m10" 10 "obj-ext" "8" 319 "head noun" 28 | "382" "1m10" 15 "subj-ext" "8" 230 "head noun" 29 | "393" "1m10" 14 "obj-ext" "8" 204 "head noun" 30 | "406" "1m11" 11 "obj-ext" "8" 292 "head noun" 31 | "419" "1m11" 4 "subj-ext" "8" 889 "head noun" 32 | "436" "1m11" 8 "subj-ext" "8" 440 "head noun" 33 | 
"449" "1m11" 3 "obj-ext" "8" 297 "head noun" 34 | "465" "1m11" 5 "obj-ext" "8" 377 "head noun" 35 | "477" "1m11" 15 "obj-ext" "8" 317 "head noun" 36 | "488" "1m11" 13 "obj-ext" "8" 325 "head noun" 37 | "501" "1m11" 7 "obj-ext" "8" 346 "head noun" 38 | "513" "1m11" 9 "obj-ext" "8" 266 "head noun" 39 | "529" "1m11" 1 "obj-ext" "8" 256 "head noun" 40 | "544" "1m11" 10 "subj-ext" "8" 991 "head noun" 41 | "560" "1m11" 6 "subj-ext" "8" 346 "head noun" 42 | "572" "1m11" 14 "subj-ext" "8" 323 "head noun" 43 | "585" "1m11" 16 "subj-ext" "8" 450 "head noun" 44 | "598" "1m11" 2 "subj-ext" "8" 230 "head noun" 45 | "613" "1m12" 6 "obj-ext" "8" 393 "head noun" 46 | "625" "1m12" 16 "obj-ext" "8" 307 "head noun" 47 | "638" "1m12" 11 "subj-ext" "8" 457 "head noun" 48 | "651" "1m12" 14 "obj-ext" "8" 334 "head noun" 49 | "664" "1m12" 13 "subj-ext" "8" 384 "head noun" 50 | "677" "1m12" 1 "subj-ext" "8" 340 "head noun" 51 | "692" "1m12" 2 "obj-ext" "8" 385 "head noun" 52 | "707" "1m12" 10 "obj-ext" "8" 444 "head noun" 53 | "723" "1m12" 8 "obj-ext" "8" 1678 "head noun" 54 | "736" "1m12" 9 "subj-ext" "8" 459 "head noun" 55 | "752" "1m12" 15 "subj-ext" "8" 708 "head noun" 56 | "763" "1m12" 5 "subj-ext" "8" 455 "head noun" 57 | "775" "1m12" 3 "subj-ext" "8" 601 "head noun" 58 | "791" "1m12" 4 "obj-ext" "8" 355 "head noun" 59 | "808" "1m12" 7 "subj-ext" "8" 373 "head noun" 60 | "820" "1m13" 13 "obj-ext" "8" 561 "head noun" 61 | "833" "1m13" 5 "obj-ext" "8" 641 "head noun" 62 | "845" "1m13" 3 "obj-ext" "8" 641 "head noun" 63 | "861" "1m13" 1 "obj-ext" "8" 588 "head noun" 64 | "876" "1m13" 15 "obj-ext" "8" 518 "head noun" 65 | "887" "1m13" 10 "subj-ext" "8" 505 "head noun" 66 | "903" "1m13" 6 "subj-ext" "8" 471 "head noun" 67 | "915" "1m13" 9 "obj-ext" "8" 380 "head noun" 68 | "931" "1m13" 14 "subj-ext" "8" 388 "head noun" 69 | "944" "1m13" 2 "subj-ext" "8" 395 "head noun" 70 | "959" "1m13" 7 "obj-ext" "8" 488 "head noun" 71 | "971" "1m13" 4 "subj-ext" "8" 379 "head noun" 72 | "988" "1m13" 11 "obj-ext" "8" 381 "head noun" 73 | "1001" "1m13" 16 "subj-ext" "8" 444 "head noun" 74 | "1014" "1m13" 8 "subj-ext" "8" 405 "head noun" 75 | "1027" "1m14" 11 "subj-ext" "8" 369 "head noun" 76 | "1040" "1m14" 3 "subj-ext" "8" 384 "head noun" 77 | "1056" "1m14" 6 "obj-ext" "8" 402 "head noun" 78 | "1068" "1m14" 16 "obj-ext" "8" 1689 "head noun" 79 | "1081" "1m14" 1 "subj-ext" "8" 316 "head noun" 80 | "1096" "1m14" 9 "subj-ext" "8" 342 "head noun" 81 | "1112" "1m14" 5 "subj-ext" "8" 396 "head noun" 82 | "1124" "1m14" 15 "subj-ext" "8" 325 "head noun" 83 | "1135" "1m14" 2 "obj-ext" "8" 308 "head noun" 84 | "1150" "1m14" 13 "subj-ext" "8" 300 "head noun" 85 | "1163" "1m14" 4 "obj-ext" "8" 306 "head noun" 86 | "1180" "1m14" 8 "obj-ext" "8" 325 "head noun" 87 | "1193" "1m14" 7 "subj-ext" "8" 350 "head noun" 88 | "1205" "1m14" 10 "obj-ext" "8" 411 "head noun" 89 | "1221" "1m14" 14 "obj-ext" "8" 301 "head noun" 90 | "1234" "1m15" 2 "subj-ext" "8" 251 "head noun" 91 | "1249" "1m15" 5 "obj-ext" "8" 584 "head noun" 92 | "1261" "1m15" 6 "subj-ext" "8" 413 "head noun" 93 | "1273" "1m15" 8 "subj-ext" "8" 1905 "head noun" 94 | "1286" "1m15" 3 "obj-ext" "8" 636 "head noun" 95 | "1302" "1m15" 15 "obj-ext" "8" 407 "head noun" 96 | "1313" "1m15" 4 "subj-ext" "8" 199 "head noun" 97 | "1330" "1m15" 7 "obj-ext" "8" 302 "head noun" 98 | "1342" "1m15" 11 "obj-ext" "8" 231 "head noun" 99 | "1355" "1m15" 10 "subj-ext" "8" 243 "head noun" 100 | "1371" "1m15" 13 "obj-ext" "8" 299 "head noun" 101 | "1384" "1m15" 16 "subj-ext" "8" 248 "head noun" 102 | "1397" 
"1m15" 9 "obj-ext" "8" 221 "head noun" 103 | "1413" "1m15" 14 "subj-ext" "8" 458 "head noun" 104 | "1426" "1m15" 1 "obj-ext" "8" 274 "head noun" 105 | "1441" "1m16" 11 "subj-ext" "8" 416 "head noun" 106 | "1454" "1m16" 5 "subj-ext" "8" 319 "head noun" 107 | "1466" "1m16" 2 "obj-ext" "8" 253 "head noun" 108 | "1484" "1m16" 15 "subj-ext" "8" 228 "head noun" 109 | "1495" "1m16" 6 "obj-ext" "8" 202 "head noun" 110 | "1507" "1m16" 4 "obj-ext" "8" 217 "head noun" 111 | "1524" "1m16" 16 "obj-ext" "8" 233 "head noun" 112 | "1537" "1m16" 13 "subj-ext" "8" 1149 "head noun" 113 | "1550" "1m16" 14 "obj-ext" "8" 258 "head noun" 114 | "1563" "1m16" 3 "subj-ext" "8" 340 "head noun" 115 | "1579" "1m16" 9 "subj-ext" "8" 234 "head noun" 116 | "1595" "1m16" 8 "obj-ext" "8" 227 "head noun" 117 | "1608" "1m16" 1 "subj-ext" "8" 238 "head noun" 118 | "1623" "1m16" 7 "subj-ext" "8" 243 "head noun" 119 | "1635" "1m17" 10 "subj-ext" "8" 360 "head noun" 120 | "1651" "1m17" 13 "obj-ext" "8" 329 "head noun" 121 | "1664" "1m17" 1 "obj-ext" "8" 344 "head noun" 122 | "1679" "1m17" 14 "subj-ext" "8" 391 "head noun" 123 | "1692" "1m17" 2 "subj-ext" "8" 281 "head noun" 124 | "1707" "1m17" 16 "subj-ext" "8" 953 "head noun" 125 | "1720" "1m17" 8 "subj-ext" "8" 406 "head noun" 126 | "1733" "1m17" 3 "obj-ext" "8" 172 "head noun" 127 | "1749" "1m17" 6 "subj-ext" "8" 10594 "head noun" 128 | "1761" "1m17" 11 "obj-ext" "8" 219 "head noun" 129 | "1774" "1m17" 5 "obj-ext" "8" 391 "head noun" 130 | "1786" "1m17" 4 "subj-ext" "8" 219 "head noun" 131 | "1803" "1m17" 7 "obj-ext" "8" 265 "head noun" 132 | "1815" "1m17" 9 "obj-ext" "8" 281 "head noun" 133 | "1832" "1m18" 15 "subj-ext" "8" 344 "head noun" 134 | "1843" "1m18" 8 "obj-ext" "8" 1766 "head noun" 135 | "1856" "1m18" 13 "subj-ext" "8" 9688 "head noun" 136 | "1869" "1m18" 14 "obj-ext" "8" 734 "head noun" 137 | "1882" "1m18" 3 "subj-ext" "8" 796 "head noun" 138 | "1898" "1m18" 9 "subj-ext" "8" 328 "head noun" 139 | "1914" "1m18" 10 "obj-ext" "8" 422 "head noun" 140 | "1930" "1m18" 1 "subj-ext" "8" 344 "head noun" 141 | "1945" "1m18" 7 "subj-ext" "8" 422 "head noun" 142 | "1957" "1m18" 11 "subj-ext" "8" 562 "head noun" 143 | "1970" "1m18" 16 "obj-ext" "8" 313 "head noun" 144 | "1983" "1m18" 4 "obj-ext" "8" 390 "head noun" 145 | "2000" "1m18" 2 "obj-ext" "8" 531 "head noun" 146 | "2015" "1m18" 5 "subj-ext" "8" 406 "head noun" 147 | "2027" "1m18" 6 "obj-ext" "8" 344 "head noun" 148 | "2039" "1m2" 6 "obj-ext" "8" 554 "head noun" 149 | "2051" "1m2" 11 "subj-ext" "8" 336 "head noun" 150 | "2064" "1m2" 5 "subj-ext" "8" 484 "head noun" 151 | "2076" "1m2" 1 "subj-ext" "8" 642 "head noun" 152 | "2091" "1m2" 16 "obj-ext" "8" 370 "head noun" 153 | "2104" "1m2" 9 "subj-ext" "8" 516 "head noun" 154 | "2120" "1m2" 15 "subj-ext" "8" 327 "head noun" 155 | "2131" "1m2" 10 "obj-ext" "8" 414 "head noun" 156 | "2147" "1m2" 13 "subj-ext" "8" 707 "head noun" 157 | "2160" "1m2" 2 "obj-ext" "8" 362 "head noun" 158 | "2175" "1m2" 14 "obj-ext" "8" 330 "head noun" 159 | "2188" "1m2" 8 "obj-ext" "8" 376 "head noun" 160 | "2201" "1m2" 3 "subj-ext" "8" 423 "head noun" 161 | "2217" "1m2" 7 "subj-ext" "8" 367 "head noun" 162 | "2229" "1m2" 4 "obj-ext" "8" 430 "head noun" 163 | "2246" "1m3" 6 "subj-ext" "8" 611 "head noun" 164 | "2258" "1m3" 2 "subj-ext" "8" 657 "head noun" 165 | "2273" "1m3" 1 "obj-ext" "8" 563 "head noun" 166 | "2288" "1m3" 16 "subj-ext" "8" 953 "head noun" 167 | "2301" "1m3" 4 "subj-ext" "8" 878 "head noun" 168 | "2318" "1m3" 8 "subj-ext" "8" 865 "head noun" 169 | "2331" "1m3" 3 "obj-ext" "8" 489 
"head noun" 170 | "2347" "1m3" 13 "obj-ext" "8" 449 "head noun" 171 | "2360" "1m3" 5 "obj-ext" "8" 430 "head noun" 172 | "2372" "1m3" 9 "obj-ext" "8" 651 "head noun" 173 | "2388" "1m3" 10 "subj-ext" "8" 467 "head noun" 174 | "2404" "1m3" 14 "subj-ext" "8" 455 "head noun" 175 | "2417" "1m3" 7 "obj-ext" "8" 348 "head noun" 176 | "2429" "1m3" 15 "obj-ext" "8" 373 "head noun" 177 | "2440" "1m3" 11 "obj-ext" "8" 350 "head noun" 178 | "2453" "1m4" 1 "subj-ext" "8" 319 "head noun" 179 | "2468" "1m4" 13 "subj-ext" "8" 389 "head noun" 180 | "2481" "1m4" 16 "obj-ext" "8" 315 "head noun" 181 | "2494" "1m4" 4 "obj-ext" "8" 345 "head noun" 182 | "2511" "1m4" 11 "subj-ext" "8" 340 "head noun" 183 | "2524" "1m4" 14 "obj-ext" "8" 332 "head noun" 184 | "2537" "1m4" 10 "obj-ext" "8" 731 "head noun" 185 | "2553" "1m4" 7 "subj-ext" "8" 382 "head noun" 186 | "2565" "1m4" 6 "obj-ext" "8" 319 "head noun" 187 | "2577" "1m4" 15 "subj-ext" "8" 521 "head noun" 188 | "2588" "1m4" 5 "subj-ext" "8" 311 "head noun" 189 | "2600" "1m4" 3 "subj-ext" "8" 307 "head noun" 190 | "2616" "1m4" 2 "obj-ext" "8" 357 "head noun" 191 | "2631" "1m4" 9 "subj-ext" "8" 400 "head noun" 192 | "2647" "1m4" 8 "obj-ext" "8" 323 "head noun" 193 | "2660" "1m5" 5 "obj-ext" "8" 573 "head noun" 194 | "2672" "1m5" 13 "obj-ext" "8" 541 "head noun" 195 | "2685" "1m5" 14 "subj-ext" "8" 622 "head noun" 196 | "2698" "1m5" 6 "subj-ext" "8" 332 "head noun" 197 | "2710" "1m5" 4 "subj-ext" "8" 1529 "head noun" 198 | "2727" "1m5" 3 "obj-ext" "8" 385 "head noun" 199 | "2743" "1m5" 8 "subj-ext" "8" 1025 "head noun" 200 | "2756" "1m5" 7 "obj-ext" "8" 318 "head noun" 201 | "2768" "1m5" 9 "obj-ext" "8" 392 "head noun" 202 | "2784" "1m5" 10 "subj-ext" "8" 555 "head noun" 203 | "2800" "1m5" 16 "subj-ext" "8" 403 "head noun" 204 | "2813" "1m5" 1 "obj-ext" "8" 374 "head noun" 205 | "2828" "1m5" 11 "obj-ext" "8" 366 "head noun" 206 | "2841" "1m5" 2 "subj-ext" "8" 619 "head noun" 207 | "2856" "1m5" 15 "obj-ext" "8" 333 "head noun" 208 | "2867" "1m6" 13 "subj-ext" "8" 798 "head noun" 209 | "2880" "1m6" 14 "obj-ext" "8" 301 "head noun" 210 | "2893" "1m6" 11 "subj-ext" "8" 314 "head noun" 211 | "2906" "1m6" 10 "obj-ext" "8" 489 "head noun" 212 | "2922" "1m6" 8 "obj-ext" "8" 313 "head noun" 213 | "2935" "1m6" 6 "obj-ext" "8" 444 "head noun" 214 | "2947" "1m6" 4 "obj-ext" "8" 325 "head noun" 215 | "2964" "1m6" 7 "subj-ext" "8" 291 "head noun" 216 | "2976" "1m6" 3 "subj-ext" "8" 788 "head noun" 217 | "2992" "1m6" 5 "subj-ext" "8" 314 "head noun" 218 | "3004" "1m6" 2 "obj-ext" "8" 279 "head noun" 219 | "3019" "1m6" 9 "subj-ext" "8" 367 "head noun" 220 | "3035" "1m6" 15 "subj-ext" "8" 655 "head noun" 221 | "3046" "1m6" 1 "subj-ext" "8" 635 "head noun" 222 | "3061" "1m6" 16 "obj-ext" "8" 309 "head noun" 223 | "3074" "1m7" 16 "subj-ext" "8" 406 "head noun" 224 | "3087" "1m7" 15 "obj-ext" "8" 385 "head noun" 225 | "3098" "1m7" 10 "subj-ext" "8" 382 "head noun" 226 | "3114" "1m7" 2 "subj-ext" "8" 360 "head noun" 227 | "3129" "1m7" 8 "subj-ext" "8" 309 "head noun" 228 | "3142" "1m7" 1 "obj-ext" "8" 291 "head noun" 229 | "3157" "1m7" 13 "obj-ext" "8" 267 "head noun" 230 | "3170" "1m7" 9 "obj-ext" "8" 291 "head noun" 231 | "3186" "1m7" 3 "obj-ext" "8" 282 "head noun" 232 | "3202" "1m7" 7 "obj-ext" "8" 311 "head noun" 233 | "3214" "1m7" 5 "obj-ext" "8" 288 "head noun" 234 | "3226" "1m7" 14 "subj-ext" "8" 312 "head noun" 235 | "3239" "1m7" 6 "subj-ext" "8" 318 "head noun" 236 | "3251" "1m7" 4 "subj-ext" "8" 265 "head noun" 237 | "3268" "1m7" 11 "obj-ext" "8" 285 "head noun" 238 | 
"3281" "1m8" 15 "subj-ext" "8" 334 "head noun" 239 | "3292" "1m8" 6 "obj-ext" "8" 249 "head noun" 240 | "3304" "1m8" 9 "subj-ext" "8" 336 "head noun" 241 | "3320" "1m8" 1 "subj-ext" "8" 312 "head noun" 242 | "3335" "1m8" 16 "obj-ext" "8" 320 "head noun" 243 | "3348" "1m8" 13 "subj-ext" "8" 308 "head noun" 244 | "3361" "1m8" 7 "subj-ext" "8" 264 "head noun" 245 | "3373" "1m8" 11 "subj-ext" "8" 418 "head noun" 246 | "3386" "1m8" 3 "subj-ext" "8" 321 "head noun" 247 | "3402" "1m8" 14 "obj-ext" "8" 270 "head noun" 248 | "3415" "1m8" 10 "obj-ext" "8" 663 "head noun" 249 | "3431" "1m8" 2 "obj-ext" "8" 238 "head noun" 250 | "3446" "1m8" 5 "subj-ext" "8" 244 "head noun" 251 | "3458" "1m8" 8 "obj-ext" "8" 485 "head noun" 252 | "3471" "1m8" 4 "obj-ext" "8" 281 "head noun" 253 | "3488" "1m9" 11 "obj-ext" "8" 621 "head noun" 254 | "3501" "1m9" 16 "subj-ext" "8" 909 "head noun" 255 | "3514" "1m9" 4 "subj-ext" "8" 374 "head noun" 256 | "3531" "1m9" 10 "subj-ext" "8" 578 "head noun" 257 | "3547" "1m9" 15 "obj-ext" "8" 458 "head noun" 258 | "3558" "1m9" 5 "obj-ext" "8" 1072 "head noun" 259 | "3570" "1m9" 8 "subj-ext" "8" 474 "head noun" 260 | "3583" "1m9" 2 "subj-ext" "8" 390 "head noun" 261 | "3598" "1m9" 14 "subj-ext" "8" 424 "head noun" 262 | "3611" "1m9" 7 "obj-ext" "8" 469 "head noun" 263 | "3623" "1m9" 1 "obj-ext" "8" 353 "head noun" 264 | "3638" "1m9" 6 "subj-ext" "8" 813 "head noun" 265 | "3650" "1m9" 9 "obj-ext" "8" 333 "head noun" 266 | "3666" "1m9" 13 "obj-ext" "8" 386 "head noun" 267 | "3679" "1m9" 3 "obj-ext" "8" 382 "head noun" 268 | "3695" "2m1" 6 "subj-ext" "8" 307 "head noun" 269 | "3707" "2m1" 4 "subj-ext" "8" 273 "head noun" 270 | "3724" "2m1" 5 "obj-ext" "8" 269 "head noun" 271 | "3736" "2m1" 15 "obj-ext" "8" 297 "head noun" 272 | "3747" "2m1" 16 "subj-ext" "8" 334 "head noun" 273 | "3760" "2m1" 2 "subj-ext" "8" 244 "head noun" 274 | "3775" "2m1" 13 "obj-ext" "8" 346 "head noun" 275 | "3788" "2m1" 1 "obj-ext" "8" 337 "head noun" 276 | "3803" "2m1" 9 "obj-ext" "8" 352 "head noun" 277 | "3819" "2m1" 10 "subj-ext" "8" 352 "head noun" 278 | "3835" "2m1" 7 "obj-ext" "8" 241 "head noun" 279 | "3847" "2m1" 8 "subj-ext" "8" 398 "head noun" 280 | "3860" "2m1" 11 "obj-ext" "8" 346 "head noun" 281 | "3873" "2m1" 14 "subj-ext" "8" 371 "head noun" 282 | "3886" "2m1" 3 "obj-ext" "8" 300 "head noun" 283 | "3902" "2m10" 14 "obj-ext" "8" 888 "head noun" 284 | "3915" "2m10" 6 "obj-ext" "8" 866 "head noun" 285 | "3927" "2m10" 8 "obj-ext" "8" 644 "head noun" 286 | "3940" "2m10" 7 "subj-ext" "8" 681 "head noun" 287 | "3952" "2m10" 15 "subj-ext" "8" 386 "head noun" 288 | "3963" "2m10" 1 "subj-ext" "8" 558 "head noun" 289 | "3978" "2m10" 5 "subj-ext" "8" 340 "head noun" 290 | "3990" "2m10" 10 "obj-ext" "8" 509 "head noun" 291 | "4006" "2m10" 4 "obj-ext" "8" 309 "head noun" 292 | "4023" "2m10" 13 "subj-ext" "8" 484 "head noun" 293 | "4036" "2m10" 16 "obj-ext" "8" 361 "head noun" 294 | "4049" "2m10" 11 "subj-ext" "8" 238 "head noun" 295 | "4062" "2m10" 9 "subj-ext" "8" 386 "head noun" 296 | "4078" "2m10" 3 "subj-ext" "8" 392 "head noun" 297 | "4094" "2m10" 2 "obj-ext" "8" 623 "head noun" 298 | "4109" "2m11" 15 "obj-ext" "8" 469 "head noun" 299 | "4120" "2m11" 13 "obj-ext" "8" 306 "head noun" 300 | "4133" "2m11" 11 "obj-ext" "8" 463 "head noun" 301 | "4146" "2m11" 4 "subj-ext" "8" 247 "head noun" 302 | "4163" "2m11" 5 "obj-ext" "8" 3826 "head noun" 303 | "4175" "2m11" 1 "obj-ext" "8" 321 "head noun" 304 | "4190" "2m11" 6 "subj-ext" "8" 269 "head noun" 305 | "4202" "2m11" 7 "obj-ext" "8" 854 "head noun" 306 | 
"4214" "2m11" 3 "obj-ext" "8" 564 "head noun" 307 | "4230" "2m11" 8 "subj-ext" "8" 327 "head noun" 308 | "4243" "2m11" 10 "subj-ext" "8" 479 "head noun" 309 | "4259" "2m11" 16 "subj-ext" "8" 272 "head noun" 310 | "4272" "2m11" 14 "subj-ext" "8" 413 "head noun" 311 | "4285" "2m11" 2 "subj-ext" "8" 235 "head noun" 312 | "4300" "2m11" 9 "obj-ext" "8" 275 "head noun" 313 | "4316" "2m12" 13 "subj-ext" "8" 2590 "head noun" 314 | "4329" "2m12" 5 "subj-ext" "8" 482 "head noun" 315 | "4341" "2m12" 2 "obj-ext" "8" 321 "head noun" 316 | "4356" "2m12" 16 "obj-ext" "8" 238 "head noun" 317 | "4369" "2m12" 11 "subj-ext" "8" 414 "head noun" 318 | "4382" "2m12" 6 "obj-ext" "8" 256 "head noun" 319 | "4395" "2m12" 1 "subj-ext" "8" 297 "head noun" 320 | "4410" "2m12" 9 "subj-ext" "8" 253 "head noun" 321 | "4426" "2m12" 7 "subj-ext" "8" 297 "head noun" 322 | "4438" "2m12" 14 "obj-ext" "8" 226 "head noun" 323 | "4451" "2m12" 15 "subj-ext" "8" 238 "head noun" 324 | "4462" "2m12" 4 "obj-ext" "8" 229 "head noun" 325 | "4479" "2m12" 8 "obj-ext" "8" 294 "head noun" 326 | "4492" "2m12" 3 "subj-ext" "8" 303 "head noun" 327 | "4508" "2m13" 9 "obj-ext" "8" 869 "head noun" 328 | "4524" "2m13" 10 "subj-ext" "8" 632 "head noun" 329 | "4540" "2m13" 5 "obj-ext" "8" 1860 "head noun" 330 | "4552" "2m13" 7 "obj-ext" "8" 644 "head noun" 331 | "4564" "2m13" 15 "obj-ext" "8" 844 "head noun" 332 | "4575" "2m13" 16 "subj-ext" "8" 568 "head noun" 333 | "4588" "2m13" 2 "subj-ext" "8" 315 "head noun" 334 | "4603" "2m13" 6 "subj-ext" "8" 494 "head noun" 335 | "4615" "2m13" 14 "subj-ext" "8" 700 "head noun" 336 | "4628" "2m13" 3 "obj-ext" "8" 1617 "head noun" 337 | "4644" "2m13" 4 "subj-ext" "8" 417 "head noun" 338 | "4661" "2m13" 1 "obj-ext" "8" 589 "head noun" 339 | "4676" "2m13" 13 "obj-ext" "8" 524 "head noun" 340 | "4689" "2m13" 11 "obj-ext" "8" 454 "head noun" 341 | "4702" "2m13" 8 "subj-ext" "8" 3826 "head noun" 342 | "4715" "2m14" 1 "subj-ext" "8" 275 "head noun" 343 | "4730" "2m14" 3 "subj-ext" "8" 389 "head noun" 344 | "4746" "2m14" 15 "subj-ext" "8" 213 "head noun" 345 | "4757" "2m14" 6 "obj-ext" "8" 272 "head noun" 346 | "4769" "2m14" 11 "subj-ext" "8" 284 "head noun" 347 | "4782" "2m14" 5 "subj-ext" "8" 244 "head noun" 348 | "4794" "2m14" 7 "subj-ext" "8" 285 "head noun" 349 | "4806" "2m14" 16 "obj-ext" "8" 296 "head noun" 350 | "4819" "2m14" 2 "obj-ext" "8" 325 "head noun" 351 | "4834" "2m14" 10 "obj-ext" "8" 297 "head noun" 352 | "4850" "2m14" 13 "subj-ext" "8" 253 "head noun" 353 | "4863" "2m14" 4 "obj-ext" "8" 263 "head noun" 354 | "4880" "2m14" 14 "obj-ext" "8" 266 "head noun" 355 | "4893" "2m14" 8 "obj-ext" "8" 226 "head noun" 356 | "4906" "2m14" 9 "subj-ext" "8" 235 "head noun" 357 | "4922" "2m15" 9 "obj-ext" "8" 389 "head noun" 358 | "4938" "2m15" 15 "obj-ext" "8" 417 "head noun" 359 | "4949" "2m15" 11 "obj-ext" "8" 343 "head noun" 360 | "4962" "2m15" 13 "obj-ext" "8" 306 "head noun" 361 | "4975" "2m15" 8 "subj-ext" "8" 1171 "head noun" 362 | "4988" "2m15" 6 "subj-ext" "8" 269 "head noun" 363 | "5000" "2m15" 2 "subj-ext" "8" 322 "head noun" 364 | "5015" "2m15" 10 "subj-ext" "8" 284 "head noun" 365 | "5031" "2m15" 7 "obj-ext" "8" 318 "head noun" 366 | "5043" "2m15" 5 "obj-ext" "8" 294 "head noun" 367 | "5055" "2m15" 1 "obj-ext" "8" 284 "head noun" 368 | "5070" "2m15" 4 "subj-ext" "8" 414 "head noun" 369 | "5087" "2m15" 3 "obj-ext" "8" 367 "head noun" 370 | "5103" "2m15" 14 "subj-ext" "8" 275 "head noun" 371 | "5116" "2m15" 16 "subj-ext" "8" 272 "head noun" 372 | "5129" "2m16" 4 "obj-ext" "8" 445 "head noun" 373 | 
"5146" "2m16" 14 "obj-ext" "8" 482 "head noun" 374 | "5159" "2m16" 5 "subj-ext" "8" 497 "head noun" 375 | "5171" "2m16" 10 "obj-ext" "8" 380 "head noun" 376 | "5187" "2m16" 13 "subj-ext" "8" 417 "head noun" 377 | "5200" "2m16" 3 "subj-ext" "8" 466 "head noun" 378 | "5216" "2m16" 8 "obj-ext" "8" 485 "head noun" 379 | "5229" "2m16" 2 "obj-ext" "8" 319 "head noun" 380 | "5244" "2m16" 11 "subj-ext" "8" 306 "head noun" 381 | "5257" "2m16" 16 "obj-ext" "8" 285 "head noun" 382 | "5270" "2m16" 6 "obj-ext" "8" 275 "head noun" 383 | "5282" "2m16" 15 "subj-ext" "8" 337 "head noun" 384 | "5293" "2m16" 1 "subj-ext" "8" 266 "head noun" 385 | "5308" "2m16" 7 "subj-ext" "8" 318 "head noun" 386 | "5320" "2m16" 9 "subj-ext" "8" 303 "head noun" 387 | "5336" "2m17" 4 "subj-ext" "8" 518 "head noun" 388 | "5353" "2m17" 8 "subj-ext" "8" 469 "head noun" 389 | "5366" "2m17" 1 "obj-ext" "8" 368 "head noun" 390 | "5381" "2m17" 16 "subj-ext" "8" 281 "head noun" 391 | "5394" "2m17" 11 "obj-ext" "8" 216 "head noun" 392 | "5407" "2m17" 3 "obj-ext" "8" 232 "head noun" 393 | "5423" "2m17" 9 "obj-ext" "8" 263 "head noun" 394 | "5439" "2m17" 10 "subj-ext" "8" 282 "head noun" 395 | "5455" "2m17" 7 "obj-ext" "8" 248 "head noun" 396 | "5467" "2m17" 5 "obj-ext" "8" 260 "head noun" 397 | "5479" "2m17" 14 "subj-ext" "8" 272 "head noun" 398 | "5492" "2m17" 13 "obj-ext" "8" 251 "head noun" 399 | "5505" "2m17" 6 "subj-ext" "8" 260 "head noun" 400 | "5517" "2m17" 2 "subj-ext" "8" 288 "head noun" 401 | "5532" "2m17" 15 "obj-ext" "8" 244 "head noun" 402 | "5543" "2m18" 4 "obj-ext" "8" 309 "head noun" 403 | "5560" "2m18" 9 "subj-ext" "8" 420 "head noun" 404 | "5576" "2m18" 5 "subj-ext" "8" 457 "head noun" 405 | "5588" "2m18" 15 "subj-ext" "8" 284 "head noun" 406 | "5599" "2m18" 6 "obj-ext" "8" 337 "head noun" 407 | "5611" "2m18" 8 "obj-ext" "8" 1014 "head noun" 408 | "5624" "2m18" 16 "obj-ext" "8" 395 "head noun" 409 | "5637" "2m18" 11 "subj-ext" "8" 722 "head noun" 410 | "5650" "2m18" 14 "obj-ext" "8" 435 "head noun" 411 | "5663" "2m18" 7 "subj-ext" "8" 374 "head noun" 412 | "5675" "2m18" 13 "subj-ext" "8" 469 "head noun" 413 | "5688" "2m18" 10 "obj-ext" "8" 266 "head noun" 414 | "5704" "2m18" 3 "subj-ext" "8" 235 "head noun" 415 | "5720" "2m18" 2 "obj-ext" "8" 294 "head noun" 416 | "5735" "2m18" 1 "subj-ext" "8" 435 "head noun" 417 | "5750" "2m19" 3 "obj-ext" "8" 460 "head noun" 418 | "5766" "2m19" 16 "subj-ext" "8" 866 "head noun" 419 | "5779" "2m19" 5 "obj-ext" "8" 512 "head noun" 420 | "5791" "2m19" 11 "obj-ext" "8" 423 "head noun" 421 | "5804" "2m19" 10 "subj-ext" "8" 2604 "head noun" 422 | "5820" "2m19" 9 "obj-ext" "8" 1417 "head noun" 423 | "5836" "2m19" 8 "subj-ext" "8" 1961 "head noun" 424 | "5849" "2m19" 6 "subj-ext" "8" 383 "head noun" 425 | "5861" "2m19" 4 "subj-ext" "8" 475 "head noun" 426 | "5878" "2m19" 15 "obj-ext" "8" 537 "head noun" 427 | "5889" "2m19" 7 "obj-ext" "8" 278 "head noun" 428 | "5901" "2m19" 2 "subj-ext" "8" 278 "head noun" 429 | "5916" "2m19" 1 "obj-ext" "8" 321 "head noun" 430 | "5931" "2m19" 14 "subj-ext" "8" 672 "head noun" 431 | "5944" "2m19" 13 "obj-ext" "8" 417 "head noun" 432 | "5957" "2m2" 5 "subj-ext" "8" 464 "head noun" 433 | "5969" "2m2" 15 "subj-ext" "8" 313 "head noun" 434 | "5980" "2m2" 11 "subj-ext" "8" 517 "head noun" 435 | "5993" "2m2" 6 "obj-ext" "8" 483 "head noun" 436 | "6005" "2m2" 10 "obj-ext" "8" 889 "head noun" 437 | "6021" "2m2" 16 "obj-ext" "8" 514 "head noun" 438 | "6034" "2m2" 2 "obj-ext" "8" 359 "head noun" 439 | "6049" "2m2" 3 "subj-ext" "8" 436 "head noun" 440 | "6065" "2m2" 
13 "subj-ext" "8" 412 "head noun" 441 | "6078" "2m2" 8 "obj-ext" "8" 818 "head noun" 442 | "6091" "2m2" 9 "subj-ext" "8" 504 "head noun" 443 | "6107" "2m2" 1 "subj-ext" "8" 424 "head noun" 444 | "6122" "2m2" 7 "subj-ext" "8" 387 "head noun" 445 | "6134" "2m2" 14 "obj-ext" "8" 528 "head noun" 446 | "6147" "2m2" 4 "obj-ext" "8" 365 "head noun" 447 | "6164" "2m20" 11 "subj-ext" "8" 705 "head noun" 448 | "6177" "2m20" 16 "obj-ext" "8" 469 "head noun" 449 | "6190" "2m20" 8 "obj-ext" "8" 567 "head noun" 450 | "6203" "2m20" 15 "subj-ext" "8" 368 "head noun" 451 | "6214" "2m20" 4 "obj-ext" "8" 487 "head noun" 452 | "6231" "2m20" 1 "subj-ext" "8" 315 "head noun" 453 | "6246" "2m20" 6 "obj-ext" "8" 306 "head noun" 454 | "6258" "2m20" 9 "subj-ext" "8" 472 "head noun" 455 | "6274" "2m20" 14 "obj-ext" "8" 260 "head noun" 456 | "6287" "2m20" 2 "obj-ext" "8" 303 "head noun" 457 | "6302" "2m20" 3 "subj-ext" "8" 256 "head noun" 458 | "6318" "2m20" 10 "obj-ext" "8" 249 "head noun" 459 | "6334" "2m20" 7 "subj-ext" "8" 1588 "head noun" 460 | "6346" "2m20" 5 "subj-ext" "8" 245 "head noun" 461 | "6358" "2m20" 13 "subj-ext" "8" 200 "head noun" 462 | "6371" "2m21" 5 "obj-ext" "8" 1207 "head noun" 463 | "6383" "2m21" 14 "subj-ext" "8" 791 "head noun" 464 | "6396" "2m21" 3 "obj-ext" "8" 327 "head noun" 465 | "6412" "2m21" 6 "subj-ext" "8" 834 "head noun" 466 | "6424" "2m21" 10 "subj-ext" "8" 788 "head noun" 467 | "6440" "2m21" 8 "subj-ext" "8" 782 "head noun" 468 | "6453" "2m21" 13 "obj-ext" "8" 1663 "head noun" 469 | "6466" "2m21" 4 "subj-ext" "8" 475 "head noun" 470 | "6483" "2m21" 9 "obj-ext" "8" 530 "head noun" 471 | "6499" "2m21" 2 "subj-ext" "8" 293 "head noun" 472 | "6514" "2m21" 15 "obj-ext" "8" 643 "head noun" 473 | "6525" "2m21" 7 "obj-ext" "8" 539 "head noun" 474 | "6537" "2m21" 16 "subj-ext" "8" 339 "head noun" 475 | "6550" "2m21" 1 "obj-ext" "8" 416 "head noun" 476 | "6565" "2m21" 11 "obj-ext" "8" 327 "head noun" 477 | "6578" "2m22" 10 "obj-ext" "8" 472 "head noun" 478 | "6594" "2m22" 13 "subj-ext" "8" 1990 "head noun" 479 | "6607" "2m22" 16 "obj-ext" "8" 367 "head noun" 480 | "6620" "2m22" 9 "subj-ext" "8" 456 "head noun" 481 | "6636" "2m22" 7 "subj-ext" "8" 410 "head noun" 482 | "6648" "2m22" 15 "subj-ext" "8" 557 "head noun" 483 | "6659" "2m22" 4 "obj-ext" "8" 465 "head noun" 484 | "6676" "2m22" 6 "obj-ext" "8" 459 "head noun" 485 | "6688" "2m22" 8 "obj-ext" "8" 696 "head noun" 486 | "6701" "2m22" 11 "subj-ext" "8" 379 "head noun" 487 | "6714" "2m22" 5 "subj-ext" "8" 287 "head noun" 488 | "6726" "2m22" 3 "subj-ext" "8" 524 "head noun" 489 | "6742" "2m22" 1 "subj-ext" "8" 367 "head noun" 490 | "6757" "2m22" 14 "obj-ext" "8" 336 "head noun" 491 | "6770" "2m22" 2 "obj-ext" "8" 305 "head noun" 492 | "6785" "2m3" 1 "obj-ext" "8" 823 "head noun" 493 | "6800" "2m3" 13 "obj-ext" "8" 2200 "head noun" 494 | "6813" "2m3" 6 "subj-ext" "8" 1168 "head noun" 495 | "6825" "2m3" 14 "subj-ext" "8" 697 "head noun" 496 | "6838" "2m3" 3 "obj-ext" "8" 401 "head noun" 497 | "6854" "2m3" 10 "subj-ext" "8" 558 "head noun" 498 | "6870" "2m3" 5 "obj-ext" "8" 694 "head noun" 499 | "6882" "2m3" 15 "obj-ext" "8" 380 "head noun" 500 | "6893" "2m3" 9 "obj-ext" "8" 595 "head noun" 501 | "6909" "2m3" 8 "subj-ext" "8" 1541 "head noun" 502 | "6922" "2m3" 4 "subj-ext" "8" 355 "head noun" 503 | "6939" "2m3" 2 "subj-ext" "8" 355 "head noun" 504 | "6954" "2m3" 7 "obj-ext" "8" 414 "head noun" 505 | "6966" "2m3" 11 "obj-ext" "8" 355 "head noun" 506 | "6979" "2m3" 16 "subj-ext" "8" 352 "head noun" 507 | "6992" "2m4" 2 "obj-ext" "8" 297 "head 
noun" 508 | "7007" "2m4" 7 "subj-ext" "8" 260 "head noun" 509 | "7019" "2m4" 5 "subj-ext" "8" 398 "head noun" 510 | "7031" "2m4" 14 "obj-ext" "8" 217 "head noun" 511 | "7044" "2m4" 1 "subj-ext" "8" 244 "head noun" 512 | "7059" "2m4" 15 "subj-ext" "8" 207 "head noun" 513 | "7070" "2m4" 3 "subj-ext" "8" 269 "head noun" 514 | "7086" "2m4" 9 "subj-ext" "8" 244 "head noun" 515 | "7102" "2m4" 4 "obj-ext" "8" 207 "head noun" 516 | "7119" "2m4" 11 "subj-ext" "8" 238 "head noun" 517 | "7132" "2m4" 10 "obj-ext" "8" 408 "head noun" 518 | "7148" "2m4" 8 "obj-ext" "8" 248 "head noun" 519 | "7161" "2m4" 6 "obj-ext" "8" 232 "head noun" 520 | "7173" "2m4" 13 "subj-ext" "8" 241 "head noun" 521 | "7186" "2m4" 16 "obj-ext" "8" 220 "head noun" 522 | "7199" "2m5" 6 "subj-ext" "8" 621 "head noun" 523 | "7211" "2m5" 16 "subj-ext" "8" 433 "head noun" 524 | "7224" "2m5" 9 "obj-ext" "8" 528 "head noun" 525 | "7240" "2m5" 3 "obj-ext" "8" 371 "head noun" 526 | "7256" "2m5" 4 "subj-ext" "8" 429 "head noun" 527 | "7273" "2m5" 11 "obj-ext" "8" 387 "head noun" 528 | "7286" "2m5" 14 "subj-ext" "8" 395 "head noun" 529 | "7299" "2m5" 10 "subj-ext" "8" 482 "head noun" 530 | "7315" "2m5" 2 "subj-ext" "8" 387 "head noun" 531 | "7330" "2m5" 13 "obj-ext" "8" 328 "head noun" 532 | "7343" "2m5" 5 "obj-ext" "8" 528 "head noun" 533 | "7355" "2m5" 1 "obj-ext" "8" 282 "head noun" 534 | "7370" "2m5" 7 "obj-ext" "8" 325 "head noun" 535 | "7382" "2m5" 15 "obj-ext" "8" 318 "head noun" 536 | "7393" "2m5" 8 "subj-ext" "8" 281 "head noun" 537 | "7406" "2m6" 9 "subj-ext" "8" 392 "head noun" 538 | "7422" "2m6" 15 "subj-ext" "8" 1235 "head noun" 539 | "7433" "2m6" 6 "obj-ext" "8" 257 "head noun" 540 | "7445" "2m6" 13 "subj-ext" "8" 3272 "head noun" 541 | "7458" "2m6" 16 "obj-ext" "8" 964 "head noun" 542 | "7471" "2m6" 1 "subj-ext" "8" 370 "head noun" 543 | "7486" "2m6" 3 "subj-ext" "8" 269 "head noun" 544 | "7502" "2m6" 8 "obj-ext" "8" 739 "head noun" 545 | "7515" "2m6" 5 "subj-ext" "8" 300 "head noun" 546 | "7527" "2m6" 10 "obj-ext" "8" 678 "head noun" 547 | "7543" "2m6" 14 "obj-ext" "8" 216 "head noun" 548 | "7556" "2m6" 11 "subj-ext" "8" 256 "head noun" 549 | "7569" "2m6" 7 "subj-ext" "8" 232 "head noun" 550 | "7581" "2m6" 4 "obj-ext" "8" 266 "head noun" 551 | "7598" "2m6" 2 "obj-ext" "8" 195 "head noun" 552 | "7613" "2m7" 11 "obj-ext" "8" 285 "head noun" 553 | "7626" "2m7" 15 "obj-ext" "8" 355 "head noun" 554 | "7637" "2m7" 4 "subj-ext" "8" 232 "head noun" 555 | "7654" "2m7" 6 "subj-ext" "8" 254 "head noun" 556 | "7666" "2m7" 16 "subj-ext" "8" 198 "head noun" 557 | "7679" "2m7" 7 "obj-ext" "8" 269 "head noun" 558 | "7691" "2m7" 13 "obj-ext" "8" 208 "head noun" 559 | "7704" "2m7" 8 "subj-ext" "8" 549 "head noun" 560 | "7717" "2m7" 5 "obj-ext" "8" 263 "head noun" 561 | "7729" "2m7" 9 "obj-ext" "8" 217 "head noun" 562 | "7745" "2m7" 2 "subj-ext" "8" 189 "head noun" 563 | "7760" "2m7" 10 "subj-ext" "8" 439 "head noun" 564 | "7776" "2m7" 3 "obj-ext" "8" 223 "head noun" 565 | "7792" "2m7" 1 "obj-ext" "8" 208 "head noun" 566 | "7807" "2m7" 14 "subj-ext" "8" 284 "head noun" 567 | "7820" "2m8" 16 "obj-ext" "8" 322 "head noun" 568 | "7833" "2m8" 6 "obj-ext" "8" 353 "head noun" 569 | "7845" "2m8" 13 "subj-ext" "8" 1126 "head noun" 570 | "7858" "2m8" 15 "subj-ext" "8" 275 "head noun" 571 | "7869" "2m8" 4 "obj-ext" "8" 291 "head noun" 572 | "7886" "2m8" 8 "obj-ext" "8" 377 "head noun" 573 | "7899" "2m8" 14 "obj-ext" "8" 278 "head noun" 574 | "7912" "2m8" 9 "subj-ext" "8" 306 "head noun" 575 | "7928" "2m8" 1 "subj-ext" "8" 288 "head noun" 576 | "7943" 
"2m8" 3 "subj-ext" "8" 263 "head noun" 577 | "7959" "2m8" 7 "subj-ext" "8" 238 "head noun" 578 | "7971" "2m8" 10 "obj-ext" "8" 294 "head noun" 579 | "7987" "2m8" 2 "obj-ext" "8" 275 "head noun" 580 | "8002" "2m8" 11 "subj-ext" "8" 303 "head noun" 581 | "8015" "2m8" 5 "subj-ext" "8" 321 "head noun" 582 | "8027" "2m9" 11 "obj-ext" "8" 562 "head noun" 583 | "8040" "2m9" 16 "subj-ext" "8" 454 "head noun" 584 | "8053" "2m9" 6 "subj-ext" "8" 436 "head noun" 585 | "8065" "2m9" 7 "obj-ext" "8" 494 "head noun" 586 | "8077" "2m9" 13 "obj-ext" "8" 362 "head noun" 587 | "8090" "2m9" 4 "subj-ext" "8" 355 "head noun" 588 | "8107" "2m9" 3 "obj-ext" "8" 765 "head noun" 589 | "8123" "2m9" 5 "obj-ext" "8" 663 "head noun" 590 | "8135" "2m9" 8 "subj-ext" "8" 469 "head noun" 591 | "8148" "2m9" 15 "obj-ext" "8" 457 "head noun" 592 | "8159" "2m9" 14 "subj-ext" "8" 355 "head noun" 593 | "8172" "2m9" 2 "subj-ext" "8" 257 "head noun" 594 | "8187" "2m9" 10 "subj-ext" "8" 438 "head noun" 595 | "8203" "2m9" 9 "obj-ext" "8" 241 "head noun" 596 | "8219" "2m9" 1 "obj-ext" "8" 208 "head noun" 597 | -------------------------------------------------------------------------------- /data/matrixFit.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/matrixFit.Rda -------------------------------------------------------------------------------- /data/pp.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/pp.Rda -------------------------------------------------------------------------------- /data/ranIntFit.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/ranIntFit.Rda -------------------------------------------------------------------------------- /data/ranIntSlpFit.Rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/data/ranIntSlpFit.Rda -------------------------------------------------------------------------------- /doc/SorensenEtAl.Rnw: -------------------------------------------------------------------------------- 1 | \documentclass[doc, floatsintext]{apa6} % man for manuscript format, jou for journal format, doc for standard LaTeX document format 2 | \usepackage[natbibapa]{apacite} 3 | \usepackage[american]{babel} 4 | \usepackage[utf8]{inputenc} 5 | \usepackage{csquotes} 6 | 7 | \usepackage{setspace} 8 | 9 | \usepackage[outdir=./]{epstopdf} 10 | 11 | 12 | \usepackage{amsmath,amssymb,amsfonts} 13 | 14 | \usepackage{url} % this allows us to cite URLs in the text 15 | \usepackage{graphicx} % allows for graphic to float when doing jou or doc style 16 | \usepackage{verbatim} % allows us to use \begin{comment} environment 17 | \usepackage{caption} 18 | %\usepackage{lscape} 19 | \usepackage{pdflscape} 20 | 21 | \usepackage{fancyvrb} 22 | 23 | \usepackage{newfloat} 24 | \DeclareFloatingEnvironment[ 25 | % fileext=los, 26 | % listname=List of Schemes, 27 | % name=Listing, 28 | % placement=!htbp, 29 | % within=section, 30 | ]{listing} 31 | 32 | \title{Bayesian linear mixed models using Stan: 33 | A tutorial for psychologists, linguists, and cognitive scientists} 34 | 
\shorttitle{Bayesian linear mixed models: A tutorial} 35 | 36 | \threeauthors{Tanner Sorensen}{Sven Hohenstein}{Shravan Vasishth} 37 | 38 | \threeaffiliations{ 39 | Signal Analysis and Interpretation Laboratory, University of Southern California, Los Angeles, CA, USA 40 | } 41 | { 42 | Department of Psychology, 43 | University of Potsdam, Potsdam, Germany 44 | } 45 | { 46 | Department of Linguistics, 47 | University of Potsdam, Potsdam, Germany, and \\ 48 | CEREMADE, Universit\'e Paris-Dauphine, Paris, France} 49 | 50 | % Based off of code provided in a question by Melvin Roest on tex.stackexchange.com 51 | % (http://tex.stackexchange.com/questions/176745/strange-knitr-behavior-with-apa6-class-manuscript) 52 | 53 | %\rightheader{knitr and apa6} % for jou format 54 | \leftheader{Sorensen, Hohenstein, Vasishth} 55 | 56 | \authornote{ 57 | Please send correspondence to tsorense@usc.edu and \{sven.hohenstein,vasishth\}@uni-potsdam.de. 58 | 59 | The authors' orcid IDs are: 0000-0002-3111-9974 (Sorensen), 0000-0002-9708-1593 (Hohenstein), and 0000-0003-2027-1994 (Vasishth). 60 | 61 | All authors have read and approved the final manuscript, and they have no conflicts of interest with respect to their authorship or the publication of this article. Furthermore, the authors did not benefit from funding.} 62 | 63 | \note{\today} 64 | 65 | % \journal{Submitted to Behavioral Methods} 66 | % \volume{under review} 67 | \keywords{Bayesian data analysis, linear mixed models, Stan} 68 | 69 | \doublespacing 70 | \pagebreak 71 | \abstract{With the arrival of the R packages \texttt{nlme} and \texttt{lme4}, linear mixed models (LMMs) have come to be widely used in experimentally-driven areas like psychology, linguistics, and cognitive science. This tutorial provides a practical introduction to fitting LMMs in a Bayesian framework using the probabilistic programming language Stan. We choose Stan (rather than WinBUGS or JAGS) because it provides an elegant and scalable framework for fitting models in most of the standard applications of LMMs. We ease the reader into fitting increasingly complex LMMs, using a two-condition repeated measures self-paced reading study.} 72 | 73 | \ccoppy{Draft of \today} 74 | \begin{document} 75 | 76 | \maketitle 77 | 78 | <>= 79 | library(knitr) 80 | library(coda) 81 | 82 | # set global chunk options, put figures into folder 83 | options(replace.assign=TRUE,show.signif.stars=FALSE) 84 | opts_chunk$set(fig.path='figures/figure-', fig.align='center', fig.show='hold') 85 | options(replace.assign=TRUE,width=75) 86 | # opts_chunk$set(dev='postscript') 87 | opts_chunk$set(dev='pdf') 88 | options(digits = 2) 89 | library(rstan) 90 | rstan_options(auto_write = TRUE) 91 | options(mc.cores = parallel::detectCores()) 92 | 93 | set.seed(9991) 94 | 95 | # save workspace image, if you want 96 | #the.date <- format(Sys.time(), "%b%d%Y") 97 | #save.image(file=paste0("homework01-",the.date,".RData") 98 | 99 | # knit_hooks$set(source = function(x, options) { 100 | # paste("\\begin{lstlisting}[numbers=left]\n", x, 101 | # "\\end{lstlisting}\n", sep = "") 102 | # }) 103 | @ 104 | 105 | \section{Introduction} 106 | 107 | Linear mixed models, or hierarchical/multilevel linear models, have become the main workhorse of experimental research in psychology, linguistics, and cognitive science, where repeated measures designs are the norm. 
108 | Within the programming environment R~\citep{R}, 109 | the \texttt{nlme} package \citep{pinheirobates} and its successor, \texttt{lme4} \citep{Bates2015}, have revolutionized the use of linear mixed models (LMMs) due to their simplicity and speed: one can fit fairly complicated models relatively quickly, often with a single line of code. A great advantage of LMMs over traditional approaches such as repeated measures ANOVA and paired t-tests is that there is no need to aggregate over subjects and items to compute two sets of F-scores (or several t-scores) separately; a single model can take all sources of variance into account simultaneously. Furthermore, comparisons between conditions can easily be implemented in a single model through appropriate contrast coding. 110 | 111 | Other important developments related to LMMs have been unfolding in computational statistics. 112 | Specifically, probabilistic programming languages like WinBUGS \citep{lunn2000winbugs}, JAGS \citep{plummer2011jags}, and Stan \citep{stan-manual:2014}, among others, have made it possible to fit Bayesian LMMs quite easily. However, one prerequisite for using these programming languages is that some background statistical knowledge is needed before one can define the model. This difficulty is well known; for example, \citet[p.~4]{spiegelhalter2004bayesian} write: ``Bayesian statistics has a (largely deserved) reputation for being mathematically challenging and difficult to put into practice\dots''. 113 | 114 | The purpose of this paper is to facilitate a first encounter with model specification in one of these programming languages, Stan. The tutorial is aimed primarily at psychologists, linguists, and cognitive scientists who have used \texttt{lme4} to fit models to their data, but who may have only a basic knowledge of the underlying LMM machinery. By ``basic knowledge'' we mean that they may not be able to answer some or all of these questions: what is a design matrix; what is contrast coding; what is a random effects variance-covariance matrix in a linear mixed model; what is the Cholesky decomposition? Our tutorial is not intended for statisticians or psychology researchers who could, for example, write their own Markov Chain Monte Carlo (MCMC) samplers in R or C++ or the like; for them, the Stan manual is the optimal starting point. The present tutorial attempts to ease the beginner into their first steps towards fitting Bayesian linear mixed models. More detailed presentations about linear mixed models are available in several textbooks; references are provided at the end of this tutorial. 115 | For the complete newcomer to statistical methods, the articles by 116 | \citet{VasishthNicenboimStatMeth} and 117 | \citet{NicenboimVasishthStatMeth} should be read first, as they provide a ground-up preparation for the present article. 118 | 119 | We have chosen Stan over JAGS and WinBUGS because arbitrarily complex models can be fit with it.
For example, it is possible (if time consuming) to fit a model with $14$ fixed effects predictors and two crossed random effects by subject and item, each involving a $14\times 14$ variance-covariance matrix \citep{BatesEtAlParsimonious}; as far as we are aware, such models cannot be fit in JAGS or WinBUGS.\footnote{Whether it makes sense in general to fit such a complex model is a different issue; see \citet{Gelman14}, and \citet{BatesEtAlParsimonious} for recent discussion.} 120 | 121 | In this tutorial, 122 | we take it as a given that the reader is interested in learning how to fit Bayesian linear mixed models. 123 | The tutorial is structured as follows. After a short introduction to Bayesian modeling, we begin by 124 | %Section~\ref{sec:modeling} 125 | successively building up increasingly complex LMMs using the data-set reported by~\citet{gibsonwu}, which has a simple two-condition design. At each step, we explain the structure of the model. The next section takes up inference for this two-condition design. 126 | 127 | This paper was written using a literate programming tool, \texttt{knitr} \citep{xie2015knitr}; this integrates documentation for the accompanying code with the paper. 128 | The \texttt{knitr} file that generated this paper, as well as all the code and data used in this tutorial, can be downloaded from our website: 129 | 130 | \url{https://www.ling.uni-potsdam.de/~vasishth/statistics/BayesLMMs.html} 131 | 132 | \noindent 133 | In addition, the source code for the paper, all R code, and data are available on github at: 134 | 135 | \url{https://github.com/vasishth/BayesLMMTutorial} 136 | 137 | We start with the two-condition repeated measures data-set~\citep{gibsonwu} as a concrete running example. This simple example serves as a starter kit for fitting commonly used LMMs in the Bayesian setting. We assume that the reader has the relevant software installed; specifically, the RStan interface to Stan in R. For detailed instructions, see 138 | 139 | \url{https://github.com/stan-dev/rstan/wiki/RStan-Getting-Started} 140 | 141 | 142 | 143 | \section{Bayesian statistics} 144 | 145 | Bayesian modeling has two major advantages over frequentist analysis with linear mixed models. First, information based on pre-existing knowledge can be incorporated into the analysis using different priors. Second, complex models with a large number of random variance components can be fit. In the following, we will provide a short introduction to Bayesian statistics which highlights these two advantages of the Bayesian approach to data analysis. 146 | 147 | The first advantage of the Bayesian approach is a consequence of Bayes' Theorem, the fundamental rule of Bayesian statistics. It can be seen as a way of understanding how the probability that a hypothesis is true is affected by new data. In mathematical notation, Bayes' Theorem states 148 | $$ 149 | P(H\mid D) = \frac{P(D\mid H) P(H)}{P(D)}, 150 | $$ 151 | where $H$ is the hypothesis we are interested in and $D$ represents new data. Since $D$ is fixed for a given data-set, the theorem can be rephrased as 152 | $$ 153 | P(H\mid D) \propto P(D\mid H) P(H). 154 | $$ 155 | The \emph{posterior} probability that the hypothesis is true given new data, $P(H\mid D)$, is proportional to the product of the \emph{likelihood} of the new data given the hypothesis, $P(D\mid H)$, and the \emph{prior} probability of the hypothesis, $P(H)$. 
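To make this proportionality concrete, the following short R sketch (not part of the tutorial code proper) evaluates the unnormalized posterior for the mean of a normal distribution on a grid of candidate values, using the same illustrative numbers as the toy example that follows; the posterior is simply the pointwise product of prior and likelihood, rescaled so that it integrates to one.

<<eval=FALSE>>=
# Sketch: posterior is proportional to likelihood times prior, evaluated on a grid.
# The numbers are the illustrative values of the toy example below
# (prior mean 60, prior variance 1000, n = 20, sample mean 100, sample sd 40).
theta <- seq(0, 160, length.out = 2000)               # candidate values of the mean
prior <- dnorm(theta, mean = 60, sd = sqrt(1000))     # prior density
lik   <- dnorm(100, mean = theta, sd = 40 / sqrt(20)) # likelihood of the sample mean
post  <- prior * lik                                  # unnormalized posterior
post  <- post / sum(post * (theta[2] - theta[1]))     # normalize over the grid
theta[which.max(post)]                                # posterior mode, roughly 97
@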
156 | 157 | For the purposes of this paper, the goal of a Bayesian analysis is simply to derive the posterior distribution of each parameter of interest, given some data and prior knowledge about the distributions of the parameters. The following example illustrates how the posterior depends on the likelihood and prior. Before collecting data, a researcher has some hypothesis concerning the distribution of the response variable $X$ in an experiment. The researcher expresses his or her belief in a prior distribution, say, a normal distribution with a mean value of $\mu = 60$ and variance $\sigma^2 = 1000$ (solid density in the left-hand panel of Figure \ref{fig:bayes}). The large variance reflects the researcher's uncertainty concerning the true mean of the distribution. Alternatively, if the researcher were very certain that $\mu=60$, then he or she might choose the much lower variance $\sigma^2 = 100$ (solid density in the right-hand panel of Figure \ref{fig:bayes}). 158 | 159 | \begin{figure} 160 | \centering 161 | <<>>= 162 | layout(t(1:2)) 163 | # likelihood 164 | xbar <- 100 165 | n <- 20 166 | sd <- 40 167 | # prior 0 168 | m0 <- 60 169 | v0 <- 1000 170 | # posterior 0 171 | vstar0 <- 1 / (1/v0 + n/(sd^2)) 172 | mstar0 <- vstar0 * (m0/v0 + (n * xbar) / (sd^2)) 173 | # plot 174 | curve(dnorm(x, mean = xbar, sd = sd/sqrt(n)), from = 0, to = 160, ylim = c(0, 0.07), 175 | lty = 2, ylab = "Density") 176 | curve(dnorm(x, mean = m0, sd = sqrt(v0)), add = TRUE) 177 | curve(dnorm(x, mean = mstar0, sd = sqrt(vstar0)), add = TRUE, lty = 4) 178 | legend(legend = c("Prior", "Likelihood", "Posterior"), x = "topleft", 179 | lty = c(1, 2, 4), box.lwd = 0, box.lty = 0, box.col = NA) 180 | # prior 1 181 | m1 <- 60 182 | v1 <- 100 183 | # posterior 1 184 | vstar1 <- 1 / (1/v1 + n/(sd^2)) 185 | mstar1 <- vstar1 * (m1/v1 + (n * xbar) / (sd^2)) 186 | # plot 187 | curve(dnorm(x, mean = xbar, sd = sd/sqrt(n)), from = 0, to = 160, ylim = c(0, 0.07), 188 | lty = 2, ylab = "Density") 189 | curve(dnorm(x, mean = m1, sd = sqrt(v1)), add = TRUE) 190 | curve(dnorm(x, mean = mstar1, sd = sqrt(vstar1)), add = TRUE, lty = 4) 191 | @ 192 | \caption{Prior, likelihood, and posterior normal distributions. The likelihood is based on $n = 20$ observations with sample mean $\bar{x} = 100$ and sample standard deviation $s = 40$. The prior has mean $\mu_0 = 60$ in both panels; its variance is $\sigma^2_0 = 1000$ in the left-hand panel and $\sigma^2_0 = 100$ in the right-hand panel.} 193 | \label{fig:bayes} 194 | \end{figure} 195 | 196 | The researcher starts to collect data. In our example, there are $n = 20$ values with a sample mean $\bar x = 100$ and sample standard deviation $s = 40$. The corresponding likelihood distribution is displayed in Figure \ref{fig:bayes} (dashed line). The resulting posterior distribution (dash-dot line) combines the prior and likelihood. Given the prior with the larger variance (left-hand panel), the posterior is largely influenced by the data. Given the prior with the smaller variance (right-hand panel), its influence on the posterior is much stronger, resulting in a smaller shift towards the data mean. 197 | 198 | This toy example illustrates the central idea of Bayesian modeling. The prior reflects our knowledge of past results. In most cases, we will use so-called vague flat priors such that the posterior distribution is mainly affected by the data. The resulting posterior distribution allows for making inferences about model parameters.
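For reference, the posteriors plotted in Figure~\ref{fig:bayes} are available in closed form. When the likelihood is normal with known standard deviation $\sigma$ (approximated here by the sample standard deviation) and the prior on the mean is normal with mean $\mu_0$ and variance $\sigma_0^2$, the posterior of the mean after observing $n$ data points with sample mean $\bar{x}$ is normal with variance and mean
$$
\sigma_{\ast}^2 = \frac{1}{\frac{1}{\sigma_0^2} + \frac{n}{\sigma^2}}, \qquad
\mu_{\ast} = \sigma_{\ast}^2 \left( \frac{\mu_0}{\sigma_0^2} + \frac{n \bar{x}}{\sigma^2} \right).
$$
These are exactly the quantities computed as \texttt{vstar0} and \texttt{mstar0} (and \texttt{vstar1} and \texttt{mstar1}) in the plotting code above: the posterior precision is the sum of the prior precision and the precision of the sample mean, which is why the tighter prior in the right-hand panel keeps the posterior closer to the prior mean.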
199 | 200 | The second advantage of Bayesian modeling concerns variance components (random effects). Fitting a large number of random effects in non-Bayesian settings requires a large amount of data. Often, the data-set is too small to reliably estimate variance component parameters \citep{BatesEtAlParsimonious,hannesBEAP}. However, if a researcher is interested in differences between individual subjects or items (random intercepts and random slopes) or relationships between differences (correlations between variance components), Bayesian modeling can be used even if there is not enough data for inferential statistics. The resulting posterior distributions might have high variance but they still allow for calculating probabilities of true parameter values of variance components. Note that we do not intend to criticize classical LMMs, but rather to highlight the possibilities of Bayesian modeling concerning random effects. 201 | For further explanation of the advantages this approach affords beyond the classical frequentist approach, the reader is directed to the rich literature relating to a comparison between Bayesian versus frequentist statistics (such as the provocatively titled paper by \citealp{lavine1999bayesian}, and the highly accessible textbooks by 202 | \citealp{mcelreath2016statistical} and 203 | \citealp{kruschke2014doing}). 204 | 205 | \section{Example: A two-condition repeated measures design} 206 | \label{sec:modeling} 207 | 208 | This section motivates the LMM with the self-paced reading data-set of~\citet{gibsonwu}. We introduce the data-set, state our modeling goals here, and proceed to build up increasingly complex LMMs, starting with a fixed effects linear model before adding varying intercepts, adding varying slopes, and finally modeling the correlation between the varying intercepts and slopes (the ``maximal model'' of \citealp{barr2011random}). We explain these new model parameters as we introduce them. Models of varying complexity such as these three can be generalized as described in Appendix~\ref{sec:matrix}. The result of our modeling is a probability model that expresses how the dependent variable, the reading time labeled $\hbox{\texttt{rt}}$, was generated in the experiment of~\citet{gibsonwu}. The model allows us to derive the \textit{posterior probability distribution} of the model parameters from a \textit{prior probability distribution} and a \textit{likelihood function}. Stan makes it easy to compute this posterior distribution for each model parameter of interest. The resulting posterior distribution reflects what we should believe about the value of that parameter, given the experimental data. 209 | 210 | 211 | \paragraph{The scientific question} 212 | Subject and object relative clauses have been widely used in reading studies to investigate sentence comprehension processes. A subject relative is a sentence like \textit{The senator who interrogated the journalist resigned} where a noun (\textit{senator}) is modified by a relative clause (\textit{who interrogated the journalist}), and the modified noun is the grammatical subject of the relative clause. In an object relative, the noun modified by the relative clause is the grammatical object of the relative clause (e.g., \textit{The senator who the journalist interrogated resigned}). In both cases, the noun that is modified (\textit{senator}) is called the head noun. 213 | 214 | A typical finding for English is that subject relatives are easier to process than object relatives~\citep{just1992ctc}. 
Natural languages generally have relative clauses, and the subject relative advantage has until recently been considered to be true cross-linguistically. However, Chinese relative clauses apparently represent an interesting counter-example to this generalization; recent work by~\citet{hsiao03} has suggested that in Chinese, \textit{object} relatives are easier to process than subject relatives at a particular point in the sentence (the head noun of the relative clause). We now present an analysis of a subsequently published data-set~\citep{gibsonwu} that evaluates this claim. 215 | 216 | \paragraph{The data} 217 | The dependent variable of the experiment of~\citet{gibsonwu} was the reading time $\hbox{\texttt{rt}}$ in milliseconds of the head noun of the relative clause. This was recorded in two conditions (subject relative and object relative), with $37$ subjects and $15$ items, presented in a standard Latin square design. There were originally $16$ items, but one item was removed, resulting in $37\times 15=555$ data points. However, eight data points from one subject (id 27) were missing. As a consequence, we have a total of $555-8=547$ data points. The first few lines from the data frame are shown in Table~\ref{tab:dataframe1}; ``o'' refers to object relative and ``s'' to subject relative. 218 | 219 | \begin{table}[ht] 220 | \centering 221 | \begin{tabular}{rrrlr} 222 | \hline 223 | row & subj & item & so & rt \\ 224 | \hline 225 | 1 & 1 & 13 & o & 1561 \\ 226 | 2 & 1 & 6 & s & 959 \\ 227 | 3 & 1 & 5 & o & 582 \\ 228 | 4 & 1 & 9 & o & 294 \\ 229 | 5 & 1 & 14 & s & 438 \\ 230 | 6 & 1 & 4 & s & 286 \\ 231 | \vdots & \vdots & \vdots & \vdots \\ 232 | 547 & 9 & 11 & o & 350 \\ 233 | \hline 234 | \end{tabular} 235 | \caption{First six rows, and the last row, of the data-set of Gibson and Wu (2013), as they appear in the data frame.}\label{tab:dataframe1} 236 | \end{table} 237 | 238 | \subsection{Fixed Effects Model} 239 | \label{subsec:fixef} 240 | 241 | We begin by making the working assumption that the dependent variable of reading time $\hbox{\texttt{rt}}$ on the head noun is approximately log-normally distributed~\citep{rouder2005}. This assumes that the logarithm of $\hbox{\texttt{rt}}$ is approximately normally distributed. The logarithm of the reading times, $\log \hbox{\texttt{rt}}$, has some unknown grand mean $\beta _0$. 242 | The mean of the log-normal distribution of $\hbox{\texttt{rt}}$ is the sum of $\beta _0$ and an adjustment $\beta _1 \hbox{\texttt{so}}$ whose magnitude depends on the categorical predictor $\hbox{\texttt{so}}$, which has the value $-1$ when $\hbox{\texttt{rt}}$ is from the subject relative condition, and $1$ when $\hbox{\texttt{rt}}$ is from the object relative condition. One way to write the model in terms of the logarithm of the reading times is as follows: 243 | 244 | \begin{equation}\label{eq:fixef} 245 | \log \hbox{\texttt{rt}}_{i} = \beta _0 + \beta _1\hbox{\texttt{so}}_i + \varepsilon_{i} 246 | \end{equation} 247 | This is a \textit{fixed effects model}. The index $i$ represents the $i$-th row in the data-frame (in this case, $i \in \{1,\dots,547\}$); the term $\varepsilon_i$ represents the error in the $i$-th row. 248 | With the above $\pm 1$ contrast coding, $\beta _0$ represents the grand mean of $\log \hbox{\texttt{rt}}$, regardless of relative clause type. It can be estimated by simply taking the grand mean of $\log \hbox{\texttt{rt}}$. 
249 | The parameter $\beta _1$ is an adjustment to $\beta _0$ so that the mean of $\log \hbox{\texttt{rt}}$ is $\beta _0 + 1 \beta _1$ when $\log \hbox{\texttt{rt}}$ is from the object relative condition, and $\beta _0 - 1 \beta _1$ when $\log \hbox{\texttt{rt}}$ is from the subject relative condition. Notice that $2 \beta_1$ will be the difference in the means between the object and subject relative clause conditions. 250 | Together, $\beta _0$ and $\beta _1$ make up the part of the model which characterizes the effect of the experimental manipulation, relative clause type (\texttt{so}), on the dependent variable \texttt{rt}. We call this a fixed effects model because we estimate the parameters $\beta _0$ and $\beta _1$, which do not vary from subject to subject or from item to item. In R, this would correspond to fitting a simple linear model using the \texttt{lm} function, with \texttt{so} as predictor and $\log \hbox{\texttt{rt}}$ as dependent variable. 251 | 252 | The error $\varepsilon _i $ is positive when $\log \hbox{\texttt{rt}}_i$ is greater than the expected value $\mu_i = \beta _0 + \beta _1 \hbox{\texttt{so}}_i$ and negative when $\log \hbox{\texttt{rt}}_i$ is less than the expected value $\mu_i$. Thus, the error is the amount by which the actually observed value differs from the expected value. We assume that the $\varepsilon_i$ are independently and identically distributed as a normal distribution with mean zero and unknown standard deviation $\sigma_e$. Stan parameterizes the normal distribution by the mean and standard deviation, and we follow that convention here by writing the distribution of $\varepsilon$ as $\mathcal{N}(0, \sigma _e)$. (This is different from the standard notation in statistics, where the normal distribution is defined in terms of mean and variance.) A consequence of the assumption that the errors are identically distributed is that the distribution of $\varepsilon$ should, at least approximately, have the same shape as the normal distribution. Independence implies that there should be no correlation between the errors---this is not the case in the data, since we have multiple measurements from each subject and multiple measurements from each item. This introduces correlation between errors. 253 | 254 | %%Listing1 255 | 256 | \paragraph{Setting up the data} 257 | 258 | \singlespacing 259 | \begin{listing} 260 | % <>= 261 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 262 | # read in data: 263 | rDat <- read.table("gibsonwu2012data.txt", header = TRUE) 264 | # subset critical region: 265 | rDat <- subset(rDat, region == "headnoun") 266 | 267 | # convert subjects and items to factors 268 | rDat$subj <- factor(rDat$subj) 269 | rDat$item <- factor(rDat$item) 270 | # contrast coding of type (-1 vs.
1) 271 | rDat$so <- ifelse(rDat$type == "subj-ext", -1, 1) 272 | 273 | # create data as list for Stan, and fit model: 274 | stanDat <- list(rt = rDat$rt, so = rDat$so, N = nrow(rDat)) 275 | library(rstan) 276 | fixEfFit <- stan(file = "fixEf.stan", data = stanDat, 277 | iter = 2000, chains = 4) 278 | 279 | # plot traceplot, excluding warm-up: 280 | traceplot(fixEfFit, pars = c("beta", "sigma_e"), 281 | inc_warmup = FALSE) 282 | 283 | # examine quantiles of posterior distributions: 284 | print(fixEfFit, pars = c("beta", "sigma_e"), 285 | probs = c(0.025, 0.5, 0.975)) 286 | 287 | # examine quantiles of parameter of interest: 288 | beta1 <- unlist(extract(fixEfFit, pars = "beta[2]")) 289 | print(quantile(beta1, probs = c(0.025, 0.5, 0.975))) 290 | \end{Verbatim} 291 | % @ 292 | \caption{R code for the fixed effects model.}\label{fig:fixefcode} 293 | \end{listing} 294 | \doublespacing 295 | 296 | We now fit the fixed effects model. 297 | For the following discussion, refer to the code in Listings~\ref{fig:fixefcode} (R code) and \ref{fig:fixefstancode} (Stan code). First, we read the~\citet{gibsonwu} data into a data frame \texttt{rDat} in R, and then subset the critical region (Listing~\ref{fig:fixefcode}, lines 2 and 4). 298 | Next, we create a data list \texttt{stanDat} for Stan, which contains the data (line 13). Stan requires the data to be of type list; this is different from the \texttt{lm} and \texttt{lmer} functions, which assume that the data are of type data-frame. 299 | 300 | \paragraph{Defining the model} 301 | 302 | \begin{listing} 303 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 304 | data { 305 | int<lower=1> N; //number of data points 306 | real rt[N]; //reading time 307 | real<lower=-1,upper=1> so[N]; //predictor 308 | } 309 | parameters { 310 | vector[2] beta; //intercept and slope 311 | real<lower=0> sigma_e; //error sd 312 | } 313 | model { 314 | real mu; 315 | for (i in 1:N){ // likelihood 316 | mu = beta[1] + beta[2] * so[i]; 317 | rt[i] ~ lognormal(mu, sigma_e); 318 | } 319 | } 320 | \end{Verbatim} 321 | \caption{Stan code for the fixed effects model.}\label{fig:fixefstancode} 322 | \end{listing} 323 | 324 | The next step is to write the Stan model in a text file with extension \texttt{.stan}. A Stan model consists of several \emph{blocks}. A block is a set of statements surrounded by brackets and preceded by the block name. We open up a file \texttt{fixEf.stan} in a text editor and write down the first block, the \emph{data block}, which contains the declaration of the variables in the data object \texttt{stanDat} (Listing~\ref{fig:fixefstancode}, lines 1--5). 325 | The strings \texttt{real} and \texttt{int} specify the data type for each variable. A \texttt{real} variable is a real number, and an \texttt{int} variable is an integer. For instance, \texttt{N} is the integer number of data points. The variables $\hbox{\texttt{so}}$ and $\hbox{\texttt{rt}}$ are arrays of length \texttt{N} whose entries are \texttt{real}. We constrain a variable to take only a subset of the values allowed by its type (e.g., \texttt{int} or \texttt{real}) by specifying in brackets lower and upper bounds (e.g., \texttt{<lower=-1,upper=1>} in the declaration of \texttt{so}). The variables in the data block, \texttt{N}, \texttt{rt}, and \texttt{so}, correspond to the values of the list \texttt{stanDat} in R. The names in the list \texttt{stanDat} must match the variable names in the data block (including case), but the order of variable declarations in the data block does not necessarily have to match the order of values in the list \texttt{stanDat}.
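As a further illustration of the constraint syntax (the following declarations are ours and do not appear in any model in this paper), a binary 0/1 response and a probability parameter could be declared with bounds on both sides:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
data {
  int<lower=1> N;                  //number of data points
  int<lower=0,upper=1> correct[N]; //a hypothetical binary (0/1) response
}
parameters {
  real<lower=0,upper=1> p;         //a probability, constrained to [0,1]
}
\end{Verbatim}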
326 | 327 | Next, we turn to the \textit{parameters block}, where the parameters are defined (Listing~\ref{fig:fixefstancode}, lines 6--9). These are the model parameters, for which posterior distributions are of interest. 328 | The fixed effects model has three parameters: the fixed intercept $\beta _0$, the fixed slope $\beta _1$, and the standard deviation $\sigma _e$ of the error. 329 | We store the fixed effects $\beta _0$ and $\beta _1$ in a vector, which contains variables of type \texttt{real}. Although we called our parameters $\beta_0$ and $\beta_1$ in the fixed effects model, in Stan, these are contained in the vector \texttt{beta} with indices 1 and 2. Thus, $\beta_0$ is in \texttt{beta[1]} and $\beta_1$ in \texttt{beta[2]}. 330 | The third parameter, the standard deviation $\sigma _e$ of the error (\texttt{sigma\_e}), is also defined here, and is constrained to have lower bound zero (Listing~\ref{fig:fixefstancode}, line 8). 331 | 332 | Finally, the \textit{model block} specifies the prior distribution and the likelihood (Listing~\ref{fig:fixefstancode}, lines 10--16). 333 | To understand the Stan syntax, compare the Stan code above to the specification of the fixed effects model. The Stan code literally writes out this model. The block begins with a local variable declaration for \texttt{mu}, which is the mean of $\hbox{\texttt{rt}}$ conditional on whether $\hbox{\texttt{so}}$ is $-1$ for the subject relative condition or $1$ for the object relative condition. 334 | 335 | The for-loop assigns to \texttt{mu} the mean for the log-normal distribution of \texttt{rt[i]}, conditional on the value of the predictor \texttt{so[i]} for relative clause type. The statement \texttt{rt[i] \textasciitilde{ }lognormal(mu, sigma\_e)} in a for-loop means that the logarithm of each value in the vector $\hbox{\texttt{rt}}$ is normally distributed with mean \texttt{mu} and standard deviation \texttt{sigma\_e}.\footnote{One could have equally well log-transformed the reading time and assumed a normal distribution instead of the lognormal.} 336 | 337 | The prior distributions on the parameters \texttt{beta} and \texttt{sigma\_e} would ordinarily be declared in the model block. If we do not declare a prior for a parameter, Stan assumes a uniform prior distribution over that parameter's declared support. 338 | Note that the distribution of \texttt{sigma\_e} is truncated at zero because \texttt{sigma\_e} is constrained to be positive (see the declaration \texttt{real<lower=0> sigma\_e;} in the parameters block). This means that \texttt{sigma\_e} has a uniform prior with lower bound zero.\footnote{This is an example of an improper prior, which is not a probability distribution. Although all the improper priors used in this tutorial produce posteriors which are probability distributions, this is not true in general, and care should be taken in using improper priors~\citep{gelman2006prior}. In the present case, a Cauchy prior truncated to have a lower bound of 0 could alternatively be defined for the standard deviation. For example code using such a prior, see the KBStan vignette in the \texttt{RePsychLing} package \citep{repsychling}.} 339 | 340 | \paragraph{Running the model} 341 | We save the file \texttt{fixEf.stan} which contains the Stan code and fit the model in R with the function \texttt{stan} from the package \texttt{rstan} (Listing~\ref{fig:fixefcode}, lines 15--16).
342 | This call to the function \texttt{stan} will compile a C++ program which produces samples from the joint posterior distribution of the fixed intercept $\beta _0$, the fixed slope $\beta _1$, and the standard deviation $\sigma _e$ of the error. 343 | 344 | The function generates four \textit{chains} of samples. A \emph{Markov chain} is a stochastic process in which random values are sequentially generated. Each sample depends on the previous one. Different chains are independent of each other, such that running a Stan model with four chains is equivalent to running four (identically specified) Stan models with one chain each. For the model used here, each of the four chains contains $2000$ samples of each parameter. 345 | 346 | Samples $1$ to $1000$ are part of the \textit{warmup}, where the chains settle into the posterior distribution. We analyze samples $1001$ to $2000$. The result is saved to an object \texttt{fixEfFit} of class \texttt{stanfit}. 347 | 348 | The warmup samples, also known as the \emph{burn-in} period, are intended to allow the MCMC sampling process to converge to the posterior distribution. Once a chain has converged, the samples remain quite stable.\footnote{See~\cite{Gelman14} for a precise discussion of convergence.} Before the MCMC sampling process, the number of iterations necessary for convergence is unknown. Therefore, all warmup samples are discarded. This is necessary since the initial values of the parameters might have low posterior probability and might therefore bias the result. 349 | 350 | Besides the number of samples, we specified sampling in four different chains. Each chain is independent of the others and starts with different random initial values. Running multiple chains has two advantages over a single chain. First, the independent chains are helpful for diagnostics. If all chains have converged to the same region of the parameter space, it is more likely that they converged to the posterior distribution. Second, running multiple chains allows for parallel simulations on multiple cores. 351 | 352 | \paragraph{Evaluating model convergence} 353 | The number of iterations necessary for convergence to the posterior distribution depends on the number of parameters. The probability of reaching convergence increases with the number of iterations. Hence, we generally recommend using a large number of iterations, although the process might converge after a smaller number of iterations. In the examples in the present paper, we use $1000$ iterations for warmup and another $1000$ iterations for analyzing the posterior distribution. For more complex models, more iterations might be necessary before the MCMC sampling process converges to the posterior distribution. Although there are ways to determine how long the simulation needs to be run and the number of warmup iterations given the type of posterior distribution \citep{Raftery1992}, we illustrate below practical convergence diagnostics for the evaluation of convergence in the samples. 354 | 355 | The first step after running the function \texttt{stan} should be to look at the \textit{trace plot} of each chain after warmup, using the command shown in Listing~\ref{fig:fixefcode}, lines 19 and 20 (function \texttt{traceplot}). We choose the parameters $\beta_0$, $\beta_1$, and $\sigma_e$ (\texttt{pars = c("beta", "sigma\_e")}) and omit the warmup samples (\texttt{inc\_warmup = FALSE}). 356 | A trace plot has the chains plotted against the sample number.
In Figure~\ref{fig:traceplot}, we see, for each of the three parameters, the four chains plotted against the sample number, going from $1001$ to $2000$. If the trace plot looks like a ``fat, hairy caterpillar'' \citep{lunn2012bugs} which does not bend, this suggests that the chains have converged to the posterior distribution. 357 | 358 | \begin{figure} 359 | \centering 360 | <>= 361 | # Load the fixed effects model. 362 | load("../data/fixEfFit.Rda") 363 | traceplot(fixEfFit,pars=c("beta","sigma_e"),inc_warmup=FALSE) 364 | @ 365 | \caption{Trace plots of the fixed intercept $\beta _0$ (\texttt{beta[1]}), the fixed slope $\beta _1$ (\texttt{beta[2]}), and the standard deviation $\sigma _e$ (\texttt{sigma\_e}) of the error for the fixed effects model. Different colours denote different chains.} 366 | \label{fig:traceplot} 367 | \end{figure} 368 | 369 | The second diagnostic which we use to assess whether the chains have converged to the posterior distribution is the statistic \texttt{Rhat}. Each parameter has the \texttt{Rhat} statistic associated with it~\citep{gelman1992inference}; this is essentially the ratio of between-chain variance to within-chain variance (analogous to ANOVA). The \texttt{Rhat} statistic should be approximately $1\pm 0.1$ if the chain has converged. 370 | This is shown in the rightmost column of the model summary, printed in Table~\ref{tab:quantilesGibsonWu}. The information can be obtained with \texttt{print(fixEfFit)}, where \texttt{fixEfFit} is the object of class \texttt{stanfit} returned by the function \texttt{stan}. For example, see Listing~\ref{fig:fixefcode}, lines 23--24. 371 | 372 | Having satisfied ourselves that the chains have converged, next we turn to examine this posterior distribution. If there is an indication that convergence has not happened, then, assuming that the model has no errors in it, increasing the number of samples usually resolves the issue. 373 | 374 | \begin{table}[htp] 375 | \begin{center} 376 | \begin{tabular}{crrrr} 377 | \hline 378 | parameter & mean & 2.5\% & 97.5\% & $\hat R$\\ 379 | \hline 380 | $\hat \beta_0$ & 6.06 & 6.01 & 6.12 & 1\\ 381 | $\hat \beta_1$ & $-0.04$ & $-0.09$ & 0.02 & 1\\ 382 | $\hat \sigma_e$ & 0.60 & 0.56 & 0.64 & 1\\ 383 | \hline 384 | \end{tabular} 385 | \end{center} 386 | \caption{Credible intervals and R-hat statistic in the Gibson and Wu data.}\label{tab:quantilesGibsonWu} 387 | \end{table} 388 | 389 | 390 | \paragraph{Summarizing the result} 391 | The result of fitting the fixed effects model is the \textit{joint posterior probability distribution} of the parameters $\beta _0$, $\beta _1$, and $\sigma _e$. The distribution is joint because each of the $4000$ $(4\text{ chains }\times 1000 \text{ post-warmup iterations})$ posterior samples which the call to \texttt{stan} generates is a vector $\theta = ( \beta _0, \beta _1, \sigma _e )^\intercal $ of three model parameters. Thus, the object \texttt{fixEfFit} contains $4000$ parameter vectors $\theta $ which occupy a three dimensional space. Already in three dimensions, the posterior distribution becomes difficult to view in one graph. Figure~\ref{fig:jointposterior} displays the joint posterior probability distribution of the elements of $\theta $ by projecting it down onto planes. In each of the three planes (lower triangular scattergrams) we see how one parameter varies with respect to another. In the diagonal histograms, we visualize the marginal probability distribution of each parameter separately from the other parameters.
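A compact overview of the joint posterior can also be obtained directly in R; the following minimal sketch (ours, not part of the listings) converts the posterior samples to a matrix and draws a scatterplot matrix:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
# columns correspond to beta[1], beta[2], and sigma_e
posterior <- as.matrix(fixEfFit, pars = c("beta", "sigma_e"))
# scatterplot matrix of the joint posterior samples
pairs(posterior, pch = 20)
\end{Verbatim}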
392 | \begin{figure} 393 | \centering 394 | <>= 395 | magnify<-0.80 396 | beta0 <- extract(fixEfFit,pars=c("beta[1]"))$beta 397 | beta1 <- extract(fixEfFit,pars=c("beta[2]"))$beta 398 | sigma_e <- extract(fixEfFit,pars=c("sigma_e"))$sigma_e 399 | N_iter<-length(beta0) 400 | theta<-list(beta0=beta0,beta1=beta1,sigma_e=sigma_e) 401 | lab<-c(expression(beta[0]),expression(beta[1]),expression(sigma[e])) 402 | lim<-matrix(c(min(beta0),min(beta1),min(sigma_e), 403 | max(beta0),max(beta1),max(sigma_e)),nrow=3,ncol=2) 404 | par(mfrow=c(3,3)) 405 | for(i in 1:3) 406 | for(j in 1:3){ 407 | if(i==j){ 408 | # PLOT MARGINALS ON DIAGONAL 409 | hist(theta[[i]],freq=FALSE,col="black",border="white",main=NULL,xlab=lab[i],cex.axis=magnify) 410 | }else if(i>j){ 411 | # PLOT BIVARIATE ON THE LOWER TRIANGULAR 412 | # CODE ADAPTED FROM: 413 | # http://stats.stackexchange.com/questions/24380/how-to-get-ellipse-region-from-bivariate-normal-distributed-data 414 | xy<-matrix(nrow=N_iter,ncol=2) 415 | xy[,1]<-theta[[i]] 416 | xy[,2]<-theta[[j]] 417 | center <- apply(xy, 2, mean) 418 | sigma <- cov(xy) 419 | sigma.inv = solve(sigma, matrix(c(1,0,0,1),2,2)) 420 | # DEFINE GRID 421 | n <- 50 422 | xlim<-lim[i,] 423 | ylim<-lim[j,] 424 | x <- seq(xlim[1],xlim[2],length.out=n) 425 | y <- seq(ylim[1],ylim[2],length.out=n) 426 | # EVALUATE HEIGHT FUNCTION ON GRID 427 | height <- function(s,t) {u<-c(s,t)-center; u %*% sigma.inv %*% u / 2} 428 | z <- mapply(height, as.vector(rep(x,n)), as.vector(outer(rep(0,n), y, `+`))) 429 | # PLOT 430 | plot(xy, pch=20, xlim=xlim, ylim=ylim, xlab=lab[i], ylab=lab[j], 431 | cex.axis=magnify) 432 | contour(x,y,matrix(z,n,n), levels=(0:2), col = gray(.5), lwd=2, 433 | add=TRUE, drawlabels=FALSE) 434 | }else{ 435 | # SKIP UPPER TRIANGULAR PLOTS (REPEATS) 436 | plot.new() 437 | } 438 | } 439 | @ 440 | \caption{Samples and level curves of the bivariate joint posterior probability distribution of each element of $\theta $ with each other element (lower triangular) and marginal posterior probability distribution of each element of $\theta $ separately (diagonal). All parameters are on the log scale, but note the difference in length scale between $\beta _1$ on the one hand and $\beta _0$ and $\sigma _e$ on the other.}\label{fig:jointposterior} 441 | \end{figure} 442 | 443 | Of immediate interest is the marginal distribution of the slope $\beta _1$. 444 | Figure~\ref{fig:jointposterior} suggests that most of the posterior probability density of $\beta _1$ is located below zero. One quantitative way to assess the posterior probability distribution is to examine its quantiles; see Table~\ref{tab:quantilesGibsonWu}. Here, it is useful to define the concept of the \textit{credible interval}. The $(1-\alpha )$\% credible interval contains $(1-\alpha )$\% of the posterior probability density. Unlike the $(1-\alpha )$\% confidence interval from the frequentist setting, the $(1-\alpha )$\% credible interval represents the range within which we are $(1-\alpha )$\% certain that the true value of the parameter lies, given the prior and the data (see \citealp{morey2015fallacy} for further discussion on confidence intervals vs credible intervals). A common convention is to use the interval ranging from the $2.5$th to $97.5$th percentiles. We follow this convention to obtain 95\% credible intervals in Table~\ref{tab:quantilesGibsonWu}. Lines 27--28 of Listing~\ref{fig:fixefcode} illustrate how these quantiles of the posterior distribution of $\beta_1$ (\texttt{beta[2]}) can be computed. 
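The proportion of posterior samples below zero can be computed directly from the samples extracted in Listing~\ref{fig:fixefcode}; a minimal sketch:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
# posterior probability that beta_1 is negative,
# using the beta1 vector extracted earlier:
mean(beta1 < 0)
\end{Verbatim}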
445 | 446 | The sample distribution of $\beta_1$ indicates that approximately 447 | \Sexpr{round(mean(beta1<0),digits=2)*100}\% 448 | of the posterior probability density is below zero, suggesting that there is some evidence that object relatives are easier to process than subject relatives in Chinese, given the Gibson and Wu data. However, since the 95\% credible interval includes zero, we may be reluctant to draw this conclusion. 449 | We will say more about the evaluation of research hypotheses further on, but it is important to note here that the fixed effects model presented above is in any case not appropriate for the present data. The independence assumption is violated for the errors because we have repeated measures from each subject and from each item. Linear mixed models extend the linear model to solve precisely this problem. 450 | 451 | \subsection{Varying Intercepts Mixed Effects Model} 452 | \label{subsec:ranint} 453 | 454 | The fixed effects model is inappropriate for the Gibson and Wu data because it does not take into account the fact that we have multiple measurements for each subject and item. As mentioned above, these multiple measurements lead to a violation of the independence of errors assumption. Moreover, the fixed effects coefficients $\beta_0$ and $\beta_1$ represent means over all subjects and items, ignoring the fact that some subjects will be faster and some slower than average; similarly, some items will be read faster than average, and some slower. 455 | 456 | In linear mixed models, we take this by-subject and by-item variability into account by adding adjustment terms $u_{0j}$ and $w_{0k}$, which adjust $\beta_0$ for subject $j$ and item $k$. This partially decomposes $\varepsilon _i$ into a sum of the terms $u_{0j}$ and $w_{0k}$, which are adjustments to the intercept $\beta _0$ for the subject $j$ and item $k$ associated with $\hbox{\texttt{rt}}_i$. If subject $j$ is slower than the average of all the subjects, $u_j$ would be some positive number, and if item $k$ is read faster than the average reading time of all the items, then $w_k$ would be some negative number. Each subject $j$ has their own adjustment $u_{0j}$, and each item its own $w_{0k}$. These adjustments $u_{0j}$ and $w_{0k}$ are called \textit{random intercepts} by \citet{pinheirobates} and 457 | \textit{varying intercepts} by \citet{gelmanhill07}, and by adjusting $\beta _0$ by these we account for the variability by speaker and by item. 458 | 459 | We assume that these adjustments are normally distributed around zero with unknown standard deviation: $u_0 \sim \mathcal{N}(0,\sigma _u)$ and $w_0 \sim \mathcal{N}(0,\sigma _w)$. 460 | We now have three sources of variance in this model: the standard deviation of the errors $\sigma _e$, the standard deviation of the by-subject random intercepts $\sigma _u $, and the standard deviation of the by-item varying intercepts $\sigma _w $. We will refer to these as \textit{variance components}. 461 | 462 | We now express the logarithm of reading time, which was produced by subjects $j \in \{1,\dots,37\}$ reading items $k \in \{1,\dots,15\}$, in conditions $i \in \{1,2\}$ (1 refers to subject relatives, 2 to object relatives), as the following sum. Notice that we are now using a slightly different way to describe the model, compared to the fixed effects model. We are using indices for subject, item, and condition to identify unique rows. Also, instead of writing $\beta_1 \hbox{\texttt{so}}_i$, we index $\beta_1$ by the condition $i$. 
This follows the notation used in the textbook on linear mixed models, written by the authors of \texttt{nlme} \citep{pinheirobates}, the precursor to \texttt{lme4}. 463 | 464 | \begin{equation}\label{eq:ranint} 465 | \log \hbox{\texttt{rt}}_{ijk} = \beta _0 + \underbrace{\beta_{1i}}_{\beta_1\hbox{\texttt{so}}_i} + u_{0j} + w_{0k} + \varepsilon_{ijk} 466 | \end{equation} 467 | 468 | This is an LMM, and more specifically a \textit{varying intercepts model}. The coefficient $\beta_{1i}$ is the one of primary interest; it will have some mean value $-\beta_1$ for subject relatives and $\beta_1$ for object relatives due to the contrast coding. So, if our posterior mean for $\beta_1$ is negative, this would suggest that object relatives are read faster than subject relatives. 469 | 470 | We fit the varying intercepts model in Stan in much the same way as the fixed effects model. 471 | For the following discussion, consult Listing~\ref{fig:Model2code} for the R code used to run the model, and Listing~\ref{fig:varintstancode} for the Stan code. 472 | 473 | \paragraph{Setting up the data} 474 | 475 | \singlespacing 476 | \begin{listing} 477 | % <>= 478 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 479 | # format data for Stan: 480 | stanDat <- list(subj = as.integer(rDat$subj), 481 | item = as.integer(rDat$item), 482 | rt = rDat$rt, 483 | so = rDat$so, 484 | N = nrow(rDat), 485 | J = nlevels(rDat$subj), 486 | K = nlevels(rDat$item)) 487 | 488 | # Sample from posterior distribution: 489 | ranIntFit <- stan(file = "ranInt.stan", data = stanDat, 490 | iter = 2000, chains = 4) 491 | # Summarize results: 492 | print(ranIntFit, pars = c("beta", "sigma_e", "sigma_u", "sigma_w"), 493 | probs = c(0.025, 0.5, 0.975)) 494 | 495 | beta1 <- unlist(extract(ranIntFit, pars = "beta[2]")) 496 | print(quantile(beta1, probs = c(0.025, 0.5, 0.975))) 497 | 498 | # Posterior probability of beta1 being less than 0: 499 | mean(beta1 < 0) 500 | \end{Verbatim} 501 | % @ 502 | \caption{R code for running the random intercepts model, the varying intercepts model. Note that lines 1--10 and 14 of Listing~\ref{fig:fixefcode} must be run first.}\label{fig:Model2code} 503 | \end{listing} 504 | \doublespacing 505 | The data which we prepare for passing on to the function \texttt{stan} now includes subject and item information (Listing~\ref{fig:Model2code}, lines 2--8). 506 | The data block in the Stan code accordingly includes the number \texttt{J}, \texttt{K} of subjects and items, respectively, as well as subject and item identifiers \texttt{subj} and \texttt{item} (Listing~\ref{fig:varintstancode}, lines 5--8). 
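Before fitting, it can be useful to check that the subject and item identifiers are consecutive integers that match the declared numbers of subjects and items; the following sanity check is a sketch of ours, not part of the original code:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
# subject ids should run from 1 to J, item ids from 1 to K
with(stanDat, stopifnot(min(subj) == 1, max(subj) == J,
                        min(item) == 1, max(item) == K))
\end{Verbatim}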
507 | 508 | \paragraph{Defining the model} 509 | 510 | \begin{listing} 511 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 512 | data { 513 | int<lower=1> N; //number of data points 514 | real rt[N]; //reading time 515 | real<lower=-1,upper=1> so[N]; //predictor 516 | int<lower=1> J; //number of subjects 517 | int<lower=1> K; //number of items 518 | int<lower=1,upper=J> subj[N]; //subject id 519 | int<lower=1,upper=K> item[N]; //item id 520 | } 521 | 522 | parameters { 523 | vector[2] beta; //fixed intercept and slope 524 | vector[J] u; //subject intercepts 525 | vector[K] w; //item intercepts 526 | real<lower=0> sigma_e; //error sd 527 | real<lower=0> sigma_u; //subj sd 528 | real<lower=0> sigma_w; //item sd 529 | } 530 | 531 | model { 532 | real mu; 533 | //priors 534 | u ~ normal(0, sigma_u); //subj random effects 535 | w ~ normal(0, sigma_w); //item random effects 536 | // likelihood 537 | for (i in 1:N){ 538 | mu = beta[1] + u[subj[i]] + w[item[i]] + beta[2] * so[i]; 539 | rt[i] ~ lognormal(mu, sigma_e); 540 | } 541 | } 542 | \end{Verbatim} 543 | \caption{Stan code for running the random intercepts model, the varying intercepts model.}\label{fig:varintstancode} 544 | \end{listing} 545 | 546 | The random intercepts model, shown in Listing~\ref{fig:varintstancode}, still has the fixed intercept $\beta _0$, the fixed slope $\beta _1$, and the standard deviation $\sigma _e$ of the error, and we specify these in the same way as we did for the fixed effects model. In addition, the varying intercepts model has by-subject varying intercepts $u_{0j}$ for $j \in \{1,\ldots,J\}$ and by-item varying intercepts $w_{0k}$ for $k \in \{1,\ldots,K\}$. The standard deviation of $u_0$ is $\sigma _u$ and the standard deviation of $w_0$ is $\sigma _w$. We again constrain the standard deviations to be positive. 547 | 548 | The model block places normal distribution priors on the varying intercepts $u_0$ and $w_0$. We implicitly place uniform priors on \texttt{sigma\_u}, \texttt{sigma\_w}, and \texttt{sigma\_e} by omitting them from the model block. As pointed out earlier for \texttt{sigma\_e}, these prior distributions have lower bound zero because of the constraint \texttt{<lower=0>} in the variable declarations. 549 | 550 | The statement about how each row in the data is generated is shown in Listing~\ref{fig:varintstancode}, lines 26--29; here, both the fixed effects and the varying intercepts for subjects and items determine the expected value \texttt{mu}. 551 | The vector \texttt{u} has varying intercepts for subjects. Likewise, the vector \texttt{w} has varying intercepts for items. The for-loop in lines 26--29 now adds \texttt{u[subj[i]] + w[item[i]]} to the mean \texttt{beta[1]} of the distribution of \texttt{rt[i]}. These are subject- and item-specific adjustments to the fixed-effects intercept \texttt{beta[1]}. The term \texttt{subj[i]} is the identifier of the subject for row $i$ in the data-frame; thus, if $i=1$, then \texttt{subj[1] = 1}, and \texttt{item[1] = 13} (see Table~\ref{tab:dataframe1}). 552 | 553 | \paragraph{Running the model} 554 | 555 | In R, we pass the list \texttt{stanDat} of data to \texttt{stan}, which compiles a C++ program to sample from the posterior distribution of the random intercepts model. 556 | Stan samples from the posterior distribution of the model parameters, including the varying intercepts $u_{0j}$ and $w_{0k}$ for each subject $j \in \{1,\ldots ,J\}$ and item $k \in \{1,\ldots ,K\}$.
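For example, the posterior means of the by-subject intercept adjustments can be extracted as follows (a minimal sketch, assuming the fitted object \texttt{ranIntFit} from Listing~\ref{fig:Model2code}):

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
# posterior samples of u: an iterations x J matrix
u_samples <- extract(ranIntFit, pars = "u")$u
# posterior mean adjustment for each of the J subjects
u_means <- colMeans(u_samples)
round(sort(u_means), 3)
\end{Verbatim}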
557 | 558 | It may be helpful to rewrite the model in mathematical form following the Stan syntax (\citealp{gelmanhill07} use a similar notation); the Stan statements are slightly different from the way that we expressed the random intercepts model. Defining $i$ as the row number in the data frame, i.e., $i \in \{1,\dots, 547\}$, we can write: 559 | 560 | \begin{equation} 561 | \begin{split} 562 | ~& \hbox{Likelihood}:\\ 563 | ~& \mu_i = \beta_0 + u_{[subj[i]]} + w_{[item[i]]} + \beta_1 \cdot \hbox{\texttt{so}}_i\\ 564 | ~& \hbox{\texttt{rt}}_i \sim \hbox{LogNormal}(\mu_i,\sigma_e)\\ 565 | ~& \hbox{Priors}:\\ 566 | ~& u \sim \hbox{Normal}(0, \sigma_u) \quad w \sim \hbox{Normal}(0,\sigma_w)\\ 567 | ~& \sigma_e, \sigma_u, \sigma_w \sim \hbox{Uniform}(0,\infty)\\ 568 | ~& \beta \sim \hbox{Uniform}(-\infty,\infty)\\ 569 | \end{split} 570 | \end{equation} 571 | 572 | Here, notice that in the statement for $\mu_i$, the row number $i$ picks out the associated subject identifier ($j$), ranging from 1 to 37, and the associated item identifier ($k$), ranging from 1 to 15. 573 | 574 | \paragraph{Summarizing the results} 575 | 576 | The posterior distribution of each of the parameters is summarized in Table~\ref{tab:Model2posterior}. The $\hat R$ values suggest that the model has converged because they equal one. Note also that compared to Model \ref{eq:fixef}, the estimate of $\sigma_e$ is smaller; this is because the other two variance components are now being estimated as well. Note that the 95\% credible interval for the estimate $\hat\beta_1$ includes zero; thus, there is some evidence that object relatives are easier than subject relatives, but we cannot exclude the possibility that there is no difference in the reading times between the two relative clause types. 577 | 578 | \begin{table}[htp] 579 | \begin{center} 580 | \begin{tabular}{crrrr} 581 | \hline 582 | parameter & mean & 2.5\% & 97.5\% & $\hat R$\\ 583 | \hline 584 | $\hat \beta_0$ & 6.06 & 5.92 & 6.20 & 1\\ 585 | $\hat \beta_1$ & $-0.04$ & $-0.08$ & 0.01 & 1\\ 586 | $\hat \sigma_e$ & 0.52 & 0.49 & 0.56 & 1\\ 587 | $\hat \sigma_u$ & 0.26 & 0.19 & 0.34 & 1\\ 588 | $\hat \sigma_w$ & 0.20 & 0.12 & 0.33 & 1\\ 589 | \hline 590 | \end{tabular} 591 | \end{center} 592 | \caption{The quantiles and the $\hat R$ statistic in the Gibson and Wu data, the varying intercepts model.}\label{tab:Model2posterior} 593 | \end{table} 594 | 595 | 596 | 597 | \subsection{Varying Intercepts, Varying Slopes Mixed Effects Model}\label{subsec:ranslpnocor} 598 | 599 | The varying intercepts model accounted for having multiple measurements from each subject and item by introducing random intercepts by subject and by item. This reflects that some subjects will be faster and some slower than average, and that some items will be read faster than average, and some slower. 600 | Consider now that not only does reading speed differ by subject and by item, but also the slowdown in the object relative condition may differ in magnitude by subject and item. This amounts to a different effect size for \texttt{so} by subject and item. 601 | Although such individual-level variability was not of interest in the original paper by Gibson and Wu, it could be of theoretical interest (see, for example, \citealp{kliegl2010experimental}).
Furthermore, as \citet{barr2011random} point out, it is in principle desirable to include a fixed effect factor in the random effects as a varying slope if the experiment design is such that subjects see both levels of the factor (cf.\ \citealp{BatesEtAlParsimonious,hannesBEAP,CaveOfShadows}). 602 | 603 | \paragraph{Adding varying slopes} 604 | 605 | In order to express this structure in the LMM, we must introduce varying slopes. 606 | The first change is to let the size of the effect for \texttt{so} vary by subject and by item. The goal here is to express that some subjects exhibit greater slowdowns in the object relative condition than others. We let effect size vary by subject and by item by including in the model by-subject and by-item varying slopes which adjust the fixed slope $\beta _1$ in the same way that the by-subject and by-item varying intercepts adjust the fixed intercept $\beta _0$. This adjustment of the slope by subject and by item is expressed by adjusting $\beta _1$ by adding two terms $u_{1j}$ and $w_{1k}$. These are \textit{random} or \textit{varying slopes}, and by adding them we account for how the effect of relative clause type varies by subject $j$ and by item $k$. We now express the logarithm of reading time, which was produced by subject $j$ reading item $k$, as the following sum. The subscript $i$ indexes the conditions. 607 | 608 | \begin{equation}\label{eq:ranslp} 609 | \log \hbox{\texttt{rt}}_{ijk} = \underbrace{\beta_0 + u_{0j} + w_{0k}}_{\text{varying intercepts}} + 610 | \underbrace{\beta_{1i} + u_{1ij} + w_{1ik}}_{\text{varying slopes}} + \varepsilon_{ijk} 611 | \end{equation} 612 | This is a \textit{varying intercepts, varying slopes model}. 613 | 614 | \paragraph{Setting up the data} 615 | Listing~\ref{fig:varintslpcode} contains the R code for fitting the varying intercepts, varying slopes model. The data which we pass to the function \texttt{stan} is the same as for the varying intercepts model. This contains subject and item information (Listing~\ref{fig:Model2code}, lines 2–8). 616 | 617 | \singlespacing 618 | \begin{listing} 619 | % <>= 620 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 621 | # 1. Compile and fit model 622 | ranIntSlpNoCorFit <- stan(file="ranIntSlpNoCor.stan", data = stanDat, 623 | iter = 2000, chains = 4) 624 | 625 | # posterior probability of beta 1 being less 626 | # than 0: 627 | beta1 <- unlist(extract(ranIntSlpNoCorFit, pars = "beta[2]")) 628 | print(quantile(beta1, probs = c(0.025, 0.5, 0.975))) 629 | mean(beta1 < 0) 630 | \end{Verbatim} 631 | % @ 632 | \caption{R code for running the varying intercepts, varying slopes model. Note that lines 1-10 and 14 of Listing~\ref{fig:fixefcode} and lines 2--8 of Listing~\ref{fig:Model2code} must be run first.}\label{fig:varintslpcode} 633 | \end{listing} 634 | \doublespacing 635 | 636 | \paragraph{Defining the model} 637 | Listing~\ref{fig:varintslpnocorstancode} contains the Stan code for the varying intercepts, varying slopes model. 638 | The data block is the same as in the varying intercepts model, but the parameters block contains several new parameters. 639 | This time we have the vector \texttt{sigma\_u}, which contains the standard deviations $(\sigma _{u0},\sigma _{u1})^\intercal $ of the by-subject random intercepts and slopes. The by-subject random intercepts are in the first row of the 2$\times $\texttt{J} matrix \texttt{u}, and the by-subject random slopes are in the second row of \texttt{u}. 
Similarly, the vector \texttt{sigma\_w} contains the standard deviations $(\sigma _{w0},\sigma _{w1})^\intercal $ of the by-item random intercepts and slopes. The by-item random intercepts are in the first row of the 2$\times $\texttt{K} matrix \texttt{w}, and the by-item random slopes are in the second row of \texttt{w}. 640 | 641 | In the model block, we place priors on the parameters declared in the parameters block (Listing~\ref{fig:varintslpnocorstancode}, lines 23--26), and define how these parameters generate $\log \hbox{\texttt{rt}}$ (Listing~\ref{fig:varintslpnocorstancode}, lines 28--32). 642 | The statement \texttt{u[1] \textasciitilde{ }normal(0,sigma\_u[1]);} specifies a normal prior for the by-subject random intercepts in the first row of \texttt{u}, and the statement \texttt{u[2] \textasciitilde{ }normal(0,sigma\_u[2]);} does the same for the by-subject random slopes in the second row of \texttt{u}. The same goes for the by-item random intercepts and slopes. Thus, there is a prior normal distribution for each of the random effects. These distributions are centered on zero and have different standard deviations. 643 | 644 | \begin{listing} 645 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 646 | data { 647 | int<lower=1> N; //number of data points 648 | real rt[N]; //reading time 649 | real<lower=-1,upper=1> so[N]; //predictor 650 | int<lower=1> J; //number of subjects 651 | int<lower=1> K; //number of items 652 | int<lower=1,upper=J> subj[N]; //subject id 653 | int<lower=1,upper=K> item[N]; //item id 654 | } 655 | 656 | parameters { 657 | vector[2] beta; //intercept and slope 658 | real<lower=0> sigma_e; //error sd 659 | matrix[2,J] u; //subj intercepts, slopes 660 | vector<lower=0>[2] sigma_u; //subj sd 661 | matrix[2,K] w; //item intercepts, slopes 662 | vector<lower=0>[2] sigma_w; //item sd 663 | } 664 | 665 | model { 666 | real mu; 667 | //priors 668 | u[1] ~ normal(0,sigma_u[1]); //subj intercepts 669 | u[2] ~ normal(0,sigma_u[2]); //subj slopes 670 | w[1] ~ normal(0,sigma_w[1]); //item intercepts 671 | w[2] ~ normal(0,sigma_w[2]); //item slopes 672 | //likelihood 673 | for (i in 1:N){ 674 | mu = beta[1] + u[1,subj[i]] + w[1,item[i]] 675 | + (beta[2] + u[2,subj[i]] + w[2,item[i]])*so[i]; 676 | rt[i] ~ lognormal(mu,sigma_e); 677 | } 678 | } 679 | \end{Verbatim} 680 | \caption{Stan code for the varying intercepts, varying slopes model.}\label{fig:varintslpnocorstancode} 681 | \end{listing} 682 | 683 | \paragraph{Running the model} We can now fit the varying intercepts, varying slopes model in R (see Listing~\ref{fig:varintslpcode}). We see in the model summary of Table~\ref{tab:varintslpposterior}, obtained as before using \texttt{print(ranIntSlpNoCorFit)}, that the model has converged, and that the credible interval of the parameter of interest, $\beta_1$, still includes zero. In fact, the posterior probability of the parameter being less than zero is now $90$\%.
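If the individual-level variability discussed above is of theoretical interest, the by-subject slope adjustments can be inspected directly; a minimal sketch, assuming the fitted object \texttt{ranIntSlpNoCorFit} from Listing~\ref{fig:varintslpcode}:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
# u is a 2 x J matrix in the Stan model, so extract()
# returns an iterations x 2 x J array
u_samples <- extract(ranIntSlpNoCorFit, pars = "u")$u
# posterior mean of the slope adjustment (second row of u) per subject
slope_by_subj <- colMeans(u_samples[, 2, ])
round(sort(slope_by_subj), 3)
\end{Verbatim}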
684 | 685 | \begin{table}[htp] 686 | \begin{center} 687 | \begin{tabular}{crrrr} 688 | \hline 689 | parameter & mean & 2.5\% &97.5\% & $\hat R$\\ 690 | \hline 691 | $\hat \beta_0$ & 6.06 & 5.92 & 6.20 & 1\\ 692 | $\hat \beta_1$ & $-0.04$ & $-0.09$ & 0.02 & 1\\ 693 | $\hat \sigma_e$ & 0.52 & 0.48 & 0.55 & 1\\ 694 | $\hat \sigma_{u0}$ & 0.25 & 0.18 & 0.34 & 1\\ 695 | $\hat \sigma_{u1}$ & 0.06 & 0.01 & 0.13 & 1\\ 696 | $\hat \sigma_{w0}$ & 0.20 & 0.12 & 0.32 & 1\\ 697 | $\hat \sigma_{w1}$ & 0.04 & 0.01 & 0.11 & 1\\ 698 | \hline 699 | \end{tabular} 700 | \end{center} 701 | \caption{The quantiles and the $\hat R$ statistic in the Gibson and Wu data, the varying intercepts, varying slopes model.}\label{tab:varintslpposterior} 702 | \end{table} 703 | 704 | 705 | \subsection{Correlated Varying Intercepts, Varying Slopes Mixed Effects Model}\label{subsec:ranslpwithcor} 706 | 707 | Consider now that subjects who are faster than average (i.e., who have a negative varying intercept) may exhibit greater slowdowns when they read object relatives compared to subject relatives. Similarly, it is in principle possible that items which are read faster (i.e., which have a large negative varying intercept) may show a greater slowdown in the object relative condition than in the subject relative condition. The opposite situation could also hold: faster subjects may show smaller SR-OR effects, or items read faster may show smaller SR-OR effects. 708 | This suggests the possibility of correlations between random intercepts and random slopes. 709 | 710 | In order to express this structure in the LMM, we must model correlation between the varying intercepts and varying slopes. The model equation, repeated below, is the same as before. 711 | $$ 712 | \log \hbox{\texttt{rt}}_{ijk} = \underbrace{\beta_0 + u_{0j} + w_{0k}}_{\text{varying intercepts}} + 713 | \underbrace{\beta_1 + u_{1ij} + w_{1ik}}_{\text{varying slopes}} + \varepsilon_{ijk} 714 | $$ 715 | Introducing correlation between the varying intercepts and varying slopes makes this a \textit{correlated varying intercepts, varying slopes model}. 716 | 717 | \paragraph{Defining a variance-covariance matrix for the random effects} 718 | 719 | Modeling the correlation between varying intercepts and slopes means defining a covariance relationship between by-subject varying intercepts and slopes, and between by-items varying intercepts and slopes. This amounts to adding an assumption that the by-subject slopes $u_{1}$ could in principle have some correlation with the by-subject intercepts $u_{0}$; and by-item slopes $w_{1}$ with by-item intercept $w_{0}$. We explain this in detail below. 720 | 721 | Let us assume that the adjustments $u_0$ and $u_1$ are normally distributed with mean zero and some variances $\sigma_{u0}^2$ and $\sigma_{u1}^2$, respectively; also assume that $u_0$ and $u_1$ have correlation $\rho_{u}$. It is standard to express this situation by defining a variance-covariance matrix $\Sigma _u$, sometimes called simply a variance matrix. 722 | This matrix has the variances of $u_0$ and $u_1$ respectively along the diagonal, and the covariances on the off-diagonal. The covariance $\mathrm{Cov}(X,Y)$ between two variables $X$ and $Y$ is defined as the product of their correlation $\rho$ and their standard deviations $\sigma_X$ and $\sigma_Y$: $\mathrm{Cov}(X,Y)=\rho\sigma_X \sigma_Y$. 
723 | \begin{equation}\label{eq:covmat} 724 | \Sigma _u 725 | = 726 | \begin{pmatrix} 727 | \sigma _{u0}^2 & \rho _{u}\sigma _{u0}\sigma _{u1}\\ 728 | \rho _{u}\sigma _{u0}\sigma _{u1} & \sigma _{u1}^2\\ 729 | \end{pmatrix} 730 | \end{equation} 731 | Similarly, we can define a variance-covariance matrix $\Sigma_w$ for items, using the standard deviations 732 | $\sigma_{w0}$, $\sigma_{w1}$, and the correlation 733 | $\rho_{w}$. 734 | \begin{equation}\label{eq:covmatw} 735 | \Sigma _w 736 | = 737 | \begin{pmatrix} 738 | \sigma _{w0}^2 & \rho _{w}\sigma _{w0}\sigma _{w1}\\ 739 | \rho _{w}\sigma _{w0}\sigma _{w1} & \sigma _{w1}^2\\ 740 | \end{pmatrix} 741 | \end{equation} 742 | The standard way to express this relationship between 743 | the subject intercepts $u_0$ and slopes $u_1$, and the item intercepts $w_0$ and slopes $w_1$, is to define a bivariate normal distribution as follows: 744 | \begin{equation}\label{eq:jointpriordist1} 745 | \begin{pmatrix} 746 | u_0 \\ 747 | u_1 \\ 748 | \end{pmatrix} 749 | \sim 750 | \mathcal{N} \left( 751 | \begin{pmatrix} 752 | 0 \\ 753 | 0 \\ 754 | \end{pmatrix}, 755 | \Sigma_{u} 756 | \right), 757 | \quad 758 | \begin{pmatrix} 759 | w_0 \\ 760 | w_1 \\ 761 | \end{pmatrix} 762 | \sim 763 | \mathcal{N}\left( 764 | \begin{pmatrix} 765 | 0 \\ 766 | 0 \\ 767 | \end{pmatrix}, 768 | \Sigma_{w} 769 | \right) 770 | \end{equation} 771 | An important point to notice here is that any $n\times n$ variance-covariance matrix has associated with it an $n\times n$ correlation matrix. In the subject variance-covariance matrix $\Sigma_{u}$, the correlation matrix is 772 | \begin{equation} 773 | \begin{pmatrix} 774 | 1 & \rho_{01}\\ 775 | \rho_{01} & 1\\ 776 | \end{pmatrix} 777 | \end{equation} 778 | In a correlation matrix, the diagonal elements will always be $1$, because a variable always has a correlation of $1$ with itself. The off-diagonal entries will have the correlations between the variables. Note also that, given the variances $\sigma_{u0}^2$ and $\sigma_{u1}^2$, we can always recover the variance-covariance matrix, if we know the correlation matrix. 779 | This is because of the above-mentioned definition of covariance. 780 | 781 | A correlation matrix can be factored into a \textit{matrix square root}. Given a correlation matrix $C$, we can obtain its square root matrix $L$. The square root of a matrix is such that we can square $L$ to get the correlation matrix $C$ back. 782 | In the next section, we see that the matrix square root is important for generating the random intercepts and slopes because of its role in generating correlated random variables. 783 | Appendix~\ref{sec:chol} describes one method for obtaining $L$, namely, the Cholesky factorization. 
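To make the role of $L$ concrete, the following R sketch (ours, not part of the paper's code; the correlation value is arbitrary) shows that multiplying a lower-triangular factor $L$ by its transpose recovers the correlation matrix $C$, and that multiplying uncorrelated standard normal variables by $L$ induces the desired correlation:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
# a hypothetical 2 x 2 correlation matrix
C <- matrix(c(   1, -0.4,
              -0.4,    1), nrow = 2)
L <- t(chol(C))   # lower-triangular factor; L %*% t(L) equals C
L %*% t(L)        # recovers C
# correlating uncorrelated standard normals:
z <- matrix(rnorm(2 * 10000), nrow = 2)
x <- L %*% z      # the two rows of x now have correlation close to -0.4
cor(t(x))
\end{Verbatim}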
784 | 785 | \paragraph{Defining the model} 786 | 787 | \singlespacing 788 | \begin{listing} 789 | % <>= 790 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 791 | ranIntSlpFit <- stan(file = "ranIntSlp.stan", data = stanDat, 792 | iter = 2000, chains = 4) 793 | 794 | # posterior probability of beta 1 being less 795 | # than 0: 796 | beta1 <- unlist(extract(ranIntSlpFit, pars = "beta[2]")) 797 | print(quantile(beta1, probs = c(0.025, 0.5, 0.975))) 798 | mean(beta1 < 0) 799 | 800 | ## Use the L matrices to compute the correlation matrices 801 | # L matrices 802 | L_u <- extract(ranIntSlpFit, pars = "L_u")$L_u 803 | L_w <- extract(ranIntSlpFit, pars = "L_w")$L_w 804 | 805 | # correlation parameters 806 | cor_u <- apply(L_u, 1, function(x) tcrossprod(x)[1, 2]) 807 | cor_w <- apply(L_w, 1, function(x) tcrossprod(x)[1, 2]) 808 | 809 | print(signif(quantile(cor_u, probs = c(0.025, 0.5, 0.975)), 2)) 810 | print(mean(cor_u)) 811 | print(signif(quantile(cor_w, probs = c(0.025, 0.5, 0.975)), 2)) 812 | print(mean(cor_w)) 813 | \end{Verbatim} 814 | % @ 815 | \caption{R code for running the correlated varying intercepts, varying slopes model. Note that lines 1--10 and 14 of Listing~\ref{fig:fixefcode} and lines 2--8 of Listing~\ref{fig:Model2code} must be run first.}\label{fig:Model3code} 816 | \end{listing} 817 | \doublespacing 818 | 819 | 820 | \begin{listing} 821 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 822 | data { 823 | int<lower=1> N; //number of data points 824 | real rt[N]; //reading time 825 | real<lower=-1,upper=1> so[N]; //predictor 826 | int<lower=1> J; //number of subjects 827 | int<lower=1> K; //number of items 828 | int<lower=1,upper=J> subj[N]; //subject id 829 | int<lower=1,upper=K> item[N]; //item id 830 | } 831 | 832 | parameters { 833 | vector[2] beta; //intercept and slope 834 | real<lower=0> sigma_e; //error sd 835 | vector<lower=0>[2] sigma_u; //subj sd 836 | cholesky_factor_corr[2] L_u; 837 | matrix[2,J] z_u; 838 | vector<lower=0>[2] sigma_w; //item sd 839 | cholesky_factor_corr[2] L_w; 840 | matrix[2,K] z_w; 841 | } 842 | 843 | transformed parameters{ 844 | matrix[2,J] u; 845 | matrix[2,K] w; 846 | 847 | u = diag_pre_multiply(sigma_u, L_u) * z_u; //subj random effects 848 | w = diag_pre_multiply(sigma_w, L_w) * z_w; //item random effects 849 | } 850 | 851 | model { 852 | real mu; 853 | 854 | //priors 855 | L_u ~ lkj_corr_cholesky(2.0); 856 | L_w ~ lkj_corr_cholesky(2.0); 857 | to_vector(z_u) ~ normal(0,1); 858 | to_vector(z_w) ~ normal(0,1); 859 | //likelihood 860 | for (i in 1:N){ 861 | mu = beta[1] + u[1,subj[i]] + w[1,item[i]] 862 | + (beta[2] + u[2,subj[i]] + w[2,item[i]]) * so[i]; 863 | rt[i] ~ lognormal(mu, sigma_e); 864 | } 865 | } 866 | \end{Verbatim} 867 | \caption{The Stan code for the correlated varying intercepts, varying slopes model.}\label{fig:Model3Stancode} 868 | \end{listing} 869 | 870 | 871 | With this background, implementing the correlated varying intercepts, varying slopes model is straightforward; see Listing~\ref{fig:Model3code} for the R code and Listing~\ref{fig:Model3Stancode} for the Stan code. The R list \texttt{stanDat} is identical to the one used for the varying intercepts, varying slopes model, and therefore we will focus on the Stan code. 872 | The data block is the same as before. 873 | The parameters block contains several new parameters. 874 | As before, we have vectors \texttt{sigma\_u} and \texttt{sigma\_w}, which are $(\sigma _{u0},\sigma _{u1})^\intercal $ and $(\sigma _{w0},\sigma _{w1})^\intercal $.
875 | The variables \texttt{L\_u}, \texttt{L\_w}, \texttt{z\_u}, and \texttt{z\_w}, which have been declared in the parameters block, play a role in the \textit{transformed parameters block}, a block which we did not use in the earlier models. The transformed parameters block generates the by-subject and by-item varying intercepts and slopes using the parameters \texttt{sigma\_u}, \texttt{L\_u}, \texttt{z\_u}, \texttt{sigma\_w}, \texttt{L\_w}, and \texttt{z\_w}. The $J$ pairs of by-subject varying intercepts and slopes are in the columns of the $2\times J$ matrix \texttt{u}, and the $K$ pairs of by-item varying intercepts and slopes are in the columns of the $2\times K$ matrix \texttt{w}. 876 | 877 | These varying intercepts and slopes are obtained through the statements \texttt{diag\_pre\_multiply(sigma\_u, L\_u) * z\_u} and \texttt{diag\_pre\_multiply(sigma\_w, L\_w) * z\_w}. 878 | These statements generate varying intercepts and slopes from the joint probability distribution of Equation~\ref{eq:jointpriordist1}. 879 | The parameters \texttt{L\_u}, \texttt{L\_w} are the matrix square roots (Cholesky factors) of the subject and item correlation matrices, respectively, and \texttt{z\_u} and \texttt{z\_w} are $\mathcal N(0,1)$ random variables. Appendix~\ref{sec:chol} has details on how this generates correlated random intercepts and slopes. 880 | 881 | In the model block, we place priors on the parameters declared in the parameters block, and define how these parameters generate $\log \hbox{\texttt{rt}}$ (Listing~\ref{fig:Model3Stancode}, lines 30--43). 882 | The statement \texttt{L\_u \textasciitilde{ }lkj\_corr\_cholesky(2.0)} specifies a prior for the square root \texttt{L\_u} (Cholesky factor) of the correlation matrix. 883 | This prior is best interpreted with respect to the product of \texttt{L\_u} and its transpose, that is, with respect to the correlation matrix. 884 | The statement \texttt{L\_u \textasciitilde{ }lkj\_corr\_cholesky(2.0)} implicitly places the lkj prior~\cite[so-called because it was first described by][]{lewandowski2009generating} with shape parameter $\nu =2.0$ on the correlation matrices 885 | \begin{equation} 886 | \begin{pmatrix} 887 | 1 & \rho _u\\ 888 | \rho _u & 1 889 | \end{pmatrix} 890 | \text{ and } 891 | \begin{pmatrix} 892 | 1 & \rho _w\\ 893 | \rho _w & 1 894 | \end{pmatrix}, 895 | \end{equation} 896 | where $\rho _u$ is the correlation between the by-subject varying intercepts $u_0$ and slopes $u_1$ (cf.\ the covariance matrix of Equation~\ref{eq:covmat}) and $\rho _w$ is the correlation between the by-item varying intercepts $w_0$ and slopes $w_1$. The lkj distribution is a probability distribution over correlation matrices. 897 | The lkj distribution has one shape parameter $\nu$, which controls the prior on the correlation. 898 | If $\nu > 1$, then the probability density becomes concentrated about the $2\times 2$ identity matrix.\footnote{The lkj prior can scale up to correlation matrices larger than $2\times 2$.} This expresses the prior belief that the correlations are not large. 899 | If $\nu = 1$, then the probability density function is uniform over all $2\times 2$ correlation matrices. 900 | If $0 < \nu < 1$, then the probability density has a trough at the $2\times 2$ identity matrix. 901 | In our model, we choose $\nu=2.0$. This choice places more prior probability on correlations near zero, reflecting the fact that we have no prior information about the correlation between intercepts and slopes.
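One way to see what the choice $\nu=2.0$ implies is to sample from the prior alone. The following standalone Stan sketch (a hypothetical file, say \texttt{lkjPrior.stan}, which is not part of this package) draws correlation matrices from the lkj prior and stores the implied correlation; a histogram of the resulting draws of \texttt{rho} shows the prior mass concentrated on moderate correlations around zero:

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
parameters {
  cholesky_factor_corr[2] L;   //Cholesky factor of a 2 x 2 correlation matrix
}
model {
  L ~ lkj_corr_cholesky(2.0);  //the lkj prior with shape nu = 2.0
}
generated quantities {
  matrix[2,2] R;
  real rho;
  R = multiply_lower_tri_self_transpose(L);  //R = L * L', a correlation matrix
  rho = R[1,2];
}
\end{Verbatim}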
902 | 903 | The statement \texttt{to\_vector(z\_u) \textasciitilde{ }normal(0,1)} places a normal distribution with mean zero and standard deviation one on \texttt{z\_u}.\footnote{The function \texttt{to\_vector} means that we rearrange the matrix \texttt{z\_u} as a vector in order to place the normal distribution on a vector. This makes the code run faster.} The same goes for \texttt{z\_w}. The for-loop assigns to $\hbox{\texttt{mu}}$ the mean of the log-normal distribution from which we draw $\hbox{\texttt{rt[i]}}$, conditional on the value of the predictor \texttt{so[i]} for relative clause type and the subject and item identity. 904 | 905 | \paragraph{Running the model} 906 | We can now fit the correlated varying intercepts, varying slopes model; see Listing~\ref{fig:Model3code} for the code. We see in the model summary in Table~\ref{tab:Model3posterior} that the model has converged,\footnote{We do not report the R-hat statistic for parameters $\rho _u$, $\rho _w$ because these parameters converge when $\hat R$ equals one for each entry of the matrices $L_u$, $L_w$. This was the case.} and that the credible interval of the parameter of interest, $\beta_1$, still includes zero. In fact, the posterior probability of the parameter being less than zero is now $90$\%. This information can be extracted as shown in Listing~\ref{fig:Model3code}, lines 6--8. 907 | 908 | \begin{table}[htp] 909 | \begin{center} 910 | \begin{tabular}{crrrr} 911 | \hline 912 | parameter & mean & 2.5\% & 97.5\% & $\hat R$\\ 913 | \hline 914 | $\hat \beta_0$ & 6.06 & 5.92 & 6.20 & 1\\ 915 | $\hat \beta_1$ & $-0.04$ & $-0.09$ & 0.02 & 1\\ 916 | $\hat \sigma_e$ & 0.52 & 0.48 & 0.55 & 1\\ 917 | $\hat \sigma_{u0}$ & 0.25 & 0.18 & 0.34 & 1\\ 918 | $\hat \sigma_{u1}$ & 0.07 & 0.01 & 0.13 & 1\\ 919 | $\hat \sigma_{w0}$ & 0.20 & 0.12 & 0.32 & 1\\ 920 | $\hat \sigma_{w1}$ & 0.04 & 0.0 & 0.11 & 1\\ 921 | $\hat \rho_{u}$ & $-0.44$ & $-0.91$ & 0.36 & \\ 922 | $\hat \rho_{w}$ & $-0.01$ & $-0.76$ & 0.76 & \\ 923 | \hline 924 | \end{tabular} 925 | \end{center} 926 | \caption{The quantiles and the $\hat R$ statistic in the Gibson and Wu data, the correlated varying intercepts, varying slopes model.}\label{tab:Model3posterior} 927 | \end{table} 928 | 929 | Figure~\ref{fig:lmlist} plots, for each subject and each item, the posterior mean of the varying slope against the posterior mean of the varying intercept. The correlation between $u_0$ and $u_1$ is negative, as captured by the marginal posterior distribution of the correlation $\rho _u$ between $u_0$ and $u_1$. Thus, Figure~\ref{fig:lmlist} suggests that the slower a subject's average reading time, the more negative their slope adjustment tends to be, that is, the larger their relative speed advantage in the object relative condition. In contrast, Figure~\ref{fig:lmlist} shows no clear pattern for the by-item varying intercepts and slopes. The broader distribution of the correlation parameter for items compared to subjects illustrates the greater uncertainty concerning the true value of the parameter. We briefly discuss inference next. 930 | 931 | \begin{figure}[!htbp] 932 | \centering 933 | <>= 934 | magnify<-0.80 935 | ## load data: 936 | rDat<-read.table("../data/gibsonwu2012data.txt",header=TRUE) 937 | rDat<-subset(rDat,region=="headnoun") 938 | J<-length(unique(rDat$subj)) 939 | K<-length(unique(rDat$item)) 940 | 941 | # Load the correlated varying intercepts, varying slopes model.
942 | load("../data/ranIntSlpFit.Rda") 943 | u<-matrix(nrow=2,ncol=J) 944 | for(j in 1:J) 945 | for(i in 1:2) 946 | u[i,j]<-mean(extract(ranIntSlpFit,pars=c(paste("u[",i,",",j,"]",sep="")))[[1]]) 947 | N_sample<-length(extract(ranIntSlpFit,pars="L_u[1,1]")[[1]]) 948 | L_u<-array(dim=c(2,2,N_sample)) 949 | for(i in 1:2) 950 | for(j in 1:2) 951 | L_u[i,j,]<-extract(ranIntSlpFit,pars=c(paste("L_u[",i,",",j,"]",sep="")))[[1]] 952 | omega_u<-numeric() 953 | for(i in 1:N_sample){ 954 | Omega_u<-L_u[,,i]%*%t(L_u[,,i]) 955 | omega_u[i]<-Omega_u[1,2] 956 | } 957 | # Extract item random intercepts and slopes. 958 | w<-matrix(nrow=2,ncol=K) 959 | for(k in 1:K) 960 | for(i in 1:2) 961 | w[i,k]<-mean(extract(ranIntSlpFit,pars=c(paste("w[",i,",",k,"]",sep="")))[[1]]) 962 | L_w<-array(dim=c(2,2,N_sample)) 963 | for(i in 1:2) 964 | for(j in 1:2) 965 | L_w[i,j,]<-extract(ranIntSlpFit,pars=c(paste("L_w[",i,",",j,"]",sep="")))[[1]] 966 | omega_w<-numeric() 967 | for(i in 1:N_sample){ 968 | Omega_w<-L_w[,,i]%*%t(L_w[,,i]) 969 | omega_w[i]<-Omega_w[1,2] 970 | } 971 | # Plot by-subject and by-item intercepts vs. slopes, and the correlation posteriors. 972 | par(mfrow=c(2,2),pch=21,bg="white") 973 | plot(u[1,],u[2,],bg="black",mgp=c(2,.25,0), 974 | xlim=c(-.6,.6),ylim=c(-0.10,0.10), 975 | xlab=expression(u[0]),ylab=expression(u[1]), 976 | cex.axis=magnify) 977 | plot(w[1,],w[2,],bg="black",mgp=c(2,.25,0), 978 | xlim=c(-.6,.6),ylim=c(-.10,.10), 979 | xlab=expression(w[0]),ylab=expression(w[1]), 980 | cex.axis=magnify) 981 | hist(omega_u,freq=FALSE,col="black",border="white", 982 | main=NULL,xlab=expression(rho[u])) 983 | hist(omega_w,freq=FALSE,col="black",border="white", 984 | main=NULL,xlab=expression(rho[w])) 985 | @ 986 | \caption{The top row shows the relationship between the posterior mean of the varying slopes (y-axis) and intercepts (x-axis) for each subject (left panel) and item (right panel). The bottom row shows the posterior distribution of the correlation between the varying slopes and intercepts for subjects (left panel) and items (right panel).} 987 | \label{fig:lmlist} 988 | \end{figure} 989 | 990 | 991 | \paragraph{Random effects in a non-Bayesian LMM} 992 | 993 | We also fit the same model as a classical non-Bayesian LMM, using the \texttt{lmer} function from the \texttt{lme4} package. This allows us 994 | to compare the \texttt{lme4} results with the Stan results. Here, we focus on random effects. As illustrated in Figure \ref{fig:lmercomp}, the estimates of the random-effect standard deviations of the classical LMM are in agreement with the modes of the posterior distributions. The \texttt{lmer} function does not show any convergence error, but the correlations between the random intercepts and slopes show the boundary values $-1$ and $+1$: the variance-covariance matrices for the subject and item random effects are degenerate. By contrast, Stan can still estimate posterior distributions for parameters in such an overly complex model (Figure \ref{fig:lmlist}). Of course, one may want to simplify the model 995 | for reasons of parsimony or easier interpretability. Model selection can be carried out by evaluating the predictive performance of the model, with methods such as leave-one-out (LOO) cross-validation, or by using information criteria like the Watanabe-Akaike (or widely applicable) information criterion (WAIC). See \citet{NicenboimVasishthStatMeth} for discussion and example code.
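As a sketch of how such a predictive comparison could be set up (the block below is ours and is not part of any listing in this paper), one could add a \texttt{generated quantities} block to the Stan model of Listing~\ref{fig:Model3Stancode} that stores the pointwise log-likelihood; the resulting \texttt{log\_lik} draws can then be passed to the \texttt{loo} package (functions \texttt{extract\_log\_lik}, \texttt{loo}, and \texttt{waic}):

\begin{Verbatim}[frame=single,fontfamily=courier,fontsize=\footnotesize]
generated quantities {
  vector[N] log_lik;   //pointwise log-likelihood, needed for LOO and WAIC
  for (i in 1:N)
    log_lik[i] = lognormal_lpdf(rt[i] | beta[1] + u[1,subj[i]] + w[1,item[i]]
                     + (beta[2] + u[2,subj[i]] + w[2,item[i]]) * so[i],
                     sigma_e);
}
\end{Verbatim}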
996 | 997 | \begin{figure}[!htbp] 998 | \centering 999 | <>= 1000 | library(lme4) 1001 | # fit LMM and extract random-effect variances 1002 | so <- ifelse(rDat$type == "obj-ext", 1, -1) 1003 | LMMFit <- lmer(log(rt) ~ so + (1 + so | subj) + (1 + so | item), rDat) 1004 | vc <- VarCorr(LMMFit) 1005 | sigma_u <- attr(vc[["subj"]], "stddev") 1006 | sigma_w <- attr(vc[["item"]], "stddev") 1007 | 1008 | # extract values from Stan object 1009 | sigma_u_stan <- extract(ranIntSlpFit, "sigma_u")[[1]] 1010 | sigma_w_stan <- extract(ranIntSlpFit, "sigma_w")[[1]] 1011 | 1012 | # plot 1013 | layout(matrix(1:4, nrow = 2, byrow = TRUE)) 1014 | sigma_u0_dens <- density(sigma_u_stan[ , 1]) 1015 | plot(sigma_u0_dens, main = NA, xlab = expression(sigma[u0])) 1016 | abline(v = sigma_u[1]) 1017 | sigma_u1_dens <- density(sigma_u_stan[ , 2]) 1018 | plot(sigma_u1_dens, main = NA, xlab = expression(sigma[u1])) 1019 | abline(v = sigma_u[2]) 1020 | sigma_w0_dens <- density(sigma_w_stan[ , 1]) 1021 | plot(sigma_w0_dens, main = NA, xlab = expression(sigma[w0])) 1022 | abline(v = sigma_w[1]) 1023 | sigma_w1_dens <- density(sigma_w_stan[ , 2]) 1024 | plot(sigma_w1_dens, main = NA, xlab = expression(sigma[w1])) 1025 | abline(v = sigma_w[2]) 1026 | @ 1027 | \caption{The curves show the density of the posterior distributions of the random-effect standard deviations. The vertical bars indicate the corresponding \texttt{lmer} estimates. The top row shows the random effects for subjects, the bottom row shows the random effects for items. Left-hand panels correspond to random intercepts, right-hand panels correspond to random slopes.} 1028 | \label{fig:lmercomp} 1029 | \end{figure} 1030 | 1031 | 1032 | \section{Inference} \label{sec:bda} 1033 | 1034 | Having fit a correlated varying intercepts, varying slopes model, we now explain one way to carry out statistical inference, using credible intervals. 1035 | We have used this approach to draw inferences from data in previously published work (e.g., \citealp{FrankEtAl2015}, \citealp{HofmeisterVasishth2014}, \citealp{SafaviEtAlFrontiers2016}). There are of course other approaches possible for carrying out inference. Bayes Factors are an example; see \citet{lee2013bayesian} and \citet{rouder2012default}. Another is to define a Region of Practical Equivalence \citep{kruschke2014doing}. The reader can choose the approach they find the most appealing. 1036 | For further discussion of Bayes Factors, with example code, see \citet{NicenboimVasishthStatMeth}. 
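To illustrate the region-of-practical-equivalence idea, the sketch below computes how much posterior mass falls inside an interval around zero that is treated as practically equivalent to no effect. It assumes that \texttt{beta1} holds the posterior samples of the slope (extracted as in Listing~\ref{fig:Model3code}); the bounds of $\pm 0.02$ on the log scale are purely illustrative and would have to be justified on substantive grounds in a real analysis.

<<ropeSketch,eval=FALSE>>=
# Illustrative ROPE of +/- 0.02 on the log scale.
rope <- c(-0.02, 0.02)
mean(beta1 > rope[1] & beta1 < rope[2])  # posterior mass inside the ROPE
@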
1037 | 1038 | %\subsection{Inference using credible intervals} \label{subsec:posteriorintervals} 1039 | 1040 | \begin{figure} 1041 | \centering 1042 | <>= 1043 | # Get HPD interval for beta[2] 1044 | beta1<-as.mcmc(unlist(extract(ranIntSlpFit,pars="beta[2]"))) 1045 | betaHPD<-HPDinterval(beta1,prob=0.95) 1046 | # Get HPD interval for omega_u 1047 | N_iter<-length(beta1) 1048 | omega_u<-numeric(N_iter) 1049 | L_u<-array(dim=c(2,2,N_iter)) 1050 | for(i in 1:2) 1051 | for(j in 1:2) 1052 | L_u[i,j,]<-extract(ranIntSlpFit,pars=paste("L_u[",i,",",j,"]",sep=""))[[1]] 1053 | for(i in 1:N_iter) 1054 | omega_u[i] <- tcrossprod(L_u[,,i])[1,2] 1055 | omega_u<-as.mcmc(omega_u) 1056 | omegaHPD<-HPDinterval(omega_u,prob=0.95) 1057 | # PLOT HPD INTERVALS ON THE MARGINAL POSTERIORS 1058 | par(mfrow=c(1,2)) 1059 | hist(beta1,freq=FALSE,col="black",border="white",xaxt="n", 1060 | main=NULL,xlim=c(-.1,.1),xlab=expression(beta[1])) 1061 | abline(v=betaHPD,lty=2,lwd=2) 1062 | axis(1, at = seq(-.1,.1,length.out=5), labels = seq(-.1,.1,length.out=5)) 1063 | hist(omega_u,freq=FALSE,col="black",border="white", 1064 | main=NULL,xlab=expression(rho[u]),xlim=c(-1,1)) 1065 | abline(v=omegaHPD,lty=2,lwd=2) 1066 | @ 1067 | \caption{Upper and lower bounds on the highest posterior density credible intervals (dashed lines) plotted over the marginal posterior distribution of the fixed slope $\beta _1$ (left) and of the correlation $\rho _u$ between the by-subject varying intercepts and varying slopes (right).} \label{fig:hpdinterval} 1068 | \end{figure} 1069 | 1070 | The result of fitting the varying intercepts, varying slopes model is the posterior distribution of the model parameters. Direct inference from the posterior distributions is possible. For instance, we can find the posterior probability with which the fixed intercept $\beta _1$ or the correlation $\rho _u$ between by-subject varying intercepts and slopes take on any given value by consulting the marginal posterior distributions whose histograms are shown in Figure~\ref{fig:hpdinterval}. The information conveyed by such graphs can be sharpened by using the $95$\% credible interval, mentioned earlier. Approximately $95$\% of the posterior density of $\beta _1$ lies between the $2.5$th percentile \Sexpr{signif(quantile(beta1,probs=.025),2)} and the $97.5$th percentile \Sexpr{signif(quantile(beta1,probs=.975),2)}. 1071 | This leads us to conclude that the slope $\beta _1$ for relative clause type $\hbox{\texttt{so}}$ is less than zero with probability $90$\% (see Listing~\ref{fig:Model3code}, line 8). Since zero is included in the credible interval, it is difficult to draw the inference that object relative clauses are read faster than subject relative clauses. However, one could perhaps still make a weak claim that object relatives are easier to process, especially if a lot of evidence has accumulated in other experiments that supports such a conclusion (see \citealp{VasishthetalPLoSOne2013} for a more detailed discussion). Meta-analysis of existing studies can help in obtaining a better estimate of the posterior distribution of a parameter; for psycholinguistic examples, see \citet{Vasishth:MScStatistics,EngelmannJaegerVasishth2016,mahowald2016meta}. 1072 | 1073 | What about the correlations between varying intercepts and varying slopes for subject and for item? What can we infer from the analysis about these relationships? 1074 | The 95\% credible interval for $\rho _u$ is $(\Sexpr{signif(omegaHPD[1],1)},\Sexpr{signif(omegaHPD[2],1)})$. 
Our belief that $\rho _u$ is less than zero is rather uncertain, although we can conclude that $\rho _u$ is less than zero with probability \Sexpr{signif(100*mean(omega_u<0),0)}\%. There is only weak evidence that subjects who read faster than average exhibit greater slowdowns at the head noun of object relative clauses than subjects who read slower than average. For the by-item varying intercepts and slopes, it is pretty clear that we do not have enough data (15 items) to draw any conclusions. For these data, it probably makes sense to fit a simpler model \citep{BatesEtAlParsimonious}, with only varying intercepts and slopes for subject, and only varying intercepts for items; although there is no harm done in this particular example if we fit a model with a full variance-covariance matrix for both subjects and items. 1075 | 1076 | In sum, regarding our main research question, our conclusion here is that we cannot say that object relatives are harder to process than subject relatives, because the credible interval for $\beta_1$ includes zero. However, one could argue that there is \textit{some} weak evidence in favor of the hypothesis, since the posterior probability of the parameter being negative is approximately $90$\%. 1077 | 1078 | \section{Further reading} 1079 | 1080 | We hope that this tutorial has given the reader a flavor of what it would be like to fit Bayesian linear mixed models. There is of course much more to say on the topic, and we hope that the interested reader will take a look at some of the excellent books that have recently come out. We suggest below a sequence of reading that we found helpful. 1081 | A good first general textbook is by \citet{gelmanhill07}; it begins with the frequentist approach and only later transitions to Bayesian models. The book by \citet{rethinking} is also excellent. 1082 | For those looking for a psychology-specific introduction, the books by \citet{kruschke2014doing} and \citet{lee2013bayesian} are to be recommended, although for the latter the going might be easier if the reader has already looked at \citet{gelmanhill07}. 1083 | As a second book, \citet{lunn2012bugs} is recommended; it provides many interesting and useful examples using the BUGS language, which are discussed in exceptionally clear language. Many of these books use the BUGS syntax~\citep{lunn2000winbugs}, which the probabilistic programming language JAGS \citep{plummer2011jags} also adopts; however, Stan code for these books is slowly becoming available on the Stan home page (\url{https://github.com/stan-dev/example-models/wiki}). 1084 | For those with introductory calculus, a slightly more technical introduction to Bayesian methods by~\citet{lynch2007introduction} is an excellent choice. Finally, the textbook by~\citet{Gelman14} is the definitive modern guide, and provides a more advanced treatment. 1085 | 1086 | \section*{Acknowledgements} 1087 | 1088 | We are grateful to the developers of Stan (in particular, Andrew Gelman, Bob Carpenter) and members of the Stan mailing list for their advice regarding model specification. Douglas Bates and Reinhold Kliegl have helped considerably over the years in improving our understanding of LMMs from a frequentist perspective. We also thank Edward Gibson for releasing his published data. Titus von der Malsburg, Lena J\"ager, and Bruno Nicenboim provided useful comments on previous drafts. 1089 | Thanks also go to Charles S.\ Stanton for catching a mistake in our code. 
1090 | 1091 | \clearpage 1092 | 1093 | \bibliographystyle{apacite} 1094 | \bibliography{SorensenEtAl} 1095 | 1096 | \clearpage 1097 | 1098 | \appendix 1099 | 1100 | \section{Cholesky factorization} 1101 | \label{sec:chol} 1102 | 1103 | A correlation matrix can be factored into a \textit{square root of the matrix}; one method is the Cholesky factorization. Given a correlation matrix $C$, we can obtain its square root $L$. The square root of a matrix is such that we can square $L$ to get the correlation matrix $C$ back. We illustrate the matrix square root with a simple example. Suppose we have a correlation matrix: 1104 | \begin{equation} 1105 | C=\begin{pmatrix} 1106 | 1 & -0.5 \\ 1107 | -0.5 & 1 \\ 1108 | \end{pmatrix} 1109 | \end{equation} 1110 | <>= 1111 | C<-matrix(c(1,-.5,-.5,1),ncol=2) 1112 | L<-t(chol(C)) 1113 | @ 1114 | We can use the Cholesky factorization function in R, \texttt{chol}, to derive the lower triangular square root $L$ of this matrix. This gives us: 1115 | \begin{equation} 1116 | L=\begin{pmatrix} 1117 | 1 & 0 \\ 1118 | -0.5 & 0.8660254 \\ 1119 | \end{pmatrix} 1120 | \end{equation} 1121 | We confirm that this is a square root by multiplying L with itself to get the correlation matrix back (squaring a matrix is done by multiplying the matrix by its transpose): 1122 | \begin{equation} 1123 | L L^\intercal= 1124 | \begin{pmatrix} 1125 | 1 & 0 \\ 1126 | -0.5 & 0.8660254 \\ 1127 | \end{pmatrix} 1128 | \begin{pmatrix} 1129 | 1 & -0.5 \\ 1130 | 0 & 0.8660254 \\ 1131 | \end{pmatrix} 1132 | = 1133 | \begin{pmatrix} 1134 | 1 & -0.5 \\ 1135 | -0.5 & 1 \\ 1136 | \end{pmatrix} 1137 | \end{equation} 1138 | 1139 | The reason that the Cholesky factorization is useful for LMMs is that we use it to generate the by-subject and by-item random intercepts and slopes. 1140 | 1141 | \paragraph{Generating correlated random variables using the Cholesky factor} 1142 | The by-subject and by-item adjustments are generated using the following standard procedure for generating correlated random variables $\mathbf{x}=(x_1,x_2)$: 1143 | \begin{enumerate} 1144 | 1145 | \item 1146 | Given a vector of standard deviations (e.g., $\sigma_{u0}, \sigma_{u1}$), create a diagonal matrix: 1147 | \begin{equation} 1148 | \tau= 1149 | \begin{pmatrix} 1150 | \sigma_{u0} & 0 \\ 1151 | 0 & \sigma_{u0}\\ 1152 | \end{pmatrix} 1153 | \end{equation} 1154 | 1155 | \item 1156 | Premultiply the diagonalized matrix $\tau$ with the Cholesky factor $L$ of the correlation matrix $C$ to get a matrix $\Lambda$. 1157 | 1158 | \item 1159 | Generate values from a random variable $\mathbf{z}=(z_1,z_2)^\intercal $, where $z_1$ and $z_2$ each have independent $\mathcal{N}(0,1)$ distributions (left panel of Figure~\ref{fig:xz}). 1160 | 1161 | \item 1162 | Multiply $\Lambda$ with $\mathbf{z}$; this generates the correlated random variables $\mathbf{x}$ (right panel of Figure~\ref{fig:xz}). 1163 | \end{enumerate} 1164 | 1165 | <>= 1166 | # Declare functions which plot the graphs for the next chunk's next figure. 1167 | zDistribution <- function(){ 1168 | # Graphical Parameters 1169 | par(mfrow=c(1,1),oma=rep(0,4),mar=c(4,4,0,2),pty="s",cex=1.5) 1170 | 1171 | # Plot varcov matrix. 
1172 | #rotate <- function(x) apply(x, 2, rev) 1173 | #varcov <- diag(sigma)%*%Omega%*%diag(sigma) 1174 | #image(rotate(varcov),axes=FALSE) 1175 | 1176 | zz <- extract(cholesky_sim,pars=ave(x=1:N,FUN=function(x)paste("z[",x,"]"))) 1177 | 1178 | # Plot zz 1179 | errorCircle <- function(mu,L,sigma,stddev,n){ 1180 | circle <- t(matrix(c(cos(2*pi*(0:n)/n), sin(2*pi*(0:n)/n)), ncol=2))*stddev 1181 | return(circle) 1182 | } 1183 | plot(zz[[1]],zz[[2]],xlab=expression(z[1]),ylab=expression(z[2]), 1184 | xlim=c(-10,10),ylim=c(-10,10)) 1185 | circleInner <- errorCircle(0,L,sigma,1,100) 1186 | circleOuter <- errorCircle(0,L,sigma,2,100) 1187 | lines(circleInner[1,],circleInner[2,],col="gray",lwd=2) 1188 | lines(circleOuter[1,],circleOuter[2,],col="gray",lwd=2) 1189 | mtext(paste("sample corr. ",signif(cor(zz[[1]],zz[[2]]),2)),cex=1.5) 1190 | } 1191 | 1192 | xDistribution <- function(){ 1193 | # Graphical Parameters 1194 | par(mfrow=c(1,1),oma=rep(0,4),mar=c(4,4,0,2),pty="s",cex=1.5) 1195 | 1196 | # Plot varcov matrix. 1197 | #rotate <- function(x) apply(x, 2, rev) 1198 | #varcov <- diag(sigma)%*%Omega%*%diag(sigma) 1199 | #image(rotate(varcov),axes=FALSE) 1200 | 1201 | xx <- extract(cholesky_sim,pars=ave(x=1:N,FUN=function(x)paste("x[",x,"]"))) 1202 | 1203 | errorEllipse <- function(mu,L,sigma,stddev,n){ 1204 | circle <- t(matrix(c(cos(2*pi*(0:n)/n), sin(2*pi*(0:n)/n)), ncol=2))*stddev 1205 | ellipse <- diag(sigma) %*% L %*% circle + mu 1206 | return(ellipse) 1207 | } 1208 | plot(xx[[1]],xx[[2]],xlab=expression(x[1]),ylab=expression(x[2]), 1209 | xlim=c(-10,10),ylim=c(-10,10)) 1210 | ellipseInner <- errorEllipse(0,L,sigma,1,100) 1211 | ellipseOuter <- errorEllipse(0,L,sigma,2,100) 1212 | lines(ellipseInner[1,],ellipseInner[2,],col="gray",lwd=2) 1213 | lines(ellipseOuter[1,],ellipseOuter[2,],col="gray",lwd=2) 1214 | mtext(paste("sample corr. ",signif(cor(xx[[1]],xx[[2]]),2)),cex=1.5) 1215 | } 1216 | @ 1217 | 1218 | \begin{figure} 1219 | \centering 1220 | \begin{minipage}{0.45\textwidth} 1221 | <>= 1222 | 1223 | load("../data/cholesky_sim.Rda") 1224 | zDistribution() 1225 | @ 1226 | \end{minipage} 1227 | \begin{minipage}{0.45\textwidth} 1228 | <>= 1229 | xDistribution() 1230 | @ 1231 | \end{minipage} 1232 | \caption{Uncorrelated random variables $\mathbf{z}=(z_1,z_2)^\intercal $ (left) and correlated random variables $\mathbf{x}=(x_1,x_2)^\intercal $ (right).}\label{fig:xz} 1233 | \end{figure} 1234 | 1235 | It is helpful to walk through steps 1 to 4 of the procedure described above for generating correlated random intercepts and random slopes. These are carried out in lines 26 and 36 of Listing~\ref{fig:Model3Stancode}. The statement \texttt{diag\_pre\_multiply(sigma\_u, L\_u)} in line 26 computes the transpose matrix product (steps 1 and 2). The statement \texttt{to\_vector(z\_u) \textasciitilde{ }normal(0,1);} in line 36 generates \texttt{z\_u} as samples from the unit normal distribution (step 3). In line 26, the right multiplication of \texttt{diag\_pre\_multiply(sigma\_u, L\_u)} by \texttt{z\_u}, a matrix of normally distributed random variables, yields the varying intercepts and slopes (step 4). 
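The same four steps can be checked numerically in R. The sketch below uses a purely illustrative correlation of $-0.5$ and illustrative standard deviations; it is not part of the model code, but shows that the product $\mathrm{diag}(\sigma)\, L\, \mathbf{z}$ indeed produces draws with the intended correlation.

<<choleskyNumericalCheck,eval=FALSE>>=
# Numerical check of steps 1-4 (all values purely illustrative).
C <- matrix(c(1, -0.5, -0.5, 1), ncol = 2)  # target correlation matrix
L <- t(chol(C))                             # lower-triangular Cholesky factor
sigma_u <- c(0.25, 0.07)                    # illustrative standard deviations
Lambda <- diag(sigma_u) %*% L               # steps 1 and 2
z <- matrix(rnorm(2 * 1e4), nrow = 2)       # step 3: uncorrelated N(0,1) draws
x <- Lambda %*% z                           # step 4: correlated intercepts and slopes
cor(x[1, ], x[2, ])                         # approximately -0.5
@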
1236 | 1237 | \begin{equation} 1238 | \begin{split} 1239 | \begin{pmatrix} 1240 | u_{01} & u_{11} \\ 1241 | u_{02} & u_{12} \\ 1242 | \vdots & \vdots \\ 1243 | u_{0J} & u_{1J} 1244 | \end{pmatrix} 1245 | &= 1246 | \big( \text{diag}(\sigma _{u0}, \sigma _{u1}) 1247 | L_u \mathbf{z}_u \big)^\intercal\\ 1248 | &= 1249 | \left(\begin{pmatrix} 1250 | \sigma _{u0} & 0\\ 1251 | 0 & \sigma _{01} 1252 | \end{pmatrix} 1253 | \begin{pmatrix} 1254 | \ell _{11} & 0 \\ 1255 | \ell _{21} & \ell _{22} 1256 | \end{pmatrix} 1257 | \begin{pmatrix} 1258 | z_{11} & z_{12} & \ldots & z_{1J}\\ 1259 | z_{21} & z_{22} & \ldots & z_{2J}\\ 1260 | \end{pmatrix}\right)^\intercal 1261 | \end{split} 1262 | \end{equation} 1263 | 1264 | \section{Matrix formulation of the linear mixed model } 1265 | \label{sec:matrix} 1266 | 1267 | In the body of the text, we fit four models of increasing complexity to the data-set of~\citet{gibsonwu}. In all specifications, there was an explicit vector \texttt{so} for the predictor variable in Stan. However, if we want to fit more complex models with many categorical and continuous predictors and interactions, this approach requires increasingly complex specifications in Stan code. Alternatively, we can use the matrix formulation of the linear mixed model that allows for using the same code for models of different complexity. In the following, we will apply this approach for an alternative version of the correlated varying intercepts, varying slopes model, which includes random intercepts and slopes for subjects and items. 1268 | 1269 | % The grand mean $\beta _0$ of $\log \hbox{\texttt{rt}}$ is adjusted by subject and by item through the varying intercepts $u_0$ and $w_0$, which are unique values for each subject and item, respectively. Likewise, the fixed effect $\beta _1$, which is associated with the predictor $\hbox{\texttt{so}}$, is adjusted by the by-subject varying slope $u_1$ and by-item varying slope $w_1$. 1270 | 1271 | We build up the model specification by first noting that, for each subject, 1272 | the by-subject varying intercept $u_0$ and slope $u_1$ have a multivariate normal prior distribution with mean zero and covariance matrix $\Sigma _u$. Similarly, for each item, the by-item varying intercept $w_0$ and slope $w_1$ have a multivariate normal prior distribution with mean zero and covariance matrix $\Sigma _w$. 1273 | The error $\varepsilon $ is assumed to have a normal distribution with mean zero and standard deviation $\sigma _e$. 1274 | 1275 | We proceed to implement the model in Stan. 1276 | Instead of passing the predictor $\hbox{\texttt{so}}$ to \texttt{stan} as vector, as we did earlier, we make $\hbox{\texttt{so}}$ into a design matrix \texttt{X} using the function \texttt{model.matrix} available in R (see Listing \ref{fig:matrixModelCode}, line 2).\footnote{Here, we would like to acknowledge the contribution of Douglas Bates in specifying the model in this general matrix form.} The command \verb|model.matrix(~ 1 + so, rDat)| creates a model matrix with two fixed effects, the intercept (\texttt{1}) and a factor ($\hbox{\texttt{so}}$), based on the data frame \texttt{rDat}. 1277 | The first column of the design matrix \texttt{X} consists of all ones; this column represents the intercept. The second column is the predictor $\hbox{\texttt{so}}$ and consists of values in $\{-1, 1\}$. The model matrix thus consists of a two-level factorial design, with blocks of this design repeated for each subject. 
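Before turning to the full matrix form, it can be instructive to inspect \texttt{X} directly; the following sketch assumes that \texttt{rDat} and the sum-coded predictor \texttt{so} have been set up as in the earlier models.

<<inspectDesignMatrix,eval=FALSE>>=
X <- unname(model.matrix(~ 1 + so, rDat))
head(X)  # column 1: all ones (intercept); column 2: so, coded +1/-1
dim(X)   # N rows (one per data point) and P = 2 columns
@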
1278 | For the full data-set, we could write it very compactly in matrix form as follows: 1279 | 1280 | \begin{equation} \label{eq:matrixmodel} 1281 | \log \hbox{\texttt{rt}} = \mathbf{X}\boldsymbol{\beta} + \mathbf{Z}_{u} \mathbf{u} + \mathbf{Z}_{w} \mathbf{w} + \boldsymbol{\varepsilon} 1282 | \end{equation} 1283 | 1284 | Here, $\mathbf{X}$ is the $N\times P$ model matrix (with $N=547$, since we have $547$ data points; and $P=2$ since we have the intercept plus another fixed effect), $\boldsymbol{\beta}$ is a vector of length $P$ including fixed effects parameters, $\mathbf{Z}_{u}$ and $\mathbf{Z}_{w}$ are the subject and item model matrices ($N\times P$), and $\mathbf{u}$ and $\mathbf{w}$ are the by-subject and by-item adjustments to the fixed effects estimates; these are identical to the design matrix $\mathbf{X}$ in the model with varying intercepts and varying slopes included. For more examples of similar model specifications in Stan, see the R package \texttt{RePsychLing} on github (\url{https://github.com/dmbates/RePsychLing}). 1285 | 1286 | \singlespacing 1287 | \begin{listing} 1288 | % <>= 1289 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 1290 | # Make design matrix 1291 | X <- unname(model.matrix(~ 1 + so, rDat)) 1292 | attr(X, "assign") <- NULL 1293 | # Make Stan data 1294 | stanDat <- list(N = nrow(X), 1295 | P = ncol(X), 1296 | n_u = ncol(X), 1297 | n_w = ncol(X), 1298 | X = X, 1299 | Z_u = X, 1300 | Z_w = X, 1301 | J = nlevels(rDat$subj), 1302 | K = nlevels(rDat$item), 1303 | rt = rDat$rt, 1304 | subj = as.integer(rDat$subj), 1305 | item = as.integer(rDat$item)) 1306 | # Fit the model 1307 | matrixFit <- stan(file = "matrixModel.stan", data = stanDat, 1308 | iter = 2000, chains = 4) 1309 | \end{Verbatim} 1310 | % @ 1311 | \caption{Matrix formulation code for running the varying intercepts, varying slopes model.}\label{fig:matrixModelCode} 1312 | \end{listing} 1313 | \doublespacing 1314 | 1315 | Note that we remove the column names and the attributes of the model matrix \texttt{X} in order to use it for Stan; refer to Listing~\ref{fig:matrixModelCode}. Having defined the model, we proceed to assemble the list \texttt{stanDat} of data, relying on the above matrix formulation. The number \texttt{N} of observations, the number \texttt{J} of subjects and \texttt{K} of items, the reading times \texttt{rt}, and the subject and item indicator variables \texttt{subj} and \texttt{item} are familiar from the previous models presented. The integer \texttt{P} is the number of fixed effects (two including the intercept). Model 4 includes a varying intercept $u_{0}$ and a varying slope $u_{1}$ for each subject, and so the number \texttt{n\_u} of by-subject random effects equals \texttt{P}. Likewise, Model 4 includes a varying intercept $w_{0}$ and a varying slope $w_{1}$ for each item, and so the number \texttt{n\_w} of by-item random effects also equals \texttt{P}. 
1316 | 1317 | \begin{listing} 1318 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 1319 | data { 1320 | int N; //n trials 1321 | int P; //n fixefs 1322 | int J; //n subjects 1323 | int n_u; //n subj ranefs 1324 | int K; //n items 1325 | int n_w; //n item ranefs 1326 | int subj[N]; //subject indicator 1327 | int item[N]; //item indicator 1328 | row_vector[P] X[N]; //fixed effects design matrix 1329 | row_vector[n_u] Z_u[N]; //subj ranef design matrix 1330 | row_vector[n_w] Z_w[N]; //item ranef design matrix 1331 | vector[N] rt; //reading time 1332 | } 1333 | parameters { 1334 | vector[P] beta; //fixed effects coefs 1335 | cholesky_factor_corr[n_u] L_u; //cholesky factor of subj ranef corr matrix 1336 | cholesky_factor_corr[n_w] L_w; //cholesky factor of item ranef corr matrix 1337 | vector[n_u] sigma_u; //subj ranef std 1338 | vector[n_w] sigma_w; //item ranef std 1339 | real sigma_e; //residual std 1340 | vector[n_u] z_u[J]; //subj ranef 1341 | vector[n_w] z_w[K]; //item ranef 1342 | } 1343 | transformed parameters { 1344 | vector[n_u] u[J]; //subj ranefs 1345 | vector[n_w] w[K]; //item ranefs 1346 | { 1347 | matrix[n_u,n_u] Sigma_u; //subj ranef cov matrix 1348 | matrix[n_w,n_w] Sigma_w; //item ranef cov matrix 1349 | Sigma_u = diag_pre_multiply(sigma_u, L_u); 1350 | Sigma_w = diag_pre_multiply(sigma_w, L_w); 1351 | for(j in 1:J) 1352 | u[j] = Sigma_u * z_u[j]; 1353 | for(k in 1:K) 1354 | w[k] = Sigma_w * z_w[k]; 1355 | } 1356 | } 1357 | model { 1358 | //priors 1359 | L_u ~ lkj_corr_cholesky(2.0); 1360 | L_w ~ lkj_corr_cholesky(2.0); 1361 | for (j in 1:J) 1362 | z_u[j] ~ normal(0,1); 1363 | for (k in 1:K) 1364 | z_w[k] ~ normal(0,1); 1365 | //likelihood 1366 | for (i in 1:N) 1367 | rt[i] ~ lognormal(X[i] * beta + 1368 | Z_u[i] * u[subj[i]] + 1369 | Z_w[i] * w[item[i]], 1370 | sigma_e); 1371 | } 1372 | \end{Verbatim} 1373 | \caption{Stan code for the matrix formulation of the varying intercepts, varying slopes model.}\label{fig:StanCodeMatrixModel} 1374 | \end{listing} 1375 | 1376 | We also have to adapt the Stan code to the model formulation (see Listing~\ref{fig:StanCodeMatrixModel}). The data block contains the corresponding variables. Using the command \verb|row_vector[P] X[N]|, we declare the fixed effects design matrix \texttt{X} as an array of \texttt{N} row vectors of length \texttt{P} whose components are the predictors associated with the \texttt{N} reading times. Likewise for the subject and item random effects design matrices \texttt{Z\_u} and \texttt{Z\_w}, which correspond to $\mathbf{Z}_{u}$ and $\mathbf{Z}_{w}$ respectively in Equation~\ref{eq:matrixmodel}. 1377 | The vector \texttt{beta} contains the fixed effects $\beta _0$ and $\beta_1$. The matrices \texttt{L\_u}, \texttt{L\_w} and the arrays \texttt{z\_u}, \texttt{z\_w} of vectors (not to be confused with the design matrices \texttt{Z\_u} and \texttt{Z\_w}) will generate the varying intercepts and slopes $u_0, u_1$ and $w_0, w_1$, using the procedure described for the varying intercepts, varying slopes model. For example, the command \verb|vector[n_u] u[J]| specifies \texttt{u} as an array of \texttt{J} vectors of length \texttt{n\_u}; hence, there is one vector per subject. The vector \texttt{sigma\_u} contains the standard deviations of the by-subject varying intercepts and slopes $u_0, u_1$, and the vector \texttt{sigma\_w} contains the standard deviations of the by-item varying intercepts and slopes $w_0, w_1$. 
The variable \texttt{sigma\_e} is the standard deviation $\sigma _e$ of the error $\varepsilon$. 1378 | The transformed parameters block generates the by-subject intercepts and slopes $u_0, u_1$ and the by-item intercepts and slopes $w_0, w_1$. 1379 | 1380 | We place lkj priors on the random effects correlation matrices through the \texttt{lkj\_corr\_cholesky(2.0)} priors on their Cholesky factors \texttt{L\_u} and \texttt{L\_w}. We implicitly place uniform priors on the fixed effects $\beta_0, \beta_1$, the random effects standard deviations $\sigma _{u0}, \sigma_{u1}$, and $\sigma_{w0}, \sigma_{w1}$ and the error standard deviation $\sigma _e$ by omitting any prior specifications for them in the model block. We specify the likelihood with the probability statement that \texttt{rt[i]} is distributed log-normally with mean \texttt{X[i] * beta + Z\_u[i] * u[subj[i]] + Z\_w[i] * w[item[i]]} and standard deviation \texttt{sigma\_e}. 1381 | The next step towards model-fitting is to pass the list \texttt{stanDat} to \texttt{stan}, which compiles a C++ program to sample from the posterior distribution of the model parameters. 1382 | 1383 | A major advantage of the above matrix formulation is that we do not need to write a new Stan model for a future repeated measures design. All we have to do now is define the design matrix $\mathbf X$ appropriately, and include it (along with appropriately defined $\mathbf Z_u$ and $\mathbf Z_w$ for the subjects and items random effects) as part of the data specification that is passed to Stan. 1384 | 1385 | \end{document} 1386 | -------------------------------------------------------------------------------- /doc/SorensenEtAl.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/doc/SorensenEtAl.pdf -------------------------------------------------------------------------------- /man/BayesLMMTutorial-package.Rd: -------------------------------------------------------------------------------- 1 | \name{BayesLMMTutorial-package} 2 | \alias{BayesLMMTutorial-package} 3 | \alias{BayesLMMTutorial} 4 | \docType{package} 5 | \title{ 6 | What the package does (short line) 7 | ~~ package title ~~ 8 | } 9 | \description{ 10 | More about what it does (maybe more than one line) 11 | ~~ A concise (1-5 lines) description of the package ~~ 12 | } 13 | \details{ 14 | \tabular{ll}{ 15 | Package: \tab BayesLMMTutorial\cr 16 | Type: \tab Package\cr 17 | Version: \tab 1.0\cr 18 | Date: \tab 2015-06-12\cr 19 | License: \tab What license is it under?\cr 20 | } 21 | ~~ An overview of how to use the package, including the most ~~ 22 | ~~ important functions ~~ 23 | } 24 | \author{ 25 | Who wrote it 26 | 27 | Maintainer: Who to complain to 28 | ~~ The author and/or maintainer of the package ~~ 29 | } 30 | \references{ 31 | ~~ Literature or other references for background information ~~ 32 | } 33 | ~~ Optionally other standard keywords, one per line, from ~~ 34 | ~~ file KEYWORDS in the R documentation directory ~~ 35 | \keyword{ package } 36 | \seealso{ 37 | ~~ Optional links to other man pages, e.g. 
~~ 38 | ~~ \code{\link[:-package]{}} ~~ 39 | } 40 | \examples{ 41 | ~~ simple examples of the most important functions ~~ 42 | } 43 | -------------------------------------------------------------------------------- /vignettes/BayesianLMMs.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fitting Bayesian LMMs using Stan: A tutorial" 3 | author: "Shravan Vasishth, Sven Hohenstein, and Tanner Sorensen" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Bayesian data analysis examples} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | ```{r preliminaries,echo=FALSE,cache=FALSE} 13 | library(rstan) 14 | library(knitr) 15 | options(width=92, 16 | show.signif.stars = FALSE) 17 | opts_chunk$set(comment=NA, fig.width=8, fig.height=10) 18 | ``` 19 | 20 | 21 | # Example 1: Gibson and Wu 2013 data 22 | 23 | This is the data reported in Gibson and Wu 2013. It is available with this package. 24 | 25 | ## Fixed effects model 26 | 27 | Read in the Read in the Gibson and Wu data and subset head noun: 28 | 29 | ```{r loadgibsonwudata} 30 | rDat<-read.table("../data/gibsonwu2012data.txt",header=TRUE) 31 | 32 | rDat<-subset(rDat,region=="headnoun") 33 | dim(rDat) 34 | ``` 35 | 36 | Convert subject and item to factors: 37 | 38 | ```{r definefactorsmodel1} 39 | rDat$region<-factor(rDat$region) 40 | rDat$subj <- factor(rDat$subj) 41 | rDat$item <- factor(rDat$item) 42 | summary(rDat) 43 | ``` 44 | 45 | Apply sum contrast coding to predictor (obj:+1; subj:-1): 46 | 47 | 48 | ```{r contrastcodingmodel1} 49 | rDat$so <- ifelse(rDat$type == "subj-ext", -1, 1) 50 | summary(rDat) 51 | ``` 52 | 53 | Set up data for Stan: 54 | 55 | ```{r datasetupmodel1} 56 | stanDat<-list(rt = rDat$rt, 57 | so = rDat$so, 58 | N = nrow(rDat)) 59 | ``` 60 | 61 | Load, compile, and fit model: 62 | 63 | 64 | ```{r fitmodel1} 65 | fixEfFit <- stan(file = "fixEf.stan", 66 | data = stanDat, 67 | iter = 2000, chains = 4) 68 | 69 | save(list="fixEfFit",file="../data/fixEfFit.Rda", 70 | compress="xz") 71 | ``` 72 | 73 | ```{r traceplotmodel1} 74 | traceplot(fixEfFit, pars = c("beta","sigma_e"), inc_warmup = FALSE) 75 | ``` 76 | 77 | ```{r summarizeresultsmodel1} 78 | print(fixEfFit, pars = c("beta","sigma_e"), probs = c(0.025, 0.5, 0.975)) 79 | ``` 80 | 81 | Plot the posterior distributions: 82 | 83 | 84 | ```{r,fig.show='hold'} 85 | beta0 <- extract(fixEfFit, pars = c("beta[1]"))$beta 86 | beta1 <- extract(fixEfFit, pars = c("beta[2]"))$beta 87 | sigma_e <- extract(fixEfFit, pars = c("sigma_e"))$sigma_e 88 | N_iter <- length(beta0) 89 | theta <- list(beta0 = beta0, beta1 = beta1, sigma_e = sigma_e) 90 | lab <- c(expression(hat(beta)[0]), expression(hat(beta)[1]), expression(hat(sigma)[e])) 91 | lim <- matrix(c(6.25, -0.09, .55, 92 | 6.45, .03, .75), nrow = 3, ncol = 2) 93 | par(mfrow = c(3, 3)) 94 | for(i in 1:3) 95 | for(j in 1:3){ 96 | if(i == j){ 97 | # PLOT MARGINALS ON DIAGONAL 98 | hist(theta[[i]], freq = FALSE, col = "black", border = "white", main = NULL, xlab = lab[i]) 99 | }else if(i>j){ 100 | # PLOT BIVARIATE ON THE LOWER TRIANGULAR 101 | # CODE ADAPTED FROM: 102 | # http://stats.stackexchange.com/questions/24380/how-to-get-ellipse-region-from-bivariate-normal-distributed-data 103 | xy <- matrix(nrow=N_iter,ncol=2) 104 | xy[, 1] <- theta[[i]] 105 | xy[, 2] <- theta[[j]] 106 | center <- apply(xy, 2, mean) 107 | sigma <- cov(xy) 108 | sigma.inv = solve(sigma, matrix(c(1, 0, 0, 1), 2, 2)) 
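# sigma.inv is the inverse of the sample covariance matrix (the precision
# matrix); the height() function defined below uses it to evaluate the
# quadratic form of a bivariate normal approximation, whose level sets give
# the contour lines overlaid on the scatterplot.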
109 | # DEFINE GRID 110 | n <- 50 111 | xlim <- lim[i, ] 112 | ylim <- lim[j, ] 113 | x <- seq(xlim[1], xlim[2], length.out = n) 114 | y <- seq(ylim[1], ylim[2], length.out = n) 115 | # EVALUATE HEIGHT FUNCTION ON GRID 116 | height <- function(s, t) {u <- c(s, t) - center; u %*% sigma.inv %*% u / 2} 117 | z <- mapply(height, as.vector(rep(x, n)), as.vector(outer(rep(0, n), y, `+`))) 118 | # PLOT 119 | plot(xy, pch = 20, xlim = xlim, ylim = ylim, xlab = lab[i], ylab = lab[j]) 120 | contour(x, y, matrix(z, n, n), levels = (0:2), col = gray(.5), lwd = 2, add = TRUE) 121 | }else{ 122 | # SKIP UPPER TRIANGULAR PLOTS (REPEATS) 123 | plot.new() 124 | } 125 | } 126 | ``` 127 | 128 | Find the 95\% credible interval for the slope parameter: 129 | 130 | ```{r} 131 | beta1 <- extract(fixEfFit, pars = c("beta[2]"))$beta 132 | print(signif(quantile(beta1, probs = c(0.025, 0.5, 0.975)), 2)) 133 | 134 | mean(beta1<0) 135 | ``` 136 | 137 | 138 | ## Varying intercepts model 139 | 140 | 141 | $$\log rt_{jk} = \beta _0 + u_{0j} + w_{0k} + \beta _1 so_{jk} + \epsilon_{jk}$$ 142 | 143 | 144 | ```{r setupdatadatamodel2} 145 | stanDat<-list(subj = as.integer(factor(rDat$subj)), 146 | item = as.integer(factor(rDat$item)), 147 | rt = rDat$rt, 148 | so = rDat$so, 149 | N = nrow(rDat), 150 | J = length(unique(rDat$subj)), 151 | K = length(unique(rDat$item))) 152 | ``` 153 | 154 | ```{r compileandfitmodel} 155 | ranIntFit <- stan(file = "ranInt.stan", data = stanDat, 156 | iter = 2000, chains = 4) 157 | ``` 158 | 159 | ```{r saveresultsmodel2} 160 | save(list = "ranIntFit",file = "../data/ranIntFit.Rda", 161 | compress = "xz") 162 | ``` 163 | 164 | Examining the posterior distribution: 165 | 166 | ```{r summarizeresultsmodel2} 167 | print(ranIntFit, pars = c("beta", "sigma_e", "sigma_u", "sigma_w"), 168 | probs=c(0.025, 0.5, 0.975)) 169 | 170 | beta1 <- extract(ranIntFit, pars = c("beta[2]"))$beta 171 | print(signif(quantile(beta1, probs=c(0.025, 0.5, 0.975)), 2)) 172 | 173 | mean(beta1 < 0) 174 | ``` 175 | 176 | 177 | ## Varying intercepts, varying slopes, no correlation model 178 | 179 | $$ 180 | \log rt_{jk} = \beta_0 + u_{0j} + w_{0k} + (\beta_1 + u_{1j} + w_{1k}) so_{jk} + \epsilon_{jk} 181 | $$ 182 | 183 | ```{r} 184 | ############################ 185 | ## VARYING INTERCEPTS, 186 | ## VARYING SLOPES, 187 | ## NO CORRELATION 188 | ## MIXED EFFECTS MODEL 189 | ############################ 190 | 191 | # 1. Compile and fit model. 192 | ranIntSlpNoCorFit <- stan(file = "ranIntSlpNoCor.stan", data = stanDat, 193 | iter = 2000, chains = 4) 194 | 195 | save(list = "ranIntSlpNoCorFit", 196 | file = "../data/ranIntSlpNoCorFit.Rda", 197 | compress = "xz") 198 | ``` 199 | 200 | ```{r} 201 | print(ranIntSlpNoCorFit, pars = c("beta", "sigma_e", "sigma_u", "sigma_w"), 202 | probs=c(0.025, 0.5, 0.975)) 203 | 204 | beta1 <- extract(ranIntSlpNoCorFit, pars = c("beta[2]"))$beta 205 | print(signif(quantile(beta1, probs = c(0.025, 0.5, 0.975)), 2)) 206 | 207 | mean(beta1 < 0) 208 | ``` 209 | 210 | 211 | ## Varying intercepts, varying slopes, correlation model 212 | 213 | $$ 214 | \log rt_{jk} = \beta_0 + u_{0j} + w_{0k} + (\beta_1 + u_{1j} + w_{1k}) so_{jk} + \epsilon_{jk} 215 | $$ 216 | 217 | 218 | ```{r} 219 | ############################ 220 | ## VARYING INTERCEPTS, 221 | ## VARYING SLOPES MIXED 222 | ## EFFECTS MODEL 223 | ############################ 224 | 225 | # 1. Compile and fit model. 
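# By default, stan() treats the first half of `iter` as warmup, so this run
# retains 4 x 1000 post-warmup draws; sampling may take several minutes.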
226 | ranIntSlpFit <- stan(file = "ranIntSlp.stan", data = stanDat, 227 | iter = 2000, chains = 4) 228 | 229 | save(list = "ranIntSlpFit", 230 | file = "../data/ranIntSlpFit.Rda", 231 | compress = "xz") 232 | ``` 233 | 234 | ```{r} 235 | print(ranIntSlpFit, pars = c("beta", "sigma_e", "sigma_u", "sigma_w"), 236 | probs = c(0.025, 0.5, 0.975)) 237 | 238 | beta1 <- extract(ranIntSlpFit, pars = c("beta[2]"))$beta 239 | print(signif(quantile(beta1, probs = c(0.025, 0.5, 0.975)), 2)) 240 | 241 | mean(beta1 < 0) 242 | ``` 243 | 244 | Use the L matrix the compute the correlation matrix. 245 | 246 | ```{r} 247 | # L matrices 248 | L_u <- extract(ranIntSlpFit, pars = "L_u")$L_u 249 | L_w <- extract(ranIntSlpFit, pars = "L_w")$L_w 250 | 251 | # correlation parameters 252 | cor_u <- apply(L_u, 1, function(x) tcrossprod(x)[1, 2]) 253 | cor_w <- apply(L_w, 1, function(x) tcrossprod(x)[1, 2]) 254 | 255 | print(signif(quantile(cor_u, probs = c(0.025, 0.5, 0.975)), 2)) 256 | print(mean(cor_u)) 257 | print(signif(quantile(cor_w, probs = c(0.025, 0.5, 0.975)), 2)) 258 | print(mean(cor_w)) 259 | ``` 260 | 261 | 262 | Comparison with lme4: note that lme4 fails to estimate the correlations: 263 | 264 | ```{r} 265 | library(lme4) 266 | so<-ifelse(rDat$type == "obj-ext", 1, -1) 267 | lmer(log(rt) ~ so + (1 + so | subj) +(1 + so| item), rDat) 268 | ``` 269 | 270 | 271 | ## Matrix formulation 272 | 273 | ```{r} 274 | ############################################# 275 | ## ALTERNATIVE MATRIX FORMULATION OF MODEL 3 276 | ############################################# 277 | 278 | # 1. Make design matrix. 279 | X <- unname(model.matrix(~ 1 + so, rDat)) 280 | attr(X, "assign") <- NULL 281 | # 2. Make Stan data. 282 | stanDat <- list(N = nrow(X), 283 | P = ncol(X), 284 | n_u = ncol(X), 285 | n_w = ncol(X), 286 | X = X, 287 | Z_u = X, 288 | Z_w = X, 289 | J = nlevels(rDat$subj), 290 | K = nlevels(rDat$item), 291 | rt = rDat$rt, 292 | subj = as.integer(rDat$subj), 293 | item = as.integer(rDat$item)) 294 | # 3. Fit the model. 295 | matrixFit <- stan(file = "matrixModel.stan", data = stanDat, 296 | iter = 2000, chains = 4) 297 | # 4. Save the result. 298 | save(list = "matrixFit", 299 | file = "../data/matrixFit.Rda", 300 | compress = "xz") 301 | ``` 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | ## Posterior predictive checks 310 | 311 | ```{r} 312 | ############################ 313 | ## POSTERIOR PREDICTIVE 314 | ## CHECKS 315 | ############################ 316 | 317 | # 1. Compile and fit model. 318 | pp <- stan(file = "pp.stan", data = stanDat, 319 | warmup = 500, iter = 750, chains = 1) 320 | 321 | save(list = "pp",file = "../data/pp.Rda", 322 | compress = "xz") 323 | ``` 324 | 325 | Plot correlations between intercepts and slopes for subjects and for items: 326 | 327 | ```{r fig.show='hold'} 328 | J<-length(unique(rDat$subj)) 329 | u<-matrix(nrow=2,ncol=J) 330 | for(j in 1:J) 331 | for(i in 1:2) 332 | u[i,j]<-mean(extract(ranIntSlpFit,pars=c(paste("u[",i,",",j,"]",sep="")))[[1]]) 333 | N_sample<-length(extract(ranIntSlpFit,pars="L_u[1,1]")[[1]]) 334 | L_u<-array(dim=c(2,2,N_sample)) 335 | for(i in 1:2) 336 | for(j in 1:2) 337 | L_u[i,j,]<-extract(ranIntSlpFit,pars=c(paste("L_u[",i,",",j,"]",sep="")))[[1]] 338 | omega_u<-numeric() 339 | for(i in 1:N_sample){ 340 | Omega_u<-L_u[,,i]%*%t(L_u[,,i]) 341 | omega_u[i]<-Omega_u[1,2] 342 | } 343 | # Extract item random intercepts and slopes. 
344 | K<-length(unique(rDat$item)) 345 | w<-matrix(nrow=2,ncol=K) 346 | for(k in 1:K) 347 | for(i in 1:2) 348 | w[i,k]<-mean(extract(ranIntSlpFit,pars=c(paste("w[",i,",",k,"]",sep="")))[[1]]) 349 | L_w<-array(dim=c(2,2,N_sample)) 350 | for(i in 1:2) 351 | for(j in 1:2) 352 | L_w[i,j,]<-extract(ranIntSlpFit,pars=c(paste("L_w[",i,",",j,"]",sep="")))[[1]] 353 | omega_w<-numeric() 354 | for(i in 1:N_sample){ 355 | Omega_w<-L_w[,,i]%*%t(L_w[,,i]) 356 | omega_w[i]<-Omega_w[1,2] 357 | } 358 | # Visualize the posterior distribution for the intercept beta[1] ... 359 | par(mfrow=c(2,2),pch=21,bg="white") 360 | plot(u[1,],u[2,],bg="black",mgp=c(2,.25,0), 361 | xlim=c(-.6,.6),ylim=c(-.04,.04), 362 | xlab=expression(hat(u[0])),ylab=expression(hat(u[1]))) 363 | plot(w[1,],w[2,],bg="black",mgp=c(2,.25,0), 364 | xlim=c(-.6,.6),ylim=c(-.04,.04), 365 | xlab=expression(hat(w[0])),ylab=expression(hat(w[1]))) 366 | hist(omega_u,freq=FALSE,col="black",border="white", 367 | main=NULL,xlab=expression(hat(omega)[u])) 368 | hist(omega_w,freq=FALSE,col="black",border="white", 369 | main=NULL,xlab=expression(hat(omega)[w])) 370 | ``` 371 | 372 | Inference: 373 | 374 | ```{r inference} 375 | 376 | library(coda) 377 | # Get HPD interval for beta[2] 378 | beta1<-as.mcmc(unlist(extract(ranIntSlpFit,pars="beta[2]"))) 379 | betaHPD<-HPDinterval(beta1,prob=0.95) 380 | # Get HPD interval for omega_u 381 | N_iter<-length(beta1) 382 | omega_u<-numeric(N_iter) 383 | L_u<-array(dim=c(2,2,N_iter)) 384 | for(i in 1:2) 385 | for(j in 1:2) 386 | L_u[i,j,]<-extract(ranIntSlpFit,pars=paste("L_u[",i,",",j,"]",sep=""))[[1]] 387 | for(i in 1:N_iter) 388 | omega_u[i] <- tcrossprod(L_u[,,i])[1,2] 389 | omega_u<-as.mcmc(omega_u) 390 | omegaHPD<-HPDinterval(omega_u,prob=0.95) 391 | # PLOT HPD INTERVALS ON THE MARGINAL POSTERIORS 392 | par(mfrow=c(1,2)) 393 | hist(beta1,freq=FALSE,col="black",border="white",xaxt="n", 394 | main=NULL,xlim=c(-.1,.1),xlab=expression(hat(beta)[1])) 395 | abline(v=betaHPD,lty=2,lwd=2) 396 | axis(1, at = seq(-.1,.1,length.out=5), labels = seq(-.1,.1,length.out=5)) 397 | hist(omega_u,freq=FALSE,col="black",border="white", 398 | main=NULL,xlab=expression(hat(omega)[u]),xlim=c(-1,1)) 399 | abline(v=omegaHPD,lty=2,lwd=2) 400 | ``` 401 | 402 | Posterior predictive checks: 403 | 404 | ```{r, fig.show='hold'} 405 | rDat<-read.table("../data/gibsonwu2012data.txt",header=TRUE) 406 | # 2. Define the test quantity. 407 | test<-function(rt){quantile(rt,probs=.95,names=FALSE)} 408 | # 3. Get maximum of observed RT distribution. 409 | upRT <- test(rDat$rt) 410 | # 4. Read in the posterior predictive model. 411 | load("../data/pp.Rda") 412 | # 5. Extract the posterior predictive RT distributions. 413 | # (rows are data-sets, columns are trials) 414 | rt_tilde<-extract(pp,pars="rt_tilde")[[1]] 415 | # 6. compare 5 randomly selected posterior predictive 416 | # RT distributions to the observed RT distribution. 417 | par(mfrow=c(3,2)) 418 | for(i in sample(1:dim(rt_tilde)[1],5,replace=FALSE,prob=NULL)) 419 | hist(rt_tilde[i,],freq=FALSE,col="black",border="white", 420 | main=NULL,xlab=expression(rt^{rep}),xlim=c(0,1E4)) 421 | hist(rDat$rt,freq=FALSE,col="gray",border="black", 422 | main=NULL,xlab=expression(rt^{rep}),xlim=c(0,1E4)) 423 | ``` 424 | 425 | Distribution of the test statistic: 426 | 427 | ```{r,fig.show='hold',fig.height=7} 428 | upRTrep<-apply(rt_tilde, 1, test) 429 | # 8. Compute the probability that upRTrep is greater 430 | # than the maximum of the observed RT distribution. 431 | p<-mean(upRTrep>upRT) 432 | # 9. 
Plot the posterior predictive test quantities 433 | # upRTrep and the observed test quantity upRT. 434 | hist(upRTrep,freq=FALSE,col="black",border="white", 435 | main=NULL,xlab=expression(T(rt^{rep})),xlim=c(min(upRTrep),upRT)) 436 | abline(v=upRT,lty=2,lwd=2) 437 | ``` 438 | 439 | ## Model 6: 2x2 factorial design 440 | 441 | This is an analysis of the data reported in Husain et al 2014. It is included with this package. 442 | 443 | ```{r} 444 | ############################ 445 | ## FACTORIAL MODEL 446 | ############################ 447 | 448 | # 1. Read in the Husain et al. data. 449 | rDat<-read.table("../data/HusainEtAlexpt1data.txt",header=TRUE) 450 | rDat$so<-rDat$RCType # Change name for consistency. 451 | # 2. Make design matrix. 452 | X <- unname(model.matrix(~ 1+so+dist+int, rDat)) 453 | attr(X,"assign") <- NULL 454 | # 3. Factor subj and item. 455 | rDat$subj <- with(rDat,factor(subj)) 456 | rDat$item <- with(rDat,factor(item)) 457 | # 4. Make Stan data. 458 | stanDat <- within(list(), 459 | { 460 | N<-nrow(X) 461 | P <- n_u <- n_w <- ncol(X) 462 | X <- Z_u <- Z_w <- X 463 | J <- length(levels(rDat$subj)) 464 | K <- length(levels(rDat$item)) 465 | rt <- rDat$rt 466 | subj <- as.integer(rDat$subj) 467 | item <- as.integer(rDat$item) 468 | } 469 | ) 470 | # 5. Fit the model. 471 | factorialFit <- stan(file="factorialModel.stan",data=stanDat, 472 | iter=2000, chains=4) 473 | # 6. Save the result. 474 | save(list="factorialFit", 475 | file="../data/factorialFit.Rda", 476 | compress="xz") 477 | ``` 478 | 479 | 480 | $$ 481 | \log rt_{jk} = \beta _0 + u_{0j} + w_{0k} 482 | &+(\beta _1 +u_{1j} +w_{1k})so_{jk} 483 | &+(\beta _2 +u_{2j} +w_{2k})dist_{jk} 484 | &+(\beta _3 +u_{3j} +w_{3k})int_{jk} + \epsilon_{jk} 485 | $$ 486 | 487 | In matrix form: 488 | 489 | $$ 490 | \mathrm{rt} = 491 | X\beta + Z_j u_j + Z_k w_k + \epsilon 492 | $$ 493 | 494 | $X$ is the $N\times P$ model matrix (with P=4 since we have three fixed effects, plus the intercept), $\beta$ is a $P\times 1$ vector of fixed effects parameters, $Z_j$ and $Z_k$ are the subject and item model matrices ($N\times P$), and $u_j$ and $w_k$ are the by-subject and by-item adjustments to the fixed effects estimates. $\epsilon$ refers to the residual error ($N\times 1$). 495 | 496 | 497 | ```{r,fig.show='hold'} 498 | # Extract the fixef coefs. 499 | beta0 <- extract(factorialFit,pars=c("beta[1]")) 500 | beta1 <- extract(factorialFit,pars=c("beta[2]")) 501 | beta2 <- extract(factorialFit,pars=c("beta[3]")) 502 | beta3 <- extract(factorialFit,pars=c("beta[4]")) 503 | # Get HPD interval for the fixef coefs. 
504 | beta0HPD<-HPDinterval(as.mcmc(unlist(beta0)),prob=0.95) 505 | beta1HPD<-HPDinterval(as.mcmc(unlist(beta1)),prob=0.95) 506 | beta2HPD<-HPDinterval(as.mcmc(unlist(beta2)),prob=0.95) 507 | beta3HPD<-HPDinterval(as.mcmc(unlist(beta3)),prob=0.95) 508 | # Plot histograms with HPDs as dotted lines 509 | par(mfrow=c(2,2)) 510 | hist(beta0$beta,freq=FALSE,col="black",border="white",main="grand mean",xlab=expression(beta[0])) 511 | abline(v=beta0HPD,lty=2,lwd=2) 512 | hist(beta1$beta,freq=FALSE,col="black",border="white",main="relative clause type", 513 | xlim=c(-.12,.12),xlab=expression(beta[1])) 514 | abline(v=beta1HPD,lty=2,lwd=2) 515 | hist(beta2$beta,freq=FALSE,col="black",border="white",main="distance", 516 | xlim=c(-.12,.12),xlab=expression(beta[2])) 517 | abline(v=beta2HPD,lty=2,lwd=2) 518 | hist(beta3$beta,freq=FALSE,col="black",border="white",main="interaction", 519 | xlim=c(-.12,.12),xlab=expression(beta[3])) 520 | abline(v=beta3HPD,lty=2,lwd=2) 521 | ``` 522 | 523 | -------------------------------------------------------------------------------- /vignettes/factorialDesign/factorialDesign.Rnw: -------------------------------------------------------------------------------- 1 | \documentclass[doc]{apa6} % man for manuscript format, jou for journal format, doc for standard LaTeX document format 2 | \usepackage[natbibapa]{apacite} 3 | \usepackage[american]{babel} 4 | \usepackage[utf8]{inputenc} 5 | \usepackage{csquotes} 6 | 7 | \usepackage{setspace} 8 | 9 | 10 | \usepackage{amsmath,amssymb,amsfonts} 11 | 12 | \usepackage{url} % this allows us to cite URLs in the text 13 | \usepackage{graphicx} % allows for graphic to float when doing jou or doc style 14 | \usepackage{verbatim} % allows us to use \begin{comment} environment 15 | \usepackage{caption} 16 | %\usepackage{lscape} 17 | \usepackage{pdflscape} 18 | 19 | \usepackage{fancyvrb} 20 | 21 | \usepackage{newfloat} 22 | \DeclareFloatingEnvironment[ 23 | % fileext=los, 24 | % listname=List of Schemes, 25 | % name=Listing, 26 | % placement=!htbp, 27 | % within=section, 28 | ]{listing} 29 | 30 | \title{Generalizing the linear mixed model to factorial designs} 31 | 32 | \twoauthors{Tanner Sorensen}{Shravan Vasishth} 33 | \twoaffiliations{University of Potsdam, Potsdam, Germany}{University of Potsdam, Potsdam, Germany, and \\ 34 | School of Mathematics and Statistics, University of Sheffield, Sheffield, UK} 35 | 36 | 37 | 38 | %\rightheader{knitr and apa6} % for jou format 39 | \leftheader{Sorensen, Vasishth} 40 | 41 | 42 | \note{\today} 43 | 44 | \keywords{Bayesian data analysis, linear mixed models, Stan} 45 | 46 | \doublespacing 47 | 48 | \ccoppy{Draft of \today} 49 | \begin{document} 50 | 51 | \maketitle 52 | 53 | <>= 54 | library(knitr) 55 | library(coda) 56 | 57 | # set global chunk options, put figures into folder 58 | options(replace.assign=TRUE,show.signif.stars=FALSE) 59 | opts_chunk$set(fig.path='figures/figure-', fig.align='center', fig.show='hold') 60 | options(replace.assign=TRUE,width=75) 61 | opts_chunk$set(dev='pdf') 62 | library(rstan) 63 | set.seed(9991) 64 | 65 | # save workspace image, if you want 66 | #the.date <- format(Sys.time(), "%b%d%Y") 67 | #save.image(file=paste0("homework01-",the.date,".RData") 68 | @ 69 | 70 | 71 | 72 | The~\citet{gibsonwu} data-set has a two-condition design. This section presents a varying intercepts, varying slopes model for a $2\times 2$ factorial design. 
Because of the more general matrix formulation we use here, the Stan code can be deployed with minimal changes for much more complex designs, including correlational studies. 73 | 74 | Our example is the $2\times 2$ repeated measures factorial design of~\citet[Experiment 1]{HusainEtAl2014}, also a self-paced reading study on relative clauses. The dependent variable was the reading time $\hbox{\texttt{rt}}$ of the relative clause verb. The factors were relative clause type, which we code with the predictor $\hbox{\texttt{so}}$ ($\hbox{\texttt{so}}=+1$ for object relatives and $\hbox{\texttt{so}}=-1$ for subject relatives) and distance between the head noun and the relative clause verb, which we code with the predictor $\hbox{\texttt{dist}}$ ($\hbox{\texttt{dist}}=+1$ for far and $\hbox{\texttt{dist}}=-1$ for near). Their interaction is the product of the \texttt{dist} and \texttt{so} contrast vectors, and labeled as the predictor $\hbox{\texttt{int}}$. The $60$ subjects were speakers of Hindi, an Indo-Aryan language spoken primarily in India. The $24$ items were presented in a standard, fully balanced Latin square design. This resulted in a total of $1440$ data points ($60\times 24=1440$). The first few lines from the data frame are shown below. 75 | 76 | \begin{table}[htbp] 77 | \centering 78 | \begin{tabular}{rrrrrr} 79 | \hline 80 | row & subj & item & so & dist & rt \\ 81 | \hline 82 | 1 & 1 & 14 & s & n & 1561 \\ 83 | 2 & 1 & 16 & o & n & 959 \\ 84 | 3 & 1 & 15 & o & f & 582 \\ 85 | 4 & 1 & 18 & s & n & 294 \\ 86 | 5 & 1 & 4 & o & n & 438 \\ 87 | 6 & 1 & 17 & s & f & 286 \\ 88 | \vdots & \vdots & \vdots & \vdots & \vdots & \vdots \\ 89 | 1440 & 9 & 13 & s & f & 516 \\ 90 | \hline 91 | \end{tabular} 92 | \label{tab:dataframe2} 93 | \caption{The first six rows, and the last row, of the data-set of Husain et al.\ (2014, Experiment 1), as they appear in the data frame.} 94 | \end{table} 95 | 96 | The theoretical interest is in determining whether relative clause type and distance influence reading time, and whether there is an interaction between these two factors. We use Stan to determine the posterior probability distribution of the fixed effect $\beta _1$ for relative clause type, the fixed effect $\beta _2$ for distance, and their interaction $\beta _3$. 97 | 98 | We fit a varying intercepts, varying slopes model to this data-set. 99 | The grand mean $\beta _0$ of $\log \hbox{\texttt{rt}}$ is adjusted by subject and by item through the varying intercepts $u_0$ and $w_0$, which are unique values for each subject and item respectively. Likewise, the three fixed effects $\beta _1$, $\beta _2$, and $\beta _3$ which are associated with the predictors $\hbox{\texttt{so}}$, $\hbox{\texttt{dist}}$, and $\hbox{\texttt{int}}$, respectively, are adjusted by the by-subject varying slopes $u_1$, $u_2$, and $u_3$ and by-item varying slopes $w_1$, $w_2$, and $w_3$. 100 | 101 | It is more convenient to represent this model in matrix form. We build up the model specification by first noting that, for each subject, 102 | the by-subject varying intercept $u_0$ and slopes $u_1$, $u_2$, and $u_3$ have a multivariate normal prior distribution with mean zero and covariance matrix $\Sigma _u$. Similarly, for each item, the by-item varying intercept $w_0$ and slopes $w_1$, $w_2$, and $w_3$ have a multivariate normal prior distribution with mean zero and covariance matrix $\Sigma _w$. 
We can write this as follows: 103 | 104 | \begin{equation} 105 | \begin{pmatrix} 106 | u_0 \\ 107 | u_1 \\ 108 | u_2 \\ 109 | u_3 110 | \end{pmatrix} 111 | \sim 112 | \mathrm{N} \left( 113 | \begin{pmatrix} 114 | 0 \\ 115 | 0 \\ 116 | 0 \\ 117 | 0 118 | \end{pmatrix}, 119 | \Sigma_{u} 120 | \right) 121 | \quad 122 | \begin{pmatrix} 123 | w_0 \\ 124 | w_1 \\ 125 | w_2 \\ 126 | w_3 127 | \end{pmatrix} 128 | \sim 129 | \mathrm{N} \left( 130 | \begin{pmatrix} 131 | 0 \\ 132 | 0 \\ 133 | 0 \\ 134 | 0 135 | \end{pmatrix}, 136 | \Sigma_{w} 137 | \right) 138 | \end{equation} 139 | 140 | The error $\varepsilon $ is assumed to have a normal distribution with mean zero and standard deviation $\sigma _e$. 141 | 142 | \begin{listing} 143 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 144 | rDat<-read.table("HusainEtAlexpt1data.txt",header=TRUE) 145 | rDat$subj <- with(rDat,factor(subj)) 146 | rDat$item <- with(rDat,factor(item)) 147 | 148 | X <- unname(model.matrix(~1+so+dist+int, rDat)) 149 | 150 | stanDat <- within(list(), 151 | { 152 | N<-nrow(X) 153 | P <- n_u <- n_w <- ncol(X) 154 | X <- X 155 | Z_u <- X 156 | Z_w <- X 157 | J <- length(levels(rDat$subj)) 158 | K <- length(levels(rDat$item)) 159 | rt <- rDat$rt 160 | subj <- as.integer(rDat$subj) 161 | item <- as.integer(rDat$item) 162 | } 163 | ) 164 | factorialFit <- stan(file="factorialModel.stan", 165 | data=stanDat, 166 | iter=2000, chains=4) 167 | \end{Verbatim} 168 | \caption{Preparation of data for analyzing the Husain et al.\ data-set, and running the model.}\label{fig:preparehusaindata} 169 | \end{listing} 170 | 171 | \begin{listing} 172 | \begin{Verbatim}[numbers=left,frame=single,fontfamily=courier,fontsize=\footnotesize] 173 | data { 174 | int N; //no trials 175 | int P; //no fixefs 176 | int J; //no subjects 177 | int n_u; //no subj ranefs 178 | int K; //no items 179 | int n_w; //no item ranefs 180 | int subj[N]; //subject indicator 181 | int item[N]; //item indicator 182 | row_vector[P] X[N]; //fixef design matrix 183 | row_vector[n_u] Z_u[N]; //subj ranef design matrix 184 | row_vector[n_w] Z_w[N]; //item ranef design matrix 185 | vector[N] rt; //reading time 186 | } 187 | parameters { 188 | vector[P] beta; //fixef coefs 189 | cholesky_factor_corr[n_u] L_u; //cholesky factor of subj ranef corr matrix 190 | cholesky_factor_corr[n_w] L_w; //cholesky factor of item ranef corr matrix 191 | vector[n_u] sigma_u; //subj ranef std 192 | vector[n_w] sigma_w; //item ranef std 193 | real sigma_e; //residual std 194 | vector[n_u] z_u[J]; //subj ranef 195 | vector[n_w] z_w[K]; //item ranef 196 | } 197 | transformed parameters { 198 | vector[n_u] u[J]; //subj ranefs 199 | vector[n_w] w[K]; //item ranefs 200 | { 201 | matrix[n_u,n_u] Sigma_u; //subj ranef cov matrix 202 | matrix[n_w,n_w] Sigma_w; //item ranef cov matrix 203 | Sigma_u <- diag_pre_multiply(sigma_u,L_u); 204 | Sigma_w <- diag_pre_multiply(sigma_w,L_w); 205 | for(j in 1:J) 206 | u[j] <- Sigma_u * z_u[j]; 207 | for(k in 1:K) 208 | w[k] <- Sigma_w * z_w[k]; 209 | } 210 | } 211 | model { 212 | //priors 213 | L_u ~ lkj_corr_cholesky(2.0); 214 | L_w ~ lkj_corr_cholesky(2.0); 215 | for (j in 1:J) 216 | z_u[j] ~ normal(0,1); 217 | for (k in 1:K) 218 | z_w[k] ~ normal(0,1); 219 | //likelihood 220 | for (i in 1:N) 221 | rt[i] ~ lognormal(X[i] * beta + 222 | Z_u[i] * u[subj[i]] + 223 | Z_w[i] * w[item[i]], 224 | sigma_e); 225 | } 226 | \end{Verbatim} 227 | \caption{Stan code for Husain et al data.}\label{fig:Stancodehusaindata} 228 | \end{listing} 229 | 
230 | We proceed to implement the model in Stan. First we read in the data-set (see Listing~\ref{fig:preparehusaindata}). 231 | Instead of passing the predictors $\hbox{\texttt{so}}$, $\hbox{\texttt{dist}}$, and their interaction $\hbox{\texttt{int}}$ to \texttt{stan} as vectors, as we did with $\hbox{\texttt{so}}$ earlier, we make $\hbox{\texttt{so}}$, $\hbox{\texttt{dist}}$, and $\hbox{\texttt{int}}$ into a design matrix \texttt{X} using the function \texttt{model.matrix} available in R.\footnote{Here, we would like to acknowledge the contribution of Douglas Bates in specifying the model in this general matrix form.} 232 | The first column of the design matrix \texttt{X} consists of all ones. The second column is the predictor $\hbox{\texttt{so}}$ which codes the factor for relative clause type. The third column the predictor $\hbox{\texttt{dist}}$ which codes the factor for distance. The fourth column is the predictor $\hbox{\texttt{int}}$ which codes the interaction between relative clause type and distance. The model matrix thus consists of a fully factorial $2 \times 2$ design, with blocks of this design repeated for each subject. 233 | For the full data-set, we could write it very compactly in matrix form as follows: 234 | 235 | \begin{equation} \label{eq:factorialmodel} 236 | \mathbf{\log(rt)} = \mathbf{X}\beta + \mathbf{Z}_{u} \mathbf{u} + \mathbf{Z}_{w} \mathbf{w} + \mathbf{\varepsilon} 237 | \end{equation} 238 | 239 | Here, $\mathbf{X}$ is the $N\times P$ model matrix (with $N=1440$, since we have $1440$ data points; and $P=4$ since we have the intercept plus three other fixed effects), $\mathbf{\beta}$ is a $P\times 1$ vector of fixed effects parameters, $\mathbf{Z}_{u}$ and $\mathbf{Z}_{w}$ are the subject and item model matrices ($N\times P$), and $u$ and $w$ are the by-subject and by-item adjustments to the fixed effects estimates; these are identical to the design matrix $\mathbf{X}$ in the model with varying intercepts and varying slopes included. For more examples of similar model specifications in Stan, see the R package \texttt{RePsychLing} on github (https://github.com/dmbates/RePsychLing). 240 | 241 | Having defined the model, we proceed to assemble the list \texttt{stanDat} of data, relying on the above matrix formulation; please refer to Listing~\ref{fig:preparehusaindata}. The number \texttt{N} of observations, the number \texttt{J} of subjects and \texttt{K} of items, the reading times \texttt{rt}, and the subject and item indicator variables \texttt{subj} and \texttt{item} are familiar from the previous models presented. The integer \texttt{P} is the number of fixed effects (four including the intercept). Model~\ref{eq:factorialmodel} includes a varying intercept $u_{0}$ and varying slopes $u_{1}$, $u_{2}$, $u_{3}$ for each subject, and so the number \texttt{n\_u} of by-subject random effects equals \texttt{P}. Likewise, Model~\ref{eq:factorialmodel} includes a varying intercept $w_{0}$ and varying slopes $w_{1}$, $w_{2}$, $w_{3}$ for each item, and so the number \texttt{n\_w} of by-item random effects also equals \texttt{P}. 242 | The data block contains the corresponding variables. We declare the fixed effects design matrix \texttt{X} as an array of \texttt{N} row vectors whose components are the predictors associated with the \texttt{N} reading times. Likewise for the subject and item random effects design matrices \texttt{Z\_u} and \texttt{Z\_w}, which correspond to $\mathbf{Z}_{u}$ and $\mathbf{Z}_{w}$ respectively in Model~\ref{eq:factorialmodel}. 
243 | The vector \texttt{beta} contains the fixed effects $\beta _0$, $\beta _1$, $\beta _2$, and $\beta _3$. The matrices \texttt{L\_u}, \texttt{L\_w}, and the arrays \texttt{z\_u}, \texttt{z\_w} of vectors (not to be confused with the design matrices \texttt{Z\_u} and \texttt{Z\_w}) will generate the varying intercepts and slopes $u_0$, \dots , $u_3$ and $w_0$, \dots , $w_3$. The vector \texttt{sigma\_u} contains the standard deviations of the by-subject varying intercepts and slopes $u_0$, \dots , $u_3$, and the vector \texttt{sigma\_w} contains the standard deviations of the by-item varying intercepts and slopes $w_0$, \ldots , $w_3$. The variable \texttt{sigma\_e} is the standard deviation $\sigma _e$ of the error $\varepsilon$.
244 | The transformed parameters block generates the by-subject intercepts and slopes $u_0$, \dots , $u_3$ and the by-item intercepts and slopes $w_0$, \dots, $w_3$.
245 | 
246 | We place LKJ priors on the random effects correlation matrices through the \texttt{lkj\_corr\_cholesky(2.0)} priors on their Cholesky factors \texttt{L\_u} and \texttt{L\_w}. We implicitly place uniform priors on the fixed effects $\beta _0$, \dots , $\beta _3$, the random effects standard deviations $\sigma _{u0}$, \dots , $\sigma _{u3}$ and $\sigma _{w0}$, \dots , $\sigma _{w3}$, and the error standard deviation $\sigma _e$ by omitting any prior specifications for them in the model block. We specify the likelihood with the probability statement that \texttt{rt[i]} is distributed log-normally with mean \texttt{X[i] * beta + Z\_u[i] * u[subj[i]] + Z\_w[i] * w[item[i]]} and standard deviation \texttt{sigma\_e}.
247 | The next step towards model-fitting is to pass the list \texttt{stanDat} to \texttt{stan}, which compiles a C++ program to sample from the posterior distribution of the model parameters.
248 | 
249 | Figure~\ref{fig:factorialfixefposterior} plots histograms of the marginal posterior distributions of the fixed effects. The HPD interval of the fixed effect $\hat\beta _1$ for relative clause type is entirely below zero. This is evidence that object relatives are read faster than subject relatives. The HPD interval of the fixed effect $\hat\beta _2$ for distance is also entirely below zero. This is evidence of a slowdown when the verb (where reading time was measured) is closer to the head noun of the relative clause. The HPD interval of the interaction $\hat\beta _3$ between relative clause type and distance lies entirely above zero, which is evidence for a greater slowdown on subject relatives when the distance between the verb and head noun is short.
250 | 
251 | \begin{figure}
252 | \centering
253 | <<factorialfixefposterior,echo=FALSE>>=
254 | # Load the factorial model fit.
255 | load("../../data/factorialFit.Rda")
256 | # Extract the fixef coefs.
257 | beta0 <- extract(factorialFit,pars=c("beta[1]"))
258 | beta1 <- extract(factorialFit,pars=c("beta[2]"))
259 | beta2 <- extract(factorialFit,pars=c("beta[3]"))
260 | beta3 <- extract(factorialFit,pars=c("beta[4]"))
261 | # Get HPD interval for the fixef coefs.
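# HPDinterval() comes from the coda package and expects an mcmc object,
# hence the as.mcmc(unlist(...)) conversion of the posterior draws below;
# prob=0.95 requests 95% highest posterior density intervals.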
262 | beta0HPD<-HPDinterval(as.mcmc(unlist(beta0)),prob=0.95)
263 | beta1HPD<-HPDinterval(as.mcmc(unlist(beta1)),prob=0.95)
264 | beta2HPD<-HPDinterval(as.mcmc(unlist(beta2)),prob=0.95)
265 | beta3HPD<-HPDinterval(as.mcmc(unlist(beta3)),prob=0.95)
266 | # Plot histograms with HPDs as dotted lines
267 | par(mfrow=c(2,2))
268 | hist(beta0$beta,freq=FALSE,col="black",border="white",main="grand mean",xlab=expression(beta[0]))
269 | abline(v=beta0HPD,lty=2,lwd=2)
270 | hist(beta1$beta,freq=FALSE,col="black",border="white",main="relative clause type",
271 | xlim=c(-.12,.12),xlab=expression(beta[1]))
272 | abline(v=beta1HPD,lty=2,lwd=2)
273 | hist(beta2$beta,freq=FALSE,col="black",border="white",main="distance",
274 | xlim=c(-.12,.12),xlab=expression(beta[2]))
275 | abline(v=beta2HPD,lty=2,lwd=2)
276 | hist(beta3$beta,freq=FALSE,col="black",border="white",main="interaction",
277 | xlim=c(-.12,.12),xlab=expression(beta[3]))
278 | abline(v=beta3HPD,lty=2,lwd=2)
279 | @
280 | \caption{Marginal posterior distributions and HPD intervals of the fixed effects grand mean $\beta _0$, slope $\beta _1$ for relative clause type, slope $\beta _2$ for distance, and interaction $\beta _3$. All fixed effects are on the log-scale.}\label{fig:factorialfixefposterior}
281 | \end{figure}
282 | 
283 | A major advantage of the above matrix formulation is that we do not need to write a new Stan model for a future repeated measures factorial design. All we have to do now is define the design matrix $X$ appropriately, and include it (along with appropriately defined $Z_u$ and $Z_w$ for the subject and item random effects) as part of the data specification that is passed to Stan.
284 | 
285 | 
286 | \clearpage
287 | 
288 | \bibliographystyle{apacite}
289 | \bibliography{../../doc/SorensenEtAl}
290 | 
291 | \clearpage
292 | 
293 | \end{document}
294 | 
--------------------------------------------------------------------------------
/vignettes/factorialDesign/factorialDesign.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vasishth/BayesLMMTutorial/cccf977025847a83b5ae651e914ad95c8e53cdee/vignettes/factorialDesign/factorialDesign.pdf
--------------------------------------------------------------------------------
/vignettes/factorialModel.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N; //no trials
3 | int<lower=1> P; //no fixefs
4 | int<lower=1> J; //no subjects
5 | int<lower=1> n_u; //no subj ranefs
6 | int<lower=1> K; //no items
7 | int<lower=1> n_w; //no item ranefs
8 | int<lower=1,upper=J> subj[N]; //subject indicator
9 | int<lower=1,upper=K> item[N]; //item indicator
10 | row_vector[P] X[N]; //fixef design matrix
11 | row_vector[n_u] Z_u[N]; //subj ranef design matrix
12 | row_vector[n_w] Z_w[N]; //item ranef design matrix
13 | vector[N] rt; //reading time
14 | }
15 | 
16 | parameters {
17 | vector[P] beta; //fixef coefs
18 | cholesky_factor_corr[n_u] L_u; //cholesky factor of subj ranef corr matrix
19 | cholesky_factor_corr[n_w] L_w; //cholesky factor of item ranef corr matrix
20 | vector<lower=0>[n_u] sigma_u; //subj ranef std
21 | vector<lower=0>[n_w] sigma_w; //item ranef std
22 | real<lower=0> sigma_e; //residual std
23 | vector[n_u] z_u[J]; //spherical subj ranef
24 | vector[n_w] z_w[K]; //spherical item ranef
25 | }
26 | 
27 | transformed parameters {
28 | vector[n_u] u[J]; //subj ranefs
29 | vector[n_w] w[K]; //item ranefs
30 | {
31 | matrix[n_u,n_u] Sigma_u; //subj ranef cov matrix
32 | matrix[n_w,n_w] Sigma_w; //item ranef cov matrix
33 | Sigma_u = diag_pre_multiply(sigma_u,L_u);
34 | Sigma_w = diag_pre_multiply(sigma_w,L_w);
35 | for(j in 1:J)
36 | u[j] = Sigma_u * z_u[j];
37 | for(k in 1:K)
38 | w[k] = Sigma_w * z_w[k];
39 | }
40 | }
41 | 
42 | model {
43 | //priors
44 | L_u ~ lkj_corr_cholesky(2.0);
45 | L_w ~ lkj_corr_cholesky(2.0);
46 | for (j in 1:J)
47 | z_u[j] ~ normal(0,1);
48 | for (k in 1:K)
49 | z_w[k] ~ normal(0,1);
50 | //likelihood
51 | for (i in 1:N)
52 | rt[i] ~ lognormal(X[i] * beta + Z_u[i] * u[subj[i]] + Z_w[i] * w[item[i]], sigma_e);
53 | }
54 | 
--------------------------------------------------------------------------------
/vignettes/fixEf.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N; //number of data points
3 | real rt[N]; //reading time
4 | real so[N]; //predictor
5 | }
6 | 
7 | parameters {
8 | vector[2] beta; //intercept and slope
9 | real<lower=0> sigma_e; //error sd
10 | }
11 | 
12 | model {
13 | real mu;
14 | for (i in 1:N){ // likelihood
15 | mu = beta[1] + beta[2]*so[i];
16 | rt[i] ~ lognormal(mu,sigma_e);
17 | }
18 | }
19 | 
20 | 
--------------------------------------------------------------------------------
/vignettes/matrixModel.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N; //no trials
3 | int<lower=1> P; //no fixefs
4 | int<lower=1> J; //no subjects
5 | int<lower=1> n_u; //no subj ranefs
6 | int<lower=1> K; //no items
7 | int<lower=1> n_w; //no item ranefs
8 | int<lower=1,upper=J> subj[N]; //subject indicator
9 | int<lower=1,upper=K> item[N]; //item indicator
10 | row_vector[P] X[N]; //fixef design matrix
11 | row_vector[n_u] Z_u[N]; //subj ranef design matrix
12 | row_vector[n_w] Z_w[N]; //item ranef design matrix
13 | vector[N] rt; //reading time
14 | }
15 | 
16 | parameters {
17 | vector[P] beta; //fixef coefs
18 | cholesky_factor_corr[n_u] L_u; //cholesky factor of subj ranef corr matrix
19 | cholesky_factor_corr[n_w] L_w; //cholesky factor of item ranef corr matrix
20 | vector<lower=0>[n_u] sigma_u; //subj ranef std
21 | vector<lower=0>[n_w] sigma_w; //item ranef std
22 | real<lower=0> sigma_e; //residual std
23 | vector[n_u] z_u[J]; //spherical subj ranef
24 | vector[n_w] z_w[K]; //spherical item ranef
25 | }
26 | 
27 | transformed parameters {
28 | vector[n_u] u[J]; //subj ranefs
29 | vector[n_w] w[K]; //item ranefs
30 | {
31 | matrix[n_u,n_u] Sigma_u; //subj ranef cov matrix
32 | matrix[n_w,n_w] Sigma_w; //item ranef cov matrix
33 | Sigma_u = diag_pre_multiply(sigma_u,L_u);
34 | Sigma_w = diag_pre_multiply(sigma_w,L_w);
35 | for(j in 1:J)
36 | u[j] = Sigma_u * z_u[j];
37 | for(k in 1:K)
38 | w[k] = Sigma_w * z_w[k];
39 | }
40 | }
41 | 
42 | model {
43 | //priors
44 | L_u ~ lkj_corr_cholesky(2.0);
45 | L_w ~ lkj_corr_cholesky(2.0);
46 | for (j in 1:J)
47 | z_u[j] ~ normal(0,1);
48 | for (k in 1:K)
49 | z_w[k] ~ normal(0,1);
50 | //likelihood
51 | for (i in 1:N)
52 | rt[i] ~ lognormal(X[i] * beta + Z_u[i] * u[subj[i]] + Z_w[i] * w[item[i]], sigma_e);
53 | }
54 | 
--------------------------------------------------------------------------------
/vignettes/pp.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N;
3 | real rt[N]; //outcome
4 | real so[N]; //predictor
5 | int<lower=1> J; //number of subjects
6 | int<lower=1> K; //number of items
7 | int<lower=1,upper=J> subj[N]; //subject id
8 | int<lower=1,upper=K> item[N]; //item id
9 | }
10 | 
11 | parameters {
12 | vector[2] beta; // intercept and slopes
13 | real<lower=0> sigma_e; // residual sd
14 | vector<lower=0>[2] sigma_u; // subj sd
15 | vector<lower=0>[2] sigma_w; // item sd
16 | cholesky_factor_corr[2] L_u;
17 | 
cholesky_factor_corr[2] L_w;
18 | matrix[2,J] z_u;
19 | matrix[2,K] z_w;
20 | }
21 | 
22 | transformed parameters{
23 | matrix[J,2] u;
24 | matrix[K,2] w;
25 | 
26 | u = (diag_pre_multiply(sigma_u,L_u) * z_u)'; // subj random effects
27 | w = (diag_pre_multiply(sigma_w,L_w) * z_w)'; // item random effects
28 | }
29 | 
30 | model {
31 | real mu;
32 | 
33 | // priors:
34 | L_u ~ lkj_corr_cholesky(2.0);
35 | L_w ~ lkj_corr_cholesky(2.0);
36 | to_vector(z_u) ~ normal(0,1);
37 | to_vector(z_w) ~ normal(0,1);
38 | 
39 | for (i in 1:N){
40 | mu = beta[1] + u[subj[i],1] + w[item[i],1]
41 | + (beta[2] + u[subj[i],2] + w[item[i],2])*so[i];
42 | rt[i] ~ lognormal(mu,sigma_e); // likelihood
43 | }
44 | }
45 | 
46 | generated quantities{
47 | real rt_tilde[N];
48 | real mu;
49 | for (i in 1:N){
50 | mu = beta[1] + u[subj[i],1] + w[item[i],1]
51 | + (beta[2] + u[subj[i],2] + w[item[i],2])*so[i];
52 | rt_tilde[i] = lognormal_rng(mu,sigma_e);
53 | }
54 | }
55 | 
--------------------------------------------------------------------------------
/vignettes/ranInt.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N; //number of data points
3 | real rt[N]; //reading time
4 | real so[N]; //predictor
5 | int<lower=1> J; //number of subjects
6 | int<lower=1> K; //number of items
7 | int<lower=1,upper=J> subj[N]; //subject id
8 | int<lower=1,upper=K> item[N]; //item id
9 | }
10 | 
11 | parameters {
12 | vector[2] beta; //fixed intercept and slope
13 | vector[J] u; //subject intercepts
14 | vector[K] w; //item intercepts
15 | real<lower=0> sigma_e; //error sd
16 | real<lower=0> sigma_u; //subj sd
17 | real<lower=0> sigma_w; //item sd
18 | }
19 | 
20 | model {
21 | real mu;
22 | //priors
23 | u ~ normal(0,sigma_u); //subj random effects
24 | w ~ normal(0,sigma_w); //item random effects
25 | // likelihood
26 | for (i in 1:N){
27 | mu = beta[1] + u[subj[i]] + w[item[i]] + beta[2]*so[i];
28 | rt[i] ~ lognormal(mu,sigma_e);
29 | }
30 | }
31 | 
--------------------------------------------------------------------------------
/vignettes/ranIntSlp.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N; //number of data points
3 | real rt[N]; //reading time
4 | real so[N]; //predictor
5 | int<lower=1> J; //number of subjects
6 | int<lower=1> K; //number of items
7 | int<lower=1,upper=J> subj[N]; //subject id
8 | int<lower=1,upper=K> item[N]; //item id
9 | }
10 | 
11 | parameters {
12 | vector[2] beta; //intercept and slope
13 | real<lower=0> sigma_e; //error sd
14 | vector<lower=0>[2] sigma_u; //subj sd
15 | vector<lower=0>[2] sigma_w; //item sd
16 | cholesky_factor_corr[2] L_u;
17 | cholesky_factor_corr[2] L_w;
18 | matrix[2,J] z_u;
19 | matrix[2,K] z_w;
20 | }
21 | 
22 | transformed parameters{
23 | matrix[2,J] u;
24 | matrix[2,K] w;
25 | 
26 | u = diag_pre_multiply(sigma_u,L_u) * z_u; //subj random effects
27 | w = diag_pre_multiply(sigma_w,L_w) * z_w; //item random effects
28 | }
29 | 
30 | model {
31 | real mu;
32 | //priors
33 | L_u ~ lkj_corr_cholesky(2.0);
34 | L_w ~ lkj_corr_cholesky(2.0);
35 | to_vector(z_u) ~ normal(0,1);
36 | to_vector(z_w) ~ normal(0,1);
37 | //likelihood
38 | for (i in 1:N){
39 | mu = beta[1] + u[1,subj[i]] + w[1,item[i]]
40 | + (beta[2] + u[2,subj[i]] + w[2,item[i]])*so[i];
41 | rt[i] ~ lognormal(mu,sigma_e);
42 | }
43 | }
44 | 
--------------------------------------------------------------------------------
/vignettes/ranIntSlpNoCor.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int<lower=1> N; //number of data points
3 | real rt[N]; //reading time
4 | real so[N]; //predictor
5 | int<lower=1> J; //number of subjects
6 | 
int<lower=1> K; //number of items
7 | int<lower=1,upper=J> subj[N]; //subject id
8 | int<lower=1,upper=K> item[N]; //item id
9 | }
10 | 
11 | parameters {
12 | vector[2] beta; //intercept and slope
13 | real<lower=0> sigma_e; //error sd
14 | matrix[2,J] u; //subj intercepts, slopes
15 | vector<lower=0>[2] sigma_u; //subj sd
16 | matrix[2,K] w; //item intercepts, slopes
17 | vector<lower=0>[2] sigma_w; //item sd
18 | }
19 | 
20 | model {
21 | real mu;
22 | //priors
23 | for (i in 1:2){
24 | u[i] ~ normal(0,sigma_u[i]); //subj random effects
25 | w[i] ~ normal(0,sigma_w[i]); //item random effects
26 | }
27 | //likelihood
28 | for (i in 1:N){
29 | mu = beta[1] + u[1,subj[i]] + w[1,item[i]]
30 | + (beta[2] + u[2,subj[i]] + w[2,item[i]])*so[i];
31 | rt[i] ~ lognormal(mu,sigma_e);
32 | }
33 | }
34 | 
--------------------------------------------------------------------------------