├── .Rbuildignore ├── .gitignore ├── .remarkrc ├── DESCRIPTION ├── LICENSE ├── Makefile ├── NAMESPACE ├── README.md ├── Untitled.Rmd ├── WORDLIST ├── _bookdown.yml ├── _common.R ├── _lint.R ├── _notes ├── _main.Rmd ├── biblio.md ├── pauperism.Rmd ├── questions.Rmd ├── realstats.Rmd └── wine.Rmd ├── _output.yml ├── _render.R ├── _serve.R ├── _spelling.R ├── appendix.Rmd ├── bibliography.Rmd ├── bootstrapping.Rmd ├── causal-regression.Rmd ├── cross-validation.Rmd ├── data └── western1995 │ ├── econ_growth.tsv │ ├── income_ineq.tsv │ └── unionization.tsv ├── diagrams ├── _book │ ├── _main_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.png │ │ │ └── unnamed-chunk-3-1.png │ ├── cross-validation.html │ ├── libs │ │ ├── gitbook-2.6.7 │ │ │ ├── css │ │ │ │ ├── fontawesome │ │ │ │ │ └── fontawesome-webfont.ttf │ │ │ │ ├── plugin-bookdown.css │ │ │ │ ├── plugin-fontsettings.css │ │ │ │ ├── plugin-highlight.css │ │ │ │ ├── plugin-search.css │ │ │ │ └── style.css │ │ │ └── js │ │ │ │ ├── app.min.js │ │ │ │ ├── jquery.highlight.js │ │ │ │ ├── lunr.js │ │ │ │ ├── plugin-bookdown.js │ │ │ │ ├── plugin-fontsettings.js │ │ │ │ ├── plugin-search.js │ │ │ │ └── plugin-sharing.js │ │ └── jquery-2.2.3 │ │ │ └── jquery.min.js │ └── search_index.json ├── iv-dag.gv ├── science.mmd └── science2.mmd ├── docs ├── .nojekyll ├── appendix.md ├── bootstrapping.html ├── bootstrapping.md ├── causal-regression.md ├── colinearity-and-multicolinearity.html ├── collinearity-and-multicollinearity.html ├── cross-validation.html ├── cross-validation.md ├── cross-validation_files │ └── figure-html │ │ ├── unnamed-chunk-12-1.svg │ │ ├── unnamed-chunk-13-1.svg │ │ └── unnamed-chunk-4-1.svg ├── eda.md ├── formatting-tables.html ├── img │ └── islr-fig-6.7.png ├── index.html ├── index.md ├── libs │ ├── gitbook-2.6.7 │ │ ├── css │ │ │ ├── fontawesome │ │ │ │ └── fontawesome-webfont.ttf │ │ │ ├── plugin-bookdown.css │ │ │ ├── plugin-fontsettings.css │ │ │ ├── 
plugin-highlight.css │ │ │ ├── plugin-search.css │ │ │ └── style.css │ │ └── js │ │ │ ├── app.min.js │ │ │ ├── jquery.highlight.js │ │ │ ├── lunr.js │ │ │ ├── plugin-bookdown.js │ │ │ ├── plugin-fontsettings.js │ │ │ ├── plugin-search.js │ │ │ └── plugin-sharing.js │ └── jquery-2.2.3 │ │ └── jquery.min.js ├── linear-regression.md ├── matrix-algebra-review.html ├── matrix.md ├── multicolinearity.md ├── multicollinearity.html ├── multicollinearity.md ├── ols-assumptions.html ├── ols-in-matrix-form.html ├── other-did-approaches.html ├── panel-data-fixed-effects-and-difference-in-difference.html ├── panel.md ├── prediction-policy-problems.html ├── prediction.html ├── prediction.md ├── prediction_files │ └── figure-html │ │ ├── unnamed-chunk-10-1.svg │ │ ├── unnamed-chunk-11-1.svg │ │ ├── unnamed-chunk-13-1.svg │ │ ├── unnamed-chunk-16-1.svg │ │ ├── unnamed-chunk-18-1.svg │ │ ├── unnamed-chunk-20-1.svg │ │ ├── unnamed-chunk-21-1.svg │ │ └── unnamed-chunk-9-1.svg ├── presentation.md ├── programming.md ├── purpose.html ├── rd.md ├── rd_files │ └── figure-html │ │ └── unnamed-chunk-2-1.svg ├── references-1.html ├── references-3.html ├── references-4.html ├── references-5.html ├── references.html ├── references.md ├── reganat.md ├── reganat_files │ └── figure-html │ │ ├── unnamed-chunk-3-1.svg │ │ └── unnamed-chunk-7-1.svg ├── regression-anatomy.html ├── regression-discontinuity.html ├── regression.html ├── regularization.html ├── regularization.md ├── regularization_files │ └── figure-html │ │ └── unnamed-chunk-5-1.svg ├── reproducible-research.html ├── reproducible-research.md ├── reproducible_research.md ├── search_index.json ├── section.html ├── tables-and-plots.md ├── tables_and_plots.md ├── typesetting-and-word-processing-programs.html ├── word-processing.md ├── writing-resources.html └── writing.md ├── eda.Rmd ├── img ├── 1000px-Coefficient_of_Determination.svg.png ├── islr-fig-6.7.png ├── laffer.png └── tobias-funke-blue.jpeg ├── includes ├── after_body.html ├── 
before_body.html ├── in_header.html └── preamble.tex ├── index.Rmd ├── intro-methods-notes.Rproj ├── intromethods.bib ├── iv.Rmd ├── linear-regression.Rmd ├── matrix.Rmd ├── model-fit.Rmd ├── multicollinearity.Rmd ├── old-files ├── _main.Rmd ├── multicollinearity.Rmd ├── non-standard-errors.Rmd ├── ols-diagnostics-troubleshooting.Rmd ├── ols-estimator.Rmd ├── ols-inference.Rmd ├── ols-misc.Rmd ├── ovb-measurment-error.Rmd └── resampling-methods.Rmd ├── outliers.Rmd ├── ovb.Rmd ├── package-lock.json ├── panel.Rmd ├── potential-outcomes.Rmd ├── prediction.Rmd ├── presentation.Rmd ├── probability.Rmd ├── programming.Rmd ├── questions.Rmd ├── rd.Rmd ├── references.Rmd ├── reganat.Rmd ├── regression-inference.Rmd ├── regularization.Rmd ├── reproducible-research.Rmd ├── simple-regression.Rmd ├── simpsons.Rmd ├── tables-and-plots.Rmd ├── word-processing.Rmd └── writing.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .*.Rnb.cached 5 | *.rds 6 | *.aux 7 | *.fdb_latexmk 8 | *.fls 9 | *.out 10 | *.toc 11 | *.bak 12 | $*$ 13 | *.sav 14 | _bookdown_files 15 | *.utf8.md 16 | *.knit.md 17 | 18 | /*.html 19 | /*_cache 20 | /*_files 21 | node_modules 22 | *.log 23 | ^/bookdown* 24 | /bookdown* 25 | -------------------------------------------------------------------------------- /.remarkrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": [ 3 | "remark-preset-lint-recommended", 4 | "remark-preset-lint-consistent", 5 | "remark-preset-lint-markdown-style-guide", 6 | "remark-frontmatter", 7 | ["remark-lint-file-extension", false], 8 | ["remark-lint-maximum-line-length", 300], 9 | ["remark-lint-no-shortcut-reference-link", 
false], 10 | ["remark-lint-list-item-indent", "tab-size"], 11 | ["remark-lint-no-undefined-references", false], 12 | ["remark-lint-emphasis-marker", false], 13 | ["remark-lint-fenced-code-flag", false], 14 | ["remark-lint-no-duplicate-headings", false] 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: pols503notes 2 | Title: POLS 503 Notes 3 | Version: 0.0.1 4 | Authors@R: c( 5 | person("Jeffrey", "Arnold", , "jeffrey.arnold@gmail.com", c("aut", "cre")) 6 | ) 7 | Depends: R (>= 3.1.0) 8 | URL: https://github.com/jrnold/intro-methods-notes 9 | Imports: 10 | bookdown, 11 | broom, 12 | carData, 13 | datums, 14 | htmltools, 15 | htmlTable, 16 | jrnoldmisc, 17 | MASS, 18 | texreg, 19 | tidyverse, 20 | vcd, 21 | xtable 22 | Remotes: 23 | jrnold/jrnoldmisc, 24 | jrnold/datums 25 | RoxygenNote: 6.0.1 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | CC0 1.0 Universal 2 | 3 | Statement of Purpose 4 | 5 | The laws of most jurisdictions throughout the world automatically confer 6 | exclusive Copyright and Related Rights (defined below) upon the creator and 7 | subsequent owner(s) (each and all, an "owner") of an original work of 8 | authorship and/or a database (each, a "Work"). 9 | 10 | Certain owners wish to permanently relinquish those rights to a Work for the 11 | purpose of contributing to a commons of creative, cultural and scientific 12 | works ("Commons") that the public can reliably and without fear of later 13 | claims of infringement build upon, modify, incorporate in other works, reuse 14 | and redistribute as freely as possible in any form whatsoever and for any 15 | purposes, including without limitation commercial purposes. 
These owners may 16 | contribute to the Commons to promote the ideal of a free culture and the 17 | further production of creative, cultural and scientific works, or to gain 18 | reputation or greater distribution for their Work in part through the use and 19 | efforts of others. 20 | 21 | For these and/or other purposes and motivations, and without any expectation 22 | of additional consideration or compensation, the person associating CC0 with a 23 | Work (the "Affirmer"), to the extent that he or she is an owner of Copyright 24 | and Related Rights in the Work, voluntarily elects to apply CC0 to the Work 25 | and publicly distribute the Work under its terms, with knowledge of his or her 26 | Copyright and Related Rights in the Work and the meaning and intended legal 27 | effect of CC0 on those rights. 28 | 29 | 1. Copyright and Related Rights. A Work made available under CC0 may be 30 | protected by copyright and related or neighboring rights ("Copyright and 31 | Related Rights"). Copyright and Related Rights include, but are not limited 32 | to, the following: 33 | 34 | i. the right to reproduce, adapt, distribute, perform, display, communicate, 35 | and translate a Work; 36 | 37 | ii. moral rights retained by the original author(s) and/or performer(s); 38 | 39 | iii. publicity and privacy rights pertaining to a person's image or likeness 40 | depicted in a Work; 41 | 42 | iv. rights protecting against unfair competition in regards to a Work, 43 | subject to the limitations in paragraph 4(a), below; 44 | 45 | v. rights protecting the extraction, dissemination, use and reuse of data in 46 | a Work; 47 | 48 | vi. database rights (such as those arising under Directive 96/9/EC of the 49 | European Parliament and of the Council of 11 March 1996 on the legal 50 | protection of databases, and under any national implementation thereof, 51 | including any amended or successor version of such directive); and 52 | 53 | vii. 
other similar, equivalent or corresponding rights throughout the world 54 | based on applicable law or treaty, and any national implementations thereof. 55 | 56 | 2. Waiver. To the greatest extent permitted by, but not in contravention of, 57 | applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and 58 | unconditionally waives, abandons, and surrenders all of Affirmer's Copyright 59 | and Related Rights and associated claims and causes of action, whether now 60 | known or unknown (including existing as well as future claims and causes of 61 | action), in the Work (i) in all territories worldwide, (ii) for the maximum 62 | duration provided by applicable law or treaty (including future time 63 | extensions), (iii) in any current or future medium and for any number of 64 | copies, and (iv) for any purpose whatsoever, including without limitation 65 | commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes 66 | the Waiver for the benefit of each member of the public at large and to the 67 | detriment of Affirmer's heirs and successors, fully intending that such Waiver 68 | shall not be subject to revocation, rescission, cancellation, termination, or 69 | any other legal or equitable action to disrupt the quiet enjoyment of the Work 70 | by the public as contemplated by Affirmer's express Statement of Purpose. 71 | 72 | 3. Public License Fallback. Should any part of the Waiver for any reason be 73 | judged legally invalid or ineffective under applicable law, then the Waiver 74 | shall be preserved to the maximum extent permitted taking into account 75 | Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver 76 | is so judged Affirmer hereby grants to each affected person a royalty-free, 77 | non transferable, non sublicensable, non exclusive, irrevocable and 78 | unconditional license to exercise Affirmer's Copyright and Related Rights in 79 | the Work (i) in all territories worldwide, (ii) for the maximum duration 80 | provided by applicable law or treaty (including future time extensions), (iii) 81 | in any current or future medium and for any number of copies, and (iv) for any 82 | purpose whatsoever, including without limitation commercial, advertising or 83 | promotional purposes (the "License"). The License shall be deemed effective as 84 | of the date CC0 was applied by Affirmer to the Work. Should any part of the 85 | License for any reason be judged legally invalid or ineffective under 86 | applicable law, such partial invalidity or ineffectiveness shall not 87 | invalidate the remainder of the License, and in such case Affirmer hereby 88 | affirms that he or she will not (i) exercise any of his or her remaining 89 | Copyright and Related Rights in the Work or (ii) assert any associated claims 90 | and causes of action with respect to the Work, in either case contrary to 91 | Affirmer's express Statement of Purpose. 92 | 93 | 4. Limitations and Disclaimers. 94 | 95 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 96 | surrendered, licensed or otherwise affected by this document. 97 | 98 | b. Affirmer offers the Work as-is and makes no representations or warranties 99 | of any kind concerning the Work, express, implied, statutory or otherwise, 100 | including without limitation warranties of title, merchantability, fitness 101 | for a particular purpose, non infringement, or the absence of latent or 102 | other defects, accuracy, or the present or absence of errors, whether or not 103 | discoverable, all to the greatest extent permissible under applicable law. 104 | 105 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 106 | that may apply to the Work or any use thereof, including without limitation 107 | any person's Copyright and Related Rights in the Work. Further, Affirmer 108 | disclaims responsibility for obtaining any necessary consents, permissions 109 | or other rights required for any use of the Work. 110 | 111 | d. Affirmer understands and acknowledges that Creative Commons is not a 112 | party to this document and has no duty or obligation with respect to this 113 | CC0 or use of the Work. 114 | 115 | For more information, please see 116 | 117 | 118 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | Rscript --quiet _render.R 3 | 4 | gitbook: 5 | Rscript --quiet _render.R "bookdown::gitbook" 6 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/README.md -------------------------------------------------------------------------------- /WORDLIST: -------------------------------------------------------------------------------- 1 | AJPS 2 | AME 3 | APSA 4 | ATT 5 | Abadie 6 | AbdulkadirogluAngristPathak 7 | Altonji 8 | AltonjiElderTaber 9 | Angrist 10 | AngristPischke 11 | Arrellano 12 | Ashenfelter 13 | Ashenfelter's 14 | AshenfelterAshemoreLalonde 15 | Ashmore 16 | AtheyImbens 17 | AucTeX 18 | Belloni 19 | BertrandDufloMullainathan 20 | Bizup 21 | Bonferroni 22 | Broockman 23 | BroockmanKallaAronow 24 | CEF 25 | CLT 26 
| CPE 27 | CalTech 28 | CarpenterDobkin 29 | CaugheySekhon 30 | Chernozhukov 31 | DAGs 32 | DGP 33 | DOI 34 | Dafoe 35 | DavidsonMacKinnon 36 | DiD 37 | Duflo 38 | EPE 39 | EggersFowlerHainmuellerEtAl 40 | EggersHainmueller 41 | EsareyMenger 42 | Eubank 43 | Evera 44 | FWL 45 | Frisch 46 | Gardeazabel 47 | Gelman 48 | GelmanHill 49 | GelmanKatz 50 | Gentzkow 51 | Glymour 52 | Grilliches 53 | GrimmerHershFeinsteinEtAl 54 | Guber 55 | HahnToddKlaauw 56 | Hausmann 57 | Herndon 58 | HerndonAshPollin 59 | Hesterberg 60 | Hochberg 61 | Holm 62 | Homas 63 | IPE 64 | ISLR 65 | ImbensKalyanaraman 66 | JacobLefgren 67 | JamesWittenHastieEtAl 68 | Jeter 69 | Katz 70 | Keele 71 | KingTomzWittenberg 72 | Kleinberg 73 | KleinbergLudwigMullainathanEtAl 74 | Krugman 75 | LDV 76 | LDVs 77 | LSDV 78 | LaCour 79 | LaTeXTools 80 | Lalonde 81 | LeeLemieux 82 | LeeMorettiButler 83 | LindenAdamsRoberts 84 | LitschigMorrison 85 | Lovell 86 | MPs 87 | MSS 88 | MacKinnon 89 | Makefiles 90 | Mankiw 91 | Mattenberg 92 | McCloskey 93 | McCrary 94 | Mendeley 95 | Methodologist 96 | Mullainathan 97 | NBER 98 | NHST 99 | Nunn 100 | NunnWantchekon 101 | OVB 102 | Oster 103 | PCSE 104 | PSAS 105 | Passell 106 | PeiPischkeSchwandt 107 | Pischke 108 | Pollin 109 | RDD 110 | RStudio 111 | Reinhart 112 | ReporteR 113 | Rmd 114 | Rnw 115 | Rogoff 116 | Ruud 117 | SEs 118 | SSR 119 | ShareLaTeX 120 | Shmueli 121 | StackExchange 122 | Strunk 123 | TSCS 124 | TSS 125 | Taber 126 | TeXStudio 127 | TeXmaker 128 | TeXshop 129 | ThistlethwaiteCampbell 130 | ThistlethwaiteCampbell1960a 131 | VIF 132 | Varian 133 | Vec 134 | WYS 135 | Wantchekon 136 | Weingast 137 | Wikibook 138 | X'X 139 | X'y 140 | Zan 141 | Zapnik 142 | Zheng 143 | Zotero 144 | Zvi 145 | acec 146 | advstats 147 | al 148 | andrewgelman 149 | apalike 150 | apsrtable 151 | arg 152 | arxiv 153 | asymptotics 154 | avesbiodiv 155 | bc 156 | beamer 157 | biblio 158 | binom 159 | bmatrix 160 | bmj 161 | bookdown 162 | cdots 163 | clubSandwich 164 | 
clusterSE 165 | colinear 166 | collinear 167 | colorlinks 168 | colwiz 169 | confounders 170 | counterfactuals 171 | cov 172 | csic 173 | dX 174 | ddots 175 | df 176 | discretizing 177 | documentclass 178 | docx 179 | doi 180 | downarrow 181 | econometrics 182 | emptyset 183 | estadistica 184 | et 185 | fhat 186 | frac 187 | geq 188 | github 189 | heteroskedastic 190 | heteroskedasticity 191 | homoskedastic 192 | homoskedasticity 193 | htmlTables 194 | http 195 | https 196 | iK 197 | ignorability 198 | igt 199 | ij 200 | ik 201 | infty 202 | intromethods 203 | invertible 204 | itg 205 | jrnold 206 | jrnoldmisc 207 | ki 208 | knitr 209 | lah 210 | leftrightarrow 211 | leq 212 | lim 213 | linearities 214 | literatures 215 | lme 216 | lof 217 | mathbb 218 | mathcal 219 | mathrm 220 | mathsf 221 | mathtt 222 | mattblackwell 223 | mncn 224 | monofont 225 | monofontoptions 226 | multicollinearity 227 | neq 228 | nk 229 | nonsingular 230 | observables 231 | overfit 232 | parametric 233 | perp 234 | plm 235 | policymaker's 236 | politicalsciencereplication 237 | positivity 238 | pre 239 | probabilistically 240 | programmatically 241 | regularization 242 | repo 243 | residualized 244 | roids 245 | rsample 246 | rsquared 247 | se 248 | skedastic 249 | statmethods 250 | tech 251 | teck 252 | texreg 253 | tg 254 | tidyverse 255 | tl 256 | tomhopper 257 | unbiasedness 258 | unconfoundedness 259 | underbrace 260 | underfit 261 | underspecified 262 | unmodeled 263 | untestable 264 | uparrow 265 | varepsilon 266 | vcov 267 | vdots 268 | vec 269 | wc 270 | widehat 271 | wordpress 272 | www 273 | xtable 274 | -------------------------------------------------------------------------------- /_bookdown.yml: -------------------------------------------------------------------------------- 1 | book_filename: "intro-method-notes" 2 | chapter_name: "Chapter " 3 | delete_merged_file: true 4 | new_session: yes 5 | output_dir: docs 6 | before_chapter_script: "_common.R" 7 | edit: 8 | link: 
https://github.com/jrnold/intro-methods-notes/edit/gh-pages/%s 9 | text: "Edit" 10 | rmd_subdir: false 11 | rmd_files: 12 | - index.Rmd 13 | 14 | - eda.Rmd 15 | 16 | - programming.Rmd 17 | 18 | - linear-regression.Rmd 19 | - reganat.Rmd 20 | - matrix.Rmd 21 | - multicollinearity.Rmd 22 | - bootstrapping.Rmd 23 | 24 | - prediction.Rmd 25 | - cross-validation.Rmd 26 | - regularization.Rmd 27 | 28 | - causal-regression.Rmd 29 | - panel.Rmd 30 | - rd.Rmd 31 | 32 | - presentation.Rmd 33 | - tables-and-plots.Rmd 34 | - reproducible-research.Rmd 35 | - word-processing.Rmd 36 | - writing.Rmd 37 | 38 | - appendix.Rmd 39 | - references.Rmd 40 | -------------------------------------------------------------------------------- /_common.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages( 2 | library("tidyverse") 3 | ) 4 | 5 | rpkg_url <- function(pkg) { 6 | paste0("https://cran.r-project.org/package=", pkg) 7 | } 8 | 9 | rpkg <- function(pkg) { 10 | paste0("**[", pkg, "](", rpkg_url(pkg), ")**") 11 | } 12 | 13 | rdoc_url <- function(pkg, fun) { 14 | paste0("https://www.rdocumentation.org/packages/", pkg, "/topics/", fun) # nolint 15 | } 16 | 17 | rdoc <- function(pkg, fun, full_name = FALSE) { 18 | text <- if (full_name) paste0(pkg, "::", fun) else pkg 19 | paste0("[", text, "](", rdoc_url(pkg, fun), ")") 20 | } 21 | 22 | knitr::opts_chunk$set(cache = TRUE, autodep = TRUE) 23 | set.seed(634808943) 24 | -------------------------------------------------------------------------------- /_lint.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | suppressPackageStartupMessages({ 3 | library("rex") 4 | library("lintr") 5 | }) 6 | 7 | lint_dir <- function(path = ".", relative_path = TRUE, 8 | pattern = "\\.([Rr]|Rmd|Rhtml)$", recursive = TRUE, ...) 
{ 9 | lintr:::read_settings(path) 10 | on.exit(lintr:::clear_settings, add = TRUE) 11 | settings <- lintr:::settings 12 | names(settings$exclusions) <- 13 | normalizePath(file.path(path, names(settings$exclusions))) 14 | files <- dir(path = path, pattern = pattern, recursive = TRUE, 15 | full.names = TRUE) 16 | files <- normalizePath(files) 17 | lints <- lintr:::flatten_lints(lapply(files, function(file) { 18 | if (interactive()) { 19 | message(".", appendLF = FALSE) 20 | } 21 | try(lint(file, ..., parse_settings = FALSE)) 22 | })) 23 | if (interactive()) { 24 | message() 25 | } 26 | lints <- lintr:::reorder_lints(lints) 27 | if (relative_path == TRUE) { 28 | lints[] <- lapply(lints, function(x) { 29 | x$filename <- re_substitutes(x$filename, rex(normalizePath(path), 30 | one_of("/", "\\")), "") 31 | x 32 | }) 33 | attr(lints, "path") <- path 34 | } 35 | class(lints) <- "lints" 36 | lints 37 | } 38 | 39 | lint_dir(here::here()) 40 | -------------------------------------------------------------------------------- /_notes/pauperism.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Yule Replication" 3 | output: html_document 4 | --- 5 | 6 | ```{r include=FALSE} 7 | library("dplyr") 8 | library("readr") 9 | library("tidyr") 10 | library("haven") 11 | library("plm") 12 | library("magrittr") 13 | library("purrr") 14 | library("ggplot2") 15 | library("broom") 16 | ``` 17 | 18 | ```{r} 19 | ratiodiff <- function(x) { 20 | z <- x / lag(x) 21 | z[is.infinite(z)] <- NA_real_ 22 | z 23 | } 24 | ``` 25 | 26 | ```{r} 27 | pauperism <- 28 | left_join(yule, yule_plu, by = "plu") 29 | ``` 30 | 31 | Table 2: Metropolitan Group, 1871-1881 32 | ```{r results = 'asis'} 33 | filter(yule_long, Type == "Metropolitan") %>% 34 | filter(year == 1881) %>% 35 | select(ID, Union, pauper_diff, outratio, Prop65_diff, 36 | Popn_diff) %>% 37 | arrange(ID) %>% 38 | select(-ID) %>% 39 | knitr::kable() 40 | ``` 41 | 42 | $$ 43 | 
\begin{aligned}[t] 44 | \Delta\mathtt{Paup} &= \beta_0 \\ 45 | &+ \beta_1 \Delta\mathtt{Out} \\ 46 | &+ \beta_2 \Delta\mathtt{Old} \\ 47 | &+ \beta_3 \Delta\mathtt{Pop} + \varepsilon 48 | \end{aligned} 49 | $$ 50 | 51 | # Summary Statistics 52 | 53 | ```{r} 54 | filter(yule_long, year > 1871) %>% 55 | group_by(year, Type) %>% 56 | select(pauper_diff, outratiodiff, Prop65_diff, Popn_diff) %>% 57 | gather(variable, value, -Type, -year) %>% 58 | group_by(variable, year, Type) %>% 59 | summarize(mean = mean(value, na.rm = TRUE), 60 | sd = sd(value, na.rm = TRUE)) %>% 61 | knitr::kable() 62 | 63 | ``` 64 | 65 | 66 | # Regression 67 | 68 | ```{r} 69 | lm(pauper ~ outratio, data = yule_long) 70 | lm(pauper ~ year + Type + outratio, data = yule_long) 71 | lm(pauper ~ year + Type + outratio + Prop65 + Popn65, data = yule_long) 72 | lm(pauper ~ Type * (year + outratio + Prop65 + Popn65), data = yule_long) 73 | ``` 74 | 75 | 76 | ```{r} 77 | yule_diff <- yule_long %>% 78 | filter(year > 1871) %>% 79 | mutate(year = as.factor(year)) %>% 80 | select(ID, Union, Type, year, pauper_diff, outratiodiff, Popn_diff, 81 | Prop65_diff) 82 | 83 | lm(pauper_diff ~ outratiodiff, data = yule_diff) 84 | lm(pauper_diff ~ Type * year + outratiodiff, data = yule_diff) 85 | lm(pauper_diff ~ Type * year + outratiodiff + Popn_diff + Prop65_diff, data = yule_diff) 86 | lm(pauper_diff ~ (Type * year) * (outratiodiff + Prop65_diff + Popn_diff), 87 | data = yule_diff) 88 | 89 | 90 | 91 | ``` 92 | 93 | Individual regression for each Type and Region 94 | ```{r} 95 | diff_mod_3 <- 96 | yule_long %>% 97 | filter(year %in% c(1881, 1891)) %>% 98 | group_by(year, Type) %>% 99 | do(tidy(lm(pauper_diff ~ outratiodiff + Popn_diff + Prop65_diff, data = .))) 100 | 101 | diff_mod_3 %>% 102 | select(year, Type, term, estimate) %>% 103 | spread(term, estimate) %>% 104 | knitr::kable() 105 | ``` 106 | 107 | ## Summary Statistics 108 | 109 | ### Outratio 110 | 111 | ```{r} 112 | ggplot(select(filter(yule_long, 
!is.na(outratio)), 113 | outratio, ID, year, Type), 114 | aes(x = outratio, y = ..density..)) + 115 | geom_histogram(binwidth = 2) + 116 | facet_grid(year ~ Type) 117 | ``` 118 | 119 | ```{r} 120 | ggplot(select(filter(yule_long, !is.na(outratiodiff)), 121 | outratiodiff, ID, year, Type), 122 | aes(x = outratiodiff, y = ..density..)) + 123 | geom_histogram(binwidth = 20) + 124 | facet_grid(year ~ Type) 125 | ``` 126 | 127 | ## Pauperism 128 | 129 | ```{r} 130 | ggplot(select(filter(yule_long, !is.na(pauper)), 131 | pauper, ID, year, Type), 132 | aes(x = pauper, y = ..density..)) + 133 | geom_histogram(binwidth = .01) + 134 | facet_grid(year ~ Type) 135 | ``` 136 | 137 | There appear to be some big outliers in the ratio difference 138 | in pauperism, 139 | ```{r} 140 | ggplot(select(filter(yule_long, !is.na(pauper_diff)), 141 | pauper_diff, ID, year, Type), 142 | aes(x = pauper_diff, y = ..density..)) + 143 | geom_histogram(binwidth = 15) + 144 | facet_grid(year ~ Type) 145 | ``` 146 | -------------------------------------------------------------------------------- /_notes/questions.Rmd: -------------------------------------------------------------------------------- 1 | # Questions 2 | 3 | ```{r setup,include=FALSE} 4 | library("DiagrammR") 5 | ``` 6 | 7 | # Tukey (1980) 8 | 9 | > Tukey, John W. 1980. "We Need Both Exploratory and Confirmatory" *The American Statistician.* https://dx.doi.org/10.2307/268299 10 | 11 | John Tukey discussed exploratory and confirmatory analysis and the need for both: 12 | 13 | The stylized view of science is the "straight-line paradigm" 14 | ```{r} 15 | mermaid("diagrams/science.mmd") 16 | ``` 17 | 18 | But where does the question or idea come from? 
Tukey notes four issues with this straight-line paradigm: 19 | 20 | - Questions come from theory and insights derived from previous explorations of similar data 21 | - Designs come are also driven by insights from previous studies of similar data 22 | - Data collection is monitored by exploring the data and looking for unexpected patterns 23 | - The analysis proceeds often by exploring the data to avoid bad or pursue good avenues of discovery? 24 | 25 | All science has peeked at the data before answering the question. 26 | In fact, if science as a whole persued the straight-line paradigm only the first question ever posed could be analyzed without some corruption from knowing something about domain of study. 27 | 28 | Instead, a more realistic formulation of the scientific process is 29 | ```{r} 30 | mermaid("diagrams/scienc2.mmd") 31 | ``` 32 | 33 | > The formulation of the question itself involves what can in fact be asked, what designs are feasible, as well as how likely a given design is to give a useful answer. 34 | > Both inchoate insight and extensive exploration (of past data) can---and should---play a role in this process of formulating and question. 35 | > 36 | > Science ... DOES NOT BEGIN WITH A TIDY QUESTION. Nor does it end with a tidy answer. 37 | > 38 | > The picture of a scientist struck---as by lightning---with a question is very far from the truth. 39 | 40 | But if you do do confirmatory analysis: 41 | 42 | 1. randomize 43 | 2. pre-plan 44 | 45 | After choosing a question, limit your analysis to one main question---specified by the entire design, collection, monitoring, and analysis. 46 | 47 | # Peng and Leek 48 | 49 | The epicycles of analysis (CH 2). 50 | There are 5 core activities of data analysis: 51 | 52 | 1. Stating the question 53 | 2. Exploratory data analysis 54 | 3. Model building 55 | 4. Interpreting 56 | 5. Communicating 57 | 58 | Each of those activities consists of three epicycles: 59 | 60 | 1. setting expectations 61 | 2. 
collecting data, comparing data to expectations 62 | 3. if the data don't match expectations, then revise data or expectations and repeat 63 | 64 | Types of questions. There are six types of questions (p. 18--19) 65 | Leek and Peng. What is the question? 2015. *Science* http://science.sciencemag.org/content/347/6228/1314 66 | 67 | 1. Descriptive: Summarizes a characteristic of data. 68 | 2. Exploratory: Find patterns in data. Hypothesis generating analysis. 69 | 3. Inferential: Given a hypothesis, extrapolate from the sample to the population or different sample. 70 | 4. Predictive: Predict new data. In this you don't necessarily care about the predictors, only that the model predicts well. 71 | 5. Causal: Does X cause Y? How does changing one factor change another (on average) in the population? 72 | 6. Mechanistic: How does X cause Y? 73 | 74 | What is a good question (p. 21)? 75 | 76 | 1. interest to the audience 77 | 2. it is not already answered 78 | 3. it stems from a plausible framework 79 | 4. it should be answerable 80 | 5. it is also useful to be specific - because that helps answerability. 81 | 82 | # Exploratory Data Analysis 83 | 84 | Goals of EDA (Art of Data Science, Ch 4.): 85 | 86 | 1. Find problems in the data 87 | 2. Detemine whether the question can be answered with the data at hand (proof of concept) 88 | 3. Develop a "sketch of the answer" 89 | 90 | Their EDA checklist 91 | 92 | 1. Formulate your question 93 | 2. Read in your data 94 | 3. Check the packaging: How many observations and variables? What are the observations and variables in the data? 95 | 4. Look at the top and the bottom of your data: Look at the beginning and end of the data---is it in order, is it properly formatted, in a time series does it have the right times? 96 | 5. Check your "n"s: Always check the number of observations. This is quick way to check that there aren't mistakes in the sample, especially when merging. 97 | 6. 
Validate with at least one external data source: This doesn't need to be formal. But compare values of variables to other known values to ensure they are in the right ballpark. This catches unit-of-measurement issues, variables not measuring what you thought they were measuring, data entry errors. 98 | 7. Make a plot. Comparing the data to what you expect it to look like is a good way to catch both data errors and also to find new patterns. 99 | 8. Try the easy solution first. This is a proof of concept that your answer will work. 100 | 9. Follow up. Challenge the solution. Why might it be wrong. 101 | 102 | - do you have the right data? 103 | - do you need more data? 104 | - do you have the right question? 105 | -------------------------------------------------------------------------------- /_notes/realstats.Rmd: -------------------------------------------------------------------------------- 1 | 3.1 Bivariate Regression Model 2 | 3 | - estimation 4 | 5 | 3.2 Random variation in Coefficient estimates 6 | 7 | - distribution of $\hat{\beta}$ estimates 8 | - $\hat{\beta}$ are normally distributed 9 | 10 | 3.3 Exogeneity and Ubiasedness 11 | 12 | - conditions for unbiased estimator 13 | - characterizing biaas 14 | 15 | 3.4 Precision of Estimate 16 | 3.5 Probability limits and consistency 17 | 18 | - probability limit 19 | - consistency 20 | 21 | 3.6 Homoskedasticity 22 | 23 | - heteroskedasticity 24 | - correlated errors - autocorrelation 25 | -------------------------------------------------------------------------------- /_output.yml: -------------------------------------------------------------------------------- 1 | bookdown::gitbook: 2 | # css: style.css 3 | # math: true 4 | dev: svglite 5 | config: 6 | toc: 7 | collapse: none 8 | before: | 9 |
  • Intro Method Notes
  • 10 | after: | 11 |
  • Published with bookdown
  • 12 | edit: 13 | link: https://github.com/jrnold/intro-method-notes/edit/master/%s 14 | text: "Edit" 15 | sharing: 16 | github: yes 17 | facebook: no 18 | always_allow_html: yes 19 | includes: 20 | in_header: includes/in_header.html 21 | before_body: includes/before_body.html 22 | after_body: includes/after_body.html 23 | bookdown::pdf_book: 24 | includes: 25 | in_header: includes/preamble.tex 26 | latex_engine: xelatex 27 | always_allow_html: yes 28 | -------------------------------------------------------------------------------- /_render.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --quiet 2 | quiet <- "--quiet" %in% commandArgs(FALSE) 3 | formats <- commandArgs(TRUE) 4 | 5 | # provide default formats if necessary 6 | if (length(formats) == 0) { 7 | formats <- c("bookdown::pdf_book", "bookdown::gitbook") 8 | } 9 | # render the book to all formats unless they are specified via command-line args 10 | for (fmt in formats) { 11 | cmd <- sprintf("bookdown::render_book('index.Rmd', '%s', quiet = %s)", 12 | fmt, quiet) 13 | res <- bookdown:::Rscript(c("-e", shQuote(cmd))) 14 | if (res != 0) stop("Failed to compile the book to ", fmt) 15 | } 16 | -------------------------------------------------------------------------------- /_serve.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript --quiet 2 | quiet <- "--quiet" %in% commandArgs(FALSE) 3 | bookdown::serve_book(dir = ".", 4 | preview = TRUE, 5 | daemon = FALSE, 6 | in_session = FALSE) 7 | -------------------------------------------------------------------------------- /_spelling.R: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | files <- c(dir(here::here(), pattern = "\\.(Rmd)"), 4 | here::here("README.md")) 5 | words <- readLines(here::here("WORDLIST")) 6 | spelling::spell_check_files(files, ignore = words) 7 | 
-------------------------------------------------------------------------------- /appendix.Rmd: -------------------------------------------------------------------------------- 1 | # (APPENDIX) Appendix {-} 2 | -------------------------------------------------------------------------------- /bibliography.Rmd: -------------------------------------------------------------------------------- 1 | # Annotated Bibliography 2 | 3 | -------------------------------------------------------------------------------- /bootstrapping.Rmd: -------------------------------------------------------------------------------- 1 | # Bootstrapping 2 | 3 | The central analogy of bootstrapping is 4 | 5 | > The population is to the sample as the sample is to the bootstrap samples [@Fox2008a, p. 590] 6 | 7 | To calculate standard errors to use in confidence intervals we need to know sampling distribution of the statistic of interest. 8 | 9 | In the case of a mean, we can appeal to the central limit theorem if the sample size is large enough. 10 | 11 | Bootstrapping takes a different approach. 12 | We use the sample as an estimator of the sampling distribution. 13 | E.g. bootstrap claims 14 | $$ 15 | \text{sample distribution} \approx \text{population distribution} 16 | $$ 17 | and then proceeds to *plug-in* the sample distribution for the population distribution, and then draw new samples to generate a sampling distribution. 18 | 19 | The bootstrap relies upon the **plug-in principle**. 20 | The plug-in principle is that when something is unknown, use an estimate of it. 21 | An example is the use of the *sample standard deviation* in place of the *population standard deviation*, when calculating the standard error of the mean, 22 | $$ 23 | \SE(\bar{x}) = \frac{\sigma}{\sqrt{n}} \approx \frac{\hat{\sigma}}{\sqrt{n}} 24 | $$ 25 | Bootstrap is the plug-in principal on 'roids. 26 | It uses the empirical distribution as a plug-in for the unknown population distribution. 
27 | See Figures 4 and 5 of @Hesterberg2015a. 28 | 29 | Bootstrap principles 30 | 31 | 1. The substitution of the empirical distribution for the population works. 32 | 1. Sample with replacement. 33 | 34 | - The bootstrap is for inference not better estimates. It can estimate uncertainty, not improve $\bar{x}$. It is not generating new data out of nowhere. However, see the section on bagging for how bootstrap aggregation can be used. 35 | 36 | ## Non-parametric bootstrap 37 | 38 | The non-parametric bootstrap resamples the data with replacement $B$ times and calculates the statistic on each resample. 39 | 40 | ## Standard Errors 41 | 42 | The bootstrap is primarily a means to calculate standard errors. 43 | 44 | The bootstrap standard error is 45 | 46 | Suppose there are $r$ bootstrap replicates. 47 | Let $\hat{\theta}^{*}_1, \dots, \hat{\theta}^{*}_r$ be statistics calculated on each bootstrap samples. 48 | $$ 49 | \SE^{*}\left(\hat{\theta}^{*}\right) = \sqrt{\frac{\sum_{b = 1}^r {(\hat{\theta}^{*}_b - \bar{\theta}^{*})}^2}{r - 1}} 50 | $$ 51 | where $\bar{\theta}^{*}$ is the mean of bootstrap statistics, 52 | $$ 53 | \bar{\theta}^{*} = \frac{\sum_{b = 1}^r}{r} . 54 | $$ 55 | 56 | ## Confidence Intervals 57 | 58 | There are multiple ways to calculate confidence intervals from bootstrap. 59 | 60 | - Normal-Theory Intervals 61 | - Percentile Intervals 62 | - ABC Intervals 63 | 64 | ## Alternative methods 65 | 66 | ### Parametric Bootstrap 67 | 68 | The parametric bootstrap draws samples from the estimated model. 69 | 70 | For example, in linear regression, we can start from the model, 71 | $$ 72 | y_i = \Vec{x}_i \Vec{\beta} + \epsilon_i 73 | $$ 74 | 75 | 1. Estimate the regression model to get $\hat{\beta}$ and $\hat{\sigma}$ 76 | 77 | 1. For $1, \dots, r$ bootstrap replicates: 78 | 79 | 1. 
Generate bootstrap sample $(\Vec{y}^{*}, \Mat{X})$, where $\Mat{X}$ are 80 | those from the original sample, and the values of $\Vec{y}^{*}$ are generated 81 | by sampling from the residual distribution, 82 | $$ 83 | y_i^{*}_b = \Vec{x}_i \Vec{\hat{\beta}} + \epsilon^{*}_{i,b} 84 | $$ 85 | where $\epsilon^{*}_{i,b} \sim \mathrm{Normal}(0, \hat{\sigma})$. 86 | 87 | 1. Re-estimate a regression on $(\Vec{y}^{*}, \Mat{X})$ to estimate 88 | $\hat{\beta}^{*}$. 89 | 90 | 1. Calculate any statistics of the regression results. 91 | 92 | Alternatively, we could have drawn the values of $\Vec{\epsilon}^*_b$ from the 93 | empirical distribution of residuals or the [Wild Bootstrap](https://www.math.kth.se/matstat/gru/sf2930/papers/wild.bootstrap.pdf). 94 | 95 | See the the discussion in the `boot::boot()` function, for `sim = "parametric"`. 96 | 97 | ### Clustered bootstrap 98 | 99 | We can incorporate complex sampling methods into the bootstrap [@Fox2008a, Sec 21.5]. 100 | In particular, by resampling clusters instead of individual observations, we get the clustered bootstrap.[@EsareyMenger2017a] 101 | 102 | ### Time series bootstrap 103 | 104 | Since data are not independent in time-series, variations of the bootstrap have to be used. 105 | See the references in the documentation for `boot::tsboot`. 106 | 107 | ### How to sample? 108 | 109 | Draw the bootstrap sample in the same way it was drawn from the population (if possible) [@Hesterberg2015a, p. 19] 110 | 111 | The are a few exceptions: 112 | 113 | - Condition on the observed information. We should fix known quantities, e.g. 
observed sample sizes of sub-samples [@Hesterberg2015a] 114 | - For hypothesis testing, the sampling distribution needs to be modified to represent the null distribution [@Hesterberg2015a] 115 | 116 | ### Caveats 117 | 118 | - Bootstrapping does not work well for the median or other quantities that depend on the small number of observations out of larger sample.[@Hesterberg2015a] 119 | - Uncertainty in the bootstrap estimator is due to both (1) Monte Carlo sampling (taking a finite number of samples), and (2) the sample itself. The former can be decreased by increasing the number of bootstrap samples. The latter is irreducible without a new sample. 120 | - The bootstrap distribution will reflect the data. If the sample was "unusual", then the bootstrap distribution will also be so.[@Hesterberg2015a] 121 | - In small samples there is a narrowness bias. [@Hesterberg2015a, p. 24]. As always, small samples is problematic. 122 | 123 | ### Why use bootstrapping? 124 | 125 | - The common practice of relying on asymmetric results may understate variability by ignoring dependencies or heteroskedasticity. These can be incorporated into bootstrapping.[@Fox2008a, p. 602] 126 | - it is general purpose algorithm that can generate standard errors and confidence intervals in cases where an analytic solution does not exist. 127 | - however, it may require programming to implement and computational power to execute 128 | 129 | ## Bagging 130 | 131 | Note that in all the previous discussion, the original point estimate is used. 132 | Bootstrapping is only used to generate (1) standard errors and confidence intervals (2). 133 | 134 | Bootstrap aggregating or [bagging](https://en.wikipedia.org/wiki/Bootstrap_aggregating) is a meta-algorithm that constructs a point estimate by averaging the point-estimates from bootstrap samples. 135 | Bagging can reduce the variance of some estimators, so can be thought of as a sort of regularization method. 
136 | 137 | ## Hypothesis Testing 138 | 139 | Hypothesis testing with bootstrap is more complicated. 140 | 141 | ## How many samples? 142 | 143 | There is no fixed rule of thumb (it will depend on the statistic you are calculating and the population distribution), but if you want a single number, 1,000 is good lower bound. 144 | 145 | - Higher levels of confidence require more samples 146 | 147 | - Note that the results of the percentile method will be more variable than the normal-approximation method. 148 | The ABC confidence intervals will be even better. 149 | 150 | One ad-hoc recipe suggested [here](https://www.stata.com/support/faqs/statistics/bootstrapped-samples-guidelines/) is: 151 | 152 | 1. Choose a $B$ 153 | 1. Run the bootstrap 154 | 1. Run the bootstrap again (ensure there is a different random number seed) 155 | 1. If results differ, increase the size. 156 | 157 | @DavidsonMacKinnon2000a suggest the following: 158 | 159 | - 5%: 399 160 | - 1%: 1499 161 | 162 | Though it also suggests a pre-test method. 163 | 164 | @Hesterberg2015a suggests far a larger bootstrap sample size: 10,000 for routine use. 165 | It notes that for a t-test, 15,000 samples for the a 95% probability that the one-sided levels fall within 10% of the true values, for 95% intervals and 5% tests. 166 | 167 | ## References 168 | 169 | See @Fox2008a [Ch. 21]. 170 | 171 | @Hesterberg2015a is for "teachers of statistics" but is a great overview of bootstrapping. 172 | I found it more useful than the treatment of bootstrapping in many textbooks. 173 | 174 | For some Monte Carlo results on the accuracy of the bootstrap see @Hesterberg2015a, p. 21. 175 | 176 | R packages. For general purpose bootstrapping and cross-validation I suggest the `r rpkg("rsample")` package, which works well with the tidyverse and seems to be 177 | useful going forward. 
178 | 179 | The `r rpkg("boot")` package included in the recommended R packages is a classic package that implements many bootstrapping and resampling methods. Most of them 180 | are parallelized. However, its interface is not as nice as rsample. 181 | 182 | - 183 | - 184 | 185 | See [this spreadsheet](https://docs.google.com/spreadsheets/d/1MNOCwOo7oPKrDB1FMwDzsYzvLoK-IBqoxhKrOsN1M2A/edit#gid=0) for some Monte Carlo simulations on Bootstrap vs. t-statistic. 186 | -------------------------------------------------------------------------------- /data/western1995/econ_growth.tsv: -------------------------------------------------------------------------------- 1 | country econ_growth labor_org social_dem 2 | Australia .51 1.87 30.5 3 | Austria .64 3.06 100.0 4 | Belgium .44 2.80 21.0 5 | Canada .50 .98 .0 6 | Denmark .36 2.77 75.5 7 | Finland .56 2.76 40.2 8 | France .57 .68 1.7 9 | Germany .53 1.80 74.8 10 | Holland .44 1.90 41.2 11 | Italy .53 1.47 6.5 12 | Japan .38 .43 .0 13 | Norway 1.05 3.33 100.0 14 | Sweden .44 3.52 45.9 15 | United Kingdom .26 1.81 86.0 16 | United States .51 .82 .0 17 | -------------------------------------------------------------------------------- /data/western1995/income_ineq.tsv: -------------------------------------------------------------------------------- 1 | country inequality turnout energy socialism 2 | Argentina 2.960 61.8 1,088 2.3 3 | Australia 1.940 85.3 3,918 45.0 4 | Denmark 2.734 86.8 2,829 41.8 5 | Finland 4.441 82.1 1,650 24.9 6 | France 5.653 66.5 2,419 25.1 7 | Germany 3.435 77.6 3,673 27.1 8 | Israel 1.950 84.1 1,243 50.8 9 | Italy 2.196 89.2 1,135 17.0 10 | Japan 3.007 72.3 1,166 27.5 11 | Netherlands 3.457 87.9 2,691 30.8 12 | Norway 2.440 81.9 2,740 52.0 13 | Puerto Rico 3.693 73.3 1,453 0.0 14 | South Africa 9.410 14.3 2,338 1.8 15 | Sweden 3.143 78.1 3,491 48.5 16 | Trinidad and Tobago 3.888 64.7 1,935 18.8 17 | United Kingdom 2.876 72.4 4,907 48.5 18 | United States 2.296 56.8 8,047 0.0 19 | Venezuela 3.515 78.8 
2,623 28.7 20 | -------------------------------------------------------------------------------- /data/western1995/unionization.tsv: -------------------------------------------------------------------------------- 1 | country union_density left_government labor_force_size econ_conc 2 | Sweden 82.4 111.84 3,931 1.55 3 | Israel 80.0 73.17 997 1.71 4 | Iceland 74.3 17.25 81 2.06 5 | Finland 73.3 59.33 2,034 1.56 6 | Belgium 71.9 43.25 3,348 1.52 7 | Denmark 69.8 90.24 2,225 1.52 8 | Ireland 68.1 .00 886 1.75 9 | Austria 65.6 48.67 2,469 1.53 10 | New Zealand 59.4 60.00 1,050 1.64 11 | Norway 58.9 83.08 1,657 1.58 12 | Australia 51.4 33.74 5,436 1.37 13 | Italy 50.6 .00 15,819 .86 14 | United Kingdon 48.0 43.67 25,757 1.13 15 | Germany 39.6 35.33 23,003 .92 16 | Netherlands 37.7 31.50 4,509 1.25 17 | Switzerland 35.4 11.87 2,460 1.68 18 | Canada 31.2 .00 10,516 1.35 19 | Japan 31.0 1.92 39,930 1.11 20 | France 28.2 8.67 18,846 .95 21 | United States 24.5 .00 92,899 1.00 22 | -------------------------------------------------------------------------------- /diagrams/_book/_main_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/diagrams/_book/_main_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /diagrams/_book/_main_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/diagrams/_book/_main_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /diagrams/_book/_main_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/diagrams/_book/_main_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/css/fontawesome/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/diagrams/_book/libs/gitbook-2.6.7/css/fontawesome/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/css/plugin-bookdown.css: -------------------------------------------------------------------------------- 1 | .book .book-header h1 { 2 | padding-left: 20px; 3 | padding-right: 20px; 4 | } 5 | .book .book-header.fixed { 6 | position: fixed; 7 | right: 0; 8 | top: 0; 9 | left: 0; 10 | border-bottom: 1px solid rgba(0,0,0,.07); 11 | } 12 | span.search-highlight { 13 | background-color: #ffff88; 14 | } 15 | @media (min-width: 600px) { 16 | .book.with-summary .book-header.fixed { 17 | left: 300px; 18 | } 19 | } 20 | @media (max-width: 1240px) { 21 | .book .book-body.fixed { 22 | top: 50px; 23 | } 24 | .book .book-body.fixed .body-inner { 25 | top: auto; 26 | } 27 | } 28 | @media (max-width: 600px) { 29 | .book.with-summary .book-header.fixed { 30 | left: calc(100% - 60px); 31 | min-width: 300px; 32 | } 33 | .book.with-summary .book-body { 34 | transform: none; 35 | left: calc(100% - 60px); 36 | min-width: 300px; 37 | } 38 | .book .book-body.fixed { 39 | top: 0; 40 | } 41 | } 42 | 43 | .book .book-body.fixed .body-inner { 44 | top: 50px; 45 | } 46 | .book .book-body .page-wrapper .page-inner section.normal sub, .book .book-body .page-wrapper .page-inner section.normal sup { 47 | font-size: 85%; 48 | } 49 | 50 | @media print { 51 | .book .book-summary, .book .book-body .book-header, .fa { 52 
| display: none !important; 53 | } 54 | .book .book-body.fixed { 55 | left: 0px; 56 | } 57 | .book .book-body,.book .book-body .body-inner, .book.with-summary { 58 | overflow: visible !important; 59 | } 60 | } 61 | .kable_wrapper { 62 | border-spacing: 20px 0; 63 | border-collapse: separate; 64 | border: none; 65 | margin: auto; 66 | } 67 | .kable_wrapper > tbody > tr > td { 68 | vertical-align: top; 69 | } 70 | .book .book-body .page-wrapper .page-inner section.normal table tr.header { 71 | border-top-width: 2px; 72 | } 73 | .book .book-body .page-wrapper .page-inner section.normal table tr:last-child td { 74 | border-bottom-width: 2px; 75 | } 76 | .book .book-body .page-wrapper .page-inner section.normal table td, .book .book-body .page-wrapper .page-inner section.normal table th { 77 | border-left: none; 78 | border-right: none; 79 | } 80 | .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr, .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr > td { 81 | border-top: none; 82 | } 83 | .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr:last-child > td { 84 | border-bottom: none; 85 | } 86 | 87 | div.theorem, div.lemma, div.corollary, div.proposition, div.conjecture { 88 | font-style: italic; 89 | } 90 | span.theorem, span.lemma, span.corollary, span.proposition, span.conjecture { 91 | font-style: normal; 92 | } 93 | div.proof:after { 94 | content: "\25a2"; 95 | float: right; 96 | } 97 | .header-section-number { 98 | padding-right: .5em; 99 | } 100 | -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/css/plugin-search.css: -------------------------------------------------------------------------------- 1 | .book .book-summary .book-search { 2 | padding: 6px; 3 | background: transparent; 4 | position: absolute; 5 | top: -50px; 6 | left: 0px; 7 | right: 0px; 8 | transition: top 0.5s 
ease; 9 | } 10 | .book .book-summary .book-search input, 11 | .book .book-summary .book-search input:focus, 12 | .book .book-summary .book-search input:hover { 13 | width: 100%; 14 | background: transparent; 15 | border: 1px solid #ccc; 16 | box-shadow: none; 17 | outline: none; 18 | line-height: 22px; 19 | padding: 7px 4px; 20 | color: inherit; 21 | box-sizing: border-box; 22 | } 23 | .book.with-search .book-summary .book-search { 24 | top: 0px; 25 | } 26 | .book.with-search .book-summary ul.summary { 27 | top: 50px; 28 | } 29 | -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/js/jquery.highlight.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["jQuery"], function(jQuery) { 2 | 3 | /* 4 | * jQuery Highlight plugin 5 | * 6 | * Based on highlight v3 by Johann Burkard 7 | * http://johannburkard.de/blog/programming/javascript/highlight-javascript-text-higlighting-jquery-plugin.html 8 | * 9 | * Code a little bit refactored and cleaned (in my humble opinion). 10 | * Most important changes: 11 | * - has an option to highlight only entire words (wordsOnly - false by default), 12 | * - has an option to be case sensitive (caseSensitive - false by default) 13 | * - highlight element tag and class names can be specified in options 14 | * 15 | * Copyright (c) 2009 Bartek Szopka 16 | * 17 | * Licensed under MIT license. 
18 | * 19 | */ 20 | 21 | jQuery.extend({ 22 | highlight: function (node, re, nodeName, className) { 23 | if (node.nodeType === 3) { 24 | var match = node.data.match(re); 25 | if (match) { 26 | var highlight = document.createElement(nodeName || 'span'); 27 | highlight.className = className || 'highlight'; 28 | var wordNode = node.splitText(match.index); 29 | wordNode.splitText(match[0].length); 30 | var wordClone = wordNode.cloneNode(true); 31 | highlight.appendChild(wordClone); 32 | wordNode.parentNode.replaceChild(highlight, wordNode); 33 | return 1; //skip added node in parent 34 | } 35 | } else if ((node.nodeType === 1 && node.childNodes) && // only element nodes that have children 36 | !/(script|style)/i.test(node.tagName) && // ignore script and style nodes 37 | !(node.tagName === nodeName.toUpperCase() && node.className === className)) { // skip if already highlighted 38 | for (var i = 0; i < node.childNodes.length; i++) { 39 | i += jQuery.highlight(node.childNodes[i], re, nodeName, className); 40 | } 41 | } 42 | return 0; 43 | } 44 | }); 45 | 46 | jQuery.fn.unhighlight = function (options) { 47 | var settings = { className: 'highlight', element: 'span' }; 48 | jQuery.extend(settings, options); 49 | 50 | return this.find(settings.element + "." 
+ settings.className).each(function () { 51 | var parent = this.parentNode; 52 | parent.replaceChild(this.firstChild, this); 53 | parent.normalize(); 54 | }).end(); 55 | }; 56 | 57 | jQuery.fn.highlight = function (words, options) { 58 | var settings = { className: 'highlight', element: 'span', caseSensitive: false, wordsOnly: false }; 59 | jQuery.extend(settings, options); 60 | 61 | if (words.constructor === String) { 62 | words = [words]; 63 | } 64 | words = jQuery.grep(words, function(word, i){ 65 | return word !== ''; 66 | }); 67 | words = jQuery.map(words, function(word, i) { 68 | return word.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&"); 69 | }); 70 | if (words.length === 0) { return this; } 71 | 72 | var flag = settings.caseSensitive ? "" : "i"; 73 | var pattern = "(" + words.join("|") + ")"; 74 | if (settings.wordsOnly) { 75 | pattern = "\\b" + pattern + "\\b"; 76 | } 77 | var re = new RegExp(pattern, flag); 78 | 79 | return this.each(function () { 80 | jQuery.highlight(this, re, settings.element, settings.className); 81 | }); 82 | }; 83 | 84 | }); 85 | -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/js/plugin-bookdown.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | 3 | var gs = gitbook.storage; 4 | 5 | gitbook.events.bind("start", function(e, config) { 6 | 7 | // add the Edit button (edit on Github) 8 | var edit = config.edit; 9 | if (edit && edit.link) gitbook.toolbar.createButton({ 10 | icon: 'fa fa-edit', 11 | label: edit.text || 'Edit', 12 | position: 'left', 13 | onClick: function(e) { 14 | e.preventDefault(); 15 | window.open(edit.link); 16 | } 17 | }); 18 | 19 | var down = config.download; 20 | var normalizeDownload = function() { 21 | if (!down || !(down instanceof Array) || down.length === 0) return; 22 | if (down[0] instanceof Array) return down; 23 | return 
$.map(down, function(file, i) { 24 | return [[file, file.replace(/.*[.]/g, '').toUpperCase()]]; 25 | }); 26 | }; 27 | down = normalizeDownload(down); 28 | if (down) if (down.length === 1 && /[.]pdf$/.test(down[0][0])) { 29 | gitbook.toolbar.createButton({ 30 | icon: 'fa fa-file-pdf-o', 31 | label: down[0][1], 32 | position: 'left', 33 | onClick: function(e) { 34 | e.preventDefault(); 35 | window.open(down[0][0]); 36 | } 37 | }); 38 | } else { 39 | gitbook.toolbar.createButton({ 40 | icon: 'fa fa-download', 41 | label: 'Download', 42 | position: 'left', 43 | dropdown: $.map(down, function(item, i) { 44 | return { 45 | text: item[1], 46 | onClick: function(e) { 47 | e.preventDefault(); 48 | window.open(item[0]); 49 | } 50 | }; 51 | }) 52 | }); 53 | } 54 | 55 | // highlight the current section in TOC 56 | var href = window.location.pathname; 57 | href = href.substr(href.lastIndexOf('/') + 1); 58 | if (href === '') href = 'index.html'; 59 | var li = $('a[href^="' + href + location.hash + '"]').parent('li.chapter').first(); 60 | var summary = $('ul.summary'), chaps = summary.find('li.chapter'); 61 | if (li.length === 0) li = chaps.first(); 62 | li.addClass('active'); 63 | chaps.on('click', function(e) { 64 | chaps.removeClass('active'); 65 | $(this).addClass('active'); 66 | gs.set('tocScrollTop', summary.scrollTop()); 67 | }); 68 | 69 | var toc = config.toc; 70 | // collapse TOC items that are not for the current chapter 71 | if (toc && toc.collapse) (function() { 72 | var type = toc.collapse; 73 | if (type === 'none') return; 74 | if (type !== 'section' && type !== 'subsection') return; 75 | // sections under chapters 76 | var toc_sub = summary.children('li[data-level]').children('ul'); 77 | if (type === 'section') { 78 | toc_sub.hide() 79 | .parent().has(li).children('ul').show(); 80 | } else { 81 | toc_sub.children('li').children('ul').hide() 82 | .parent().has(li).children('ul').show(); 83 | } 84 | li.children('ul').show(); 85 | var toc_sub2 = 
toc_sub.children('li'); 86 | if (type === 'section') toc_sub2.children('ul').hide(); 87 | summary.children('li[data-level]').find('a') 88 | .on('click.bookdown', function(e) { 89 | if (href === $(this).attr('href').replace(/#.*/, '')) 90 | $(this).parent('li').children('ul').toggle(); 91 | }); 92 | })(); 93 | 94 | // add tooltips to the 's that are truncated 95 | $('a').each(function(i, el) { 96 | if (el.offsetWidth >= el.scrollWidth) return; 97 | if (typeof el.title === 'undefined') return; 98 | el.title = el.text; 99 | }); 100 | 101 | // restore TOC scroll position 102 | var pos = gs.get('tocScrollTop'); 103 | if (typeof pos !== 'undefined') summary.scrollTop(pos); 104 | 105 | // highlight the TOC item that has same text as the heading in view as scrolling 106 | if (toc && toc.scroll_highlight !== false) (function() { 107 | // scroll the current TOC item into viewport 108 | var ht = $(window).height(), rect = li[0].getBoundingClientRect(); 109 | if (rect.top >= ht || rect.top <= 0 || rect.bottom <= 0) { 110 | summary.scrollTop(li[0].offsetTop); 111 | } 112 | // current chapter TOC items 113 | var items = $('a[href^="' + href + '"]').parent('li.chapter'), 114 | m = items.length; 115 | if (m === 0) { 116 | items = summary.find('li.chapter'); 117 | m = items.length; 118 | } 119 | if (m === 0) return; 120 | // all section titles on current page 121 | var hs = bookInner.find('.page-inner').find('h1,h2,h3'), n = hs.length, 122 | ts = hs.map(function(i, el) { return $(el).text(); }); 123 | if (n === 0) return; 124 | var scrollHandler = function(e) { 125 | var ht = $(window).height(); 126 | clearTimeout($.data(this, 'scrollTimer')); 127 | $.data(this, 'scrollTimer', setTimeout(function() { 128 | // find the first visible title in the viewport 129 | for (var i = 0; i < n; i++) { 130 | var rect = hs[i].getBoundingClientRect(); 131 | if (rect.top >= 0 && rect.bottom <= ht) break; 132 | } 133 | if (i === n) return; 134 | items.removeClass('active'); 135 | for (var j = 0; j < 
m; j++) { 136 | if (items.eq(j).children('a').first().text() === ts[i]) break; 137 | } 138 | if (j === m) j = 0; // highlight the chapter title 139 | // search bottom-up for a visible TOC item to highlight; if an item is 140 | // hidden, we check if its parent is visible, and so on 141 | while (j > 0 && items.eq(j).is(':hidden')) j--; 142 | items.eq(j).addClass('active'); 143 | }, 250)); 144 | }; 145 | bookInner.on('scroll.bookdown', scrollHandler); 146 | bookBody.on('scroll.bookdown', scrollHandler); 147 | })(); 148 | 149 | // do not refresh the page if the TOC item points to the current page 150 | $('a[href="' + href + '"]').parent('li.chapter').children('a') 151 | .on('click', function(e) { 152 | bookInner.scrollTop(0); 153 | bookBody.scrollTop(0); 154 | return false; 155 | }); 156 | 157 | var toolbar = config.toolbar; 158 | if (!toolbar || toolbar.position !== 'static') { 159 | var bookHeader = $('.book-header'); 160 | bookBody.addClass('fixed'); 161 | bookHeader.addClass('fixed') 162 | .css('background-color', bookBody.css('background-color')) 163 | .on('click.bookdown', function(e) { 164 | // the theme may have changed after user clicks the theme button 165 | bookHeader.css('background-color', bookBody.css('background-color')); 166 | }); 167 | } 168 | 169 | }); 170 | 171 | gitbook.events.bind("page.change", function(e) { 172 | // store TOC scroll position 173 | var summary = $('ul.summary'); 174 | gs.set('tocScrollTop', summary.scrollTop()); 175 | }); 176 | 177 | var bookBody = $('.book-body'), bookInner = bookBody.find('.body-inner'); 178 | var chapterTitle = function() { 179 | return bookInner.find('.page-inner').find('h1,h2').first().text(); 180 | }; 181 | var bookTitle = function() { 182 | return bookInner.find('.book-header > h1').first().text(); 183 | }; 184 | var saveScrollPos = function(e) { 185 | // save scroll position before page is reloaded 186 | gs.set('bodyScrollTop', { 187 | body: bookBody.scrollTop(), 188 | inner: bookInner.scrollTop(), 189 | 
focused: document.hasFocus(), 190 | title: chapterTitle() 191 | }); 192 | }; 193 | $(document).on('servr:reload', saveScrollPos); 194 | 195 | // check if the page is loaded in an iframe (e.g. the RStudio preview window) 196 | var inIFrame = function() { 197 | var inIframe = true; 198 | try { inIframe = window.self !== window.top; } catch (e) {} 199 | return inIframe; 200 | }; 201 | $(window).on('blur unload', function(e) { 202 | if (inIFrame()) saveScrollPos(e); 203 | gs.set('bookTitle', bookTitle()); 204 | }); 205 | 206 | $(function(e) { 207 | if (gs.get('bookTitle', '') !== bookTitle()) localStorage.clear(); 208 | var pos = gs.get('bodyScrollTop'); 209 | if (pos) { 210 | if (pos.title === chapterTitle()) { 211 | if (pos.body !== 0) bookBody.scrollTop(pos.body); 212 | if (pos.inner !== 0) bookInner.scrollTop(pos.inner); 213 | } 214 | if (pos.focused) bookInner.find('.page-wrapper').focus(); 215 | } 216 | // clear book body scroll position 217 | gs.remove('bodyScrollTop'); 218 | }); 219 | 220 | }); 221 | -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/js/plugin-fontsettings.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | var fontState; 3 | 4 | var THEMES = { 5 | "white": 0, 6 | "sepia": 1, 7 | "night": 2 8 | }; 9 | 10 | var FAMILY = { 11 | "serif": 0, 12 | "sans": 1 13 | }; 14 | 15 | // Save current font settings 16 | function saveFontSettings() { 17 | gitbook.storage.set("fontState", fontState); 18 | update(); 19 | } 20 | 21 | // Increase font size 22 | function enlargeFontSize(e) { 23 | e.preventDefault(); 24 | if (fontState.size >= 4) return; 25 | 26 | fontState.size++; 27 | saveFontSettings(); 28 | }; 29 | 30 | // Decrease font size 31 | function reduceFontSize(e) { 32 | e.preventDefault(); 33 | if (fontState.size <= 0) return; 34 | 35 | fontState.size--; 36 | 
saveFontSettings(); 37 | }; 38 | 39 | // Change font family 40 | function changeFontFamily(index, e) { 41 | e.preventDefault(); 42 | 43 | fontState.family = index; 44 | saveFontSettings(); 45 | }; 46 | 47 | // Change type of color 48 | function changeColorTheme(index, e) { 49 | e.preventDefault(); 50 | 51 | var $book = $(".book"); 52 | 53 | if (fontState.theme !== 0) 54 | $book.removeClass("color-theme-"+fontState.theme); 55 | 56 | fontState.theme = index; 57 | if (fontState.theme !== 0) 58 | $book.addClass("color-theme-"+fontState.theme); 59 | 60 | saveFontSettings(); 61 | }; 62 | 63 | function update() { 64 | var $book = gitbook.state.$book; 65 | 66 | $(".font-settings .font-family-list li").removeClass("active"); 67 | $(".font-settings .font-family-list li:nth-child("+(fontState.family+1)+")").addClass("active"); 68 | 69 | $book[0].className = $book[0].className.replace(/\bfont-\S+/g, ''); 70 | $book.addClass("font-size-"+fontState.size); 71 | $book.addClass("font-family-"+fontState.family); 72 | 73 | if(fontState.theme !== 0) { 74 | $book[0].className = $book[0].className.replace(/\bcolor-theme-\S+/g, ''); 75 | $book.addClass("color-theme-"+fontState.theme); 76 | } 77 | }; 78 | 79 | function init(config) { 80 | var $bookBody, $book; 81 | 82 | //Find DOM elements. 
83 | $book = gitbook.state.$book; 84 | $bookBody = $book.find(".book-body"); 85 | 86 | // Instantiate font state object 87 | fontState = gitbook.storage.get("fontState", { 88 | size: config.size || 2, 89 | family: FAMILY[config.family || "sans"], 90 | theme: THEMES[config.theme || "white"] 91 | }); 92 | 93 | update(); 94 | }; 95 | 96 | 97 | gitbook.events.bind("start", function(e, config) { 98 | var opts = config.fontsettings; 99 | 100 | // Create buttons in toolbar 101 | gitbook.toolbar.createButton({ 102 | icon: 'fa fa-font', 103 | label: 'Font Settings', 104 | className: 'font-settings', 105 | dropdown: [ 106 | [ 107 | { 108 | text: 'A', 109 | className: 'font-reduce', 110 | onClick: reduceFontSize 111 | }, 112 | { 113 | text: 'A', 114 | className: 'font-enlarge', 115 | onClick: enlargeFontSize 116 | } 117 | ], 118 | [ 119 | { 120 | text: 'Serif', 121 | onClick: _.partial(changeFontFamily, 0) 122 | }, 123 | { 124 | text: 'Sans', 125 | onClick: _.partial(changeFontFamily, 1) 126 | } 127 | ], 128 | [ 129 | { 130 | text: 'White', 131 | onClick: _.partial(changeColorTheme, 0) 132 | }, 133 | { 134 | text: 'Sepia', 135 | onClick: _.partial(changeColorTheme, 1) 136 | }, 137 | { 138 | text: 'Night', 139 | onClick: _.partial(changeColorTheme, 2) 140 | } 141 | ] 142 | ] 143 | }); 144 | 145 | 146 | // Init current settings 147 | init(opts); 148 | }); 149 | }); 150 | 151 | 152 | -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/js/plugin-search.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | var index = null; 3 | var $searchInput, $searchForm; 4 | var $highlighted, hi = 0, hiOpts = { className: 'search-highlight' }; 5 | var collapse = false; 6 | 7 | // Use a specific index 8 | function loadIndex(data) { 9 | // [Yihui] In bookdown, I use a character matrix to store the chapter 10 | // 
content, and the index is dynamically built on the client side. 11 | // Gitbook prebuilds the index data instead: https://github.com/GitbookIO/plugin-search 12 | // We can certainly do that via R packages V8 and jsonlite, but let's 13 | // see how slow it really is before improving it. On the other hand, 14 | // lunr cannot handle non-English text very well, e.g. the default 15 | // tokenizer cannot deal with Chinese text, so we may want to replace 16 | // lunr with a dumb simple text matching approach. 17 | index = lunr(function () { 18 | this.ref('url'); 19 | this.field('title', { boost: 10 }); 20 | this.field('body'); 21 | }); 22 | data.map(function(item) { 23 | index.add({ 24 | url: item[0], 25 | title: item[1], 26 | body: item[2] 27 | }); 28 | }); 29 | } 30 | 31 | // Fetch the search index 32 | function fetchIndex() { 33 | return $.getJSON(gitbook.state.basePath+"/search_index.json") 34 | .then(loadIndex); // [Yihui] we need to use this object later 35 | } 36 | 37 | // Search for a term and return results 38 | function search(q) { 39 | if (!index) return; 40 | 41 | var results = _.chain(index.search(q)) 42 | .map(function(result) { 43 | var parts = result.ref.split("#"); 44 | return { 45 | path: parts[0], 46 | hash: parts[1] 47 | }; 48 | }) 49 | .value(); 50 | 51 | // [Yihui] Highlight the search keyword on current page 52 | hi = 0; 53 | $highlighted = results.length === 0 ? undefined : $('.page-inner') 54 | .unhighlight(hiOpts).highlight(q, hiOpts).find('span.search-highlight'); 55 | scrollToHighlighted(); 56 | toggleTOC(results.length > 0); 57 | 58 | return results; 59 | } 60 | 61 | // [Yihui] Scroll the chapter body to the i-th highlighted string 62 | function scrollToHighlighted() { 63 | if (!$highlighted) return; 64 | var n = $highlighted.length; 65 | if (n === 0) return; 66 | var $p = $highlighted.eq(hi), p = $p[0], rect = p.getBoundingClientRect(); 67 | if (rect.top < 0 || rect.bottom > $(window).height()) { 68 | ($(window).width() >= 1240 ? 
$('.body-inner') : $('.book-body')) 69 | .scrollTop(p.offsetTop - 100); 70 | } 71 | $highlighted.css('background-color', ''); 72 | // an orange background color on the current item and removed later 73 | $p.css('background-color', 'orange'); 74 | setTimeout(function() { 75 | $p.css('background-color', ''); 76 | }, 2000); 77 | } 78 | 79 | // [Yihui] Expand/collapse TOC 80 | function toggleTOC(show) { 81 | if (!collapse) return; 82 | var toc_sub = $('ul.summary').children('li[data-level]').children('ul'); 83 | if (show) return toc_sub.show(); 84 | var href = window.location.pathname; 85 | href = href.substr(href.lastIndexOf('/') + 1); 86 | if (href === '') href = 'index.html'; 87 | var li = $('a[href^="' + href + location.hash + '"]').parent('li.chapter').first(); 88 | toc_sub.hide().parent().has(li).children('ul').show(); 89 | li.children('ul').show(); 90 | } 91 | 92 | // Create search form 93 | function createForm(value) { 94 | if ($searchForm) $searchForm.remove(); 95 | if ($searchInput) $searchInput.remove(); 96 | 97 | $searchForm = $('
    ', { 98 | 'class': 'book-search', 99 | 'role': 'search' 100 | }); 101 | 102 | $searchInput = $('', { 103 | 'type': 'search', 104 | 'class': 'form-control', 105 | 'val': value, 106 | 'placeholder': 'Type to search' 107 | }); 108 | 109 | $searchInput.appendTo($searchForm); 110 | $searchForm.prependTo(gitbook.state.$book.find('.book-summary')); 111 | } 112 | 113 | // Return true if search is open 114 | function isSearchOpen() { 115 | return gitbook.state.$book.hasClass("with-search"); 116 | } 117 | 118 | // Toggle the search 119 | function toggleSearch(_state) { 120 | if (isSearchOpen() === _state) return; 121 | if (!$searchInput) return; 122 | 123 | gitbook.state.$book.toggleClass("with-search", _state); 124 | 125 | // If search bar is open: focus input 126 | if (isSearchOpen()) { 127 | gitbook.sidebar.toggle(true); 128 | $searchInput.focus(); 129 | } else { 130 | $searchInput.blur(); 131 | $searchInput.val(""); 132 | gitbook.storage.remove("keyword"); 133 | gitbook.sidebar.filter(null); 134 | $('.page-inner').unhighlight(hiOpts); 135 | toggleTOC(false); 136 | } 137 | } 138 | 139 | // Recover current search when page changed 140 | function recoverSearch() { 141 | var keyword = gitbook.storage.get("keyword", ""); 142 | 143 | createForm(keyword); 144 | 145 | if (keyword.length > 0) { 146 | if(!isSearchOpen()) { 147 | toggleSearch(true); // [Yihui] open the search box 148 | } 149 | gitbook.sidebar.filter(_.pluck(search(keyword), "path")); 150 | } 151 | } 152 | 153 | 154 | gitbook.events.bind("start", function(e, config) { 155 | // [Yihui] disable search 156 | if (config.search === false) return; 157 | collapse = !config.toc || config.toc.collapse === 'section' || 158 | config.toc.collapse === 'subsection'; 159 | 160 | // Pre-fetch search index and create the form 161 | fetchIndex() 162 | // [Yihui] recover search after the page is loaded 163 | .then(recoverSearch); 164 | 165 | 166 | // Type in search bar 167 | $(document).on("keyup", ".book-search input", 
function(e) { 168 | var key = (e.keyCode ? e.keyCode : e.which); 169 | // [Yihui] Escape -> close search box; Up/Down: previous/next highlighted 170 | if (key == 27) { 171 | e.preventDefault(); 172 | toggleSearch(false); 173 | } else if (key == 38) { 174 | if (hi <= 0 && $highlighted) hi = $highlighted.length; 175 | hi--; 176 | scrollToHighlighted(); 177 | } else if (key == 40) { 178 | hi++; 179 | if ($highlighted && hi >= $highlighted.length) hi = 0; 180 | scrollToHighlighted(); 181 | } 182 | }).on("input", ".book-search input", function(e) { 183 | var q = $(this).val().trim(); 184 | if (q.length === 0) { 185 | gitbook.sidebar.filter(null); 186 | gitbook.storage.remove("keyword"); 187 | $('.page-inner').unhighlight(hiOpts); 188 | toggleTOC(false); 189 | } else { 190 | var results = search(q); 191 | gitbook.sidebar.filter( 192 | _.pluck(results, "path") 193 | ); 194 | gitbook.storage.set("keyword", q); 195 | } 196 | }); 197 | 198 | // Create the toggle search button 199 | gitbook.toolbar.createButton({ 200 | icon: 'fa fa-search', 201 | label: 'Search', 202 | position: 'left', 203 | onClick: toggleSearch 204 | }); 205 | 206 | // Bind keyboard to toggle search 207 | gitbook.keyboard.bind(['f'], toggleSearch); 208 | }); 209 | 210 | // [Yihui] do not try to recover search; always start fresh 211 | // gitbook.events.bind("page.change", recoverSearch); 212 | }); 213 | -------------------------------------------------------------------------------- /diagrams/_book/libs/gitbook-2.6.7/js/plugin-sharing.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | var SITES = { 3 | 'github': { 4 | 'label': 'Github', 5 | 'icon': 'fa fa-github', 6 | 'onClick': function(e) { 7 | e.preventDefault(); 8 | var repo = $('meta[name="github-repo"]').attr('content'); 9 | if (typeof repo === 'undefined') throw("Github repo not defined"); 10 | window.open("https://github.com/"+repo); 
11 | } 12 | }, 13 | 'facebook': { 14 | 'label': 'Facebook', 15 | 'icon': 'fa fa-facebook', 16 | 'onClick': function(e) { 17 | e.preventDefault(); 18 | window.open("http://www.facebook.com/sharer/sharer.php?s=100&p[url]="+encodeURIComponent(location.href)); 19 | } 20 | }, 21 | 'twitter': { 22 | 'label': 'Twitter', 23 | 'icon': 'fa fa-twitter', 24 | 'onClick': function(e) { 25 | e.preventDefault(); 26 | window.open("http://twitter.com/home?status="+encodeURIComponent(document.title+" "+location.href)); 27 | } 28 | }, 29 | 'google': { 30 | 'label': 'Google+', 31 | 'icon': 'fa fa-google-plus', 32 | 'onClick': function(e) { 33 | e.preventDefault(); 34 | window.open("https://plus.google.com/share?url="+encodeURIComponent(location.href)); 35 | } 36 | }, 37 | 'linkedin': { 38 | 'label': 'LinkedIn', 39 | 'icon': 'fa fa-linkedin', 40 | 'onClick': function(e) { 41 | e.preventDefault(); 42 | window.open("https://www.linkedin.com/shareArticle?mini=true&url="+encodeURIComponent(location.href)+"&title="+encodeURIComponent(document.title)); 43 | } 44 | }, 45 | 'weibo': { 46 | 'label': 'Weibo', 47 | 'icon': 'fa fa-weibo', 48 | 'onClick': function(e) { 49 | e.preventDefault(); 50 | window.open("http://service.weibo.com/share/share.php?content=utf-8&url="+encodeURIComponent(location.href)+"&title="+encodeURIComponent(document.title)); 51 | } 52 | }, 53 | 'instapaper': { 54 | 'label': 'Instapaper', 55 | 'icon': 'fa fa-instapaper', 56 | 'onClick': function(e) { 57 | e.preventDefault(); 58 | window.open("http://www.instapaper.com/text?u="+encodeURIComponent(location.href)); 59 | } 60 | }, 61 | 'vk': { 62 | 'label': 'VK', 63 | 'icon': 'fa fa-vk', 64 | 'onClick': function(e) { 65 | e.preventDefault(); 66 | window.open("http://vkontakte.ru/share.php?url="+encodeURIComponent(location.href)); 67 | } 68 | } 69 | }; 70 | 71 | 72 | 73 | gitbook.events.bind("start", function(e, config) { 74 | var opts = config.sharing; 75 | if (!opts) return; 76 | 77 | // Create dropdown menu 78 | var menu = 
_.chain(opts.all) 79 | .map(function(id) { 80 | var site = SITES[id]; 81 | 82 | return { 83 | text: site.label, 84 | onClick: site.onClick 85 | }; 86 | }) 87 | .compact() 88 | .value(); 89 | 90 | // Create main button with dropdown 91 | if (menu.length > 0) { 92 | gitbook.toolbar.createButton({ 93 | icon: 'fa fa-share-alt', 94 | label: 'Share', 95 | position: 'right', 96 | dropdown: [menu] 97 | }); 98 | } 99 | 100 | // Direct actions to share 101 | _.each(SITES, function(site, sideId) { 102 | if (!opts[sideId]) return; 103 | 104 | gitbook.toolbar.createButton({ 105 | icon: site.icon, 106 | label: site.text, 107 | position: 'right', 108 | onClick: site.onClick 109 | }); 110 | }); 111 | }); 112 | }); 113 | -------------------------------------------------------------------------------- /diagrams/iv-dag.gv: -------------------------------------------------------------------------------- 1 | digraph ivdag { 2 | graph [layout = dot] 3 | 4 | edge [color = black] 5 | Z -> D 6 | D -> Y 7 | 8 | edge [color = gray] 9 | U -> D 10 | U -> Y 11 | 12 | } 13 | -------------------------------------------------------------------------------- /diagrams/science.mmd: -------------------------------------------------------------------------------- 1 | graph LR 2 | A(question) --> B(design) 3 | B --> C(collection) 4 | C --> D(analysis) 5 | D --> E(answer) 6 | -------------------------------------------------------------------------------- /diagrams/science2.mmd: -------------------------------------------------------------------------------- 1 | graph LR 2 | A(idea) --> B 3 | B(question) --> C(design) 4 | C --> B 5 | C --> D(collection) 6 | D --> E(analysis) 7 | E --> F(answer) 8 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/docs/.nojekyll 
-------------------------------------------------------------------------------- /docs/appendix.md: -------------------------------------------------------------------------------- 1 | 2 | # (APPENDIX) Appendix {-} 3 | -------------------------------------------------------------------------------- /docs/eda.md: -------------------------------------------------------------------------------- 1 | 2 | # (PART) Exploratory Data Analysis {-} 3 | -------------------------------------------------------------------------------- /docs/img/islr-fig-6.7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/docs/img/islr-fig-6.7.png -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | 2 | --- 3 | title: "Data Analysis Notes" 4 | author: "Jeffrey B. Arnold" 5 | date: "2018-05-07" 6 | knit: "bookdown::render_book" 7 | bibliography: ["intromethods.bib"] 8 | biblio-style: "apalike" 9 | link-citations: true 10 | documentclass: book 11 | colorlinks: yes 12 | lot: yes 13 | lof: yes 14 | monofont: "Source Code Pro" 15 | monofontoptions: "Scale=0.7" 16 | site: bookdown::bookdown_site 17 | github-repo: jrnold/intro-methods-notes 18 | description: > 19 | These are notes associated with the course, POLS/CS&SS 503: Advanced Quantitative Political Methodology at the University of Washington. 20 | --- 21 | 22 | # Introduction 23 | 24 | Notes used when teaching "POLS/CS&SS 501: Advanced Political Research Design and Analysis" and "POLS/CS&SS 503: Advanced Quantitative Political Methodology" at the University of Washington. 
25 | 26 | 27 | $$ 28 | $$ 29 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/css/fontawesome/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/docs/libs/gitbook-2.6.7/css/fontawesome/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/css/plugin-bookdown.css: -------------------------------------------------------------------------------- 1 | .book .book-header h1 { 2 | padding-left: 20px; 3 | padding-right: 20px; 4 | } 5 | .book .book-header.fixed { 6 | position: fixed; 7 | right: 0; 8 | top: 0; 9 | left: 0; 10 | border-bottom: 1px solid rgba(0,0,0,.07); 11 | } 12 | span.search-highlight { 13 | background-color: #ffff88; 14 | } 15 | @media (min-width: 600px) { 16 | .book.with-summary .book-header.fixed { 17 | left: 300px; 18 | } 19 | } 20 | @media (max-width: 1240px) { 21 | .book .book-body.fixed { 22 | top: 50px; 23 | } 24 | .book .book-body.fixed .body-inner { 25 | top: auto; 26 | } 27 | } 28 | @media (max-width: 600px) { 29 | .book.with-summary .book-header.fixed { 30 | left: calc(100% - 60px); 31 | min-width: 300px; 32 | } 33 | .book.with-summary .book-body { 34 | transform: none; 35 | left: calc(100% - 60px); 36 | min-width: 300px; 37 | } 38 | .book .book-body.fixed { 39 | top: 0; 40 | } 41 | } 42 | 43 | .book .book-body.fixed .body-inner { 44 | top: 50px; 45 | } 46 | .book .book-body .page-wrapper .page-inner section.normal sub, .book .book-body .page-wrapper .page-inner section.normal sup { 47 | font-size: 85%; 48 | } 49 | 50 | @media print { 51 | .book .book-summary, .book .book-body .book-header, .fa { 52 | display: none !important; 53 | } 54 | .book .book-body.fixed { 55 | left: 0px; 56 | } 57 | .book .book-body,.book .book-body .body-inner, .book.with-summary { 
58 | overflow: visible !important; 59 | } 60 | } 61 | .kable_wrapper { 62 | border-spacing: 20px 0; 63 | border-collapse: separate; 64 | border: none; 65 | margin: auto; 66 | } 67 | .kable_wrapper > tbody > tr > td { 68 | vertical-align: top; 69 | } 70 | .book .book-body .page-wrapper .page-inner section.normal table tr.header { 71 | border-top-width: 2px; 72 | } 73 | .book .book-body .page-wrapper .page-inner section.normal table tr:last-child td { 74 | border-bottom-width: 2px; 75 | } 76 | .book .book-body .page-wrapper .page-inner section.normal table td, .book .book-body .page-wrapper .page-inner section.normal table th { 77 | border-left: none; 78 | border-right: none; 79 | } 80 | .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr, .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr > td { 81 | border-top: none; 82 | } 83 | .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr:last-child > td { 84 | border-bottom: none; 85 | } 86 | 87 | div.theorem, div.lemma, div.corollary, div.proposition { 88 | font-style: italic; 89 | } 90 | span.theorem, span.lemma, span.corollary, span.proposition { 91 | font-style: normal; 92 | } 93 | div.proof:after { 94 | content: "\25a2"; 95 | float: right; 96 | } 97 | .header-section-number { 98 | padding-right: .5em; 99 | } 100 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/css/plugin-search.css: -------------------------------------------------------------------------------- 1 | .book .book-summary .book-search { 2 | padding: 6px; 3 | background: transparent; 4 | position: absolute; 5 | top: -50px; 6 | left: 0px; 7 | right: 0px; 8 | transition: top 0.5s ease; 9 | } 10 | .book .book-summary .book-search input, 11 | .book .book-summary .book-search input:focus, 12 | .book .book-summary .book-search input:hover { 13 | width: 100%; 14 | background: transparent; 
15 | border: 1px solid #ccc; 16 | box-shadow: none; 17 | outline: none; 18 | line-height: 22px; 19 | padding: 7px 4px; 20 | color: inherit; 21 | box-sizing: border-box; 22 | } 23 | .book.with-search .book-summary .book-search { 24 | top: 0px; 25 | } 26 | .book.with-search .book-summary ul.summary { 27 | top: 50px; 28 | } 29 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/js/jquery.highlight.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["jQuery"], function(jQuery) { 2 | 3 | /* 4 | * jQuery Highlight plugin 5 | * 6 | * Based on highlight v3 by Johann Burkard 7 | * http://johannburkard.de/blog/programming/javascript/highlight-javascript-text-higlighting-jquery-plugin.html 8 | * 9 | * Code a little bit refactored and cleaned (in my humble opinion). 10 | * Most important changes: 11 | * - has an option to highlight only entire words (wordsOnly - false by default), 12 | * - has an option to be case sensitive (caseSensitive - false by default) 13 | * - highlight element tag and class names can be specified in options 14 | * 15 | * Copyright (c) 2009 Bartek Szopka 16 | * 17 | * Licensed under MIT license. 
18 | * 19 | */ 20 | 21 | jQuery.extend({ 22 | highlight: function (node, re, nodeName, className) { 23 | if (node.nodeType === 3) { 24 | var match = node.data.match(re); 25 | if (match) { 26 | var highlight = document.createElement(nodeName || 'span'); 27 | highlight.className = className || 'highlight'; 28 | var wordNode = node.splitText(match.index); 29 | wordNode.splitText(match[0].length); 30 | var wordClone = wordNode.cloneNode(true); 31 | highlight.appendChild(wordClone); 32 | wordNode.parentNode.replaceChild(highlight, wordNode); 33 | return 1; //skip added node in parent 34 | } 35 | } else if ((node.nodeType === 1 && node.childNodes) && // only element nodes that have children 36 | !/(script|style)/i.test(node.tagName) && // ignore script and style nodes 37 | !(node.tagName === nodeName.toUpperCase() && node.className === className)) { // skip if already highlighted 38 | for (var i = 0; i < node.childNodes.length; i++) { 39 | i += jQuery.highlight(node.childNodes[i], re, nodeName, className); 40 | } 41 | } 42 | return 0; 43 | } 44 | }); 45 | 46 | jQuery.fn.unhighlight = function (options) { 47 | var settings = { className: 'highlight', element: 'span' }; 48 | jQuery.extend(settings, options); 49 | 50 | return this.find(settings.element + "." 
+ settings.className).each(function () { 51 | var parent = this.parentNode; 52 | parent.replaceChild(this.firstChild, this); 53 | parent.normalize(); 54 | }).end(); 55 | }; 56 | 57 | jQuery.fn.highlight = function (words, options) { 58 | var settings = { className: 'highlight', element: 'span', caseSensitive: false, wordsOnly: false }; 59 | jQuery.extend(settings, options); 60 | 61 | if (words.constructor === String) { 62 | words = [words]; 63 | } 64 | words = jQuery.grep(words, function(word, i){ 65 | return word !== ''; 66 | }); 67 | words = jQuery.map(words, function(word, i) { 68 | return word.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&"); 69 | }); 70 | if (words.length === 0) { return this; } 71 | 72 | var flag = settings.caseSensitive ? "" : "i"; 73 | var pattern = "(" + words.join("|") + ")"; 74 | if (settings.wordsOnly) { 75 | pattern = "\\b" + pattern + "\\b"; 76 | } 77 | var re = new RegExp(pattern, flag); 78 | 79 | return this.each(function () { 80 | jQuery.highlight(this, re, settings.element, settings.className); 81 | }); 82 | }; 83 | 84 | }); 85 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/js/plugin-bookdown.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | 3 | var gs = gitbook.storage; 4 | 5 | gitbook.events.bind("start", function(e, config) { 6 | 7 | // add the Edit button (edit on Github) 8 | var edit = config.edit; 9 | if (edit && edit.link) gitbook.toolbar.createButton({ 10 | icon: 'fa fa-edit', 11 | label: edit.text || 'Edit', 12 | position: 'left', 13 | onClick: function(e) { 14 | e.preventDefault(); 15 | window.open(edit.link); 16 | } 17 | }); 18 | 19 | var down = config.download; 20 | var normalizeDownload = function() { 21 | if (!down || !(down instanceof Array) || down.length === 0) return; 22 | if (down[0] instanceof Array) return down; 23 | return $.map(down, 
function(file, i) { 24 | return [[file, file.replace(/.*[.]/g, '').toUpperCase()]]; 25 | }); 26 | }; 27 | down = normalizeDownload(down); 28 | if (down) if (down.length === 1 && /[.]pdf$/.test(down[0][0])) { 29 | gitbook.toolbar.createButton({ 30 | icon: 'fa fa-file-pdf-o', 31 | label: down[0][1], 32 | position: 'left', 33 | onClick: function(e) { 34 | e.preventDefault(); 35 | window.open(down[0][0]); 36 | } 37 | }); 38 | } else { 39 | gitbook.toolbar.createButton({ 40 | icon: 'fa fa-download', 41 | label: 'Download', 42 | position: 'left', 43 | dropdown: $.map(down, function(item, i) { 44 | return { 45 | text: item[1], 46 | onClick: function(e) { 47 | e.preventDefault(); 48 | window.open(item[0]); 49 | } 50 | }; 51 | }) 52 | }); 53 | } 54 | 55 | // highlight the current section in TOC 56 | var href = window.location.pathname; 57 | href = href.substr(href.lastIndexOf('/') + 1); 58 | if (href === '') href = 'index.html'; 59 | var li = $('a[href^="' + href + location.hash + '"]').parent('li.chapter').first(); 60 | var summary = $('ul.summary'), chaps = summary.find('li.chapter'); 61 | if (li.length === 0) li = chaps.first(); 62 | li.addClass('active'); 63 | chaps.on('click', function(e) { 64 | chaps.removeClass('active'); 65 | $(this).addClass('active'); 66 | gs.set('tocScrollTop', summary.scrollTop()); 67 | }); 68 | 69 | var toc = config.toc; 70 | // collapse TOC items that are not for the current chapter 71 | if (toc && toc.collapse) (function() { 72 | var type = toc.collapse; 73 | if (type === 'none') return; 74 | if (type !== 'section' && type !== 'subsection') return; 75 | // sections under chapters 76 | var toc_sub = summary.children('li[data-level]').children('ul'); 77 | if (type === 'section') { 78 | toc_sub.hide() 79 | .parent().has(li).children('ul').show(); 80 | } else { 81 | toc_sub.children('li').children('ul').hide() 82 | .parent().has(li).children('ul').show(); 83 | } 84 | li.children('ul').show(); 85 | var toc_sub2 = toc_sub.children('li'); 86 | if 
(type === 'section') toc_sub2.children('ul').hide(); 87 | summary.children('li[data-level]').find('a') 88 | .on('click.bookdown', function(e) { 89 | if (href === $(this).attr('href').replace(/#.*/, '')) 90 | $(this).parent('li').children('ul').toggle(); 91 | }); 92 | })(); 93 | 94 | // add tooltips to the 's that are truncated 95 | $('a').each(function(i, el) { 96 | if (el.offsetWidth >= el.scrollWidth) return; 97 | if (typeof el.title === 'undefined') return; 98 | el.title = el.text; 99 | }); 100 | 101 | // restore TOC scroll position 102 | var pos = gs.get('tocScrollTop'); 103 | if (typeof pos !== 'undefined') summary.scrollTop(pos); 104 | 105 | // highlight the TOC item that has same text as the heading in view as scrolling 106 | if (toc && toc.scroll_highlight !== false) (function() { 107 | // scroll the current TOC item into viewport 108 | var ht = $(window).height(), rect = li[0].getBoundingClientRect(); 109 | if (rect.top >= ht || rect.top <= 0 || rect.bottom <= 0) { 110 | summary.scrollTop(li[0].offsetTop); 111 | } 112 | // current chapter TOC items 113 | var items = $('a[href^="' + href + '"]').parent('li.chapter'), 114 | m = items.length; 115 | if (m === 0) { 116 | items = summary.find('li.chapter'); 117 | m = items.length; 118 | } 119 | if (m === 0) return; 120 | // all section titles on current page 121 | var hs = bookInner.find('.page-inner').find('h1,h2,h3'), n = hs.length, 122 | ts = hs.map(function(i, el) { return $(el).text(); }); 123 | if (n === 0) return; 124 | var scrollHandler = function(e) { 125 | var ht = $(window).height(); 126 | clearTimeout($.data(this, 'scrollTimer')); 127 | $.data(this, 'scrollTimer', setTimeout(function() { 128 | // find the first visible title in the viewport 129 | for (var i = 0; i < n; i++) { 130 | var rect = hs[i].getBoundingClientRect(); 131 | if (rect.top >= 0 && rect.bottom <= ht) break; 132 | } 133 | if (i === n) return; 134 | items.removeClass('active'); 135 | for (var j = 0; j < m; j++) { 136 | if 
(items.eq(j).children('a').first().text() === ts[i]) break; 137 | } 138 | if (j === m) j = 0; // highlight the chapter title 139 | // search bottom-up for a visible TOC item to highlight; if an item is 140 | // hidden, we check if its parent is visible, and so on 141 | while (j > 0 && items.eq(j).is(':hidden')) j--; 142 | items.eq(j).addClass('active'); 143 | }, 250)); 144 | }; 145 | bookInner.on('scroll.bookdown', scrollHandler); 146 | bookBody.on('scroll.bookdown', scrollHandler); 147 | })(); 148 | 149 | // do not refresh the page if the TOC item points to the current page 150 | $('a[href="' + href + '"]').parent('li.chapter').children('a') 151 | .on('click', function(e) { 152 | bookInner.scrollTop(0); 153 | bookBody.scrollTop(0); 154 | return false; 155 | }); 156 | 157 | var toolbar = config.toolbar; 158 | if (!toolbar || toolbar.position !== 'static') { 159 | var bookHeader = $('.book-header'); 160 | bookBody.addClass('fixed'); 161 | bookHeader.addClass('fixed') 162 | .css('background-color', bookBody.css('background-color')) 163 | .on('click.bookdown', function(e) { 164 | // the theme may have changed after user clicks the theme button 165 | bookHeader.css('background-color', bookBody.css('background-color')); 166 | }); 167 | } 168 | 169 | }); 170 | 171 | gitbook.events.bind("page.change", function(e) { 172 | // store TOC scroll position 173 | var summary = $('ul.summary'); 174 | gs.set('tocScrollTop', summary.scrollTop()); 175 | }); 176 | 177 | var bookBody = $('.book-body'), bookInner = bookBody.find('.body-inner'); 178 | var chapterTitle = function() { 179 | return bookInner.find('.page-inner').find('h1,h2').first().text(); 180 | }; 181 | var bookTitle = function() { 182 | return bookInner.find('.book-header > h1').first().text(); 183 | }; 184 | var saveScrollPos = function(e) { 185 | // save scroll position before page is reloaded 186 | gs.set('bodyScrollTop', { 187 | body: bookBody.scrollTop(), 188 | inner: bookInner.scrollTop(), 189 | focused: 
document.hasFocus(), 190 | title: chapterTitle() 191 | }); 192 | }; 193 | $(document).on('servr:reload', saveScrollPos); 194 | 195 | // check if the page is loaded in an iframe (e.g. the RStudio preview window) 196 | var inIFrame = function() { 197 | var inIframe = true; 198 | try { inIframe = window.self !== window.top; } catch (e) {} 199 | return inIframe; 200 | }; 201 | $(window).on('blur unload', function(e) { 202 | if (inIFrame()) saveScrollPos(e); 203 | gs.set('bookTitle', bookTitle()); 204 | }); 205 | 206 | $(function(e) { 207 | if (gs.get('bookTitle', '') !== bookTitle()) localStorage.clear(); 208 | var pos = gs.get('bodyScrollTop'); 209 | if (pos) { 210 | if (pos.title === chapterTitle()) { 211 | if (pos.body !== 0) bookBody.scrollTop(pos.body); 212 | if (pos.inner !== 0) bookInner.scrollTop(pos.inner); 213 | } 214 | if (pos.focused) bookInner.find('.page-wrapper').focus(); 215 | } 216 | // clear book body scroll position 217 | gs.remove('bodyScrollTop'); 218 | }); 219 | 220 | }); 221 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/js/plugin-fontsettings.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | var fontState; 3 | 4 | var THEMES = { 5 | "white": 0, 6 | "sepia": 1, 7 | "night": 2 8 | }; 9 | 10 | var FAMILY = { 11 | "serif": 0, 12 | "sans": 1 13 | }; 14 | 15 | // Save current font settings 16 | function saveFontSettings() { 17 | gitbook.storage.set("fontState", fontState); 18 | update(); 19 | } 20 | 21 | // Increase font size 22 | function enlargeFontSize(e) { 23 | e.preventDefault(); 24 | if (fontState.size >= 4) return; 25 | 26 | fontState.size++; 27 | saveFontSettings(); 28 | }; 29 | 30 | // Decrease font size 31 | function reduceFontSize(e) { 32 | e.preventDefault(); 33 | if (fontState.size <= 0) return; 34 | 35 | fontState.size--; 36 | saveFontSettings(); 37 | }; 38 | 39 
| // Change font family 40 | function changeFontFamily(index, e) { 41 | e.preventDefault(); 42 | 43 | fontState.family = index; 44 | saveFontSettings(); 45 | }; 46 | 47 | // Change type of color 48 | function changeColorTheme(index, e) { 49 | e.preventDefault(); 50 | 51 | var $book = $(".book"); 52 | 53 | if (fontState.theme !== 0) 54 | $book.removeClass("color-theme-"+fontState.theme); 55 | 56 | fontState.theme = index; 57 | if (fontState.theme !== 0) 58 | $book.addClass("color-theme-"+fontState.theme); 59 | 60 | saveFontSettings(); 61 | }; 62 | 63 | function update() { 64 | var $book = gitbook.state.$book; 65 | 66 | $(".font-settings .font-family-list li").removeClass("active"); 67 | $(".font-settings .font-family-list li:nth-child("+(fontState.family+1)+")").addClass("active"); 68 | 69 | $book[0].className = $book[0].className.replace(/\bfont-\S+/g, ''); 70 | $book.addClass("font-size-"+fontState.size); 71 | $book.addClass("font-family-"+fontState.family); 72 | 73 | if(fontState.theme !== 0) { 74 | $book[0].className = $book[0].className.replace(/\bcolor-theme-\S+/g, ''); 75 | $book.addClass("color-theme-"+fontState.theme); 76 | } 77 | }; 78 | 79 | function init(config) { 80 | var $bookBody, $book; 81 | 82 | //Find DOM elements. 
83 | $book = gitbook.state.$book; 84 | $bookBody = $book.find(".book-body"); 85 | 86 | // Instantiate font state object 87 | fontState = gitbook.storage.get("fontState", { 88 | size: config.size || 2, 89 | family: FAMILY[config.family || "sans"], 90 | theme: THEMES[config.theme || "white"] 91 | }); 92 | 93 | update(); 94 | }; 95 | 96 | 97 | gitbook.events.bind("start", function(e, config) { 98 | var opts = config.fontsettings; 99 | 100 | // Create buttons in toolbar 101 | gitbook.toolbar.createButton({ 102 | icon: 'fa fa-font', 103 | label: 'Font Settings', 104 | className: 'font-settings', 105 | dropdown: [ 106 | [ 107 | { 108 | text: 'A', 109 | className: 'font-reduce', 110 | onClick: reduceFontSize 111 | }, 112 | { 113 | text: 'A', 114 | className: 'font-enlarge', 115 | onClick: enlargeFontSize 116 | } 117 | ], 118 | [ 119 | { 120 | text: 'Serif', 121 | onClick: _.partial(changeFontFamily, 0) 122 | }, 123 | { 124 | text: 'Sans', 125 | onClick: _.partial(changeFontFamily, 1) 126 | } 127 | ], 128 | [ 129 | { 130 | text: 'White', 131 | onClick: _.partial(changeColorTheme, 0) 132 | }, 133 | { 134 | text: 'Sepia', 135 | onClick: _.partial(changeColorTheme, 1) 136 | }, 137 | { 138 | text: 'Night', 139 | onClick: _.partial(changeColorTheme, 2) 140 | } 141 | ] 142 | ] 143 | }); 144 | 145 | 146 | // Init current settings 147 | init(opts); 148 | }); 149 | }); 150 | 151 | 152 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/js/plugin-search.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | var index = null; 3 | var $searchInput, $searchForm; 4 | var $highlighted, hi = 0, hiOpts = { className: 'search-highlight' }; 5 | var collapse = false; 6 | 7 | // Use a specific index 8 | function loadIndex(data) { 9 | // [Yihui] In bookdown, I use a character matrix to store the chapter 10 | // content, and the 
index is dynamically built on the client side. 11 | // Gitbook prebuilds the index data instead: https://github.com/GitbookIO/plugin-search 12 | // We can certainly do that via R packages V8 and jsonlite, but let's 13 | // see how slow it really is before improving it. On the other hand, 14 | // lunr cannot handle non-English text very well, e.g. the default 15 | // tokenizer cannot deal with Chinese text, so we may want to replace 16 | // lunr with a dumb simple text matching approach. 17 | index = lunr(function () { 18 | this.ref('url'); 19 | this.field('title', { boost: 10 }); 20 | this.field('body'); 21 | }); 22 | data.map(function(item) { 23 | index.add({ 24 | url: item[0], 25 | title: item[1], 26 | body: item[2] 27 | }); 28 | }); 29 | } 30 | 31 | // Fetch the search index 32 | function fetchIndex() { 33 | return $.getJSON(gitbook.state.basePath+"/search_index.json") 34 | .then(loadIndex); // [Yihui] we need to use this object later 35 | } 36 | 37 | // Search for a term and return results 38 | function search(q) { 39 | if (!index) return; 40 | 41 | var results = _.chain(index.search(q)) 42 | .map(function(result) { 43 | var parts = result.ref.split("#"); 44 | return { 45 | path: parts[0], 46 | hash: parts[1] 47 | }; 48 | }) 49 | .value(); 50 | 51 | // [Yihui] Highlight the search keyword on current page 52 | hi = 0; 53 | $highlighted = results.length === 0 ? undefined : $('.page-inner') 54 | .unhighlight(hiOpts).highlight(q, hiOpts).find('span.search-highlight'); 55 | scrollToHighlighted(); 56 | toggleTOC(results.length > 0); 57 | 58 | return results; 59 | } 60 | 61 | // [Yihui] Scroll the chapter body to the i-th highlighted string 62 | function scrollToHighlighted() { 63 | if (!$highlighted) return; 64 | var n = $highlighted.length; 65 | if (n === 0) return; 66 | var $p = $highlighted.eq(hi), p = $p[0], rect = p.getBoundingClientRect(); 67 | if (rect.top < 0 || rect.bottom > $(window).height()) { 68 | ($(window).width() >= 1240 ? 
$('.body-inner') : $('.book-body')) 69 | .scrollTop(p.offsetTop - 100); 70 | } 71 | $highlighted.css('background-color', ''); 72 | // an orange background color on the current item and removed later 73 | $p.css('background-color', 'orange'); 74 | setTimeout(function() { 75 | $p.css('background-color', ''); 76 | }, 2000); 77 | } 78 | 79 | // [Yihui] Expand/collapse TOC 80 | function toggleTOC(show) { 81 | if (!collapse) return; 82 | var toc_sub = $('ul.summary').children('li[data-level]').children('ul'); 83 | if (show) return toc_sub.show(); 84 | var href = window.location.pathname; 85 | href = href.substr(href.lastIndexOf('/') + 1); 86 | if (href === '') href = 'index.html'; 87 | var li = $('a[href^="' + href + location.hash + '"]').parent('li.chapter').first(); 88 | toc_sub.hide().parent().has(li).children('ul').show(); 89 | li.children('ul').show(); 90 | } 91 | 92 | // Create search form 93 | function createForm(value) { 94 | if ($searchForm) $searchForm.remove(); 95 | if ($searchInput) $searchInput.remove(); 96 | 97 | $searchForm = $('
    ', { 98 | 'class': 'book-search', 99 | 'role': 'search' 100 | }); 101 | 102 | $searchInput = $('', { 103 | 'type': 'search', 104 | 'class': 'form-control', 105 | 'val': value, 106 | 'placeholder': 'Type to search' 107 | }); 108 | 109 | $searchInput.appendTo($searchForm); 110 | $searchForm.prependTo(gitbook.state.$book.find('.book-summary')); 111 | } 112 | 113 | // Return true if search is open 114 | function isSearchOpen() { 115 | return gitbook.state.$book.hasClass("with-search"); 116 | } 117 | 118 | // Toggle the search 119 | function toggleSearch(_state) { 120 | if (isSearchOpen() === _state) return; 121 | if (!$searchInput) return; 122 | 123 | gitbook.state.$book.toggleClass("with-search", _state); 124 | 125 | // If search bar is open: focus input 126 | if (isSearchOpen()) { 127 | gitbook.sidebar.toggle(true); 128 | $searchInput.focus(); 129 | } else { 130 | $searchInput.blur(); 131 | $searchInput.val(""); 132 | gitbook.storage.remove("keyword"); 133 | gitbook.sidebar.filter(null); 134 | $('.page-inner').unhighlight(hiOpts); 135 | toggleTOC(false); 136 | } 137 | } 138 | 139 | // Recover current search when page changed 140 | function recoverSearch() { 141 | var keyword = gitbook.storage.get("keyword", ""); 142 | 143 | createForm(keyword); 144 | 145 | if (keyword.length > 0) { 146 | if(!isSearchOpen()) { 147 | toggleSearch(true); // [Yihui] open the search box 148 | } 149 | gitbook.sidebar.filter(_.pluck(search(keyword), "path")); 150 | } 151 | } 152 | 153 | 154 | gitbook.events.bind("start", function(e, config) { 155 | // [Yihui] disable search 156 | if (config.search === false) return; 157 | collapse = !config.toc || config.toc.collapse === 'section' || 158 | config.toc.collapse === 'subsection'; 159 | 160 | // Pre-fetch search index and create the form 161 | fetchIndex() 162 | // [Yihui] recover search after the page is loaded 163 | .then(recoverSearch); 164 | 165 | 166 | // Type in search bar 167 | $(document).on("keyup", ".book-search input", 
function(e) { 168 | var key = (e.keyCode ? e.keyCode : e.which); 169 | // [Yihui] Escape -> close search box; Up/Down: previous/next highlighted 170 | if (key == 27) { 171 | e.preventDefault(); 172 | toggleSearch(false); 173 | } else if (key == 38) { 174 | if (hi <= 0 && $highlighted) hi = $highlighted.length; 175 | hi--; 176 | scrollToHighlighted(); 177 | } else if (key == 40) { 178 | hi++; 179 | if ($highlighted && hi >= $highlighted.length) hi = 0; 180 | scrollToHighlighted(); 181 | } 182 | }).on("input", ".book-search input", function(e) { 183 | var q = $(this).val().trim(); 184 | if (q.length === 0) { 185 | gitbook.sidebar.filter(null); 186 | gitbook.storage.remove("keyword"); 187 | $('.page-inner').unhighlight(hiOpts); 188 | toggleTOC(false); 189 | } else { 190 | var results = search(q); 191 | gitbook.sidebar.filter( 192 | _.pluck(results, "path") 193 | ); 194 | gitbook.storage.set("keyword", q); 195 | } 196 | }); 197 | 198 | // Create the toggle search button 199 | gitbook.toolbar.createButton({ 200 | icon: 'fa fa-search', 201 | label: 'Search', 202 | position: 'left', 203 | onClick: toggleSearch 204 | }); 205 | 206 | // Bind keyboard to toggle search 207 | gitbook.keyboard.bind(['f'], toggleSearch); 208 | }); 209 | 210 | // [Yihui] do not try to recover search; always start fresh 211 | // gitbook.events.bind("page.change", recoverSearch); 212 | }); 213 | -------------------------------------------------------------------------------- /docs/libs/gitbook-2.6.7/js/plugin-sharing.js: -------------------------------------------------------------------------------- 1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) { 2 | var SITES = { 3 | 'github': { 4 | 'label': 'Github', 5 | 'icon': 'fa fa-github', 6 | 'onClick': function(e) { 7 | e.preventDefault(); 8 | var repo = $('meta[name="github-repo"]').attr('content'); 9 | if (typeof repo === 'undefined') throw("Github repo not defined"); 10 | window.open("https://github.com/"+repo); 11 | } 12 
| }, 13 | 'facebook': { 14 | 'label': 'Facebook', 15 | 'icon': 'fa fa-facebook', 16 | 'onClick': function(e) { 17 | e.preventDefault(); 18 | window.open("http://www.facebook.com/sharer/sharer.php?s=100&p[url]="+encodeURIComponent(location.href)); 19 | } 20 | }, 21 | 'twitter': { 22 | 'label': 'Twitter', 23 | 'icon': 'fa fa-twitter', 24 | 'onClick': function(e) { 25 | e.preventDefault(); 26 | window.open("http://twitter.com/home?status="+encodeURIComponent(document.title+" "+location.href)); 27 | } 28 | }, 29 | 'google': { 30 | 'label': 'Google+', 31 | 'icon': 'fa fa-google-plus', 32 | 'onClick': function(e) { 33 | e.preventDefault(); 34 | window.open("https://plus.google.com/share?url="+encodeURIComponent(location.href)); 35 | } 36 | }, 37 | 'linkedin': { 38 | 'label': 'LinkedIn', 39 | 'icon': 'fa fa-linkedin', 40 | 'onClick': function(e) { 41 | e.preventDefault(); 42 | window.open("https://www.linkedin.com/shareArticle?mini=true&url="+encodeURIComponent(location.href)+"&title="+encodeURIComponent(document.title)); 43 | } 44 | }, 45 | 'weibo': { 46 | 'label': 'Weibo', 47 | 'icon': 'fa fa-weibo', 48 | 'onClick': function(e) { 49 | e.preventDefault(); 50 | window.open("http://service.weibo.com/share/share.php?content=utf-8&url="+encodeURIComponent(location.href)+"&title="+encodeURIComponent(document.title)); 51 | } 52 | }, 53 | 'instapaper': { 54 | 'label': 'Instapaper', 55 | 'icon': 'fa fa-instapaper', 56 | 'onClick': function(e) { 57 | e.preventDefault(); 58 | window.open("http://www.instapaper.com/text?u="+encodeURIComponent(location.href)); 59 | } 60 | }, 61 | 'vk': { 62 | 'label': 'VK', 63 | 'icon': 'fa fa-vk', 64 | 'onClick': function(e) { 65 | e.preventDefault(); 66 | window.open("http://vkontakte.ru/share.php?url="+encodeURIComponent(location.href)); 67 | } 68 | } 69 | }; 70 | 71 | 72 | 73 | gitbook.events.bind("start", function(e, config) { 74 | var opts = config.sharing; 75 | if (!opts) return; 76 | 77 | // Create dropdown menu 78 | var menu = 
_.chain(opts.all) 79 | .map(function(id) { 80 | var site = SITES[id]; 81 | 82 | return { 83 | text: site.label, 84 | onClick: site.onClick 85 | }; 86 | }) 87 | .compact() 88 | .value(); 89 | 90 | // Create main button with dropdown 91 | if (menu.length > 0) { 92 | gitbook.toolbar.createButton({ 93 | icon: 'fa fa-share-alt', 94 | label: 'Share', 95 | position: 'right', 96 | dropdown: [menu] 97 | }); 98 | } 99 | 100 | // Direct actions to share 101 | _.each(SITES, function(site, sideId) { 102 | if (!opts[sideId]) return; 103 | 104 | gitbook.toolbar.createButton({ 105 | icon: site.icon, 106 | label: site.text, 107 | position: 'right', 108 | onClick: site.onClick 109 | }); 110 | }); 111 | }); 112 | }); 113 | -------------------------------------------------------------------------------- /docs/linear-regression.md: -------------------------------------------------------------------------------- 1 | 2 | # (PART) Linear Regression {-} 3 | -------------------------------------------------------------------------------- /docs/multicolinearity.md: -------------------------------------------------------------------------------- 1 | 2 | # Colinearity and Multicolinearity 3 | 4 | ## (Perfect) Colinearity 5 | 6 | In order to estimate unique $\hat{\beta}$ OLS requires the that the columns of the design matrix $\Vec{X}$ are linearly independent. 7 | 8 | Common examples of groups of variables that are not linearly independent: 9 | 10 | - Categorical variables in which there is no excluded category. 11 | You can also include all categories of a categorical variable if you exclude the intercept. 12 | Note that although they are not (often) used in political science, there are other methods of transforming categorical variables to ensure the columns in the design matrix are independent. 13 | - A constant variable. This can happen in practice with dichotomous variables of rare events; if you drop some observations for whatever reason, you may end up dropping all the 1's in the data. 
So although the variable is not constant in the population, in your sample it is constant and cannot be included in the regression. 14 | - A variable that is a multiple of another variable. E.g. you cannot include $\log(\text{GDP in millions USD})$ and $\log({GDP in USD})$ since $\log(\text{GDP in millions USD}) = \log({GDP in USD}) / 1,000,000$. in 15 | - A variable that is the sum of two other variables. E.g. you cannot include $\log(population)$, $\log(GDP)$, $\log(GDP per capita)$ in a regression since 16 | $$\log(\text{GDP per capita}) = \log(\text{GDP} / \text{population}) = \log(\text{GDP}) - \log(\text{population})$$. 17 | 18 | 19 | ## What to do about it? 20 | 21 | R and most statistical programs will run regressions with colinear variables, but will drop variables until only linearly independent columns in $\Mat{X}$ remain. 22 | 23 | For example, consider the following code. The variable `type` is a categorical variable with categories "bc", "wc", and "prof". 24 | It will 25 | 26 | ```r 27 | data(Duncan, package = "car") 28 | # Create dummy variables for each category 29 | Duncan <- mutate(Duncan, 30 | bc = type == "bc", 31 | wc = type == "wc", 32 | prof = type == "prof") 33 | lm(prestige ~ bc + wc + prof, data = Duncan) 34 | ``` 35 | 36 | ``` 37 | ## 38 | ## Call: 39 | ## lm(formula = prestige ~ bc + wc + prof, data = Duncan) 40 | ## 41 | ## Coefficients: 42 | ## (Intercept) bcTRUE wcTRUE profTRUE 43 | ## 80.44 -57.68 -43.78 NA 44 | ``` 45 | R runs the regression, but coefficient and standard errors for `prof` are set to `NA`. 46 | 47 | You should not rely on the software to fix this for you; once you (or the software) notices the problem check the reasons it occurred. The rewrite your regression to remove whatever was creating linearly dependent variables in $\Mat{X}$. 48 | 49 | 50 | 51 | # Multicollinearity 52 | 53 | Multicollinearity is the (poor) name for less-than-perfect collinearity. 
54 | Even though there is enough variation in $\Mat{X}$ to estimate OLS coefficients, if some set of variables in $\Mat{X}$ is highly correlated it will result in large, but unbiased, standard errors on the esimates. 55 | 56 | What happens if variables are not linearly dependent, but nevertheless highly correlated? 57 | If $\Cor(\Vec{x}_1, vec{x}_2) = 1$, then they are linearly dependent and the regression cannot be estimated (see above). 58 | But if $\Cor(\Vec{x}_1, vec{x}_2) = 0.99$, the OLS can estimate unique values of of $\hat\beta$. However, it everything was fine with OLS estimates until, suddenly, when there is linearly independence everything breaks. The answer is yes, and no. 59 | As $|\Cor(\Vec{x}_1, \Vec{x}_2)| \to 1$ the standard errors on the coefficients of these variables increase, but OLS as an estimator works correctly; $\hat\beta$ and $\se{\hat\beta}$ are unbiased. 60 | With multicollinearly, OLS gives you the "right" answer, but it cannot say much with certainty. 61 | 62 | 63 | For a bivariate regression, the distribution of the slope coefficient has variance, 64 | $$ 65 | \Var(\hat{\beta}_1) = \frac{\sigma_u^2}{\sum_{i = 1} (x_i - \bar{x})^2} . 66 | $$ 67 | 68 | What affects the standard error of $\hat{\beta}$? 69 | 70 | - The error variance ($\sigma_u^2$). The higher the variance of the residuals, the higher the variance of the coefficients. 71 | - The variance of $\Vec{x}$. The lower variation in $\Mat{x}$, the bigger the standard errors of the slope. 72 | 73 | Now consider a multiple regression, 74 | $$ 75 | \Vec{y} = \beta_0 + \beta_1 \Vec{x}_1 + \beta_2 \Vec{x}_2 + u 76 | $$ 77 | 78 | this becomes, 79 | $$ 80 | \Var(\hat{\beta}_1) = \frac{\sigma_u^2}{(1 - R^2_1) \sum_{i = 1}^n (x_i - \bar{x})^2} 81 | $$ 82 | where $R^2_1$ is the $R^2$ from the regression of $\Vec{x}_1$ on $\Vec{x}_2$, 83 | $$ 84 | \Vec{x} = \hat{\delta}_0 + \hat{\delta}_1 \Vec{x}_2 . 85 | $$ 86 | 87 | The factors affecting standard errors are 88 | 89 | 1. 
Error variance: higher residuals leads to higher standard errors. 90 | 2. Variance of $\Vec{x}_1$: lower variation in $\Vec{x}_2$ leads to higher standard errors. 91 | 3. The strength of the relationship between $x_1$ and $x_2$. Stronger relationship between $x_1$ and $x_2$ (higher $R^2$ of the regression of $x_1$ on $x_2$) leads to higher standard errors. 92 | 93 | These arguments generalize to more than two predictors. 94 | 95 | ### What do do about it? 96 | 97 | Multicollinearity is not an "error" in the model. 98 | All you can do is: 99 | 100 | 1. Get more data 101 | 2. Find more conditional variation in the predictor of interest 102 | 103 | What it means depends on what you are doing. 104 | 105 | 1. Prediction: then you are interested in $\hat{\Vec{y}}$ and not $\hat{\beta}}$ (or its standard errors). 106 | In this case, multicollinearity is irrelevant. 107 | 108 | 2. Causal inference: in this case you are interested in $\hat{\Vec{\beta}}$. 109 | Multicollinearity does not bias $\hat{\beta}$. 110 | You should include all regressors to achieve balance, and include all relevant pre-treatment variables and not include post-treatment variables. 111 | Multicollinearity is not directly relevant in this choice. 112 | All multicollinearity means is that the variation in the treatment after accounting for selection effects is very low, making it hard to say anything about the treatment effect with that observational data. 113 | More sophisticated methods may trade off some bias for a lower variance (e.g. shrinkage methods), but that must be done systematically, and not ad-hoc dropping relevant pre-treatment variables that simply correlate highly with your treatment variable. 
114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/multicollinearity.md: -------------------------------------------------------------------------------- 1 | 2 | # Collinearity and Multicollinearity 3 | 4 | 5 | ```r 6 | library("tidyverse") 7 | library("carData") 8 | ``` 9 | 10 | ## (Perfect) collinearity 11 | 12 | In order to estimate unique $\hat{\beta}$ OLS requires the that the columns of the design matrix $\Vec{X}$ are linearly independent. 13 | 14 | Common examples of groups of variables that are not linearly independent: 15 | 16 | - Categorical variables in which there is no excluded category. 17 | You can also include all categories of a categorical variable if you exclude the intercept. 18 | Note that although they are not (often) used in political science, there are other methods of transforming categorical variables to ensure the columns in the design matrix are independent. 19 | 20 | - A constant variable. This can happen in practice with dichotomous 21 | variables of rare events; if you drop some observations for whatever 22 | reason, you may end up dropping all the 1's in the data. So although the 23 | variable is not constant in the population, in your sample it is constant 24 | and cannot be included in the regression. 25 | 26 | - A variable that is a multiple of another variable. E.g. you cannot include $\log(\text{GDP in millions USD})$ and $\log({GDP in USD})$ since $\log(\text{GDP in millions USD}) = \log({GDP in USD}) / 1,000,000$. 27 | 28 | - A variable that is the sum of two other variables. E.g. you cannot include $\log(population)$, $\log(GDP)$, $\log(GDP per capita)$ in a regression since 29 | $$\log(\text{GDP per capita}) = \log(\text{GDP} / \text{population}) = \log(\text{GDP}) - \log(\text{population})$$. 30 | 31 | ## What to do about it? 
32 | 33 | R and most statistical programs will run regressions with collinear variables, but will drop variables until only linearly independent columns in $\Mat{X}$ remain. 34 | 35 | For example, consider the following code. The variable `type` is a categorical variable with categories "bc", "wc", and "prof". 36 | 37 | 38 | ```r 39 | data(Duncan, package = "carData") 40 | # Create dummy variables for each category 41 | Duncan <- mutate(Duncan, 42 | bc = type == "bc", 43 | wc = type == "wc", 44 | prof = type == "prof") 45 | lm(prestige ~ bc + wc + prof, data = Duncan) 46 | ``` 47 | 48 | ``` 49 | ## 50 | ## Call: 51 | ## lm(formula = prestige ~ bc + wc + prof, data = Duncan) 52 | ## 53 | ## Coefficients: 54 | ## (Intercept) bcTRUE wcTRUE profTRUE 55 | ## 80.44 -57.68 -43.78 NA 56 | ``` 57 | R runs the regression, but coefficient and standard errors for `prof` are set to `NA`. 58 | 59 | You should not rely on the software to fix this for you; once you (or the software) notices the problem check the reasons it occurred. The rewrite your regression to remove whatever was creating linearly dependent variables in $\Mat{X}$. 60 | 61 | ## Multicollinearity 62 | 63 | Multicollinearity is the (poor) name for less-than-perfect collinearity. 64 | Even though there is enough variation in $\Mat{X}$ to estimate OLS coefficients, if some set of variables in $\Mat{X}$ is highly correlated it will result in large, but unbiased, standard errors on the estimates. 65 | 66 | What happens if variables are not linearly dependent, but nevertheless highly correlated? 67 | If $\Cor(\Vec{x}_1, vec{x}_2) = 1$, then they are linearly dependent and the regression cannot be estimated (see above). 68 | But if $\Cor(\Vec{x}_1, vec{x}_2) = 0.99$, the OLS can estimate unique values of of $\hat\beta$. However, it everything was fine with OLS estimates until, suddenly, when there is linearly independence everything breaks. The answer is yes, and no. 
69 | As $|\Cor(\Vec{x}_1, \Vec{x}_2)| \to 1$ the standard errors on the coefficients of these variables increase, but OLS as an estimator works correctly; $\hat\beta$ and $\se{\hat\beta}$ are unbiased. 70 | With multicollinearity, OLS gives you the "right" answer, but it cannot say much with certainty. 71 | 72 | For a bivariate regression, the distribution of the slope coefficient has variance, 73 | $$ 74 | \Var(\hat{\beta}_1) = \frac{\sigma_u^2}{\sum_{i = 1} (x_i - \bar{x})^2} . 75 | $$ 76 | 77 | What affects the standard error of $\hat{\beta}$? 78 | 79 | - The error variance ($\sigma_u^2$). The higher the variance of the residuals, the higher the variance of the coefficients. 80 | - The variance of $\Vec{x}$. The lower variation in $\Mat{x}$, the bigger the standard errors of the slope. 81 | 82 | Now consider a multiple regression, 83 | $$ 84 | \Vec{y} = \beta_0 + \beta_1 \Vec{x}_1 + \beta_2 \Vec{x}_2 + u 85 | $$ 86 | 87 | this becomes, 88 | $$ 89 | \Var(\hat{\beta}_1) = \frac{\sigma_u^2}{(1 - R^2_1) \sum_{i = 1}^n (x_i - \bar{x})^2} 90 | $$ 91 | where $R^2_1$ is the $R^2$ from the regression of $\Vec{x}_1$ on $\Vec{x}_2$, 92 | $$ 93 | \Vec{x} = \hat{\delta}_0 + \hat{\delta}_1 \Vec{x}_2 . 94 | $$ 95 | 96 | The factors affecting standard errors are 97 | 98 | 1. Error variance: higher residuals leads to higher standard errors. 99 | 1. Variance of $\Vec{x}_1$: lower variation in $\Vec{x}_2$ leads to higher standard errors. 100 | 1. The strength of the relationship between $x_1$ and $x_2$. Stronger relationship between $x_1$ and $x_2$ (higher $R^2$ of the regression of $x_1$ on $x_2$) leads to higher standard errors. 101 | 102 | These arguments generalize to more than two predictors. 103 | 104 | ## What do do about it? 105 | 106 | Multicollinearity is not an "error" in the model. 107 | All you can do is: 108 | 109 | 1. Get more data 110 | 1. Find more conditional variation in the predictor of interest 111 | 112 | What it means depends on what you are doing. 
113 | 114 | 1. Prediction: then you are interested in $\hat{\Vec{y}}$ and not $\hat{\beta}}$ (or its standard errors). 115 | In this case, multicollinearity is irrelevant. 116 | 117 | 1. Causal inference: in this case you are interested in $\hat{\Vec{\beta}}$. 118 | Multicollinearity does not bias $\hat{\beta}$. 119 | You should include all regressors to achieve balance, and include all relevant pre-treatment variables and not include post-treatment variables. 120 | Multicollinearity is not directly relevant in this choice. 121 | All multicollinearity means is that the variation in the treatment after accounting for selection effects is very low, making it hard to say anything about the treatment effect with that observational data. 122 | More sophisticated methods may trade off some bias for a lower variance (e.g. shrinkage methods), but that must be done systematically, and not ad-hoc dropping relevant pre-treatment variables that simply correlate highly with your treatment variable. 123 | -------------------------------------------------------------------------------- /docs/presentation.md: -------------------------------------------------------------------------------- 1 | 2 | # (PART) Presentation {-} 3 | -------------------------------------------------------------------------------- /docs/programming.md: -------------------------------------------------------------------------------- 1 | 2 | # (PART) Programming {-} 3 | -------------------------------------------------------------------------------- /docs/rd.md: -------------------------------------------------------------------------------- 1 | 2 | # Regression Discontinuity 3 | 4 | Summary: If there are thresholds whereby some observations receive the 5 | treatment above it, other those below it do not, and those immediately above or 6 | below that threshold are similar, we can use the difference of the outcome 7 | between those just above and those just below the threshold to estimate the 8 | causal 
effect of the treatment. 9 | 10 | Suppose there is a running variable $x$ such that any person receives the treatment, $d$ if $x \geq a$ and does not if $x \leq a$, 11 | $$ 12 | d = \begin{cases} 13 | 1 & x \geq a \\ 14 | 0 & x < a 15 | \end{cases} 16 | $$ 17 | 18 | A simple regression discontinuity model is, 19 | $$ 20 | \begin{aligned}[t] 21 | y_i = \alpha + \beta x_i + \tau d_i + \gamma x_i d_i + \epsilon_i 22 | \end{aligned} 23 | $$ 24 | The local causal effect of the treatment at the discontinuity is $\tau$. 25 | 26 |
    27 | Fake Example of a Regression Discontinuity. The difference at the threshold (50) is the effect of the treatment. 28 |

    (\#fig:unnamed-chunk-2)Fake Example of a Regression Discontinuity. The difference at the threshold (50) is the effect of the treatment.

    29 |
    30 | 31 | However, there are several choices 32 | 33 | - Functional form of the trends before and after the discontinuity 34 | - The size of the window of observations before and after the trend which to compare. 35 | 36 | How to choose? 37 | 38 | - parametric: chooses specific functional forms 39 | - non-parametric: uses flexible forms, and chooses a bandwidth [@ImbensKalyanaraman2011a] 40 | 41 | Sharp vs. Fuzzy Discontinuity? 42 | 43 | - Sharp: the assignment of the treatment occurs with certainty at the threshold. 44 | - Fuzzy: the assignment of the treatment occurs only probabilistically at the threshold. 45 | 46 | Suppose that the causal effect of treatment $T \in \{0, 1\}$ on unit $i$ is $\tau_i = Y_i(1) - Y_i(0)$ where $Y_i(1)$ is the potential outcome of $i$ under the treatment and $Y_i(0)$ is the potential outcome of $i$ under the control. 47 | If potential outcomes are distributed smoothly at the cut-point $c$, then the average causal effect of the treatment at the cut-point, $Z_i = c$: 48 | $$ 49 | \tau_{RD} = \E[Y_{i}(1) - Y_i(0)| Z_i = c] = \lim_{Z_i \downarrow c}\E[Y_{i}(1) | Z_i = c] - \lim_{Z_i \uparrow c}\E[Y_i(0)| Z_i = c] 50 | $$ 51 | 52 | An advantage of RD designs is that unlike selection on observables or IV, its identifying assumptions are more observable and testable. 53 | 54 | There are two basic tests (@LeeLemieux2010a): 55 | 56 | 1. Continuity of pre-treatment covariates. E.g. density test of McCrary (2008). Whether the ratio of treated to control units departs from chance. 57 | A difficulty is that balance only holds in the limit, and covariance balance may still be present in finite samples. 58 | 59 | 1. Irrelevance of covariates to the treatment-outcome relationship. There should be no systematic association between covariates and treatment, so controlling for them shouldn't affect the estimates. 60 | 61 | ## Examples 62 | 63 | - @ThistlethwaiteCampbell1960a was the first example of RD. 
64 | 65 | - Outcome: Career choices in teaching 66 | - Running variable: PSAS scores 67 | - Cutoff: receiving National Merit Finalist 68 | - Discussed: @AngristPischke2014a [Ch 4] 69 | 70 | - @CarpenterDobkin2011a, @CarpenterDobkin2009a 71 | 72 | - Running variable: age 73 | - Cutoff: ability to drink alcohol legally 74 | - Outcome: Death, accidents 75 | - Discussed: @AngristPischke2014a [Ch 4] 76 | 77 | - @AbdulkadirogluAngristPathak2014a 78 | 79 | - Running variable: exam score 80 | - Cutoff: above threshold receive an offer from a school. This is fuzzy since not all those who receive the offer attend. 81 | - Outcome: Educational outcomes 82 | - Discussed: @AngristPischke2014a [Ch 4] 83 | 84 | - @EggersHainmueller2009a 85 | 86 | - units: UK MPs 87 | - outcome: personal wealth 88 | - treatment: winning an election (holding office) 89 | - running variable: vote share 90 | 91 | - @LitschigMorrison2013a 92 | 93 | - units: Brazilian municipalities 94 | - outcome: education, literacy, poverty rate 95 | - treatment: receiving a cash transfer from the central government (there are population cutoffs) 96 | - running variable: population 97 | 98 | - @GelmanHill2007a [p. 213-217] 99 | 100 | - units: US Congressional members 101 | - outcome: ideology of representative 102 | - treatment: winning election 103 | - running variable: vote share 104 | 105 | - @GelmanKatz2007a, @GelmanHill2007a [p. 232] 106 | 107 | - units: patients 108 | - outcome: length of hospital stay 109 | - treatment: new surgery method 110 | - cutoff: not performed on those over 80 111 | - running variable: age 112 | 113 | - @LeeMorettiButler2004a. Also see derived examples in @Bailey2016a [Ex. 6.3]. See @Button2015a for a replication. 
114 | 115 | - units: congressional districts 116 | - outcome: ideology of nominees 117 | - treatment: election 118 | - running variable: vote share 119 | 120 | - @JacobLefgren2004a 121 | 122 | - units: students 123 | - outcome: education achievement 124 | - treatment: summer school, retention 125 | - running variable: standardized test 126 | 127 | ## Example: Close Elections 128 | 129 | A common use of RD in political science and econ is election outcomes. 130 | In this case the "treatment" is winning the election; it is applied to the candidate whose vote exceeds the threshold of 50%, but not to candidates arbitrarily below that threshold. 131 | Thus "close" elections are a common use of RD designs. 132 | This design was formalized in @Lee2008a. 133 | 134 | Several papers question whether close elections satisfy the assumptions of RD: 135 | 136 | - @CaugheySekhon2011a look at US House elections (1942-2008). They find that close elections are more imbalanced. They attribute this to national partisan waves. 137 | - @GrimmerHershFeinsteinEtAl2011a look at all US House elections 1880-2008. They find that structurally advantaged candidates (strong party, incumbents) are more likely to win close elections. 138 | 139 | The ways in which close elections can be non-random are lawsuit challenges and fraud. 140 | 141 | @EggersFowlerHainmuellerEtAl2014a addresses these concerns with a systematic review of 40,000 close elections: "U.S. House in other time periods, statewide, state legislative, and mayoral races in the U.S. and national or local elections in nine other countries" 142 | Only the US House appears to have these issues. 143 | 144 | ## Software 145 | 146 | See the R packages 147 | 148 | - **[rddtools](https://cran.r-project.org/package=rddtools)**: a new and fairly complete package of regression discontinuity from primary data viz to other tests. 
149 | - **[rdd](https://cran.r-project.org/package=rdd)** 150 | - **[rdrobust](https://cran.r-project.org/package=rdrobust)**: Tools for data-driven graphical and analytical statistical inference in RD. 151 | - **[rdpower](https://cran.r-project.org/package=rdpower)**: Calculate power for RD designs. 152 | - **[rdmulti](https://cran.r-project.org/package=rdmulti)**: Analyze designs with multiple cutoffs. 153 | 154 | See entries in the [Econometrics](https://cran.r-project.org/web/views/Econometrics.html) task view. 155 | 156 | ## References 157 | 158 | Textbooks and Reviews: 159 | 160 | - @AngristPischke2014a [Ch. 4] 161 | - @GelmanHill2007a [Sec. 10.4] 162 | - @Bailey2016a [Ch. 11] 163 | - @LindenAdamsRoberts2006a for applications to medicine 164 | - @HahnToddKlaauw2001a An early review of RD in economics 165 | 166 | Methods: 167 | 168 | - @ImbensKalyanaraman2011a propose an optimal bandwidth selection method 169 | -------------------------------------------------------------------------------- /docs/references-3.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/docs/references-3.html -------------------------------------------------------------------------------- /docs/references.md: -------------------------------------------------------------------------------- 1 | 2 | # References {-} 3 | -------------------------------------------------------------------------------- /docs/reproducible-research.md: -------------------------------------------------------------------------------- 1 | 2 | # Reproducible Research 3 | -------------------------------------------------------------------------------- /docs/reproducible_research.md: -------------------------------------------------------------------------------- 1 | 2 | # Reproducible Research 3 | -------------------------------------------------------------------------------- 
/docs/word-processing.md: -------------------------------------------------------------------------------- 1 | 2 | # Typesetting and Word Processing Programs 3 | 4 | ## LaTeX 5 | 6 | [LaTeX](https://en.wikipedia.org/wiki/LaTeX) is a document markup language (think something like HTML) that is widely used in academia.[^pronunciation] 7 | Its primary advantages over Word (and word processors) are the separation of content and presentation and its formatting of mathematical equations. 8 | In addition to papers, it is often used for academic slides; many talk slides are prepared with beamer. 9 | 10 | ### Learning LaTeX 11 | 12 | Here are some links to get started learning LaTeX: 13 | 14 | - [Overleaf Free & Interactive Online Introduction to LaTeX](https://www.overleaf.com/latex/learn/free-online-introduction-to-latex-part-1) 15 | - [LaTeX Tutorial](https://www.latex-tutorial.com/tutorials/) has interactive lessons 16 | - [ShareLaTeX Documentation](https://www.sharelatex.com/learn/) 17 | - [Overleaf Example Templates](https://www.overleaf.com/latex/templates/) has many different examples of LaTeX documents. 18 | - [LaTeX Wikibook](https://en.wikibooks.org/wiki/LaTeX) 19 | - [Not So Short Introduction to LaTeX](https://tobi.oetiker.ch/lshort/lshort.pdf) is a classic, but not as as new-user friendly as the others. 20 | 21 | ### Using LaTeX 22 | 23 | - Use an online service such as [Overleaf](https://www.overleaf.com/) or [ShareLaTeX](https://www.sharelatex.com/). These are great for collaboration, but become inflexible 24 | when you want to customize your workflow. 25 | 26 | - Write it with a specialized editor such as [TeXmaker](http://www.xm1math.net/texmaker/), [TeXStudio](http://www.texstudio.org/), or [TeXshop](http://pages.uoregon.edu/koch/texshop/). These generally have 27 | built ways to insert text, and also live preview. I would stay away from editors such as [LyX](https://www.lyx.org/) that are [WYSIWYG](https://en.wikipedia.org/wiki/WYSIWYG). 
28 | 29 | - Write it with a general purpose editor such as [Atom](https://atom.io/) or [Sublime Text](https://www.sublimetext.com/).[^1] Most editors have a plugin 30 | to make writing LaTeX easier. For Atom there is [LaTeXTools](https://atom.io/packages/latextools), and for Sublime Text, [LaTeXTools](https://github.com/SublimeText/LaTeXTools) 31 | 32 | [^1]: And of course [Vim](http://www.vim.org/) or [Emacs](https://www.gnu.org/software/emacs/). 33 | 34 | ### LaTeX with R 35 | 36 | This is pretty easy. Rnw, also called Sweave, documents allow you to mix R chunks with LaTeX. 37 | This is similar to R markdown, but with LaTeX instead of markdown.[^2] 38 | 39 | Many packages, such as [xtable](https://cran.r-project.org/package=xtable), [stargazer](https://cran.r-project.org/package=stargazer), or [texreg](https://cran.r-project.org/package=texreg) produce formatted output in LaTeX. 40 | When you use these programs, do not copy and paste the output. Instead, save it to a file, 41 | and use `\input{}` to include the contents in your document. 42 | 43 | [^2]: And [Sweave](https://www.statistik.lmu.de/~leisch/Sweave/) files preceded R markdown and knitr by many years. 44 | 45 | ## Word 46 | 47 | While I use LaTeX in my own work, Microsoft Word is a powerful piece of software, 48 | and many of the complaints against Word come down to not being aware of its 49 | features. There are many tools you can use to build your research paper; 50 | whatever tool you use, learn how to use it proficiently. 51 | 52 | ### General Advice 53 | 54 | This guide on using [Microsoft Word for Dissertations](http://guides.lib.umich.edu/c.php?g=283073&p=1886001) 55 | covers everything and more that I would have. 
Also see [this](http://www3.nd.edu/~shill2/dtclass/word_2013_word_for_research_projects.pdf). 56 | 57 | - [separate presentation and content](https://en.wikipedia.org/wiki/Separation_of_presentation_and_content) using styles 58 | 59 | - Automatically number figures and tables 60 | 61 | - Use a reference manager like [Mendeley](https://www.mendeley.com/), [Zotero](https://www.zotero.org/), [colwiz](https://www.colwiz.com/app), or [Papers](http://www.papersapp.com/). They have plugins for citations in Word. 62 | 63 | - When exporting figures for Word, if you must use a [raster graphic](https://en.wikipedia.org/wiki/Raster_graphics) use PNG files (not JPEG). For publication, use a high DPI (600) with PNG graphics. 64 | 65 | - Learn to use *Fields*. You can insert figures from files that you can 66 | update using `Insert > Field > Links and References > IncludePicture`. 67 | This is useful for programmatically generating figures to insert into 68 | your document. Likewise, you can insert text from files that you can 69 | update using `Insert > Field > Links and References > IncludeText`. 70 | 71 | ### Using R with Word 72 | 73 | For dynamic reports you can use [R Markdown](http://rmarkdown.rstudio.com/word_document_format.html) and export to a Word document. When doing this, use a reference document to set the styles that you will use. 74 | See [Happy collaboration with Rmd to docx](http://rmarkdown.rstudio.com/articles_docx.html) for more advice on using R Markdown with Word. 75 | 76 | Functions from packages such as [xtable](https://cran.r-project.org/package=xtable), [stargazer](https://cran.r-project.org/package=stargazer), or [texreg](https://cran.r-project.org/package=texreg) can output HTML, which can be copied and pasted into Word. 77 | 78 | Finally, the [ReporteR](http://davidgohel.github.io/ReporteRs/word.html) package is an alternative method to generate Word Documents from R. 
79 | 80 | [^pronunciation]: TeX is pronounced as "teck" because the X is a Greek chi. The pronunciation of of LaTeX is thus lah-teck or lay-teck. It is not 81 | pronounced like the rubber compound. See this [StackExchange](http://tex.stackexchange.com/questions/17502/what-is-the-correct-pronunciation-of-tex-and-latex) question on the pronunciation of LaTeX. 82 | -------------------------------------------------------------------------------- /docs/writing.md: -------------------------------------------------------------------------------- 1 | 2 | # Writing Resources 3 | 4 | ## Writing and Organizing Papers 5 | 6 | - Chris Adolph. [Writing Empirical Papers: 6 Rules & 12 Recommendations](http://faculty.washington.edu/cadolph/503/papers.pdf) 7 | 8 | - Barry R. Weingast. 2015. [CalTech Rules for Writing Papers: How to Structure Your Paper and Write an Introduction](https://web.stanford.edu/group/mcnollgast/cgi-bin/wordpress/wp-content/uploads/2013/10/CALTECH.RUL_..pdf) 9 | 10 | - [The Science of Scientific Writing](http://www.americanscientist.org/issues/id.877,y.0,no.,content.true,page.1,css.print/issue.aspx) *American Scientist* 11 | 12 | - Deidre McCloskey. [Economical Writing](http://www.amazon.com/Economical-Writing-Deirdre-McCloskey/dp/1577660633/) 13 | 14 | - William Thompson. [A Guide for the Young Economist](http://www.amazon.com/Guide-Young-Economist-MIT-Press/dp/026251589X). "Chapter 2: Writing Papers." 15 | 16 | - Stephen Van Evera. [Guide to Methods for Students of Political Science](http://www.amazon.com/Guide-Methods-Students-Political-Science/dp/080148457X). Appendix. 17 | 18 | - Joseph M. Williams and Joseph Bizup. [Style: Lessons in Clarity and Grace](http://www.amazon.com/dp/0321898680/) 19 | 20 | - Strunk and White. 
*The Elements of Style* 21 | 22 | - [Chicago Manual of Style](http://www.chicagomanualofstyle.org/) and [APSA Style Manual for Political Science](http://www.apsanet.org/Portals/54/APSA%20Files/publications/APSAStyleManual2006.pdf) for editorial and style issues. 23 | 24 | - [How to construct a Nature summary paragraph](http://www.nature.com/nature/authors/gta/Letter_bold_para.doc). Though specific to *Nature*, it provides good advice for structuring abstracts or introductions. 25 | 26 | - Ezra Klein. [How researchers are terrible communications, and how they can do better](http://chrisblattman.com/2015/11/05/ezra-klein-how-researchers-are-terrible-communicators-and-how-they-can-do-better/). 27 | 28 | - The advice in the *AJPS* [Instructions for Submitting Authors](http://ajps.org/guidelines-for-manuscripts/) is a concise description of how to write an abstract: 29 | 30 | > The abstract should provide a very concise descriptive summary of the research stream to which the manuscript contributes, the specific research 31 | > topic it addresses, the research strategy employed for the analysis, the results obtained from the analysis, and the implications of the findings. 32 | 33 | - [Concrete Advice for Writing Informative Abstracts](http://connection.sagepub.com/blog/sage-connection/2014/05/15/concrete-advice-for-writing-informative-abstracts/) and [How to Carefully Choose Useless Titles for Academic Writing](http://www.socialsciencespace.com/2014/03/how-to-carefully-choose-useless-titles-for-academic-writing/) 34 | 35 | ## Finding Research Ideas 36 | 37 | - Paul Krugman [How I Work](http://web.mit.edu/krugman/www/howiwork.html) 38 | - Hal Varian. 
[How to build an Economic Model in your spare time](http://people.ischool.berkeley.edu/~hal/Papers/how.pdf) 39 | - Greg Mankiw, [My Rules of Thumb](http://faculty.som.yale.edu/jameschoi/mankiw_tips.pdf): 40 | - The links in [Advice for Grad Students](http://gregmankiw.blogspot.com/2006/05/advice-for-grad-students.html) 41 | 42 | ## Replications 43 | 44 | Gary King has advice on how to turn a replication into a publishable paper: 45 | 46 | - Gary King [How to Write a Publishable Paper as a Class Project](http://gking.harvard.edu/papers) 47 | 48 | - Gary King. 2006. "[Publication, Publication.](http://gking.harvard.edu/files/abs/paperspub-abs.shtml)" *PS: Political Science and Politics*. 49 | 50 | - [Political Science Should Not Stop Young Researchers from Replicating](https://politicalsciencereplication.wordpress.com/2015/06/15/political-science-should-not-stop-young-researchers-from-replicating/) 51 | from the [Political Science Replication](https://politicalsciencereplication.wordpress.com) blog. 52 | 53 | And see the examples of students replications from his Harvard course at . 54 | 55 | Famous replications. 56 | 57 | - "Irregularities in LaCour (2014) [@BroockmanKallaAronow2015a] 58 | - "Does High Public Debt Consistently Stifle Economic Growth? A Critique of Reinhart and Rogoff." [@HerndonAshPollin2013a] 59 | 60 | However, although those replications are famous for finding fraud or obvious 61 | errors in the analysis, replications can lead to extensions and generate new 62 | ideas. This was the intent of @BroockmanKallaAronow2015a when starting the 63 | replication. 
64 | -------------------------------------------------------------------------------- /eda.Rmd: -------------------------------------------------------------------------------- 1 | # (PART) Exploratory Data Analysis {-} 2 | -------------------------------------------------------------------------------- /img/1000px-Coefficient_of_Determination.svg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/img/1000px-Coefficient_of_Determination.svg.png -------------------------------------------------------------------------------- /img/islr-fig-6.7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/img/islr-fig-6.7.png -------------------------------------------------------------------------------- /img/laffer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/img/laffer.png -------------------------------------------------------------------------------- /img/tobias-funke-blue.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/img/tobias-funke-blue.jpeg -------------------------------------------------------------------------------- /includes/after_body.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/includes/after_body.html -------------------------------------------------------------------------------- /includes/before_body.html: 
-------------------------------------------------------------------------------- 1 | \[ 2 | \DeclareMathOperator{\E}{E} 3 | \DeclareMathOperator{\mean}{mean} 4 | \DeclareMathOperator{\Var}{Var} 5 | \DeclareMathOperator{\Cov}{Cov} 6 | \DeclareMathOperator{\Cor}{Cor} 7 | \DeclareMathOperator{\Bias}{Bias} 8 | \DeclareMathOperator{\MSE}{MSE} 9 | \DeclareMathOperator{\RMSE}{RMSE} 10 | \DeclareMathOperator{\sd}{sd} 11 | \DeclareMathOperator{\se}{se} 12 | \DeclareMathOperator{\rank}{rank} 13 | \DeclareMathOperator*{\argmin}{arg\,min} 14 | \DeclareMathOperator*{\argmax}{arg\,max} 15 | 16 | \newcommand{\Mat}[1]{\boldsymbol{#1}} 17 | \newcommand{\Vec}[1]{\boldsymbol{#1}} 18 | \newcommand{\T}{'} 19 | 20 | \newcommand{\distr}[1]{\mathcal{#1}} 21 | \newcommand{\dnorm}{\distr{N}} 22 | \newcommand{\dmvnorm}[1]{\distr{N}_{#1}} 23 | \newcommand{\dt}[1]{\distr{T}_{#1}} 24 | 25 | \newcommand{\cia}{\perp\!\!\!\perp} 26 | \DeclareMathOperator*{\plim}{plim} 27 | \] 28 | -------------------------------------------------------------------------------- /includes/in_header.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jrnold/intro-methods-notes/4c1342aa322c728ad21bbfaa2eeade554cb79b6c/includes/in_header.html -------------------------------------------------------------------------------- /includes/preamble.tex: -------------------------------------------------------------------------------- 1 | \usepackage{booktabs} 2 | 3 | \DeclareMathOperator{\E}{E} 4 | \DeclareMathOperator{\mean}{mean} 5 | \DeclareMathOperator{\Var}{Var} 6 | \DeclareMathOperator{\Cov}{Cov} 7 | \DeclareMathOperator{\Cor}{Cor} 8 | \DeclareMathOperator{\Bias}{Bias} 9 | \DeclareMathOperator{\MSE}{MSE} 10 | \DeclareMathOperator{\sd}{sd} 11 | \DeclareMathOperator{\se}{se} 12 | \DeclareMathOperator{\rank}{rank} 13 | \DeclareMathOperator*{\argmin}{arg\,min} 14 | \DeclareMathOperator*{\argmax}{arg\,max} 15 | 16 | \newcommand{\mat}[1]{\boldsymbol{#1}} 17 | 
\renewcommand{\vec}[1]{\boldsymbol{#1}} 18 | \renewcommand{\T}{'} 19 | 20 | \newcommand{\distr}[1]{\mathcal{#1}} 21 | \newcommand{\dnorm}{\distr{N}} 22 | \newcommand{\dmvnorm}[1]{\distr{N}_{#1}} 23 | \newcommand{\dt}[1]{\distr{T}_{#1}} 24 | 25 | \newcommand{\cia}{\perp\!\!\!\perp} 26 | \DeclareMathOperator*{\plim}{plim} 27 | -------------------------------------------------------------------------------- /index.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Data Analysis Notes" 3 | author: "Jeffrey B. Arnold" 4 | date: "`r Sys.Date()`" 5 | knit: "bookdown::render_book" 6 | bibliography: ["intromethods.bib"] 7 | biblio-style: "apalike" 8 | link-citations: true 9 | documentclass: book 10 | colorlinks: yes 11 | lot: yes 12 | lof: yes 13 | monofont: "Source Code Pro" 14 | monofontoptions: "Scale=0.7" 15 | site: bookdown::bookdown_site 16 | github-repo: jrnold/intro-methods-notes 17 | description: > 18 | These are notes associated with the course, POLS/CS&SS 503: Advanced Quantitative Political Methodology at the University of Washington. 19 | --- 20 | 21 | # Introduction 22 | 23 | Notes used when teaching "POLS/CS&SS 501: Advanced Political Research Design and Analysis" and "POLS/CS&SS 503: Advanced Quantitative Political Methodology" at the University of Washington. 
24 | 25 | 26 | $$ 27 | $$ 28 | -------------------------------------------------------------------------------- /intro-methods-notes.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: No 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Makefile 19 | -------------------------------------------------------------------------------- /linear-regression.Rmd: -------------------------------------------------------------------------------- 1 | # (PART) Linear Regression {-} 2 | -------------------------------------------------------------------------------- /model-fit.Rmd: -------------------------------------------------------------------------------- 1 | # Model Fit 2 | 3 | ## Sums of Squares 4 | 5 | There are several "sums of squares" that are important for regression. 6 | 7 | | RSS | Residual Sum of squares | $\sum_{i = 1} (\hat{y}_i - y_i)^2$ | 8 | | MSS | Model Sum of squares | $\sum_{i = 1} (\hat{y}_i - \mean{y})^2$ | 9 | | TSS | Total Sum of squares | $\sum_{i = 1} (y_i - \mean{y}_i)^2$ | 10 | 11 | - The *residual sum of squares* is the total error of the model. How much of $y$ is not explained[^causal] by $x$? 12 | - The *model sum of squares* is the total difference between the regression line and the mean of $y$. How much of $y$ is explained by $x$? 13 | - The *total sum of squares* is the total variation (numerator of the variance of $y$) in $y$ unconditional of $x$. How much of $y$ is there to explain? 14 | 15 | $$ 16 | \begin{aligned}[t] 17 | TSS = MSS + RSS \\ 18 | \text{total variation} = \text{variation explained by model} + \text{remaining variation} 19 | \end{aligned} 20 | $$ 21 | 22 | These terms have many names ... some of which conflict with each other. 
23 | 24 | - RSS (Residual sum of squares) 25 | 26 | - SSE (Sum of squared errors) 27 | - SSR (Sum of squared residuals) 28 | 29 | - MSS (Model sum of squares) 30 | 31 | - RSS (Regression sum of squares) 32 | 33 | - TSS (Total sum of squares) 34 | 35 | The OLS variance decomposition is 36 | 37 | $$ 38 | TSS = MSS + RSS 39 | $$ 40 | 41 | ## Regression Standard Error 42 | 43 | The regression standard error for a linear regression with $n$ observations and $k$ variables is 44 | $$ 45 | \hat{\sigma} = \sqrt{\frac{\sum_{i = 1}^n \hat{\epsilon}_i^2}{n - k - 1}} 46 | $$ 47 | The $n - k - 1$ denominator is the *regression degrees of freedom*. 48 | Since we have already estimated $k$ slope coefficients and the intercept, there are only $n - k - 1$ values left to estimate the regression standard error. 49 | 50 | But recall regression standard error is an estimator for the population $\sigma$, for the population model, 51 | $$ 52 | Y = X \beta + \epsilon 53 | $$ 54 | where $\E(\epsilon) = 0$ and $\Var(\epsilon) = \sigma^2$. 55 | The $n - k - 1$ denominator is needed for the estimator (of the variance) to be unbiased. 56 | 57 | ## (Root) Mean Squared Error 58 | 59 | The statistic mean squared error (MSE) is, 60 | $$ 61 | MSE(\hat{\epsilon}) = \frac{1}{n} \sum_{i = 1}^n \hat{\epsilon}_i^2 . 62 | $$ 63 | 64 | Unlike $\hat{\sigma}$ the denominator is $n$, not $n - k - 1$. 65 | This is because the MSE is used as a descriptive statistic of the sample rather than as an estimator of a population value. 66 | 67 | The MSE is not on the same scale as $y$, so often the root mean squared error (RMSE) is used, 68 | $$ 69 | RMSE(\hat{\epsilon}) = \sqrt{MSE(\hat{\epsilon})}. 70 | $$ 71 | 72 | Both MSE and RMSE are also often used as out-of-sample model fit measures in cross-validation. 73 | 74 | [^causal]: Where "explained" is in **no** way causal. In this case explained means the difference in variation in one variable after conditioning on another variable. 
75 | 76 | ## R-squared 77 | 78 | R squared is also called the **coefficient of determination**. 79 | 80 | $$ 81 | \begin{aligned}[t] 82 | R^2 &= \frac{MSS}{TSS} = 1 - \frac{RSS}{TSS} \\ 83 | &= \frac{\text{model variance}}{\text{total variance}} \\ 84 | &= 1 - \frac{\text{residual variance}}{\text{total variance}} \\ 85 | &= \text{fraction of variance explained} 86 | \end{aligned} 87 | $$ 88 | 89 | - R-squared is so called because for a bivariate regression the $R^2$ is the square of the correlation coefficient ($r$). 90 | 91 | There are a large [number](https://stats.stackexchange.com/questions/13314/is-r2-useful-or-dangerous) of rants about the dangers of focusing on $R^2$. 92 | 93 | ```{r echo=FALSE} 94 | knitr::include_graphics("") 95 | ``` 96 | -------------------------------------------------------------------------------- /multicollinearity.Rmd: -------------------------------------------------------------------------------- 1 | # Collinearity and Multicollinearity 2 | 3 | ```{r} 4 | library("tidyverse") 5 | library("carData") 6 | ``` 7 | 8 | ## (Perfect) collinearity 9 | 10 | In order to estimate unique $\hat{\beta}$ OLS requires the that the columns of the design matrix $\Vec{X}$ are linearly independent. 11 | 12 | Common examples of groups of variables that are not linearly independent: 13 | 14 | - Categorical variables in which there is no excluded category. 15 | You can also include all categories of a categorical variable if you exclude the intercept. 16 | Note that although they are not (often) used in political science, there are other methods of transforming categorical variables to ensure the columns in the design matrix are independent. 17 | 18 | - A constant variable. This can happen in practice with dichotomous 19 | variables of rare events; if you drop some observations for whatever 20 | reason, you may end up dropping all the 1's in the data. 
So although the 21 | variable is not constant in the population, in your sample it is constant 22 | and cannot be included in the regression. 23 | 24 | - A variable that is a multiple of another variable. E.g. you cannot include $\log(\text{GDP in millions USD})$ and $\log({GDP in USD})$ since $\log(\text{GDP in millions USD}) = \log({GDP in USD}) / 1,000,000$. 25 | 26 | - A variable that is the sum of two other variables. E.g. you cannot include $\log(population)$, $\log(GDP)$, $\log(GDP per capita)$ in a regression since 27 | $$\log(\text{GDP per capita}) = \log(\text{GDP} / \text{population}) = \log(\text{GDP}) - \log(\text{population})$$. 28 | 29 | ## What to do about it? 30 | 31 | R and most statistical programs will run regressions with collinear variables, but will drop variables until only linearly independent columns in $\Mat{X}$ remain. 32 | 33 | For example, consider the following code. The variable `type` is a categorical variable with categories "bc", "wc", and "prof". 34 | 35 | ```{r} 36 | data(Duncan, package = "carData") 37 | # Create dummy variables for each category 38 | Duncan <- mutate(Duncan, 39 | bc = type == "bc", 40 | wc = type == "wc", 41 | prof = type == "prof") 42 | lm(prestige ~ bc + wc + prof, data = Duncan) 43 | ``` 44 | R runs the regression, but coefficient and standard errors for `prof` are set to `NA`. 45 | 46 | You should not rely on the software to fix this for you; once you (or the software) notices the problem check the reasons it occurred. The rewrite your regression to remove whatever was creating linearly dependent variables in $\Mat{X}$. 47 | 48 | ## Multicollinearity 49 | 50 | Multicollinearity is the (poor) name for less-than-perfect collinearity. 51 | Even though there is enough variation in $\Mat{X}$ to estimate OLS coefficients, if some set of variables in $\Mat{X}$ is highly correlated it will result in large, but unbiased, standard errors on the estimates. 
52 | 53 | What happens if variables are not linearly dependent, but nevertheless highly correlated? 54 | If $\Cor(\Vec{x}_1, \Vec{x}_2) = 1$, then they are linearly dependent and the regression cannot be estimated (see above). 55 | But if $\Cor(\Vec{x}_1, \Vec{x}_2) = 0.99$, then OLS can estimate unique values of $\hat\beta$. One might wonder whether everything is fine with OLS estimates until, suddenly, at exact linear dependence everything breaks. The answer is yes, and no. 56 | As $|\Cor(\Vec{x}_1, \Vec{x}_2)| \to 1$ the standard errors on the coefficients of these variables increase, but OLS as an estimator works correctly; $\hat\beta$ and $\se{\hat\beta}$ are unbiased. 57 | With multicollinearity, OLS gives you the "right" answer, but it cannot say much with certainty. 58 | 59 | For a bivariate regression, the distribution of the slope coefficient has variance, 60 | $$ 61 | \Var(\hat{\beta}_1) = \frac{\sigma_u^2}{\sum_{i = 1} (x_i - \bar{x})^2} . 62 | $$ 63 | 64 | What affects the standard error of $\hat{\beta}$? 65 | 66 | - The error variance ($\sigma_u^2$). The higher the variance of the residuals, the higher the variance of the coefficients. 67 | - The variance of $\Vec{x}$. The lower variation in $\Mat{x}$, the bigger the standard errors of the slope. 68 | 69 | Now consider a multiple regression, 70 | $$ 71 | \Vec{y} = \beta_0 + \beta_1 \Vec{x}_1 + \beta_2 \Vec{x}_2 + u 72 | $$ 73 | 74 | this becomes, 75 | $$ 76 | \Var(\hat{\beta}_1) = \frac{\sigma_u^2}{(1 - R^2_1) \sum_{i = 1}^n (x_i - \bar{x})^2} 77 | $$ 78 | where $R^2_1$ is the $R^2$ from the regression of $\Vec{x}_1$ on $\Vec{x}_2$, 79 | $$ 80 | \Vec{x}_1 = \hat{\delta}_0 + \hat{\delta}_1 \Vec{x}_2 . 81 | $$ 82 | 83 | The factors affecting standard errors are 84 | 85 | 1. Error variance: higher residual variance leads to higher standard errors. 86 | 1. Variance of $\Vec{x}_1$: lower variation in $\Vec{x}_1$ leads to higher standard errors. 87 | 1. The strength of the relationship between $x_1$ and $x_2$. 
Stronger relationship between $x_1$ and $x_2$ (higher $R^2$ of the regression of $x_1$ on $x_2$) leads to higher standard errors. 88 | 89 | These arguments generalize to more than two predictors. 90 | 91 | ## What to do about it? 92 | 93 | Multicollinearity is not an "error" in the model. 94 | All you can do is: 95 | 96 | 1. Get more data 97 | 1. Find more conditional variation in the predictor of interest 98 | 99 | What it means depends on what you are doing. 100 | 101 | 1. Prediction: then you are interested in $\hat{\Vec{y}}$ and not $\hat{\beta}$ (or its standard errors). 102 | In this case, multicollinearity is irrelevant. 103 | 104 | 1. Causal inference: in this case you are interested in $\hat{\Vec{\beta}}$. 105 | Multicollinearity does not bias $\hat{\beta}$. 106 | You should include all regressors to achieve balance, and include all relevant pre-treatment variables and not include post-treatment variables. 107 | Multicollinearity is not directly relevant in this choice. 108 | All multicollinearity means is that the variation in the treatment after accounting for selection effects is very low, making it hard to say anything about the treatment effect with that observational data. 109 | More sophisticated methods may trade off some bias for a lower variance (e.g. shrinkage methods), but that must be done systematically, and not ad-hoc dropping relevant pre-treatment variables that simply correlate highly with your treatment variable. 110 | -------------------------------------------------------------------------------- /old-files/multicollinearity.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Collinearity and Multicollinearity 3 | --- 4 | 5 | # Collinearity and Multicollinearity 6 | 7 | ## (Perfect) Collinearity 8 | 9 | In order to estimate unique $\hat{\beta}$ OLS requires that the columns of the design matrix $\Vec{X}$ are linearly independent. 
10 | 11 | Common examples of groups of variables that are not linearly independent: 12 | 13 | - Categorical variables in which there is no excluded category. 14 | You can also include all categories of a categorical variable if you exclude the intercept. 15 | Note that although they are not (often) used in political science, there are other methods of transforming categorical variables to ensure the columns in the design matrix are independent. 16 | - A constant variable. This can happen in practice with dichotomous variables of rare events; if you drop some observations for whatever reason, you may end up dropping all the 1's in the data. So although the variable is not constant in the population, in your sample it is constant and cannot be included in the regression. 17 | - A variable that is a multiple of another variable. E.g. you cannot include $\log(\text{GDP in millions USD})$ and $\log({GDP in USD})$ since $\log(\text{GDP in millions USD}) = \log({GDP in USD}) / 1,000,000$. in 18 | - A variable that is the sum of two other variables. E.g. you cannot include $\log(population)$, $\log(GDP)$, $\log(GDP per capita)$ in a regression since 19 | $$\log(\text{GDP per capita}) = \log(\text{GDP} / \text{population}) = \log(\text{GDP}) - \log(\text{population})$$. 20 | 21 | 22 | #### What to do about it? 23 | 24 | R and most statistical programs will run regressions with collinear variables, but will drop variables until only linearly independent columns in $\Mat{X}$ remain. 25 | 26 | For example, consider the following code. The variable `type` is a categorical variable with categories "bc", "wc", and "prof". 27 | It will 28 | ```{r} 29 | data(Duncan, package = "car") 30 | # Create dummy variables for each category 31 | Duncan <- mutate(Duncan, 32 | bc = type == "bc", 33 | wc = type == "wc", 34 | prof = type == "prof") 35 | lm(prestige ~ bc + wc + prof, data = Duncan) 36 | ``` 37 | R runs the regression, but coefficient and standard errors for `prof` are set to `NA`. 
38 | 39 | You should not rely on the software to fix this for you; once you (or the software) notices the problem check the reasons it occurred. The rewrite your regression to remove whatever was creating linearly dependent variables in $\Mat{X}$. 40 | 41 | 42 | 43 | ## Multicollinearity 44 | 45 | 46 | *Insert plot of highly correlated variables and their coefficients.* 47 | 48 | *Insert plot of uncorrelated variables and their coefficients.* 49 | 50 | ### What to do about it? 51 | 52 | Remember multicollinearity does not violate the assumptions of OLS. If all the other assumptions hold, then OLS is giving you unbiased coefficients and standard errors. What multicollinearity is indicating is that you may not be able to answer the question with the precision you would like. 53 | 54 | 1. If the variable(s) of interest are highly correlated with other variables, then it means that there is not enough variation, controlling for other factors. You may check that you are not controlling for "post-treatment" variables. Dropping control variables if they are correctly included will bias your estimates. But otherwise, there is little you can do other than get more data. You could re-consider your research design and question. What does it mean if there is that little variation in the treatment variable after controlling for other factors? 55 | 2. If control variables are highly correlated with each other, it does not matter. You should not be interpreting their coefficients, so their standard errors do not matter. In fact, controlling for several similar, but correlated variables, may be useful in order to offset measurement error in any one of them. 
56 | -------------------------------------------------------------------------------- /old-files/ols-diagnostics-troubleshooting.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Regression Diagnostics" 3 | --- 4 | 5 | # Regression Diagnostics 6 | 7 | Several packages in R provide large collections of regression diagnostics: 8 | 9 | - [lmtest](https://cran.r-project.org/web/packages/lmtest/index.html) 10 | - [car](https://cran.r-project.org/web/packages/car/index.html) 11 | 12 | Reading the vignettes or documentation of these packages is a good overview of available regression diagnostics. 13 | Also see the [Econometrics Task View](https://cran.r-project.org/web/views/Econometrics.html). 14 | 15 | @Fox2016a has a particularly extensive overview of regression diagnostics. 16 | 17 | Though for Stata, [this tutorial](http://www.ats.ucla.edu/stat/stata/webbooks/reg/chapter2/statareg2.htm) has an overview of many regression diagnostics. 18 | -------------------------------------------------------------------------------- /old-files/ols-inference.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: OLS Inference 3 | --- 4 | 5 | # OLS Inference 6 | 7 | 8 | ## Sampling Distribution 9 | 10 | The sampling distribution of the OLS parameters is 11 | $$ 12 | \Vec{\beta} \sim \dmvnorm(\Vec{beta}, \sigma^2 (\Mat{X}' \Mat{X})^{-1}). 13 | $$ 14 | Thus, the variance of the coefficients is 15 | $$ 16 | \Var(\hat{\beta}) = \sigma^2 (\Mat{X}' \Mat{X})^{-1} . 
17 | $$ 18 | which is a symmetric matrix, 19 | $$ 20 | \Var(\hat{\beta}) = 21 | \begin{bmatrix} 22 | \Var(\hat{\beta}_0) & \Cov(\hat{\beta}_0, \hat{\beta}_1) & \Cov(\hat{\beta}_0, \hat{\beta}_2) & \cdots & \Cov(\hat{\beta}_0, \hat{\beta}_K) \\ 23 | \Cov(\hat{\beta}_0, \hat{\beta}_1) & \Var(\hat{\beta}_1) & \Cov(\hat{\beta}_1, \hat{\beta}_2) & \cdots & \Cov(\hat{\beta}_1, \hat{\beta}_K) \\ 24 | \Cov(\hat{\beta}_0, \hat{\beta}_2) & \Cov(\hat{\beta}_1, \hat{\beta}_2) & \Var(\hat{\beta}_2) & \cdots & \Cov(\hat{\beta}_2, \hat{\beta}_K) \\ 25 | \vdots & \vdots & \vdots & \ddots & \vdots \\ 26 | \Cov(\hat{\beta}_0, \hat{\beta}_K) & \Cov(\hat{\beta}_1, \hat{\beta}_K) & \Cov(\hat{\beta}_2, \hat{\beta}_K) & \cdots & \Var(\hat{\beta}_K) 27 | \end{bmatrix} 28 | $$ 29 | On the diagonal are the variances of the parameters, and the off-diagonal elements are the covariances of the parameters. 30 | 31 | 32 | ## t-tests for single parameters 33 | 34 | The null hypothesis and alternative hypotheses for two-sided tests are, 35 | $$ 36 | \begin{aligned}[t] 37 | H_0: &\beta_k = \beta_k^0 \\ 38 | H_a: &\beta_k \neq \beta_k^0 39 | \end{aligned} 40 | $$ 41 | 42 | Then in large samples, 43 | $$ 44 | \frac{\hat{\beta}_k - \beta_k}{\se(\widehat{\beta}_k)} \sim \dnorm(0, 1) 45 | $$ 46 | In small samples, 47 | $$ 48 | \frac{\hat{\beta}_k - \beta_k}{\se(\widehat{\beta}_k)} \sim \dt{N - (K + 1)} 49 | $$ 50 | 51 | 52 | The estimated standard errors of $\hat{\beta}$ come from 53 | $$ 54 | \begin{aligned}[t] 55 | \Var(\hat{\Vec{\beta}}) &= \hat{\sigma}^2 (\Mat{X}' \Mat{X})^{-1} \\ 56 | \hat{\sigma}^2 &= \frac{\Vec{\epsilon}'\Vec{\epsilon}}{(N - (K + 1))} 57 | \end{aligned} 58 | $$ 59 | 60 | So, under the common null hypothesis test for $\beta_k = 0$, 61 | $$ 62 | \frac{\hat{\beta}_k}{\se(\widehat{\beta}_k)} \sim \dt{N - (K + 1)} 63 | $$ 64 | 65 | The $(1 - \alpha) \times 100\%$ confidence interval for $\hat{\beta}_k$ is, 66 | $$ 67 | \hat{\beta}_k \pm t^*_{\alpha / 2} \times
\se(\hat{\beta}_K) 68 | $$ 69 | where $t^*_{\alpha / 2}$ is the quantile of the $\dt{n - (K + 1)}$ distribution such that $P(T \leq t^*) > 1 - \alpha / 2$. 70 | 71 | 72 | ## F-tests of Multiple Hypotheses 73 | 74 | TODO 75 | 76 | ## Testing functions of coefficients 77 | 78 | The standard error for non-linear functions of parameters can be approximated with the Delta method: 79 | $$ 80 | \se(f(\Vec{\beta})) = 81 | \left(\frac{d\,f(\Vec{\beta})}{d\,\Vec{beta}} \right)\T 82 | \Var{\Vec{\beta}} 83 | \left(\frac{d\,f(\Vec{\beta})}{d\,\Vec{beta}} \right) . 84 | $$ 85 | -------------------------------------------------------------------------------- /old-files/ols-misc.Rmd: -------------------------------------------------------------------------------- 1 | # Diagnostics and Troubleshooting 2 | 3 | 4 | ## Omitted variables 5 | 6 | - Problem: An omitted variables bias coefficients unless (1) their coefficient is zero, or (2) it is uncorrelated with the variable. 7 | - Solutions: Control for those variables. When estimating a structural or causal effect, care needs to be taken to not include bad controls. 8 | 9 | ### Simulations 10 | 11 | 12 | ### What to do about it? 13 | 14 | - Include more controls 15 | - Estimate the possible bias of omitted variables 16 | - Better design. Do not rely on selection on observables. 17 | 18 | ### Examples 19 | 20 | **TODO:** Find good examples. Perhaps examples of Simpson's Paradox. 21 | 22 | ## Measurement Errors 23 | 24 | - Problem: Measurement error in covariates biases regression coefficient towards zero, and makes it an imperfect control 25 | - Solutions: 26 | - better measures 27 | - instrumental variable or structural equation models 28 | 29 | A regression model allows for measurement error in the outcome variable, since measurement error uncorrelated with $X$ can be thought of as part of the residual $\varepsilon$. 30 | 31 | However, measurement error in the covariates is a different issue. 
32 | Measurement error in a covariate biases its coefficient downward. 33 | This is called **attenuation bias**. That covariate also acts as 34 | an imperfect control, which will bias other coefficients. 35 | 36 | Suppose the population regression function is 37 | $$ 38 | Y_i = \beta_0 + \beta_1 X_{i} + \varepsilon_i 39 | $$ 40 | However, instead of $X_1$, you observe $\tilde{X}_1$, which is observed with measurement error, 41 | $$ 42 | \tilde{X}_1 = X_{i} + \delta_i 43 | $$ 44 | where $\delta_i$ is the *classical measurement error*, which is mean zero and uncorrelated with the covariates or regression disturbances, 45 | $$ 46 | \begin{aligned}[t] 47 | \E(\delta_i) &= 0 \\ 48 | \Cov(X_i, \delta_i) &= \Cov(\epsilon_i, \delta_i) = 0 49 | \end{aligned} 50 | $$ 51 | 52 | Measurement error in a variable $X$ has the following effects 53 | 54 | - Biases its coefficient towards zero (attenuation biase) 55 | - Biases the coefficients of other variables (that $X$ is correlated with) in unknown directions. 56 | - Controlling for other variables *increases* the attenuation bias in $\beta$ 57 | 58 | **TODO** Fill in equations. See Wooldridge Ch 9 (p. 320-323), Mastering Metrics, p. 240; Fox, Ch 3. 59 | 60 | ### What can we do about it? 61 | 62 | - Instrumental variable models, and, more generally, structural equation models, can model the measurement error. 63 | - Use measures that are more closely aligned with your concepts, have less error. 64 | - Combine multiple measures in order to reduce measurement error 65 | 66 | ### Simulations 67 | 68 | 69 | 70 | 71 | 72 | ### Example 73 | 74 | **TODO:** Need example of measurement error in political science. 75 | 76 | ### References 77 | 78 | - Mastering Metrics, Ch 6. p. 240. 79 | - Fox, Ch 6.4. p. 112. 80 | - Kennedy (6 ed) Ch 9, p. 139. 81 | 82 | 83 | ## Functional Form 84 | 85 | **TODO** 86 | 87 | ## Multicollinearity 88 | 89 | - Problem: Correlation between predictors increases the standard errors on those predictors. 
However, coefficients are unbiased, an assuming the other CLM assumptions hold, the standard errors. 90 | - Solution: 91 | - More data 92 | - Remove predictors 93 | - Combine predictors: principal components, indexes 94 | - Regularization: e.g. LASSO or Ridge regression 95 | 96 | ## Residuals 97 | 98 | ### Non-Normal errors 99 | 100 | - Problem: Incorrect standard errors, but generally only an issue if sample size is small. However, this may suggest that the expected value of $Y$ is not a substantively meaningful quantity. 101 | - Solution: 102 | - Transform variables 103 | - Use alternative model more appropriate for the data 104 | 105 | 106 | Diagnostics 107 | 108 | - qqplots 109 | 110 | ### Non-Constant variance 111 | 112 | - Problem: Incorrect standard errors. This may also suggest incorrect functional form. 113 | - Solution: 114 | - If form of non-constant variance is known: weighted least squares 115 | - If form is unknown: robust standard errors 116 | - Since it suggests an incorrect functional form, adjust the model until non-constant variance disappears. 117 | 118 | Diagnostics 119 | 120 | - plots 121 | - compare robust standard errors to non-robust standard errors 122 | -------------------------------------------------------------------------------- /old-files/ovb-measurment-error.Rmd: -------------------------------------------------------------------------------- 1 | ## Measurement Error 2 | 3 | ### What's the problem? 4 | 5 | It biases coefficients. The way in which it biases coefficients depends on which 6 | variables have measurement error. 7 | 8 | 1. Variable with measurement error: biases $\beta$ towards zero (**attenuation bias**) 9 | 2. Other variables: Biases $\beta$ similarly to omitted variable bias. In other words, when a variable has measurement error it is an imperfect control. You can think of omitted variables as the limit of the effect of measurement error as it increases. 10 | 11 | 12 | ### What to do about it? 
13 | 14 | There's no easy fix within the OLS framework. 15 | 16 | 1. If the measurement error is in the variable of interest, then its coefficient will be biased towards zero, so your estimate understates the magnitude of the true effect. 17 | 2. Find better measures with lower measurement errors. If the variable is the variable of interest, then perhaps combine multiple variables into a single index. If the measurement error is in the control variables, then include several measures. That these measures correlate closely increases their standard errors, but the control variables are not the object of the inferential analysis. 18 | 3. More complicated methods: errors-in-variables models, structural equation models, instrumental variable (IV) models, and Bayesian methods. 19 | 20 | -------------------------------------------------------------------------------- /old-files/resampling-methods.Rmd: -------------------------------------------------------------------------------- 1 | # Prediction 2 | 3 | 4 | 5 | 6 | ## Prediction error 7 | 8 | The problem is that we would like to estimate how well the model will fit *new* data. 9 | Since we have not yet seen the new data, this is hard. 
10 | We will have to 11 | 12 | 13 | ## Cross-Validation 14 | 15 | 16 | ### Example 17 | 18 | ```{r} 19 | 20 | ``` 21 | 22 | 23 | ## Application to Science 24 | 25 | TODO 26 | 27 | 28 | ## References 29 | 30 | See the R packages 31 | 32 | - **caret** 33 | - **mlr** 34 | - **recipes** 35 | -------------------------------------------------------------------------------- /outliers.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: html_document 3 | editor_options: 4 | chunk_output_type: console 5 | --- 6 | 7 | # Outliers 8 | 9 | In bivariate regression, the coefficient $\hat{\beta}_1$ can be written as a weighted average of the outcomes, 10 | $$ 11 | \hat{\beta}_1 = \sum_{i = 1}^n w_i (y_i - \bar{y}), 12 | $$ 13 | where 14 | $$ 15 | w_i = \frac{x_i - \bar{x}}{\sum_{i = 1}^n (x_i - \bar{x})^2} . 16 | $$ 17 | 18 | ```{r} 19 | Anscombe <- anscombe %>% 20 | rowid_to_column(var = ".id") %>% 21 | gather(variable, value, -.id) %>% 22 | separate(variable, c("xy", "dataset"), sep = 1) %>% 23 | spread(xy, value) 24 | ``` 25 | 26 | We'll consider the regressions on each dataset. 27 | ```{r} 28 | ggplot(Anscombe, aes(x = x, y = y)) + 29 | geom_point() + 30 | geom_smooth(method = "lm", se = FALSE) + 31 | facet_wrap(~ dataset, ncol = 2) 32 | ``` 33 | 34 | Add the linear regression weights for each observation: 35 | ```{r} 36 | Anscombe <- Anscombe %>% 37 | group_by(dataset) %>% 38 | mutate(w = x - mean(x), 39 | w = w / sum(w ^ 2)) %>% 40 | ungroup() 41 | ``` 42 | Now show the weights of each observation: 43 | ```{r} 44 | ggplot(Anscombe, aes(x = x, y = y, size = abs(w))) + 45 | geom_point() + 46 | geom_smooth(method = "lm", se = FALSE) + 47 | facet_wrap(~ dataset, ncol = 2) 48 | ``` 49 | 50 | 64 | 65 | ## Questions 66 | 67 | - Which observations in linear regression given the most weight in determining $\hat{\beta_1}$? 68 | 69 | - Consider two observations $x_1 = 1$ and $x_2 = 2$. Suppose $\bar{x} = 1$. 
70 | What are the weights of the two observations? What is the implication for 71 | how OLS will respond to outliers? 72 | 73 | ## Influential Weights 74 | 75 | The previous section showed how OLS coefficients are a weighted average of the outcomes. 76 | This suggests that some observations may have more influence than others on our estimates. 77 | 78 | There are three types of extreme values to consider: 79 | 80 | 1. Leverage point: extreme in $x$ 81 | 1. Outlier: extreme in $y$ 82 | 1. Influence point: a leverage point **and** an outlier 83 | 84 | ## Leverage Point 85 | 86 | The **hat matrix** is defined as 87 | $$ 88 | \Mat{H} = \Mat{X} (\Mat{X}' \Mat{X})^{-1} \Mat{X}' 89 | $$ 90 | 91 | Note, 92 | $$ 93 | \begin{aligned}[t] 94 | \hat{\Vec{u}} &= \Vec{y} - \Mat{X} \hat{\Vec{\beta}} \\ 95 | &= \Vec{y} - \Mat{X} \underbrace{(\Mat{X}' \Mat{X})^{-1} \Mat{X}' \Vec{y}}_{\text{OLS estimate}} \\ 96 | &= \Vec{y} - \Mat{H} \Vec{y} \\ 97 | &= (\Mat{I} - \Mat{H}) \Vec{y} 98 | \end{aligned} 99 | $$ 100 | The hat matrix is so-called because it puts the "hat" on $\Vec{y}$: 101 | $$ 102 | \hat{\Vec{y}} = \Mat{H} \Vec{y} 103 | $$ 104 | Properties of the hat matrix: 105 | 106 | - $n \times n$ symmetric matrix 107 | - idempotent: $\Mat{H} \Mat{H} = \Mat{H}$. 108 | -------------------------------------------------------------------------------- /ovb.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: html_document 3 | editor_options: 4 | chunk_output_type: console 5 | --- 6 | 7 | # Omitted Variable Bias 8 | 9 | **Long regression:** The regression with all variables. 10 | $$ 11 | Y_i = \beta_0 + \beta_1 X_{1,i} + \beta_2 X_{2,i} + u_i 12 | $$ 13 | 14 | **Short regression:** The regression that omits a variable. In this case the short regression omits $X_{2,i}$ 15 | $$ 16 | Y_i = \beta^s_0 + \beta^s_1 X_{1,i} + u_i^s 17 | $$ 18 | 19 | **Question** Will $\E(\widehat{\beta}^s_1) = \beta_1$? Under what assumptions? 
20 | 21 | **Result:** 22 | $$ 23 | \beta^s_1 = \beta_1 + \underbrace{\delta_{1} \beta_2}_{\text{bias}} 24 | $$ 25 | where $\delta_1$ is the coefficient of $X_{2,i}$ on $X_{1,i}$, 26 | $$ 27 | X_{2,i} = \delta_0 + \delta_{1} X_{1,i} . 28 | $$ 29 | 30 | **Omitted variable bias:** bias in $\hat{\beta}^s_1$ due to omitting $X_{2i}$, 31 | $$ 32 | \mathrm{Bias}(\hat{\beta}^s_1) = \E[\widehat{\beta}^s_1] - \beta_1 = \beta_2 \delta_1 . 33 | $$ 34 | 35 | The omitted variable bias is: 36 | $$ 37 | \begin{aligned}[t] 38 | (\text{"effect of $X_{2i}$ on $Y_i$"}) & \times (\text{"effect of $X_{1i}$ on $X_{2i}$"}) \\ 39 | (\mathrm{omitted} \to \mathrm{outcome}) & \times (\mathrm{included} \to \mathrm{omitted}) 40 | \end{aligned} 41 | $$ 42 | 43 | Remember that by OLS, the effect of $X_{1i}$ on $X_{2i}$ is 44 | $$ 45 | \delta_1 = \frac{\Cov(X_{1i}, X_{2i})}{\Var(X_{1i})} . 46 | $$ 47 | 48 | | | $\Cov(X_{1i}, X_{2i}) > 0$ | $\Cov(X_{1i}, X_{2i}) < 0$ | $\Cov(X_{1i}, X_{2i}) = 0$ | 49 | | -------------- | -------------------------- | -------------------------- | -------------------------- | 50 | | $\beta_2 > 0$ | $+$ | $-$ | $\emptyset$ | 51 | | $\beta_2 < 0$ | $-$ | $+$ | $\emptyset$ | 52 | | $\beta_2 = 0$ | $\emptyset$ | $\emptyset$ | $\emptyset$ | 53 | 54 | So $\hat{\beta}^s_1$ is only unbiased if either of the following is true: 55 | 56 | - $\beta_2 = 0$ ($X_{2,i}$ has no effect on $Y_i$) 57 | - $\delta_1 = 0$ ($X_{2,i}$ is uncorrelated with $X_{1,i}$) 58 | 59 | See @AngristPischke2014 [p. 92]. 60 | 61 | ## Including Irrelevant Variables 62 | 63 | How does including an **irrelevant variable** in a regression affect the other coefficients? 64 | 65 | An **irrelevant variable** is one which has no effect on $Y_i$ conditional on the other regressors, thus it would have a coefficient of 0. 66 | 67 | Consider the regression, 68 | $$ 69 | Y_i = \beta_0 + \beta_1 X_{1i} + \beta_2 X_{2i} + u_i . 70 | $$ 71 | If $X_{2i}$ is irrelevant, then $\beta_2 = 0$, and 72 | $$ 73 | Y_i = \beta_0 + \beta_1 X_{1i} + 0 \times X_{2i} + u_i . 
74 | $$ 75 | 76 | But given the previous results, OLS is still unbiased for all parameters, 77 | $$ 78 | \begin{aligned}[t] 79 | \E[\widehat{\beta}_0] &= \beta_0 \\ 80 | \E[\widehat{\beta}_1] &= \beta_1 \\ 81 | \E[\widehat{\beta}_2] &= 0 82 | \end{aligned} 83 | $$ 84 | 85 | However, including an irrelevant variable will increase the standard errors of $\hat{\beta}_1$ by reducing the conditional variation of $X_{1i}$ and it also removing a degrees of freedom. 86 | 87 | ## Measurement Error 88 | 89 | There are two issues to be concerned about. 90 | 91 | 1. Measurement error in the covariate of interest. 92 | 1. Measurement error in a control variable. 93 | 94 | ## When does Omitted variable bias make sense? 95 | 96 | - **Description**: No. It may be interesting to consider the relationship conditional on another variable, but not doing it doesn't invalidate the method. 97 | 98 | - **Prediction**: No. The $\hat{\beta}$ do not (directly) matter, since we care about $\hat{y}$. Omitted variable bias does not directly affect that. 99 | 100 | - **Causal Inference** Yes. Not only does it make sense, it is the most important assumption for casual inference. 101 | 102 | - model/structural approach: OVB violates Gauss-Markov assumptions and estimator is biased. 103 | - potential outcomes approach: OVB violates the conditional independence assumption. 104 | 105 | ## What to do about it? 106 | 107 | OVB is the most important assumption for regression in any causal setting. 108 | It is also difficult to assess. 109 | How can we know what we omitted? And how can we know that we've including everything relevant from the population model, if we don't know the population model? 110 | 111 | There are effectively two strategies for testing OVB [@PeiPischkeSchwandt2017a] 112 | 113 | 1. balancing tests 114 | 1. 
coefficient comparison tests (regression sensitivity analysis/robustness tests) 115 | 116 | Consider the case of long and short regressions 117 | $$ 118 | \begin{aligned} 119 | Y_i &= \beta_0 + \beta_1 X_{1,i} + \beta_2 X_{2,i} + u_i \\ 120 | Y_i &= \beta^s_0 + \beta^s_1 X_{1,i} + u_i^s 121 | \end{aligned} 122 | $$ 123 | The omitted variable bias for estimating the short regression rather than the long regression is 124 | $$ 125 | 126 | $$ 127 | 128 | **Regression sensitivity analysis** Suppose you are interested in the coefficient on $X_1$. Run the bivariate regression of $X_{1i}$ (without any controls), 129 | $$ 130 | y_i = \hat{\beta}_0^s + \hat{\beta}_1^s X_{1i} + \hat{u}_i^s , 131 | $$ 132 | and the multiple regression with **all** the controls, 133 | $$ 134 | y_i = \hat{\beta}_0 + \hat{\beta}_1 x_{1i} + \sum_{k = 2}^K \hat{\beta}_k X_{ki} + \hat{u}_i. 135 | $$ 136 | The quantity of interest is the difference, 137 | $$ 138 | |\hat{\beta}_{1} - \hat{\beta}_{1}^s| . 139 | $$ 140 | If the coefficient on $x_{1i}$ has a large change with the addition of control variables, it suggest that it is likely that there more omitted variables out there. 141 | If the coefficient on $x_{1i}$ changes little with the addition of control variables, it suggest that few covariates influence its coefficient, and it is less likely that there are omitted covariates that would influence the coefficient of $x_{1i}$ [@AngristPischke2014; p. 74]. 142 | See @NunnWantchekon2011a, @AltonjiElderTaber2005a, @PeiPischkeSchwandt2017a, @Oster2016a. 143 | 144 | You may often see papers include regressors one at a time or in groups. 145 | That is more-or-less pointless (at least in the manner that it is usually done), and provides no more information than the long regression. 146 | 147 | ## References 148 | 149 | Much of this chapter is derived from Matt Blackwell . 
150 | -------------------------------------------------------------------------------- /potential-outcomes.Rmd: -------------------------------------------------------------------------------- 1 | # Potential Outcomes 2 | 3 | -------------------------------------------------------------------------------- /presentation.Rmd: -------------------------------------------------------------------------------- 1 | # (PART) Presentation {-} 2 | -------------------------------------------------------------------------------- /probability.Rmd: -------------------------------------------------------------------------------- 1 | # (PART) Probability {-} 2 | -------------------------------------------------------------------------------- /programming.Rmd: -------------------------------------------------------------------------------- 1 | # (PART) Programming {-} 2 | -------------------------------------------------------------------------------- /questions.Rmd: -------------------------------------------------------------------------------- 1 | # Types of Questions 2 | 3 | 1. Descriptive: Represent data $X$ by a smaller number of values 4 | 1. Predictive: Given some $X$, what is the value of $Y$? 5 | 1. Causal: If I change $X$, what will the value of $Y$ be? 6 | 7 | Causal questions 8 | 9 | > A variable $X$ is a cause of *variable* $Y$ if $Y$ in any way relies on $X$ for its value. ... $X$ is a cause of $Y$ if $Y$ listens to $X$ and decides 10 | > its value in response to what it hears. Pearl et al. p. 5. 11 | 12 | References 13 | 14 | - Keele - Statistics of Causal Inference 15 | - Explanation vs. 
Prediction 16 | - Policy Prediction Questions 17 | -------------------------------------------------------------------------------- /rd.Rmd: -------------------------------------------------------------------------------- 1 | # Regression Discontinuity 2 | 3 | Summary: If there are thresholds whereby some observations receive the 4 | treatment above it, other those below it do not, and those immediately above or 5 | below that threshold are similar, we can use the difference of the outcome 6 | between those just above and those just below the threshold to estimate the 7 | causal effect of the treatment. 8 | 9 | Suppose there is a running variable $x$ such that any person receives the treatment, $d$ if $x \geq a$ and does not if $x \leq a$, 10 | $$ 11 | d = \begin{cases} 12 | 1 & x \geq a \\ 13 | 0 & x < a 14 | \end{cases} 15 | $$ 16 | 17 | A simple regression discontinuity model is, 18 | $$ 19 | \begin{aligned}[t] 20 | y_i = \alpha + \beta x_i + \tau d_i + \gamma x_i d_i + \epsilon_i 21 | \end{aligned} 22 | $$ 23 | The local causal effect of the treatment at the discontinuity is $\tau$. 24 | 25 | ```{r echo=FALSE, fig.cap="Fake Example of a Regression Discontinuity. The difference at the threshold (50) is the effect of the treatment."} 26 | tibble( 27 | x = 1:100, 28 | d = x > 50, 29 | yhat = 0.2 * x + 20 * d - 0.1 * x * d, 30 | y = yhat + rnorm(length(x), 0, 7) 31 | ) %>% 32 | ggplot(aes(x = x)) + 33 | geom_vline(xintercept = 50, colour = "white", size = 2) + 34 | geom_point(aes(y = y)) + 35 | geom_smooth(aes(y = y, group = d), method = "lm") 36 | ``` 37 | 38 | However, there are several choices 39 | 40 | - Functional form of the trends before and after the discontinuity 41 | - The size of the window of observations before and after the trend which to compare. 42 | 43 | How to choose? 44 | 45 | - parametric: chooses specific functional forms 46 | - non-parametric: uses flexible forms, and chooses a bandwidth [@ImbensKalyanaraman2011a] 47 | 48 | Sharp vs. 
Fuzzy Discontinuity? 49 | 50 | - Sharp: the assignment of the treatment occurs with certainty at the threshold. 51 | - Fuzzy: the assignment of the treatment occurs only probabilistically at the threshold. 52 | 53 | Suppose that the causal effect of treatment $T \in \{0, 1\}$ on unit $i$ is $\tau_i = Y_i(1) - Y_i(0)$ where $Y_i(1)$ is the potential outcome of $i$ under the treatment and $Y_i(0)$ is the potential outcome of $i$ under the control. 54 | If potential outcomes are distributed smoothly at the cut-point $c$, then the average causal effect of the treatment at the cut-point, $Z_i = c$: 55 | $$ 56 | \tau_{RD} = \E[Y_{i}(1) - Y_i(0)| Z_i = c] = \lim_{Z_i \downarrow c}\E[Y_{i}(1) | Z_i = c] - \lim_{Z_i \uparrow c}\E[Y_i(0)| Z_i = c] 57 | $$ 58 | 59 | An advantage of RD designs is that unlike selection on observables or IV, its identifying assumptions are more observable and testable. 60 | 61 | There are two basic tests (@LeeLemieux2010a): 62 | 63 | 1. Continuity of pre-treatment covariates. E.g. density test of McCrary (2008). Whether the ratio of treated to control units departs from chance. 64 | A difficulty is that balance only holds in the limit, and covariance balance may still be present in finite samples. 65 | 66 | 1. Irrelevance of covariates to the treatment-outcome relationship. There should be no systematic association between covariates and treatment, so controlling for them shouldn't affect the estimates. 67 | 68 | ## Examples 69 | 70 | - @ThistlethwaiteCampbell1960a was the first example of RD. 
71 | 72 | - Outcome: Career choices in teaching 73 | - Running variable: PSAS scores 74 | - Cutoff: receiving National Merit Finalist 75 | - Discussed: @AngristPischke2014a [Ch 4] 76 | 77 | - @CarpenterDobkin2011a, @CarpenterDobkin2009a 78 | 79 | - Running variable: age 80 | - Cutoff: ability to drink alcohol legally 81 | - Outcome: Death, accidents 82 | - Discussed: @AngristPischke2014a [Ch 4] 83 | 84 | - @AbdulkadirogluAngristPathak2014a 85 | 86 | - Running variable: exam score 87 | - Cutoff: above threshold receive an offer from a school. This is fuzzy since not all those who receive the offer attend. 88 | - Outcome: Educational outcomes 89 | - Discussed: @AngristPischke2014a [Ch 4] 90 | 91 | - @EggersHainmueller2009a 92 | 93 | - units: UK MPs 94 | - outcome: personal wealth 95 | - treatment: winning an election (holding office) 96 | - running variable: vote share 97 | 98 | - @LitschigMorrison2013a 99 | 100 | - units: Brazilian municipalities 101 | - outcome: education, literacy, poverty rate 102 | - treatment: receiving a cash transfer from the central government (there are population cutoffs) 103 | - running variable: population 104 | 105 | - @GelmanHill2007a [p. 213-217] 106 | 107 | - units: US Congressional members 108 | - outcome: ideology of representative 109 | - treatment: winning election 110 | - running variable: vote share 111 | 112 | - @GelmanKatz2007a, @GelmanHill2007a [p. 232] 113 | 114 | - units: patients 115 | - outcome: length of hospital stay 116 | - treatment: new surgery method 117 | - cutoff: not performed on those over 80 118 | - running variable: age 119 | 120 | - @LeeMorettiButler2004a. Also see derived examples in @Bailey2016a [Ex. 6.3]. See @Button2015a for a replication. 
121 | 122 | - units: congressional districts 123 | - outcome: ideology of nominees 124 | - treatment: election 125 | - running variable: vote share 126 | 127 | - @JacobLefgren2004a 128 | 129 | - units: students 130 | - outcome: education achievement 131 | - treatment: summer school, retention 132 | - running variable: standardized test 133 | 134 | ## Example: Close Elections 135 | 136 | A common use of RD in political science and econ is election outcomes. 137 | In this case the "treatment" is winning the election; it is applied to the candidate whose vote exceeds the threshold of 50%, but not to candidates arbitrarily below that threshold. 138 | Thus "close" elections are a common use of RD designs. 139 | This design was formalized in @Lee2008a. 140 | 141 | Several papers question whether close elections satisfy the assumptions of RD: 142 | 143 | - @CaugheySekhon2011a look at US House elections (1942-2008). They find that close elections are more imbalanced. They attribute this to national partisan waves. 144 | - @GrimmerHershFeinsteinEtAl2011a look at all US House elections 1880-2008. They find that structurally advantaged candidates (strong party, incumbents) are more likely to win close elections. 145 | 146 | The ways in which close elections can be non-random are lawsuit challenges and fraud. 147 | 148 | @EggersFowlerHainmuellerEtAl2014a addresses these concerns with a systematic review of 40,000 close elections: "U.S. House in other time periods, statewide, state legislative, and mayoral races in the U.S. and national or local elections in nine other countries" 149 | Only the US House appears to have these issues. 150 | 151 | ## Software 152 | 153 | See the R packages 154 | 155 | - `r rpkg("rddtools")`: a new and fairly complete package of regression discontinuity from primary data viz to other tests. 156 | - `r rpkg("rdd")` 157 | - `r rpkg("rdrobust")`: Tools for data-driven graphical and analytical statistical inference in RD. 
158 | - `r rpkg("rdpower")`: Calculate power for RD designs. 159 | - `r rpkg("rdmulti")`: Analyze designs with multiple cutoffs. 160 | 161 | See entries in the [Econometrics](https://cran.r-project.org/web/views/Econometrics.html) task view. 162 | 163 | ## References 164 | 165 | Textbooks and Reviews: 166 | 167 | - @AngristPischke2014a [Ch. 4] 168 | - @GelmanHill2007a [Sec. 10.4] 169 | - @Bailey2016a [Ch. 11] 170 | - @LindenAdamsRoberts2006a for applications to medicine 171 | - @HahnToddKlaauw2001a An early review of RD in economics 172 | 173 | Methods: 174 | 175 | - @ImbensKalyanaraman2011a propose an optimal bandwidth selection method 176 | -------------------------------------------------------------------------------- /references.Rmd: -------------------------------------------------------------------------------- 1 | `r if (knitr:::is_html_output()) '# References {-}'` 2 | -------------------------------------------------------------------------------- /regularization.Rmd: -------------------------------------------------------------------------------- 1 | # Regularization 2 | 3 | ```{r} 4 | library("glmnet") 5 | library("tidyverse") 6 | library("broom") 7 | ``` 8 | 9 | ```{r} 10 | UScrime <- MASS::UScrime %>% 11 | mutate_at(vars(y, M, Ed, Po1, Po2, LF, M.F, Pop, 12 | NW, U1, U2, GDP, Ineq, Prob, Time), 13 | funs(log)) 14 | 15 | varlist <- c("M", "Ed", "Po1", "Po2", "LF", "M.F", "Pop", "NW", 16 | "U1", "U2", "GDP", "Ineq", "Prob", "Time") 17 | ``` 18 | 19 | By default, `glmnet` will return and entire range of coefficients. 
20 | ```{r} 21 | mod_lasso <- glmnet(as.matrix(UScrime[, varlist]), UScrime[["y"]]) 22 | mod_ridge <- glmnet(as.matrix(UScrime[, varlist]), UScrime[["y"]], alpha = 0) 23 | ``` 24 | 25 | ```{r} 26 | bind_rows( 27 | mutate(tidy(mod_lasso), model = "Lasso"), 28 | mutate(tidy(mod_ridge), model = "Ridge") 29 | ) %>% 30 | filter(term != "(Intercept)") %>% 31 | ggplot(aes(x = dev.ratio, y = estimate, colour = term)) + 32 | geom_line() + 33 | facet_wrap(~ model, ncol = 1) 34 | ``` 35 | 36 | Alternatively, the ridge and lasso regression models are the solutions to the problems 37 | $$ 38 | \hat{\beta}_{\text{ridge}} = \arg \min_\beta \left\{ \sum_{i =1}^n \left(y_i - \beta_0 - \sum_{j = 1}^p \beta_j x_{ij} \right)^{2} \right\} \text{s.t.} \sum_{j = 1}^p \beta_j^2 \leq c, 39 | $$ 40 | and 41 | $$ 42 | \begin{aligned}[t] 43 | \hat{\beta}_{\text{lasso}} &= \arg \min_\beta \left\{ \sum_{i =1}^n \left(y_i - \beta_0 - \sum_{j = 1}^p \beta_j x_{ij} \right)^{2} \right\} \\ 44 | \text{s.t.}& \sum_{j = 1}^p |\beta_j| \leq c 45 | \end{aligned} 46 | $$ 47 | 48 | In other words, these methods try to find the $\Vec{\beta}$ with the smallest sum of squared errors that has a $\Vec{\beta}$ with a norm less than $c$. 49 | The value of $c$ corresponds to some value of $\lambda$ in the previous methods. 50 | 51 | Think of $c$ as a fixed *budget*. The lasso and ridge regressions try to find the variables that explain $y$ the best without going over the budget [@JamesWittenHastieEtAl2013a, p. 221]: 52 | 53 | Consider the case with only two coefficients: $\beta_1$ and $\beta_2$. 
54 | In the lasso, we want to find the values of $\beta_1$ and $\beta_2$ 55 | $$ 56 | |\beta_1| + |\beta_2| \leq c 57 | $$ 58 | 59 | ```{r echo=FALSE} 60 | knitr::include_graphics("img/islr-fig-6.7.png") 61 | ``` 62 | 63 | > never trust OLS with more than five regressors 64 | > --- [Zvi Grilliches](http://www.nber.org/econometrics_minicourse_2015/nber_slides11.pdf) 65 | > 66 | > Regularization theory was one of the first signs of the existence of intelligent inference 67 | > --- [Zapnik](http://www.nber.org/econometrics_minicourse_2015/nber_slides11.pdf) 68 | 69 | Rather than choose the best fit, there is some penalty to avoid over-fitting. 70 | This is to choose the optimal optimal point on the expected predicted value. 71 | 72 | There are two questions 73 | 74 | 1. method of regularization 75 | 1. amount of regularization 76 | 77 | There are several choices of the former, chosen for different reasons. 78 | 79 | The latter is almost always chosen by cross-validation. 80 | 81 | While OLS is okay for estimating $\beta$ (best linear unbiased property). 82 | However, with $K \geq 3$ regressors, OLS is poor. 83 | 84 | The approaches to regularization in regression are 85 | 86 | 1. Shrink estimates to zero (Ridge) 87 | 1. Sparsity, limit number of non-zero estimates (Lasso) 88 | 1. Combination of the two (Bridge) 89 | 90 | ## Ridge Regression 91 | 92 | $$ 93 | \hat{\beta}_{\text{OLS}} = \arg \min_{\beta} \sum_{i=1}^{n} (y_i - \Vec{x}_{i} \Vec{\beta})^{2} 94 | $$ 95 | 96 | Regularized regression adds a penalty that is a function of $\beta$. 97 | This encourages $\beta$ to be close to zero. 98 | $$ 99 | \hat{\beta}_{\text{regularized}} = \arg \min_{\beta} \sum_{i=1}^{n} (y_i - \Vec{x}_{i} \Vec{\beta})^{2} + \lambda f(\beta) 100 | $$ 101 | 102 | Where $\lambda$ is a penalty parameter, and $f(\beta)$ is a function that increases in the total magnitudes of the coefficients. 
103 | 104 | - $\lambda \to \infty$: all coefficients are zero 105 | - $\lambda \to 0$: same as OLS 106 | 107 | How do we choose the value of $\lambda$? 108 | 109 | - Currently: cross-validation 110 | - Historically: there were some default plug-in estimators, especially for ridge regression. 111 | 112 | **Ridge** regression penalizes the $\Vec{\beta}$ vector by the 113 | $$ 114 | \hat{\beta}_{\text{ridge}} = \arg \min_{\beta} \sum_{i=1}^{n} (y_i - \Vec{x}_{i} \Vec{\beta})^{2} + \sum_{k = 1}^{p} \beta_k^2 115 | $$ 116 | 117 | **Lasso** penalizes the coefficients by an the $L1$ norm. 118 | Suppose we want to find the best subset of $\leq k$ covariates . 119 | $$ 120 | \hat{\beta}_{\text{lasso}} = \arg \min_{\beta} \sum_{i=1}^{n} (y_i - \Vec{x}_{i} \Vec{\beta})^{2} + \lambda \sum_{k = 1}^p |\beta_k| 121 | $$ 122 | 123 | - If true distribution of coefficients is a few big ones and many small ones, 124 | LASSO will do better. If many small/modest sized effects, ridge may do better. 125 | 126 | - LASSO does not work well with highly correlated coefficients. 127 | 128 | - Ridge: $\hat{\beta}_{1} + \hat{\beta}_{2} \approx (\beta_1 + \beta_2)/ 2$. 129 | - LASSO: Indifferent between $\hat{\beta}_1 = 0$, $\hat{\beta}_2 = \beta_1 + \beta_2$, $\hat{\beta}_1 = \beta_1 + \beta_2$, and $\hat{\beta}_2 = 0$. 130 | 131 | - Approximate best-subset selection. Suppose that we would really like to select 132 | the best subset of $q < k$ coefficients and set the rest to zero (this is the variable selection problem). 133 | That is a hard problem since there are $\binom{k}{q}$. 134 | Lasso can be viewed as an approximation of the problem. 135 | 136 | - Oracle property. If the true model is sufficiently sparse, we can ignore the 137 | selection stage and use OLS standard errors of the non-zero variables 138 | for inference. 
139 | 140 | **Bridge** regression penalizes the $\Vec{\beta}$ vector by the 141 | $$ 142 | \hat{\beta}_{\text{bridge}} = \arg \min_{\beta} \sum_{i=1}^{n} (y_i - \Vec{x}_{i} \Vec{\beta})^{2} + \lambda_1 \sum_{k = 1}^{p} |\beta_k| + \lambda_2 \sum_{k = 1}^{p} \beta_k^2 143 | $$ 144 | 145 | Bridge regression has some of the properties of both ridge and Lasso. 146 | It will select correlated regressors, yet also shrink coefficients to zero for 147 | a sparse solution. 148 | 149 | The R package `r rpkg("glmnet")` is the most commonly used package to estimate 150 | Lasso, ridge, and bridge regression for linear and generalized linear models. 151 | However, these methods are common enough that all machine learning frameworks 152 | will have some implementation of them. See other packages for variations on the 153 | lasso that take into account other dependencies in the data. 154 | 155 | How to find the value of $\lambda$? Cross validation. 156 | The function `cv.glmet()` uses cross-validation to select the penalty parameter. 157 | 158 | ## Regularization for Causal Inference 159 | 160 | Belloni, Chernozhukov, and Hansen (2014) propose a simple method for using Lasso 161 | for causal effects. 162 | 163 | What's the problem with regularized regression for causal inference? 164 | Suppose we estimate a model with the aim to recover $\beta_1$. 165 | $$ 166 | \Vec{y} = \alpha + \beta x + \gamma_1 z_1 + \cdots + \gamma_k z_{k-1} + \epsilon 167 | $$ 168 | If we estimate it with a regularized model, like lasso, then $\beta_1$ will be shrunk in addition to the controls. 169 | If we instead do not shrink $\beta_1$ but we shrink the controls enough. 170 | It will be closer to **not** controlling for the other variables since any part of 171 | of the treatment prediction of the outcome explained by the controls will be shrunk since those coefficients are penalized, but the treatment coefficient is not. 172 | 173 | 1. Run Lasso with the outcome $y$ on all controls, $z_1, \dots, \z_k$. 
174 | Keep all non-zero coefficients. 175 | 176 | 1. Run Lasso with the treatment $z$ on all controls, $z_1, \dots, z_k$. 177 | Keep all non-zero coefficients. 178 | 179 | 1. Run OLS with the outcome $y$ on the treatment, $x$, and all variables with 180 | a non-zero coefficient in either step 1 or 2. 181 | 182 | If the **true model is sparse** (and asymptotics), then by the Oracle property, 183 | we can treat the standard errors of the OLS coefficients in the last step as 184 | if the selection stage did not occur. 185 | 186 | See and the `r rpkg("hdm")` which implements this method, and extensions to work with high dimensional data in R. 187 | 188 | ## References 189 | 190 | It is a few years old, but the [2015 NBER Summer course](http://www.nber.org/econometrics_minicourse_2015/nber_slides11.pdf) has a good introduction to machine learning that is targeted at social scientists. 191 | -------------------------------------------------------------------------------- /reproducible-research.Rmd: -------------------------------------------------------------------------------- 1 | # Reproducible Research 2 | -------------------------------------------------------------------------------- /simple-regression.Rmd: -------------------------------------------------------------------------------- 1 | # OLS Estimator 2 | 3 | For **unbiasedness** 4 | 5 | 1. Linearity 6 | 1. Random (iid) sample 7 | 1. Variation in $X_i$ 8 | 1. Zero conditional mean of errors 9 | 10 | ## Linearity 11 | 12 | **Assumption 1** The population regression function is linear in the parameters. 13 | $$ 14 | Y = \beta_0 + \beta_1 X_i + u 15 | $$ 16 | 17 | Note that 18 | 19 | - $u$ is the *unobserved* disturbance term for all factors influencing $Y$ other than $X$ 20 | - This is different than the the CEF error - we are interpreting $\beta_1$ structurally. This is an assumption needed for $\hat{\beta}$ to be an unbiased estimator of the population $\beta$. 
It may still be the case that $\hat{\beta}$ is a good estimator for other quantities. 21 | 22 | A violation: 23 | $$ 24 | Y_i = \frac{1}{\beta_0 + \beta_1 X_i} + u_i 25 | $$ 26 | 27 | Sometimes we can transform non-linear cases to be linear. 28 | For example, while this is not linear, 29 | $$ 30 | Y_i = \exp(\beta_0) \exp(\beta_1 X_i) u_i 31 | $$ 32 | the log transformation is linear, 33 | $$ 34 | \log Y_i = \beta_0 + \beta_1 X_i + \log (u_i). 35 | $$ 36 | 37 | ## Random Sample 38 | 39 | **Assumption 2:** We have a iid random sample of size $n$ $\{Y_i, X_i: i = 1, \dots, n\}$ from the population regression model. 40 | 41 | This is a standard assumption for generalizing from a sample to a population. 42 | Violations include time-series and selected samples. 43 | 44 | ## Variation in $X$ 45 | 46 | **Assumption 3:** The in-sample independent variables $\{X_i: i = 1, \dots, n\}$ are not all the same value. 47 | 48 | Recall, the formula for the OLS slope is 49 | $$ 50 | \hat{\beta}_1 = \frac{\sum_{i = 1}^n (x_i - \bar{x}) (y_i - \bar{y})}{\sum_{i = 1}^n (x_i - \bar{x})^2} 51 | $$ 52 | If there is no variation in $x$, then all $x_i = \bar{x}$, 53 | and 54 | $$ 55 | \hat{\beta}_1 = \frac{\sum_{i = 1}^n (\bar{x} - \bar{x}) (y_i - \bar{y})}{\sum_{i = 1}^n (\bar{x} - \bar{x})^2} = \frac{0}{0} \to \text{undefined} . 56 | $$ 57 | 58 | ## Assumption 4 59 | 60 | **Assumption 4** The error $u_i$, has expected value of 0, given the values of the independent variable, 61 | $$ 62 | E(u_i | X_i = x) = 0, 63 | $$ 64 | for all $x$. 65 | 66 | This is the key assumption for a structural interpretation of $Y$. 67 | It says that all the other things that influence $Y$ on average have no effect on $Y$ at every value of $x$. 68 | 69 | When is this most plausible? When $X$ is randomly assigned, so it uncorrelated with the errors by design. 70 | In **observational** data this is difficult to justify. 
71 | 72 | *Consistency* is a property of an estimator that as the sample size gets larger, it approaches the true value, 73 | $$ 74 | \widehat{\beta}_1 \to^{p} \beta_1 75 | $$ 76 | 77 | For consistency, only as weaker version of Assumption 4 is needed. 78 | 79 | **Assumption 4(b)** The error is mean zero, $E(u_i) = 0$, and uncorrelated with $X$, $E(u_i X_i) = 0$. 80 | 81 | That the error is mean zero is not binding as long as we have an intercept in the model. 82 | 83 | That the errors are uncorrelated with the predictor. 84 | This is weaker than Assumption 4 because it only rules out *linear* relationships between $u$ and $X$. 85 | If there are unmodeled non-linearities OLS still captures the best linear approximation to the CEF. 86 | And this weaker assumption says that even if we miss those, we will be consistent estimates of the population line of best fit. 87 | 88 | Note that $\widehat{\beta}$ is a weighted sum of residuals, 89 | $$ 90 | \widehat{\beta}_1 = \beta_1 + \sum_{i = 1}^n W_i u_i . 91 | $$ 92 | So, 93 | $$ 94 | \sum_{i = 1}^n W_i u_i \to^p \frac{\Cov(X_i, u_i)}{V(X_i)} 95 | $$ 96 | Since $Cov(X_i, u_i) = 0$, $\widehat{\beta}_1 \to^p \beta_1$. 97 | 98 | **Where are we?** Under assumptions 1--4, $\widehat{beta} \sim ?(\beta_1, ?)$. 99 | These assumptions establish that the expected value of the sampling distribution is $E(\widehat{\beta}_1) = \beta_1$. 100 | However, they don't say anything about the distributional form (is it Normal?) or the standard deviation of the sampling distribution of $\hat{\beta_1}$. 101 | We need a few more assumptions to deal with that. 102 | 103 | ## Large Sample Inference 104 | 105 | **Assumption 5:** The conditional variance of $Y_i$ given $X_i$ is constant, 106 | $$ 107 | V(Y_i | X_i = x) = V(u_i | X_i = x) = \sigma^2_u . 108 | $$ 109 | 110 | The function which gives the values of the variance of $Y$ as a function of $X$ is called the **skedastic** function. 
111 | 112 | - **homodeskedasticity**: $V(Y | X = x) = V(u | X = x) = \sigma^2_u$ for all $x$ 113 | - **heteroskedasticity**: $V(u | X = x) \neq V(u | X = x')$ for some values of $x$ and $x'$. In other words, the conditional variance is not constant. 114 | 115 | ## Asymptotic Normality of OLS 116 | 117 | Do we need the errors to be distributed normal? No, not in large samples. 118 | The OLS error is a weighted sum of the residuals, 119 | $$ 120 | \hat{\beta}_1 - \beta = \sum_{i = 1}^n W_i u_i 121 | $$ 122 | Since the estimator error is a mean, the CLT holds, and the distribution of the errors (variance) will be distributed standard normal. 123 | $$ 124 | \frac{\hat{\beta}_1 - \beta_1}{SE(\hat{\beta}_)} \to N(0, 1) 125 | $$ 126 | Also, in large samples, we can plug in the estimated standard error for the population standard error, 127 | $$ 128 | \frac{\hat{\beta}_1 - \beta_1}{\widehat{SE}(\hat{\beta}_)} \to N(0, 1) 129 | $$ 130 | 131 | ## Small Sample Model-Based Inference 132 | 133 | The CLT tells us that the sampling distribution of $\beta$ is normal in large samples (asymptotically). 134 | What about small samples? 135 | To use the normal (t-distribution) for hypothesis testing, we need the assumption that the errors are distributed normal. 136 | 137 | **Assumption 6** The conditional distribution of $u$ given $X$ is Normal with mean 0 and variance $\sigma^2_u$. 138 | 139 | $$ 140 | \frac{\widehat{\beta}_1 - \beta_1}{SE(\hat{\beta}_1)} \sim N(0, 1) 141 | $$ 142 | 143 | If we plug in the sample standard error for the population standard error, the sampling distribution has a $t$-distribution with $n - k - 1$ (where $k$ is the number of predictors) degrees of freedom. 144 | $$ 145 | \frac{\widehat{\beta}_1 - \beta_1}{\widehat{SE}(\widehat{\beta}_1)} \sim \text{Student's-} t_{n - k - 1} 146 | $$ 147 | 148 | ## Assumptions Review 149 | 150 | What assumptions do we need to make for various uses of OLS? 151 | 152 | 1. Data description: variation in X 153 | 1. 
Consistency: linearity, iid, variation in X, uncorrelated errors 154 | 1. Unbiasedness: linearity, iid, variation in X, zero conditional mean errors 155 | 1. Large-sample inference: linearity, iid, variation in X, zero conditional mean error, homoskedasticity. 156 | 1. Small-sample inference: linearity, iid, variation in X, zero conditional mean error, homoskedasticity, Normal errors 157 | -------------------------------------------------------------------------------- /simpsons.Rmd: -------------------------------------------------------------------------------- 1 | # Simpson's Paradox 2 | 3 | The "paradox" is data where the a statistical association is present in 4 | every subgroup but the reverse association is present in the population. 5 | 6 | ## Examples 7 | 8 | ### Batting Averages 9 | 10 | This example is from: 11 | 12 | > Ken Ross. "A Mathematician at the Ballpark: Odds and Probabilities for Baseball Fans (Paperback)" Pi Press, 2004. ISBN 0-13-147990-3. 12–13 13 | 14 | This example involves the batting averages for the baseball players, Derek Jeter and David Justice. 15 | In both 1995 and 1996, David Justice had a higher batting average than Derek Jeter. 16 | But when aggregated, Derek Jeter had a higher batting average for 1995-96 than David Justice. 
17 | 18 | ```{r} 19 | batting_yearly <- tribble( 20 | ~ player, ~ year, ~ hits, ~ ab, 21 | "Derek Jeter", 1995, 12, 48, 22 | "Derek Jeter", 1996, 183, 582, 23 | "David Justice", 1995, 104, 411, 24 | "David Justice", 1996, 45, 140 25 | ) %>% 26 | mutate(avg = hits / ab) 27 | ``` 28 | 29 | ```{r} 30 | batting_total <- batting_yearly %>% 31 | group_by(player) %>% 32 | summarise(ab = sum(ab), hits = sum(hits)) %>% 33 | mutate(avg = hits / ab) 34 | ``` 35 | 36 | ```{r} 37 | ggplot() + 38 | geom_point(data = batting_yearly, 39 | mapping = aes(x = as.integer(as.factor(player)), 40 | y = avg, 41 | color = as.factor(year), 42 | size = ab)) + 43 | geom_line(data = batting_yearly, 44 | mapping = aes(x = as.integer(as.factor(player)), 45 | y = avg, color = as.factor(year))) + 46 | geom_point(data = batting_total, 47 | mapping = aes(x = as.integer(as.factor(player)), 48 | y = avg, 49 | size = ab)) + 50 | geom_line(data = batting_total, 51 | mapping = aes(x = as.integer(as.factor(player)), 52 | y = avg)) + 53 | scale_x_continuous("Player", breaks = 1:2, 54 | labels = levels(as.factor(batting_yearly$player))) + 55 | scale_y_continuous("Batting Avg.") + 56 | scale_color_discrete("Year") + 57 | scale_size_continuous("At Bats") 58 | 59 | ``` 60 | 61 | ### Kidney Stones 62 | 63 | See 64 | 65 | ```{r} 66 | kidney_stones <- tribble( 67 | ~ treatment, ~ size, ~ success, ~ n, 68 | 0, "Small", 81, 87, 69 | 0, "Large", 192, 263, 70 | 1, "Small", 234, 270, 71 | 1, "Large", 55, 80 72 | ) %>% 73 | mutate(p = success / n) 74 | ``` 75 | 76 | ```{r} 77 | kidney_stones_total <- kidney_stones %>% 78 | group_by(treatment) %>% 79 | summarise(success = sum(success), n = sum(n)) %>% 80 | mutate(p = success / n) 81 | ``` 82 | 83 | ```{r} 84 | ggplot() + 85 | geom_point(data = kidney_stones, 86 | mapping = aes(x = treatment, y = p, 87 | color = size, size = n)) + 88 | geom_line(data = kidney_stones, 89 | mapping = aes(x = treatment, y = p, color = size)) + 90 | geom_point(data = kidney_stones_total, 91 
| mapping = aes(x = treatment, y = p, size = n)) + 92 | geom_line(data = kidney_stones_total, 93 | mapping = aes(x = treatment, y = p)) + 94 | scale_x_continuous("Treatment", 95 | breaks = c(0, 1), labels = c("A", "B")) + 96 | scale_y_continuous("% Success") + 97 | scale_color_discrete("Kidney Stone Size") + 98 | scale_size_continuous("Number in Group") 99 | 100 | ``` 101 | 102 | ### Blood Pressure Drug 103 | 104 | Example from Pearl et al. (p. 4) original from Simpson (1951). 105 | 106 | Consider data on 700 sick patients given the opportunity to try a new drug, of which 350 *chose* to take the new drug. 107 | The number of patients in each gender ("Male", "Female") and the number recovered were recorded. 108 | 109 | ```{r} 110 | tribble( 111 | ~ gender, ~ drug, ~ recovered, ~ n, 112 | "Men", 1, 81, 87, 113 | "Men", 0, 234, 270, 114 | "Women", 1, 192, 263, 115 | "Women", 0, 289, 350 116 | ) 117 | ``` 118 | 119 | 1. What is the overall recovery rate ? 120 | 1. What is the recovery rate within each gender? 121 | 122 | ### Berkeley Admissions 123 | 124 | This is an example commonly used to illustrate Simpson's Paradox, first appearing in 125 | 126 | > 127 | 128 | It concerns the admission rate of women to graduate University of 129 | Across all graduate departments, the admissions rate of women applicants was less than male applicants. 130 | However, within all departments considered, the admissions rate for women was higher than that of males. 131 | The reversal of association was due to women applying to more selective programs which had lower overall admissions rates. 132 | 133 | The dataset is in the recommended R package `r rpkg("datasets")` as `r rdoc("datasets::UCBAdmissions")`. 134 | 135 | These links provide good visualizations of the data: 136 | 137 | - [Simpson's Paradox](http://vudlab.com/simpsons/) 138 | - 139 | 140 | ### Teacher Salary and Test Scores 141 | 142 | Simpson's Paradox can occur with continuous data. 
This example is from: 143 | 144 | > Deborah Lynn Guber, "Getting what you pay for: the debate over equity in public school expenditures" (1999), *Journal of Statistics Education* 145 | 146 | This example concerns school expenditures and test scores. 147 | The proportion of school expenditure and SAT test scores. 148 | 149 | The data is included in the package `r rpkg("mosaicData")` as the dataset `r rdoc("mosaicData::SAT")`: 150 | 151 | ```{r} 152 | data("SAT", package = "mosaicData") 153 | ``` 154 | 155 | For the fifty US states, there is a negative relationship between school expenditures and SAT test scores. 156 | 157 | ```{r} 158 | ggplot(SAT, aes(label = state, x = salary, y = sat)) + 159 | geom_text() + 160 | geom_smooth(method = "lm", se = FALSE) 161 | ``` 162 | 163 | However, when the states are categorized by the fraction of students taking the SAT, there is a positive or negligible association between school expenditure and SAT expenditure within each subgroup: 164 | 165 | ```{r} 166 | mutate(SAT, 167 | frac_cat = cut_number(frac, 3)) %>% 168 | ggplot(aes(label = state, x = salary, y = sat, color = frac_cat)) + 169 | geom_text() + 170 | geom_smooth(method = "lm", se = FALSE) 171 | ``` 172 | 173 | ### Other Examples 174 | 175 | - U.S. Education and Income. Norris, Floyd. "[Can Every Group Be Worse Than Average? Yes.](https://economix.blogs.nytimes.com/2013/05/01/can-every-group-be-worse-than-average-yes/)". *New York*. May 1, 2013. 176 | - Armstrong, Zan, and Martin Mattenberg. 2014. "[Visualizing Statistical Mix Effects and Simpson’s Paradox](https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/42901.pdf)" 177 | - Horton, Bob. [Fun with Simpson's Paradox: Simulating Confounders](http://blog.revolutionanalytics.com/2015/11/fun-with-simpsons-paradox-simulating-confounders.html) November 17, 2015. 178 | - Burn-Murdoch, John. 
"[Germany’s election and the trouble with correlation](https://www.ft.com/content/94e3acec-a767-11e7-ab55-27219df83c97)" *Financial Times*. October 2, 2017. 179 | - Income and Party Affiliation in the United States. DOI 10.1561/100.00006026 180 | 181 | ## Casual Inference and Simpson's Paradox 182 | 183 | The Simpson's Paradox does not directly have anything to do with causal inference, which should be obvious given the clearly non-causal baseball example. 184 | 185 | Simpson's example is a special case of *omitted variable bias* (discussed in more detail later). 186 | It does illustrate how associations between variables can be much different when looking at subpopulations (or controlling for a variable). 187 | 188 | It does suggest a major difficulty in conducting causal inference with observational data. However, whether those variables need to be controlled for or not is an *extra-statistical* question and cannot be revealed by the associations alone. 189 | 190 | In the new drug example, women are more likely to take the drug *and* less likely to recover. In other words, gender is a common cause of taking the drug and recovery rate. (this example is from Pearl, Glymour, and Jewell). 191 | To assess how effective the drug is, we need to compare group 192 | 193 | However, consider blood pressure observed after the administration of the drug. 194 | Reducing blood pressure may be a mechanism by which the drug affects recovery rate. 195 | Comparing patients the recovery rate of patients with the same blood pressure level *after* taking the drug does not make sense. 
196 | -------------------------------------------------------------------------------- /word-processing.Rmd: -------------------------------------------------------------------------------- 1 | # Typesetting and Word Processing Programs 2 | 3 | ## LaTeX 4 | 5 | [LaTeX](https://en.wikipedia.org/wiki/LaTeX) is a document markup language (think something like HTML) that is widely used in academia.[^pronunciation] 6 | Its primary advantages over Word (and word processors) are the separation of content and presentation and its formatting of mathematical equations. 7 | In addition to papers, it is often used for academic slides; many talk slides are prepared with beamer. 8 | 9 | ### Learning LaTeX 10 | 11 | Here are some links to get started learning LaTeX: 12 | 13 | - [Overleaf Free & Interactive Online Introduction to LaTeX](https://www.overleaf.com/latex/learn/free-online-introduction-to-latex-part-1) 14 | - [LaTeX Tutorial](https://www.latex-tutorial.com/tutorials/) has interactive lessons 15 | - [ShareLaTeX Documentation](https://www.sharelatex.com/learn/) 16 | - [Overleaf Example Templates](https://www.overleaf.com/latex/templates/) has many different examples of LaTeX documents. 17 | - [LaTeX Wikibook](https://en.wikibooks.org/wiki/LaTeX) 18 | - [Not So Short Introduction to LaTeX](https://tobi.oetiker.ch/lshort/lshort.pdf) is a classic, but not as as new-user friendly as the others. 19 | 20 | ### Using LaTeX 21 | 22 | - Use an online service such as [Overleaf](https://www.overleaf.com/) or [ShareLaTeX](https://www.sharelatex.com/). These are great for collaboration, but become inflexible 23 | when you want to customize your workflow. 24 | 25 | - Write it with a specialized editor such as [TeXmaker](http://www.xm1math.net/texmaker/), [TeXStudio](http://www.texstudio.org/), or [TeXshop](http://pages.uoregon.edu/koch/texshop/). These generally have 26 | built ways to insert text, and also live preview. 
I would stay away from editors such as [LyX](https://www.lyx.org/) that are [WYSIWYG](https://en.wikipedia.org/wiki/WYSIWYG). 27 | 28 | - Write it with an general purpose editor such as [Atom](https://atom.io/) or [Sublime Text](https://www.sublimetext.com/).[^1] Most editors have a plugin 29 | to make writing LaTeX easier. For Atom there is [LaTeXTools](https://atom.io/packages/latextools), and for Sublime Text, [LaTeXTools](https://github.com/SublimeText/LaTeXTools) 30 | 31 | [^1]: And of course [Vim](http://www.vim.org/) or [Emacs](https://www.gnu.org/software/emacs/). 32 | 33 | ### LaTeX with R 34 | 35 | This is pretty easy. Rnw, also called Sweave, documents allow you to mix R chunks with LaTeX. 36 | This is similar to R markdown, but with LaTeX instead of markdown.[^2] 37 | 38 | Many packages, such as [xtable](https://cran.r-project.org/package=xtable), [stargazer](ttps://cran.r-project.org/package=stargazer), or [texreg](ttps://cran.r-project.org/package=texreg) produce formatted output in LaTeX. 39 | When you use these programs, do not copy and paste the output. Instead, save it to a file, 40 | and use `\input{}` to include the contents in your document. 41 | 42 | [^2]: And [Sweave](https://www.statistik.lmu.de/~leisch/Sweave/) files preceded R markdown and knitr by many years. 43 | 44 | ## Word 45 | 46 | While I use LaTeX in my own work, Microsoft Word is powerful piece of software, 47 | and many of the complaints against Word come down to not being aware of its 48 | features. There are many tools you can use to build your research paper; 49 | whatever tool you use, learn how to use it proficiently. 50 | 51 | ### General Advice 52 | 53 | This guide on using [Microsoft Word for Dissertations](http://guides.lib.umich.edu/c.php?g=283073&p=1886001) 54 | covers everything and more that I would have. 
Also see [this](http://www3.nd.edu/~shill2/dtclass/word_2013_word_for_research_projects.pdf) 55 | 56 | - [separate presentation and content](https://en.wikipedia.org/wiki/Separation_of_presentation_and_content) using styles 57 | 58 | - Automatically number figures and tables 59 | 60 | - Use a reference manager like [Mendeley](https://www.mendeley.com/), [Zotero](https://www.zotero.org/), [colwiz](https://www.colwiz.com/app), or [Papers](http://www.papersapp.com/). They have plugins for citations in Word. 61 | 62 | - When exporting figures for Word, if you must use a [raster graphic](https://en.wikipedia.org/wiki/Raster_graphics) use PNG files (not JPEG). For publication, use a high DPI (600) with PNG graphics. 63 | 64 | - Learn to use *Fields*. You can insert figures from files that you can 65 | update using `Insert > Field > Links and References > IncludePicture`. 66 | This is useful for programmatically generating figures to insert into 67 | your document. Likewise, you can insert text from files that you can 68 | update using `Insert > Field > Links and References > IncludeText`. 69 | 70 | ### Using R with Word 71 | 72 | For a dynamic reports you can use [R Markdown](http://rmarkdown.rstudio.com/word_document_format.html) and export to a word document. When doing this, use a reference document to set the the styles that you will use. 73 | See [Happy collaboration with Rmd to docx](http://rmarkdown.rstudio.com/articles_docx.html) for more advice on using R Markdown with Word. 74 | 75 | When using functions from packages such as [xtable](https://cran.r-project.org/package=xtable), [stargazer](ttps://cran.r-project.org/package=stargazer), or [texreg](ttps://cran.r-project.org/package=texreg) output HTML, which can be copy and pasted into word. 76 | 77 | Finally, the [ReporteR](http://davidgohel.github.io/ReporteRs/word.html) package is an alternative method to generate Word Documents from R. 
78 | 79 | [^pronunciation]: TeX is pronounced as "teck" because the X is a Greek chi. The pronunciation of of LaTeX is thus lah-teck or lay-teck. It is not 80 | pronounced like the rubber compound. See this [StackExchange](http://tex.stackexchange.com/questions/17502/what-is-the-correct-pronunciation-of-tex-and-latex) question on the pronunciation of LaTeX. 81 | -------------------------------------------------------------------------------- /writing.Rmd: -------------------------------------------------------------------------------- 1 | # Writing Resources 2 | 3 | ## Writing and Organizing Papers 4 | 5 | - Chris Adolph. [Writing Empirical Papers: 6 Rules & 12 Recommendations](http://faculty.washington.edu/cadolph/503/papers.pdf) 6 | 7 | - Barry R. Weingast. 2015. [CalTech Rules for Writing Papers: How to Structure Your Paper and Write an Introduction](https://web.stanford.edu/group/mcnollgast/cgi-bin/wordpress/wp-content/uploads/2013/10/CALTECH.RUL_..pdf) 8 | 9 | - [The Science of Scientific Writing](http://www.americanscientist.org/issues/id.877,y.0,no.,content.true,page.1,css.print/issue.aspx) *American Scientist* 10 | 11 | - Deidre McCloskey. [Economical Writing](http://www.amazon.com/Economical-Writing-Deirdre-McCloskey/dp/1577660633/) 12 | 13 | - William Thompson. [A Guide for the Young Economist](http://www.amazon.com/Guide-Young-Economist-MIT-Press/dp/026251589X). "Chapter 2: Writing Papers." 14 | 15 | - Stephen Van Evera. [Guide to Methods for Students of Political Science](http://www.amazon.com/Guide-Methods-Students-Political-Science/dp/080148457X). Appendix. 16 | 17 | - Joseph M. Williams and Joseph Bizup. [Style: Lessons in Clarity and Grace](http://www.amazon.com/dp/0321898680/) 18 | 19 | - Strunk and White. 
*The Elements of Style* 20 | 21 | - [Chicago Manual of Style](http://www.chicagomanualofstyle.org/) and [APSA Style Manual for Political Science](http://www.apsanet.org/Portals/54/APSA%20Files/publications/APSAStyleManual2006.pdf) for editorial and style issues. 22 | 23 | - [How to construct a Nature summary paragraph](http://www.nature.com/nature/authors/gta/Letter_bold_para.doc). Though specific to *Nature*, it provides good advice for structuring abstracts or introductions. 24 | 25 | - Ezra Klein. [How researchers are terrible communications, and how they can do better](http://chrisblattman.com/2015/11/05/ezra-klein-how-researchers-are-terrible-communicators-and-how-they-can-do-better/). 26 | 27 | - The advice in the *AJPS* [Instructions for Submitting Authors](http://ajps.org/guidelines-for-manuscripts/) is a concise description of how to write an abstract: 28 | 29 | > The abstract should provide a very concise descriptive summary of the research stream to which the manuscript contributes, the specific research 30 | > topic it addresses, the research strategy employed for the analysis, the results obtained from the analysis, and the implications of the findings. 31 | 32 | - [Concrete Advice for Writing Informative Abstracts](http://connection.sagepub.com/blog/sage-connection/2014/05/15/concrete-advice-for-writing-informative-abstracts/) and [How to Carefully Choose Useless Titles for Academic Writing](http://www.socialsciencespace.com/2014/03/how-to-carefully-choose-useless-titles-for-academic-writing/) 33 | 34 | ## Finding Research Ideas 35 | 36 | - Paul Krugman [How I Work](http://web.mit.edu/krugman/www/howiwork.html) 37 | - Hal Varian. 
[How to build an Economic Model in your spare time](http://people.ischool.berkeley.edu/~hal/Papers/how.pdf) 38 | - Greg Mankiw, [My Rules of Thumb](http://faculty.som.yale.edu/jameschoi/mankiw_tips.pdf): 39 | - The links in [Advice for Grad Students](http://gregmankiw.blogspot.com/2006/05/advice-for-grad-students.html) 40 | 41 | ## Replications 42 | 43 | Gary King has advice on how to turn a replication into a publishable paper: 44 | 45 | - Gary King [How to Write a Publishable Paper as a Class Project](http://gking.harvard.edu/papers) 46 | 47 | - Gary King. 2006. "[Publication, Publication.](http://gking.harvard.edu/files/abs/paperspub-abs.shtml)" *PS: Political Science and Politics*. 48 | 49 | - [Political Science Should Not Stop Young Researchers from Replicating](https://politicalsciencereplication.wordpress.com/2015/06/15/political-science-should-not-stop-young-researchers-from-replicating/) 50 | from the [Political Science Replication](https://politicalsciencereplication.wordpress.com) blog. 51 | 52 | And see the examples of students replications from his Harvard course at . 53 | 54 | Famous replications. 55 | 56 | - "Irregularities in LaCour (2014) [@BroockmanKallaAronow2015a] 57 | - "Does High Public Debt Consistently Stifle Economic Growth? A Critique of Reinhart and Rogoff." [@HerndonAshPollin2013a] 58 | 59 | However, although those replications are famous for finding fraud or obvious 60 | errors in the analysis, replications can lead to extensions and generate new 61 | ideas. This was the intent of @BroockmanKallaAronow2015a when starting the 62 | replication. 63 | --------------------------------------------------------------------------------