├── .gitignore
├── COPYING
├── Makefile
├── README.md
├── figs
│   ├── Makefile
│   ├── distributions.R
│   └── relationships.pdf
├── literature.bib
├── probstat.tex
└── stat-cookbook.tex

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
*.fdb_latexmk
*.aux
*.bbl
*.blg
*.brf
*.fls
*.log
*.out
*.sw?
*.toc
*.gz
figs/*
!figs/relationships.pdf
stat-cookbook.pdf

--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
This work is licensed under the Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy
of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or
send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
DOC := stat-cookbook.tex

RERUN := "(undefined references|Rerun to get (cross-references|the bars|point totals) right|Table widths have changed. Rerun LaTeX.|Linenumber reference failed)"
RERUNBIB := "No file.*\.bbl|Citation.*undefined"

all: figs doc

figs:
	@$(MAKE) -C $@

doc: $(DOC:.tex=.pdf)

%.pdf: %.tex
	pdflatex $<
	@egrep -q $(RERUNBIB) $*.log && bibtex $* && pdflatex $<; true
	@egrep -q $(RERUN) $*.log && pdflatex $<; true
	@egrep -q $(RERUN) $*.log && pdflatex $<; true

latexmk:
	-latexmk -pvc -pdf $(DOC)

purge:
	-rm -f *.{aux,dvi,log,bbl,blg,brf,fls,toc,thm,out,fdb_latexmk}

clean: purge
	$(MAKE) -C figs $@
	-rm -f $(DOC:.tex=.pdf)

.PHONY: all figs purge clean

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
The [probability and statistics cookbook][stat-cookbook] contains a succinct
representation of various topics in probability theory and statistics. It
provides a comprehensive mathematical reference reduced to its essence, rather
than aiming for elaborate explanations.

Feel encouraged to extend the cookbook by forking it and submitting pull
requests.

Build Setup
-----------

You can build the cookbook locally via:

    make

This first generates the distribution plots via R and then compiles the LaTeX
source. You may have to install a few missing packages via CRAN.

License
-------

This work is licensed under an [Attribution-NonCommercial-ShareAlike 4.0
International License][by-nc-sa].

[![Creative Commons License][by-nc-sa-img]][by-nc-sa]

[stat-cookbook]: http://statistics.zone
[by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
[by-nc-sa-img]: http://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png

--------------------------------------------------------------------------------
/figs/Makefile:
--------------------------------------------------------------------------------
.PHONY: all clean

# We take the first plot as an indicator for all plots until the R script
# allows for more fine-grained plot generation.
all: beta-cdf.pdf

%.pdf:
	R --vanilla < distributions.R

clean:
	-rm -f *-*.pdf

--------------------------------------------------------------------------------
/figs/distributions.R:
--------------------------------------------------------------------------------
# Install needed packages if necessary.
needed_packages = c("ggplot2", "reshape2", "grid", "RColorBrewer", "VGAM")
if (length(setdiff(needed_packages, rownames(installed.packages()))) > 0) {
  install.packages(setdiff(needed_packages, rownames(installed.packages())),
                   dependencies=TRUE, repos="http://cran.r-project.org")
}

library(ggplot2)
library(reshape2)
library(grid)
library(RColorBrewer)
library(VGAM) # [dp]pareto

line_width = 1.3
point_size = 4
theme_set(theme_bw(base_size=20))
theme_update(legend.background=element_rect(fill=alpha("white", 0)),
             legend.key=element_rect(colour="white"),
             legend.key.width=unit(3, "lines"),
             plot.margin=unit(rep(0, 4), "lines"))

# FIXME: is it possible to move this statement into theme_update?
scale_color_discrete = function(...) scale_color_brewer(..., palette="Dark2")


# --------------------------------------------------------------------------- #

make.dist.fn <- function(mode, dist) {
  if (mode == "cdf")
    eval(parse(text=paste("p", dist, sep="")))
  else if (mode == "pdf" || mode == "pmf")
    eval(parse(text=paste("d", dist, sep="")))
  else
    stop("invalid mode: must be 'cdf' or 'pdf/pmf'")
}

make.data <- function(mode, dist, theta, xseq) {
  dist.fn <- make.dist.fn(mode, dist)
  unary <- function(...) function(x) dist.fn(x, ...)
  data.fns <- apply(theta, 1, function(x) do.call(unary, as.list(t(x))))
  values <- data.frame(sapply(data.fns, function(f) f(xseq)))
  cbind(x=xseq, values)
}

plot.dist <- function(xseq, theta, dist, mode, title, lab.fn) {
  values <- make.data(mode, dist, theta, xseq)
  molten <- melt(values, 1)
  labels <- apply(theta, 1, function(x) do.call(lab.fn, as.list(t(x))))
  p <- ggplot(molten, aes(x=x, y=value, color=variable, linetype=variable)) +
    ggtitle(title) +
    ylab(toupper(mode)) +
    scale_color_discrete(labels=labels) +
    scale_linetype_discrete(labels=labels)

  # We position the legend for CDFs bottom-right and for P[MD]Fs top-right.
  if (mode == "cdf")
    p <- p + theme(legend.title=element_blank(),
                   legend.justification=c(1, 0),
                   legend.position=c(1, 0))
  else
    p <- p + theme(legend.title=element_blank(),
                   legend.justification=c(1, 1),
                   legend.position=c(1, 1))
  p
}

plot.discrete <- function(from, to, ...) {
  xseq <- seq(from, to)
  plot.dist(xseq, ...) +
    geom_line(size=line_width) +
    geom_point(size=point_size)
}

plot.continuous <- function(from, to, ...) {
  xseq <- seq(from, to, by=0.01)
  plot.dist(xseq, ...) +
    geom_line(size=line_width)
}

# --------------------------------------------------------------------------- #

plot.uniform.cdf.discrete <- function() {
  xseq <- 3:7
  x <- melt(as.data.frame(cbind(xseq, ecdf(xseq)(xseq))), 1)
  ggplot(x, aes(x=xseq, y=value)) +
    geom_point(size=point_size) +
    geom_segment(aes(x=xseq, y=value, xend=xseq+1, yend=value)) +
    geom_segment(aes(x=3.1, y=0.2, xend=4, yend=0.2)) +
    geom_segment(aes(x=6, y=0.8, xend=6.9, yend=0.8)) +
    geom_point(aes(x=xseq+1), size=point_size, color="white", shape=19) +
    geom_point(aes(x=xseq+1), size=point_size, shape=1) +
    ggtitle("Uniform (discrete)") +
    labs(x="x", y="CDF") +
    theme(panel.grid.minor=element_blank()) +
    scale_x_continuous(name="x", limits=c(3.1, 6.9), breaks=4:6,
                       labels=c("a", "", "b")) +
    scale_y_continuous(name="CDF", limits=c(0.2, 0.8),
                       breaks=c(0.2, 0.4, 0.6, 0.8),
                       labels=c(0, expression(frac(i, n)),
                                expression(frac(i, n)), 1.0))
}

plot.uniform.cdf.continuous <- function() {
  x <- as.data.frame(rbind(c(0,0,2,0), c(2,0,6,1), c(6,1,8,1)))
  ggplot(x) +
    geom_segment(aes(x=V1, y=V2, xend=V3, yend=V4)) +
    ggtitle("Uniform (continuous)") +
    labs(x="x", y="CDF") +
    theme(panel.grid.minor=element_blank()) +
    scale_x_continuous(breaks=c(2,6), labels=c("a", "b")) +
    scale_y_continuous(limits=0:1, breaks=0:1, labels=0:1)
}

plot.uniform.pmf <- function() {
  xseq <- 3:8
  ggplot(data.frame(x0=factor(xseq), x1=xseq, y0=0, y1=0.5)) +
    aes(x=x0, y=y1) +
    geom_point(size=point_size) +
    # geom_segment(aes(x=x1, xend=x1, y=y0, yend=y1), linetype="dashed") +
    labs(title="Uniform (discrete)") +
    theme(panel.grid.minor=element_blank()) +
    scale_x_discrete(name="x",
                     breaks=xseq,
                     limits=1:10,
                     labels=c("a", rep("", length(xseq)-2), "b")) +
    scale_y_continuous(name="PMF",
                       breaks=0.5,
                       limits=0:1,
                       labels=expression(frac(1, n)))
}

plot.uniform.pdf <- function() {
  solid <- data.frame(x0=c(1, 3, 8),
                      x1=c(3, 8, 10),
                      y0=c(0, 0.5, 0),
                      y1=c(0, 0.5, 0))
  dashed <- data.frame(x0=c(solid[1,2], solid[3,1]),
                       x1=c(solid[1,2], solid[2,2]),
                       y0=c(solid[1,3], solid[3,3]),
                       y1=c(solid[2,3], solid[2,4]))
  filled <- data.frame(x=c(solid[2,1], solid[3,1]),
                       y=c(solid[2,3], solid[2,3]))
  hollow <- data.frame(x=c(solid[2,1], solid[3,1]),
                       y=c(solid[1,3], solid[3,3]))

  ggplot(solid) +
    geom_segment(aes(x=x0, xend=x1, y=y0, yend=y1), size=line_width) +
    geom_segment(data=dashed,
                 aes(x=x0, xend=x1, y=y0, yend=y1),
                 size=line_width,
                 linetype="dashed") +
    geom_point(data=filled, aes(x=x, y=y), size=point_size) +
    geom_point(data=hollow, aes(x=x, y=y), size=point_size, shape=21,
               fill="white") +
    theme(panel.grid.minor=element_blank()) +
    ggtitle("Uniform (continuous)") +
    scale_x_continuous(name="x",
                       breaks=c(solid[1,2], solid[3,1]),
                       limits=c(solid[1,1], solid[3,2]),
                       labels=c("a", "b")) +
    scale_y_continuous(name="PDF",
                       breaks=solid[2,3],
                       limits=0:1,
                       labels=expression(frac(1, b-a)))
}

# --------------------------------------------------------------------------- #

plot.binomial <- function(mode, xmin=1, xmax=40,
                          theta=data.frame(n=c(40, 30, 25), p=c(0.3, 0.6, 0.9)),
                          title="Binomial") {
  lab.fn <- function(x, y) substitute(list(n==i, p==j), list(i=x, j=y))
  plot.discrete(xmin, xmax, theta, "binom", mode, title, lab.fn)
}

plot.geometric <- function(mode, xmin=0, xmax=10,
                           theta=data.frame(p=c(0.2, 0.5, 0.8)),
                           title="Geometric") {
  lab.fn <- function(x) substitute(p==i, list(i=x))
  plot.discrete(xmin, xmax, theta, "geom", mode, title, lab.fn)
}

plot.poisson <- function(mode, xmin=0, xmax=20,
                         theta=data.frame(lambda=c(1,4,10)),
                         title="Poisson") {
  lab.fn <- function(x) substitute(lambda==i, list(i=x))
  plot.discrete(xmin, xmax, theta, "pois", mode, title, lab.fn)
}

# --------------------------------------------------------------------------- #

plot.normal <- function(mode, xmin=-5, xmax=5,
                        theta=data.frame(mu=c(0,0,0,-2), s2=c(0.2,1,5,0.5)),
                        title="Normal") {
  lab.fn <- function(x, y) substitute(list(mu==i, sigma^2==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "norm", mode, title, lab.fn)
}

plot.lognormal <- function(mode, xmin=0, xmax=3,
                           theta=data.frame(mu=c(0,2,0,1/2,1/4,1/8),
                                            s2=c(3,2,1,1,1,1)),
                           title="Log-Normal") {
  lab.fn <- function(x, y) substitute(list(mu==i, sigma^2==j), list(i=x, j=y))
  p <- plot.continuous(xmin, xmax, theta, "lnorm", mode, title, lab.fn)
  if (mode == "cdf")
    p <- p + theme(legend.justification=c(0, 1), legend.position=c(0, 1))

  p
}

plot.student <- function(mode, xmin=-5, xmax=5,
                         theta=data.frame(c(1,2,5,Inf)),
                         title=expression(bold("Student\'s") ~ italic(t))) {
  lab.fn <- function(x) {
    if (x == Inf)
      quote(nu==infinity)
    else
      substitute(nu==i, list(i=x))
  }

  plot.continuous(xmin, xmax, theta, "t", mode, title, lab.fn)
}

plot.chisquare <- function(mode, xmin=0, xmax=8,
                           theta=data.frame(1:5),
                           title=expression(chi^2)) {
  lab.fn <- function(x) substitute(k==i, list(i=x))
  plot.continuous(xmin, xmax, theta, "chisq", mode, title, lab.fn)
}

plot.f <- function(mode, xmin=0, xmax=5,
                   theta=data.frame(d1=c(1,2,5,100,100), d2=c(1,1,2,1,100)),
                   title="F") {
  lab.fn <- function(x, y) substitute(list(d[1]==i, d[2]==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "f", mode, title, lab.fn)
}

plot.exp <- function(mode, xmin=0, xmax=5,
                     theta=data.frame(c(2,1,0.4)),
                     title="Exponential") {
  lab.fn <- function(x) substitute(beta==i, list(i=1/x))
  plot.continuous(xmin, xmax, theta, "exp", mode, title, lab.fn)
}

plot.gamma <- function(mode, xmin=0, xmax=20,
                       theta=data.frame(a=c(1,2,3,5,9), b=c(0.5,0.5,0.5,1,2)),
                       title="Gamma") {
  lab.fn <- function(x, y) substitute(list(alpha==i, beta==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "gamma", mode, title, lab.fn)
}

dinvgamma <- function(x, shape = 1, rate = 1, scale = 1/rate, log = FALSE) {
  logval <- shape * log(rate) - lgamma(shape) - (shape+1) * log(x) - rate/x
  if (log)
    logval
  else
    exp(logval)
}

pinvgamma <- function(q, shape = 1, rate = 1, scale = 1/rate,
                      lower.tail = TRUE, log.p = FALSE) {
  pgamma(1 / q, shape, rate, scale, !lower.tail, log.p)
}

plot.invgamma <- function(mode, xmin=0, xmax=5,
                          theta=data.frame(a=c(1,2,3,3), b=c(1,1,1,0.5)),
                          title="Inverse Gamma") {
  lab.fn <- function(x, y) substitute(list(alpha==i, beta==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "invgamma", mode, title, lab.fn)
}

#plot.dirichlet = function()
#{
#  require(MCMCpack)
#
#  a = list(c(6,2,2), c(3,7,5), c(6,2,6), c(2,3,4))
#  seqs = seq(0, 15, by=0.01) # FIXME: choose right input
#  f = function(v) ddirichlet(cbind(seqs, seqs, seqs), v)
#
#  # TODO
#  mapply(f, a, b)
#
#  s = function(k) substitute(list(alpha == i), list(i=a[k]))
#  labs = lapply(1:length(a), s)
#}

plot.beta <- function(mode, xmin=0, xmax=1,
                      theta=data.frame(a=c(0.5,5,1,2,2), b=c(0.5,1,3,2,5)),
                      title="Beta") {
  lab.fn <- function(x, y) substitute(list(alpha==i, beta==j), list(i=x, j=y))
  p <- plot.continuous(xmin, xmax, theta, "beta", mode, title, lab.fn)

  if (mode == "cdf")
    p <- p + theme(legend.justification=c(0, 1), legend.position=c(0, 1))
  else
    p <- p + theme(legend.justification=c(0.5, 1), legend.position=c(0.5, 1))

  p
}

# stats::dweibull has signature dweibull(x, shape, scale), so the shape
# parameter k must come first in theta; otherwise every curve is drawn with
# shape 1 and mislabeled.
plot.weibull <- function(mode, xmin=0, xmax=2.5,
                         theta=data.frame(k=c(0.5,1,1.5,5), lambda=c(1,1,1,1)),
                         title="Weibull") {
  lab.fn <- function(x, y) substitute(list(lambda==j, k==i), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "weibull", mode, title, lab.fn)
}

plot.pareto <- function(mode, xmin=0.8, xmax=2.5,
                        theta=data.frame(xm=c(1,1,1), a=c(1,2,4)),
                        title="Pareto") {
  lab.fn <- function(x, y) substitute(list(x[m]==i, alpha==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "pareto", mode, title, lab.fn)
}

# --------------------------------------------------------------------------- #

store <- function(name, p) {
  ggsave(paste(name, "pdf", sep="."), p)
}

store("uniform-pmf", plot.uniform.pmf())
store("uniform-pdf", plot.uniform.pdf())
store("uniform-cdf-discrete", plot.uniform.cdf.discrete())
store("uniform-cdf-continuous", plot.uniform.cdf.continuous())

store("binomial-pmf", plot.binomial("pmf"))
store("binomial-cdf", plot.binomial("cdf"))
store("geometric-pmf", plot.geometric("pmf"))
store("geometric-cdf", plot.geometric("cdf"))
store("poisson-pmf", plot.poisson("pmf"))
store("poisson-cdf", plot.poisson("cdf"))

store("normal-pdf", plot.normal("pdf"))
store("normal-cdf", plot.normal("cdf"))
store("lognormal-pdf", plot.lognormal("pdf") + ylim(0,1))
store("lognormal-cdf", plot.lognormal("cdf"))
store("student-pdf", plot.student("pdf"))
store("student-cdf", plot.student("cdf"))
store("chisquare-pdf", plot.chisquare("pdf") + ylim(0,1))
store("chisquare-cdf", plot.chisquare("cdf"))
store("f-pdf", plot.f("pdf"))
store("f-cdf", plot.f("cdf"))
store("exponential-pdf", plot.exp("pdf"))
store("exponential-cdf", plot.exp("cdf"))
store("gamma-pdf", plot.gamma("pdf"))
store("gamma-cdf", plot.gamma("cdf"))
store("invgamma-pdf", plot.invgamma("pdf"))
store("invgamma-cdf", plot.invgamma("cdf"))
store("beta-pdf", plot.beta("pdf"))
store("beta-cdf", plot.beta("cdf"))
store("weibull-pdf", plot.weibull("pdf"))
store("weibull-cdf", plot.weibull("cdf"))
store("pareto-pdf", plot.pareto("pdf"))
store("pareto-cdf", plot.pareto("cdf"))
--------------------------------------------------------------------------------
/figs/relationships.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mavam/stat-cookbook/09bd804a5251911088d55f8ce787a991b8b4ba15/figs/relationships.pdf

--------------------------------------------------------------------------------
/literature.bib:
--------------------------------------------------------------------------------
@Article{Leemis08,
  author    = {Lawrence M. Leemis and Jacquelyn T. McQueston},
  title     = {{Univariate Distribution Relationships}},
  journal   = {The American Statistician},
  year      = 2008,
  volume    = {62},
  pages     = {45--53},
  number    = {1},
  timestamp = {2008.10.16}
}

@Book{Hoel72,
  author    = {Paul G. Hoel and Sidney C. Port and Charles J. Stone},
  title     = {{Introduction to Probability Theory}},
  publisher = {Brooks Cole},
  year      = 1972,
  isbn      = {978-0395046364}
}

@Book{Shumway06,
  author    = {Robert H. Shumway and David S. Stoffer},
  title     = {{Time Series Analysis and Its Applications: With R Examples}},
  publisher = {Springer},
  isbn      = {978-0-387-29317-2},
  year      = 2006
}

@Book{Wasserman03,
  author    = {Larry Wasserman},
  title     = {{All of Statistics: A Concise Course in Statistical Inference}},
  publisher = {Springer},
  year      = 2003
}

@Book{Steger01,
  author    = {Angelika Steger},
  title     = {{Diskrete Strukturen
                -- Band 1: Kombinatorik, Graphentheorie, Algebra}},
  publisher = {Springer},
  year      = 2001
}

@Book{Steger02,
  author    = {Angelika Steger},
  title     = {{Diskrete Strukturen
                -- Band 2: Wahrscheinlichkeitstheorie und Statistik}},
  publisher = {Springer},
  year      = 2002
}

--------------------------------------------------------------------------------
/probstat.tex:
--------------------------------------------------------------------------------
\usepackage{amsmath,amssymb}
\usepackage{dsfont}
\usepackage{cancel}
\usepackage{graphicx}
\usepackage{xargs}
\usepackage{xspace}

% =============================================================================
% Formatting
% =============================================================================

% Make a note on the margin.
\newcommand{\marnote}[1]{
  \reversemarginpar
  \marginpar[\raggedleft\footnotesize\textit{\\[3ex]#1}]%
            {\raggedright\footnotesize\textit{\\[3ex]#1}}
  \normalmarginpar
}

\newcommand{\pwiseii}[1]{\ensuremath{\left\{\begin{array}{ll}#1\end{array}\right.}}
\newcommand{\pwiseiii}[1]{\ensuremath{\left\{\begin{array}{ll}#1\end{array}\right.}}
\newcommand{\prn}[1]{\ensuremath{\left(#1\right)}}
\newcommand{\brk}[1]{\ensuremath{\left[#1\right]}}
\newcommand{\brc}[1]{\ensuremath{\left\{#1\right\}}}
\newcommand{\x}[1]{\ensuremath{\cancel{#1}}}
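
% Usage sketch for the \pwiseii macro above (illustrative only; the macro is
% not exercised elsewhere in these sources). A two-case piecewise definition:
%   F(x) = \pwiseii{0 & x < 0 \\ 1 & x \ge 0}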

% =============================================================================
% General Math
% =============================================================================

% Special functions and operators
\DeclareMathOperator{\erf}{erf}
\DeclareMathOperator{\logit}{logit}
\DeclareMathOperator{\sign}{sign}
\DeclareMathOperator*{\argmin}{\arg\!\min}

% Definitions
\def\define{:=}
\def\defined{=:}
\def\eqdef{\triangleq}

% Proofs
\def\qed{\ifhmode\unskip\nobreak\fi\hfill \ensuremath{\square}}

% Standard transformation function
\def\transform{\ensuremath{\varphi}\xspace}

% Logic
\newcommand{\comp}[1]{\neg{#1}}
\newcommand{\imp}{\ensuremath{\;\Longrightarrow\;}}
\newcommand{\pmi}{\ensuremath{\;\Longleftarrow\;}}
\newcommand{\nimp}{\ensuremath{\;\not\!\!\Longrightarrow\;}}
\newcommand{\npmi}{\ensuremath{\;\not\!\!\Longleftarrow\;}}
\newcommand{\eqv}{\ensuremath{\;\Longleftrightarrow\;}}

% Numbers.
\def\C{\mathbb{C}}
\def\N{\mathbb{N}}
\def\R{\mathbb{R}}
\def\Z{\mathbb{Z}}

% Matrices
\newcommand{\eyeii}{\ensuremath{\left(\begin{matrix}1 & 0 \\ 0 & 1\end{matrix}\right)}}
\newcommand{\eyeiii}{\ensuremath{\left(\begin{matrix}1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1\end{matrix}\right)}}

% Limits
\newcommand{\Lim}[2]{\ensuremath{\lim_{#1\to #2}}}
\newcommand{\limx}[1][\infty]{\ensuremath{\lim_{x\to #1}}}
\newcommand{\limn}[1][\infty]{\ensuremath{\lim_{n\to #1}}}

% Sums and products
\newcommand{\Sum}[2][i=1]{\ensuremath{\sum_{#1}^{#2}}}
\newcommand{\sumin}{\ensuremath{\sum_{i=1}^n}}
\newcommand{\sumiN}{\ensuremath{\sum_{i=1}^N}}
\newcommand{\sumim}{\ensuremath{\sum_{i=1}^m}}
\newcommand{\sumjk}{\ensuremath{\sum_{j=1}^k}}
\newcommand{\sumjn}{\ensuremath{\sum_{j=1}^n}}
\newcommand{\sumjm}{\ensuremath{\sum_{j=1}^m}}
\newcommand{\isum}[1][n]{\ensuremath{\sum_{#1}^\infty}}
\newcommand{\dsum}[4][i=1]{\ensuremath{\sum_{#1}^{#2}\sum_{#3}^{#4}}}
\newcommand{\Prod}[2][i=1]{\ensuremath{\prod_{#1}^{#2}}}
\newcommand{\prodin}{\ensuremath{\prod_{i=1}^n}}
\newcommand{\prodjn}{\ensuremath{\prod_{j=1}^n}}

% Derivatives
\newcommand{\der}[2][]{\ensuremath{\frac{d #1}{d #2}}}
\newcommand{\dder}[2][]{\ensuremath{\frac{d^2 #1}{d #2^2}}}
\newcommand{\pder}[2][]{\ensuremath{\frac{\partial #1}{\partial #2}}}
\newcommand{\pdder}[2][]{\ensuremath{\frac{\partial^2 #1}{\partial #2^2}}}
\newcommand{\mpder}[3][]{%
  \ensuremath{\frac{\partial^2 #1}{\partial #2 \partial #3}}}

% Differentials
%\renewcommand{\d}[1]{\,\mathrm{d}#1}
\renewcommand{\d}[1]{\,d#1}
\def\ds{\d{s}}
\def\dt{\d{t}}
\def\dtheta{\d{\theta}}
\def\du{\d{u}}
\def\dx{\d{x}}
\def\dy{\d{y}}
\def\dfx{\d{F_X(x)}}
\def\dfy{\d{F_Y(y)}}
\def\dfhatx{\d{\widehat{F}_n(x)}}

% Transcendentals w/ extended arguments.
\newcommand{\Exp}[1]{\ensuremath{\exp\left\{#1\right\}}}
\newcommand{\Log}[1]{\ensuremath{\log\left\{#1\right\}}}

% =============================================================================
% Probability and Statistics
% =============================================================================

% Formatted terminology.
\def\bias{\textsf{bias}\xspace}
\def\se{\textsf{se}\xspace}
\def\pdf{\textsc{pdf}\xspace}
\def\cdf{\textsc{cdf}\xspace}
\def\ise{\textsc{ise}\xspace}
\def\pgf{\textsc{pgf}\xspace}
\def\mgf{\textsc{mgf}\xspace}
\def\mse{\textsc{mse}\xspace}
\def\mspe{\textsc{mspe}\xspace}
\def\mle{\textsc{mle}\xspace}
\def\mom{\textsc{mom}\xspace}
\def\are{\textsc{are}\xspace}
\def\rss{\textsc{rss}\xspace}
\def\ess{\textsc{ess}\xspace}
\def\tss{\textsc{tss}\xspace}

% Naming shortcuts.
\def\ahat{\ensuremath{\widehat{\alpha}}}
\def\atil{\ensuremath{\tilde{\alpha}}}
\def\bhat{\ensuremath{\widehat{\beta}}}
\def\btil{\ensuremath{\tilde{\beta}}}
\def\dhat{\ensuremath{\widehat{\delta}}}
\def\ehat{\ensuremath{\hat{\epsilon}}}
\def\ghat{\ensuremath{\widehat{\gamma}}}
\def\khat{\ensuremath{\widehat{\kappa}}}
\def\lhat{\ensuremath{\widehat{\lambda}}}
\def\ltil{\ensuremath{\tilde{\lambda}}}
\def\mhat{\ensuremath{\widehat{\mu}}}
\def\nhat{\ensuremath{\widehat{\nu}}}
\def\mtil{\ensuremath{\tilde{\mu}}}
\def\psihat{\ensuremath{\widehat{\psi}}}
\def\shat{\ensuremath{\widehat{\sigma}}}
\def\stil{\ensuremath{\tilde{\sigma}}}
\def\that{\ensuremath{\widehat{\theta}}}
\def\ttil{\ensuremath{\widetilde{\theta}}}
\def\rhohat{\widehat{\rho}}
\def\xihat{\widehat{\xi}}

\def\sehat{\ensuremath{\widehat{\se}}}
\def\fhat{\ensuremath{\widehat{f}}}
\def\Fhat{\ensuremath{\widehat{F}}}
\def\fnhat{\ensuremath{\widehat{f}_n}}
\def\Fnhat{\ensuremath{\widehat{F}_n}}
\def\Jhat{\ensuremath{\widehat{J}}}
\def\phat{\ensuremath{\widehat{p}}}
\def\ptil{\ensuremath{\tilde{p}}}
\def\rhat{\widehat{r}}
\def\Rbar{\bar{R}}
\def\Rhat{\widehat{R}}
\def\Qbar{\bar{Q}}
\def\Qhat{\widehat{Q}}
\def\Xhat{\widehat{X}}
\def\xbar{\bar{x}}
\def\Xbar{\bar{X}}
\def\Xsqbar{\overline{X^2}}
\def\xnbar{\overline{x}_n}
\def\Xnbar{\overline{X}_n}
\def\Yhat{\widehat{Y}}
\def\ybar{\overline{y}}
\def\Ybar{\overline{Y}}
\def\Ynbar{\overline{Y}_n}

% Random variables.
\def\rv{\textsc{rv}\xspace}
\def\iid{\ensuremath{\textsc{iid}}\xspace}
\def\dist{\ensuremath{\sim}\xspace}
\def\disteq{\ensuremath{\stackrel{D}{=}}\xspace}
\def\distiid{\ensuremath{\stackrel{iid}{\sim}}\xspace}
\def\ind{\ensuremath{\perp\!\!\!\perp}\xspace}
\def\nind{\ensuremath{\perp\!\!\!\!\big\vert\!\!\!\!\perp}\xspace}
\def\Xon{\ensuremath{X_1,\dots,X_n}\xspace}
\def\xon{\ensuremath{x_1,\dots,x_n}\xspace}
\def\giv{\ensuremath{\,|\,}}
\def\Giv{\ensuremath{\,\big|\,}}
\def\GIV{\ensuremath{\,\Big|\,}}
\newcommand{\indicator}[1]{\mathds{1}_{\left\{#1\right\}}}

% Probability, expectation, and variance.
\def\prob{\mathbb{P}}
\renewcommand{\Pr}[2][]{\ensuremath{\prob_{#1}\left[#2\right]}\xspace}
\newcommand{\E}[2][]{\ensuremath{\mathbb{E}_{#1}\left[#2\right]}}
\newcommand{\V}[2][]{\ensuremath{\mathbb{V}_{#1}\left[#2\right]}}
\newcommand{\cov}[2][]{\ensuremath{\mathrm{Cov}_{#1}\left[#2\right]}}
\newcommand{\corr}[2][]{\ensuremath{\rho_{#1}\left[#2\right]}}
\def\sd{\ensuremath{\textsf{sd}}\xspace}
\def\samplemean{\ensuremath{\bar{X}_n}\xspace}
\def\samplevar{\ensuremath{S^2}\xspace}
\def\za{\ensuremath{z_{\alpha}}}
\def\zat{\ensuremath{z_{\alpha/2}}}

% Inference
\def\Ll{\ensuremath{\mathcal{L}}\xspace}
\def\Lln{\ensuremath{\Ll_n}\xspace}
\def\ll{\ensuremath{\ell}}
\def\lln{\ensuremath{\ll_n}}

% Hypothesis testing
\newcommand{\hyp}[2]{%
  \ensuremath{H_0: #1 \quad\text{vs.}\quad H_1: #2}}

% Convergence.
\def\conv{\rightarrow}
\def\convinf{\rightarrow_{n\to\infty}}
\def\pconv{\stackrel{\text{\tiny{P}}}{\rightarrow}}
\def\npconv{\stackrel{\text{\tiny{P}}}{\nrightarrow}}
\def\dconv{\stackrel{\text{\tiny{D}}}{\rightarrow}}
\def\ndconv{\stackrel{\text{\tiny{D}}}{\nrightarrow}}
\def\qmconv{\stackrel{\text{\tiny{qm}}}{\rightarrow}}
\def\nqmconv{\stackrel{\text{\tiny{qm}}}{\nrightarrow}}
\def\asconv{\stackrel{\text{\tiny{as}}}{\rightarrow}}
\def\nasconv{\stackrel{\text{\tiny{as}}}{\nrightarrow}}

%
% Distributions
%
\newcommandx{\unif}[1][1={a,b}]{\textrm{Unif}\left({#1}\right)}
\newcommandx{\unifd}[1][1={a,\ldots,b}]{\textrm{Unif}\left\{{#1}\right\}}
\newcommandx{\dunif}[3][1=x,2=a,3=b]{\frac{I(#2<#1<#3)}{#3-#2}}
\newcommandx{\dunifd}[3][1=x,2=a,3=b]{\frac{I(#2\le#1\le#3)}{#3-#2+1}}
\newcommandx{\punif}[3][1=x,2=a,3=b]{
  \begin{cases} 0 & #1 < #2 \\ \frac{#1-#2}{#3-#2} & #2 < #1 < #3 \\ 1 & #1 > #3\\\end{cases}}
\newcommandx{\punifd}[3][1=x,2=a,3=b]{
  \begin{cases} 0 & #1 < #2\\ \frac{\lfloor#1\rfloor-#2+1}{#3-#2+1} & #2 \le #1 \le #3 \\ 1 & #1 > #3\\ \end{cases}}

% Bernoulli
\newcommandx\bern[1][1=p]{\textrm{Bern}\left({#1}\right)}
\newcommandx\dbern[2][1=x,2=p]{#2^{#1} \left(1-#2\right)^{1-#1}}
\newcommandx\pbern[2][1=x,2=p]{\left(1-#2\right)^{1-#1}}

% Binomial
\newcommandx\bin[1][1={n,p}]{\textrm{Bin}\left(#1\right)}
\newcommandx\dbin[3][1=x,2=n,3=p]{\binom{#2}{#1}#3^#1\left(1-#3\right)^{#2-#1}}

% Multinomial
\newcommandx\mult[1][1={n,p}]{\textrm{Mult}\left(#1\right)}
\newcommandx\dmult[3][1=x,2=n,3=p]{\frac{#2!}{#1_1!\ldots#1_k!}#3_1^{#1_1}\cdots#3_k^{#1_k}}

% Hypergeometric
\newcommandx\hyper[1][1={N,m,n}]{\textrm{Hyp}\left({#1}\right)}
\newcommandx\dhyper[4][1=x,2=N,3=m,4=n]{\frac{\binom{#3}{#1}\binom{#2-#3}{#4-#1}}{\binom{#2}{#4}}}

% Negative Binomial
\newcommandx\nbin[1][1={r,p}]{\textrm{NBin}\left({#1}\right)}
\newcommandx\dnbin[3][1=x,2=r,3=p]{\binom{#1+#2-1}{#2-1}#3^#2(1-#3)^#1}
\newcommandx\pnbin[3][1=x,2=r,3=p]{I_#3(#2,#1+1)}

% Geometric
\newcommandx\geo[1][1=p]{\textrm{Geo}\left(#1\right)}
\newcommandx\dgeo[2][1=x,2=p]{#2(1-#2)^{#1-1}}
\newcommandx\pgeo[2][1=x,2=p]{1-(1-#2)^#1}

% Poisson
\newcommandx\pois[1][1=\lambda]{\textrm{Po}\left({#1}\right)}
\newcommandx\dpois[2][1=x,2=\lambda]{\frac{#2^#1 e^{-#2}}{#1!}}
\newcommandx\ppois[2][1=x,2=\lambda]{e^{-#2}\sum_{i=0}^#1\frac{#2^i}{i!}}

% Normal
\newcommandx\norm[1][1={\mu,\sigma^2}]{\mathcal{N}\left({#1}\right)}
\newcommandx\dnorm[3][1=x,2=\mu,3=\sigma]%
{\frac{1}{#3\sqrt{2\pi}}\Exp{-\frac{\left(#1-#2\right)^2}{2 #3^2}}}
\newcommandx\pnorm[1][1=x]{\Phi\left({#1}\right)}
\newcommandx\qnorm[1]{\Phi^{-1}\left({#1}\right)}

% Multivariate Normal
\newcommandx\mvn[1][1={\mu,\Sigma}]{\mathrm{MVN}\left({#1}\right)}

% Exponential
\newcommandx\ex[1][1=\beta]{\textrm{Exp}\left(#1\right)}
\newcommandx\dex[2][1=x,2=\beta]{\frac{1}{#2}e^{-#1/#2}}
\newcommandx\pex[2][1=x,2=\beta]{1-e^{-#1/#2}}

% Gamma
\newcommandx\gam[1][1={\alpha,\beta}]{\textrm{Gamma}\left({#1}\right)}
\newcommandx\dgamma[3][1=x,2=\alpha,3=\beta]%
{\frac{#3^{#2}}{\Gamma\left( #2 \right)} #1^{#2-1}e^{-#3#1}}

% InverseGamma
\newcommandx\invgamma[1][1={\alpha,\beta}]{\textrm{InvGamma}\left({#1}\right)}
\newcommandx\dinvgamma[3][1=x,2=\alpha,3=\beta]%
{\frac{#3^{#2}}{\Gamma\left(#2\right)}#1^{-#2-1}e^{-#3/#1}}
\newcommandx\pinvgamma[3][1=x,2=\alpha,3=\beta]%
{\frac{\Gamma\left(#2,\frac{#3}{#1}\right)}{\Gamma\left(#2\right)}}

% Beta
\newcommandx\bet[1][1={\alpha,\beta}]{\textrm{Beta}\left(#1\right)}
\newcommandx\dbeta[3][1=x,2=\alpha,3=\beta]
{\frac{\Gamma\left(#2+#3\right)}{\Gamma\left(#2\right)\Gamma\left(#3\right)}#1^{#2-1}\left(1-#1\right)^{#3-1}}

% Dirichlet
\newcommandx\dir[1][1={\alpha}]{\textrm{Dir}\left(#1\right)}
\newcommandx\ddir[2][1=x,2=\alpha]{\frac{\Gamma\left(\sum_{i=1}^k #2_i\right)}{\prod_{i=1}^k\Gamma\left(#2_i\right)}\prod_{i=1}^k #1_i^{#2_i-1}}

% Weibull
\newcommandx\weibull[1][1={\lambda,k}]{\textrm{Weibull}\left(#1\right)}
\newcommandx\dweibull[3][1=x,2=\lambda,3=k]{\frac{#3}{#2}
\left(\frac{#1}{#2}\right)^{#3-1} e^{-(#1/#2)^{#3}}}

% Chi-squared
\newcommandx\chisq[1][1=k]{\chi_{#1}^2}

% Zeta
\newcommandx\zet[1][1=s]{\textrm{Zeta}\left(#1\right)}
\newcommandx\dzeta[2][1=x,2=s]{\frac{#1^{-#2}}{\zeta\left(#2\right)}}

% Time Series
\newcommandx\AR[1][1=p]{\mathsf{AR}\left({#1}\right)}
\newcommandx\MA[1][1=q]{\mathsf{MA}\left({#1}\right)}
\newcommandx\ARMA[1][1={p,q}]{\mathsf{ARMA}\left({#1}\right)}
\newcommandx\ARIMA[1][1={p,d,q}]{\mathsf{ARIMA}\left({#1}\right)}
\newcommandx\SARIMA[3][1={p,d,q},2={P,D,Q},3=s]{\mathsf{ARIMA}\left(#1\right) \times \left(#2\right)_{#3}}


% =============================================================================
% Algorithms
% =============================================================================

\newcommandx\step[1][1=t]{^{(#1)}}

--------------------------------------------------------------------------------
/stat-cookbook.tex:
--------------------------------------------------------------------------------
% ----------------------------------------------------------------------------
%
%                         Probability and Statistics
%                                  Cookbook
%
% ----------------------------------------------------------------------------
%
%  Copyright © Matthias Vallentin, 2017
%

\documentclass[landscape]{article}

\usepackage{array}
\usepackage{amsmath,amssymb}
\usepackage{booktabs}
\usepackage{caption}
\usepackage[nodayofweek]{datetime}
\usepackage{environ}
\usepackage{float}
\usepackage{enumitem}
\usepackage{fancyhdr}
\usepackage[landscape,margin=13mm,footskip=1pt,includefoot]{geometry}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{multicol}
\usepackage{rotating}
\usepackage{tikz}
\usepackage{threeparttable}
\usepackage{url}
\usepackage{xspace}

% Document version, MAJOR.MINOR.PATCH. Please change with any modification
% according to semantic versioning practices:
% - The major version changes when adding a new section or topic, or making a
%   substantial content change.
% - The minor version changes for non-trivial fixes, corrections, or
%   improvements.
% - The patch version changes for trivial fixes, such as typos in text or
%   formulas.
\newcommand{\version}{0.2.7}

% Probability and Statistics LaTeX shortcuts.
\input{probstat}

% TikZ tweaks
\usetikzlibrary{arrows,shapes}
\usetikzlibrary{decorations.pathreplacing}
\tikzstyle{every picture}+=[remember picture]
\tikzstyle{na} = [baseline=-.5ex]

% Put the page number in the bottom-right corner.
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead{}
\fancyfoot[R]{\footnotesize \thepage}
\renewcommand{\headrulewidth}{0pt}

% Further document tweaks.
\parindent=0pt
\setitemize{itemsep=0.2mm,parsep=1pt}
\setenumerate{itemsep=0.2mm,parsep=1pt}

% A type of blue that doesn't look as aggressive as the default 'blue' but
% also distinguishes well from black while not appearing too light.
\definecolor{trueblue}{rgb}{0.0, 0.45, 0.81}

% Link style (hyperref package)
\hypersetup{
  colorlinks=true,    % false: boxed links; true: colored links
  linkcolor=black,    % color of internal links
  citecolor=trueblue, % color of links to bibliography
  filecolor=trueblue, % color of file links
  urlcolor=trueblue   % color of external links
}

% Personal
\def\email{info@statistics.zone}
\def\web{\url{http://statistics.zone/}}

% An itemize list with a title that avoids a break between title and list.
\newenvironment{titemize}[1]{
  \begin{minipage}[h]{\columnwidth}
    #1
    \begin{itemize}
}{
    \end{itemize}
  \end{minipage}
}

\begin{document}

\thispagestyle{empty}
\begin{center}
  \vspace*{\fill}
  \textsc{\Huge Probability and Statistics\\[2ex] \huge Cookbook}
  \vfill
  \footnotesize{
    Version \version\\[1ex]
    \today\\[1ex]
    \web\\[1ex]
    Copyright \copyright{}
    \href{http://matthias.vallentin.net}{Matthias Vallentin}\\
  }
\end{center}

\newpage

\thispagestyle{empty}
\begin{multicols*}{3}
\tableofcontents
\vfill
\hrule
\vspace{5pt}
{\footnotesize This cookbook integrates various topics in probability theory
and statistics, based on literature~\cite{Hoel72,Wasserman03,Shumway06}
and in-class material from courses of the statistics department at the
University of California, Berkeley, but is also influenced by other
sources~\cite{Steger01,Steger02}.
If you find errors or have suggestions for
improvements, please get in touch at \web.}
\end{multicols*}

\newpage

\section{Distribution Overview}

\subsection{Discrete Distributions}

\begin{center}
  \small
  \begin{tabular}{@{}l*6{>{\begin{math}\displaystyle}c<{\end{math}}}@{}}
    \toprule &&&&&& \\[-2ex]
    & \text{Notation}\footnotemark
    & F_X(x) & f_X(x) & \E{X} & \V{X} & M_X(s) \\[1ex]

    \midrule

    Uniform & \unifd & \punifd & \dunifd &
    \frac{a+b}{2} & \frac{(b-a+1)^2-1}{12} &
    \frac{e^{as}-e^{(b+1)s}}{(b-a+1)\left(1-e^{s}\right)} \\[3ex]

    Bernoulli & \bern & \pbern & \dbern &
    p & p(1-p) &
    1-p+pe^s \\[3ex]

    Binomial & \bin & I_{1-p}(n-x,x+1) & \dbin &
    np & np(1-p) &
    (1-p+pe^s)^n \\[3ex]

    Multinomial & \mult & & \dmult \quad \sum_{i=1}^k x_i = n &
    \begin{pmatrix} np_1 \\ \vdots \\ np_k \end{pmatrix} &
    \begin{pmatrix}
      np_1(1-p_1) & -np_1p_2 \\
      -np_2p_1    & \ddots
    \end{pmatrix} &
    \left( \sum_{i=1}^k p_i e^{s_i} \right)^n \\[3ex]

    Hypergeometric & \hyper &
    \approx \Phi\left(\displaystyle\frac{x-np}{\sqrt{np(1-p)}}\right)
    \quad p=\frac{m}{N} &
    \dhyper &
    \frac{nm}{N} & \frac{nm(N-n)(N-m)}{N^2(N-1)} & \\[3ex]

    Negative Binomial & \nbin & \pnbin & \dnbin &
    r\frac{1-p}{p} & r\frac{1-p}{p^2} &
    \left(\frac{pe^s}{1-(1-p)e^s}\right)^r \\[3ex]

    Geometric & \geo &
    \pgeo \quad x\in\mathbb N^+ &
    \dgeo \quad x\in\mathbb N^+ &
    \frac{1}{p} & \frac{1-p}{p^2} &
    \frac{pe^s}{1-(1-p)e^s} \\[3ex]

    Poisson & \pois & \ppois & \dpois &
    \lambda & \lambda &
    e^{\lambda(e^s-1)}\\[3ex]

    \bottomrule
  \end{tabular}
\end{center}

\footnotetext{We use the notation $\gamma(s,x)$ and $\Gamma(x)$ to refer to the
Gamma functions (see \S\ref{sec:math:gamma}), and use $\text{B}(x,y)$ and $I_x$
to refer to the Beta functions (see \S\ref{sec:math:beta}).}

\pagebreak

\begin{figure}[H]
  \includegraphics[scale=0.35]{figs/uniform-pmf.pdf}
  \includegraphics[scale=0.35]{figs/binomial-pmf.pdf}
  \includegraphics[scale=0.35]{figs/geometric-pmf.pdf}
  \includegraphics[scale=0.35]{figs/poisson-pmf.pdf}

  \includegraphics[scale=0.35]{figs/uniform-cdf-discrete.pdf}
  \includegraphics[scale=0.35]{figs/binomial-cdf.pdf}
  \includegraphics[scale=0.35]{figs/geometric-cdf.pdf}
  \includegraphics[scale=0.35]{figs/poisson-cdf.pdf}
\end{figure}

\subsection{Continuous Distributions}

\begin{threeparttable}
  \small
  %\newcolumntype{L}{>{\varwidth[c]{\linewidth}}l<{\endvarwidth}}
  \newcolumntype{M}{>{\begin{math}\displaystyle}c<{\end{math}}}
  \begin{tabular}{@{}l*6{M}@{}}
    \toprule &&&&&& \\[-2ex]
    & \text{Notation}
    & F_X(x) & f_X(x) & \E{X} & \V{X} & M_X(s) \\[1ex]

    \midrule

    Uniform & \unif & \punif & \dunif &
    \frac{a+b}{2} & \frac{(b-a)^2}{12} &
    \frac{e^{sb}-e^{sa}}{s(b-a)} \\[3ex]

    Normal & \norm &
    \Phi(x)=\displaystyle\int_{-\infty}^x \phi(t)\,dt &
    \phi(x)=\dnorm &
    \mu & \sigma^2 &
    \Exp{\mu s + \frac{\sigma^2s^2}{2}}\\[3ex]

    Log-Normal & \ln\norm&
    \frac{1}{2}+\frac{1}{2} \erf\left[\frac{\ln x-\mu}{\sqrt{2\sigma^2}}\right] &
    \frac{1}{x\sqrt{2\pi\sigma^2}} \Exp{-\frac{(\ln x - \mu)^2}{2\sigma^2}} &
    e^{\mu+\sigma^2/2} &
    (e^{\sigma^2}-1) e^{2\mu+\sigma^2} &
    \\[3ex]

    Multivariate Normal & \mvn & &
    (2\pi)^{-k/2} |\Sigma|^{-1/2} e^{-\frac{1}{2}(x-\mu)^T \Sigma^{-1}(x-\mu)} &
    \mu & \Sigma &
    \Exp{\mu^T s + \frac{1}{2} s^T \Sigma s}\\[3ex]

    Student's $t$ & \text{Student}(\nu)
    & I_{\frac{x+\sqrt{x^2+\nu}}{2\sqrt{x^2+\nu}}}
      \left( \frac{\nu}{2},\frac{\nu}{2} \right)
    & \frac{\Gamma\left(\frac{\nu+1}{2}\right)}
           {\sqrt{\nu\pi}\Gamma\left(\frac{\nu}{2}\right)}
      \left(1+\frac{x^2}{\nu}\right)^{-(\nu+1)/2}
    & 0 \quad \nu > 1
    & \begin{cases}
        \displaystyle\frac{\nu}{\nu-2} & \nu > 2 \\
        \infty & 1 < \nu \le 2
      \end{cases}
    & \\[3ex]

    Chi-square & \chisq &
    \frac{1}{\Gamma(k/2)} \gamma\left(\frac{k}{2}, \frac{x}{2}\right) &
    \frac{1}{2^{k/2} \Gamma(k/2)} x^{k/2-1} e^{-x/2}&
    k & 2k &
    (1-2s)^{-k/2} \; s<1/2\\[3ex]

    F & \text{F}(d_1,d_2) &
    I_\frac{d_1x}{d_1x+d_2}\left(\frac{d_1}{2},\frac{d_2}{2}\right) &
    \frac{\sqrt{\frac{(d_1x)^{d_1} d_2^{d_2}}{(d_1x+d_2)^{d_1+d_2}}}}
         {x\mathrm{B}\left(\frac{d_1}{2},\frac{d_2}{2}\right)} &
    \frac{d_2}{d_2-2} %\; d_2 > 2
    & \frac{2d_2^2(d_1+d_2-2)}{d_1(d_2-2)^2(d_2-4)} %\; d_2 > 4
    & \\[3ex]

    Exponential\tnote{$\ast$} & \ex & \pex & \dex &
    \beta & \beta^2 &
    \frac{1}{1-\beta s} \left(s<\frac{1}{\beta}\right) \\[3ex]

    Gamma\tnote{$\ast$} & \gam &
    \frac{\gamma(\alpha,\beta x)}{\Gamma(\alpha)} & \dgamma &
    \frac{\alpha}{\beta} & \frac{\alpha}{\beta^2} &
    \left(\frac{1}{1-\frac{s}{\beta}} \right)^\alpha \left(s<\beta\right)\\[3ex]

    Inverse Gamma & \invgamma & \pinvgamma & \dinvgamma &
    \frac{\beta}{\alpha-1} \; \alpha>1 &
    \frac{\beta^2}{(\alpha-1)^2(\alpha-2)} \; \alpha > 2 &
    \frac{2(-\beta s)^{\alpha/2}}{\Gamma(\alpha)}K_\alpha
    \left( \sqrt{-4\beta s} \right)\\[3ex]

    Dirichlet & \dir & & \ddir &
    \frac{\alpha_i}{\sum_{i=1}^k \alpha_i} &
    \frac{\E{X_i}(1-\E{X_i})}{\sum_{i=1}^k\alpha_i + 1} & \\[3ex]

    Beta & \bet & I_x(\alpha,\beta)& \dbeta &
    \frac{\alpha}{\alpha+\beta} &
    \frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)} &
    1+\sum_{k=1}^{\infty} \left( \prod_{r=0}^{k-1}
    \frac{\alpha+r}{\alpha+\beta+r} \right) \frac{s^k}{k!} \\[3ex]

    Weibull & \mathrm{Weibull}(\lambda, k) & 1 - e^{-(x/\lambda)^k} & \dweibull &
    \lambda \Gamma\left(1 + \frac{1}{k} \right) &
    \lambda^2 \Gamma\left(1 + \frac{2}{k}\right) - \mu^2 &
    \sum_{n=0}^\infty \frac{s^n \lambda^n}{n!} \Gamma\left(1+\frac{n}{k}\right)
    \\[3ex]

    Pareto & \mathrm{Pareto}(x_m, \alpha) &
    1 - \left(\frac{x_m}{x} \right)^\alpha \; x\ge x_m &
    \alpha\frac{x_m^\alpha}{x^{\alpha+1}} \quad x\ge x_m&
    \frac{\alpha x_m}{\alpha-1} \; \alpha>1 &
    \frac{x_m^2\alpha}{(\alpha-1)^2(\alpha-2)} \; \alpha>2 &
    \alpha(-x_m s)^\alpha \Gamma(-\alpha,-x_m s) \; s<0\\[3ex]

    \bottomrule
  \end{tabular}
  \begin{tablenotes}
  \item[$\ast$] For the Exponential distribution, $\beta$ denotes the
    \emph{scale} (mean), i.e., $\beta=\frac{1}{\lambda}$ for rate $\lambda$;
    for the Gamma distribution, $\beta$ denotes the \emph{rate}. Some
    textbooks swap these parameterizations~\cite{Wasserman03}.
  \end{tablenotes}
\end{threeparttable}
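
Moments follow from the $M_X(s)$ column by differentiation at zero; as an
illustrative check, for $X \dist \gam$,
\[\E{X} = M_X'(0)
  = \left.\frac{\alpha}{\beta}\left(1-\frac{s}{\beta}\right)^{-\alpha-1}\right|_{s=0}
  = \frac{\alpha}{\beta}\]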

\begin{figure}[H]
  \includegraphics[scale=0.35]{figs/uniform-pdf.pdf}
  \includegraphics[scale=0.35]{figs/normal-pdf.pdf}
  \includegraphics[scale=0.35]{figs/lognormal-pdf.pdf}
  \includegraphics[scale=0.35]{figs/student-pdf.pdf}
  \includegraphics[scale=0.35]{figs/chisquare-pdf.pdf}
  \includegraphics[scale=0.35]{figs/f-pdf.pdf}
  \includegraphics[scale=0.35]{figs/exponential-pdf.pdf}
  \includegraphics[scale=0.35]{figs/gamma-pdf.pdf}
  \includegraphics[scale=0.35]{figs/invgamma-pdf.pdf}
  \includegraphics[scale=0.35]{figs/beta-pdf.pdf}
  \includegraphics[scale=0.35]{figs/weibull-pdf.pdf}
  \includegraphics[scale=0.35]{figs/pareto-pdf.pdf}
\end{figure}

\begin{figure}[H]
  \includegraphics[scale=0.35]{figs/uniform-cdf-continuous.pdf}
  \includegraphics[scale=0.35]{figs/normal-cdf.pdf}
  \includegraphics[scale=0.35]{figs/lognormal-cdf.pdf}
  \includegraphics[scale=0.35]{figs/student-cdf.pdf}
  \includegraphics[scale=0.35]{figs/chisquare-cdf.pdf}
  \includegraphics[scale=0.35]{figs/f-cdf.pdf}
  \includegraphics[scale=0.35]{figs/exponential-cdf.pdf}
  \includegraphics[scale=0.35]{figs/gamma-cdf.pdf}
  \includegraphics[scale=0.35]{figs/invgamma-cdf.pdf}
  \includegraphics[scale=0.35]{figs/beta-cdf.pdf}
  \includegraphics[scale=0.35]{figs/weibull-cdf.pdf}
  \includegraphics[scale=0.35]{figs/pareto-cdf.pdf}
\end{figure}

\begin{multicols*}{2}

\section{Probability Theory}

Definitions
\begin{itemize}
  \item Sample space $\Omega$
  \item Outcome (point or element) $\omega \in \Omega$
  \item Event $A \subseteq \Omega$
  \item $\sigma$-algebra $\mathcal{A}$
    \begin{enumerate}
      \item $\varnothing \in \mathcal{A}$
      \item $A_1,A_2,\dots \in \mathcal{A}
        \imp \bigcup_{i=1}^\infty A_i \in \mathcal{A}$
      \item $A \in \mathcal{A} \imp \comp{A} \in \mathcal{A}$
    \end{enumerate}
  \item Probability Distribution $\prob$
    \begin{enumerate}
      \item $\Pr{A} \ge 0 \quad \forall A$
      \item $\Pr{\Omega} = 1$
      \item $\Pr{\displaystyle\bigsqcup_{i=1}^\infty A_i}
        = \displaystyle\sum_{i=1}^\infty \Pr{A_i}$
    \end{enumerate}
  \item Probability space $(\Omega,\mathcal{A},\prob)$
\end{itemize}

Properties
\begin{itemize}
  \item $\Pr{\varnothing} = 0$
  \item $B = \Omega \cap B = (A \cup \comp{A}) \cap B
    = (A \cap B) \cup (\comp{A} \cap B)$
  \item $\Pr{\comp{A}} = 1 - \Pr{A}$
  \item $\Pr{B} = \Pr{A \cap B} + \Pr{\comp{A} \cap B}$
  \item $\comp{\left(\bigcup_n A_n\right)} = \bigcap_n \comp{A_n}
    \quad
    \comp{\left(\bigcap_n A_n\right)} = \bigcup_n \comp{A_n}
    \qquad$
    \textsc{DeMorgan}
  \item $\Pr{\bigcup_n A_n}
    = 1 - \Pr{\bigcap_n \comp{A_n}}$
  \item $\Pr{A \cup B} = \Pr{A} + \Pr{B} - \Pr{A \cap B}\\[1ex]
    \imp \Pr{A \cup B} \le \Pr{A} + \Pr{B}$
  \item $\Pr{A \cup B}
    = \Pr{A \cap \comp{B}} + \Pr{\comp{A} \cap B} + \Pr{A \cap B}$
  \item $\Pr{A \cap \comp{B}} = \Pr{A} - \Pr{A \cap B}$
\end{itemize}

Continuity of Probabilities
\begin{itemize}
  \item $A_1 \subset A_2 \subset \dots \imp \limn \Pr{A_n} = \Pr{A}
    \quad\text{where } A = \bigcup_{i=1}^\infty A_i$
  \item $A_1 \supset A_2 \supset \dots \imp \limn \Pr{A_n} = \Pr{A}
    \quad\text{where } A = \bigcap_{i=1}^\infty A_i$
\end{itemize}

Independence \ind
\[A \ind B \eqv \Pr{A \cap B} = \Pr{A}\Pr{B}\]

Conditional Probability
\[\Pr{A \giv B} = \frac{\Pr{A \cap B}}{\Pr{B}} \qquad \Pr{B} > 0\]

Law of Total Probability
\[ \Pr{B} = \sum_{i=1}^n \Pr{B|A_i}\Pr{A_i}
  \qquad \Omega = \bigsqcup_{i=1}^n A_i\]

\textsc{Bayes' Theorem}
\[\Pr{A_i \giv B}
  = \frac{\Pr{B \giv A_i}\Pr{A_i}}{\sum_{j=1}^n \Pr{B \giv A_j}\Pr{A_j}}
  \qquad \Omega = \bigsqcup_{i=1}^n A_i\]

Inclusion-Exclusion Principle
\[\biggl|\bigcup_{i=1}^n A_i\biggr| = \sum_{r=1}^n(-1)^{r-1}
  \sum_{1 \le i_1 < \dots < i_r \le n}\biggl|\bigcap_{j=1}^r A_{i_j}\biggr|\]

\section{Random Variables}

Random Variable (RV)
\[X: \Omega \to \R\]

Probability Mass Function (PMF)
\[f_X(x) = \Pr{X = x} = \Pr{\{\omega\in\Omega:X(\omega) = x\}}\]

Probability Density Function (PDF)
\[\Pr{a \le X \le b} = \int_a^b f(x)\dx\]

Cumulative Distribution Function (CDF)
\[F_X:\R \to [0,1] \qquad F_X(x) = \Pr{X \le x}\]

\begin{enumerate}
  \item Nondecreasing: $x_1 < x_2 \imp F(x_1) \le F(x_2)$
  \item Normalized: $\lim_{x\to -\infty} F(x) = 0$ and
    $\lim_{x\to \infty} F(x) = 1$
  \item Right-Continuous: $\lim_{y\downarrow x} F(y) = F(x)$
\end{enumerate}

\[\Pr{a\le Y\le b \giv X=x} = \int_a^b f_{Y|X}(y\giv x) \dy \qquad a \le b\]
\[ f_{Y|X}(y\giv x) = \frac{f(x,y)}{f_X(x)} \]

Independence
\begin{enumerate}
  \item $\Pr{X \le x, Y \le y} = \Pr{X \le x}\Pr{Y \le y}$
  \item $f_{X,Y}(x,y) = f_X(x)f_Y(y)$
\end{enumerate}

\subsection{Transformations}

Transformation function
\[Z = \transform(X)\]

Discrete
\[f_Z(z) = \Pr{\transform(X) = z} = \Pr{\{x:\transform(x) = z\}}
  = \Pr{X \in \transform^{-1}(z)} = \sum_{x \in \transform^{-1}(z)} \!\!\!f_X(x)\]

Continuous
\[F_Z(z) = \Pr{\transform(X) \le z} = \int_{A_z} f(x) \dx \quad
  \text{with } A_z = \{x:\transform(x) \le z\}\]

Special case if $\transform$ strictly monotone
\[f_Z(z)
  = f_X(\transform^{-1}(z))
    \left|\frac{d}{dz}\transform^{-1}(z)\right|
  = f_X(x)\left|\frac{dx}{dz}\right|
  = f_X(x)\frac{1}{|J|}\]

The Rule of the Lazy Statistician
\[\E{Z} = \int \transform(x) \dfx\]
\[\E{I_A(X)} = \int I_A(x) \dfx = \int_A \dfx = \Pr{X \in A}\]

Convolution
\begin{itemize}
  \item $ Z:=X+Y \qquad
    f_Z(z)=\displaystyle\int_{-\infty}^{\infty} f_{X,Y}(x,z-x)\,dx
    \;\stackrel{X,Y \ge 0}{=}\; \int_0^z f_{X,Y}(x,z-x)\,dx$
  \item $ Z:=|X-Y| \qquad
    f_Z(z)=\displaystyle2\int_0^\infty f_{X,Y}(x,z+x)\,dx$
    %\;\stackrel{X,Y \ge 0}{=}\; \int_0^\infty f_{X,Y}(x,z+x)\,dx$
  \item $ Z:=\displaystyle\frac{X}{Y} \qquad
    f_Z(z)=\displaystyle\int_{-\infty}^{\infty} |y| f_{X,Y}(yz,y)\,dy
    \;\stackrel{\ind}{=}\; \int_{-\infty}^{\infty} |y| f_X(yz)f_Y(y)\,dy$
\end{itemize}

% \subsection{Joint Distribution}
% \begin{itemize}
% \item $f(x,y) = \Pr{X \le k, Y \le m}
%   = \displaystyle\int_{-\infty}^k\int_{-\infty}^m f(x,y)\,dy\,dx$
% \item $\Pr{a < X \le b, c < y \le d} = F(b,d) - F(a,d) - F(b,c) + F(a,c)$
% \item $f_X(x) = \displaystyle\int_{-\infty}^\infty f(x,y)\,dy \qquad
%   f_Y(y) = \displaystyle\int_{-\infty}^\infty f(x,y)\,dx$
% \end{itemize}

% Order Statistics
% \begin{itemize}
% \item $U_i\ind U_j$ continuous \textsc{RVs} with common density $f$
% \item $X_1(\omega) < \dots < X_n(\omega)$ permuted set of $U_i$'s
% \item $X_k = $ \emph{k$^{th}$ order statistic}
% \item $X_1(\omega) = \min(U_1(\omega),\dots,U_n(\omega))$
% \item $X_n(\omega) = \max(U_1(\omega),\dots,U_n(\omega))$
% \item $R(\omega) = X_n(\omega) - X_1(\omega)$
% \end{itemize}

\section{Expectation}

Definition and properties
\begin{itemize}
  \item $\E{X} = \mu_X = \displaystyle \int x \dfx =
    \begin{cases}
      \displaystyle\sum_x xf_X(x) & \text{X discrete} \\\\
      \displaystyle\int xf_X(x)\dx & \text{X continuous}
    \end{cases}$
  \item $\Pr{X=c}=1 \imp \E{X} = c$
  \item $\E{cX} = c\,\E{X}$
  \item $\E{X+Y} = \E{X}+\E{Y}$
  \item $\E{XY} = \displaystyle\iint xy\,f_{X,Y}(x,y)\dx\dy$
  \item $\E{\transform(X)} \neq \transform(\E{X}) \qquad$
    (cf.~\hyperref[jensen]{\textsc{Jensen} inequality})
  \item $\Pr{X \ge Y} = 1 \imp \E{X}\ge\E{Y}$
  \item $\Pr{X=Y} = 1 \imp \E{X}=\E{Y}$
  % \item $\Pr{\lvert Y\rvert\le c} = 1 \imp \E{Y}<\infty
  %   \wedge \lvert\E{X}\rvert\le c$
  \item $\E{X} = \displaystyle\sum_{x=1}^\infty \Pr{X\ge x}$
    \qquad $X$ integer-valued, $X \ge 0$
\end{itemize}

Sample mean
\[\samplemean = \frac{1}{n}\sum_{i=1}^n X_i\]

\begin{titemize}{Conditional expectation}
  \item $\E{Y\giv X=x} = \displaystyle\int y f(y\giv x)\dy$
  \item $\E{X} = \E{\E{X\giv Y}}$
  \item $\E{\transform(X,Y)\giv X=x}
    = \displaystyle\int_{-\infty}^\infty \transform(x,y)f_{Y|X}(y\giv x)\dy$
  \item $\E{\transform(Y,Z)\giv X=x} =
    \displaystyle\int_{-\infty}^\infty\transform(y,z)
    f_{(Y,Z)|X}(y,z\giv x)\,dy\,dz$
  \item $\E{Y+Z\giv X} = \E{Y\giv X} + \E{Z\giv X}$
  \item $\E{\transform(X)Y\giv X} = \transform(X)\E{Y\giv X}$
  \item $\E{\transform(X,Y)} = \E[X]{\E{\transform(X,Y)\giv X}}$
  \item $\E{Y\giv X} = c \imp \cov{X,Y}=0$
\end{titemize}

\section{Variance}

\begin{titemize}{Definition and properties}
  \item $\V{X} = \sigma_X^2 = \E{(X-\E{X})^2} = \E{X^2} - \E{X}^2$
  \item $\V{\displaystyle\sum_{i=1}^n X_i} =
    \displaystyle\sum_{i=1}^n \V{X_i} + \sum_{i\ne j}\cov{X_i,X_j}$
  \item $\V{\displaystyle\sum_{i=1}^n X_i} =
    \displaystyle\sum_{i=1}^n\V{X_i} \quad$ if $X_i \ind X_j$
\end{titemize}

Standard deviation
\[\sd[X] = \sqrt{\V{X}} = \sigma_X\]

Covariance
\begin{itemize}
  \item $\cov{X,Y} = \E{(X-\E{X})(Y-\E{Y})} = \E{XY}-\E{X}\E{Y}$
  \item $\cov{X,a} = 0$
  \item $\cov{X,X} = \V{X}$
  \item $\cov{X,Y} = \cov{Y,X}$
  \item $\cov{aX,bY} = ab\cov{X,Y}$
  \item $\cov{X+a,Y+b} = \cov{X,Y}$
  \item $\cov{\displaystyle\sumin X_i, \sumjm Y_j}
    = \displaystyle\sumin\sumjm\cov{X_i, Y_j}$
\end{itemize}

Correlation
\[\corr{X,Y} = \displaystyle\frac{\cov{X,Y}}{\sqrt{\V{X}\V{Y}}}\]

Independence
\[X\ind Y \imp \corr{X,Y} = 0 \eqv \cov{X,Y} = 0 \eqv \E{XY}=\E{X}\E{Y}\]

Sample variance
\[\samplevar = \frac{1}{n-1}\sum_{i=1}^n(X_i-\samplemean)^2\]

Conditional variance
\begin{itemize}
  \item $\V{Y\giv X} = \E{(Y-\E{Y\giv X})^2\giv X}
    = \E{Y^2\giv X}-\E{Y\giv X}^2$
  \item $\V{Y} = \E{\V{Y\giv X}}+\V{\E{Y\giv X}}$
\end{itemize}

\section{Inequalities}

\textsc{Cauchy-Schwarz}
\[\E{XY}^2 \le \E{X^2}\E{Y^2}\]

\textsc{Markov}
\[\Pr{\transform(X) \ge t}\le\frac{\E{\transform(X)}}{t}
  \qquad \transform \ge 0,\; t > 0\]

\textsc{Chebyshev}
\[\Pr{\lvert X-\E{X}\rvert \ge t} \le \frac{\V{X}}{t^2}\]

\textsc{Chernoff}
\[\Pr{X \ge (1+\delta)\mu}
  \le \left(\frac{e^\delta}{(1+\delta)^{1+\delta}}\right)^{\!\mu}
  \quad \delta>0\]

\textsc{Hoeffding}
\[X_1,\ldots,X_n \; \textrm{independent}
  \;\wedge\; \Pr{X_i\in[a_i,b_i]} = 1 \;\wedge\; 1 \le i \le n \]
\[\Pr{\Xbar-\E{\Xbar} \ge t} \le e^{-2nt^2}
  \quad t>0 \quad (a_i=0,\; b_i=1)\]
\[\Pr{|\Xbar-\E{\Xbar}| \ge t} \le 2\Exp{-\frac{2n^2t^2}{\sumin(b_i-a_i)^2}}
  \quad t>0\]

\textsc{Jensen}\label{jensen}
\[\E{\transform(X)} \ge \transform(\E{X}) \quad
  \transform \text{ convex}\]

\section{Distribution Relationships}

Binomial
\begin{itemize}
  \item $X_i \dist \bern \imp \displaystyle\sum_{i=1}^n X_i \dist \bin$
  \item $X\dist\bin, Y\dist\bin[m,p] \imp X+Y\dist\bin[n+m,p]$
  \item $\limn\bin = \pois[np] \qquad$ ($n$ large, $p$ small)
  \item $\limn\bin = \norm[np,np(1-p)] \qquad$
    ($n$ large, $p$ far from 0 and 1)
\end{itemize}

Negative Binomial
\begin{itemize}
  \item $X\dist \nbin[1,p] \imp X+1 \dist \geo$
  \item $X\dist \nbin[r,p] \imp X \disteq \displaystyle\sum_{i=1}^r (Y_i - 1)
    \quad Y_i \distiid \geo$
  \item $X_i\dist \nbin[r_i,p] \wedge X_i \ind X_j
    \imp \sum_i X_i\dist \nbin[\sum_i r_i,p]$
  \item $X\dist \nbin[r,p],\; Y\dist \bin[s+r,p] \imp \Pr{X\le s} = \Pr{Y\ge r}$
\end{itemize}

Poisson
\begin{itemize}
  \item $X_i\dist\pois[\lambda_i] \wedge X_i \ind X_j
    \imp \displaystyle\sumin X_i \dist \pois[\displaystyle\sumin \lambda_i]$
  \item $X_i\dist\pois[\lambda_i] \wedge X_i \ind X_j
    \imp X_i\,\left|\displaystyle\sumjn X_j\right. \dist
    \bin[\displaystyle\sumjn X_j,\displaystyle\frac{\lambda_i}{\sumjn\lambda_j}]$
\end{itemize}
\dist 630 | \bin[\displaystyle\sumjn X_j,\displaystyle\frac{\lambda_i}{\sumjn\lambda_j}]$ 631 | \end{itemize} 632 | 633 | Exponential 634 | \begin{itemize} 635 | % \item $\forall n \in \mathbb N^+: X_i\dist\ex{\lambda} 636 | \item $X_i\dist\ex \wedge X_i \ind X_j 637 | \imp \displaystyle\sumin X_i\dist \gam[n,\beta]$ 638 | \item Memoryless property: $\Pr{X>x+y\giv X>y}=\Pr{X>x}$ 639 | \end{itemize} 640 | 641 | Normal 642 | \begin{itemize} 643 | \item $X\dist \norm[\mu,\sigma^2] 644 | \imp \left(\frac{X-\mu}{\sigma}\right)\dist\norm[0,1] $ 645 | \item $X\dist \norm[\mu,\sigma^2] \wedge Z = aX+b 646 | \imp Z\dist\norm[a\mu+b,a^2\sigma^2] $ 647 | \item $X_i\dist\norm[\mu_i,\sigma_i^2] \wedge X_i \ind X_j 648 | \imp \sum_i X_i \dist \norm[\sum_i\mu_i,\sum_i\sigma_i^2]$ 649 | \item $\Pr{a < X \le b}= \Phi\left(\frac{b-\mu}{\sigma}\right) 650 | - \Phi\left(\frac{a-\mu}{\sigma}\right) $ 651 | \item $\Phi(-x) = 1 - \Phi(x) \qquad \phi'(x) = -x\phi(x) \qquad 652 | \phi''(x) = (x^2-1)\phi(x)$ 653 | \item Upper quantile of $\norm[0,1]$: $z_{\alpha} = \Phi^{-1}(1-\alpha)$ 654 | \end{itemize} 655 | 656 | Gamma 657 | \begin{itemize} 658 | \item $X\dist\gam \eqv X/\beta \dist\gam[\alpha,1]$ 659 | \item $\gam\dist \sum_{i=1}^\alpha\ex$ 660 | \item $X_i\dist\gam[\alpha_i,\beta] \wedge X_i \ind X_j \imp 661 | \sum_i X_i\dist \gam[\sum_i \alpha_i,\beta]$ 662 | \item $\displaystyle\frac{\Gamma(\alpha)}{\lambda^\alpha} 663 | = \displaystyle\int_0^\infty x^{\alpha-1} e^{-\lambda x} \dx$ 664 | \end{itemize} 665 | 666 | Beta 667 | \begin{itemize} 668 | \item $\displaystyle 669 | \frac{1}{\text{B}(\alpha,\beta)}x^{\alpha-1}(1-x)^{\beta-1} 670 | = \frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)} 671 | x^{\alpha-1}(1-x)^{\beta-1} $ 672 | \item $\E{X^k} 673 | = \displaystyle\frac{\text{B}(\alpha+k,\beta)}{\text{B}(\alpha,\beta)} 674 | = \displaystyle\frac{\alpha+k-1}{\alpha+\beta+k-1}\E{X^{k-1}}$ 675 | \item $\bet[1,1] \dist \unif[0,1]$ 676 | \end{itemize} 677 | 678 | \section{Probability and Moment Generating Functions} 679 | 680 | \begin{itemize} 681 | \item $G_X(t) = \E{t^X} \qquad |t| < 1$ 682 | \item $M_X(t) = G_X(e^t) = \E{e^{Xt}} 683 | = \E{ \displaystyle\sum_{i=0}^\infty \frac{(Xt)^i}{i!}} 684 | = \displaystyle\sum_{i=0}^\infty \frac{\E{X^i}}{i!}\cdot t^i$ 685 | \item $\Pr{X=0} = G_X(0)$ 686 | \item $\Pr{X=1}=G_X'(0)$ 687 | \item $\Pr{X=i} = \displaystyle\frac{G_X^{(i)}(0)}{i!}$ 688 | \item $\E{X} = G_X'(1^-)$ 689 | \item $\E{X^k} = M_X^{(k)}(0)$ 690 | \item $\E{\displaystyle\frac{X!}{(X-k)!}} = G_X^{(k)}(1^-)$ 691 | \item $\V{X} = G_X''(1^-) + G_X'(1^-) 692 | - \left(G_X'(1^-)\right)^2$ 693 | \item $G_X(t) = G_Y(t) \imp X \stackrel{d}{=} Y$ 694 | \end{itemize} 695 | 696 | \section{Multivariate Distributions} 697 | 698 | \subsection{Standard Bivariate Normal} 699 | 700 | Let $X,Y\dist\norm[0,1] \wedge X\ind Z$ where 701 | $Y = \rho X + \sqrt{1-\rho^2}Z$\\ 702 | 703 | Joint density 704 | \[ 705 | f(x,y) = \frac{1}{2 \pi \sqrt{1-\rho^2}} 706 | \Exp{-\frac{x^2 + y^2 - 2\rho x y}{2 (1-\rho^2)}} 707 | \] 708 | 709 | Conditionals 710 | \[ 711 | (Y\giv X=x) \dist \norm[\rho x,1-\rho^2] \qquad\text{and}\qquad 712 | (X\giv Y=y) \dist \norm[\rho y,1-\rho^2] 713 | \] 714 | 715 | Independence 716 | \[X \ind Y \eqv \rho = 0\] 717 | 718 | \subsection{Bivariate Normal} 719 | % - http://www.athenasc.com/Bivariate-Normal.pdf 720 | % - http://mathworld.wolfram.com/BivariateNormalDistribution.html 721 | 722 | Let $X\dist\norm[\mu_x,\sigma_x^2]$ 723 | and $Y\dist\norm[\mu_y,\sigma_y^2]$. 
724 | \[f(x,y) = \frac{1}{2 \pi \sigma_x \sigma_y \sqrt{1-\rho^2}}
725 | \Exp{-\frac{z}{2 (1-\rho^2)}}\]
726 | \[ z =
727 | \left[
728 | \left(\frac{x-\mu_x}{\sigma_x}\right)^2
729 | + \left(\frac{y-\mu_y}{\sigma_y}\right)^2
730 | - 2\rho\left(\frac{x-\mu_x}{\sigma_x}\right)
731 | \left(\frac{y-\mu_y}{\sigma_y}\right)
732 | \right]
733 | \]
734 | 
735 | Conditional mean and variance
736 | \[\E{X\giv Y} = \E{X} + \rho\frac{\sigma_X}{\sigma_Y}(Y-\E{Y})\]
737 | \[\V{X\giv Y} = \sigma_X^2 (1-\rho^2)\]
738 | 
739 | \subsection{Multivariate Normal}
740 | 
741 | Covariance matrix $\Sigma$ \quad (Precision matrix $\Sigma^{-1}$)
742 | \[\Sigma =
743 | \begin{pmatrix}
744 | \V{X_1} & \cdots & \cov{X_1,X_k} \\
745 | \vdots & \ddots & \vdots \\
746 | \cov{X_k,X_1} & \cdots & \V{X_k}
747 | \end{pmatrix}\]
748 | 
749 | If $X \dist \norm[\mu,\Sigma]$,
750 | \[f_X(x) = (2\pi)^{-n/2} \left|\Sigma\right|^{-1/2}
751 | \Exp{-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu)} \]
752 | 
753 | Properties
754 | \begin{itemize}
755 | \item $Z \dist \norm[0,1] \wedge X = \mu+\Sigma^{1/2}Z
756 | \imp X \dist \norm[\mu,\Sigma]$
757 | \item $X \dist \norm[\mu,\Sigma] \imp \Sigma^{-1/2}(X-\mu) \dist \norm[0,1]$
758 | \item $X \dist \norm[\mu,\Sigma] \imp AX \dist \norm[A\mu, A\Sigma A^T]$
759 | \item $X \dist \norm[\mu,\Sigma] \wedge a \in \R^k
760 | \imp a^TX \dist \norm[a^T\mu, a^T\Sigma a]$
761 | \end{itemize}
762 | 
763 | \section{Convergence}
764 | 
765 | Let $\{X_1,X_2,\ldots\}$ be a sequence of \rv's and let $X$ be another \rv.
766 | Let $F_n$ denote the \cdf of $X_n$ and let $F$ denote the \cdf of $X$.
767 | 
768 | Types of Convergence
769 | \begin{enumerate}
770 | \item In distribution (weakly, in law): $X_n \dconv X$
771 | \[\limn F_n(t) = F(t) \qquad
772 | \forall t \text{ where } F \text{ continuous}\]
773 | \item In probability: $X_n \pconv X$
774 | \[(\forall \varepsilon > 0) \;
775 | \lim_{n\to\infty} \Pr{|X_n -X| > \varepsilon} = 0\]
776 | \item Almost surely (strongly): $X_n \asconv X$
777 | \[\Pr{\limn X_n=X} = \Pr{\omega\in\Omega: \limn X_n(\omega)=X(\omega)}=1\]
778 | \item In quadratic mean ($L_2$): $X_n \qmconv X$
779 | \[\lim_{n\to\infty} \E{(X_n - X)^2} = 0\]
780 | \end{enumerate}
781 | 
782 | Relationships
783 | \begin{itemize}
784 | \item $X_n \qmconv X \imp X_n \pconv X \imp X_n \dconv X$
785 | \item $X_n \asconv X \imp X_n \pconv X$
786 | \item $X_n \dconv X \wedge (\exists c \in \R) \; \Pr{X=c} = 1
787 | \imp X_n \pconv X$
788 | \item $X_n \pconv X \wedge Y_n \pconv Y
789 | \imp X_n + Y_n \pconv X + Y$
790 | \item $X_n \qmconv X \wedge Y_n \qmconv Y
791 | \imp X_n + Y_n \qmconv X + Y$
792 | \item $X_n \pconv X \wedge Y_n \pconv Y
793 | \imp X_nY_n \pconv XY$
794 | \item $X_n \pconv X \imp \transform(X_n) \pconv \transform(X)$
795 | \item $X_n \dconv X \imp \transform(X_n) \dconv \transform(X)$
796 | \item $X_n \qmconv b \eqv \lim_{n\to\infty} \E{X_n}=b
797 | \wedge \lim_{n\to\infty} \V{X_n} = 0$
798 | \item $X_1,\dots,X_n\; \iid \wedge \E{X}=\mu \wedge \V{X}<\infty
799 | \eqv \samplemean \qmconv \mu$
800 | \end{itemize}
801 | 
802 | \textsc{Slutzky's Theorem}
803 | \begin{itemize}
804 | \item $X_n \dconv X \text{ and } Y_n \pconv c
805 | \imp X_n + Y_n \dconv X + c$
806 | \item $X_n \dconv X \text{ and } Y_n \pconv c
807 | \imp X_nY_n \dconv cX$
808 | \item In general: $X_n \dconv X \text{ and } Y_n \dconv Y
809 | \nimp X_n + Y_n \dconv X + Y$
810 | \end{itemize}
811 | 
812 | \subsection{Law of Large Numbers (LLN)}
813 | 
814 | Let $\{X_1,\ldots,X_n\}$ be a sequence of \iid \rv's, $\E{X_1}=\mu$.
815 | 816 | Weak (WLLN) 817 | \[\samplemean \pconv \mu \qquad n\to\infty\] 818 | 819 | Strong (SLLN) 820 | \[\samplemean \asconv \mu \qquad n\to\infty\] 821 | 822 | \subsection{Central Limit Theorem (CLT)} 823 | 824 | Let $\{X_1,\ldots,X_n\}$ be a sequence of \iid \rv's, $\E{X_1}=\mu$, and 825 | $\V{X_1} = \sigma^2$.\\ 826 | 827 | \[ Z_n 828 | := \displaystyle\frac{\samplemean-\mu}{\sqrt{\V{\samplemean}}} 829 | = \displaystyle\frac{\sqrt{n}(\samplemean - \mu)}{\sigma} 830 | \dconv Z \qquad \text{where } Z\dist \norm[0,1]\] 831 | \[\lim_{n\to\infty} \Pr{Z_n \le z} = \Phi(z) \qquad z \in \mathbb R\] 832 | 833 | CLT notations 834 | \begin{align*} 835 | Z_n &\approx \norm[0,1] \\ 836 | \samplemean &\approx \norm[\mu,\frac{\sigma^2}{n}] \\ 837 | \samplemean - \mu &\approx \norm[0,\frac{\sigma^2}{n}] \\ 838 | \sqrt{n}(\samplemean - \mu) &\approx \norm[0,\sigma^2] \\ 839 | \frac{\sqrt{n}(\samplemean - \mu)}{\sigma} &\approx \norm[0,1] \\ 840 | \end{align*} 841 | 842 | Continuity correction 843 | \[\Pr{\samplemean \le x} \approx 844 | \Phi\left(\displaystyle\frac{x+\frac{1}{2}-\mu}{\sigma/\sqrt{n}}\right)\] 845 | \[\Pr{\samplemean \ge x} \approx 846 | 1-\Phi\left(\displaystyle\frac{x-\frac{1}{2}-\mu}{\sigma/\sqrt{n}}\right)\] 847 | 848 | Delta method 849 | \[Y_n \approx \norm[\mu,\frac{\sigma^2}{n}] \imp 850 | \transform(Y_n) \approx 851 | \norm[\transform(\mu), 852 | \left(\transform'(\mu)\right)^2\frac{\sigma^2}{n}]\] 853 | 854 | \section{Statistical Inference} 855 | 856 | Let $X_1,\cdots,X_n \distiid F$ if not otherwise noted. 857 | 858 | \subsection{Point Estimation} 859 | 860 | \begin{itemize} 861 | \item Point estimator $\that_n$ of $\theta$ is a \rv: 862 | $\that_n = g(X_1,\dots,X_n)$ 863 | \item $\bias(\that_n) = \E{\that_n}-\theta$ 864 | \item Consistency: $\that_n \pconv \theta$ 865 | \item Sampling distribution: $F(\that_n)$ 866 | \item Standard error: $\se(\that_n) = \sqrt{\V{\that_n}}$ 867 | \item Mean squared error: $\mse = \E{(\that_n-\theta)^2} 868 | = \bias(\that_n)^2 + \V{\that_n}$ 869 | \item $\limn \bias(\that_n) = 0 \wedge \limn\se(\that_n) = 0 870 | \imp \that_n$ is consistent 871 | \item Asymptotic normality: 872 | $\displaystyle\frac{\that_n-\theta}{\se} \dconv \norm[0,1]$ 873 | \item \textsc{Slutzky's Theorem} often lets us replace $\se(\that_n)$ by some 874 | (weakly) consistent estimator $\shat_n$. 875 | \end{itemize} 876 | 877 | \subsection{Normal-Based Confidence Interval} 878 | 879 | Suppose $\that_n \approx \norm[\theta,\sehat^2]$. 880 | Let $\zat = \Phi^{-1}(1-(\alpha/2))$, 881 | i.e., $\Pr{Z > \zat} = \alpha/2$ and $\Pr{-\zat < Z < \zat} = 1-\alpha$ 882 | where $Z\dist\norm[0,1]$. 
883 | Then \[C_n = \that_n \pm \zat\sehat\]
884 | 
885 | \subsection{Empirical distribution}
886 | 
887 | Empirical Distribution Function (ECDF)
888 | \[\Fnhat(x) = \displaystyle\frac{\sumin I(X_i \le x)}{n}\]
889 | \[I(X_i \le x) = \begin{cases}
890 | 1 & X_i \le x \\
891 | 0 & X_i > x
892 | \end{cases}\]
893 | 
894 | Properties (for any fixed $x$)
895 | \begin{itemize}
896 | \item $\E{\Fnhat(x)} = F(x)$
897 | \item $\V{\Fnhat(x)} = \displaystyle\frac{F(x)(1-F(x))}{n}$
898 | \item $\mse = \displaystyle\frac{F(x)(1-F(x))}{n} \conv 0$
899 | \item $\Fnhat(x) \pconv F(x)$
900 | \end{itemize}
901 | 
902 | \textsc{Dvoretzky-Kiefer-Wolfowitz} (DKW) inequality ($X_1,\dots,X_n\dist F$)
903 | \[\Pr{\sup_x\left|F(x)-\Fnhat(x)\right| > \varepsilon} \le
904 | 2e^{-2n\varepsilon^2}\]
905 | 
906 | Nonparametric $1-\alpha$ confidence band for $F$
907 | \begin{align*}
908 | L(x) &= \max\{\Fnhat(x)-\epsilon_n, 0\} \\
909 | U(x) &= \min\{\Fnhat(x)+\epsilon_n, 1\} \\
910 | \epsilon_n &=
911 | \sqrt{\displaystyle\frac{1}{2n}\log\left( \frac{2}{\alpha} \right)}
912 | \end{align*}
913 | \[\Pr{L(x) \le F(x) \le U(x) \;\forall x} \ge 1-\alpha\]
914 | 
915 | \subsection{Statistical Functionals}
916 | 
917 | \begin{itemize}
918 | \item Statistical functional: $T(F)$
919 | \item Plug-in estimator of $\theta = T(F)$: $\that_n = T(\Fnhat)$
920 | \item Linear functional: $T(F) = \int \transform(x)\dfx$
921 | \item Plug-in estimator for linear functional: \\
922 | \[T(\Fnhat)
923 | = \displaystyle\int \transform(x)\dfhatx
924 | = \frac{1}{n}\sumin \transform(X_i)\]
925 | \item Often: $T(\Fnhat) \approx \norm[T(F),\sehat^2]$ \imp
926 | $T(\Fnhat) \pm \zat\sehat$
927 | \item $p^\mathrm{th}$ quantile: $F^{-1}(p) = \inf\{x:F(x) \ge p\}$
928 | \item $\mhat = \samplemean$
929 | \item $\shat^2 = \displaystyle\frac{1}{n-1}\sumin
930 | (X_i-\samplemean)^2$
931 | \item $\khat =
932 | \displaystyle\frac{\frac{1}{n}\sumin(X_i-\mhat)^3}{\shat^3}$
933 | \item $\rhohat = \displaystyle\frac{\sumin(X_i-\samplemean)(Y_i-\bar{Y}_n)}%
934 | {\sqrt{\sumin(X_i-\samplemean)^2}\sqrt{\sumin(Y_i-\bar{Y}_n)^2}}$
935 | \end{itemize}
936 | 
937 | \section{Parametric Inference}
938 | 
939 | Let $\mathfrak{F} = \bigl\{ f(x;\theta) : \theta\in\Theta \bigr\}$ be a
940 | parametric model with parameter space $\Theta \subset \R^k$ and parameter
941 | $\theta = (\theta_1,\dots,\theta_k)$.
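For example, the Normal family is a parametric model with $k = 2$:
\[\mathfrak{F} = \bigl\{ \norm[\mu,\sigma^2] : \mu \in \R,\, \sigma^2 > 0 \bigr\}
\qquad \theta = (\mu,\sigma^2)\]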
942 | 
943 | \subsection{Method of Moments}
944 | 
945 | $j^{\mathrm{th}}$ moment
946 | \[\alpha_j(\theta) = \E{X^j} = \displaystyle\int x^j \dfx\]
947 | 
948 | $j^{\mathrm{th}}$ sample moment
949 | \[\ahat_j = \displaystyle\frac{1}{n}\sumin X_i^j\]
950 | 
951 | Method of Moments estimator (MoM)
952 | \begin{align*}
953 | \alpha_1(\theta) &= \ahat_1 \\
954 | \alpha_2(\theta) &= \ahat_2 \\
955 | \vdots &= \vdots \\
956 | \alpha_k(\theta) &= \ahat_k
957 | \end{align*}
958 | 
959 | \begin{titemize}{Properties of the MoM estimator}
960 | \item $\that_n$ exists with probability tending to 1
961 | \item Consistency: $\that_n \pconv \theta$
962 | \item Asymptotic normality:
963 | \[\sqrt{n}(\that-\theta) \dconv \norm[0,\Sigma]\]
964 | where $\Sigma = g\E{YY^T}g^T$, $Y = (X,X^2,\dots,X^k)^T$,\\
965 | $g = (g_1,\dots,g_k)$ and
966 | $g_j = \frac{\partial}{\partial\theta}\alpha_j^{-1}(\theta)$
967 | \end{titemize}
968 | 
969 | \subsection{Maximum Likelihood}
970 | 
971 | Likelihood: $\Lln : \Theta \to [0,\infty)$
972 | \[\Lln(\theta) = \displaystyle\prodin f(X_i;\theta)\]
973 | Log-likelihood
974 | \[\lln(\theta) = \log \Lln(\theta) = \sumin \log f(X_i;\theta)\]
975 | Maximum likelihood estimator (\mle)
976 | \[\Lln(\that_n) = \sup_\theta \Lln(\theta)\]
977 | 
978 | Score function
979 | \[s(X;\theta) = \frac{\partial}{\partial\theta}\log f(X;\theta)\]
980 | 
981 | Fisher information
982 | \[I(\theta) = \V[\theta]{s(X;\theta)}\]
983 | \[I_n(\theta) = nI(\theta)\]
984 | Fisher information (exponential family)
985 | \[I(\theta) = \E[\theta]{-\frac{\partial}{\partial\theta} s(X;\theta)}\]
986 | Observed Fisher information
987 | \[I_n^{obs}(\theta)
988 | = -\frac{\partial^2}{\partial\theta^2} \sumin\log f(X_i;\theta)\]
989 | 
990 | Properties of the \mle
991 | \begin{itemize}
992 | \item Consistency: $\that_n \pconv \theta$
993 | \item Equivariance:
994 | $\that_n$ is the \mle
995 | \imp $\transform(\that_n)$ is the \mle of $\transform(\theta)$
996 | \item Asymptotic normality:
997 | \begin{enumerate}
998 | \item $\se \approx \sqrt{1/I_n(\theta)}$
999 | \[\frac{(\that_n - \theta)}{\se} \dconv \norm[0,1]\]
1000 | \item $\sehat \approx \sqrt{1/I_n(\that_n)}$
1001 | \[\frac{(\that_n - \theta)}{\sehat} \dconv \norm[0,1]\]
1002 | \end{enumerate}
1003 | \item Asymptotic optimality (or efficiency), i.e., smallest variance for
1004 | large samples: if $\ttil_n$ is any other estimator, the asymptotic relative
1005 | efficiency is
1006 | \[\are(\ttil_n, \that_n)
1007 | = \frac{\V{\that_n}}{\V{\ttil_n}}
1008 | \le 1\]
1009 | \item Approximately the Bayes estimator
1010 | \end{itemize}
1011 | 
1012 | \subsubsection{Delta Method}
1013 | If $\tau=\transform(\theta)$,
1014 | where \transform is differentiable and $\transform'(\theta) \neq 0$:
1015 | \[\frac{(\widehat{\tau}_n-\tau)}{\sehat(\widehat{\tau})} \dconv \norm[0,1]\]
1016 | where $\widehat{\tau} = \transform(\that)$
1017 | is the \mle of $\tau$ and
1018 | \[\sehat(\widehat{\tau}) = \left|\transform'(\that)\right|\sehat(\that_n)\]
1019 | 
1020 | \subsection{Multiparameter Models}
1021 | 
1022 | Let $\theta=(\theta_1,\dots,\theta_k)$
1023 | and $\that= (\that_1,\dots,\that_k)$ be the \mle.
1024 | 
1025 | \[H_{jj} = \frac{\partial^2 \lln}{\partial\theta_j^2} \qquad
1026 | H_{jk} = \frac{\partial^2 \lln}{\partial\theta_j\partial\theta_k}\]
1027 | 
1028 | Fisher information matrix
1029 | \[I_n(\theta) = -\begin{bmatrix}
1030 | \E[\theta]{H_{11}} & \cdots & \E[\theta]{H_{1k}} \\
1031 | \vdots & \ddots & \vdots \\
1032 | \E[\theta]{H_{k1}} & \cdots & \E[\theta]{H_{kk}}
1033 | \end{bmatrix}\]
1034 | 
1035 | Under appropriate regularity conditions
1036 | \[(\that-\theta) \approx \norm[0,J_n]\]
1037 | with $J_n(\theta) = I_n^{-1}(\theta)$.
1038 | Further, if $\that_j$ is the $j^{\mathrm{th}}$ component of $\that$, then
1039 | \[\frac{(\that_j-\theta_j)}{\sehat_j} \dconv \norm[0,1]\] where $\sehat_j^2 =
1040 | J_n(j,j)$ and $\cov{\that_j,\that_k} = J_n(j,k)$.
1041 | 
1042 | \subsubsection{Multiparameter delta method}
1043 | 
1044 | Let $\tau = \transform(\theta_1,\dots,\theta_k)$
1045 | and let the gradient of \transform be
1046 | \[\nabla\transform = \begin{pmatrix}
1047 | \displaystyle\frac{\partial\transform}{\partial\theta_1} \\
1048 | \vdots \\
1049 | \displaystyle\frac{\partial\transform}{\partial\theta_k} \\
1050 | \end{pmatrix}\]
1051 | Suppose $\bigl.\nabla\transform\bigr|_{\theta=\that} \neq 0$ and
1052 | $\widehat{\tau} = \transform(\that)$. Then,
1053 | \[\frac{(\widehat{\tau}-\tau)}{\sehat(\widehat{\tau})} \dconv \norm[0,1]\]
1054 | where
1055 | \[\sehat(\widehat{\tau}) = \sqrt{\left( \widehat{\nabla}\transform \right)^T
1056 | \widehat{J}_n\left( \widehat{\nabla}\transform \right)}\]
1057 | and $\widehat{J}_n = J_n(\that)$ and $\widehat{\nabla}\transform =
1058 | \bigl.\nabla\transform\bigr|_{\theta=\that}$.
1059 | 
1060 | \subsection{Parametric Bootstrap}
1061 | 
1062 | Sample from $f(x;\that_n)$ instead of from $\Fnhat$, where $\that_n$ could be
1063 | the \mle or method of moments estimator.
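For example, under a Normal model one draws $X_1^*,\dots,X_n^* \distiid
\norm[\mhat,\shat^2]$, recomputes $\that^* = g(X_1^*,\dots,X_n^*)$ on each of
the $B$ resamples, and estimates $\V{\that_n}$ by the sample variance of the
$\that^*$ values.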
1064 | 1065 | \section{Hypothesis Testing} 1066 | 1067 | \[H_0:\theta\in\Theta_0 \qquad\text{versus}\qquad H_1:\theta\in\Theta_1\] 1068 | 1069 | Definitions 1070 | \begin{itemize} 1071 | \item Null hypothesis $H_0$ 1072 | \item Alternative hypothesis $H_1$ 1073 | \item Simple hypothesis $\theta = \theta_0$ 1074 | \item Composite hypothesis $\theta > \theta_0$ or $\theta < \theta_0$ 1075 | \item Two-sided test: 1076 | $H_0:\theta=\theta_0 \quad\text{versus}\quad H_1:\theta\neq\theta_0$ 1077 | \item One-sided test: 1078 | $H_0:\theta\le\theta_0 \quad\text{versus}\quad H_1:\theta>\theta_0$ 1079 | % \[H_0:\theta\ge\theta_0 \qquad\text{versus}\qquad H_1:\theta<\theta_0\] 1080 | \item Critical value $c$ 1081 | \item Test statistic $T$ 1082 | \item Rejection region $R = \left\{ x: T(x) > c \right\}$ 1083 | \item Power function $\beta(\theta) = \Pr{X \in R}$ 1084 | \item Power of a test: $1 - \Pr{\text{Type II error}} = 1-\beta 1085 | = \displaystyle\inf_{\theta \in \Theta_1} \beta(\theta)$ 1086 | \item Test size: $\alpha = \Pr{\text{Type I error}} 1087 | = \displaystyle\sup_{\theta\in\Theta_0}\beta(\theta)$ 1088 | \end{itemize} 1089 | 1090 | \centering 1091 | \begin{tabular}{l|cc} 1092 | & \textsf{Retain} $H_0$ & \textsf{Reject} $H_0$ \\ 1093 | \hline 1094 | $H_0$ \textsf{true} & $\surd$ & Type I Error ($\alpha$)\\ 1095 | $H_1$ \textsf{true} & Type II Error ($\beta$) & 1096 | $\surd$ (power) \\ 1097 | \end{tabular} 1098 | 1099 | \raggedright 1100 | p-value 1101 | \begin{itemize} 1102 | \item p-value $= \sup_{\theta\in\Theta_0} \Pr[\theta]{T(X) \ge T(x)} 1103 | = \inf\bigl\{ \alpha: T(x) \in R_\alpha \bigr\}$ 1104 | \item p-value $= \sup_{\theta\in\Theta_0} 1105 | \underbrace{\Pr[\theta]{T(X^\star) \ge T(X)}}_{1-F_\theta(T(X)) 1106 | \quad \text{since } T(X^\star) \dist F_\theta} 1107 | = \inf\bigl\{ \alpha: T(X) \in R_\alpha \bigr\}$ 1108 | \end{itemize} 1109 | 1110 | \centering 1111 | \begin{tabular}{ll} 1112 | \textsf{p-value} & \textsf{evidence} \\ 1113 | \hline 1114 | $< 0.01$ & very strong evidence against $H_0$ \\ 1115 | $0.01 - 0.05$ & strong evidence against $H_0$ \\ 1116 | $0.05 - 0.1$ & weak evidence against $H_0$ \\ 1117 | $> 0.1$ & little or no evidence against $H_0$ \\ 1118 | \end{tabular} 1119 | 1120 | \raggedright 1121 | Wald test 1122 | \begin{itemize} 1123 | \item Two-sided test 1124 | \item Reject $H_0$ when $|W| > \zat$ where 1125 | $W = \displaystyle\frac{\that - \theta_0}{\sehat}$ 1126 | \item $\Pr{|W| > \zat} \conv \alpha$ 1127 | \item p-value $= \Pr[\theta_0]{|W| > |w|} 1128 | \approx \Pr{|Z| > |w|} 1129 | = 2\Phi(-|w|)$ 1130 | \end{itemize} 1131 | 1132 | Likelihood ratio test 1133 | \begin{itemize} 1134 | \item $T(X) = \displaystyle\frac{\sup_{\theta\in\Theta}\Lln(\theta)}% 1135 | {\sup_{\theta\in\Theta_0}\Lln(\theta)} 1136 | = \frac{\Lln(\that_n)}{\Lln(\that_{n,0})}$ 1137 | \item $\lambda(X) = 2\log T(X) \dconv \chi_{r-q}^2$ 1138 | where $\displaystyle\sum_{i=1}^k Z_i^2 \dist \chi_k^2$ and 1139 | $Z_1,\dots,Z_k \distiid \norm[0,1]$ 1140 | \item p-value $= \Pr[\theta_0]{\lambda(X) > \lambda(x)} 1141 | \approx \Pr{\chi_{r-q}^2 > \lambda(x)}$ 1142 | \end{itemize} 1143 | 1144 | \begin{titemize}{Multinomial LRT} 1145 | \item \mle: 1146 | $\phat_n = \displaystyle \left(\frac{X_1}{n},\dots,\frac{X_k}{n}\right)$ 1147 | \item $T(X) = \displaystyle \frac{\Lln(\phat_n)}{\Lln(p_0)} 1148 | = \prod_{j=1}^k \left( \frac{\phat_j}{p_{0j}} \right)^{X_j}$ 1149 | \item $\lambda(X) = \displaystyle 2\sum_{j=1}^k X_j \log 1150 | \left( \frac{\phat_j}{p_{0j}} \right) \dconv \chi_{k-1}^2$ 1151 | 
\item The approximate size $\alpha$ LRT rejects $H_0$ when
1152 | $\lambda(X) \ge \chi_{k-1,\alpha}^2$
1153 | \end{titemize}
1154 | 
1155 | Pearson Chi-square Test
1156 | \begin{itemize}
1157 | \item $T = \displaystyle \sum_{j=1}^k \frac{(X_j-\E{X_j})^2}{\E{X_j}}$
1158 | where $\E{X_j} = np_{0j}$ under $H_0$
1159 | \item $T \dconv \chi_{k-1}^2$
1160 | \item p-value $= \Pr{\chi_{k-1}^2 > T(x)}$
1161 | \item Converges to $\chi_{k-1}^2$ faster than the LRT, hence preferable for small $n$
1162 | \end{itemize}
1163 | 
1164 | Independence testing
1165 | \begin{itemize}
1166 | \item $I$ rows, $J$ columns,
1167 | $\mathbf{X}$ multinomial sample of size $n$ with $I \cdot J$ cells
1168 | \item {\mle}s unconstrained: $\phat_{ij} = \frac{X_{ij}}{n}$
1169 | \item {\mle}s under $H_0$:
1170 | $\phat_{0ij} = \phat_{i\cdot}\phat_{\cdot j}
1171 | = \frac{X_{i\cdot}}{n} \frac{X_{\cdot j}}{n}$
1172 | \item LRT: $\lambda = 2\sum_{i=1}^I\sum_{j=1}^J X_{ij}
1173 | \log\left( \frac{nX_{ij}}{X_{i\cdot}X_{\cdot j}}\right)$
1174 | \item Pearson $\chi^2$: $T = \sum_{i=1}^I\sum_{j=1}^J
1175 | \frac{(X_{ij}-\E{X_{ij}})^2}{\E{X_{ij}}}$
1176 | \item LRT and Pearson $\dconv \chisq{\nu}$,
1177 | where $\nu=(I-1)(J-1)$
1178 | \end{itemize}
1179 | 
1180 | \section{Exponential Family}
1181 | 
1182 | Scalar parameter
1183 | \begin{align*}
1184 | f_X(x \giv \theta)
1185 | &= h(x) \Exp{\eta(\theta)T(x) - A(\theta)} \\
1186 | &= h(x) g(\theta) \Exp{\eta(\theta)T(x)}
1187 | \end{align*}
1188 | 
1189 | Vector parameter
1190 | \begin{align*}
1191 | f_X(x \giv \theta)
1192 | &= h(x) \Exp{\sum_{i=1}^s\eta_i(\theta)T_i(x) - A(\theta)} \\
1193 | &= h(x) \Exp{\eta(\theta)\cdot T(x) - A(\theta)} \\
1194 | &= h(x) g(\theta)\Exp{\eta(\theta)\cdot T(x)}
1195 | \end{align*}
1196 | 
1197 | Natural form
1198 | \begin{align*}
1199 | f_X(x \giv \eta)
1200 | &= h(x) \Exp{\eta\cdot \mathbf{T}(x) - A(\eta)} \\
1201 | &= h(x) g(\eta) \Exp{\eta\cdot \mathbf{T}(x)} \\
1202 | &= h(x) g(\eta) \Exp{\eta^T \mathbf{T}(x)}
1203 | \end{align*}
1204 | 
1205 | \section{Bayesian Inference}
1206 | 
1207 | \textsc{Bayes' Theorem}
1208 | \[f(\theta\giv x^n)
1209 | = \frac{f(x^n\giv\theta)f(\theta)}{f(x^n)}
1210 | = \frac{f(x^n\giv\theta)f(\theta)}
1211 | {\int f(x^n\giv\theta)f(\theta)\,d\theta} \propto \Lln(\theta)f(\theta)\]
1212 | 
1213 | Definitions
1214 | \begin{itemize}
1215 | \item $X^n = (X_1,\ldots,X_n)$
1216 | \item $x^n = (x_1,\ldots,x_n)$
1217 | \item Prior density $f(\theta)$
1218 | \item Likelihood $f(x^n \giv \theta)$: joint density of the data\\
1219 | In particular, $X^n$ \iid \imp $f(x^n\giv\theta) =
1220 | \displaystyle\prodin f(x_i\giv\theta) = \Lln(\theta)$
1221 | \item Posterior density $f(\theta\giv x^n)$
1222 | \item Normalizing constant $c_n = f(x^n)
1223 | = \int f(x^n\giv\theta)f(\theta)\,d\theta$
1224 | \item Kernel: part of a density that depends on $\theta$
1225 | \item Posterior mean $\bar\theta_n
1226 | = \int\theta f(\theta\giv x^n)\,d\theta
1227 | = \frac{\int\theta\Lln(\theta)f(\theta)\,d\theta}
1228 | {\int\Lln(\theta)f(\theta)\,d\theta}$
1229 | \end{itemize}
1230 | 
1231 | \subsection{Credible Intervals}
1232 | 
1233 | Posterior interval
1234 | \[\Pr{\theta\in (a,b)\giv x^n}
1235 | = \int_{a}^{b} f(\theta\giv x^n)\,d\theta
1236 | = 1-\alpha\]
1237 | 
1238 | Equal-tail credible interval
1239 | \[\int_{-\infty}^{a} f(\theta\giv x^n)\,d\theta
1240 | = \int_{b}^{\infty} f(\theta\giv x^n)\,d\theta = \alpha/2\]
1241 | 
1242 | Highest posterior density (HPD) region $R_n$
1243 | \begin{enumerate}
1244 | \item $\Pr{\theta\in R_n \giv x^n} = 1-\alpha$
1245 | \item $R_n = \left\{ \theta: f(\theta\giv x^n) >
k \right\}$ for some $k$
1246 | \end{enumerate}
1247 | $R_n$ is unimodal \imp $R_n$ is an interval
1248 | 
1249 | \subsection{Function of parameters}
1250 | Let $\tau = \transform(\theta)$ and $A =
1251 | \left\{ \theta:\transform(\theta) \le \tau \right\}$.
1252 | 
1253 | Posterior CDF for $\tau$
1254 | \[H(\tau\giv x^n)
1255 | = \Pr{\transform(\theta) \le \tau\giv x^n}
1256 | = \int_A f(\theta \giv x^n)\,d\theta\]
1257 | Posterior density
1258 | \[h(\tau\giv x^n) = H'(\tau\giv x^n)\]
1259 | Bayesian delta method
1260 | \[\tau\giv X^n \approx \norm[\transform(\that),
1261 | \left(\sehat\left|\transform'(\that)\right|\right)^2]\]
1262 | 
1263 | \subsection{Priors}
1264 | 
1265 | Choice
1266 | \begin{itemize}
1267 | \item Subjective Bayesianism: the prior should incorporate as much detail as
1268 | possible of the researcher's a priori knowledge---via \emph{prior elicitation}
1269 | \item Objective Bayesianism: the prior should incorporate as little detail as
1270 | possible (\emph{non-informative} prior)
1271 | \item Robust Bayesianism: consider various priors and determine the
1272 | \emph{sensitivity} of our inferences to changes in the prior
1273 | \end{itemize}
1274 | 
1275 | Types
1276 | \begin{itemize}
1277 | \item Flat: $f(\theta) \propto \text{constant}$
1278 | \item Proper: $\int_{-\infty}^{\infty} f(\theta)\,d\theta = 1$
1279 | \item Improper: $\int_{-\infty}^{\infty} f(\theta)\,d\theta = \infty$
1280 | \item \textsc{Jeffreys}' prior (transformation-invariant):
1281 | \[f(\theta) \propto \sqrt{I(\theta)} \qquad
1282 | f(\theta) \propto \sqrt{\det(I(\theta))}\]
1283 | \item Conjugate: $f(\theta)$ and $f(\theta\giv x^n)$
1284 | belong to the same parametric family
1285 | \end{itemize}
1286 | 
1287 | \subsubsection{Conjugate Priors}
1288 | 
1289 | \begin{tabular}{|l|p{.23\columnwidth}|p{.45\columnwidth}|}
1290 | \hline
1291 | \multicolumn{3}{|c|}{Continuous likelihood (subscript $c$ denotes constant)}\\
1292 | \hline && \\[-2ex]
1293 | Likelihood & Conjugate prior &
1294 | \text{Posterior hyperparameters} \\[1ex]
1295 | 
1296 | \hline && \\[-2ex]
1297 | 
1298 | $\unif[0,\theta]$ & Pareto($x_m,k$) &
1299 | $\displaystyle\max\left\{ x_{(n)}, x_m \right\}, k+n$ \\
1300 | 
1301 | $\ex[\lambda]$ & $\gam[\alpha,\beta]$ &
1302 | $\alpha + n, \beta + \displaystyle\sumin x_i$\\[3ex]
1303 | 
1304 | $\norm[\mu,\sigma_c^2]$ & $\norm[\mu_0,\sigma_0^2]$ &
1305 | $\displaystyle
1306 | \left(\frac{\mu_0}{\sigma_0^2} + \frac{\sumin x_i}{\sigma_c^2} \right) /
1307 | \left( \frac{1}{\sigma_0^2} + \frac{n}{\sigma_c^2} \right)$,
1308 | $\displaystyle\left(\frac{1}{\sigma_0^2} + \frac{n}{\sigma_c^2}\right)^{-1}$
1309 | \\[2ex]
1310 | 
1311 | $\norm[\mu_c,\sigma^2]$ & Scaled Inverse Chi-square($\nu,\sigma_0^2$) &
1312 | $\nu + n$, $\displaystyle \frac{\nu\sigma_0^2 + \sumin(x_i-\mu_c)^2}{\nu + n}$
1313 | \\[4ex]
1314 | 
1315 | $\norm[\mu,\sigma^2]$ &
1316 | Normal-scaled Inverse Gamma($\lambda,\nu,\alpha,\beta$) &
1317 | $\displaystyle\frac{\nu\lambda+n\xbar}{\nu+n}$, \qquad $\nu+n$, \qquad
1318 | $\displaystyle \alpha + \frac{n}{2}$, \qquad
1319 | $\displaystyle \beta + \frac{1}{2}\sumin(x_i-\xbar)^2 +
1320 | \frac{n\nu(\xbar-\lambda)^2}{2(\nu+n)}$
1321 | \\[4ex]
1322 | 
1323 | MVN($\mu,\Sigma_c$) & MVN($\mu_0,\Sigma_0$) &
1324 | $\displaystyle\left( \Sigma_0^{-1}+ n\Sigma_c^{-1} \right)^{-1}
1325 | \left( \Sigma_0^{-1}\mu_0 + n\Sigma_c^{-1} \xbar \right)$,
1326 | $\displaystyle\left( \Sigma_0^{-1} + n \Sigma_c^{-1} \right)^{-1}$ \\[1ex]
1327 | 
1328 | MVN($\mu_c,\Sigma$) & Inverse-Wishart($\kappa,\Psi$) &
1329 | $\displaystyle n + \kappa, \Psi +
\sumin(x_i-\mu_c)(x_i-\mu_c)^T$\\
1330 | 
1331 | Pareto($x_{m_c}, k$) & $\gam[\alpha,\beta]$ &
1332 | $\displaystyle\alpha+n, \beta + \sumin \log\frac{x_i}{x_{m_c}}$ \\
1333 | 
1334 | Pareto($x_m, k_c$) & Pareto($x_0, k_0$) &
1335 | $\displaystyle x_0, k_0 - kn$ where $k_0 > kn$ \\
1336 | 
1337 | $\gam[\alpha_c,\beta]$ & $\gam[\alpha_0,\beta_0]$ &
1338 | $\displaystyle \alpha_0 + n\alpha_c, \beta_0 + \sumin x_i$ \\[3ex]
1339 | 
1340 | \hline
1341 | \end{tabular}
1342 | 
1343 | \vfill~
1344 | \columnbreak
1345 | 
1346 | \begin{tabular}{*3{|>{\begin{math}\displaystyle}l<{\end{math}}}|}
1347 | \hline
1348 | \multicolumn{3}{|c|}{Discrete likelihood}\\
1349 | \hline && \\[-2ex]
1350 | \text{Likelihood} & \text{Conjugate prior} &
1351 | \text{Posterior hyperparameters} \\[1ex]
1352 | 
1353 | \hline && \\[-2ex]
1354 | 
1355 | \bern[p] & \bet[\alpha,\beta] &
1356 | \alpha + \sumin x_i, \beta + n - \sumin x_i \\
1357 | 
1358 | \bin[p] & \bet[\alpha,\beta] &
1359 | \alpha + \sumin x_i, \beta + \sumin N_i - \sumin x_i \\
1360 | 
1361 | \nbin[p] & \bet[\alpha,\beta] & \alpha + rn, \beta + \sumin x_i \\
1362 | 
1363 | \pois[\lambda] & \gam[\alpha,\beta] & \alpha + \sumin x_i, \beta + n \\
1364 | 
1365 | \text{Multinomial}(p) & \dir[\alpha] & \alpha + \sumin x^{(i)} \\
1366 | 
1367 | \geo[p] & \bet[\alpha,\beta] & \alpha + n, \beta + \sumin x_i \\[3ex]
1368 | 
1369 | \hline
1370 | \end{tabular}
1371 | 
1372 | \subsection{Bayesian Testing}
1373 | If $H_0:\theta \in \Theta_0$:
1374 | \begin{align*}
1375 | \text{Prior probability } \Pr{H_0}
1376 | &= \int_{\Theta_0} f(\theta)\,d\theta\\
1377 | \text{Posterior probability } \Pr{H_0\giv x^n}
1378 | &= \int_{\Theta_0} f(\theta\giv x^n)\,d\theta
1379 | \end{align*}
1380 | 
1381 | Let $H_1,\dots,H_K$ be $K$ hypotheses.
1382 | Suppose $\theta \dist f(\theta\giv H_k)$. Then
1383 | \[\Pr{H_k\giv x^n}
1384 | = \frac{f(x^n\giv H_k)\Pr{H_k}}{\sum_{i=1}^K f(x^n\giv H_i)\Pr{H_i}}\]
1385 | 
1386 | Marginal likelihood
1387 | \[f(x^n\giv H_i)
1388 | = \int_\Theta f(x^n\giv \theta,H_i)f(\theta\giv H_i) \,d\theta\]
1389 | 
1390 | Posterior odds (of $H_i$ relative to $H_j$)
1391 | \[\frac{\Pr{H_i\giv x^n}}{\Pr{H_j\giv x^n}}
1392 | = \underbrace{\frac{f(x^n\giv H_i)}{f(x^n\giv H_j)}}%
1393 | _{\text{Bayes Factor }BF_{ij}}
1394 | \times \;\underbrace{\frac{\Pr{H_i}}{\Pr{H_j}}}_{\text{prior odds}}\]
1395 | 
1396 | \columnbreak
1397 | Bayes factor
1398 | 
1399 | \centering
1400 | \begin{tabular}{lll}
1401 | $\log_{10} BF_{10}$ & $BF_{10}$ & \textsf{evidence} \\
1402 | \toprule
1403 | $0 - 0.5$ & $1 - 3.2$ & Weak \\
1404 | $0.5 - 1$ & $3.2 - 10$ & Moderate \\
1405 | $1 - 2$ & $10 - 100$ & Strong \\
1406 | $> 2$ & $> 100$ & Decisive \\
1407 | \end{tabular}
1408 | 
1409 | \vspace*{2ex}
1410 | 
1411 | $p^* = \displaystyle\frac{\frac{p}{1-p}BF_{10}}{1+\frac{p}{1-p}BF_{10}}$
1412 | where $p=\Pr{H_1}$ and $p^* = \Pr{H_1 \giv x^n}$
1413 | 
1414 | \raggedright
1415 | 
1416 | \section{Sampling Methods}
1417 | 
1418 | \subsection{Inverse Transform Sampling}
1419 | 
1420 | Setup
1421 | \begin{itemize}
1422 | \item $U \dist \unif[0,1]$
1423 | \item $X \dist F$
1424 | \item $F^{-1}(u)= \inf\{ x \mid F(x) \ge u\}$
1425 | \end{itemize}
1426 | 
1427 | Algorithm
1428 | \begin{enumerate}
1429 | \item Generate $u \dist \unif[0,1]$
1430 | \item Compute $x = F^{-1}(u)$
1431 | \end{enumerate}
1432 | 
1433 | \subsection{The Bootstrap}
1434 | 
1435 | Let $T_n = g(X_1,\dots,X_n)$ be a statistic.
1436 | \begin{enumerate}
1437 | \item Estimate $\V[F]{T_n}$ with $\V[\Fnhat]{T_n}$.
1438 | \item Approximate $\V[\Fnhat]{T_n}$ using simulation: 1439 | \begin{enumerate} 1440 | \item Repeat the following $B$ times to get $T_{n,1}^*,\dots,T_{n,B}^*$, 1441 | an \iid sample from the sampling distribution implied by $\Fnhat$ 1442 | \begin{enumerate} 1443 | \item Sample uniformly $X_1^*,\dots,X_n^* \dist \Fnhat$. 1444 | \item Compute $T_n^* = g(X_1^*,\dots,X_n^*)$. 1445 | \end{enumerate} 1446 | \item Then 1447 | \[v_{boot} = \widehat{\mathbb{V}}_{\Fnhat} = 1448 | \displaystyle\frac{1}{B} \sum_{b=1}^B 1449 | \left(T_{n,b}^* - \frac{1}{B} \sum_{r=1}^B T_{n,r}^* \right)^2\] 1450 | \end{enumerate} 1451 | \end{enumerate} 1452 | 1453 | \subsubsection{Bootstrap Confidence Intervals} 1454 | 1455 | Normal-based interval 1456 | \[T_n \pm \zat\sehat_{boot}\] 1457 | 1458 | Pivotal interval 1459 | \begin{enumerate} 1460 | \item Location parameter $\theta = T(F)$ 1461 | \item Pivot $R_n = \that_n - \theta$ 1462 | \item Let $H(r) = \Pr{R_n \le r}$ be the \cdf of $R_n$ 1463 | \item Let $R_{n,b}^* = \that_{n,b}^* - \that_n$. 1464 | Approximate $H$ using bootstrap: 1465 | \[\widehat{H}(r) = \frac{1}{B}\sum_{b=1}^B I(R_{n,b}^* \le r)\] 1466 | \item $\theta_\beta^*$ = $\beta$ sample quantile of 1467 | $(\that_{n,1}^*,\dots,\that_{n,B}^*)$ 1468 | \item $r_\beta^*$ = beta sample quantile of 1469 | $(R_{n,1}^*,\dots,R_{n,B}^*)$, i.e., 1470 | $r_\beta^* = \theta_\beta^* - \that_n$ 1471 | \item Approximate $1-\alpha$ confidence interval 1472 | $C_n = \left( \hat{a}, \hat{b} \right)$ where 1473 | \begin{align*} 1474 | \hat{a} 1475 | &=& \that_n - \widehat{H}^{-1}\left( 1-\frac{\alpha}{2} \right) 1476 | &=& \that_n - r_{1-\alpha/2}^* 1477 | &=& 2\that_n - \theta_{1-\alpha/2}^* \\ 1478 | \hat{b} 1479 | &=& \that_n - \widehat{H}^{-1}\left( \frac{\alpha}{2} \right) 1480 | &=& \that_n - r_{\alpha/2}^* 1481 | &=& 2\that_n - \theta_{\alpha/2}^* 1482 | \end{align*} 1483 | \end{enumerate} 1484 | % \[C_n = \left( 2\that_n - \that_{1-\alpha/2}^*, 1485 | % 2\that_n + \that_{\alpha/2}^* \right)\] 1486 | 1487 | Percentile interval 1488 | \[C_n = \left( \theta_{\alpha/2}^*, \theta_{1-\alpha/2}^* \right)\] 1489 | 1490 | \subsection{Rejection Sampling} 1491 | Setup 1492 | \begin{itemize} 1493 | \item We can easily sample from $g(\theta)$ 1494 | \item We want to sample from $h(\theta)$, but it is difficult 1495 | \item We know $h(\theta)$ up to a proportional constant: 1496 | $h(\theta) = \displaystyle\frac{k(\theta)}{\int k(\theta)\,d\theta}$ 1497 | \item Envelope condition: we can find $M > 0$ such that 1498 | $k(\theta) \le Mg(\theta) \quad \forall \theta$ 1499 | \end{itemize} 1500 | 1501 | Algorithm 1502 | \begin{enumerate} 1503 | \item Draw $\theta^{cand} \dist g(\theta)$ 1504 | \item Generate $u \dist \unif[0,1]$ 1505 | \item Accept $\theta^{cand}$ if 1506 | $u \le \displaystyle\frac{k(\theta^{cand})}{Mg(\theta^{cand})}$ 1507 | \item Repeat until $B$ values of $\theta^{cand}$ have been accepted 1508 | \end{enumerate} 1509 | 1510 | Example 1511 | \begin{itemize} 1512 | \item We can easily sample from the prior $g(\theta) = f(\theta)$ 1513 | \item Target is the posterior 1514 | $h(\theta) \propto k(\theta) = f(x^n\giv \theta) f(\theta)$ 1515 | \item Envelope condition: 1516 | $f(x^n\giv\theta) \le f(x^n\giv\that_n) = \Lln(\that_n)\equiv M$ 1517 | \item Algorithm 1518 | \begin{enumerate} 1519 | \item Draw $\theta^{cand} \dist f(\theta)$ 1520 | \item Generate $u \dist \unif[0,1]$ 1521 | \item Accept $\theta^{cand}$ if 1522 | $u \le \displaystyle\frac{\Lln(\theta^{cand})}{\Lln(\that_n)}$ 1523 | \end{enumerate} 1524 | 
\end{itemize}
1525 | 
1526 | \subsection{Importance Sampling}
1527 | 
1528 | Sample from an importance function $g$ rather than target density $h$.\\
1529 | Algorithm to obtain an approximation to $\E{q(\theta) \giv x^n}$:
1530 | \begin{enumerate}
1531 | \item Sample from the prior $\theta_1,\ldots,\theta_B \distiid f(\theta)$
1532 | \item $w_i = \displaystyle\frac{\Lln(\theta_i)}{\sum_{j=1}^B \Lln(\theta_j)}
1533 | \quad\forall i = 1,\ldots,B$
1534 | \item $\E{q(\theta)\giv x^n} \approx \sum_{i=1}^B q(\theta_i)w_i$
1535 | \end{enumerate}
1536 | 
1537 | \section{Decision Theory}
1538 | 
1539 | Definitions
1540 | \begin{itemize}
1541 | \item Unknown quantity affecting our decision: $\theta \in \Theta$
1542 | \item Decision rule: synonymous with an estimator $\that$
1543 | \item Action $a \in \mathcal{A}$: possible value of the decision rule. In the
1544 | estimation context, the action is just an estimate of $\theta$, $\that(x)$.
1545 | \item Loss function $L$: consequences of taking action $a$ when the true
1546 | state is $\theta$, i.e., the discrepancy between $\theta$ and $\that$,
1547 | $L: \Theta \times \mathcal{A} \to [-k,\infty).$
1548 | \end{itemize}
1549 | 
1550 | Loss functions
1551 | \begin{itemize}
1552 | \item Squared error loss: $L(\theta,a) = (\theta-a)^2$
1553 | \item Linear loss: $L(\theta,a) = \begin{cases}
1554 | K_1(\theta-a) & a-\theta < 0 \\
1555 | K_2(a-\theta) & a-\theta \ge 0
1556 | \end{cases}$
1557 | \item Absolute error loss: $L(\theta,a) = |\theta-a| \quad$
1558 | (linear loss with $K_1=K_2=1$)
1559 | \item $L_p$ loss: $L(\theta,a) = |\theta-a|^p$
1560 | \item Zero-one loss: $L(\theta,a) = \begin{cases}
1561 | 0 & a=\theta \\
1562 | 1 & a\neq\theta \\
1563 | \end{cases}$
1564 | \end{itemize}
1565 | 
1566 | \subsection{Risk}
1567 | 
1568 | Posterior risk
1569 | \[r(\that \giv x)
1570 | = \int L(\theta,\that(x))f(\theta\giv x)\,d\theta
1571 | = \E[\theta|X]{L(\theta,\that(x))}\]
1572 | 
1573 | (Frequentist) risk
1574 | \[R(\theta,\that)
1575 | = \int L(\theta,\that(x)) f(x\giv\theta) \dx
1576 | = \E[X|\theta]{L(\theta,\that(X))}\]
1577 | 
1578 | Bayes risk
1579 | \[r(f,\that)
1580 | = \iint L(\theta,\that(x))f(x,\theta)\dx\dtheta
1581 | = \E[\theta,X]{L(\theta,\that(X))}\]
1582 | \[r(f,\that)
1583 | = \E[\theta]{\E[X|\theta]{L(\theta,\that(X))}}
1584 | = \E[\theta]{R(\theta,\that)}\]
1585 | \[r(f,\that)
1586 | = \E[X]{\E[\theta|X]{L(\theta,\that(X))}}
1587 | = \E[X]{r(\that\giv X)}\]
1588 | 
1589 | \subsection{Admissibility}
1590 | 
1591 | \begin{itemize}
1592 | \item $\that'$ dominates $\that$ if
1593 | \[\forall \theta: R(\theta,\that') \le R(\theta,\that)\]
1594 | \[\exists \theta: R(\theta,\that') < R(\theta,\that)\]
1595 | \item $\that$ is inadmissible if there is at least one other estimator
1596 | $\that'$ that dominates it. Otherwise it is called admissible.
1597 | \end{itemize} 1598 | 1599 | \subsection{Bayes Rule} 1600 | 1601 | Bayes rule (or Bayes estimator) 1602 | \begin{itemize} 1603 | \item $r(f,\that) = \inf_{\ttil} r(f,\ttil)$ 1604 | \item $\that(x) = \inf r(\that\giv x) \; \forall x 1605 | \imp r(f,\that) = \int r(\that\giv x)f(x)\,dx$ 1606 | \end{itemize} 1607 | 1608 | Theorems 1609 | \begin{itemize} 1610 | \item Squared error loss: posterior mean 1611 | \item Absolute error loss: posterior median 1612 | \item Zero-one loss: posterior mode 1613 | \end{itemize} 1614 | 1615 | \subsection{Minimax Rules} 1616 | 1617 | Maximum risk 1618 | \[\bar{R}(\that) = \sup_\theta R(\theta,\that) \qquad 1619 | \bar{R}(a) = \sup_\theta R(\theta,a)\] 1620 | 1621 | Minimax rule 1622 | \[\sup_\theta R(\theta,\that) 1623 | = \inf_{\ttil} \bar{R}(\ttil) 1624 | = \inf_{\ttil} \sup_\theta R(\theta,\ttil)\] 1625 | 1626 | \[\that = \text{Bayes rule} \; \wedge \; 1627 | \exists c: R(\theta,\that) = c\] 1628 | 1629 | Least favorable prior 1630 | \[\that^f = \text{Bayes rule} \; \wedge \; 1631 | R(\theta,\that^f) \le r(f,\that^f) \;\forall\theta\] 1632 | 1633 | \section{Linear Regression} 1634 | 1635 | Definitions 1636 | \begin{itemize} 1637 | \item Response variable $Y$ 1638 | \item Covariate $X$ (aka predictor variable or feature) 1639 | \end{itemize} 1640 | 1641 | \subsection{Simple Linear Regression} 1642 | Model 1643 | \[Y_i = \beta_0 + \beta_1 X_i + \epsilon_i 1644 | \qquad \E{\epsilon_i\giv X_i} = 0 ,\; \V{\epsilon_i\giv X_i} = \sigma^2\] 1645 | 1646 | Fitted line 1647 | \[\rhat(x) = \bhat_0 + \bhat_1 x\] 1648 | 1649 | Predicted (fitted) values 1650 | \[\Yhat_i = \rhat(X_i)\] 1651 | 1652 | Residuals 1653 | \[\ehat_i = Y_i - \Yhat_i 1654 | = Y_i - \left( \bhat_0 + \bhat_1 X_i \right)\] 1655 | 1656 | Residual sums of squares (\rss) 1657 | \[\rss(\bhat_0,\bhat_1) = \sumin \ehat_i^2\] 1658 | 1659 | Least square estimates 1660 | \[\bhat^T=(\bhat_0, \bhat_1)^T: \min_{\bhat_0,\bhat_1}\rss\] 1661 | \begin{align*} 1662 | \bhat_0 &= \bar Y_n - \bhat_1 \bar X_n \\ 1663 | \bhat_1 &= \frac{\sumin(X_i-\bar X_n)(Y_i-\bar Y_n)}{\sumin(X_i - \bar X_n)^2} 1664 | = \frac{\sumin X_iY_i-n\Xbar\Ybar}{\sumin X_i^2 - n\Xsqbar} \\ 1665 | \E{\bhat\giv X^n} &= \begin{pmatrix}\beta_0 \\ \beta_1\end{pmatrix} \\ 1666 | \V{\bhat\giv X^n} &= 1667 | \frac{\sigma^2}{n s^2_X} 1668 | \begin{pmatrix}n^{-1}\sumin X_i^2 & -\Xnbar \\ -\Xnbar & 1\end{pmatrix} \\ 1669 | \sehat(\bhat_0) &= \frac{\shat}{s_X\sqrt{n}} \sqrt{\frac{\sumin X_i^2}{n}} \\ 1670 | \sehat(\bhat_1) &= \frac{\shat}{s_X\sqrt{n}} 1671 | \end{align*} 1672 | where $s_X^2 = n^{-1} \sumin(X_i-\Xnbar)^2$ and $\shat^2 = 1673 | \frac{1}{n-2} \sumin \ehat_i^2$ (unbiased estimate). 1674 | 1675 | Further properties: 1676 | \begin{itemize} 1677 | \item Consistency: 1678 | $\bhat_0 \pconv \beta_0$ and $\bhat_1 \pconv \beta_1$ 1679 | \item Asymptotic normality: 1680 | \[\frac{\bhat_0 - \beta_0}{\sehat(\bhat_0)} \dconv \norm[0,1] 1681 | \quad\text{and}\quad 1682 | \frac{\bhat_1 - \beta_1}{\sehat(\bhat_1)} \dconv \norm[0,1]\] 1683 | \item Approximate $1-\alpha$ confidence intervals for $\beta_0$ and $\beta_1$: 1684 | \[\bhat_0 \pm \zat \sehat(\bhat_0) \quad\text{and}\quad 1685 | \bhat_1 \pm \zat \sehat(\bhat_1)\] 1686 | \item Wald test for \hyp{\beta_1=0}{\beta_1\neq 0}: reject 1687 | $H_0$ if $|W| > \zat$ where $W = \bhat_1/\sehat(\bhat_1)$. 
\end{itemize}
1689 | 
1690 | R$^2$
1691 | \[R^2
1692 | = \frac{\sumin(\Yhat_i-\Ybar)^2}{\sumin(Y_i-\Ybar)^2}
1693 | = 1 - \frac{\sumin \ehat_i^2}{\sumin(Y_i-\Ybar)^2}
1694 | = 1 - \frac{\rss}{\tss}\]
1695 | 
1696 | Likelihood
1697 | \begin{align*}
1698 | \Ll &= \prodin f(X_i,Y_i)
1699 | = \prodin f_X(X_i) \times \prodin f_{Y|X}(Y_i \giv X_i) = \Ll_1 \times \Ll_2 \\
1700 | \Ll_1 &= \prodin f_X(X_i) \\
1701 | \Ll_2 &= \prodin f_{Y|X}(Y_i \giv X_i)
1702 | \propto \sigma^{-n}
1703 | \Exp{-\frac{1}{2\sigma^2}\sum_i\Bigl(Y_i-(\beta_0+\beta_1X_i)\Bigr)^2}
1704 | \end{align*}
1705 | 
1706 | Under the assumption of Normality, the least squares estimator is also
1707 | the \mle, but the least squares variance estimator is not; the \mle is
1708 | \[\shat^2_{\mathrm{mle}} = \frac{1}{n}\sumin \ehat_i^2\]
1709 | 
1710 | \subsection{Prediction}
1711 | 
1712 | Observe a new value $x_*$ of the covariate and predict the outcome $Y_*$.
1713 | \begin{align*}
1714 | \Yhat_* &= \bhat_0 + \bhat_1 x_* \\
1715 | \V{\Yhat_*} &= \V{\bhat_0} + x_*^2 \V{\bhat_1} + 2x_* \cov{\bhat_0,\bhat_1}
1716 | \end{align*}
1717 | 
1718 | Prediction interval
1719 | \[\xihat_n^2
1720 | = \shat^2\left( \frac{\sumin(X_i-x_*)^2}{n\sum_i(X_i-\Xbar)^2}+1 \right)\]
1721 | \[\Yhat_* \pm \zat \xihat_n\]
1722 | 
1723 | \subsection{Multiple Regression}
1724 | 
1725 | \[Y = X\beta + \epsilon\]
1726 | where
1727 | \[X =
1728 | \begin{pmatrix} X_{11} & \cdots & X_{1k} \\ \vdots & \ddots & \vdots \\
1729 | X_{n1} & \cdots & X_{nk}\end{pmatrix} \quad
1730 | \beta = \begin{pmatrix}\beta_1 \\ \vdots \\ \beta_k\end{pmatrix} \quad
1731 | \epsilon = \begin{pmatrix}\epsilon_1 \\ \vdots \\ \epsilon_n\end{pmatrix}\]
1732 | 
1733 | Likelihood
1734 | \[\Ll(\beta,\sigma^2) = (2\pi\sigma^2)^{-n/2} \Exp{-\frac{1}{2\sigma^2}\rss}\]
1735 | \[\rss = (Y-X\beta)^T(Y-X\beta) = \|Y-X\beta\|^2 = \sumin(Y_i-x_i^T\beta)^2\]
1736 | 
1737 | If the $(k \times k)$ matrix $X^TX$ is invertible,
1738 | \begin{align*}
1739 | \bhat &= (X^TX)^{-1}X^TY \\
1740 | \V{\bhat \giv X^n} &= \sigma^2(X^TX)^{-1} \\
1741 | \bhat &\approx \norm[\beta, \sigma^2(X^TX)^{-1}]
1742 | \end{align*}
1743 | 
1744 | Estimate regression function
1745 | \[\rhat(x) = \sumjk\bhat_j x_j\]
1746 | 
1747 | Unbiased estimate for $\sigma^2$
1748 | \[\shat^2 = \frac{1}{n-k} \sumin \ehat_i^2 \qquad \ehat = Y - X\bhat\]
1749 | 
1750 | \mle
1751 | \[\mhat = \Xbar \qquad \shat^2_{\mathrm{mle}} = \frac{n-k}{n}\,\shat^2\]
1752 | 
1753 | $1-\alpha$ Confidence interval
1754 | \[\bhat_j \pm \zat\sehat(\bhat_j)\]
1755 | 
1756 | \subsection{Model Selection}
1757 | 
1758 | Consider predicting a new observation $Y^*$ for covariates $X^*$ and let $S
1759 | \subset J$ denote a subset of the covariates in the model, where $|S| = k$ and
1760 | $|J| = n$.
1761 | 
1762 | Issues
1763 | \begin{itemize}
1764 | \item Underfitting: too few covariates yields high bias
1765 | \item Overfitting: too many covariates yields high variance
1766 | \end{itemize}
1767 | 
1768 | Procedure
1769 | \begin{enumerate}
1770 | \item Assign a score to each model
1771 | \item Search through all models to find the one with the highest score
1772 | \end{enumerate}
1773 | 
1774 | Hypothesis testing
1775 | \[\hyp{\beta_j=0}{\beta_j\neq0} \quad\forall j \in J\]
1776 | 
1777 | Mean squared prediction error (\mspe)
1778 | \[\mspe = \E{(\Yhat(S)-Y^*)^2}\]
1779 | 
1780 | Prediction risk
1781 | \[R(S) = \sumin \mspe_i = \sumin \E{(\Yhat_i(S)-Y_i^*)^2}\]
1782 | 
1783 | Training error
1784 | \[\Rhat_{tr}(S) = \sumin(\Yhat_i(S)-Y_i)^2\]
1785 | 
1786 | $R^2$
1787 | \[R^2(S)
1788 | = 1 - \frac{\rss(S)}{\tss}
1789 | = 1 - \frac{\Rhat_{tr}(S)}{\tss}
1790 | = 1 - \frac{\sumin(\Yhat_i(S)-Y_i)^2}{\sumin(Y_i-\Ybar)^2}\]
1791 | 
1792 | The training error is a downward-biased estimate of the prediction risk:
1793 | \[\E{\Rhat_{tr}(S)} < R(S)\]
1794 | \[\bias(\Rhat_{tr}(S)) = \E{\Rhat_{tr}(S)} - R(S) = -2\sumin\cov{\Yhat_i,Y_i}\]
1795 | 
1796 | Adjusted $R^2$
1797 | \[R^2(S) = 1 - \frac{n-1}{n-k} \frac{\rss}{\tss}\]
1798 | 
1799 | \textsc{Mallows'} $C_p$ statistic
1800 | \[\Rhat(S) = \Rhat_{tr}(S) + 2k\shat^2
1801 | = \text{lack of fit} + \text{complexity penalty}\]
1802 | 
1803 | \textsc{Akaike} Information Criterion (AIC)
1804 | \[AIC(S) = \lln(\bhat_S, \shat^2_S) - k\]
1805 | 
1806 | Bayesian Information Criterion (BIC)
1807 | \[BIC(S) = \lln(\bhat_S, \shat^2_S) - \frac{k}{2}\log n\]
1808 | 
1809 | Validation and training
1810 | \[\Rhat_V(S) = \sumim(\Yhat_i^*(S) - Y_i^*)^2 \qquad
1811 | m = |\{\text{validation data}\}|,
1812 | \text{ often }\frac{n}{4}\text{ or }\frac{n}{2}\]
1813 | 
1814 | Leave-one-out cross-validation
1815 | \[\Rhat_{CV}(S)
1816 | = \sumin(Y_i - \Yhat_{(i)})^2
1817 | = \sumin \left( \frac{Y_i-\Yhat_i(S)}{1-U_{ii}(S)} \right)^2\]
1818 | \[U(S) = X_S(X_S^T X_S)^{-1} X_S^T \text{ (``hat matrix'')}\]
1819 | 
1820 | \section{Non-parametric Function Estimation}
1821 | 
1822 | \subsection{Density Estimation}
1823 | 
1824 | Estimate $f(x)$, where $\Pr{X \in A} = \int_A f(x)\dx$.\\
1825 | 
1826 | Integrated square error (\ise)
1827 | \[L(f, \fnhat) = \int\left(f(x) - \fnhat(x)\right)^2 \dx = J(h)+\int f^2(x)\dx\]
1828 | 
1829 | Frequentist risk
1830 | \[R(f, \fnhat) = \E{L(f,\fnhat)} = \int b^2(x) \dx + \int v(x) \dx\]
1831 | \begin{align*}
1832 | b(x) &= \E{\fnhat(x)} - f(x) \\
1833 | v(x) &= \V{\fnhat(x)}
1834 | \end{align*}
1835 | 
1836 | \subsubsection{Histograms}
1837 | 
1838 | Definitions
1839 | \begin{itemize}
1840 | \item Number of bins $m$
1841 | \item Binwidth $h = \frac{1}{m}$ (data scaled to $[0,1]$)
1842 | \item Bin $B_j$ has $\nu_j$ observations
1843 | \item Define $\phat_j = \nu_j/n$ and $p_j = \int_{B_j} f(u)\du$
1844 | \end{itemize}
1845 | 
1846 | Histogram estimator
1847 | \begin{align*}
1848 | \fnhat(x) &= \sumjm \frac{\phat_j}{h} I(x\in B_j) \\
1849 | \E{\fnhat(x)} &= \frac{p_j}{h} \\
1850 | \V{\fnhat(x)} &= \frac{p_j(1-p_j)}{nh^2} \\
1851 | R(\fnhat,f) &\approx
1852 | \frac{h^2}{12} \int \left(f'(u)\right)^2 \du + \frac{1}{nh} \\
1853 | h^* &= \frac{1}{n^{1/3}} \left( \frac{6}{\int\left(f'(u)\right)^2\du}
1854 | \right)^{1/3} \\
1855 | R^*(\fnhat,f) &\approx \frac{C}{n^{2/3}} \qquad
1856 | C = \left(\frac{3}{4}\right)^{2/3} \left( \int\left( f'(u) \right)^2 \du
1857 | \right)^{1/3}
1858 | \end{align*}
1859 | 
1860 | Cross-validation estimate of $\E{J(h)}$
1861 | \[\Jhat_{CV}(h)
1862 | = \int
\fnhat^2(x) \dx - \frac{2}{n}\sumin \fhat_{(-i)}(X_i)
1863 | = \frac{2}{(n-1)h} - \frac{n+1}{(n-1)h} \sumjm \phat_j^2\]
1864 | 
1865 | \subsubsection{Kernel Density Estimator (KDE)}
1866 | 
1867 | Kernel $K$
1868 | \begin{itemize}
1869 | \item $K(x) \ge 0$
1870 | \item $\int K(x)\dx = 1$
1871 | \item $\int xK(x)\dx = 0$
1872 | \item $\int x^2 K(x)\dx \equiv \sigma^2_K > 0$
1873 | \end{itemize}
1874 | 
1875 | KDE
1876 | \begin{align*}
1877 | \fnhat(x) &= \frac{1}{n} \sumin \frac{1}{h} K\left( \frac{x-X_i}{h} \right) \\
1878 | R(f,\fnhat) &\approx \frac{1}{4}(h\sigma_K)^4 \int (f''(x))^2\dx
1879 | + \frac{1}{nh} \int K^2(x)\dx \\
1880 | h^* &= \frac{c_1^{-2/5} c_2^{-1/5} c_3^{-1/5}}{n^{1/5}} \qquad
1881 | c_1=\sigma_K^2,\;c_2 = \int K^2(x)\dx,\;c_3 = \int(f''(x))^2\dx\\
1882 | R^*(f,\fnhat) &= \frac{c_4}{n^{4/5}} \qquad
1883 | c_4 = \underbrace{\frac{5}{4}(\sigma_K^2)^{2/5} \left(\int
1884 | K^2(x)\dx\right)^{4/5}}_{C(K)}
1885 | \left( \int(f'')^2\dx \right)^{1/5}
1886 | \end{align*}
1887 | 
1888 | \textsc{Epanechnikov} Kernel
1889 | \[K(x) = \begin{cases}
1890 | \frac{3}{4\sqrt{5}}\left(1-x^2/5\right) & |x| < \sqrt{5} \\ 0 & \text{otherwise}
1891 | \end{cases}\]
1892 | 
1893 | Cross-validation estimate of $\E{J(h)}$
1894 | \[\Jhat_{CV}(h)
1895 | = \int \fnhat^2(x) \dx - \frac{2}{n}\sumin \fhat_{(-i)}(X_i)
1896 | \approx \frac{1}{hn^2} \sumin \sumjn K^*\left( \frac{X_i-X_j}{h} \right) +
1897 | \frac{2}{nh} K(0)\]
1898 | \[K^*(x) = K^{(2)}(x)-2K(x) \qquad K^{(2)}(x) = \int K(x-y) K(y) \dy\]
1899 | 
1900 | \subsection{Non-parametric Regression}
1901 | 
1902 | Estimate $r(x)$ where $r(x) = \E{Y \giv X=x}$.
1903 | Consider pairs of points $(x_1,Y_1),\dots,(x_n,Y_n)$ related by
1904 | \begin{align*}
1905 | Y_i &= r(x_i) + \epsilon_i \\
1906 | \E{\epsilon_i} &= 0 \\
1907 | \V{\epsilon_i} &= \sigma^2
1908 | \end{align*}
1909 | 
1910 | $k$-nearest Neighbor Estimator
1911 | \[\rhat(x) = \frac{1}{k} \sum_{i:x_i \in N_k(x)} Y_i \qquad \text{where }
1912 | N_k(x) = \{k \text{ values of } x_1,\dots,x_n \text{ closest to } x\}\]
1913 | 
1914 | \textsc{Nadaraya-Watson} Kernel Estimator
1915 | \begin{align*}
1916 | \rhat(x) &= \sumin w_i(x)Y_i \\
1917 | w_i(x)
1918 | &= \frac{K\left(\frac{x-x_i}{h}\right)}{\sumjn K\left(\frac{x-x_j}{h}\right)}
1919 | \quad \in [0,1] \\
1920 | R(\rhat_n,r) &\approx \frac{h^4}{4} \left( \int x^2K(x)\dx \right)^2
1921 | \int \left( r''(x) + 2r'(x)\frac{f'(x)}{f(x)}\right)^2 \dx \\
1922 | &+ \int \frac{\sigma^2 \int K^2(x) \dx}{nhf(x)}\dx \\
1923 | h^* &\approx \frac{c_1}{n^{1/5}} \\
1924 | R^*(\rhat_n,r) &\approx \frac{c_2}{n^{4/5}}
1925 | \end{align*}
1926 | 
1927 | Cross-validation estimate of $\E{J(h)}$
1928 | \[\Jhat_{CV}(h)
1929 | = \sumin (Y_i - \rhat_{(-i)}(x_i))^2
1930 | = \sumin \frac{(Y_i - \rhat(x_i))^2}{\left(
1931 | 1- \frac{K(0)}{\sumjn K\left(\frac{x_i-x_j}{h}\right)}\right)^2}\]
1932 | 
1933 | \subsection{Smoothing Using Orthogonal Functions}
1934 | 
1935 | Approximation
1936 | \[r(x) = \sum_{j=1}^\infty\beta_j\phi_j(x)
1937 | \approx \sum_{j=1}^J \beta_j\phi_j(x)\]
1938 | 
1939 | Multivariate regression
1940 | \[Y = \Phi\beta + \eta\]
1941 | \[\text{where}\quad \eta_i = \epsilon_i \quad\text{and}\quad \Phi
1942 | = \begin{pmatrix}
1943 | \phi_0(x_1) & \cdots & \phi_J(x_1) \\
1944 | \vdots & \ddots & \vdots \\
1945 | \phi_0(x_n) & \cdots & \phi_J(x_n)
1946 | \end{pmatrix}\]
1947 | 
1948 | Least squares estimator
1949 | \begin{align*}
1950 | \bhat &= (\Phi^T\Phi)^{-1}\Phi^T Y \\
1951 | &\approx \frac{1}{n}\Phi^T Y
1952 | \quad\text{(for equally spaced observations only)}
1953 | \end{align*}
1954 | 
1955 | Cross-validation estimate of the risk
1956 | \[\Rhat_{CV}(J)
1957 | = \sumin \left( Y_i - \sum_{j=1}^J \phi_j(x_i)\bhat_{j,(-i)} \right)^2\]
1958 | 
1959 | \section{Stochastic Processes}
1960 | 
1961 | Stochastic Process
1962 | \[\left\{ X_t : t \in T\right\} \qquad T=\begin{cases}\{0,\pm1,\dots\}=\Z &
1963 | \text{discrete} \\ [0,\infty) & \text{continuous}\end{cases}\]
1964 | 
1965 | \begin{itemize}
1966 | \item Notations $X_t$, $X(t)$
1967 | \item State space $\mathcal{X}$
1968 | \item Index set $T$
1969 | \end{itemize}
1970 | 
1971 | \subsection{Markov Chains}
1972 | 
1973 | Markov chain
1974 | \[\Pr{X_n = x \giv X_0,\dots,X_{n-1}} = \Pr{X_n = x \giv X_{n-1}}
1975 | \quad \forall n\in T, x \in \mathcal{X}\]
1976 | 
1977 | Transition probabilities
1978 | \begin{align*}
1979 | p_{ij} &\equiv \Pr{X_{n+1} = j \giv X_n = i} \\
1980 | p_{ij}(n) &\equiv \Pr{X_{m+n} = j \giv X_m = i} \quad\text{($n$-step)}
1981 | \end{align*}
1982 | 
1983 | Transition matrix $\mathbf{P}$ ($n$-step: $\mathbf{P}_n$)
1984 | \begin{itemize}
1985 | \item $(i,j)$ element is $p_{ij}$
1986 | \item $p_{ij} \ge 0$
1987 | \item $\sum_j p_{ij} = 1$ (each row sums to one)
1988 | \end{itemize}
1989 | 
1990 | \textsc{Chapman-Kolmogorov}
1991 | \[p_{ij}(m+n) = \sum_k p_{ik}(m) p_{kj}(n)\]
1992 | \[\mathbf{P}_{m+n} = \mathbf{P}_m\mathbf{P}_n\]
1993 | \[\mathbf{P}_n = \mathbf{P} \times \cdots \times \mathbf{P} = \mathbf{P}^n\]
1994 | 
1995 | Marginal probability
1996 | \begin{align*}
1997 | \mu_n &= (\mu_n(1),\dots,\mu_n(N))
1998 | \quad\text{where}\quad \mu_n(i)=\Pr{X_n=i} \\
1999 | \mu_0 &\eqdef \text{initial distribution} \\
2000 | \mu_n &= \mu_0\mathbf{P}^n
2001 | \end{align*}
2002 | 
2003 | \subsection{Poisson Processes}
2004 | 
2005 | Poisson process
2006 | \begin{itemize}
2007 | \item $\left\{ X_t : t \in [0,\infty) \right\}$
2008 | = number of events up to and including time $t$
2009 | \item $X_0 = 0$
2010 | \item Independent increments:
2011 | \[\forall t_0 < \cdots < t_n:
2012 | X_{t_1} - X_{t_0} \ind \cdots \ind X_{t_n} - X_{t_{n-1}}\]
2013 | \item Intensity function $\lambda(t)$
2014 | \begin{itemize}
2015 | \item $\Pr{X_{t+h}-X_t = 1} = \lambda(t) h + o(h)$
2016 | \item $\Pr{X_{t+h}-X_t \ge 2} = o(h)$
2017 | \end{itemize}
2018 | \item $X_{s+t} - X_s \dist \pois[m(s+t)-m(s)]\;$ where
2019 | $\;m(t)=\int_0^t\lambda(s)\ds$
2020 | \end{itemize}
2021 | 
2022 | Homogeneous Poisson process
2023 | \[\lambda(t) \equiv \lambda \imp X_t \dist \pois[\lambda t] \qquad \lambda > 0\]
2024 | 
2025 | Waiting times
2026 | \[W_t \define \text{time at which $X_t$ occurs}\]
2027 | \[W_t \dist \gam[t, \frac{1}{\lambda}]\]
2028 | 
2029 | Interarrival times
2030 | \[S_t = W_{t+1} - W_t\]
2031 | \[S_t \dist \ex[\frac{1}{\lambda}]\]
2032 | 
2033 | \begin{center}
2034 | \begin{tikzpicture}[decoration={brace,amplitude=5pt}]
2035 | \draw[->] (0,0) -- (8,0) node[below]{$t$};
2036 | \foreach \i in {1,1.5,3,5,6,7}
2037 | \draw (\i,2pt) -- (\i,-2pt) node {};
2038 | \draw (3,0) node[below] {\footnotesize $W_{t-1}$};
2039 | \draw (5,0) node[below] {\footnotesize $W_{t}$};
2040 | \draw[decorate,yshift=5pt] (3,0) -- (5,0)
2041 | node[midway,above=3pt] {\footnotesize $S_t$};
2042 | \end{tikzpicture}
2043 | \end{center}
2044 | 
2045 | \section{Time Series}
2046 | 
2047 | Mean function
2048 | \[\mu_{x_t} = \E{x_t} = \int_{-\infty}^\infty x f_t(x) \dx\]
2049 | 
2050 | Autocovariance function
2051 | \[\gamma_x(s,t) = \E{(x_s-\mu_s)(x_t-\mu_t)} = \E{x_sx_t} - \mu_s\mu_t\]
2052 | \[\gamma_x(t,t) = \E{(x_t-\mu_t)^2} = \V{x_t}\]
2053 | 
2054 | Autocorrelation function (ACF)
2055 | \[\rho(s,t) = \frac{\cov{x_s,x_t}}{\sqrt{\V{x_s}\V{x_t}}}
2056 | = \frac{\gamma(s,t)}{\sqrt{\gamma(s,s)\gamma(t,t)}}\]
2057 | 
2058 | Cross-covariance function (CCV)
2059 | \[\gamma_{xy}(s,t) = \E{(x_s-\mu_{x_s})(y_t-\mu_{y_t})}\]
2060 | 
2061 | Cross-correlation function (CCF)
2062 | \[\rho_{xy}(s,t) = \frac{\gamma_{xy}(s,t)}{\sqrt{\gamma_x(s,s)\gamma_y(t,t)}}\]
2063 | 
2064 | Backshift operator
2065 | \[B^k(x_t) = x_{t-k}\]
2066 | 
2067 | Difference operator
2068 | \[\nabla^d = (1-B)^d\]
2069 | 
2070 | White noise
2071 | \begin{itemize}
2072 | \item $w_t \dist wn(0, \sigma_w^2)$
2073 | \item Gaussian: $w_t \distiid \norm[0, \sigma_w^2]$
2074 | \item $\E{w_t} = 0 \quad t\in T$
2075 | \item $\V{w_t} = \sigma_w^2 \quad t\in T$
2076 | \item $\gamma_w(s,t) = 0 \quad s \neq t \;\wedge\; s,t\in T$
2077 | \end{itemize}
2078 | 
2079 | %Auto regression
2080 | %\[x_t = \sum_{i=1}^p \phi_i x_{t-i} + w_t\]
2081 | 
2082 | Random walk
2083 | \begin{itemize}
2084 | \item Drift $\delta$
2085 | \item $x_t = \delta t + \sum_{j=1}^t w_j$
2086 | \item $\E{x_t} = \delta t$
2087 | \end{itemize}
2088 | 
2089 | Symmetric moving average
2090 | \[m_t = \sum_{j=-k}^k a_j x_{t-j}
2091 | \qquad \text{where } a_j=a_{-j}\ge0 \text{ and } \sum_{j=-k}^k a_j = 1\]
2092 | 
2093 | \subsection{Stationary Time Series}
2094 | 
2095 | Strictly stationary
2096 | \[\Pr{x_{t_1} \le c_1, \dots, x_{t_k} \le c_k} =
2097 | \Pr{x_{t_1+h} \le c_1, \dots, x_{t_k+h} \le c_k}\]
2098 | \[\forall k\in\N,\; t_k, c_k, h\in\Z\]
2099 | 
2100 | Weakly stationary
2101 | \begin{itemize}
2102 | \item $\E{x_t^2} < \infty \qquad\forall t\in\Z$
2103 | \item $\E{x_t} = m \qquad\forall t\in\Z$
2104 | \item $\gamma_x(s,t) = \gamma_x(s+r, t+r) \qquad\forall r,s,t\in\Z$
2105 | \end{itemize}
2106 | 
2107 | Autocovariance function
2108 | \begin{itemize}
2109 | \item $\gamma(h) = \E{(x_{t+h}-\mu)(x_t-\mu)} \qquad \forall h\in\Z$
2110 | \item $\gamma(0) = \E{(x_t-\mu)^2}$
2111 | \item $\gamma(0) \ge 0$
2112 | \item $\gamma(0) \ge |\gamma(h)|$
2113 | \item $\gamma(h) = \gamma(-h)$
2114 | \end{itemize}
2115 | 
2116 | Autocorrelation function (ACF)
2117 | \[\rho_x(h) = \frac{\cov{x_{t+h},x_t}}{\sqrt{\V{x_{t+h}}\V{x_t}}}
2118 | = \frac{\gamma(t+h,t)}{\sqrt{\gamma(t+h,t+h)\gamma(t,t)}}
2119 | = \frac{\gamma(h)}{\gamma(0)}\]
2120 | 
2121 | Jointly stationary time series
2122 | \[\gamma_{xy}(h) = \E{(x_{t+h}-\mu_x)(y_t-\mu_y)}\]
2123 | \[\rho_{xy}(h) = \frac{\gamma_{xy}(h)}{\sqrt{\gamma_x(0)\gamma_y(0)}}\]
2124 | 
2125 | Linear process
2126 | \[x_t = \mu + \sum_{j=-\infty}^\infty \psi_j w_{t-j} \quad\text{where}\quad
2127 | \sum_{j=-\infty}^\infty |\psi_j| < \infty\]
2128 | \[\gamma(h) = \sigma_w^2 \sum_{j=-\infty}^\infty \psi_{j+h}\psi_j\]
2129 | 
2130 | \subsection{Estimation of Correlation}
2131 | 
2132 | Sample mean
2133 | \[\xbar = \frac{1}{n}\sum_{t=1}^n x_t\]
2134 | 
2135 | Variance of the sample mean
2136 | \[\V{\xbar} = \frac{1}{n}\sum_{h=-n}^n \left(1-\frac{|h|}{n}\right)\gamma_x(h)\]
2137 | 
2138 | Sample autocovariance function
2139 | \[\ghat(h) = \frac{1}{n}\sum_{t=1}^{n-h}(x_{t+h}-\xbar)(x_t-\xbar)\]
2140 | 
2141 | Sample autocorrelation function
2142 | \[\rhohat(h) = \frac{\ghat(h)}{\ghat(0)}\]
2143 | 
2144 | Sample cross-covariance function
2145 | \[\ghat_{xy}(h) = \frac{1}{n} \sum_{t=1}^{n-h}(x_{t+h}-\xbar)(y_t - \ybar)\]
2146 | 
2147 | Sample cross-correlation function
2148 | \[\rhohat_{xy}(h) = \frac{\ghat_{xy}(h)}{\sqrt{\ghat_x(0) \ghat_y(0)}}\]
2149 | 
2150 | Properties
2151 | \begin{itemize}
2152 | \item $\sigma_{\rhohat_x(h)} = \displaystyle\frac{1}{\sqrt{n}}$
2153 | if $x_t$ is white noise
2154 | \item $\sigma_{\rhohat_{xy}(h)} = \displaystyle\frac{1}{\sqrt{n}}$
2155 | if $x_t$ or $y_t$ is white noise
2156 | \end{itemize}
2157 | 
2158 | \subsection{Non-Stationary Time Series}
2159 | 
2160 | Classical decomposition model
2161 | \[x_t = \mu_t + s_t + w_t\]
2162 | \begin{itemize}
2163 | \item $\mu_t =$ trend
2164 | \item $s_t =$ seasonal component
2165 | \item $w_t =$ random noise term
2166 | \end{itemize}
2167 | 
2168 | \subsubsection{Detrending}
2169 | 
2170 | Least squares
2171 | \begin{enumerate}
2172 | \item Choose trend model, e.g.,
2173 | $\mu_t = \beta_0 + \beta_1 t + \beta_2 t^2$
2174 | \item Minimize \rss to obtain trend estimate
2175 | $\mhat_t = \bhat_0 + \bhat_1 t + \bhat_2 t^2$
2176 | \item Residuals $\triangleq$ noise $w_t$
2177 | \end{enumerate}
2178 | 
2179 | Moving average
2180 | \begin{itemize}
2181 | \item The \emph{low-pass} filter $v_t$ is a symmetric moving average $m_t$
2182 | with $a_j = \frac{1}{2k+1}$:
2183 | \[v_t = \frac{1}{2k+1} \sum_{i=-k}^k x_{t-i}\]
2184 | \item If $\frac{1}{2k+1} \sum_{i=-k}^k w_{t-i} \approx 0$,
2185 | a linear trend function $\mu_t = \beta_0 + \beta_1t$ passes without
2186 | distortion
2187 | \end{itemize}
2188 | 
2189 | Differencing
2190 | \begin{itemize}
2191 | \item $\mu_t = \beta_0 + \beta_1t \imp \nabla\mu_t = \beta_1$
2192 | \end{itemize}
2193 | 
2194 | \subsection{ARIMA models}
2195 | 
2196 | Autoregressive polynomial
2197 | \[\phi(z) = 1 - \phi_1 z - \cdots - \phi_p z^p
2198 | \qquad z \in \C \wedge \phi_p \neq 0\]
2199 | 
2200 | Autoregressive operator
2201 | \[\phi(B) = 1 - \phi_1B - \cdots - \phi_pB^p\]
2202 | 
2203 | Autoregressive model order $p$, $\AR$
2204 | \[x_t = \phi_1 x_{t-1} + \cdots + \phi_p x_{t-p} + w_t \eqv \phi(B)x_t = w_t\]
2205 | 
2206 | $\AR[1]$
2207 | \begin{itemize}
2208 | \item $x_t = \phi^k(x_{t-k}) + \displaystyle\sum_{j=0}^{k-1} \phi^j(w_{t-j})
2209 | \stackrel{k\to\infty, |\phi| < 1}{=}
2210 | \underbrace{\sum_{j=0}^\infty \phi^j(w_{t-j})}_{\text{linear process}}$
2211 | \item $\E{x_t} = \sum_{j=0}^\infty\phi^j(\E{w_{t-j}}) = 0$
2212 | \item $\gamma(h) = \cov{x_{t+h},x_t} = \frac{\sigma_w^2\phi^h}{1-\phi^2}$
2213 | \item $\rho(h) = \frac{\gamma(h)}{\gamma(0)} = \phi^h$
2214 | \item $\rho(h) = \phi \rho(h-1) \quad h=1,2,\ldots$
2215 | \end{itemize}
2216 | 
2217 | Moving average polynomial
2218 | \[\theta(z) = 1 + \theta_1 z + \cdots + \theta_q z^q
2219 | \qquad z \in \C \wedge \theta_q \neq 0\]
2220 | 
2221 | Moving average operator
2222 | \[\theta(B) = 1 + \theta_1B + \cdots + \theta_qB^q\]
2223 | 
2224 | $\MA$ (moving average model order $q$)
2225 | \[x_t = w_t + \theta_1 w_{t-1} + \cdots + \theta_q w_{t-q}
2226 | \eqv x_t = \theta(B)w_t\]
2227 | \[\E{x_t} = \sum_{j=0}^q \theta_j\E{w_{t-j}} = 0\]
2228 | \[\gamma(h) = \cov{x_{t+h},x_t} = \begin{cases}
2229 | \sigma_w^2\sum_{j=0}^{q-h} \theta_j\theta_{j+h} & 0 \le h \le q \\
2230 | 0 & h > q
2231 | \end{cases}\]
2232 | 
2233 | $\MA[1]$
2234 | \[x_t = w_t + \theta w_{t-1}\]
2235 | \[\gamma(h) = \begin{cases}
2236 | (1+\theta^2)\sigma_w^2 & h = 0 \\
2237 | \theta\sigma_w^2 & h = 1 \\
2238 | 0 & h > 1
2239 | \end{cases}\]
2240 | \[\rho(h) = \begin{cases}
2241 | \frac{\theta}{(1+\theta^2)} & h = 1 \\
2242 | 0 & h > 1
2243 | \end{cases}\]
2244 | 
2245 | $\ARMA$
2246 | \[x_t = \phi_1 x_{t-1} + \cdots + \phi_p x_{t-p} + w_t + \theta_1 w_{t-1} +
2247 | \cdots + \theta_q w_{t-q}\]
2248 | \[\phi(B) x_t = \theta(B) w_t\]
2249 | 
2250 | Partial autocorrelation function (PACF)
2251 | \begin{itemize}
2252 | \item $x_i^{h-1} \eqdef$ regression of $x_i$ on
Partial autocorrelation function (PACF)
\begin{itemize}
  \item $x_i^{h-1} \eqdef$ regression of $x_i$ on
    $\{x_{h-1}, x_{h-2}, \dots, x_1\}$
  \item $\phi_{hh} = \text{corr}(x_h - x_h^{h-1}, x_0 - x_0^{h-1}) \quad h \ge 2$
  \item E.g., $\phi_{11} = \text{corr}(x_1,x_0) = \rho(1)$
\end{itemize}

$\ARIMA$
\[\nabla^d x_t = (1-B)^d x_t \text{ is } \ARMA\]
\[\phi(B)(1-B)^d x_t = \theta(B) w_t\]

Exponentially Weighted Moving Average (EWMA)
\[x_t = x_{t-1} + w_t - \lambda w_{t-1}\]
\[x_t = \sum_{j=1}^\infty(1-\lambda)\lambda^{j-1} x_{t-j} + w_t
  \quad\text{when } |\lambda| < 1\]
\[\tilde{x}_{n+1} = (1-\lambda)x_n + \lambda \tilde{x}_n\]

\begin{titemize}{Seasonal ARIMA}
  \item Denoted by $\SARIMA$
  \item $\Phi_P(B^s) \phi(B) \nabla_s^D \nabla^d x_t
    = \delta + \Theta_Q(B^s)\theta(B)w_t$
\end{titemize}

\subsubsection{Causality and Invertibility}
$\ARMA$ is causal (future-independent)
$\eqv \exists \{\psi_j\} : \sum_{j=0}^\infty |\psi_j| < \infty$ such that
\[x_t = \sum_{j=0}^\infty \psi_j w_{t-j} = \psi(B)w_t\]

$\ARMA$ is invertible
$\eqv \exists \{\pi_j\} : \sum_{j=0}^\infty |\pi_j| < \infty$ such that
\[\pi(B)x_t = \sum_{j=0}^\infty \pi_j x_{t-j} = w_t\]

Properties
\begin{itemize}
  \item $\ARMA$ causal $\eqv$
    roots of $\phi(z)$ lie outside the unit circle
    \[\psi(z) = \sum_{j=0}^\infty\psi_j z^j = \frac{\theta(z)}{\phi(z)}
      \quad |z| \le 1\]
  \item $\ARMA$ invertible $\eqv$
    roots of $\theta(z)$ lie outside the unit circle
    \[\pi(z) = \sum_{j=0}^\infty\pi_j z^j = \frac{\phi(z)}{\theta(z)}
      \quad |z| \le 1\]
\end{itemize}

Behavior of the ACF and PACF for causal and invertible ARMA models

\begin{center}
  \begin{tabular}{|c|ccc|}
    \hline
    & $\AR$ & $\MA$ & $\ARMA$\\
    \hline
    ACF & tails off & cuts off after lag $q$ & tails off \\
    PACF & cuts off after lag $p$ & tails off & tails off \\
    \hline
  \end{tabular}
\end{center}
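The root conditions can be checked numerically; a sketch in base R for a
hypothetical $\ARMA$ model with $p=2$, $q=1$ (illustrative, not part of the
original cookbook):
\begin{verbatim}
# phi(z) = 1 - 0.5 z - 0.3 z^2
# theta(z) = 1 + 0.4 z
phi   <- c(1, -0.5, -0.3)  # coeffs of 1, z, z^2
theta <- c(1, 0.4)
all(Mod(polyroot(phi))   > 1)  # TRUE => causal
all(Mod(polyroot(theta)) > 1)  # TRUE => invertible
\end{verbatim}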
\subsection{Spectral Analysis}

Periodic process
\begin{align*}
  x_t
  &= A \cos(2\pi\omega t + \phi) \\
  &= U_1 \cos(2\pi \omega t) + U_2 \sin(2\pi \omega t)
\end{align*}

\begin{itemize}
  \item Frequency index $\omega$ (cycles per unit time),
    period $1/\omega$
  \item Amplitude $A$
  \item Phase $\phi$
  \item $U_1 = A\cos\phi$ and $U_2 = A\sin\phi$ are often normally
    distributed \rv's
\end{itemize}

Periodic mixture
\[x_t = \sum_{k=1}^q\left( U_{k1}\cos(2\pi\omega_k t)
  + U_{k2}\sin(2\pi\omega_k t)
  \right)\]
\begin{itemize}
  \item $U_{k1}, U_{k2}$, for $k=1,\ldots,q$,
    are independent zero-mean \rv's with variances $\sigma_k^2$
  \item $\gamma(h) = \sum_{k=1}^q \sigma_k^2 \cos(2\pi\omega_k h)$
  \item $\gamma(0) = \E{x_t^2} = \sum_{k=1}^q \sigma_k^2$
\end{itemize}

Spectral representation of a periodic process
\begin{align*}
  \gamma(h)
  &= \sigma^2 \cos(2\pi\omega_0 h) \\
  &= \frac{\sigma^2}{2} e^{-2\pi i \omega_0 h}
   + \frac{\sigma^2}{2} e^{2\pi i \omega_0 h}\\
  &= \int_{-1/2}^{1/2} e^{2\pi i \omega h} \d{F(\omega)}
\end{align*}

Spectral distribution function
\[F(\omega)= \begin{cases}
  0 & \omega < -\omega_0 \\
  \sigma^2/2 & -\omega_0 \le \omega < \omega_0 \\
  \sigma^2 & \omega \ge \omega_0
\end{cases}\]
\begin{itemize}
  \item $F(-\infty) = F(-1/2) = 0$
  \item $F(\infty) = F(1/2) = \gamma(0)$
\end{itemize}

Spectral density
\[f(\omega) = \sum_{h=-\infty}^\infty \gamma(h) e^{-2\pi i \omega h}
  \quad -\frac{1}{2} \le \omega \le \frac{1}{2}\]
\begin{itemize}
  \item Needs $\sum_{h=-\infty}^\infty |\gamma(h)| < \infty
    \imp \gamma(h) = \int_{-1/2}^{1/2} e^{2\pi i \omega h}f(\omega) \d\omega
    \quad h=0,\pm1,\ldots$
  \item $f(\omega) \ge 0$
  \item $f(\omega) = f(-\omega)$
  \item $f(\omega) = f(1-\omega)$
  \item $\gamma(0) = \V{x_t} = \int_{-1/2}^{1/2}f(\omega)\d\omega$
  \item White noise: $f_w(\omega) = \sigma_w^2$
  \item $\ARMA, \phi(B)x_t = \theta(B)w_t$:
    \[f_x(\omega) = \sigma_w^2 \frac{|\theta(e^{-2\pi i
      \omega})|^2}{|\phi(e^{-2\pi i \omega})|^2}\]
    where $\phi(z) = 1 - \sum_{k=1}^p \phi_k z^k$ and
    $\theta(z) = 1 + \sum_{k=1}^q \theta_k z^k$
\end{itemize}

Discrete Fourier Transform (DFT)
\[d(\omega_j) = n^{-1/2} \sum_{t=1}^n x_t e^{-2\pi i\omega_j t}\]

Fourier/fundamental frequencies
\[\omega_j = j/n\]

Inverse DFT
\[x_t = n^{-1/2} \sum_{j=0}^{n-1} d(\omega_j) e^{2\pi i\omega_j t}\]

Periodogram
\[I(j/n) = |d(j/n)|^2\]

Scaled periodogram
\begin{align*}
  P(j/n)
  &= \frac{4}{n}I(j/n) \\
  &= \left( \frac{2}{n} \sum_{t=1}^n x_t \cos(2\pi t j/n) \right)^2
   + \left( \frac{2}{n} \sum_{t=1}^n x_t \sin(2\pi t j/n) \right)^2
\end{align*}
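An illustrative periodogram computation via the FFT in base R (a sketch,
not from the original text); note that R's \texttt{fft} is unnormalized,
so $I(j/n) = |d(j/n)|^2$ requires dividing by $n$:
\begin{verbatim}
set.seed(7)
n <- 256
t <- 1:n
# one cycle at omega = 30/n, plus noise
x <- 2 * cos(2 * pi * 30 * t / n) + rnorm(n)
I <- Mod(fft(x))^2 / n   # periodogram I(j/n)
which.max(I[2:(n/2)])    # peaks at j = 30
\end{verbatim}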
\section{Math}

%\subsection{Orthogonal Functions}
%
%$L_2$ space
%\[L_2(a,b) = \left\{ f: [a,b] \to \R, \int_a^b f(x)^2\dx < \infty \right\}\]
%
%Inner product
%\[\int f(x)g(x)\dx\]
%
%Norm
%\[\|f\| = \sqrt{\int f^2(x) \dx}\]
%
%Orthogonality (for a series of functions $\phi_i$)
%\begin{align*}
%  \int \phi_j^2(x)\dx &= 1 \; \forall j \\
%  \int \phi_i(x)\phi_j(x)\dx &= 0 \; \forall i \neq j
%\end{align*}
%
%An orthogonal sequence $\phi_1, \phi_2,\dots$ is \emph{complete} if the only
%function that is orthogonal to each $\phi_j$ is the zero function. Then,
%$\phi_1, \phi_2,\dots$ form an \emph{orthogonal basis} in $L_2$:
%\[f \in L_2 \imp f(x) = \sum_{j=1}^\infty \beta_j \phi_j(x)
%\quad \text{where } \beta_j = \int_a^b f(x)\phi_j(x) \dx\]
%
%Cosine basis
%\begin{align*}
%  \phi_0(x) &= 1 \\
%  \phi_j(x) &= \sqrt{2}\cos(j\pi x) \quad \forall j\ge1
%\end{align*}
%
%\raggedright
%\textsc{Parseval}'s relation
%\[\|f\|^2 \equiv \int f^2(x)\dx = \sum_{j=1}^\infty \beta_j^2 \equiv \|\beta\|^2\]
%
%\textsc{Legendre} polynomials
%\begin{align*}
%  x &\in [-1,1] \\
%  P_0(x) &= 1\\
%  P_1(x) &= x \\
%  P_{j+1}(x) &= \frac{(2j+1)x\,P_j(x) - jP_{j-1}(x)}{j+1} \\
%  \phi_j(x) &= \sqrt{(2j+1)/2} P_j(x) \quad \text{orthogonal basis for }
%    L_2(-1,1)
%\end{align*}

\subsection{Gamma Function}
\label{sec:math:gamma}

\begin{itemize}
  \item Ordinary:
    $\displaystyle\Gamma(s) = \int_0^\infty t^{s-1} e^{-t}dt$
  \item Upper incomplete:
    $\displaystyle\Gamma(s,x) = \int_x^\infty t^{s-1} e^{-t}dt$
  \item Lower incomplete:
    $\displaystyle\gamma(s,x) = \int_0^x t^{s-1} e^{-t}dt$
  \item $\Gamma(\alpha + 1) = \alpha \Gamma(\alpha) \qquad \alpha>0$
  \item $\Gamma(n) = (n-1)! \qquad n \in \mathbb N$
  \item $\Gamma(0) = \Gamma(-1) = \infty$
  \item $\Gamma(1/2) = \sqrt{\pi}$
  \item $\Gamma(-1/2) = -2 \Gamma(1/2)$
\end{itemize}

\subsection{Beta Function}
\label{sec:math:beta}

\begin{itemize}
  \item Ordinary: $\text{B}(x,y) = \text{B}(y,x)
    = \displaystyle\int_0^1 t^{x-1}(1-t)^{y-1} \,dt
    = \displaystyle\frac{\Gamma(x)\Gamma(y)}{\Gamma(x+y)}$
%  \item $\alpha,\beta \in \mathbb N \imp \displaystyle
%    \text{B}(\alpha,\beta) = \frac{(\alpha-1)!(\beta-1)!}{(\alpha+\beta-1)!}$
  \item Incomplete: $\text{B}(x;\,a,b)
    = \displaystyle\int_0^x t^{a-1}(1-t)^{b-1} \,dt$
  \item Regularized incomplete: \\
    $I_x(a,b) = \displaystyle\frac{\text{B}(x;\,a,b)}{\text{B}(a,b)}
    \stackrel{a,b\in\mathbb N}{=}
    \sum_{j=a}^{a+b-1} \frac{(a+b-1)!}{j!(a+b-1-j)!}x^j(1-x)^{a+b-1-j}$
  \item $I_0(a,b) = 0 \qquad I_1(a,b) = 1$
  \item $I_x(a,b) = 1 - I_{1-x}(b,a)$
\end{itemize}

\subsection{Series}

\begin{multicols}{2}
\begin{titemize}{Finite}
  \item $\displaystyle\sum_{k=1}^n k = \frac{n(n+1)}{2}$
  \item $\displaystyle\sum_{k=1}^n (2k-1) = n^2$
  \item $\displaystyle\sum_{k=1}^n k^2 = \frac{n(n+1)(2n+1)}{6}$
  \item $\displaystyle\sum_{k=1}^n k^3 = \left(\frac{n(n+1)}{2}\right)^2$
  \item $\displaystyle\sum_{k=0}^n c^k = \frac{c^{n+1}-1}{c-1} \quad c\neq1$
\end{titemize}

\begin{titemize}{Binomial}
  \item $\displaystyle\sum_{k=0}^n \binom{n}{k} = 2^n$
  \item $\displaystyle\sum_{k=0}^n \binom{r+k}{k}=\binom{r+n+1}{n}$
  \item $\displaystyle\sum_{k=0}^n \binom{k}{m}=\binom{n+1}{m+1}$
  \item \textsc{Vandermonde}'s Identity:\\
    $\displaystyle\sum_{k=0}^r \binom{m}{k}\binom{n}{r-k}=\binom{m+n}{r}$
  \item Binomial Theorem:\\
    $\displaystyle\sum_{k=0}^n \binom{n}{k}a^{n-k}b^k = (a+b)^n$
\end{titemize}
\end{multicols}

Infinite
\begin{itemize}
  \item $\displaystyle\sum_{k=0}^\infty p^k = \frac{1}{1-p},
    \quad \sum_{k=1}^\infty p^k = \frac{p}{1-p} \quad |p|<1$
  \item $\displaystyle\sum_{k=0}^\infty kp^{k-1}
    = \displaystyle\frac{d}{dp}\left(\sum_{k=0}^\infty p^k\right)
    = \displaystyle\frac{d}{dp}\left(\frac{1}{1-p}\right)
    = \frac{1}{(1-p)^2} \quad |p|<1$
  \item $\displaystyle\sum_{k=0}^\infty \binom{r+k-1}{k} x^k = (1-x)^{-r}
    \quad |x|<1\,,\, r\in\mathbb N^+$
  \item $\displaystyle\sum_{k=0}^\infty \binom{\alpha}{k} p^k
    = (1+p)^\alpha \quad |p|<1\,,\,\alpha \in \mathbb C$
\end{itemize}
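As a worked illustration (not in the original), the derivative identity
above gives the mean of a geometrically distributed \rv with success
probability $p$: substituting $1-p$ for $p$ in
$\sum_{k=0}^\infty kp^{k-1} = (1-p)^{-2}$ yields
\[\E{X} = \sum_{k=1}^\infty k\,p(1-p)^{k-1}
  = p\sum_{k=1}^\infty k\,(1-p)^{k-1}
  = \frac{p}{(1-(1-p))^2} = \frac{1}{p}\]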
%\subsection{Integrals}
%
%\begin{itemize}
%  \item $\displaystyle\int_{-\infty}^\infty e^{-\frac{x^2}{2}}dx
%    = \sqrt{2\pi}$
%\end{itemize}

\vfill~

\subsection{Combinatorics}

Sampling
\begin{center}
  \begin{tabular}[h]{|l*2{|>{\begin{math}\displaystyle}c<{\end{math}}}|}
    \hline &&\\[-1.5ex]
    $k$ out of $n$ & \text{w/o replacement} & \text{w/ replacement}
    \\[1ex]
    \hline
    ordered & n^{\underline k}
      = \displaystyle\prod_{i=0}^{k-1}(n-i)
      = \frac{n!}{(n-k)!}
      & n^k \\[3ex]
    unordered & \binom{n}{k} = \frac{n^{\underline k}}{k!}
      = \frac{n!}{k!(n-k)!} &
      \binom{n-1+k}{k}=\binom{n-1+k}{n-1} \\[3ex]
    \hline
  \end{tabular}
\end{center}

\newcommand{\stirling}[2]{\genfrac{\{}{\}}{0pt}{}{#1}{#2}}

Stirling numbers, $2^{nd}$ kind
\[\stirling{n}{k} = k\stirling{n-1}{k}+\stirling{n-1}{k-1}
  \qquad 1\le k \le n \qquad
  \stirling{n}{0} = \begin{cases} 1 & n = 0\\ 0 & \text{else} \end{cases}\]

Partitions
\[P_{n+k,k} = \sum_{i=1}^k P_{n,i} \qquad \qquad
  k>n:\;P_{n,k} = 0 \qquad n\ge1:\;P_{n,0} = 0, \; P_{0,0} = 1\]

% Distinguishability.
\def\distinguishable{\ensuremath{D}\xspace}
\def\indistinguishable{\ensuremath{\neg \distinguishable}\xspace}
Balls and Urns \qquad $f: B \to U$ \qquad
\distinguishable = distinguishable,
\indistinguishable = indistinguishable.
\begin{center}
  \begin{tabular}[h]{|l*4{|>{\begin{math}\displaystyle}c<{\end{math}}}|}
    \hline &&&&\\[-1.5ex]
    $|B|=n$, $|U|=m$ & f \text{ arbitrary} & f \text{ injective} &
      f \text{ surjective} & f \text{ bijective} \\[1ex]
    \hline
    \hline &&&&\\[-2ex]
    $B:\distinguishable,\; U:\distinguishable$ &
      m^n & \begin{cases} m^{\underline n} & m \ge n\\
      0 & \text{else} \end{cases} & m!\,\stirling{n}{m} &
      \begin{cases} n! & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline &&&&\\[-2ex]
    $B:\indistinguishable,\; U:\distinguishable$ &
      \binom{m+n-1}{n} & \binom{m}{n} &
      \binom{n-1}{m-1} &
      \begin{cases} 1 & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline &&&&\\[-2ex]
    $B:\distinguishable,\; U:\indistinguishable$ &
      \sum_{k=1}^m \stirling{n}{k} & \begin{cases} 1 &
      m\ge n\\ 0 & \text{else} \end{cases} & \stirling{n}{m} &
      \begin{cases} 1 & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline &&&&\\[-2ex]
    $B:\indistinguishable,\; U:\indistinguishable$ & \sum_{k=1}^m P_{n,k} &
      \begin{cases} 1 & m \ge n\\ 0 & \text{else} \end{cases} & P_{n,m} &
      \begin{cases} 1 & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline
  \end{tabular}
\end{center}
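A small cross-check in base R of the surjection count $m!\,\stirling{n}{m}$
from the table above (illustrative sketch, not part of the cookbook):
\begin{verbatim}
# Stirling numbers of the 2nd kind via
# S(n,k) = k S(n-1,k) + S(n-1,k-1)
stirling2 <- function(n, k) {
  if (n == 0 && k == 0) return(1)
  if (n == 0 || k == 0) return(0)
  k * stirling2(n - 1, k) + stirling2(n - 1, k - 1)
}
factorial(3) * stirling2(5, 3)  # 150 surjections
# same count by inclusion-exclusion:
sum((-1)^(0:3) * choose(3, 0:3) * (3 - 0:3)^5)
\end{verbatim}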
% Convergence
% \begin{itemize}
%   \item $\displaystyle\sum_{n=1}^\infty a_n$ converges if
%     $\displaystyle\lim_{n \to \infty} \left|\frac{a_{n+1}}{a_n}\right| < 1$
%   \item $\displaystyle\sum_{n=1}^\infty a_n$ diverges if
%     $\displaystyle\lim_{n \to \infty} a_n \neq 0$
%   \item $\displaystyle\sum_{n=1}^\infty n^{-p}$ converges if $p > 1$
% \end{itemize}

% \subsection{Calculus}
%
% Polar Coordinates
% \begin{itemize}
%   \item $x = r\cos\theta \qquad y = r\sin\theta$
%   \item $r = \sqrt{y^2+x^2}$
%   \item $\theta =
%     \begin{cases}
%       0 & \mbox{if } x = 0 \mbox{ and } y = 0\\
%       \arcsin(\frac{y}{r}) & \mbox{if } x \geq 0 \\
%       -\arcsin(\frac{y}{r}) + \pi & \mbox{if } x < 0\\
%     \end{cases}$
% \end{itemize}

{
  \footnotesize
  \bibliographystyle{abbrv}
  \bibliography{literature}
  \vfill~
}

\end{multicols*}

\newpage

\begin{sidewaysfigure}
  \captionsetup{labelformat=empty,labelsep=none}
  \includegraphics[width=\textwidth]{figs/relationships}
  \caption{Univariate distribution relationships, courtesy of Leemis and
    McQueston~\cite{Leemis08}.}
\end{sidewaysfigure}

\end{document}