├── .gitignore
├── COPYING
├── Makefile
├── README.md
├── figs
│   ├── Makefile
│   ├── distributions.R
│   └── relationships.pdf
├── literature.bib
├── probstat.tex
└── stat-cookbook.tex

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
*.fdb_latexmk
*.aux
*.bbl
*.blg
*.brf
*.fls
*.log
*.out
*.sw?
*.toc
*.gz
figs/*
!figs/relationships.pdf
stat-cookbook.pdf

--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
This work is licensed under the Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy
of this license, visit http://creativecommons.org/licenses/by-nc-sa/4.0/ or
send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
DOC := stat-cookbook.tex

RERUN := "(undefined references|Rerun to get (cross-references|the bars|point totals) right|Table widths have changed. Rerun LaTeX.|Linenumber reference failed)"
RERUNBIB := "No file.*\.bbl|Citation.*undefined"

all: figs doc

figs:
	@$(MAKE) -C $@

doc: $(DOC:.tex=.pdf)

%.pdf: %.tex
	pdflatex $<
	@egrep -q $(RERUNBIB) $*.log && bibtex $* && pdflatex $<; true
	@egrep -q $(RERUN) $*.log && pdflatex $<; true
	@egrep -q $(RERUN) $*.log && pdflatex $<; true

latexmk:
	-latexmk -pvc -pdf $(DOC)

purge:
	-rm -f *.{aux,dvi,log,bbl,blg,brf,fls,toc,thm,out,fdb_latexmk}

clean: purge
	$(MAKE) -C figs $@
	-rm -f $(DOC:.tex=.pdf)

.PHONY: all figs purge clean

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
The [probability and statistics cookbook][stat-cookbook] contains a succinct
representation of various topics in probability theory and statistics. It
provides a comprehensive mathematical reference reduced to its essence, rather
than aiming for elaborate explanations.

Feel encouraged to extend the cookbook by forking it and submitting pull
requests.

Build Setup
-----------

You can build the cookbook locally via:

    make

This first generates the distribution plots via R and then compiles the LaTeX
source. You may have to install a few missing packages via CRAN.

License
-------

This work is licensed under an [Attribution-NonCommercial-ShareAlike 4.0
International License][by-nc-sa].

[![Creative Commons License][by-nc-sa-img]][by-nc-sa]

[stat-cookbook]: http://statistics.zone
[by-nc-sa]: http://creativecommons.org/licenses/by-nc-sa/4.0/
[by-nc-sa-img]: http://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png

--------------------------------------------------------------------------------
/figs/Makefile:
--------------------------------------------------------------------------------
.PHONY: all clean

# We take the first plot as an indicator for all plots until the R script
# allows for more fine-grained plot generation.
all: beta-cdf.pdf

%.pdf:
	R --vanilla < distributions.R

clean:
	-rm -f *-*.pdf

--------------------------------------------------------------------------------
/figs/distributions.R:
--------------------------------------------------------------------------------
# Install needed packages if necessary.
needed_packages = c("ggplot2", "reshape2", "grid", "RColorBrewer", "VGAM")
if (length(setdiff(needed_packages, rownames(installed.packages()))) > 0) {
  install.packages(setdiff(needed_packages, rownames(installed.packages())),
                   dependencies=TRUE, repos="http://cran.r-project.org")
}

library(ggplot2)
library(reshape2)
library(grid)
library(RColorBrewer)
library(VGAM) # [dp]pareto

line_width = 1.3
point_size = 4
theme_set(theme_bw(base_size=20))
theme_update(legend.background=element_rect(fill=alpha("white", 0)),
             legend.key=element_rect(colour="white"),
             legend.key.width=unit(3, "lines"),
             plot.margin=unit(rep(0, 4), "lines"))

# FIXME: is it possible to move this statement into theme_update?
scale_color_discrete = function(...) scale_color_brewer(..., palette="Dark2")


# --------------------------------------------------------------------------- #

make.dist.fn <- function(mode, dist) {
  if (mode == "cdf")
    eval(parse(text=paste("p", dist, sep="")))
  else if (mode == "pdf" || mode == "pmf")
    eval(parse(text=paste("d", dist, sep="")))
  else
    stop("invalid mode: must be 'cdf' or 'pdf/pmf'")
}

make.data <- function(mode, dist, theta, xseq) {
  dist.fn <- make.dist.fn(mode, dist)
  unary <- function(...) function(x) dist.fn(x, ...)
  data.fns <- apply(theta, 1, function(x) do.call(unary, as.list(t(x))))
  values <- data.frame(sapply(data.fns, function(f) f(xseq)))
  cbind(x=xseq, values)
}

plot.dist <- function(xseq, theta, dist, mode, title, lab.fn) {
  values <- make.data(mode, dist, theta, xseq)
  molten <- melt(values, 1)
  labels <- apply(theta, 1, function(x) do.call(lab.fn, as.list(t(x))))
  p <- ggplot(molten, aes(x=x, y=value, color=variable, linetype=variable)) +
    ggtitle(title) +
    ylab(toupper(mode)) +
    scale_color_discrete(labels=labels) +
    scale_linetype_discrete(labels=labels)

  # We position the legend for CDFs bottom-right and for P[MD]Fs top-right.
  if (mode == "cdf")
    p <- p + theme(legend.title=element_blank(),
                   legend.justification=c(1, 0),
                   legend.position=c(1, 0))
  else
    p <- p + theme(legend.title=element_blank(),
                   legend.justification=c(1, 1),
                   legend.position=c(1, 1))
  p
}

plot.discrete <- function(from, to, ...) {
  xseq <- seq(from, to)
  plot.dist(xseq, ...) +
    geom_line(size=line_width) +
    geom_point(size=point_size)
}

plot.continuous <- function(from, to, ...) {
  xseq <- seq(from, to, by=0.01)
  plot.dist(xseq, ...) +
    geom_line(size=line_width)
}

# --------------------------------------------------------------------------- #

plot.uniform.cdf.discrete <- function() {
  xseq <- 3:7
  x <- melt(as.data.frame(cbind(xseq, ecdf(xseq)(xseq))), 1)
  ggplot(x, aes(x=xseq, y=value)) +
    geom_point(size=point_size) +
    geom_segment(aes(x=xseq, y=value, xend=xseq+1, yend=value)) +
    geom_segment(aes(x=3.1, y=0.2, xend=4, yend=0.2)) +
    geom_segment(aes(x=6, y=0.8, xend=6.9, yend=0.8)) +
    geom_point(aes(x=xseq+1), size=point_size, color="white", shape=19) +
    geom_point(aes(x=xseq+1), size=point_size, shape=1) +
    ggtitle("Uniform (discrete)") +
    labs(x="x", y="CDF") +
    theme(panel.grid.minor=element_blank()) +
    scale_x_continuous(name="x", limits=c(3.1, 6.9), breaks=4:6,
                       labels=c("a", "", "b")) +
    scale_y_continuous(name="CDF", limits=c(0.2, 0.8),
                       breaks=c(0.2, 0.4, 0.6, 0.8),
                       labels=c(0, expression(frac(i, n)),
                                expression(frac(i, n)), 1.0))
}

plot.uniform.cdf.continuous <- function() {
  x <- as.data.frame(rbind(c(0,0,2,0), c(2,0,6,1), c(6,1,8,1)))
  ggplot(x) +
    geom_segment(aes(x=V1, y=V2, xend=V3, yend=V4)) +
    ggtitle("Uniform (continuous)") +
    labs(x="x", y="CDF") +
    theme(panel.grid.minor=element_blank()) +
    scale_x_continuous(breaks=c(2,6), labels=c("a", "b")) +
    scale_y_continuous(limits=0:1, breaks=0:1, labels=0:1)
}

plot.uniform.pmf <- function() {
  xseq <- 3:8
  ggplot(data.frame(x0=factor(xseq), x1=xseq, y0=0, y1=0.5)) +
    aes(x=x0, y=y1) +
    geom_point(size=point_size) +
    # geom_segment(aes(x=x1, xend=x1, y=y0, yend=y1), linetype="dashed") +
    labs(title="Uniform (discrete)") +
    theme(panel.grid.minor=element_blank()) +
    scale_x_discrete(name="x",
                     breaks=xseq,
                     limits=1:10,
                     labels=c("a", rep("", length(xseq)-2), "b")) +
    scale_y_continuous(name="PMF",
                       breaks=0.5,
                       limits=0:1,
                       labels=expression(frac(1, n)))
}

plot.uniform.pdf <- function() {
  solid <- data.frame(x0=c(1, 3, 8),
                      x1=c(3, 8, 10),
                      y0=c(0, 0.5, 0),
                      y1=c(0, 0.5, 0))
  dashed <- data.frame(x0=c(solid[1,2], solid[3,1]),
                       x1=c(solid[1,2], solid[2,2]),
                       y0=c(solid[1,3], solid[3,3]),
                       y1=c(solid[2,3], solid[2,4]))
  filled <- data.frame(x=c(solid[2,1], solid[3,1]),
                       y=c(solid[2,3], solid[2,3]))
  hollow <- data.frame(x=c(solid[2,1], solid[3,1]),
                       y=c(solid[1,3], solid[3,3]))

  ggplot(solid) +
    geom_segment(aes(x=x0, xend=x1, y=y0, yend=y1), size=line_width) +
    geom_segment(data=dashed,
                 aes(x=x0, xend=x1, y=y0, yend=y1),
                 size=line_width,
                 linetype="dashed") +
    geom_point(data=filled, aes(x=x, y=y), size=point_size) +
    geom_point(data=hollow, aes(x=x, y=y), size=point_size, shape=21,
               fill="white") +
    theme(panel.grid.minor=element_blank()) +
    ggtitle("Uniform (continuous)") +
    scale_x_continuous(name="x",
                       breaks=c(solid[1,2], solid[3,1]),
                       limits=c(solid[1,1], solid[3,2]),
                       labels=c("a", "b")) +
    scale_y_continuous(name="PDF",
                       breaks=solid[2,3],
                       limits=0:1,
                       labels=expression(frac(1, b-a)))
}

# --------------------------------------------------------------------------- #

plot.binomial <- function(mode, xmin=1, xmax=40,
                          theta=data.frame(n=c(40, 30, 25), p=c(0.3, 0.6, 0.9)),
                          title="Binomial") {
  lab.fn <- function(x, y) substitute(list(n==i, p==j), list(i=x, j=y))
  plot.discrete(xmin, xmax, theta, "binom", mode, title, lab.fn)
}

plot.geometric <- function(mode, xmin=0, xmax=10,
                           theta=data.frame(p=c(0.2, 0.5, 0.8)),
                           title="Geometric") {
  lab.fn <- function(x) substitute(p==i, list(i=x))
  plot.discrete(xmin, xmax, theta, "geom", mode, title, lab.fn)
}

plot.poisson <- function(mode, xmin=0, xmax=20,
                         theta=data.frame(lambda=c(1,4,10)),
                         title="Poisson") {
  lab.fn <- function(x) substitute(lambda==i, list(i=x))
  plot.discrete(xmin, xmax, theta, "pois", mode, title, lab.fn)
}

# --------------------------------------------------------------------------- #

plot.normal <- function(mode, xmin=-5, xmax=5,
                        theta=data.frame(mu=c(0,0,0,-2), s2=c(0.2,1,5,0.5)),
                        title="Normal") {
  lab.fn <- function(x, y) substitute(list(mu==i, sigma^2==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "norm", mode, title, lab.fn)
}

plot.lognormal <- function(mode, xmin=0, xmax=3,
                           theta=data.frame(mu=c(0,2,0,1/2,1/4,1/8),
                                            s2=c(3,2,1,1,1,1)),
                           title="Log-Normal") {
  lab.fn <- function(x, y) substitute(list(mu==i, sigma^2==j), list(i=x, j=y))
  p <- plot.continuous(xmin, xmax, theta, "lnorm", mode, title, lab.fn)
  if (mode == "cdf")
    p <- p + theme(legend.justification=c(0, 1), legend.position=c(0, 1))

  p
}

plot.student <- function(mode, xmin=-5, xmax=5,
                         theta=data.frame(c(1,2,5,Inf)),
                         title=expression(bold("Student\'s") ~ italic(t))) {
  lab.fn <- function(x) {
    if (x == Inf)
      quote(nu==infinity)
    else
      substitute(nu==i, list(i=x))
  }

  plot.continuous(xmin, xmax, theta, "t", mode, title, lab.fn)
}

plot.chisquare <- function(mode, xmin=0, xmax=8,
                           theta=data.frame(1:5),
                           title=expression(chi^2)) {
  lab.fn <- function(x) substitute(k==i, list(i=x))
  plot.continuous(xmin, xmax, theta, "chisq", mode, title, lab.fn)
}

plot.f <- function(mode, xmin=0, xmax=5,
                   theta=data.frame(d1=c(1,2,5,100,100), d2=c(1,1,2,1,100)),
                   title="F") {
  lab.fn <- function(x, y) substitute(list(d[1]==i, d[2]==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "f", mode, title, lab.fn)
}

plot.exp <- function(mode, xmin=0, xmax=5,
                     theta=data.frame(c(2,1,0.4)),
                     title="Exponential") {
  lab.fn <- function(x) substitute(beta==i, list(i=1/x))
  plot.continuous(xmin, xmax, theta, "exp", mode, title, lab.fn)
}

plot.gamma <- function(mode, xmin=0, xmax=20,
                       theta=data.frame(a=c(1,2,3,5,9), b=c(0.5,0.5,0.5,1,2)),
                       title="Gamma") {
  lab.fn <- function(x, y) substitute(list(alpha==i, beta==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "gamma", mode, title, lab.fn)
}

dinvgamma <- function(x, shape = 1, rate = 1, scale = 1/rate, log = FALSE) {
  logval <- shape * log(rate) - lgamma(shape) - (shape+1) * log(x) - rate/x
  if (log)
    logval
  else
    exp(logval)
}

pinvgamma <- function(q, shape = 1, rate = 1, scale = 1/rate,
                      lower.tail = TRUE, log.p = FALSE) {
  pgamma(1 / q, shape, rate, scale, !lower.tail, log.p)
}

plot.invgamma <- function(mode, xmin=0, xmax=5,
                          theta=data.frame(a=c(1,2,3,3), b=c(1,1,1,0.5)),
                          title="Inverse Gamma") {
  lab.fn <- function(x, y) substitute(list(alpha==i, beta==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "invgamma", mode, title, lab.fn)
}

#plot.dirichlet = function()
#{
#  require(MCMCpack)
#
#  a = list(c(6,2,2), c(3,7,5), c(6,2,6), c(2,3,4))
#  seqs = seq(0, 15, by=0.01) # FIXME: choose right input
#  f = function(v) ddirichlet(cbind(seqs, seqs, seqs), v)
#
#  # TODO
#  mapply(f, a, b)
#
#  s = function(k) substitute(list(alpha == i), list(i=a[k]))
#  labs = lapply(1:length(a), s)
#}

plot.beta <- function(mode, xmin=0, xmax=1,
                      theta=data.frame(a=c(0.5,5,1,2,2), b=c(0.5,1,3,2,5)),
                      title="Beta") {
  lab.fn <- function(x, y) substitute(list(alpha==i, beta==j), list(i=x, j=y))
  p <- plot.continuous(xmin, xmax, theta, "beta", mode, title, lab.fn)

  if (mode == "cdf")
    p <- p + theme(legend.justification=c(0, 1), legend.position=c(0, 1))
  else
    p <- p + theme(legend.justification=c(0.5, 1), legend.position=c(0.5, 1))

  p
}

# stats::dweibull has signature dweibull(x, shape, scale), so the shape
# parameter k must come first in theta; otherwise every curve is drawn with
# shape 1 and mislabeled.
plot.weibull <- function(mode, xmin=0, xmax=2.5,
                         theta=data.frame(k=c(0.5,1,1.5,5), lambda=c(1,1,1,1)),
                         title="Weibull") {
  lab.fn <- function(x, y) substitute(list(lambda==j, k==i), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "weibull", mode, title, lab.fn)
}

plot.pareto <- function(mode, xmin=0.8, xmax=2.5,
                        theta=data.frame(xm=c(1,1,1), a=c(1,2,4)),
                        title="Pareto") {
  lab.fn <- function(x, y) substitute(list(x[m]==i, alpha==j), list(i=x, j=y))
  plot.continuous(xmin, xmax, theta, "pareto", mode, title, lab.fn)
}

# --------------------------------------------------------------------------- #

store <- function(name, p) {
  ggsave(paste(name, "pdf", sep="."), p)
}

store("uniform-pmf", plot.uniform.pmf())
store("uniform-pdf", plot.uniform.pdf())
store("uniform-cdf-discrete", plot.uniform.cdf.discrete())
store("uniform-cdf-continuous", plot.uniform.cdf.continuous())

store("binomial-pmf", plot.binomial("pmf"))
store("binomial-cdf", plot.binomial("cdf"))
store("geometric-pmf", plot.geometric("pmf"))
store("geometric-cdf", plot.geometric("cdf"))
store("poisson-pmf", plot.poisson("pmf"))
store("poisson-cdf", plot.poisson("cdf"))

store("normal-pdf", plot.normal("pdf"))
store("normal-cdf", plot.normal("cdf"))
store("lognormal-pdf", plot.lognormal("pdf") + ylim(0,1))
store("lognormal-cdf", plot.lognormal("cdf"))
store("student-pdf", plot.student("pdf"))
store("student-cdf", plot.student("cdf"))
store("chisquare-pdf", plot.chisquare("pdf") + ylim(0,1))
store("chisquare-cdf", plot.chisquare("cdf"))
store("f-pdf", plot.f("pdf"))
store("f-cdf", plot.f("cdf"))
store("exponential-pdf", plot.exp("pdf"))
store("exponential-cdf", plot.exp("cdf"))
store("gamma-pdf", plot.gamma("pdf"))
store("gamma-cdf", plot.gamma("cdf"))
store("invgamma-pdf", plot.invgamma("pdf"))
store("invgamma-cdf", plot.invgamma("cdf"))
store("beta-pdf", plot.beta("pdf"))
store("beta-cdf", plot.beta("cdf"))
store("weibull-pdf", plot.weibull("pdf"))
store("weibull-cdf", plot.weibull("cdf"))
store("pareto-pdf", plot.pareto("pdf"))
store("pareto-cdf", plot.pareto("cdf"))
--------------------------------------------------------------------------------
/figs/relationships.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mavam/stat-cookbook/09bd804a5251911088d55f8ce787a991b8b4ba15/figs/relationships.pdf

--------------------------------------------------------------------------------
/literature.bib:
--------------------------------------------------------------------------------
@Article{Leemis08,
  author    = {Lawrence M. Leemis and Jacquelyn T. McQueston},
  title     = {{Univariate Distribution Relationships}},
  journal   = {The American Statistician},
  year      = 2008,
  volume    = {62},
  pages     = {45--53},
  number    = {1},
  timestamp = {2008.10.16}
}

@Book{Hoel72,
  author    = {Paul G. Hoel and Sidney C. Port and Charles J. Stone},
  title     = {{Introduction to Probability Theory}},
  publisher = {Brooks Cole},
  year      = 1972,
  isbn      = {978-0395046364}
}

@Book{Shumway06,
  author    = {Robert H. Shumway and David S. Stoffer},
  title     = {{Time Series Analysis and Its Applications: With R Examples}},
  publisher = {Springer},
  isbn      = {978-0-387-29317-2},
  year      = 2006
}

@Book{Wasserman03,
  author    = {Larry Wasserman},
  title     = {{All of Statistics: A Concise Course in Statistical Inference}},
  publisher = {Springer},
  year      = 2003
}

@Book{Steger01,
  author    = {Angelika Steger},
  title     = {{Diskrete Strukturen
                -- Band 1: Kombinatorik, Graphentheorie, Algebra}},
  publisher = {Springer},
  year      = 2001
}

@Book{Steger02,
  author    = {Angelika Steger},
  title     = {{Diskrete Strukturen
                -- Band 2: Wahrscheinlichkeitstheorie und Statistik}},
  publisher = {Springer},
  year      = 2002
}

--------------------------------------------------------------------------------
/probstat.tex:
--------------------------------------------------------------------------------
\usepackage{amsmath,amssymb}
\usepackage{dsfont}
\usepackage{cancel}
\usepackage{graphicx}
\usepackage{xargs}
\usepackage{xspace}

% =============================================================================
% Formatting
% =============================================================================

% Make a note on the margin.
\newcommand{\marnote}[1]{
  \reversemarginpar
  \marginpar[\raggedleft\footnotesize\textit{\\[3ex]#1}]%
            {\raggedright\footnotesize\textit{\\[3ex]#1}}
  \normalmarginpar
}

\newcommand{\pwiseii}[1]{\ensuremath{\left\{\begin{array}{ll}#1\end{array}\right.}}
\newcommand{\pwiseiii}[1]{\ensuremath{\left\{\begin{array}{ll}#1\end{array}\right.}}
\newcommand{\prn}[1]{\ensuremath{\left(#1\right)}}
\newcommand{\brk}[1]{\ensuremath{\left[#1\right]}}
\newcommand{\brc}[1]{\ensuremath{\left\{#1\right\}}}
\newcommand{\x}[1]{\ensuremath{\cancel{#1}}}
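
% Usage sketch for the \pwiseii macro above (illustrative only; the macro is
% not exercised elsewhere in these sources). A two-case piecewise definition:
%   F(x) = \pwiseii{0 & x < 0 \\ 1 & x \ge 0}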

% =============================================================================
% General Math
% =============================================================================

% Special functions and operators
\DeclareMathOperator{\erf}{erf}
\DeclareMathOperator{\logit}{logit}
\DeclareMathOperator{\sign}{sign}
\DeclareMathOperator*{\argmin}{\arg\!\min}

% Definitions
\def\define{:=}
\def\defined{=:}
\def\eqdef{\triangleq}

% Proofs
\def\qed{\ifhmode\unskip\nobreak\fi\hfill \ensuremath{\square}}

% Standard transformation function
\def\transform{\ensuremath{\varphi}\xspace}

% Logic
\newcommand{\comp}[1]{\neg{#1}}
\newcommand{\imp}{\ensuremath{\;\Longrightarrow\;}}
\newcommand{\pmi}{\ensuremath{\;\Longleftarrow\;}}
\newcommand{\nimp}{\ensuremath{\;\not\!\!\Longrightarrow\;}}
\newcommand{\npmi}{\ensuremath{\;\not\!\!\Longleftarrow\;}}
\newcommand{\eqv}{\ensuremath{\;\Longleftrightarrow\;}}

% Numbers.
\def\C{\mathbb{C}}
\def\N{\mathbb{N}}
\def\R{\mathbb{R}}
\def\Z{\mathbb{Z}}

% Matrices
\newcommand{\eyeii}{\ensuremath{\left(\begin{matrix}1 & 0 \\ 0 & 1\end{matrix}\right)}}
\newcommand{\eyeiii}{\ensuremath{\left(\begin{matrix}1 & 0 & 0 \\ 0 & 1 & 0 \\ 0 & 0 & 1\end{matrix}\right)}}

% Limits
\newcommand{\Lim}[2]{\ensuremath{\lim_{#1\to #2}}}
\newcommand{\limx}[1][\infty]{\ensuremath{\lim_{x\to #1}}}
\newcommand{\limn}[1][\infty]{\ensuremath{\lim_{n\to #1}}}

% Sums and products
\newcommand{\Sum}[2][i=1]{\ensuremath{\sum_{#1}^{#2}}}
\newcommand{\sumin}{\ensuremath{\sum_{i=1}^n}}
\newcommand{\sumiN}{\ensuremath{\sum_{i=1}^N}}
\newcommand{\sumim}{\ensuremath{\sum_{i=1}^m}}
\newcommand{\sumjk}{\ensuremath{\sum_{j=1}^k}}
\newcommand{\sumjn}{\ensuremath{\sum_{j=1}^n}}
\newcommand{\sumjm}{\ensuremath{\sum_{j=1}^m}}
\newcommand{\isum}[1][n]{\ensuremath{\sum_{#1}^\infty}}
\newcommand{\dsum}[4][i=1]{\ensuremath{\sum_{#1}^{#2}\sum_{#3}^{#4}}}
\newcommand{\Prod}[2][i=1]{\ensuremath{\prod_{#1}^{#2}}}
\newcommand{\prodin}{\ensuremath{\prod_{i=1}^n}}
\newcommand{\prodjn}{\ensuremath{\prod_{j=1}^n}}

% Derivatives
\newcommand{\der}[2][]{\ensuremath{\frac{d #1}{d #2}}}
\newcommand{\dder}[2][]{\ensuremath{\frac{d^2 #1}{d #2^2}}}
\newcommand{\pder}[2][]{\ensuremath{\frac{\partial #1}{\partial #2}}}
\newcommand{\pdder}[2][]{\ensuremath{\frac{\partial^2 #1}{\partial #2^2}}}
\newcommand{\mpder}[3][]{%
  \ensuremath{\frac{\partial^2 #1}{\partial #2 \partial #3}}}

% Differentials
%\renewcommand{\d}[1]{\,\mathrm{d}#1}
\renewcommand{\d}[1]{\,d#1}
\def\ds{\d{s}}
\def\dt{\d{t}}
\def\dtheta{\d{\theta}}
\def\du{\d{u}}
\def\dx{\d{x}}
\def\dy{\d{y}}
\def\dfx{\d{F_X(x)}}
\def\dfy{\d{F_Y(y)}}
\def\dfhatx{\d{\widehat{F}_n(x)}}

% Transcendentals w/ extended arguments.
\newcommand{\Exp}[1]{\ensuremath{\exp\left\{#1\right\}}}
\newcommand{\Log}[1]{\ensuremath{\log\left\{#1\right\}}}

% =============================================================================
% Probability and Statistics
% =============================================================================

% Formatted terminology.
\def\bias{\textsf{bias}\xspace}
\def\se{\textsf{se}\xspace}
\def\pdf{\textsc{pdf}\xspace}
\def\cdf{\textsc{cdf}\xspace}
\def\ise{\textsc{ise}\xspace}
\def\pgf{\textsc{pgf}\xspace}
\def\mgf{\textsc{mgf}\xspace}
\def\mse{\textsc{mse}\xspace}
\def\mspe{\textsc{mspe}\xspace}
\def\mle{\textsc{mle}\xspace}
\def\mom{\textsc{mom}\xspace}
\def\are{\textsc{are}\xspace}
\def\rss{\textsc{rss}\xspace}
\def\ess{\textsc{ess}\xspace}
\def\tss{\textsc{tss}\xspace}

% Naming shortcuts.
\def\ahat{\ensuremath{\widehat{\alpha}}}
\def\atil{\ensuremath{\tilde{\alpha}}}
\def\bhat{\ensuremath{\widehat{\beta}}}
\def\btil{\ensuremath{\tilde{\beta}}}
\def\dhat{\ensuremath{\widehat{\delta}}}
\def\ehat{\ensuremath{\hat{\epsilon}}}
\def\ghat{\ensuremath{\widehat{\gamma}}}
\def\khat{\ensuremath{\widehat{\kappa}}}
\def\lhat{\ensuremath{\widehat{\lambda}}}
\def\ltil{\ensuremath{\tilde{\lambda}}}
\def\mhat{\ensuremath{\widehat{\mu}}}
\def\nhat{\ensuremath{\widehat{\nu}}}
\def\mtil{\ensuremath{\tilde{\mu}}}
\def\psihat{\ensuremath{\widehat{\psi}}}
\def\shat{\ensuremath{\widehat{\sigma}}}
\def\stil{\ensuremath{\tilde{\sigma}}}
\def\that{\ensuremath{\widehat{\theta}}}
\def\ttil{\ensuremath{\widetilde{\theta}}}
\def\rhohat{\widehat{\rho}}
\def\xihat{\widehat{\xi}}

\def\sehat{\ensuremath{\widehat{\se}}}
\def\fhat{\ensuremath{\widehat{f}}}
\def\Fhat{\ensuremath{\widehat{F}}}
\def\fnhat{\ensuremath{\widehat{f}_n}}
\def\Fnhat{\ensuremath{\widehat{F}_n}}
\def\Jhat{\ensuremath{\widehat{J}}}
\def\phat{\ensuremath{\widehat{p}}}
\def\ptil{\ensuremath{\tilde{p}}}
\def\rhat{\widehat{r}}
\def\Rbar{\bar{R}}
\def\Rhat{\widehat{R}}
\def\Qbar{\bar{Q}}
\def\Qhat{\widehat{Q}}
\def\Xhat{\widehat{X}}
\def\xbar{\bar{x}}
\def\Xbar{\bar{X}}
\def\Xsqbar{\overline{X^2}}
\def\xnbar{\overline{x}_n}
\def\Xnbar{\overline{X}_n}
\def\Yhat{\widehat{Y}}
\def\ybar{\overline{y}}
\def\Ybar{\overline{Y}}
\def\Ynbar{\overline{Y}_n}

% Random variables.
\def\rv{\textsc{rv}\xspace}
\def\iid{\ensuremath{\textsc{iid}}\xspace}
\def\dist{\ensuremath{\sim}\xspace}
\def\disteq{\ensuremath{\stackrel{D}{=}}\xspace}
\def\distiid{\ensuremath{\stackrel{iid}{\sim}}\xspace}
\def\ind{\ensuremath{\perp\!\!\!\perp}\xspace}
\def\nind{\ensuremath{\perp\!\!\!\!\big\vert\!\!\!\!\perp}\xspace}
\def\Xon{\ensuremath{X_1,\dots,X_n}\xspace}
\def\xon{\ensuremath{x_1,\dots,x_n}\xspace}
\def\giv{\ensuremath{\,|\,}}
\def\Giv{\ensuremath{\,\big|\,}}
\def\GIV{\ensuremath{\,\Big|\,}}
\newcommand{\indicator}[1]{\mathds{1}_{\left\{#1\right\}}}

% Probability, expectation, and variance.
\def\prob{\mathbb{P}}
\renewcommand{\Pr}[2][]{\ensuremath{\prob_{#1}\left[#2\right]}\xspace}
\newcommand{\E}[2][]{\ensuremath{\mathbb{E}_{#1}\left[#2\right]}}
\newcommand{\V}[2][]{\ensuremath{\mathbb{V}_{#1}\left[#2\right]}}
\newcommand{\cov}[2][]{\ensuremath{\mathrm{Cov}_{#1}\left[#2\right]}}
\newcommand{\corr}[2][]{\ensuremath{\rho_{#1}\left[#2\right]}}
\def\sd{\ensuremath{\textsf{sd}}\xspace}
\def\samplemean{\ensuremath{\bar{X}_n}\xspace}
\def\samplevar{\ensuremath{S^2}\xspace}
\def\za{\ensuremath{z_{\alpha}}}
\def\zat{\ensuremath{z_{\alpha/2}}}

% Inference
\def\Ll{\ensuremath{\mathcal{L}}\xspace}
\def\Lln{\ensuremath{\Ll_n}\xspace}
\def\ll{\ensuremath{\ell}}
\def\lln{\ensuremath{\ll_n}}

% Hypothesis testing
\newcommand{\hyp}[2]{%
  \ensuremath{H_0: #1 \quad\text{vs.}\quad H_1: #2}}

% Convergence.
\def\conv{\rightarrow}
\def\convinf{\rightarrow_{n\to\infty}}
\def\pconv{\stackrel{\text{\tiny{P}}}{\rightarrow}}
\def\npconv{\stackrel{\text{\tiny{P}}}{\nrightarrow}}
\def\dconv{\stackrel{\text{\tiny{D}}}{\rightarrow}}
\def\ndconv{\stackrel{\text{\tiny{D}}}{\nrightarrow}}
\def\qmconv{\stackrel{\text{\tiny{qm}}}{\rightarrow}}
\def\nqmconv{\stackrel{\text{\tiny{qm}}}{\nrightarrow}}
\def\asconv{\stackrel{\text{\tiny{as}}}{\rightarrow}}
\def\nasconv{\stackrel{\text{\tiny{as}}}{\nrightarrow}}

%
% Distributions
%
\newcommandx{\unif}[1][1={a,b}]{\textrm{Unif}\left({#1}\right)}
\newcommandx{\unifd}[1][1={a,\ldots,b}]{\textrm{Unif}\left\{{#1}\right\}}
\newcommandx{\dunif}[3][1=x,2=a,3=b]{\frac{I(#2<#1<#3)}{#3-#2}}
\newcommandx{\dunifd}[3][1=x,2=a,3=b]{\frac{I(#2\le#1\le#3)}{#3-#2+1}}
\newcommandx{\punif}[3][1=x,2=a,3=b]{
  \begin{cases} 0 & #1 < #2 \\ \frac{#1-#2}{#3-#2} & #2 < #1 < #3 \\ 1 & #1 > #3\\\end{cases}}
\newcommandx{\punifd}[3][1=x,2=a,3=b]{
  \begin{cases} 0 & #1 < #2\\ \frac{\lfloor#1\rfloor-#2+1}{#3-#2+1} & #2 \le #1 \le #3 \\ 1 & #1 > #3\\ \end{cases}}

% Bernoulli
\newcommandx\bern[1][1=p]{\textrm{Bern}\left({#1}\right)}
\newcommandx\dbern[2][1=x,2=p]{#2^{#1} \left(1-#2\right)^{1-#1}}
\newcommandx\pbern[2][1=x,2=p]{\left(1-#2\right)^{1-#1}}

% Binomial
\newcommandx\bin[1][1={n,p}]{\textrm{Bin}\left(#1\right)}
\newcommandx\dbin[3][1=x,2=n,3=p]{\binom{#2}{#1}#3^#1\left(1-#3\right)^{#2-#1}}

% Multinomial
\newcommandx\mult[1][1={n,p}]{\textrm{Mult}\left(#1\right)}
\newcommandx\dmult[3][1=x,2=n,3=p]{\frac{#2!}{#1_1!\ldots#1_k!}#3_1^{#1_1}\cdots#3_k^{#1_k}}

% Hypergeometric
\newcommandx\hyper[1][1={N,m,n}]{\textrm{Hyp}\left({#1}\right)}
\newcommandx\dhyper[4][1=x,2=N,3=m,4=n]{\frac{\binom{#3}{#1}\binom{#2-#3}{#4-#1}}{\binom{#2}{#4}}}

% Negative Binomial
\newcommandx\nbin[1][1={r,p}]{\textrm{NBin}\left({#1}\right)}
\newcommandx\dnbin[3][1=x,2=r,3=p]{\binom{#1+#2-1}{#2-1}#3^#2(1-#3)^#1}
\newcommandx\pnbin[3][1=x,2=r,3=p]{I_#3(#2,#1+1)}

% Geometric
\newcommandx\geo[1][1=p]{\textrm{Geo}\left(#1\right)}
\newcommandx\dgeo[2][1=x,2=p]{#2(1-#2)^{#1-1}}
\newcommandx\pgeo[2][1=x,2=p]{1-(1-#2)^#1}

% Poisson
\newcommandx\pois[1][1=\lambda]{\textrm{Po}\left({#1}\right)}
\newcommandx\dpois[2][1=x,2=\lambda]{\frac{#2^#1 e^{-#2}}{#1!}}
\newcommandx\ppois[2][1=x,2=\lambda]{e^{-#2}\sum_{i=0}^#1\frac{#2^i}{i!}}

% Normal
\newcommandx\norm[1][1={\mu,\sigma^2}]{\mathcal{N}\left({#1}\right)}
\newcommandx\dnorm[3][1=x,2=\mu,3=\sigma]%
{\frac{1}{#3\sqrt{2\pi}}\Exp{-\frac{\left(#1-#2\right)^2}{2 #3^2}}}
\newcommandx\pnorm[1][1=x]{\Phi\left({#1}\right)}
\newcommandx\qnorm[1]{\Phi^{-1}\left({#1}\right)}

% Multivariate Normal
\newcommandx\mvn[1][1={\mu,\Sigma}]{\mathrm{MVN}\left({#1}\right)}

% Exponential
\newcommandx\ex[1][1=\beta]{\textrm{Exp}\left(#1\right)}
\newcommandx\dex[2][1=x,2=\beta]{\frac{1}{#2}e^{-#1/#2}}
\newcommandx\pex[2][1=x,2=\beta]{1-e^{-#1/#2}}

% Gamma
\newcommandx\gam[1][1={\alpha,\beta}]{\textrm{Gamma}\left({#1}\right)}
\newcommandx\dgamma[3][1=x,2=\alpha,3=\beta]%
{\frac{#3^{#2}}{\Gamma\left( #2 \right)} #1^{#2-1}e^{-#3#1}}

% InverseGamma
\newcommandx\invgamma[1][1={\alpha,\beta}]{\textrm{InvGamma}\left({#1}\right)}
\newcommandx\dinvgamma[3][1=x,2=\alpha,3=\beta]%
{\frac{#3^{#2}}{\Gamma\left(#2\right)}#1^{-#2-1}e^{-#3/#1}}
\newcommandx\pinvgamma[3][1=x,2=\alpha,3=\beta]%
{\frac{\Gamma\left(#2,\frac{#3}{#1}\right)}{\Gamma\left(#2\right)}}

% Beta
\newcommandx\bet[1][1={\alpha,\beta}]{\textrm{Beta}\left(#1\right)}
\newcommandx\dbeta[3][1=x,2=\alpha,3=\beta]
{\frac{\Gamma\left(#2+#3\right)}{\Gamma\left(#2\right)\Gamma\left(#3\right)}#1^{#2-1}\left(1-#1\right)^{#3-1}}

% Dirichlet
\newcommandx\dir[1][1={\alpha}]{\textrm{Dir}\left(#1\right)}
\newcommandx\ddir[2][1=x,2=\alpha]{\frac{\Gamma\left(\sum_{i=1}^k #2_i\right)}{\prod_{i=1}^k\Gamma\left(#2_i\right)}\prod_{i=1}^k #1_i^{#2_i-1}}

% Weibull
\newcommandx\weibull[1][1={\lambda,k}]{\textrm{Weibull}\left(#1\right)}
\newcommandx\dweibull[3][1=x,2=\lambda,3=k]{\frac{#3}{#2}
\left(\frac{#1}{#2}\right)^{#3-1} e^{-(#1/#2)^{#3}}}

% Chi-squared
\newcommandx\chisq[1][1=k]{\chi_{#1}^2}

% Zeta
\newcommandx\zet[1][1=s]{\textrm{Zeta}\left(#1\right)}
\newcommandx\dzeta[2][1=x,2=s]{\frac{#1^{-#2}}{\zeta\left(#2\right)}}

% Time Series
\newcommandx\AR[1][1=p]{\mathsf{AR}\left({#1}\right)}
\newcommandx\MA[1][1=q]{\mathsf{MA}\left({#1}\right)}
\newcommandx\ARMA[1][1={p,q}]{\mathsf{ARMA}\left({#1}\right)}
\newcommandx\ARIMA[1][1={p,d,q}]{\mathsf{ARIMA}\left({#1}\right)}
\newcommandx\SARIMA[3][1={p,d,q},2={P,D,Q},3=s]{\mathsf{ARIMA}\left(#1\right) \times \left(#2\right)_{#3}}


% =============================================================================
% Algorithms
% =============================================================================

\newcommandx\step[1][1=t]{^{(#1)}}

--------------------------------------------------------------------------------
/stat-cookbook.tex:
--------------------------------------------------------------------------------
% ----------------------------------------------------------------------------
%
%                         Probability and Statistics
%                                  Cookbook
%
% ----------------------------------------------------------------------------
%
%  Copyright © Matthias Vallentin, 2017
%

\documentclass[landscape]{article}

\usepackage{array}
\usepackage{amsmath,amssymb}
\usepackage{booktabs}
\usepackage{caption}
\usepackage[nodayofweek]{datetime}
\usepackage{environ}
\usepackage{float}
\usepackage{enumitem}
\usepackage{fancyhdr}
\usepackage[landscape,margin=13mm,footskip=1pt,includefoot]{geometry}
\usepackage{graphicx}
\usepackage{hyperref}
\usepackage{multicol}
\usepackage{rotating}
\usepackage{tikz}
\usepackage{threeparttable}
\usepackage{url}
\usepackage{xspace}

% Document version, MAJOR.MINOR.PATCH. Please change with any modification
% according to semantic versioning practices:
% - The major version changes when adding a new section or topic, or making a
%   substantial content change.
% - The minor version changes for non-trivial fixes, corrections, or
%   improvements.
% - The patch version changes for trivial fixes, such as typos in text or
%   formulas.
\newcommand{\version}{0.2.7}

% Probability and Statistics LaTeX shortcuts.
\input{probstat}

% TikZ tweaks
\usetikzlibrary{arrows,shapes}
\usetikzlibrary{decorations.pathreplacing}
\tikzstyle{every picture}+=[remember picture]
\tikzstyle{na} = [baseline=-.5ex]

% Put the page number in the bottom-right corner.
\pagestyle{fancy}
\fancyhf{} % clear all header and footer fields
\fancyhead{}
\fancyfoot[R]{\footnotesize \thepage}
\renewcommand{\headrulewidth}{0pt}

% Further document tweaks.
\parindent=0pt
\setitemize{itemsep=0.2mm,parsep=1pt}
\setenumerate{itemsep=0.2mm,parsep=1pt}

% A type of blue that doesn't look as aggressive as the default 'blue' but
% also distinguishes well from black while not appearing too light.
\definecolor{trueblue}{rgb}{0.0, 0.45, 0.81}

% Link style (hyperref package)
\hypersetup{
  colorlinks=true,    % false: boxed links; true: colored links
  linkcolor=black,    % color of internal links
  citecolor=trueblue, % color of links to bibliography
  filecolor=trueblue, % color of file links
  urlcolor=trueblue   % color of external links
}

% Personal
\def\email{info@statistics.zone}
\def\web{\url{http://statistics.zone/}}

% An itemize list with a title that avoids a break between title and list.
\newenvironment{titemize}[1]{
  \begin{minipage}[h]{\columnwidth}
    #1
    \begin{itemize}
}{
    \end{itemize}
  \end{minipage}
}

\begin{document}

\thispagestyle{empty}
\begin{center}
  \vspace*{\fill}
  \textsc{\Huge Probability and Statistics\\[2ex] \huge Cookbook}
  \vfill
  \footnotesize{
    Version \version\\[1ex]
    \today\\[1ex]
    \web\\[1ex]
    Copyright \copyright{}
    \href{http://matthias.vallentin.net}{Matthias Vallentin}\\
  }
\end{center}

\newpage

\thispagestyle{empty}
\begin{multicols*}{3}
\tableofcontents
\vfill
\hrule
\vspace{5pt}
{\footnotesize This cookbook integrates various topics in probability theory
and statistics, based on literature~\cite{Hoel72,Wasserman03,Shumway06}
and in-class material from courses of the statistics department at the
University of California, Berkeley, but is also influenced by other
sources~\cite{Steger01,Steger02}.
If you find errors or have suggestions for
improvements, please get in touch at \web.}
\end{multicols*}

\newpage

\section{Distribution Overview}

\subsection{Discrete Distributions}

\begin{center}
  \small
  \begin{tabular}{@{}l*6{>{\begin{math}\displaystyle}c<{\end{math}}}@{}}
    \toprule &&&&&& \\[-2ex]
    & \text{Notation}\footnotemark
    & F_X(x) & f_X(x) & \E{X} & \V{X} & M_X(s) \\[1ex]

    \midrule

    Uniform & \unifd & \punifd & \dunifd &
    \frac{a+b}{2} & \frac{(b-a+1)^2-1}{12} &
    \frac{e^{as}-e^{(b+1)s}}{(b-a+1)\left(1-e^{s}\right)} \\[3ex]

    Bernoulli & \bern & \pbern & \dbern &
    p & p(1-p) &
    1-p+pe^s \\[3ex]

    Binomial & \bin & I_{1-p}(n-x,x+1) & \dbin &
    np & np(1-p) &
    (1-p+pe^s)^n \\[3ex]

    Multinomial & \mult & & \dmult \quad \sum_{i=1}^k x_i = n &
    \begin{pmatrix} np_1 \\ \vdots \\ np_k \end{pmatrix} &
    \begin{pmatrix}
      np_1(1-p_1) & -np_1p_2 \\
      -np_2p_1    & \ddots
    \end{pmatrix} &
    \left( \sum_{i=1}^k p_i e^{s_i} \right)^n \\[3ex]

    Hypergeometric & \hyper &
    \approx \Phi\left(\displaystyle\frac{x-np}{\sqrt{np(1-p)}}\right)
    \quad p=\frac{m}{N} &
    \dhyper &
    \frac{nm}{N} & \frac{nm(N-n)(N-m)}{N^2(N-1)} & \\[3ex]

    Negative Binomial & \nbin & \pnbin & \dnbin &
    r\frac{1-p}{p} & r\frac{1-p}{p^2} &
    \left(\frac{pe^s}{1-(1-p)e^s}\right)^r \\[3ex]

    Geometric & \geo &
    \pgeo \quad x\in\mathbb N^+ &
    \dgeo \quad x\in\mathbb N^+ &
    \frac{1}{p} & \frac{1-p}{p^2} &
    \frac{pe^s}{1-(1-p)e^s} \\[3ex]

    Poisson & \pois & \ppois & \dpois &
    \lambda & \lambda &
    e^{\lambda(e^s-1)}\\[3ex]

    \bottomrule
  \end{tabular}
\end{center}

\footnotetext{We use the notation $\gamma(s,x)$ and $\Gamma(x)$ to refer to the
Gamma functions (see \S\ref{sec:math:gamma}), and use $\text{B}(x,y)$ and $I_x$
to refer to the Beta functions (see \S\ref{sec:math:beta}).}

\pagebreak

\begin{figure}[H]
  \includegraphics[scale=0.35]{figs/uniform-pmf.pdf}
  \includegraphics[scale=0.35]{figs/binomial-pmf.pdf}
  \includegraphics[scale=0.35]{figs/geometric-pmf.pdf}
  \includegraphics[scale=0.35]{figs/poisson-pmf.pdf}

  \includegraphics[scale=0.35]{figs/uniform-cdf-discrete.pdf}
  \includegraphics[scale=0.35]{figs/binomial-cdf.pdf}
  \includegraphics[scale=0.35]{figs/geometric-cdf.pdf}
  \includegraphics[scale=0.35]{figs/poisson-cdf.pdf}
\end{figure}

\subsection{Continuous Distributions}

\begin{threeparttable}
  \small
  %\newcolumntype{L}{>{\varwidth[c]{\linewidth}}l<{\endvarwidth}}
  \newcolumntype{M}{>{\begin{math}\displaystyle}c<{\end{math}}}
  \begin{tabular}{@{}l*6{M}@{}}
    \toprule &&&&&& \\[-2ex]
    & \text{Notation}
    & F_X(x) & f_X(x) & \E{X} & \V{X} & M_X(s) \\[1ex]

    \midrule

    Uniform & \unif & \punif & \dunif &
    \frac{a+b}{2} & \frac{(b-a)^2}{12} &
    \frac{e^{sb}-e^{sa}}{s(b-a)} \\[3ex]

    Normal & \norm &
    \Phi(x)=\displaystyle\int_{-\infty}^x \phi(t)\,dt &
    \phi(x)=\dnorm &
    \mu & \sigma^2 &
    \Exp{\mu s + \frac{\sigma^2s^2}{2}}\\[3ex]

    Log-Normal & \ln\norm&
    \frac{1}{2}+\frac{1}{2} \erf\left[\frac{\ln x-\mu}{\sqrt{2\sigma^2}}\right] &
    \frac{1}{x\sqrt{2\pi\sigma^2}} \Exp{-\frac{(\ln x - \mu)^2}{2\sigma^2}} &
    e^{\mu+\sigma^2/2} &
    (e^{\sigma^2}-1) e^{2\mu+\sigma^2} &
    \\[3ex]

    Multivariate Normal & \mvn & &
    (2\pi)^{-k/2} |\Sigma|^{-1/2} e^{-\frac{1}{2}(x-\mu)^T \Sigma^{-1}(x-\mu)} &
    \mu & \Sigma &
    \Exp{\mu^T s + \frac{1}{2} s^T \Sigma s}\\[3ex]

    Student's $t$ & \text{Student}(\nu)
    & I_{\frac{x+\sqrt{x^2+\nu}}{2\sqrt{x^2+\nu}}}
      \left( \frac{\nu}{2},\frac{\nu}{2} \right)
    & \frac{\Gamma\left(\frac{\nu+1}{2}\right)}
           {\sqrt{\nu\pi}\Gamma\left(\frac{\nu}{2}\right)}
      \left(1+\frac{x^2}{\nu}\right)^{-(\nu+1)/2}
    & 0 \quad \nu > 1
    & \begin{cases}
        \displaystyle\frac{\nu}{\nu-2} & \nu > 2 \\
        \infty & 1 < \nu \le 2
      \end{cases}
    & \\[3ex]

    Chi-square & \chisq &
    \frac{1}{\Gamma(k/2)} \gamma\left(\frac{k}{2}, \frac{x}{2}\right) &
    \frac{1}{2^{k/2} \Gamma(k/2)} x^{k/2-1} e^{-x/2}&
    k & 2k &
    (1-2s)^{-k/2} \; s<1/2\\[3ex]

    F & \text{F}(d_1,d_2) &
    I_\frac{d_1x}{d_1x+d_2}\left(\frac{d_1}{2},\frac{d_2}{2}\right) &
    \frac{\sqrt{\frac{(d_1x)^{d_1} d_2^{d_2}}{(d_1x+d_2)^{d_1+d_2}}}}
         {x\mathrm{B}\left(\frac{d_1}{2},\frac{d_2}{2}\right)} &
    \frac{d_2}{d_2-2} %\; d_2 > 2
    & \frac{2d_2^2(d_1+d_2-2)}{d_1(d_2-2)^2(d_2-4)} %\; d_2 > 4
    & \\[3ex]

    Exponential\tnote{$\ast$} & \ex & \pex & \dex &
    \beta & \beta^2 &
    \frac{1}{1-\beta s} \left(s<\frac{1}{\beta}\right) \\[3ex]

    Gamma\tnote{$\ast$} & \gam &
    \frac{\gamma(\alpha,\beta x)}{\Gamma(\alpha)} & \dgamma &
    \frac{\alpha}{\beta} & \frac{\alpha}{\beta^2} &
    \left(\frac{1}{1-\frac{s}{\beta}} \right)^\alpha \left(s<\beta\right)\\[3ex]

    Inverse Gamma & \invgamma & \pinvgamma & \dinvgamma &
    \frac{\beta}{\alpha-1} \; \alpha>1 &
    \frac{\beta^2}{(\alpha-1)^2(\alpha-2)} \; \alpha > 2 &
    \frac{2(-\beta s)^{\alpha/2}}{\Gamma(\alpha)}K_\alpha
    \left( \sqrt{-4\beta s} \right)\\[3ex]

    Dirichlet & \dir & & \ddir &
    \frac{\alpha_i}{\sum_{i=1}^k \alpha_i} &
    \frac{\E{X_i}(1-\E{X_i})}{\sum_{i=1}^k\alpha_i + 1} & \\[3ex]

    Beta & \bet & I_x(\alpha,\beta)& \dbeta &
    \frac{\alpha}{\alpha+\beta} &
    \frac{\alpha\beta}{(\alpha+\beta)^2(\alpha+\beta+1)} &
    1+\sum_{k=1}^{\infty} \left( \prod_{r=0}^{k-1}
    \frac{\alpha+r}{\alpha+\beta+r} \right) \frac{s^k}{k!} \\[3ex]

    Weibull & \mathrm{Weibull}(\lambda, k) & 1 - e^{-(x/\lambda)^k} & \dweibull &
    \lambda \Gamma\left(1 + \frac{1}{k} \right) &
    \lambda^2 \Gamma\left(1 + \frac{2}{k}\right) - \mu^2 &
    \sum_{n=0}^\infty \frac{s^n \lambda^n}{n!} \Gamma\left(1+\frac{n}{k}\right)
    \\[3ex]

    Pareto & \mathrm{Pareto}(x_m, \alpha) &
    1 - \left(\frac{x_m}{x} \right)^\alpha \; x\ge x_m &
    \alpha\frac{x_m^\alpha}{x^{\alpha+1}} \quad x\ge x_m&
    \frac{\alpha x_m}{\alpha-1} \; \alpha>1 &
    \frac{x_m^2\alpha}{(\alpha-1)^2(\alpha-2)} \; \alpha>2 &
    \alpha(-x_m s)^\alpha \Gamma(-\alpha,-x_m s) \; s<0\\[3ex]

    \bottomrule
  \end{tabular}
  \begin{tablenotes}
  \item[$\ast$] For the Exponential distribution, $\beta$ denotes the
    \emph{scale} (mean), i.e., $\beta=\frac{1}{\lambda}$ for rate $\lambda$;
    for the Gamma distribution, $\beta$ denotes the \emph{rate}. Some
    textbooks swap these parameterizations~\cite{Wasserman03}.
  \end{tablenotes}
\end{threeparttable}
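
Moments follow from the $M_X(s)$ column by differentiation at zero; as an
illustrative check, for $X \dist \gam$,
\[\E{X} = M_X'(0)
  = \left.\frac{\alpha}{\beta}\left(1-\frac{s}{\beta}\right)^{-\alpha-1}\right|_{s=0}
  = \frac{\alpha}{\beta}\]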

\begin{figure}[H]
  \includegraphics[scale=0.35]{figs/uniform-pdf.pdf}
  \includegraphics[scale=0.35]{figs/normal-pdf.pdf}
  \includegraphics[scale=0.35]{figs/lognormal-pdf.pdf}
  \includegraphics[scale=0.35]{figs/student-pdf.pdf}
  \includegraphics[scale=0.35]{figs/chisquare-pdf.pdf}
  \includegraphics[scale=0.35]{figs/f-pdf.pdf}
  \includegraphics[scale=0.35]{figs/exponential-pdf.pdf}
  \includegraphics[scale=0.35]{figs/gamma-pdf.pdf}
  \includegraphics[scale=0.35]{figs/invgamma-pdf.pdf}
  \includegraphics[scale=0.35]{figs/beta-pdf.pdf}
  \includegraphics[scale=0.35]{figs/weibull-pdf.pdf}
  \includegraphics[scale=0.35]{figs/pareto-pdf.pdf}
\end{figure}

\begin{figure}[H]
  \includegraphics[scale=0.35]{figs/uniform-cdf-continuous.pdf}
  \includegraphics[scale=0.35]{figs/normal-cdf.pdf}
  \includegraphics[scale=0.35]{figs/lognormal-cdf.pdf}
  \includegraphics[scale=0.35]{figs/student-cdf.pdf}
  \includegraphics[scale=0.35]{figs/chisquare-cdf.pdf}
  \includegraphics[scale=0.35]{figs/f-cdf.pdf}
  \includegraphics[scale=0.35]{figs/exponential-cdf.pdf}
  \includegraphics[scale=0.35]{figs/gamma-cdf.pdf}
  \includegraphics[scale=0.35]{figs/invgamma-cdf.pdf}
  \includegraphics[scale=0.35]{figs/beta-cdf.pdf}
  \includegraphics[scale=0.35]{figs/weibull-cdf.pdf}
  \includegraphics[scale=0.35]{figs/pareto-cdf.pdf}
\end{figure}

\begin{multicols*}{2}

\section{Probability Theory}

Definitions
\begin{itemize}
  \item Sample space $\Omega$
  \item Outcome (point or element) $\omega \in \Omega$
  \item Event $A \subseteq \Omega$
  \item $\sigma$-algebra $\mathcal{A}$
    \begin{enumerate}
      \item $\varnothing \in \mathcal{A}$
      \item $A_1,A_2,\dots \in \mathcal{A}
        \imp \bigcup_{i=1}^\infty A_i \in \mathcal{A}$
      \item $A \in \mathcal{A} \imp \comp{A} \in \mathcal{A}$
    \end{enumerate}
  \item Probability Distribution $\prob$
    \begin{enumerate}
      \item $\Pr{A} \ge 0 \quad \forall A$
      \item $\Pr{\Omega} = 1$
      \item $\Pr{\displaystyle\bigsqcup_{i=1}^\infty A_i}
        = \displaystyle\sum_{i=1}^\infty \Pr{A_i}$
    \end{enumerate}
  \item Probability space $(\Omega,\mathcal{A},\prob)$
\end{itemize}

Properties
\begin{itemize}
  \item $\Pr{\varnothing} = 0$
  \item $B = \Omega \cap B = (A \cup \comp{A}) \cap B
    = (A \cap B) \cup (\comp{A} \cap B)$
  \item $\Pr{\comp{A}} = 1 - \Pr{A}$
  \item $\Pr{B} = \Pr{A \cap B} + \Pr{\comp{A} \cap B}$
  \item $\comp{\left(\bigcup_n A_n\right)} = \bigcap_n \comp{A_n}
    \quad
    \comp{\left(\bigcap_n A_n\right)} = \bigcup_n \comp{A_n}
    \qquad$
    \textsc{DeMorgan}
  \item $\Pr{\bigcup_n A_n}
    = 1 - \Pr{\bigcap_n \comp{A_n}}$
  \item $\Pr{A \cup B} = \Pr{A} + \Pr{B} - \Pr{A \cap B}\\[1ex]
    \imp \Pr{A \cup B} \le \Pr{A} + \Pr{B}$
  \item $\Pr{A \cup B}
    = \Pr{A \cap \comp{B}} + \Pr{\comp{A} \cap B} + \Pr{A \cap B}$
  \item $\Pr{A \cap \comp{B}} = \Pr{A} - \Pr{A \cap B}$
\end{itemize}

Continuity of Probabilities
\begin{itemize}
  \item $A_1 \subset A_2 \subset \dots \imp \limn \Pr{A_n} = \Pr{A}
    \quad\text{where } A = \bigcup_{i=1}^\infty A_i$
  \item $A_1 \supset A_2 \supset \dots \imp \limn \Pr{A_n} = \Pr{A}
    \quad\text{where } A = \bigcap_{i=1}^\infty A_i$
\end{itemize}

Independence \ind
\[A \ind B \eqv \Pr{A \cap B} = \Pr{A}\Pr{B}\]

Conditional Probability
\[\Pr{A \giv B} = \frac{\Pr{A \cap B}}{\Pr{B}} \qquad \Pr{B} > 0\]

Law of Total Probability
\[ \Pr{B} = \sum_{i=1}^n \Pr{B|A_i}\Pr{A_i}
  \qquad \Omega = \bigsqcup_{i=1}^n A_i\]

\textsc{Bayes' Theorem}
\[\Pr{A_i \giv B}
  = \frac{\Pr{B \giv A_i}\Pr{A_i}}{\sum_{j=1}^n \Pr{B \giv A_j}\Pr{A_j}}
  \qquad \Omega = \bigsqcup_{i=1}^n A_i\]

Inclusion-Exclusion Principle
\[\biggl|\bigcup_{i=1}^n A_i\biggr| = \sum_{r=1}^n(-1)^{r-1}
  \sum_{1 \le i_1 < \dots < i_r \le n}\biggl|\bigcap_{j=1}^r A_{i_j}\biggr|\]

\section{Random Variables}

Random Variable (RV)
\[X: \Omega \to \R\]

Probability Mass Function (PMF)
\[f_X(x) = \Pr{X = x} = \Pr{\{\omega\in\Omega:X(\omega) = x\}}\]

Probability Density Function (PDF)
\[\Pr{a \le X \le b} = \int_a^b f(x)\dx\]

Cumulative Distribution Function (CDF)
\[F_X:\R \to [0,1] \qquad F_X(x) = \Pr{X \le x}\]

\begin{enumerate}
  \item Nondecreasing: $x_1 < x_2 \imp F(x_1) \le F(x_2)$
  \item Normalized: $\lim_{x\to -\infty} F(x) = 0$ and
    $\lim_{x\to \infty} F(x) = 1$
  \item Right-Continuous: $\lim_{y\downarrow x} F(y) = F(x)$
\end{enumerate}

\[\Pr{a\le Y\le b \giv X=x} = \int_a^b f_{Y|X}(y\giv x) \dy \qquad a \le b\]
\[ f_{Y|X}(y\giv x) = \frac{f(x,y)}{f_X(x)} \]

Independence
\begin{enumerate}
  \item $\Pr{X \le x, Y \le y} = \Pr{X \le x}\Pr{Y \le y}$
  \item $f_{X,Y}(x,y) = f_X(x)f_Y(y)$
\end{enumerate}

\subsection{Transformations}

Transformation function
\[Z = \transform(X)\]

Discrete
\[f_Z(z) = \Pr{\transform(X) = z} = \Pr{\{x:\transform(x) = z\}}
  = \Pr{X \in \transform^{-1}(z)} = \sum_{x \in \transform^{-1}(z)} \!\!\!f_X(x)\]

Continuous
\[F_Z(z) = \Pr{\transform(X) \le z} = \int_{A_z} f(x) \dx \quad
  \text{with } A_z = \{x:\transform(x) \le z\}\]

Special case if $\transform$ strictly monotone
\[f_Z(z)
  = f_X(\transform^{-1}(z))
    \left|\frac{d}{dz}\transform^{-1}(z)\right|
  = f_X(x)\left|\frac{dx}{dz}\right|
  = f_X(x)\frac{1}{|J|}\]

The Rule of the Lazy Statistician
\[\E{Z} = \int \transform(x) \dfx\]
\[\E{I_A(X)} = \int I_A(x) \dfx = \int_A \dfx = \Pr{X \in A}\]

Convolution
\begin{itemize}
  \item $ Z:=X+Y \qquad
    f_Z(z)=\displaystyle\int_{-\infty}^{\infty} f_{X,Y}(x,z-x)\,dx
    \;\stackrel{X,Y \ge 0}{=}\; \int_0^z f_{X,Y}(x,z-x)\,dx$
  \item $ Z:=|X-Y| \qquad
    f_Z(z)=\displaystyle2\int_0^\infty f_{X,Y}(x,z+x)\,dx$
    %\;\stackrel{X,Y \ge 0}{=}\; \int_0^\infty f_{X,Y}(x,z+x)\,dx$
  \item $ Z:=\displaystyle\frac{X}{Y} \qquad
    f_Z(z)=\displaystyle\int_{-\infty}^{\infty} |y| f_{X,Y}(yz,y)\,dy
    \;\stackrel{\ind}{=}\; \int_{-\infty}^{\infty} |y| f_X(yz)f_Y(y)\,dy$
\end{itemize}

% \subsection{Joint Distribution}
% \begin{itemize}
% \item $f(x,y) = \Pr{X \le k, Y \le m}
%   = \displaystyle\int_{-\infty}^k\int_{-\infty}^m f(x,y)\,dy\,dx$
% \item $\Pr{a < X \le b, c < y \le d} = F(b,d) - F(a,d) - F(b,c) + F(a,c)$
% \item $f_X(x) = \displaystyle\int_{-\infty}^\infty f(x,y)\,dy \qquad
%   f_Y(y) = \displaystyle\int_{-\infty}^\infty f(x,y)\,dx$
% \end{itemize}

% Order Statistics
% \begin{itemize}
% \item $U_i\ind U_j$ continuous \textsc{RVs} with common density $f$
% \item $X_1(\omega) < \dots < X_n(\omega)$ permuted set of $U_i$'s
% \item $X_k = $ \emph{k$^{th}$ order statistic}
% \item $X_1(\omega) = \min(U_1(\omega),\dots,U_n(\omega))$
% \item $X_n(\omega) = \max(U_1(\omega),\dots,U_n(\omega))$
% \item $R(\omega) = X_n(\omega) - X_1(\omega)$
% \end{itemize}

\section{Expectation}

Definition and properties
\begin{itemize}
  \item $\E{X} = \mu_X = \displaystyle \int x \dfx =
    \begin{cases}
      \displaystyle\sum_x xf_X(x) & \text{X discrete} \\\\
      \displaystyle\int xf_X(x)\dx & \text{X continuous}
    \end{cases}$
  \item $\Pr{X=c}=1 \imp \E{X} = c$
  \item $\E{cX} = c\,\E{X}$
  \item $\E{X+Y} = \E{X}+\E{Y}$
  \item $\E{XY} = \displaystyle\iint xy\,f_{X,Y}(x,y)\dx\dy$
  \item $\E{\transform(X)} \neq \transform(\E{X}) \qquad$
    (cf.~\hyperref[jensen]{\textsc{Jensen} inequality})
  \item $\Pr{X \ge Y} = 1 \imp \E{X}\ge\E{Y}$
  \item $\Pr{X=Y} = 1 \imp \E{X}=\E{Y}$
  % \item $\Pr{\lvert Y\rvert\le c} = 1 \imp \E{Y}<\infty
  %   \wedge \lvert\E{X}\rvert\le c$
  \item $\E{X} = \displaystyle\sum_{x=1}^\infty \Pr{X\ge x}$
    \qquad $X$ integer-valued, $X \ge 0$
\end{itemize}

Sample mean
\[\samplemean = \frac{1}{n}\sum_{i=1}^n X_i\]

\begin{titemize}{Conditional expectation}
  \item $\E{Y\giv X=x} = \displaystyle\int y f(y\giv x)\dy$
  \item $\E{X} = \E{\E{X\giv Y}}$
  \item $\E{\transform(X,Y)\giv X=x}
    = \displaystyle\int_{-\infty}^\infty \transform(x,y)f_{Y|X}(y\giv x)\dy$
  \item $\E{\transform(Y,Z)\giv X=x} =
    \displaystyle\int_{-\infty}^\infty\transform(y,z)
    f_{(Y,Z)|X}(y,z\giv x)\,dy\,dz$
  \item $\E{Y+Z\giv X} = \E{Y\giv X} + \E{Z\giv X}$
  \item $\E{\transform(X)Y\giv X} = \transform(X)\E{Y\giv X}$
  \item $\E{\transform(X,Y)} = \E[X]{\E{\transform(X,Y)\giv X}}$
  \item $\E{Y\giv X} = c \imp \cov{X,Y}=0$
\end{titemize}

\section{Variance}

\begin{titemize}{Definition and properties}
  \item $\V{X} = \sigma_X^2 = \E{(X-\E{X})^2} = \E{X^2} - \E{X}^2$
  \item $\V{\displaystyle\sum_{i=1}^n X_i} =
    \displaystyle\sum_{i=1}^n \V{X_i} + \sum_{i\ne j}\cov{X_i,X_j}$
  \item $\V{\displaystyle\sum_{i=1}^n X_i} =
    \displaystyle\sum_{i=1}^n\V{X_i} \quad$ if $X_i \ind X_j$
\end{titemize}

Standard deviation
\[\sd[X] = \sqrt{\V{X}} = \sigma_X\]

Covariance
\begin{itemize}
  \item $\cov{X,Y} = \E{(X-\E{X})(Y-\E{Y})} = \E{XY}-\E{X}\E{Y}$
  \item $\cov{X,a} = 0$
  \item $\cov{X,X} = \V{X}$
  \item $\cov{X,Y} = \cov{Y,X}$
  \item $\cov{aX,bY} = ab\cov{X,Y}$
  \item $\cov{X+a,Y+b} = \cov{X,Y}$
  \item $\cov{\displaystyle\sumin X_i, \sumjm Y_j}
    = \displaystyle\sumin\sumjm\cov{X_i, Y_j}$
\end{itemize}

Correlation
\[\corr{X,Y} = \displaystyle\frac{\cov{X,Y}}{\sqrt{\V{X}\V{Y}}}\]

Independence
\[X\ind Y \imp \corr{X,Y} = 0 \eqv \cov{X,Y} = 0 \eqv \E{XY}=\E{X}\E{Y}\]

Sample variance
\[\samplevar = \frac{1}{n-1}\sum_{i=1}^n(X_i-\samplemean)^2\]

Conditional variance
\begin{itemize}
  \item $\V{Y\giv X} = \E{(Y-\E{Y\giv X})^2\giv X}
    = \E{Y^2\giv X}-\E{Y\giv X}^2$
  \item $\V{Y} = \E{\V{Y\giv X}}+\V{\E{Y\giv X}}$
\end{itemize}

\section{Inequalities}

\textsc{Cauchy-Schwarz}
\[\E{XY}^2 \le \E{X^2}\E{Y^2}\]

\textsc{Markov}
\[\Pr{\transform(X) \ge t}\le\frac{\E{\transform(X)}}{t}
  \qquad \transform \ge 0,\; t > 0\]

\textsc{Chebyshev}
\[\Pr{\lvert X-\E{X}\rvert \ge t} \le \frac{\V{X}}{t^2}\]

\textsc{Chernoff}
\[\Pr{X \ge (1+\delta)\mu}
  \le \left(\frac{e^\delta}{(1+\delta)^{1+\delta}}\right)^{\!\mu}
  \quad \delta>0\]

\textsc{Hoeffding}
\[X_1,\ldots,X_n \; \textrm{independent}
  \;\wedge\; \Pr{X_i\in[a_i,b_i]} = 1 \;\wedge\; 1 \le i \le n \]
\[\Pr{\Xbar-\E{\Xbar} \ge t} \le e^{-2nt^2}
  \quad t>0 \quad (a_i=0,\; b_i=1)\]
\[\Pr{|\Xbar-\E{\Xbar}| \ge t} \le 2\Exp{-\frac{2n^2t^2}{\sumin(b_i-a_i)^2}}
  \quad t>0\]

\textsc{Jensen}\label{jensen}
\[\E{\transform(X)} \ge \transform(\E{X}) \quad
  \transform \text{ convex}\]

\section{Distribution Relationships}

Binomial
\begin{itemize}
  \item $X_i \dist \bern \imp \displaystyle\sum_{i=1}^n X_i \dist \bin$
  \item $X\dist\bin, Y\dist\bin[m,p] \imp X+Y\dist\bin[n+m,p]$
  \item $\limn\bin = \pois[np] \qquad$ ($n$ large, $p$ small)
  \item $\limn\bin = \norm[np,np(1-p)] \qquad$
    ($n$ large, $p$ far from 0 and 1)
\end{itemize}

Negative Binomial
\begin{itemize}
  \item $X\dist \nbin[1,p] \imp X+1 \dist \geo$
  \item $X\dist \nbin[r,p] \imp X \disteq \displaystyle\sum_{i=1}^r (Y_i - 1)
    \quad Y_i \distiid \geo$
  \item $X_i\dist \nbin[r_i,p] \wedge X_i \ind X_j
    \imp \sum_i X_i\dist \nbin[\sum_i r_i,p]$
  \item $X\dist \nbin[r,p],\; Y\dist \bin[s+r,p] \imp \Pr{X\le s} = \Pr{Y\ge r}$
\end{itemize}

Poisson
\begin{itemize}
  \item $X_i\dist\pois[\lambda_i] \wedge X_i \ind X_j
    \imp \displaystyle\sumin X_i \dist \pois[\displaystyle\sumin \lambda_i]$
  \item $X_i\dist\pois[\lambda_i] \wedge X_i \ind X_j
    \imp X_i\,\left|\displaystyle\sumjn X_j\right. \dist
    \bin[\displaystyle\sumjn X_j,\displaystyle\frac{\lambda_i}{\sumjn\lambda_j}]$
\end{itemize}
\dist 630 | \bin[\displaystyle\sumjn X_j,\displaystyle\frac{\lambda_i}{\sumjn\lambda_j}]$ 631 | \end{itemize} 632 | 633 | Exponential 634 | \begin{itemize} 635 | % \item $\forall n \in \mathbb N^+: X_i\dist\ex{\lambda} 636 | \item $X_i\dist\ex \wedge X_i \ind X_j 637 | \imp \displaystyle\sumin X_i\dist \gam[n,\beta]$ 638 | \item Memoryless property: $\Pr{X>x+y\giv X>y}=\Pr{X>x}$ 639 | \end{itemize} 640 | 641 | Normal 642 | \begin{itemize} 643 | \item $X\dist \norm[\mu,\sigma^2] 644 | \imp \left(\frac{X-\mu}{\sigma}\right)\dist\norm[0,1] $ 645 | \item $X\dist \norm[\mu,\sigma^2] \wedge Z = aX+b 646 | \imp Z\dist\norm[a\mu+b,a^2\sigma^2] $ 647 | \item $X_i\dist\norm[\mu_i,\sigma_i^2] \wedge X_i \ind X_j 648 | \imp \sum_i X_i \dist \norm[\sum_i\mu_i,\sum_i\sigma_i^2]$ 649 | \item $\Pr{a < X \le b}= \Phi\left(\frac{b-\mu}{\sigma}\right) 650 | - \Phi\left(\frac{a-\mu}{\sigma}\right) $ 651 | \item $\Phi(-x) = 1 - \Phi(x) \qquad \phi'(x) = -x\phi(x) \qquad 652 | \phi''(x) = (x^2-1)\phi(x)$ 653 | \item Upper quantile of $\norm[0,1]$: $z_{\alpha} = \Phi^{-1}(1-\alpha)$ 654 | \end{itemize} 655 | 656 | Gamma 657 | \begin{itemize} 658 | \item $X\dist\gam \eqv X/\beta \dist\gam[\alpha,1]$ 659 | \item $\gam\dist \sum_{i=1}^\alpha\ex$ 660 | \item $X_i\dist\gam[\alpha_i,\beta] \wedge X_i \ind X_j \imp 661 | \sum_i X_i\dist \gam[\sum_i \alpha_i,\beta]$ 662 | \item $\displaystyle\frac{\Gamma(\alpha)}{\lambda^\alpha} 663 | = \displaystyle\int_0^\infty x^{\alpha-1} e^{-\lambda x} \dx$ 664 | \end{itemize} 665 | 666 | Beta 667 | \begin{itemize} 668 | \item $\displaystyle 669 | \frac{1}{\text{B}(\alpha,\beta)}x^{\alpha-1}(1-x)^{\beta-1} 670 | = \frac{\Gamma(\alpha+\beta)}{\Gamma(\alpha)\Gamma(\beta)} 671 | x^{\alpha-1}(1-x)^{\beta-1} $ 672 | \item $\E{X^k} 673 | = \displaystyle\frac{\text{B}(\alpha+k,\beta)}{\text{B}(\alpha,\beta)} 674 | = \displaystyle\frac{\alpha+k-1}{\alpha+\beta+k-1}\E{X^{k-1}}$ 675 | \item $\bet[1,1] \dist \unif[0,1]$ 676 | \end{itemize} 677 | 678 | \section{Probability and Moment Generating Functions} 679 | 680 | \begin{itemize} 681 | \item $G_X(t) = \E{t^X} \qquad |t| < 1$ 682 | \item $M_X(t) = G_X(e^t) = \E{e^{Xt}} 683 | = \E{ \displaystyle\sum_{i=0}^\infty \frac{(Xt)^i}{i!}} 684 | = \displaystyle\sum_{i=0}^\infty \frac{\E{X^i}}{i!}\cdot t^i$ 685 | \item $\Pr{X=0} = G_X(0)$ 686 | \item $\Pr{X=1}=G_X'(0)$ 687 | \item $\Pr{X=i} = \displaystyle\frac{G_X^{(i)}(0)}{i!}$ 688 | \item $\E{X} = G_X'(1^-)$ 689 | \item $\E{X^k} = M_X^{(k)}(0)$ 690 | \item $\E{\displaystyle\frac{X!}{(X-k)!}} = G_X^{(k)}(1^-)$ 691 | \item $\V{X} = G_X''(1^-) + G_X'(1^-) 692 | - \left(G_X'(1^-)\right)^2$ 693 | \item $G_X(t) = G_Y(t) \imp X \stackrel{d}{=} Y$ 694 | \end{itemize} 695 | 696 | \section{Multivariate Distributions} 697 | 698 | \subsection{Standard Bivariate Normal} 699 | 700 | Let $X,Y\dist\norm[0,1] \wedge X\ind Z$ where 701 | $Y = \rho X + \sqrt{1-\rho^2}Z$\\ 702 | 703 | Joint density 704 | \[ 705 | f(x,y) = \frac{1}{2 \pi \sqrt{1-\rho^2}} 706 | \Exp{-\frac{x^2 + y^2 - 2\rho x y}{2 (1-\rho^2)}} 707 | \] 708 | 709 | Conditionals 710 | \[ 711 | (Y\giv X=x) \dist \norm[\rho x,1-\rho^2] \qquad\text{and}\qquad 712 | (X\giv Y=y) \dist \norm[\rho y,1-\rho^2] 713 | \] 714 | 715 | Independence 716 | \[X \ind Y \eqv \rho = 0\] 717 | 718 | \subsection{Bivariate Normal} 719 | % - http://www.athenasc.com/Bivariate-Normal.pdf 720 | % - http://mathworld.wolfram.com/BivariateNormalDistribution.html 721 | 722 | Let $X\dist\norm[\mu_x,\sigma_x^2]$ 723 | and $Y\dist\norm[\mu_y,\sigma_y^2]$. 
724 | \[f(x,y) = \frac{1}{2 \pi \sigma_x \sigma_y \sqrt{1-\rho^2}}
725 | \Exp{-\frac{z}{2 (1-\rho^2)}}\]
726 | \[ z =
727 | \left[
728 | \left(\frac{x-\mu_x}{\sigma_x}\right)^2
729 | + \left(\frac{y-\mu_y}{\sigma_y}\right)^2
730 | - 2\rho\left(\frac{x-\mu_x}{\sigma_x}\right)
731 | \left(\frac{y-\mu_y}{\sigma_y}\right)
732 | \right]
733 | \]
734 | 
735 | Conditional mean and variance
736 | \[\E{X\giv Y} = \E{X} + \rho\frac{\sigma_X}{\sigma_Y}(Y-\E{Y})\]
737 | \[\V{X\giv Y} = \sigma_X^2 (1-\rho^2)\]
738 | 
739 | \subsection{Multivariate Normal}
740 | 
741 | Covariance matrix $\Sigma$ \quad (Precision matrix $\Sigma^{-1}$)
742 | \[\Sigma =
743 | \begin{pmatrix}
744 | \V{X_1} & \cdots & \cov{X_1,X_k} \\
745 | \vdots & \ddots & \vdots \\
746 | \cov{X_k,X_1} & \cdots & \V{X_k}
747 | \end{pmatrix}\]
748 | 
749 | If $X \dist \norm[\mu,\Sigma]$,
750 | \[f_X(x) = (2\pi)^{-n/2} \left|\Sigma\right|^{-1/2}
751 | \Exp{-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu)} \]
752 | 
753 | Properties
754 | \begin{itemize}
755 | \item $Z \dist \norm[0,1] \wedge X = \mu+\Sigma^{1/2}Z
756 | \imp X \dist \norm[\mu,\Sigma]$
757 | \item $X \dist \norm[\mu,\Sigma] \imp \Sigma^{-1/2}(X-\mu) \dist \norm[0,1]$
758 | \item $X \dist \norm[\mu,\Sigma] \imp AX \dist \norm[A\mu, A\Sigma A^T]$
759 | \item $X \dist \norm[\mu,\Sigma] \wedge a \in \R^k
760 | \imp a^TX \dist \norm[a^T\mu, a^T\Sigma a]$
761 | \end{itemize}
762 | 
763 | \section{Convergence}
764 | 
765 | Let $\{X_1,X_2,\ldots\}$ be a sequence of \rv's and let $X$ be another \rv.
766 | Let $F_n$ denote the \cdf of $X_n$ and let $F$ denote the \cdf of $X$.
767 | 
768 | Types of Convergence
769 | \begin{enumerate}
770 | \item In distribution (weakly, in law): $X_n \dconv X$
771 | \[\limn F_n(t) = F(t) \qquad
772 | \forall t \text{ where } F \text{ continuous}\]
773 | \item In probability: $X_n \pconv X$
774 | \[(\forall \varepsilon > 0) \;
775 | \lim_{n\to\infty} \Pr{|X_n -X| > \varepsilon} = 0\]
776 | \item Almost surely (strongly): $X_n \asconv X$
777 | \[\Pr{\limn X_n=X} = \Pr{\omega\in\Omega: \limn X_n(\omega)=X(\omega)}=1\]
778 | \item In quadratic mean ($L_2$): $X_n \qmconv X$
779 | \[\lim_{n\to\infty} \E{(X_n - X)^2} = 0\]
780 | \end{enumerate}
781 | 
782 | Relationships
783 | \begin{itemize}
784 | \item $X_n \qmconv X \imp X_n \pconv X \imp X_n \dconv X$
785 | \item $X_n \asconv X \imp X_n \pconv X$
786 | \item $X_n \dconv X \wedge (\exists c \in \R) \; \Pr{X=c} = 1
787 | \imp X_n \pconv X$
788 | \item $X_n \pconv X \wedge Y_n \pconv Y
789 | \imp X_n + Y_n \pconv X + Y$
790 | \item $X_n \qmconv X \wedge Y_n \qmconv Y
791 | \imp X_n + Y_n \qmconv X + Y$
792 | \item $X_n \pconv X \wedge Y_n \pconv Y
793 | \imp X_nY_n \pconv XY$
794 | \item $X_n \pconv X \imp \transform(X_n) \pconv \transform(X)$
795 | \item $X_n \dconv X \imp \transform(X_n) \dconv \transform(X)$
796 | \item $X_n \qmconv b \eqv \lim_{n\to\infty} \E{X_n}=b
797 | \wedge \lim_{n\to\infty} \V{X_n} = 0$
798 | \item $X_1,\dots,X_n\; \iid \wedge \E{X}=\mu \wedge \V{X}<\infty
799 | \eqv \samplemean \qmconv \mu$
800 | \end{itemize}
801 | 
802 | \textsc{Slutzky's Theorem}
803 | \begin{itemize}
804 | \item $X_n \dconv X \text{ and } Y_n \pconv c
805 | \imp X_n + Y_n \dconv X + c$
806 | \item $X_n \dconv X \text{ and } Y_n \pconv c
807 | \imp X_nY_n \dconv cX$
808 | \item In general: $X_n \dconv X \text{ and } Y_n \dconv Y
809 | \nimp X_n + Y_n \dconv X + Y$
810 | \end{itemize}
811 | 
812 | \subsection{Law of Large Numbers (LLN)}
813 | 
814 | Let $\{X_1,\ldots,X_n\}$ be a sequence of \iid \rv's, $\E{X_1}=\mu$.
815 | 816 | Weak (WLLN) 817 | \[\samplemean \pconv \mu \qquad n\to\infty\] 818 | 819 | Strong (SLLN) 820 | \[\samplemean \asconv \mu \qquad n\to\infty\] 821 | 822 | \subsection{Central Limit Theorem (CLT)} 823 | 824 | Let $\{X_1,\ldots,X_n\}$ be a sequence of \iid \rv's, $\E{X_1}=\mu$, and 825 | $\V{X_1} = \sigma^2$.\\ 826 | 827 | \[ Z_n 828 | := \displaystyle\frac{\samplemean-\mu}{\sqrt{\V{\samplemean}}} 829 | = \displaystyle\frac{\sqrt{n}(\samplemean - \mu)}{\sigma} 830 | \dconv Z \qquad \text{where } Z\dist \norm[0,1]\] 831 | \[\lim_{n\to\infty} \Pr{Z_n \le z} = \Phi(z) \qquad z \in \mathbb R\] 832 | 833 | CLT notations 834 | \begin{align*} 835 | Z_n &\approx \norm[0,1] \\ 836 | \samplemean &\approx \norm[\mu,\frac{\sigma^2}{n}] \\ 837 | \samplemean - \mu &\approx \norm[0,\frac{\sigma^2}{n}] \\ 838 | \sqrt{n}(\samplemean - \mu) &\approx \norm[0,\sigma^2] \\ 839 | \frac{\sqrt{n}(\samplemean - \mu)}{\sigma} &\approx \norm[0,1] \\ 840 | \end{align*} 841 | 842 | Continuity correction 843 | \[\Pr{\samplemean \le x} \approx 844 | \Phi\left(\displaystyle\frac{x+\frac{1}{2}-\mu}{\sigma/\sqrt{n}}\right)\] 845 | \[\Pr{\samplemean \ge x} \approx 846 | 1-\Phi\left(\displaystyle\frac{x-\frac{1}{2}-\mu}{\sigma/\sqrt{n}}\right)\] 847 | 848 | Delta method 849 | \[Y_n \approx \norm[\mu,\frac{\sigma^2}{n}] \imp 850 | \transform(Y_n) \approx 851 | \norm[\transform(\mu), 852 | \left(\transform'(\mu)\right)^2\frac{\sigma^2}{n}]\] 853 | 854 | \section{Statistical Inference} 855 | 856 | Let $X_1,\cdots,X_n \distiid F$ if not otherwise noted. 857 | 858 | \subsection{Point Estimation} 859 | 860 | \begin{itemize} 861 | \item Point estimator $\that_n$ of $\theta$ is a \rv: 862 | $\that_n = g(X_1,\dots,X_n)$ 863 | \item $\bias(\that_n) = \E{\that_n}-\theta$ 864 | \item Consistency: $\that_n \pconv \theta$ 865 | \item Sampling distribution: $F(\that_n)$ 866 | \item Standard error: $\se(\that_n) = \sqrt{\V{\that_n}}$ 867 | \item Mean squared error: $\mse = \E{(\that_n-\theta)^2} 868 | = \bias(\that_n)^2 + \V{\that_n}$ 869 | \item $\limn \bias(\that_n) = 0 \wedge \limn\se(\that_n) = 0 870 | \imp \that_n$ is consistent 871 | \item Asymptotic normality: 872 | $\displaystyle\frac{\that_n-\theta}{\se} \dconv \norm[0,1]$ 873 | \item \textsc{Slutzky's Theorem} often lets us replace $\se(\that_n)$ by some 874 | (weakly) consistent estimator $\shat_n$. 875 | \end{itemize} 876 | 877 | \subsection{Normal-Based Confidence Interval} 878 | 879 | Suppose $\that_n \approx \norm[\theta,\sehat^2]$. 880 | Let $\zat = \Phi^{-1}(1-(\alpha/2))$, 881 | i.e., $\Pr{Z > \zat} = \alpha/2$ and $\Pr{-\zat < Z < \zat} = 1-\alpha$ 882 | where $Z\dist\norm[0,1]$. 
883 | Then \[C_n = \that_n \pm \zat\sehat\]
884 | 
885 | \subsection{Empirical distribution}
886 | 
887 | Empirical Distribution Function (ECDF)
888 | \[\Fnhat(x) = \displaystyle\frac{\sumin I(X_i \le x)}{n}\]
889 | \[I(X_i \le x) = \begin{cases}
890 | 1 & X_i \le x \\
891 | 0 & X_i > x
892 | \end{cases}\]
893 | 
894 | Properties (for any fixed $x$)
895 | \begin{itemize}
896 | \item $\E{\Fnhat(x)} = F(x)$
897 | \item $\V{\Fnhat(x)} = \displaystyle\frac{F(x)(1-F(x))}{n}$
898 | \item $\mse = \displaystyle\frac{F(x)(1-F(x))}{n} \conv 0$
899 | \item $\Fnhat(x) \pconv F(x)$
900 | \end{itemize}
901 | 
902 | \textsc{Dvoretzky-Kiefer-Wolfowitz} (DKW) inequality ($X_1,\dots,X_n\dist F$)
903 | \[\Pr{\sup_x\left|F(x)-\Fnhat(x)\right| > \varepsilon} \le
904 | 2e^{-2n\varepsilon^2}\]
905 | 
906 | Nonparametric $1-\alpha$ confidence band for $F$
907 | \begin{align*}
908 | L(x) &= \max\{\Fnhat(x)-\epsilon_n, 0\} \\
909 | U(x) &= \min\{\Fnhat(x)+\epsilon_n, 1\} \\
910 | \epsilon_n &=
911 | \sqrt{\displaystyle\frac{1}{2n}\log\left( \frac{2}{\alpha} \right)}
912 | \end{align*}
913 | \[\Pr{L(x) \le F(x) \le U(x) \;\forall x} \ge 1-\alpha\]
914 | 
915 | \subsection{Statistical Functionals}
916 | 
917 | \begin{itemize}
918 | \item Statistical functional: $T(F)$
919 | \item Plug-in estimator of $\theta = T(F)$: $\that_n = T(\Fnhat)$
920 | \item Linear functional: $T(F) = \int \transform(x)\dfx$
921 | \item Plug-in estimator for linear functional: \\
922 | \[T(\Fnhat)
923 | = \displaystyle\int \transform(x)\dfhatx
924 | = \frac{1}{n}\sumin \transform(X_i)\]
925 | \item Often: $T(\Fnhat) \approx \norm[T(F),\sehat^2]$ \imp
926 | $T(\Fnhat) \pm \zat\sehat$
927 | \item $p^\mathrm{th}$ quantile: $F^{-1}(p) = \inf\{x:F(x) \ge p\}$
928 | \item $\mhat = \samplemean$
929 | \item $\shat^2 = \displaystyle\frac{1}{n-1}\sumin
930 | (X_i-\samplemean)^2$
931 | \item $\khat =
932 | \displaystyle\frac{\frac{1}{n}\sumin(X_i-\mhat)^3}{\shat^3}$
933 | \item $\rhohat = \displaystyle\frac{\sumin(X_i-\samplemean)(Y_i-\bar{Y}_n)}%
934 | {\sqrt{\sumin(X_i-\samplemean)^2}\sqrt{\sumin(Y_i-\bar{Y}_n)^2}}$
935 | \end{itemize}
936 | 
937 | \section{Parametric Inference}
938 | 
939 | Let $\mathfrak{F} = \bigl\{ f(x;\theta) : \theta\in\Theta \bigr\}$ be a
940 | parametric model with parameter space $\Theta \subset \R^k$ and parameter
941 | $\theta = (\theta_1,\dots,\theta_k)$.
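For example, the Normal family is a parametric model with $k = 2$:
\[\mathfrak{F} = \bigl\{ \norm[\mu,\sigma^2] : \mu \in \R,\, \sigma^2 > 0 \bigr\}
\qquad \theta = (\mu,\sigma^2)\]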
942 | 
943 | \subsection{Method of Moments}
944 | 
945 | $j^{\mathrm{th}}$ moment
946 | \[\alpha_j(\theta) = \E{X^j} = \displaystyle\int x^j \dfx\]
947 | 
948 | $j^{\mathrm{th}}$ sample moment
949 | \[\ahat_j = \displaystyle\frac{1}{n}\sumin X_i^j\]
950 | 
951 | Method of Moments estimator (MoM)
952 | \begin{align*}
953 | \alpha_1(\theta) &= \ahat_1 \\
954 | \alpha_2(\theta) &= \ahat_2 \\
955 | \vdots &= \vdots \\
956 | \alpha_k(\theta) &= \ahat_k
957 | \end{align*}
958 | 
959 | \begin{titemize}{Properties of the MoM estimator}
960 | \item $\that_n$ exists with probability tending to 1
961 | \item Consistency: $\that_n \pconv \theta$
962 | \item Asymptotic normality:
963 | \[\sqrt{n}(\that-\theta) \dconv \norm[0,\Sigma]\]
964 | where $\Sigma = g\E{YY^T}g^T$, $Y = (X,X^2,\dots,X^k)^T$,\\
965 | $g = (g_1,\dots,g_k)$ and
966 | $g_j = \frac{\partial}{\partial\theta}\alpha_j^{-1}(\theta)$
967 | \end{titemize}
968 | 
969 | \subsection{Maximum Likelihood}
970 | 
971 | Likelihood: $\Lln : \Theta \to [0,\infty)$
972 | \[\Lln(\theta) = \displaystyle\prodin f(X_i;\theta)\]
973 | Log-likelihood
974 | \[\lln(\theta) = \log \Lln(\theta) = \sumin \log f(X_i;\theta)\]
975 | Maximum likelihood estimator (\mle)
976 | \[\Lln(\that_n) = \sup_\theta \Lln(\theta)\]
977 | 
978 | Score function
979 | \[s(X;\theta) = \frac{\partial}{\partial\theta}\log f(X;\theta)\]
980 | 
981 | Fisher information
982 | \[I(\theta) = \V[\theta]{s(X;\theta)}\]
983 | \[I_n(\theta) = nI(\theta)\]
984 | Fisher information (exponential family)
985 | \[I(\theta) = \E[\theta]{-\frac{\partial}{\partial\theta} s(X;\theta)}\]
986 | Observed Fisher information
987 | \[I_n^{obs}(\theta)
988 | = -\frac{\partial^2}{\partial\theta^2} \sumin\log f(X_i;\theta)\]
989 | 
990 | Properties of the \mle
991 | \begin{itemize}
992 | \item Consistency: $\that_n \pconv \theta$
993 | \item Equivariance:
994 | $\that_n$ is the \mle
995 | \imp $\transform(\that_n)$ is the \mle of $\transform(\theta)$
996 | \item Asymptotic normality:
997 | \begin{enumerate}
998 | \item $\se \approx \sqrt{1/I_n(\theta)}$
999 | \[\frac{(\that_n - \theta)}{\se} \dconv \norm[0,1]\]
1000 | \item $\sehat \approx \sqrt{1/I_n(\that_n)}$
1001 | \[\frac{(\that_n - \theta)}{\sehat} \dconv \norm[0,1]\]
1002 | \end{enumerate}
1003 | \item Asymptotic optimality (or efficiency), i.e., smallest variance for
1004 | large samples: if $\ttil_n$ is any other estimator, the asymptotic relative
1005 | efficiency is
1006 | \[\are(\ttil_n, \that_n)
1007 | = \frac{\V{\that_n}}{\V{\ttil_n}}
1008 | \le 1\]
1009 | \item Approximately the Bayes estimator
1010 | \end{itemize}
1011 | 
1012 | \subsubsection{Delta Method}
1013 | If $\tau=\transform(\theta)$,
1014 | where \transform is differentiable and $\transform'(\theta) \neq 0$:
1015 | \[\frac{(\widehat{\tau}_n-\tau)}{\sehat(\widehat{\tau})} \dconv \norm[0,1]\]
1016 | where $\widehat{\tau} = \transform(\that)$
1017 | is the \mle of $\tau$ and
1018 | \[\sehat(\widehat{\tau}) = \left|\transform'(\that)\right|\sehat(\that_n)\]
1019 | 
1020 | \subsection{Multiparameter Models}
1021 | 
1022 | Let $\theta=(\theta_1,\dots,\theta_k)$
1023 | and $\that= (\that_1,\dots,\that_k)$ be the \mle.
1024 | 
1025 | \[H_{jj} = \frac{\partial^2 \lln}{\partial\theta_j^2} \qquad
1026 | H_{jk} = \frac{\partial^2 \lln}{\partial\theta_j\partial\theta_k}\]
1027 | 
1028 | Fisher information matrix
1029 | \[I_n(\theta) = -\begin{bmatrix}
1030 | \E[\theta]{H_{11}} & \cdots & \E[\theta]{H_{1k}} \\
1031 | \vdots & \ddots & \vdots \\
1032 | \E[\theta]{H_{k1}} & \cdots & \E[\theta]{H_{kk}}
1033 | \end{bmatrix}\]
1034 | 
1035 | Under appropriate regularity conditions
1036 | \[(\that-\theta) \approx \norm[0,J_n]\]
1037 | with $J_n(\theta) = I_n^{-1}(\theta)$.
1038 | Further, if $\that_j$ is the $j^{\mathrm{th}}$ component of $\that$, then
1039 | \[\frac{(\that_j-\theta_j)}{\sehat_j} \dconv \norm[0,1]\] where $\sehat_j^2 =
1040 | J_n(j,j)$ and $\cov{\that_j,\that_k} = J_n(j,k)$.
1041 | 
1042 | \subsubsection{Multiparameter delta method}
1043 | 
1044 | Let $\tau = \transform(\theta_1,\dots,\theta_k)$
1045 | and let the gradient of \transform be
1046 | \[\nabla\transform = \begin{pmatrix}
1047 | \displaystyle\frac{\partial\transform}{\partial\theta_1} \\
1048 | \vdots \\
1049 | \displaystyle\frac{\partial\transform}{\partial\theta_k} \\
1050 | \end{pmatrix}\]
1051 | Suppose $\bigl.\nabla\transform\bigr|_{\theta=\that} \neq 0$ and
1052 | $\widehat{\tau} = \transform(\that)$. Then,
1053 | \[\frac{(\widehat{\tau}-\tau)}{\sehat(\widehat{\tau})} \dconv \norm[0,1]\]
1054 | where
1055 | \[\sehat(\widehat{\tau}) = \sqrt{\left( \widehat{\nabla}\transform \right)^T
1056 | \widehat{J}_n\left( \widehat{\nabla}\transform \right)}\]
1057 | and $\widehat{J}_n = J_n(\that)$ and $\widehat{\nabla}\transform =
1058 | \bigl.\nabla\transform\bigr|_{\theta=\that}$.
1059 | 
1060 | \subsection{Parametric Bootstrap}
1061 | 
1062 | Sample from $f(x;\that_n)$ instead of from $\Fnhat$, where $\that_n$ could be
1063 | the \mle or method of moments estimator.
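For example, under a Normal model one draws $X_1^*,\dots,X_n^* \distiid
\norm[\mhat,\shat^2]$, recomputes $\that^* = g(X_1^*,\dots,X_n^*)$ on each of
the $B$ resamples, and estimates $\V{\that_n}$ by the sample variance of the
$\that^*$ values.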
1064 | 1065 | \section{Hypothesis Testing} 1066 | 1067 | \[H_0:\theta\in\Theta_0 \qquad\text{versus}\qquad H_1:\theta\in\Theta_1\] 1068 | 1069 | Definitions 1070 | \begin{itemize} 1071 | \item Null hypothesis $H_0$ 1072 | \item Alternative hypothesis $H_1$ 1073 | \item Simple hypothesis $\theta = \theta_0$ 1074 | \item Composite hypothesis $\theta > \theta_0$ or $\theta < \theta_0$ 1075 | \item Two-sided test: 1076 | $H_0:\theta=\theta_0 \quad\text{versus}\quad H_1:\theta\neq\theta_0$ 1077 | \item One-sided test: 1078 | $H_0:\theta\le\theta_0 \quad\text{versus}\quad H_1:\theta>\theta_0$ 1079 | % \[H_0:\theta\ge\theta_0 \qquad\text{versus}\qquad H_1:\theta<\theta_0\] 1080 | \item Critical value $c$ 1081 | \item Test statistic $T$ 1082 | \item Rejection region $R = \left\{ x: T(x) > c \right\}$ 1083 | \item Power function $\beta(\theta) = \Pr{X \in R}$ 1084 | \item Power of a test: $1 - \Pr{\text{Type II error}} = 1-\beta 1085 | = \displaystyle\inf_{\theta \in \Theta_1} \beta(\theta)$ 1086 | \item Test size: $\alpha = \Pr{\text{Type I error}} 1087 | = \displaystyle\sup_{\theta\in\Theta_0}\beta(\theta)$ 1088 | \end{itemize} 1089 | 1090 | \centering 1091 | \begin{tabular}{l|cc} 1092 | & \textsf{Retain} $H_0$ & \textsf{Reject} $H_0$ \\ 1093 | \hline 1094 | $H_0$ \textsf{true} & $\surd$ & Type I Error ($\alpha$)\\ 1095 | $H_1$ \textsf{true} & Type II Error ($\beta$) & 1096 | $\surd$ (power) \\ 1097 | \end{tabular} 1098 | 1099 | \raggedright 1100 | p-value 1101 | \begin{itemize} 1102 | \item p-value $= \sup_{\theta\in\Theta_0} \Pr[\theta]{T(X) \ge T(x)} 1103 | = \inf\bigl\{ \alpha: T(x) \in R_\alpha \bigr\}$ 1104 | \item p-value $= \sup_{\theta\in\Theta_0} 1105 | \underbrace{\Pr[\theta]{T(X^\star) \ge T(X)}}_{1-F_\theta(T(X)) 1106 | \quad \text{since } T(X^\star) \dist F_\theta} 1107 | = \inf\bigl\{ \alpha: T(X) \in R_\alpha \bigr\}$ 1108 | \end{itemize} 1109 | 1110 | \centering 1111 | \begin{tabular}{ll} 1112 | \textsf{p-value} & \textsf{evidence} \\ 1113 | \hline 1114 | $< 0.01$ & very strong evidence against $H_0$ \\ 1115 | $0.01 - 0.05$ & strong evidence against $H_0$ \\ 1116 | $0.05 - 0.1$ & weak evidence against $H_0$ \\ 1117 | $> 0.1$ & little or no evidence against $H_0$ \\ 1118 | \end{tabular} 1119 | 1120 | \raggedright 1121 | Wald test 1122 | \begin{itemize} 1123 | \item Two-sided test 1124 | \item Reject $H_0$ when $|W| > \zat$ where 1125 | $W = \displaystyle\frac{\that - \theta_0}{\sehat}$ 1126 | \item $\Pr{|W| > \zat} \conv \alpha$ 1127 | \item p-value $= \Pr[\theta_0]{|W| > |w|} 1128 | \approx \Pr{|Z| > |w|} 1129 | = 2\Phi(-|w|)$ 1130 | \end{itemize} 1131 | 1132 | Likelihood ratio test 1133 | \begin{itemize} 1134 | \item $T(X) = \displaystyle\frac{\sup_{\theta\in\Theta}\Lln(\theta)}% 1135 | {\sup_{\theta\in\Theta_0}\Lln(\theta)} 1136 | = \frac{\Lln(\that_n)}{\Lln(\that_{n,0})}$ 1137 | \item $\lambda(X) = 2\log T(X) \dconv \chi_{r-q}^2$ 1138 | where $\displaystyle\sum_{i=1}^k Z_i^2 \dist \chi_k^2$ and 1139 | $Z_1,\dots,Z_k \distiid \norm[0,1]$ 1140 | \item p-value $= \Pr[\theta_0]{\lambda(X) > \lambda(x)} 1141 | \approx \Pr{\chi_{r-q}^2 > \lambda(x)}$ 1142 | \end{itemize} 1143 | 1144 | \begin{titemize}{Multinomial LRT} 1145 | \item \mle: 1146 | $\phat_n = \displaystyle \left(\frac{X_1}{n},\dots,\frac{X_k}{n}\right)$ 1147 | \item $T(X) = \displaystyle \frac{\Lln(\phat_n)}{\Lln(p_0)} 1148 | = \prod_{j=1}^k \left( \frac{\phat_j}{p_{0j}} \right)^{X_j}$ 1149 | \item $\lambda(X) = \displaystyle 2\sum_{j=1}^k X_j \log 1150 | \left( \frac{\phat_j}{p_{0j}} \right) \dconv \chi_{k-1}^2$ 1151 | 
\item The approximate size $\alpha$ LRT rejects $H_0$ when
1152 | $\lambda(X) \ge \chi_{k-1,\alpha}^2$
1153 | \end{titemize}
1154 | 
1155 | Pearson Chi-square Test
1156 | \begin{itemize}
1157 | \item $T = \displaystyle \sum_{j=1}^k \frac{(X_j-\E{X_j})^2}{\E{X_j}}$
1158 | where $\E{X_j} = np_{0j}$ under $H_0$
1159 | \item $T \dconv \chi_{k-1}^2$
1160 | \item p-value $= \Pr{\chi_{k-1}^2 > T(x)}$
1161 | \item Converges to $\chi_{k-1}^2$ faster than the LRT, hence preferable for small $n$
1162 | \end{itemize}
1163 | 
1164 | Independence testing
1165 | \begin{itemize}
1166 | \item $I$ rows, $J$ columns,
1167 | $\mathbf{X}$ multinomial sample of size $n$ with $I \cdot J$ cells
1168 | \item {\mle}s unconstrained: $\phat_{ij} = \frac{X_{ij}}{n}$
1169 | \item {\mle}s under $H_0$:
1170 | $\phat_{0ij} = \phat_{i\cdot}\phat_{\cdot j}
1171 | = \frac{X_{i\cdot}}{n} \frac{X_{\cdot j}}{n}$
1172 | \item LRT: $\lambda = 2\sum_{i=1}^I\sum_{j=1}^J X_{ij}
1173 | \log\left( \frac{nX_{ij}}{X_{i\cdot}X_{\cdot j}}\right)$
1174 | \item Pearson $\chi^2$: $T = \sum_{i=1}^I\sum_{j=1}^J
1175 | \frac{(X_{ij}-\E{X_{ij}})^2}{\E{X_{ij}}}$
1176 | \item LRT and Pearson $\dconv \chisq{\nu}$,
1177 | where $\nu=(I-1)(J-1)$
1178 | \end{itemize}
1179 | 
1180 | \section{Exponential Family}
1181 | 
1182 | Scalar parameter
1183 | \begin{align*}
1184 | f_X(x \giv \theta)
1185 | &= h(x) \Exp{\eta(\theta)T(x) - A(\theta)} \\
1186 | &= h(x) g(\theta) \Exp{\eta(\theta)T(x)}
1187 | \end{align*}
1188 | 
1189 | Vector parameter
1190 | \begin{align*}
1191 | f_X(x \giv \theta)
1192 | &= h(x) \Exp{\sum_{i=1}^s\eta_i(\theta)T_i(x) - A(\theta)} \\
1193 | &= h(x) \Exp{\eta(\theta)\cdot T(x) - A(\theta)} \\
1194 | &= h(x) g(\theta)\Exp{\eta(\theta)\cdot T(x)}
1195 | \end{align*}
1196 | 
1197 | Natural form
1198 | \begin{align*}
1199 | f_X(x \giv \eta)
1200 | &= h(x) \Exp{\eta\cdot \mathbf{T}(x) - A(\eta)} \\
1201 | &= h(x) g(\eta) \Exp{\eta\cdot \mathbf{T}(x)} \\
1202 | &= h(x) g(\eta) \Exp{\eta^T \mathbf{T}(x)}
1203 | \end{align*}
1204 | 
1205 | \section{Bayesian Inference}
1206 | 
1207 | \textsc{Bayes' Theorem}
1208 | \[f(\theta\giv x^n)
1209 | = \frac{f(x^n\giv\theta)f(\theta)}{f(x^n)}
1210 | = \frac{f(x^n\giv\theta)f(\theta)}
1211 | {\int f(x^n\giv\theta)f(\theta)\,d\theta} \propto \Lln(\theta)f(\theta)\]
1212 | 
1213 | Definitions
1214 | \begin{itemize}
1215 | \item $X^n = (X_1,\ldots,X_n)$
1216 | \item $x^n = (x_1,\ldots,x_n)$
1217 | \item Prior density $f(\theta)$
1218 | \item Likelihood $f(x^n \giv \theta)$: joint density of the data\\
1219 | In particular, $X^n$ \iid \imp $f(x^n\giv\theta) =
1220 | \displaystyle\prodin f(x_i\giv\theta) = \Lln(\theta)$
1221 | \item Posterior density $f(\theta\giv x^n)$
1222 | \item Normalizing constant $c_n = f(x^n)
1223 | = \int f(x^n\giv\theta)f(\theta)\,d\theta$
1224 | \item Kernel: part of a density that depends on $\theta$
1225 | \item Posterior mean $\bar\theta_n
1226 | = \int\theta f(\theta\giv x^n)\,d\theta
1227 | = \frac{\int\theta\Lln(\theta)f(\theta)\,d\theta}
1228 | {\int\Lln(\theta)f(\theta)\,d\theta}$
1229 | \end{itemize}
1230 | 
1231 | \subsection{Credible Intervals}
1232 | 
1233 | Posterior interval
1234 | \[\Pr{\theta\in (a,b)\giv x^n}
1235 | = \int_{a}^{b} f(\theta\giv x^n)\,d\theta
1236 | = 1-\alpha\]
1237 | 
1238 | Equal-tail credible interval
1239 | \[\int_{-\infty}^{a} f(\theta\giv x^n)\,d\theta
1240 | = \int_{b}^{\infty} f(\theta\giv x^n)\,d\theta = \alpha/2\]
1241 | 
1242 | Highest posterior density (HPD) region $R_n$
1243 | \begin{enumerate}
1244 | \item $\Pr{\theta\in R_n \giv x^n} = 1-\alpha$
1245 | \item $R_n = \left\{ \theta: f(\theta\giv x^n) >
k \right\}$ for some $k$
1246 | \end{enumerate}
1247 | $R_n$ is unimodal \imp $R_n$ is an interval
1248 | 
1249 | \subsection{Function of parameters}
1250 | Let $\tau = \transform(\theta)$ and $A =
1251 | \left\{ \theta:\transform(\theta) \le \tau \right\}$.
1252 | 
1253 | Posterior CDF for $\tau$
1254 | \[H(\tau\giv x^n)
1255 | = \Pr{\transform(\theta) \le \tau\giv x^n}
1256 | = \int_A f(\theta \giv x^n)\,d\theta\]
1257 | Posterior density
1258 | \[h(\tau\giv x^n) = H'(\tau\giv x^n)\]
1259 | Bayesian delta method
1260 | \[\tau\giv X^n \approx \norm[\transform(\that),
1261 | \left(\sehat\left|\transform'(\that)\right|\right)^2]\]
1262 | 
1263 | \subsection{Priors}
1264 | 
1265 | Choice
1266 | \begin{itemize}
1267 | \item Subjective Bayesianism: the prior should incorporate as much detail as
1268 | possible of the researcher's a priori knowledge---via \emph{prior elicitation}
1269 | \item Objective Bayesianism: the prior should incorporate as little detail as
1270 | possible (\emph{non-informative} prior)
1271 | \item Robust Bayesianism: consider various priors and determine the
1272 | \emph{sensitivity} of our inferences to changes in the prior
1273 | \end{itemize}
1274 | 
1275 | Types
1276 | \begin{itemize}
1277 | \item Flat: $f(\theta) \propto \text{constant}$
1278 | \item Proper: $\int_{-\infty}^{\infty} f(\theta)\,d\theta = 1$
1279 | \item Improper: $\int_{-\infty}^{\infty} f(\theta)\,d\theta = \infty$
1280 | \item \textsc{Jeffreys}' prior (transformation-invariant):
1281 | \[f(\theta) \propto \sqrt{I(\theta)} \qquad
1282 | f(\theta) \propto \sqrt{\det(I(\theta))}\]
1283 | \item Conjugate: $f(\theta)$ and $f(\theta\giv x^n)$
1284 | belong to the same parametric family
1285 | \end{itemize}
1286 | 
1287 | \subsubsection{Conjugate Priors}
1288 | 
1289 | \begin{tabular}{|l|p{.23\columnwidth}|p{.45\columnwidth}|}
1290 | \hline
1291 | \multicolumn{3}{|c|}{Continuous likelihood (subscript $c$ denotes constant)}\\
1292 | \hline && \\[-2ex]
1293 | Likelihood & Conjugate prior &
1294 | \text{Posterior hyperparameters} \\[1ex]
1295 | 
1296 | \hline && \\[-2ex]
1297 | 
1298 | $\unif[0,\theta]$ & Pareto($x_m,k$) &
1299 | $\displaystyle\max\left\{ x_{(n)}, x_m \right\}, k+n$ \\
1300 | 
1301 | $\ex[\lambda]$ & $\gam[\alpha,\beta]$ &
1302 | $\alpha + n, \beta + \displaystyle\sumin x_i$\\[3ex]
1303 | 
1304 | $\norm[\mu,\sigma_c^2]$ & $\norm[\mu_0,\sigma_0^2]$ &
1305 | $\displaystyle
1306 | \left(\frac{\mu_0}{\sigma_0^2} + \frac{\sumin x_i}{\sigma_c^2} \right) /
1307 | \left( \frac{1}{\sigma_0^2} + \frac{n}{\sigma_c^2} \right)$,
1308 | $\displaystyle\left(\frac{1}{\sigma_0^2} + \frac{n}{\sigma_c^2}\right)^{-1}$
1309 | \\[2ex]
1310 | 
1311 | $\norm[\mu_c,\sigma^2]$ & Scaled Inverse Chi-square($\nu,\sigma_0^2$) &
1312 | $\nu + n$, $\displaystyle \frac{\nu\sigma_0^2 + \sumin(x_i-\mu_c)^2}{\nu + n}$
1313 | \\[4ex]
1314 | 
1315 | $\norm[\mu,\sigma^2]$ &
1316 | Normal-scaled Inverse Gamma($\lambda,\nu,\alpha,\beta$) &
1317 | $\displaystyle\frac{\nu\lambda+n\xbar}{\nu+n}$, \qquad $\nu+n$, \qquad
1318 | $\displaystyle \alpha + \frac{n}{2}$, \qquad
1319 | $\displaystyle \beta + \frac{1}{2}\sumin(x_i-\xbar)^2 +
1320 | \frac{n\nu(\xbar-\lambda)^2}{2(\nu+n)}$
1321 | \\[4ex]
1322 | 
1323 | MVN($\mu,\Sigma_c$) & MVN($\mu_0,\Sigma_0$) &
1324 | $\displaystyle\left( \Sigma_0^{-1}+ n\Sigma_c^{-1} \right)^{-1}
1325 | \left( \Sigma_0^{-1}\mu_0 + n\Sigma_c^{-1} \xbar \right)$,
1326 | $\displaystyle\left( \Sigma_0^{-1} + n \Sigma_c^{-1} \right)^{-1}$ \\[1ex]
1327 | 
1328 | MVN($\mu_c,\Sigma$) & Inverse-Wishart($\kappa,\Psi$) &
1329 | $\displaystyle n + \kappa, \Psi +
\sumin(x_i-\mu_c)(x_i-\mu_c)^T$\\
1330 | 
1331 | Pareto($x_{m_c}, k$) & $\gam[\alpha,\beta]$ &
1332 | $\displaystyle\alpha+n, \beta + \sumin \log\frac{x_i}{x_{m_c}}$ \\
1333 | 
1334 | Pareto($x_m, k_c$) & Pareto($x_0, k_0$) &
1335 | $\displaystyle x_0, k_0 - kn$ where $k_0 > kn$ \\
1336 | 
1337 | $\gam[\alpha_c,\beta]$ & $\gam[\alpha_0,\beta_0]$ &
1338 | $\displaystyle \alpha_0 + n\alpha_c, \beta_0 + \sumin x_i$ \\[3ex]
1339 | 
1340 | \hline
1341 | \end{tabular}
1342 | 
1343 | \vfill~
1344 | \columnbreak
1345 | 
1346 | \begin{tabular}{*3{|>{\begin{math}\displaystyle}l<{\end{math}}}|}
1347 | \hline
1348 | \multicolumn{3}{|c|}{Discrete likelihood}\\
1349 | \hline && \\[-2ex]
1350 | \text{Likelihood} & \text{Conjugate prior} &
1351 | \text{Posterior hyperparameters} \\[1ex]
1352 | 
1353 | \hline && \\[-2ex]
1354 | 
1355 | \bern[p] & \bet[\alpha,\beta] &
1356 | \alpha + \sumin x_i, \beta + n - \sumin x_i \\
1357 | 
1358 | \bin[p] & \bet[\alpha,\beta] &
1359 | \alpha + \sumin x_i, \beta + \sumin N_i - \sumin x_i \\
1360 | 
1361 | \nbin[p] & \bet[\alpha,\beta] & \alpha + rn, \beta + \sumin x_i \\
1362 | 
1363 | \pois[\lambda] & \gam[\alpha,\beta] & \alpha + \sumin x_i, \beta + n \\
1364 | 
1365 | \text{Multinomial}(p) & \dir[\alpha] & \alpha + \sumin x^{(i)} \\
1366 | 
1367 | \geo[p] & \bet[\alpha,\beta] & \alpha + n, \beta + \sumin x_i \\[3ex]
1368 | 
1369 | \hline
1370 | \end{tabular}
1371 | 
1372 | \subsection{Bayesian Testing}
1373 | If $H_0:\theta \in \Theta_0$:
1374 | \begin{align*}
1375 | \text{Prior probability } \Pr{H_0}
1376 | &= \int_{\Theta_0} f(\theta)\,d\theta\\
1377 | \text{Posterior probability } \Pr{H_0\giv x^n}
1378 | &= \int_{\Theta_0} f(\theta\giv x^n)\,d\theta
1379 | \end{align*}
1380 | 
1381 | Let $H_1,\dots,H_K$ be $K$ hypotheses.
1382 | Suppose $\theta \dist f(\theta\giv H_k)$. Then
1383 | \[\Pr{H_k\giv x^n}
1384 | = \frac{f(x^n\giv H_k)\Pr{H_k}}{\sum_{i=1}^K f(x^n\giv H_i)\Pr{H_i}}\]
1385 | 
1386 | Marginal likelihood
1387 | \[f(x^n\giv H_i)
1388 | = \int_\Theta f(x^n\giv \theta,H_i)f(\theta\giv H_i) \,d\theta\]
1389 | 
1390 | Posterior odds (of $H_i$ relative to $H_j$)
1391 | \[\frac{\Pr{H_i\giv x^n}}{\Pr{H_j\giv x^n}}
1392 | = \underbrace{\frac{f(x^n\giv H_i)}{f(x^n\giv H_j)}}%
1393 | _{\text{Bayes Factor }BF_{ij}}
1394 | \times \;\underbrace{\frac{\Pr{H_i}}{\Pr{H_j}}}_{\text{prior odds}}\]
1395 | 
1396 | \columnbreak
1397 | Bayes factor
1398 | 
1399 | \centering
1400 | \begin{tabular}{lll}
1401 | $\log_{10} BF_{10}$ & $BF_{10}$ & \textsf{evidence} \\
1402 | \toprule
1403 | $0 - 0.5$ & $1 - 3.2$ & Weak \\
1404 | $0.5 - 1$ & $3.2 - 10$ & Moderate \\
1405 | $1 - 2$ & $10 - 100$ & Strong \\
1406 | $> 2$ & $> 100$ & Decisive \\
1407 | \end{tabular}
1408 | 
1409 | \vspace*{2ex}
1410 | 
1411 | $p^* = \displaystyle\frac{\frac{p}{1-p}BF_{10}}{1+\frac{p}{1-p}BF_{10}}$
1412 | where $p=\Pr{H_1}$ and $p^* = \Pr{H_1 \giv x^n}$
1413 | 
1414 | \raggedright
1415 | 
1416 | \section{Sampling Methods}
1417 | 
1418 | \subsection{Inverse Transform Sampling}
1419 | 
1420 | Setup
1421 | \begin{itemize}
1422 | \item $U \dist \unif[0,1]$
1423 | \item $X \dist F$
1424 | \item $F^{-1}(u)= \inf\{ x \mid F(x) \ge u\}$
1425 | \end{itemize}
1426 | 
1427 | Algorithm
1428 | \begin{enumerate}
1429 | \item Generate $u \dist \unif[0,1]$
1430 | \item Compute $x = F^{-1}(u)$
1431 | \end{enumerate}
1432 | 
1433 | \subsection{The Bootstrap}
1434 | 
1435 | Let $T_n = g(X_1,\dots,X_n)$ be a statistic.
1436 | \begin{enumerate}
1437 | \item Estimate $\V[F]{T_n}$ with $\V[\Fnhat]{T_n}$.
1438 | \item Approximate $\V[\Fnhat]{T_n}$ using simulation: 1439 | \begin{enumerate} 1440 | \item Repeat the following $B$ times to get $T_{n,1}^*,\dots,T_{n,B}^*$, 1441 | an \iid sample from the sampling distribution implied by $\Fnhat$ 1442 | \begin{enumerate} 1443 | \item Sample uniformly $X_1^*,\dots,X_n^* \dist \Fnhat$. 1444 | \item Compute $T_n^* = g(X_1^*,\dots,X_n^*)$. 1445 | \end{enumerate} 1446 | \item Then 1447 | \[v_{boot} = \widehat{\mathbb{V}}_{\Fnhat} = 1448 | \displaystyle\frac{1}{B} \sum_{b=1}^B 1449 | \left(T_{n,b}^* - \frac{1}{B} \sum_{r=1}^B T_{n,r}^* \right)^2\] 1450 | \end{enumerate} 1451 | \end{enumerate} 1452 | 1453 | \subsubsection{Bootstrap Confidence Intervals} 1454 | 1455 | Normal-based interval 1456 | \[T_n \pm \zat\sehat_{boot}\] 1457 | 1458 | Pivotal interval 1459 | \begin{enumerate} 1460 | \item Location parameter $\theta = T(F)$ 1461 | \item Pivot $R_n = \that_n - \theta$ 1462 | \item Let $H(r) = \Pr{R_n \le r}$ be the \cdf of $R_n$ 1463 | \item Let $R_{n,b}^* = \that_{n,b}^* - \that_n$. 1464 | Approximate $H$ using bootstrap: 1465 | \[\widehat{H}(r) = \frac{1}{B}\sum_{b=1}^B I(R_{n,b}^* \le r)\] 1466 | \item $\theta_\beta^*$ = $\beta$ sample quantile of 1467 | $(\that_{n,1}^*,\dots,\that_{n,B}^*)$ 1468 | \item $r_\beta^*$ = beta sample quantile of 1469 | $(R_{n,1}^*,\dots,R_{n,B}^*)$, i.e., 1470 | $r_\beta^* = \theta_\beta^* - \that_n$ 1471 | \item Approximate $1-\alpha$ confidence interval 1472 | $C_n = \left( \hat{a}, \hat{b} \right)$ where 1473 | \begin{align*} 1474 | \hat{a} 1475 | &=& \that_n - \widehat{H}^{-1}\left( 1-\frac{\alpha}{2} \right) 1476 | &=& \that_n - r_{1-\alpha/2}^* 1477 | &=& 2\that_n - \theta_{1-\alpha/2}^* \\ 1478 | \hat{b} 1479 | &=& \that_n - \widehat{H}^{-1}\left( \frac{\alpha}{2} \right) 1480 | &=& \that_n - r_{\alpha/2}^* 1481 | &=& 2\that_n - \theta_{\alpha/2}^* 1482 | \end{align*} 1483 | \end{enumerate} 1484 | % \[C_n = \left( 2\that_n - \that_{1-\alpha/2}^*, 1485 | % 2\that_n + \that_{\alpha/2}^* \right)\] 1486 | 1487 | Percentile interval 1488 | \[C_n = \left( \theta_{\alpha/2}^*, \theta_{1-\alpha/2}^* \right)\] 1489 | 1490 | \subsection{Rejection Sampling} 1491 | Setup 1492 | \begin{itemize} 1493 | \item We can easily sample from $g(\theta)$ 1494 | \item We want to sample from $h(\theta)$, but it is difficult 1495 | \item We know $h(\theta)$ up to a proportional constant: 1496 | $h(\theta) = \displaystyle\frac{k(\theta)}{\int k(\theta)\,d\theta}$ 1497 | \item Envelope condition: we can find $M > 0$ such that 1498 | $k(\theta) \le Mg(\theta) \quad \forall \theta$ 1499 | \end{itemize} 1500 | 1501 | Algorithm 1502 | \begin{enumerate} 1503 | \item Draw $\theta^{cand} \dist g(\theta)$ 1504 | \item Generate $u \dist \unif[0,1]$ 1505 | \item Accept $\theta^{cand}$ if 1506 | $u \le \displaystyle\frac{k(\theta^{cand})}{Mg(\theta^{cand})}$ 1507 | \item Repeat until $B$ values of $\theta^{cand}$ have been accepted 1508 | \end{enumerate} 1509 | 1510 | Example 1511 | \begin{itemize} 1512 | \item We can easily sample from the prior $g(\theta) = f(\theta)$ 1513 | \item Target is the posterior 1514 | $h(\theta) \propto k(\theta) = f(x^n\giv \theta) f(\theta)$ 1515 | \item Envelope condition: 1516 | $f(x^n\giv\theta) \le f(x^n\giv\that_n) = \Lln(\that_n)\equiv M$ 1517 | \item Algorithm 1518 | \begin{enumerate} 1519 | \item Draw $\theta^{cand} \dist f(\theta)$ 1520 | \item Generate $u \dist \unif[0,1]$ 1521 | \item Accept $\theta^{cand}$ if 1522 | $u \le \displaystyle\frac{\Lln(\theta^{cand})}{\Lln(\that_n)}$ 1523 | \end{enumerate} 1524 | 
\end{itemize}
1525 | 
1526 | \subsection{Importance Sampling}
1527 | 
1528 | Sample from an importance function $g$ rather than target density $h$.\\
1529 | Algorithm to obtain an approximation to $\E{q(\theta) \giv x^n}$:
1530 | \begin{enumerate}
1531 | \item Sample from the prior $\theta_1,\ldots,\theta_B \distiid f(\theta)$
1532 | \item $w_i = \displaystyle\frac{\Lln(\theta_i)}{\sum_{j=1}^B \Lln(\theta_j)}
1533 | \quad\forall i = 1,\ldots,B$
1534 | \item $\E{q(\theta)\giv x^n} \approx \sum_{i=1}^B q(\theta_i)w_i$
1535 | \end{enumerate}
1536 | 
1537 | \section{Decision Theory}
1538 | 
1539 | Definitions
1540 | \begin{itemize}
1541 | \item Unknown quantity affecting our decision: $\theta \in \Theta$
1542 | \item Decision rule: synonymous with an estimator $\that$
1543 | \item Action $a \in \mathcal{A}$: possible value of the decision rule. In the
1544 | estimation context, the action is just an estimate of $\theta$, $\that(x)$.
1545 | \item Loss function $L$: consequences of taking action $a$ when the true
1546 | state is $\theta$, i.e., the discrepancy between $\theta$ and $\that$,
1547 | $L: \Theta \times \mathcal{A} \to [-k,\infty).$
1548 | \end{itemize}
1549 | 
1550 | Loss functions
1551 | \begin{itemize}
1552 | \item Squared error loss: $L(\theta,a) = (\theta-a)^2$
1553 | \item Linear loss: $L(\theta,a) = \begin{cases}
1554 | K_1(\theta-a) & a-\theta < 0 \\
1555 | K_2(a-\theta) & a-\theta \ge 0
1556 | \end{cases}$
1557 | \item Absolute error loss: $L(\theta,a) = |\theta-a| \quad$
1558 | (linear loss with $K_1=K_2=1$)
1559 | \item $L_p$ loss: $L(\theta,a) = |\theta-a|^p$
1560 | \item Zero-one loss: $L(\theta,a) = \begin{cases}
1561 | 0 & a=\theta \\
1562 | 1 & a\neq\theta \\
1563 | \end{cases}$
1564 | \end{itemize}
1565 | 
1566 | \subsection{Risk}
1567 | 
1568 | Posterior risk
1569 | \[r(\that \giv x)
1570 | = \int L(\theta,\that(x))f(\theta\giv x)\,d\theta
1571 | = \E[\theta|X]{L(\theta,\that(x))}\]
1572 | 
1573 | (Frequentist) risk
1574 | \[R(\theta,\that)
1575 | = \int L(\theta,\that(x)) f(x\giv\theta) \dx
1576 | = \E[X|\theta]{L(\theta,\that(X))}\]
1577 | 
1578 | Bayes risk
1579 | \[r(f,\that)
1580 | = \iint L(\theta,\that(x))f(x,\theta)\dx\dtheta
1581 | = \E[\theta,X]{L(\theta,\that(X))}\]
1582 | \[r(f,\that)
1583 | = \E[\theta]{\E[X|\theta]{L(\theta,\that(X))}}
1584 | = \E[\theta]{R(\theta,\that)}\]
1585 | \[r(f,\that)
1586 | = \E[X]{\E[\theta|X]{L(\theta,\that(X))}}
1587 | = \E[X]{r(\that\giv X)}\]
1588 | 
1589 | \subsection{Admissibility}
1590 | 
1591 | \begin{itemize}
1592 | \item $\that'$ dominates $\that$ if
1593 | \[\forall \theta: R(\theta,\that') \le R(\theta,\that)\]
1594 | \[\exists \theta: R(\theta,\that') < R(\theta,\that)\]
1595 | \item $\that$ is inadmissible if there is at least one other estimator
1596 | $\that'$ that dominates it. Otherwise it is called admissible.
1597 | \end{itemize} 1598 | 1599 | \subsection{Bayes Rule} 1600 | 1601 | Bayes rule (or Bayes estimator) 1602 | \begin{itemize} 1603 | \item $r(f,\that) = \inf_{\ttil} r(f,\ttil)$ 1604 | \item $\that(x) = \inf r(\that\giv x) \; \forall x 1605 | \imp r(f,\that) = \int r(\that\giv x)f(x)\,dx$ 1606 | \end{itemize} 1607 | 1608 | Theorems 1609 | \begin{itemize} 1610 | \item Squared error loss: posterior mean 1611 | \item Absolute error loss: posterior median 1612 | \item Zero-one loss: posterior mode 1613 | \end{itemize} 1614 | 1615 | \subsection{Minimax Rules} 1616 | 1617 | Maximum risk 1618 | \[\bar{R}(\that) = \sup_\theta R(\theta,\that) \qquad 1619 | \bar{R}(a) = \sup_\theta R(\theta,a)\] 1620 | 1621 | Minimax rule 1622 | \[\sup_\theta R(\theta,\that) 1623 | = \inf_{\ttil} \bar{R}(\ttil) 1624 | = \inf_{\ttil} \sup_\theta R(\theta,\ttil)\] 1625 | 1626 | \[\that = \text{Bayes rule} \; \wedge \; 1627 | \exists c: R(\theta,\that) = c\] 1628 | 1629 | Least favorable prior 1630 | \[\that^f = \text{Bayes rule} \; \wedge \; 1631 | R(\theta,\that^f) \le r(f,\that^f) \;\forall\theta\] 1632 | 1633 | \section{Linear Regression} 1634 | 1635 | Definitions 1636 | \begin{itemize} 1637 | \item Response variable $Y$ 1638 | \item Covariate $X$ (aka predictor variable or feature) 1639 | \end{itemize} 1640 | 1641 | \subsection{Simple Linear Regression} 1642 | Model 1643 | \[Y_i = \beta_0 + \beta_1 X_i + \epsilon_i 1644 | \qquad \E{\epsilon_i\giv X_i} = 0 ,\; \V{\epsilon_i\giv X_i} = \sigma^2\] 1645 | 1646 | Fitted line 1647 | \[\rhat(x) = \bhat_0 + \bhat_1 x\] 1648 | 1649 | Predicted (fitted) values 1650 | \[\Yhat_i = \rhat(X_i)\] 1651 | 1652 | Residuals 1653 | \[\ehat_i = Y_i - \Yhat_i 1654 | = Y_i - \left( \bhat_0 + \bhat_1 X_i \right)\] 1655 | 1656 | Residual sums of squares (\rss) 1657 | \[\rss(\bhat_0,\bhat_1) = \sumin \ehat_i^2\] 1658 | 1659 | Least square estimates 1660 | \[\bhat^T=(\bhat_0, \bhat_1)^T: \min_{\bhat_0,\bhat_1}\rss\] 1661 | \begin{align*} 1662 | \bhat_0 &= \bar Y_n - \bhat_1 \bar X_n \\ 1663 | \bhat_1 &= \frac{\sumin(X_i-\bar X_n)(Y_i-\bar Y_n)}{\sumin(X_i - \bar X_n)^2} 1664 | = \frac{\sumin X_iY_i-n\Xbar\Ybar}{\sumin X_i^2 - n\Xsqbar} \\ 1665 | \E{\bhat\giv X^n} &= \begin{pmatrix}\beta_0 \\ \beta_1\end{pmatrix} \\ 1666 | \V{\bhat\giv X^n} &= 1667 | \frac{\sigma^2}{n s^2_X} 1668 | \begin{pmatrix}n^{-1}\sumin X_i^2 & -\Xnbar \\ -\Xnbar & 1\end{pmatrix} \\ 1669 | \sehat(\bhat_0) &= \frac{\shat}{s_X\sqrt{n}} \sqrt{\frac{\sumin X_i^2}{n}} \\ 1670 | \sehat(\bhat_1) &= \frac{\shat}{s_X\sqrt{n}} 1671 | \end{align*} 1672 | where $s_X^2 = n^{-1} \sumin(X_i-\Xnbar)^2$ and $\shat^2 = 1673 | \frac{1}{n-2} \sumin \ehat_i^2$ (unbiased estimate). 1674 | 1675 | Further properties: 1676 | \begin{itemize} 1677 | \item Consistency: 1678 | $\bhat_0 \pconv \beta_0$ and $\bhat_1 \pconv \beta_1$ 1679 | \item Asymptotic normality: 1680 | \[\frac{\bhat_0 - \beta_0}{\sehat(\bhat_0)} \dconv \norm[0,1] 1681 | \quad\text{and}\quad 1682 | \frac{\bhat_1 - \beta_1}{\sehat(\bhat_1)} \dconv \norm[0,1]\] 1683 | \item Approximate $1-\alpha$ confidence intervals for $\beta_0$ and $\beta_1$: 1684 | \[\bhat_0 \pm \zat \sehat(\bhat_0) \quad\text{and}\quad 1685 | \bhat_1 \pm \zat \sehat(\bhat_1)\] 1686 | \item Wald test for \hyp{\beta_1=0}{\beta_1\neq 0}: reject 1687 | $H_0$ if $|W| > \zat$ where $W = \bhat_1/\sehat(\bhat_1)$. 
\end{itemize}
1689 | 
1690 | R$^2$
1691 | \[R^2
1692 | = \frac{\sumin(\Yhat_i-\Ybar)^2}{\sumin(Y_i-\Ybar)^2}
1693 | = 1 - \frac{\sumin \ehat_i^2}{\sumin(Y_i-\Ybar)^2}
1694 | = 1 - \frac{\rss}{\tss}\]
1695 | 
1696 | Likelihood
1697 | \begin{align*}
1698 | \Ll &= \prodin f(X_i,Y_i)
1699 | = \prodin f_X(X_i) \times \prodin f_{Y|X}(Y_i \giv X_i) = \Ll_1 \times \Ll_2 \\
1700 | \Ll_1 &= \prodin f_X(X_i) \\
1701 | \Ll_2 &= \prodin f_{Y|X}(Y_i \giv X_i)
1702 | \propto \sigma^{-n}
1703 | \Exp{-\frac{1}{2\sigma^2}\sum_i\Bigl(Y_i-(\beta_0+\beta_1X_i)\Bigr)^2}
1704 | \end{align*}
1705 | 
1706 | Under the assumption of Normality, the least squares estimator is also
1707 | the \mle, but the least squares variance estimator is not; the \mle is
1708 | \[\shat^2_{\mathrm{mle}} = \frac{1}{n}\sumin \ehat_i^2\]
1709 | 
1710 | \subsection{Prediction}
1711 | 
1712 | Observe a new value $x_*$ of the covariate and predict the outcome $Y_*$.
1713 | \begin{align*}
1714 | \Yhat_* &= \bhat_0 + \bhat_1 x_* \\
1715 | \V{\Yhat_*} &= \V{\bhat_0} + x_*^2 \V{\bhat_1} + 2x_* \cov{\bhat_0,\bhat_1}
1716 | \end{align*}
1717 | 
1718 | Prediction interval
1719 | \[\xihat_n^2
1720 | = \shat^2\left( \frac{\sumin(X_i-x_*)^2}{n\sum_i(X_i-\Xbar)^2}+1 \right)\]
1721 | \[\Yhat_* \pm \zat \xihat_n\]
1722 | 
1723 | \subsection{Multiple Regression}
1724 | 
1725 | \[Y = X\beta + \epsilon\]
1726 | where
1727 | \[X =
1728 | \begin{pmatrix} X_{11} & \cdots & X_{1k} \\ \vdots & \ddots & \vdots \\
1729 | X_{n1} & \cdots & X_{nk}\end{pmatrix} \quad
1730 | \beta = \begin{pmatrix}\beta_1 \\ \vdots \\ \beta_k\end{pmatrix} \quad
1731 | \epsilon = \begin{pmatrix}\epsilon_1 \\ \vdots \\ \epsilon_n\end{pmatrix}\]
1732 | 
1733 | Likelihood
1734 | \[\Ll(\beta,\sigma^2) = (2\pi\sigma^2)^{-n/2} \Exp{-\frac{1}{2\sigma^2}\rss}\]
1735 | \[\rss = (Y-X\beta)^T(Y-X\beta) = \|Y-X\beta\|^2 = \sumin(Y_i-x_i^T\beta)^2\]
1736 | 
1737 | If the $(k \times k)$ matrix $X^TX$ is invertible,
1738 | \begin{align*}
1739 | \bhat &= (X^TX)^{-1}X^TY \\
1740 | \V{\bhat \giv X^n} &= \sigma^2(X^TX)^{-1} \\
1741 | \bhat &\approx \norm[\beta, \sigma^2(X^TX)^{-1}]
1742 | \end{align*}
1743 | 
1744 | Estimate regression function
1745 | \[\rhat(x) = \sumjk\bhat_j x_j\]
1746 | 
1747 | Unbiased estimate for $\sigma^2$
1748 | \[\shat^2 = \frac{1}{n-k} \sumin \ehat_i^2 \qquad \ehat = Y - X\bhat\]
1749 | 
1750 | \mle
1751 | \[\mhat = \Xbar \qquad \shat^2_{\mathrm{mle}} = \frac{n-k}{n}\,\shat^2\]
1752 | 
1753 | $1-\alpha$ Confidence interval
1754 | \[\bhat_j \pm \zat\sehat(\bhat_j)\]
1755 | 
1756 | \subsection{Model Selection}
1757 | 
1758 | Consider predicting a new observation $Y^*$ for covariates $X^*$ and let $S
1759 | \subset J$ denote a subset of the covariates in the model, where $|S| = k$ and
1760 | $|J| = n$.
1761 | 
1762 | Issues
1763 | \begin{itemize}
1764 | \item Underfitting: too few covariates yields high bias
1765 | \item Overfitting: too many covariates yields high variance
1766 | \end{itemize}
1767 | 
1768 | Procedure
1769 | \begin{enumerate}
1770 | \item Assign a score to each model
1771 | \item Search through all models to find the one with the highest score
1772 | \end{enumerate}
1773 | 
1774 | Hypothesis testing
1775 | \[\hyp{\beta_j=0}{\beta_j\neq0} \quad\forall j \in J\]
1776 | 
1777 | Mean squared prediction error (\mspe)
1778 | \[\mspe = \E{(\Yhat(S)-Y^*)^2}\]
1779 | 
1780 | Prediction risk
1781 | \[R(S) = \sumin \mspe_i = \sumin \E{(\Yhat_i(S)-Y_i^*)^2}\]
1782 | 
1783 | Training error
1784 | \[\Rhat_{tr}(S) = \sumin(\Yhat_i(S)-Y_i)^2\]
1785 | 
1786 | $R^2$
1787 | \[R^2(S)
1788 | = 1 - \frac{\rss(S)}{\tss}
1789 | = 1 - \frac{\Rhat_{tr}(S)}{\tss}
1790 | = 1 - \frac{\sumin(\Yhat_i(S)-Y_i)^2}{\sumin(Y_i-\Ybar)^2}\]
1791 | 
1792 | The training error is a downward-biased estimate of the prediction risk:
1793 | \[\E{\Rhat_{tr}(S)} < R(S)\]
1794 | \[\bias(\Rhat_{tr}(S)) = \E{\Rhat_{tr}(S)} - R(S) = -2\sumin\cov{\Yhat_i,Y_i}\]
1795 | 
1796 | Adjusted $R^2$
1797 | \[R^2(S) = 1 - \frac{n-1}{n-k} \frac{\rss}{\tss}\]
1798 | 
1799 | \textsc{Mallows'} $C_p$ statistic
1800 | \[\Rhat(S) = \Rhat_{tr}(S) + 2k\shat^2
1801 | = \text{lack of fit} + \text{complexity penalty}\]
1802 | 
1803 | \textsc{Akaike} Information Criterion (AIC)
1804 | \[AIC(S) = \lln(\bhat_S, \shat^2_S) - k\]
1805 | 
1806 | Bayesian Information Criterion (BIC)
1807 | \[BIC(S) = \lln(\bhat_S, \shat^2_S) - \frac{k}{2}\log n\]
1808 | 
1809 | Validation and training
1810 | \[\Rhat_V(S) = \sumim(\Yhat_i^*(S) - Y_i^*)^2 \qquad
1811 | m = |\{\text{validation data}\}|,
1812 | \text{ often }\frac{n}{4}\text{ or }\frac{n}{2}\]
1813 | 
1814 | Leave-one-out cross-validation
1815 | \[\Rhat_{CV}(S)
1816 | = \sumin(Y_i - \Yhat_{(i)})^2
1817 | = \sumin \left( \frac{Y_i-\Yhat_i(S)}{1-U_{ii}(S)} \right)^2\]
1818 | \[U(S) = X_S(X_S^T X_S)^{-1} X_S^T \text{ (``hat matrix'')}\]
1819 | 
1820 | \section{Non-parametric Function Estimation}
1821 | 
1822 | \subsection{Density Estimation}
1823 | 
1824 | Estimate $f(x)$, where $\Pr{X \in A} = \int_A f(x)\dx$.\\
1825 | 
1826 | Integrated square error (\ise)
1827 | \[L(f, \fnhat) = \int\left(f(x) - \fnhat(x)\right)^2 \dx = J(h)+\int f^2(x)\dx\]
1828 | 
1829 | Frequentist risk
1830 | \[R(f, \fnhat) = \E{L(f,\fnhat)} = \int b^2(x) \dx + \int v(x) \dx\]
1831 | \begin{align*}
1832 | b(x) &= \E{\fnhat(x)} - f(x) \\
1833 | v(x) &= \V{\fnhat(x)}
1834 | \end{align*}
1835 | 
1836 | \subsubsection{Histograms}
1837 | 
1838 | Definitions
1839 | \begin{itemize}
1840 | \item Number of bins $m$
1841 | \item Binwidth $h = \frac{1}{m}$ (data scaled to $[0,1]$)
1842 | \item Bin $B_j$ has $\nu_j$ observations
1843 | \item Define $\phat_j = \nu_j/n$ and $p_j = \int_{B_j} f(u)\du$
1844 | \end{itemize}
1845 | 
1846 | Histogram estimator
1847 | \begin{align*}
1848 | \fnhat(x) &= \sumjm \frac{\phat_j}{h} I(x\in B_j) \\
1849 | \E{\fnhat(x)} &= \frac{p_j}{h} \\
1850 | \V{\fnhat(x)} &= \frac{p_j(1-p_j)}{nh^2} \\
1851 | R(\fnhat,f) &\approx
1852 | \frac{h^2}{12} \int \left(f'(u)\right)^2 \du + \frac{1}{nh} \\
1853 | h^* &= \frac{1}{n^{1/3}} \left( \frac{6}{\int\left(f'(u)\right)^2\du}
1854 | \right)^{1/3} \\
1855 | R^*(\fnhat,f) &\approx \frac{C}{n^{2/3}} \qquad
1856 | C = \left(\frac{3}{4}\right)^{2/3} \left( \int\left( f'(u) \right)^2 \du
1857 | \right)^{1/3}
1858 | \end{align*}
1859 | 
1860 | Cross-validation estimate of $\E{J(h)}$
1861 | \[\Jhat_{CV}(h)
1862 | = \int
\fnhat^2(x) \dx - \frac{2}{n}\sumin \fhat_{(-i)}(X_i)
1863 | = \frac{2}{(n-1)h} - \frac{n+1}{(n-1)h} \sumjm \phat_j^2\]
1864 | 
1865 | \subsubsection{Kernel Density Estimator (KDE)}
1866 | 
1867 | Kernel $K$
1868 | \begin{itemize}
1869 | \item $K(x) \ge 0$
1870 | \item $\int K(x)\dx = 1$
1871 | \item $\int xK(x)\dx = 0$
1872 | \item $\int x^2 K(x)\dx \equiv \sigma^2_K > 0$
1873 | \end{itemize}
1874 | 
1875 | KDE
1876 | \begin{align*}
1877 | \fnhat(x) &= \frac{1}{n} \sumin \frac{1}{h} K\left( \frac{x-X_i}{h} \right) \\
1878 | R(f,\fnhat) &\approx \frac{1}{4}(h\sigma_K)^4 \int (f''(x))^2\dx
1879 | + \frac{1}{nh} \int K^2(x)\dx \\
1880 | h^* &= \frac{c_1^{-2/5} c_2^{-1/5} c_3^{-1/5}}{n^{1/5}} \qquad
1881 | c_1=\sigma_K^2,\;c_2 = \int K^2(x)\dx,\;c_3 = \int(f''(x))^2\dx\\
1882 | R^*(f,\fnhat) &= \frac{c_4}{n^{4/5}} \qquad
1883 | c_4 = \underbrace{\frac{5}{4}(\sigma_K^2)^{2/5} \left(\int
1884 | K^2(x)\dx\right)^{4/5}}_{C(K)}
1885 | \left( \int(f'')^2\dx \right)^{1/5}
1886 | \end{align*}
1887 | 
1888 | \textsc{Epanechnikov} Kernel
1889 | \[K(x) = \begin{cases}
1890 | \frac{3}{4\sqrt{5}}\left(1-x^2/5\right) & |x| < \sqrt{5} \\ 0 & \text{otherwise}
1891 | \end{cases}\]
1892 | 
1893 | Cross-validation estimate of $\E{J(h)}$
1894 | \[\Jhat_{CV}(h)
1895 | = \int \fnhat^2(x) \dx - \frac{2}{n}\sumin \fhat_{(-i)}(X_i)
1896 | \approx \frac{1}{hn^2} \sumin \sumjn K^*\left( \frac{X_i-X_j}{h} \right) +
1897 | \frac{2}{nh} K(0)\]
1898 | \[K^*(x) = K^{(2)}(x)-2K(x) \qquad K^{(2)}(x) = \int K(x-y) K(y) \dy\]
1899 | 
1900 | \subsection{Non-parametric Regression}
1901 | 
1902 | Estimate $r(x)$ where $r(x) = \E{Y \giv X=x}$.
1903 | Consider pairs of points $(x_1,Y_1),\dots,(x_n,Y_n)$ related by
1904 | \begin{align*}
1905 | Y_i &= r(x_i) + \epsilon_i \\
1906 | \E{\epsilon_i} &= 0 \\
1907 | \V{\epsilon_i} &= \sigma^2
1908 | \end{align*}
1909 | 
1910 | $k$-nearest Neighbor Estimator
1911 | \[\rhat(x) = \frac{1}{k} \sum_{i:x_i \in N_k(x)} Y_i \qquad \text{where }
1912 | N_k(x) = \{k \text{ values of } x_1,\dots,x_n \text{ closest to } x\}\]
1913 | 
1914 | \textsc{Nadaraya-Watson} Kernel Estimator
1915 | \begin{align*}
1916 | \rhat(x) &= \sumin w_i(x)Y_i \\
1917 | w_i(x)
1918 | &= \frac{K\left(\frac{x-x_i}{h}\right)}{\sumjn K\left(\frac{x-x_j}{h}\right)}
1919 | \quad \in [0,1] \\
1920 | R(\rhat_n,r) &\approx \frac{h^4}{4} \left( \int x^2K(x)\dx \right)^2
1921 | \int \left( r''(x) + 2r'(x)\frac{f'(x)}{f(x)}\right)^2 \dx \\
1922 | &+ \int \frac{\sigma^2 \int K^2(x) \dx}{nhf(x)}\dx \\
1923 | h^* &\approx \frac{c_1}{n^{1/5}} \\
1924 | R^*(\rhat_n,r) &\approx \frac{c_2}{n^{4/5}}
1925 | \end{align*}
1926 | 
1927 | Cross-validation estimate of $\E{J(h)}$
1928 | \[\Jhat_{CV}(h)
1929 | = \sumin (Y_i - \rhat_{(-i)}(x_i))^2
1930 | = \sumin \frac{(Y_i - \rhat(x_i))^2}{\left(
1931 | 1- \frac{K(0)}{\sumjn K\left(\frac{x_i-x_j}{h}\right)}\right)^2}\]
1932 | 
1933 | \subsection{Smoothing Using Orthogonal Functions}
1934 | 
1935 | Approximation
1936 | \[r(x) = \sum_{j=1}^\infty\beta_j\phi_j(x)
1937 | \approx \sum_{j=1}^J \beta_j\phi_j(x)\]
1938 | 
1939 | Multivariate regression
1940 | \[Y = \Phi\beta + \eta\]
1941 | \[\text{where}\quad \eta_i = \epsilon_i \quad\text{and}\quad \Phi
1942 | = \begin{pmatrix}
1943 | \phi_0(x_1) & \cdots & \phi_J(x_1) \\
1944 | \vdots & \ddots & \vdots \\
1945 | \phi_0(x_n) & \cdots & \phi_J(x_n)
1946 | \end{pmatrix}\]
1947 | 
1948 | Least squares estimator
1949 | \begin{align*}
1950 | \bhat &= (\Phi^T\Phi)^{-1}\Phi^T Y \\
1951 | &\approx \frac{1}{n}\Phi^T Y
1952 | \quad\text{(for equally spaced observations only)}
1953 | \end{align*}
1954 | 
1955 | Cross-validation estimate of the risk
1956 | \[\Rhat_{CV}(J)
1957 | = \sumin \left( Y_i - \sum_{j=1}^J \phi_j(x_i)\bhat_{j,(-i)} \right)^2\]
1958 | 
1959 | \section{Stochastic Processes}
1960 | 
1961 | Stochastic Process
1962 | \[\left\{ X_t : t \in T\right\} \qquad T=\begin{cases}\{0,\pm1,\dots\}=\Z &
1963 | \text{discrete} \\ [0,\infty) & \text{continuous}\end{cases}\]
1964 | 
1965 | \begin{itemize}
1966 | \item Notations $X_t$, $X(t)$
1967 | \item State space $\mathcal{X}$
1968 | \item Index set $T$
1969 | \end{itemize}
1970 | 
1971 | \subsection{Markov Chains}
1972 | 
1973 | Markov chain
1974 | \[\Pr{X_n = x \giv X_0,\dots,X_{n-1}} = \Pr{X_n = x \giv X_{n-1}}
1975 | \quad \forall n\in T, x \in \mathcal{X}\]
1976 | 
1977 | Transition probabilities
1978 | \begin{align*}
1979 | p_{ij} &\equiv \Pr{X_{n+1} = j \giv X_n = i} \\
1980 | p_{ij}(n) &\equiv \Pr{X_{m+n} = j \giv X_m = i} \quad\text{($n$-step)}
1981 | \end{align*}
1982 | 
1983 | Transition matrix $\mathbf{P}$ ($n$-step: $\mathbf{P}_n$)
1984 | \begin{itemize}
1985 | \item $(i,j)$ element is $p_{ij}$
1986 | \item $p_{ij} \ge 0$
1987 | \item $\sum_j p_{ij} = 1$ (each row sums to one)
1988 | \end{itemize}
1989 | 
1990 | \textsc{Chapman-Kolmogorov}
1991 | \[p_{ij}(m+n) = \sum_k p_{ik}(m) p_{kj}(n)\]
1992 | \[\mathbf{P}_{m+n} = \mathbf{P}_m\mathbf{P}_n\]
1993 | \[\mathbf{P}_n = \mathbf{P} \times \cdots \times \mathbf{P} = \mathbf{P}^n\]
1994 | 
1995 | Marginal probability
1996 | \begin{align*}
1997 | \mu_n &= (\mu_n(1),\dots,\mu_n(N))
1998 | \quad\text{where}\quad \mu_n(i)=\Pr{X_n=i} \\
1999 | \mu_0 &\eqdef \text{initial distribution} \\
2000 | \mu_n &= \mu_0\mathbf{P}^n
2001 | \end{align*}
2002 | 
2003 | \subsection{Poisson Processes}
2004 | 
2005 | Poisson process
2006 | \begin{itemize}
2007 | \item $\left\{ X_t : t \in [0,\infty) \right\}$
2008 | = number of events up to and including time $t$
2009 | \item $X_0 = 0$
2010 | \item Independent increments:
2011 | \[\forall t_0 < \cdots < t_n:
2012 | X_{t_1} - X_{t_0} \ind \cdots \ind X_{t_n} - X_{t_{n-1}}\]
2013 | \item Intensity function $\lambda(t)$
2014 | \begin{itemize}
2015 | \item $\Pr{X_{t+h}-X_t = 1} = \lambda(t) h + o(h)$
2016 | \item $\Pr{X_{t+h}-X_t \ge 2} = o(h)$
2017 | \end{itemize}
2018 | \item $X_{s+t} - X_s \dist \pois[m(s+t)-m(s)]\;$ where
2019 | $\;m(t)=\int_0^t\lambda(s)\ds$
2020 | \end{itemize}
2021 | 
2022 | Homogeneous Poisson process
2023 | \[\lambda(t) \equiv \lambda \imp X_t \dist \pois[\lambda t] \qquad \lambda > 0\]
2024 | 
2025 | Waiting times
2026 | \[W_t \define \text{time at which $X_t$ occurs}\]
2027 | \[W_t \dist \gam[t, \frac{1}{\lambda}]\]
2028 | 
2029 | Interarrival times
2030 | \[S_t = W_{t+1} - W_t\]
2031 | \[S_t \dist \ex[\frac{1}{\lambda}]\]
2032 | 
2033 | \begin{center}
2034 | \begin{tikzpicture}[decoration={brace,amplitude=5pt}]
2035 | \draw[->] (0,0) -- (8,0) node[below]{$t$};
2036 | \foreach \i in {1,1.5,3,5,6,7}
2037 | \draw (\i,2pt) -- (\i,-2pt) node {};
2038 | \draw (3,0) node[below] {\footnotesize $W_{t-1}$};
2039 | \draw (5,0) node[below] {\footnotesize $W_{t}$};
2040 | \draw[decorate,yshift=5pt] (3,0) -- (5,0)
2041 | node[midway,above=3pt] {\footnotesize $S_t$};
2042 | \end{tikzpicture}
2043 | \end{center}
2044 | 
2045 | \section{Time Series}
2046 | 
2047 | Mean function
2048 | \[\mu_{x_t} = \E{x_t} = \int_{-\infty}^\infty x f_t(x) \dx\]
2049 | 
2050 | Autocovariance function
2051 | \[\gamma_x(s,t) = \E{(x_s-\mu_s)(x_t-\mu_t)} = \E{x_sx_t} - \mu_s\mu_t\]
2052 | \[\gamma_x(t,t) = \E{(x_t-\mu_t)^2} = \V{x_t}\]
2053 | 
2054 | Autocorrelation function (ACF)
2055 | \[\rho(s,t) = \frac{\cov{x_s,x_t}}{\sqrt{\V{x_s}\V{x_t}}}
2056 | = \frac{\gamma(s,t)}{\sqrt{\gamma(s,s)\gamma(t,t)}}\]
2057 | 
2058 | Cross-covariance function (CCV)
2059 | \[\gamma_{xy}(s,t) = \E{(x_s-\mu_{x_s})(y_t-\mu_{y_t})}\]
2060 | 
2061 | Cross-correlation function (CCF)
2062 | \[\rho_{xy}(s,t) = \frac{\gamma_{xy}(s,t)}{\sqrt{\gamma_x(s,s)\gamma_y(t,t)}}\]
2063 | 
2064 | Backshift operator
2065 | \[B^k(x_t) = x_{t-k}\]
2066 | 
2067 | Difference operator
2068 | \[\nabla^d = (1-B)^d\]
2069 | 
2070 | White noise
2071 | \begin{itemize}
2072 | \item $w_t \dist wn(0, \sigma_w^2)$
2073 | \item Gaussian: $w_t \distiid \norm[0, \sigma_w^2]$
2074 | \item $\E{w_t} = 0 \quad t\in T$
2075 | \item $\V{w_t} = \sigma_w^2 \quad t\in T$
2076 | \item $\gamma_w(s,t) = 0 \quad s \neq t \;\wedge\; s,t\in T$
2077 | \end{itemize}
2078 | 
2079 | %Auto regression
2080 | %\[x_t = \sum_{i=1}^p \phi_i x_{t-i} + w_t\]
2081 | 
2082 | Random walk
2083 | \begin{itemize}
2084 | \item Drift $\delta$
2085 | \item $x_t = \delta t + \sum_{j=1}^t w_j$
2086 | \item $\E{x_t} = \delta t$
2087 | \end{itemize}
2088 | 
2089 | Symmetric moving average
2090 | \[m_t = \sum_{j=-k}^k a_j x_{t-j}
2091 | \qquad \text{where } a_j=a_{-j}\ge0 \text{ and } \sum_{j=-k}^k a_j = 1\]
2092 | 
2093 | \subsection{Stationary Time Series}
2094 | 
2095 | Strictly stationary
2096 | \[\Pr{x_{t_1} \le c_1, \dots, x_{t_k} \le c_k} =
2097 | \Pr{x_{t_1+h} \le c_1, \dots, x_{t_k+h} \le c_k}\]
2098 | \[\forall k\in\N,\; t_k, c_k, h\in\Z\]
2099 | 
2100 | Weakly stationary
2101 | \begin{itemize}
2102 | \item $\E{x_t^2} < \infty \qquad\forall t\in\Z$
2103 | \item $\E{x_t} = m \qquad\forall t\in\Z$
2104 | \item $\gamma_x(s,t) = \gamma_x(s+r, t+r) \qquad\forall r,s,t\in\Z$
2105 | \end{itemize}
2106 | 
2107 | Autocovariance function
2108 | \begin{itemize}
2109 | \item $\gamma(h) = \E{(x_{t+h}-\mu)(x_t-\mu)} \qquad \forall h\in\Z$
2110 | \item $\gamma(0) = \E{(x_t-\mu)^2}$
2111 | \item $\gamma(0) \ge 0$
2112 | \item $\gamma(0) \ge |\gamma(h)|$
2113 | \item $\gamma(h) = \gamma(-h)$
2114 | \end{itemize}
2115 | 
2116 | Autocorrelation function (ACF)
2117 | \[\rho_x(h) = \frac{\cov{x_{t+h},x_t}}{\sqrt{\V{x_{t+h}}\V{x_t}}}
2118 | = \frac{\gamma(t+h,t)}{\sqrt{\gamma(t+h,t+h)\gamma(t,t)}}
2119 | = \frac{\gamma(h)}{\gamma(0)}\]
2120 | 
2121 | Jointly stationary time series
2122 | \[\gamma_{xy}(h) = \E{(x_{t+h}-\mu_x)(y_t-\mu_y)}\]
2123 | \[\rho_{xy}(h) = \frac{\gamma_{xy}(h)}{\sqrt{\gamma_x(0)\gamma_y(0)}}\]
2124 | 
2125 | Linear process
2126 | \[x_t = \mu + \sum_{j=-\infty}^\infty \psi_j w_{t-j} \quad\text{where}\quad
2127 | \sum_{j=-\infty}^\infty |\psi_j| < \infty\]
2128 | \[\gamma(h) = \sigma_w^2 \sum_{j=-\infty}^\infty \psi_{j+h}\psi_j\]
2129 | 
2130 | \subsection{Estimation of Correlation}
2131 | 
2132 | Sample mean
2133 | \[\xbar = \frac{1}{n}\sum_{t=1}^n x_t\]
2134 | 
2135 | Variance of the sample mean
2136 | \[\V{\xbar} = \frac{1}{n}\sum_{h=-n}^n \left(1-\frac{|h|}{n}\right)\gamma_x(h)\]
2137 | 
2138 | Sample autocovariance function
2139 | \[\ghat(h) = \frac{1}{n}\sum_{t=1}^{n-h}(x_{t+h}-\xbar)(x_t-\xbar)\]
2140 | 
2141 | Sample autocorrelation function
2142 | \[\rhohat(h) = \frac{\ghat(h)}{\ghat(0)}\]
2143 | 
2144 | Sample cross-covariance function
2145 | \[\ghat_{xy}(h) = \frac{1}{n} \sum_{t=1}^{n-h}(x_{t+h}-\xbar)(y_t - \ybar)\]
2146 | 
2147 | Sample cross-correlation function
2148 | \[\rhohat_{xy}(h) = \frac{\ghat_{xy}(h)}{\sqrt{\ghat_x(0) \ghat_y(0)}}\]
2149 | 
2150 | Properties
2151 | \begin{itemize}
2152 | \item $\sigma_{\rhohat_x(h)} = \displaystyle\frac{1}{\sqrt{n}}$
2153 | if $x_t$ is white noise
2154 | \item $\sigma_{\rhohat_{xy}(h)} = \displaystyle\frac{1}{\sqrt{n}}$
2155 | if $x_t$ or $y_t$ is white noise
2156 | \end{itemize}
2157 | 
2158 | \subsection{Non-Stationary Time Series}
2159 | 
2160 | Classical decomposition model
2161 | \[x_t = \mu_t + s_t + w_t\]
2162 | \begin{itemize}
2163 | \item $\mu_t =$ trend
2164 | \item $s_t =$ seasonal component
2165 | \item $w_t =$ random noise term
2166 | \end{itemize}
2167 | 
2168 | \subsubsection{Detrending}
2169 | 
2170 | Least squares
2171 | \begin{enumerate}
2172 | \item Choose trend model, e.g.,
2173 | $\mu_t = \beta_0 + \beta_1 t + \beta_2 t^2$
2174 | \item Minimize \rss to obtain trend estimate
2175 | $\mhat_t = \bhat_0 + \bhat_1 t + \bhat_2 t^2$
2176 | \item Residuals $\triangleq$ noise $w_t$
2177 | \end{enumerate}
2178 | 
2179 | Moving average
2180 | \begin{itemize}
2181 | \item The \emph{low-pass} filter $v_t$ is a symmetric moving average $m_t$
2182 | with $a_j = \frac{1}{2k+1}$:
2183 | \[v_t = \frac{1}{2k+1} \sum_{i=-k}^k x_{t-i}\]
2184 | \item If $\frac{1}{2k+1} \sum_{i=-k}^k w_{t-i} \approx 0$,
2185 | a linear trend function $\mu_t = \beta_0 + \beta_1t$ passes without
2186 | distortion
2187 | \end{itemize}
2188 | 
2189 | Differencing
2190 | \begin{itemize}
2191 | \item $\mu_t = \beta_0 + \beta_1t \imp \nabla\mu_t = \beta_1$
2192 | \end{itemize}
2193 | 
2194 | \subsection{ARIMA models}
2195 | 
2196 | Autoregressive polynomial
2197 | \[\phi(z) = 1 - \phi_1 z - \cdots - \phi_p z^p
2198 | \qquad z \in \C \wedge \phi_p \neq 0\]
2199 | 
2200 | Autoregressive operator
2201 | \[\phi(B) = 1 - \phi_1B - \cdots - \phi_pB^p\]
2202 | 
2203 | Autoregressive model order $p$, $\AR$
2204 | \[x_t = \phi_1 x_{t-1} + \cdots + \phi_p x_{t-p} + w_t \eqv \phi(B)x_t = w_t\]
2205 | 
2206 | $\AR[1]$
2207 | \begin{itemize}
2208 | \item $x_t = \phi^k(x_{t-k}) + \displaystyle\sum_{j=0}^{k-1} \phi^j(w_{t-j})
2209 | \stackrel{k\to\infty, |\phi| < 1}{=}
2210 | \underbrace{\sum_{j=0}^\infty \phi^j(w_{t-j})}_{\text{linear process}}$
2211 | \item $\E{x_t} = \sum_{j=0}^\infty\phi^j(\E{w_{t-j}}) = 0$
2212 | \item $\gamma(h) = \cov{x_{t+h},x_t} = \frac{\sigma_w^2\phi^h}{1-\phi^2}$
2213 | \item $\rho(h) = \frac{\gamma(h)}{\gamma(0)} = \phi^h$
2214 | \item $\rho(h) = \phi \rho(h-1) \quad h=1,2,\ldots$
2215 | \end{itemize}
2216 | 
2217 | Moving average polynomial
2218 | \[\theta(z) = 1 + \theta_1 z + \cdots + \theta_q z^q
2219 | \qquad z \in \C \wedge \theta_q \neq 0\]
2220 | 
2221 | Moving average operator
2222 | \[\theta(B) = 1 + \theta_1B + \cdots + \theta_qB^q\]
2223 | 
2224 | $\MA$ (moving average model order $q$)
2225 | \[x_t = w_t + \theta_1 w_{t-1} + \cdots + \theta_q w_{t-q}
2226 | \eqv x_t = \theta(B)w_t\]
2227 | \[\E{x_t} = \sum_{j=0}^q \theta_j\E{w_{t-j}} = 0\]
2228 | \[\gamma(h) = \cov{x_{t+h},x_t} = \begin{cases}
2229 | \sigma_w^2\sum_{j=0}^{q-h} \theta_j\theta_{j+h} & 0 \le h \le q \\
2230 | 0 & h > q
2231 | \end{cases}\]
2232 | 
2233 | $\MA[1]$
2234 | \[x_t = w_t + \theta w_{t-1}\]
2235 | \[\gamma(h) = \begin{cases}
2236 | (1+\theta^2)\sigma_w^2 & h = 0 \\
2237 | \theta\sigma_w^2 & h = 1 \\
2238 | 0 & h > 1
2239 | \end{cases}\]
2240 | \[\rho(h) = \begin{cases}
2241 | \frac{\theta}{(1+\theta^2)} & h = 1 \\
2242 | 0 & h > 1
2243 | \end{cases}\]
2244 | 
2245 | $\ARMA$
2246 | \[x_t = \phi_1 x_{t-1} + \cdots + \phi_p x_{t-p} + w_t + \theta_1 w_{t-1} +
2247 | \cdots + \theta_q w_{t-q}\]
2248 | \[\phi(B) x_t = \theta(B) w_t\]
2249 | 
2250 | Partial autocorrelation function (PACF)
2251 | \begin{itemize}
2252 | \item $x_i^{h-1} \eqdef$ regression of $x_i$ on
Partial autocorrelation function (PACF)
\begin{itemize}
  \item $x_i^{h-1} \eqdef$ regression of $x_i$ on
    $\{x_{h-1}, x_{h-2}, \dots, x_1\}$
  \item $\phi_{hh} = \text{corr}(x_h - x_h^{h-1}, x_0 - x_0^{h-1}) \quad h \ge 2$
  \item E.g., $\phi_{11} = \text{corr}(x_1,x_0) = \rho(1)$
\end{itemize}

$\ARIMA$
\[\nabla^d x_t = (1-B)^d x_t \text{ is } \ARMA\]
\[\phi(B)(1-B)^d x_t = \theta(B) w_t\]

Exponentially Weighted Moving Average (EWMA)
\[x_t = x_{t-1} + w_t - \lambda w_{t-1}\]
\[x_t = \sum_{j=1}^\infty(1-\lambda)\lambda^{j-1} x_{t-j} + w_t
  \quad\text{when } |\lambda| < 1\]
\[\tilde{x}_{n+1} = (1-\lambda)x_n + \lambda \tilde{x}_n\]

\begin{titemize}{Seasonal ARIMA}
  \item Denoted by $\SARIMA$
  \item $\Phi_P(B^s) \phi(B) \nabla_s^D \nabla^d x_t
    = \delta + \Theta_Q(B^s)\theta(B)w_t$
\end{titemize}

\subsubsection{Causality and Invertibility}
$\ARMA$ is causal (future-independent)
$\eqv \exists \{\psi_j\} : \sum_{j=0}^\infty |\psi_j| < \infty$ such that
\[x_t = \sum_{j=0}^\infty \psi_j w_{t-j} = \psi(B)w_t\]

$\ARMA$ is invertible
$\eqv \exists \{\pi_j\} : \sum_{j=0}^\infty |\pi_j| < \infty$ such that
\[\pi(B)x_t = \sum_{j=0}^\infty \pi_j x_{t-j} = w_t\]

Properties
\begin{itemize}
  \item $\ARMA$ causal $\eqv$
    roots of $\phi(z)$ lie outside the unit circle
    \[\psi(z) = \sum_{j=0}^\infty\psi_j z^j = \frac{\theta(z)}{\phi(z)}
      \quad |z| \le 1\]
  \item $\ARMA$ invertible $\eqv$
    roots of $\theta(z)$ lie outside the unit circle
    \[\pi(z) = \sum_{j=0}^\infty\pi_j z^j = \frac{\phi(z)}{\theta(z)}
      \quad |z| \le 1\]
\end{itemize}

Behavior of the ACF and PACF for causal and invertible ARMA models

\begin{center}
  \begin{tabular}{|c|ccc|}
    \hline
    & $\AR$ & $\MA$ & $\ARMA$\\
    \hline
    ACF & tails off & cuts off after lag $q$ & tails off \\
    PACF & cuts off after lag $p$ & tails off & tails off \\
    \hline
  \end{tabular}
\end{center}
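The root conditions can be checked numerically; a sketch in base R for a
hypothetical $\ARMA$ model with $p=2$, $q=1$ (illustrative, not part of the
original cookbook):
\begin{verbatim}
# phi(z) = 1 - 0.5 z - 0.3 z^2
# theta(z) = 1 + 0.4 z
phi   <- c(1, -0.5, -0.3)  # coeffs of 1, z, z^2
theta <- c(1, 0.4)
all(Mod(polyroot(phi))   > 1)  # TRUE => causal
all(Mod(polyroot(theta)) > 1)  # TRUE => invertible
\end{verbatim}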
\subsection{Spectral Analysis}

Periodic process
\begin{align*}
  x_t
  &= A \cos(2\pi\omega t + \phi) \\
  &= U_1 \cos(2\pi \omega t) + U_2 \sin(2\pi \omega t)
\end{align*}

\begin{itemize}
  \item Frequency index $\omega$ (cycles per unit time),
    period $1/\omega$
  \item Amplitude $A$
  \item Phase $\phi$
  \item $U_1 = A\cos\phi$ and $U_2 = A\sin\phi$ are often normally
    distributed \rv's
\end{itemize}

Periodic mixture
\[x_t = \sum_{k=1}^q\left( U_{k1}\cos(2\pi\omega_k t)
  + U_{k2}\sin(2\pi\omega_k t)
  \right)\]
\begin{itemize}
  \item $U_{k1}, U_{k2}$, for $k=1,\ldots,q$,
    are independent zero-mean \rv's with variances $\sigma_k^2$
  \item $\gamma(h) = \sum_{k=1}^q \sigma_k^2 \cos(2\pi\omega_k h)$
  \item $\gamma(0) = \E{x_t^2} = \sum_{k=1}^q \sigma_k^2$
\end{itemize}

Spectral representation of a periodic process
\begin{align*}
  \gamma(h)
  &= \sigma^2 \cos(2\pi\omega_0 h) \\
  &= \frac{\sigma^2}{2} e^{-2\pi i \omega_0 h}
   + \frac{\sigma^2}{2} e^{2\pi i \omega_0 h}\\
  &= \int_{-1/2}^{1/2} e^{2\pi i \omega h} \d{F(\omega)}
\end{align*}

Spectral distribution function
\[F(\omega)= \begin{cases}
  0 & \omega < -\omega_0 \\
  \sigma^2/2 & -\omega_0 \le \omega < \omega_0 \\
  \sigma^2 & \omega \ge \omega_0
\end{cases}\]
\begin{itemize}
  \item $F(-\infty) = F(-1/2) = 0$
  \item $F(\infty) = F(1/2) = \gamma(0)$
\end{itemize}

Spectral density
\[f(\omega) = \sum_{h=-\infty}^\infty \gamma(h) e^{-2\pi i \omega h}
  \quad -\frac{1}{2} \le \omega \le \frac{1}{2}\]
\begin{itemize}
  \item Needs $\sum_{h=-\infty}^\infty |\gamma(h)| < \infty
    \imp \gamma(h) = \int_{-1/2}^{1/2} e^{2\pi i \omega h}f(\omega) \d\omega
    \quad h=0,\pm1,\ldots$
  \item $f(\omega) \ge 0$
  \item $f(\omega) = f(-\omega)$
  \item $f(\omega) = f(1-\omega)$
  \item $\gamma(0) = \V{x_t} = \int_{-1/2}^{1/2}f(\omega)\d\omega$
  \item White noise: $f_w(\omega) = \sigma_w^2$
  \item $\ARMA, \phi(B)x_t = \theta(B)w_t$:
    \[f_x(\omega) = \sigma_w^2 \frac{|\theta(e^{-2\pi i
      \omega})|^2}{|\phi(e^{-2\pi i \omega})|^2}\]
    where $\phi(z) = 1 - \sum_{k=1}^p \phi_k z^k$ and
    $\theta(z) = 1 + \sum_{k=1}^q \theta_k z^k$
\end{itemize}

Discrete Fourier Transform (DFT)
\[d(\omega_j) = n^{-1/2} \sum_{t=1}^n x_t e^{-2\pi i\omega_j t}\]

Fourier/fundamental frequencies
\[\omega_j = j/n\]

Inverse DFT
\[x_t = n^{-1/2} \sum_{j=0}^{n-1} d(\omega_j) e^{2\pi i\omega_j t}\]

Periodogram
\[I(j/n) = |d(j/n)|^2\]

Scaled periodogram
\begin{align*}
  P(j/n)
  &= \frac{4}{n}I(j/n) \\
  &= \left( \frac{2}{n} \sum_{t=1}^n x_t \cos(2\pi t j/n) \right)^2
   + \left( \frac{2}{n} \sum_{t=1}^n x_t \sin(2\pi t j/n) \right)^2
\end{align*}
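An illustrative periodogram computation via the FFT in base R (a sketch,
not from the original text); note that R's \texttt{fft} is unnormalized,
so $I(j/n) = |d(j/n)|^2$ requires dividing by $n$:
\begin{verbatim}
set.seed(7)
n <- 256
t <- 1:n
# one cycle at omega = 30/n, plus noise
x <- 2 * cos(2 * pi * 30 * t / n) + rnorm(n)
I <- Mod(fft(x))^2 / n   # periodogram I(j/n)
which.max(I[2:(n/2)])    # peaks at j = 30
\end{verbatim}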
\section{Math}

%\subsection{Orthogonal Functions}
%
%$L_2$ space
%\[L_2(a,b) = \left\{ f: [a,b] \to \R, \int_a^b f(x)^2\dx < \infty \right\}\]
%
%Inner product
%\[\int f(x)g(x)\dx\]
%
%Norm
%\[\|f\| = \sqrt{\int f^2(x) \dx}\]
%
%Orthogonality (for a series of functions $\phi_i$)
%\begin{align*}
%  \int \phi_j^2(x)\dx &= 1 \; \forall j \\
%  \int \phi_i(x)\phi_j(x)\dx &= 0 \; \forall i \neq j
%\end{align*}
%
%An orthogonal sequence $\phi_1, \phi_2,\dots$ is \emph{complete} if the only
%function that is orthogonal to each $\phi_j$ is the zero function. Then,
%$\phi_1, \phi_2,\dots$ form an \emph{orthogonal basis} in $L_2$:
%\[f \in L_2 \imp f(x) = \sum_{j=1}^\infty \beta_j \phi_j(x)
%\quad \text{where } \beta_j = \int_a^b f(x)\phi_j(x) \dx\]
%
%Cosine basis
%\begin{align*}
%  \phi_0(x) &= 1 \\
%  \phi_j(x) &= \sqrt{2}\cos(j\pi x) \quad \forall j\ge1
%\end{align*}
%
%\raggedright
%\textsc{Parseval}'s relation
%\[\|f\|^2 \equiv \int f^2(x)\dx = \sum_{j=1}^\infty \beta_j^2 \equiv \|\beta\|^2\]
%
%\textsc{Legendre} polynomials
%\begin{align*}
%  x &\in [-1,1] \\
%  P_0(x) &= 1\\
%  P_1(x) &= x \\
%  P_{j+1}(x) &= \frac{(2j+1)x\,P_j(x) - jP_{j-1}(x)}{j+1} \\
%  \phi_j(x) &= \sqrt{(2j+1)/2} P_j(x) \quad \text{orthogonal basis for }
%    L_2(-1,1)
%\end{align*}

\subsection{Gamma Function}
\label{sec:math:gamma}

\begin{itemize}
  \item Ordinary:
    $\displaystyle\Gamma(s) = \int_0^\infty t^{s-1} e^{-t}dt$
  \item Upper incomplete:
    $\displaystyle\Gamma(s,x) = \int_x^\infty t^{s-1} e^{-t}dt$
  \item Lower incomplete:
    $\displaystyle\gamma(s,x) = \int_0^x t^{s-1} e^{-t}dt$
  \item $\Gamma(\alpha + 1) = \alpha \Gamma(\alpha) \qquad \alpha>0$
  \item $\Gamma(n) = (n-1)! \qquad n \in \mathbb N$
  \item $\Gamma(0) = \Gamma(-1) = \infty$
  \item $\Gamma(1/2) = \sqrt{\pi}$
  \item $\Gamma(-1/2) = -2 \Gamma(1/2)$
\end{itemize}

\subsection{Beta Function}
\label{sec:math:beta}

\begin{itemize}
  \item Ordinary: $\text{B}(x,y) = \text{B}(y,x)
    = \displaystyle\int_0^1 t^{x-1}(1-t)^{y-1} \,dt
    = \displaystyle\frac{\Gamma(x)\Gamma(y)}{\Gamma(x+y)}$
%  \item $\alpha,\beta \in \mathbb N \imp \displaystyle
%    \text{B}(\alpha,\beta) = \frac{(\alpha-1)!(\beta-1)!}{(\alpha+\beta-1)!}$
  \item Incomplete: $\text{B}(x;\,a,b)
    = \displaystyle\int_0^x t^{a-1}(1-t)^{b-1} \,dt$
  \item Regularized incomplete: \\
    $I_x(a,b) = \displaystyle\frac{\text{B}(x;\,a,b)}{\text{B}(a,b)}
    \stackrel{a,b\in\mathbb N}{=}
    \sum_{j=a}^{a+b-1} \frac{(a+b-1)!}{j!(a+b-1-j)!}x^j(1-x)^{a+b-1-j}$
  \item $I_0(a,b) = 0 \qquad I_1(a,b) = 1$
  \item $I_x(a,b) = 1 - I_{1-x}(b,a)$
\end{itemize}

\subsection{Series}

\begin{multicols}{2}
\begin{titemize}{Finite}
  \item $\displaystyle\sum_{k=1}^n k = \frac{n(n+1)}{2}$
  \item $\displaystyle\sum_{k=1}^n (2k-1) = n^2$
  \item $\displaystyle\sum_{k=1}^n k^2 = \frac{n(n+1)(2n+1)}{6}$
  \item $\displaystyle\sum_{k=1}^n k^3 = \left(\frac{n(n+1)}{2}\right)^2$
  \item $\displaystyle\sum_{k=0}^n c^k = \frac{c^{n+1}-1}{c-1} \quad c\neq1$
\end{titemize}

\begin{titemize}{Binomial}
  \item $\displaystyle\sum_{k=0}^n \binom{n}{k} = 2^n$
  \item $\displaystyle\sum_{k=0}^n \binom{r+k}{k}=\binom{r+n+1}{n}$
  \item $\displaystyle\sum_{k=0}^n \binom{k}{m}=\binom{n+1}{m+1}$
  \item \textsc{Vandermonde}'s Identity:\\
    $\displaystyle\sum_{k=0}^r \binom{m}{k}\binom{n}{r-k}=\binom{m+n}{r}$
  \item Binomial Theorem:\\
    $\displaystyle\sum_{k=0}^n \binom{n}{k}a^{n-k}b^k = (a+b)^n$
\end{titemize}
\end{multicols}

Infinite
\begin{itemize}
  \item $\displaystyle\sum_{k=0}^\infty p^k = \frac{1}{1-p},
    \quad \sum_{k=1}^\infty p^k = \frac{p}{1-p} \quad |p|<1$
  \item $\displaystyle\sum_{k=0}^\infty kp^{k-1}
    = \displaystyle\frac{d}{dp}\left(\sum_{k=0}^\infty p^k\right)
    = \displaystyle\frac{d}{dp}\left(\frac{1}{1-p}\right)
    = \frac{1}{(1-p)^2} \quad |p|<1$
  \item $\displaystyle\sum_{k=0}^\infty \binom{r+k-1}{k} x^k = (1-x)^{-r}
    \quad |x|<1\,,\, r\in\mathbb N^+$
  \item $\displaystyle\sum_{k=0}^\infty \binom{\alpha}{k} p^k
    = (1+p)^\alpha \quad |p|<1\,,\,\alpha \in \mathbb C$
\end{itemize}
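As a worked illustration (not in the original), the derivative identity
above gives the mean of a geometrically distributed \rv with success
probability $p$: substituting $1-p$ for $p$ in
$\sum_{k=0}^\infty kp^{k-1} = (1-p)^{-2}$ yields
\[\E{X} = \sum_{k=1}^\infty k\,p(1-p)^{k-1}
  = p\sum_{k=1}^\infty k\,(1-p)^{k-1}
  = \frac{p}{(1-(1-p))^2} = \frac{1}{p}\]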
%\subsection{Integrals}
%
%\begin{itemize}
%  \item $\displaystyle\int_{-\infty}^\infty e^{-\frac{x^2}{2}}dx
%    = \sqrt{2\pi}$
%\end{itemize}

\vfill~

\subsection{Combinatorics}

Sampling
\begin{center}
  \begin{tabular}[h]{|l*2{|>{\begin{math}\displaystyle}c<{\end{math}}}|}
    \hline &&\\[-1.5ex]
    $k$ out of $n$ & \text{w/o replacement} & \text{w/ replacement}
    \\[1ex]
    \hline
    ordered & n^{\underline k}
      = \displaystyle\prod_{i=0}^{k-1}(n-i)
      = \frac{n!}{(n-k)!}
      & n^k \\[3ex]
    unordered & \binom{n}{k} = \frac{n^{\underline k}}{k!}
      = \frac{n!}{k!(n-k)!} &
      \binom{n-1+k}{k}=\binom{n-1+k}{n-1} \\[3ex]
    \hline
  \end{tabular}
\end{center}

\newcommand{\stirling}[2]{\genfrac{\{}{\}}{0pt}{}{#1}{#2}}

Stirling numbers, $2^{nd}$ kind
\[\stirling{n}{k} = k\stirling{n-1}{k}+\stirling{n-1}{k-1}
  \qquad 1\le k \le n \qquad
  \stirling{n}{0} = \begin{cases} 1 & n = 0\\ 0 & \text{else} \end{cases}\]

Partitions
\[P_{n+k,k} = \sum_{i=1}^k P_{n,i} \qquad \qquad
  k>n:\;P_{n,k} = 0 \qquad n\ge1:\;P_{n,0} = 0, \; P_{0,0} = 1\]

% Distinguishability.
\def\distinguishable{\ensuremath{D}\xspace}
\def\indistinguishable{\ensuremath{\neg \distinguishable}\xspace}
Balls and Urns \qquad $f: B \to U$ \qquad
\distinguishable = distinguishable,
\indistinguishable = indistinguishable.
\begin{center}
  \begin{tabular}[h]{|l*4{|>{\begin{math}\displaystyle}c<{\end{math}}}|}
    \hline &&&&\\[-1.5ex]
    $|B|=n$, $|U|=m$ & f \text{ arbitrary} & f \text{ injective} &
      f \text{ surjective} & f \text{ bijective} \\[1ex]
    \hline
    \hline &&&&\\[-2ex]
    $B:\distinguishable,\; U:\distinguishable$ &
      m^n & \begin{cases} m^{\underline n} & m \ge n\\
      0 & \text{else} \end{cases} & m!\,\stirling{n}{m} &
      \begin{cases} n! & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline &&&&\\[-2ex]
    $B:\indistinguishable,\; U:\distinguishable$ &
      \binom{m+n-1}{n} & \binom{m}{n} &
      \binom{n-1}{m-1} &
      \begin{cases} 1 & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline &&&&\\[-2ex]
    $B:\distinguishable,\; U:\indistinguishable$ &
      \sum_{k=1}^m \stirling{n}{k} & \begin{cases} 1 &
      m\ge n\\ 0 & \text{else} \end{cases} & \stirling{n}{m} &
      \begin{cases} 1 & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline &&&&\\[-2ex]
    $B:\indistinguishable,\; U:\indistinguishable$ & \sum_{k=1}^m P_{n,k} &
      \begin{cases} 1 & m \ge n\\ 0 & \text{else} \end{cases} & P_{n,m} &
      \begin{cases} 1 & m = n\\ 0 & \text{else} \end{cases}\\[3ex]
    \hline
  \end{tabular}
\end{center}
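A small cross-check in base R of the surjection count $m!\,\stirling{n}{m}$
from the table above (illustrative sketch, not part of the cookbook):
\begin{verbatim}
# Stirling numbers of the 2nd kind via
# S(n,k) = k S(n-1,k) + S(n-1,k-1)
stirling2 <- function(n, k) {
  if (n == 0 && k == 0) return(1)
  if (n == 0 || k == 0) return(0)
  k * stirling2(n - 1, k) + stirling2(n - 1, k - 1)
}
factorial(3) * stirling2(5, 3)  # 150 surjections
# same count by inclusion-exclusion:
sum((-1)^(0:3) * choose(3, 0:3) * (3 - 0:3)^5)
\end{verbatim}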
% Convergence
% \begin{itemize}
%   \item $\displaystyle\sum_{n=1}^\infty a_n$ converges if
%     $\displaystyle\lim_{n \to \infty} \left|\frac{a_{n+1}}{a_n}\right| < 1$
%   \item $\displaystyle\sum_{n=1}^\infty a_n$ diverges if
%     $\displaystyle\lim_{n \to \infty} a_n \neq 0$
%   \item $\displaystyle\sum_{n=1}^\infty n^{-p}$ converges if $p > 1$
% \end{itemize}

% \subsection{Calculus}
%
% Polar Coordinates
% \begin{itemize}
%   \item $x = r\cos\theta \qquad y = r\sin\theta$
%   \item $r = \sqrt{y^2+x^2}$
%   \item $\theta =
%     \begin{cases}
%       0 & \mbox{if } x = 0 \mbox{ and } y = 0\\
%       \arcsin(\frac{y}{r}) & \mbox{if } x \geq 0 \\
%       -\arcsin(\frac{y}{r}) + \pi & \mbox{if } x < 0\\
%     \end{cases}$
% \end{itemize}

{
  \footnotesize
  \bibliographystyle{abbrv}
  \bibliography{literature}
  \vfill~
}

\end{multicols*}

\newpage

\begin{sidewaysfigure}
  \captionsetup{labelformat=empty,labelsep=none}
  \includegraphics[width=\textwidth]{figs/relationships}
  \caption{Univariate distribution relationships, courtesy of Leemis and
    McQueston~\cite{Leemis08}.}
\end{sidewaysfigure}

\end{document}