├── LICENSE
├── README.md
├── compstat_book_ex.R
├── poisson_process.R
├── rizzo_11.01.R
├── rizzo_11.02.R
├── rizzo_11.03.R
├── rizzo_11.04.R
├── rizzo_11.05.R
├── rizzo_11.06.R
├── rizzo_3.19.R
├── rizzo_3.20.R
├── rizzo_5.01.R
├── rizzo_5.02.R
├── rizzo_5.03.R
├── rizzo_5.04.R
├── rizzo_5.05.R
├── rizzo_5.13.R
├── rizzo_5.14.R
├── rizzo_7.03.R
├── rizzo_7.04.R
├── rizzo_7.05.R
├── rizzo_7.06.R
├── rizzo_7.07.R
├── rizzo_7.A.R
├── rizzo_7.B.R
├── rizzo_8.1.R
├── rizzo_8.2.R
├── rizzo_8.3.R
├── rizzo_8.4.R
├── rizzo_8.A.R
├── rizzo_9.1.R
├── rizzo_9.10.R
├── rizzo_9.2.R
├── rizzo_9.3.R
├── rizzo_9.4.R
├── rizzo_9.5.R
├── rizzo_9.6.R
├── rizzo_9.7.R
├── rizzo_9.8.R
└── rizzo_9.9.R


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Gerhard Konnerth
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # statistical-computing-with-r
2 | Solution attempts for selected exercises from Rizzo, Maria L. Statistical computing with R. CRC Press, 2007.
3 | 
4 | This is work in progress. Proper documentation as well as splitting the code into multiple scripts is planned.
5 | 
6 | Please note that parts of the code are inspired from the book.
7 | 


--------------------------------------------------------------------------------
/compstat_book_ex.R:
--------------------------------------------------------------------------------
  1 | #ex 3.1
  2 | 
  3 | two.exp = function (size, lambda, eta) {
  4 |   r = runif(size)
  5 |   -1/lambda * log(u / (-exp(lambda * eta)) + 1)
  6 | }
  7 | 
  8 | n = 10000
  9 | lambda = 3
 10 | eta = 0
 11 | a = rexp(1000, rate = lambda)
 12 | b = two.exp (n, lambda, eta)
 13 | 
 14 | p1 = qplot(a, xlim = c(0,4), binwidth = 0.05)
 15 | p2 = qplot(b, xlim = c(0,4), binwidth = 0.05)
 16 | 
 17 | grid.arrange(p1, p2)
 18 | 
 19 | round(x = rbind(quantile(a, probs = seq(0, 1, 0.05)), quantile(b, probs = seq(0, 1, 0.05))), digits = 3)
 20 | 
 21 | # ex 3.2
 22 | 
 23 | library(ggplot2)
 24 | library(gridExtra)
 25 | library(reshape)
 26 | 
 27 | laplace = function (size) {
 28 |   u = runif(size)
 29 |   sapply(u, function (v) if (v > 0.5) { -log(-2*v + 2) } else { log(2*v) })
 30 | }
 31 | n = 1000
 32 | range = c(-10,10)
 33 | step = 0.05
 34 | 
 35 | truth = function (v) {
 36 |   1/2*exp(-abs(v))
 37 | }
 38 | 
 39 | x = seq(range[1], range[2], length.out = n)
 40 | xy = truth(x)
 41 | y = laplace(1000)
 42 | 
 43 | #lines(density(xy))
 44 | 
 45 | m = cbind(x, xy)
 46 | colnames(m) = c("x", "f(x)")
 47 | 
 48 | # qplot(y/n, xlim = range, geom="histogram", binwidth = step) + stat_function(fun = truth)
 49 | hist(y, probability = TRUE, breaks = 100, xlim = range)
 50 | lines(x, y = xy)
 51 | 
 52 | # ex 3.3
 53 | 
 54 | rpareto = function (size, a, b) {
 55 |   stopifnot(a > 0 && b > 0)
 56 |   u = runif(size)
 57 |   b*(1-u)^(-1/a)
 58 | }
 59 | 
 60 | dpareto = function (x, a, b) {
 61 |   stopifnot(a > 0 && b > 0)
 62 |   sapply(x, function (v) if (v < b) { 0 } else { a*b^a*v^(-a-1) })
 63 | }
 64 | 
 65 | size = 1000
 66 | a = 2
 67 | b = 2
 68 | sam = rpareto(1000, a, b)
 69 | 
 70 | hist(sam, probability = TRUE, breaks = 100)
 71 | ran = range(sam)
 72 | x = seq(ran[1], ran[2], 0.01)
 73 | y = dpareto(x, a, b)
 74 | lines(x = x, y = y)
 75 | 
 76 | # ex 3.4
 77 | 
 78 | rrayleigh = function (size, sigma) {
 79 |   stopifnot(sigma > 0)
 80 |   u = runif(size)
 81 |   sqrt(-2*sigma^2*log(1 - u))
 82 | }
 83 | 
 84 | drayleigh = function (x, sigma) {
 85 |   stopifnot(sigma > 0)
 86 |   y = x / (sigma^2) * exp(- x^2 / (2 * sigma^2))
 87 |   y[x < 0] = 0
 88 |   y
 89 | }
 90 | 
 91 | size = 1000
 92 | library(ggplot2)
 93 | library(gridExtra)
 94 | 
 95 | sigmas = 1:5
 96 | xs = sapply(sigmas, function (v) rrayleigh(size, v))
 97 | mat = matrix(xs, ncol = length(sigmas), dimnames = list(NULL, sigmas))
 98 | df = data.frame(mat)
 99 | ps = lapply(1:ncol(df), function (v) qplot(x = df[,v])) 
100 | p1 = qplot(df$X1, main = paste(c("Sigma = ", sigmas[1]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X1)[2]))
101 | p2 = qplot(df$X2, main = paste(c("Sigma = ", sigmas[2]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X2)[2]))
102 | p3 = qplot(df$X3, main = paste(c("Sigma = ", sigmas[3]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X3)[2]))
103 | p4 = qplot(df$X4, main = paste(c("Sigma = ", sigmas[4]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X4)[2]))
104 | p5 = qplot(df$X5, main = paste(c("Sigma = ", sigmas[5]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X5)[2]))
105 | 
106 | grid.arrange(p1, p2, p3, p4, p5)
107 | 
108 | mode <- function(x) {
109 |   ux <- unique(x)
110 |   ux[which.max(tabulate(match(x, ux)))]
111 | }
112 | 
113 | sapply(df, mode)
114 | 
115 | # ex 3.5
116 | 
117 | xs = 0:4
118 | ps = c(0.1, 0.2, 0.2, 0.2, 0.3)
119 | size = 100000
120 | 
121 | rdisc = function (size, xs, ps) {
122 |   u = runif(size)
123 |   c = cumsum(ps)
124 |   sapply(u, function(v) min(xs[c >= v]))
125 | }
126 | 
127 | sam = rdisc(size, xs, ps)
128 | r1 = table(sam)
129 | truth = sample(size = size, x = xs, prob = ps, replace = TRUE)
130 | r2 = table(truth)
131 | # hist(sam, probability = TRUE)
132 | 
133 | rel.freq = rbind(r1, r2)/size
134 | 
135 | # ex 3.7
136 | 
137 | general.acc.rej = function (size, xs, df, dgen, rgen, c) {
138 |   acc.rej = function (size, df, rgen, dgen, c) {
139 |     ct = 1
140 |     n = 1
141 |     res = numeric(size)
142 |     while(n <= size) {
143 |       y = rgen(1)
144 |       u = runif(1)
145 |       if (u < df(y)/dgen(y)/c) {
146 |         res[n] = y
147 |         n = n + 1
148 |       }
149 |       ct = ct + 1
150 |     }
151 |     #print((n-1)/(ct-1))
152 |     res
153 |   }
154 |   
155 |   
156 |   sam = acc.rej(size = size, df = df, rgen = rgen, dgen = dgen, c)
157 |   
158 |   #ys = df(xs)
159 |   
160 |   #nrBins = 50
161 |   # Compute the maximum value in the histogram for a better visualization.
162 |   #bins = seq(min(xs), max(xs), by = dist(range(xs))/nrBins)
163 |   #hist.vals = table(cut(x = sam, bins))
164 |   # adapt to a total area of 1 (probability histogram).
165 |   #hist.vals = hist.vals/sum(hist.vals) * nrBins
166 |   # y limits as maximum of distribution and sample.
167 |   #ylim = c(min(c(ys, hist.vals)), max(c(ys, hist.vals)))
168 |   
169 |   #hist(sam, probability = TRUE, ylim = ylim, breaks = nrBins)
170 |   #lines(x = xs, y = ys)
171 |   
172 |   sam
173 | }
174 | 
175 | ex3.7 = function(size, a, b) {
176 |   rgen = function (size) {
177 |     runif(size, min = u.min, max = u.max)
178 |   }
179 |   
180 |   dgen = function (x) {
181 |     dunif(x, min = u.min, max = u.max)
182 |   }
183 |   
184 |   df = function (x) {
185 |     dbeta(x = x, shape1 = a, shape2 = b)
186 |   }
187 |   
188 |   xs = seq(0, 1, 0.01)
189 |   u.max = max(xs)
190 |   u.min = min(xs)
191 |   y.max = max(df(xs))
192 |   c = y.max / (u.max - u.min)
193 |   
194 |   general.acc.rej (size = size, xs = xs, df = df, dgen = dgen, rgen = rgen, c = c)
195 |   
196 | }
197 | 
198 | a = 3
199 | b = 2
200 | size = 1000
201 | 
202 | ex3.7 (size = size, a = a, b = b)
203 | 
204 | # ex 3.8
205 | 
206 | library(ggplot2)
207 | library(gridExtra)
208 | 
209 | lognormal = function (size, mu, sigma) {
210 |   exp(rnorm(n = size, mean = mu, sd = sigma))
211 | }
212 | 
213 | mus = c(0, 0, 0, 1, 2, 3)
214 | sigmas = c(0.25, 0.5, 1, 0.25, 0.5, 1)
215 | size = 1000
216 | 
217 | params = sapply(1:length(mus), function (v) c(mus[v], sigmas[v]))
218 | params = split(params, rep(1:ncol(params), each = nrow(params)))
219 | 
220 | sams = data.frame(lapply(params, function (v) lognormal(size = size, mu = v[1], sigma = v[2])))
221 | truths = data.frame(lapply(params, function (v) rlnorm(n = size, meanlog = v[1], sdlog = v[2])))
222 | colnames(sams) = params
223 | colnames(truths) = params
224 | 
225 | lims = lapply(1:length(params), function (v) c(min(c(sams[,v], truths[,v])), max(c(sams[,v], truths[,v]))))
226 | 
227 | ps.truth = lapply(1:ncol(truths), function (v) ggplot(data = truths, aes(x = truths[,v])) + geom_density() + ggtitle(paste("truth, mean =", params[[v]][1], ", sd =", params[[v]][2], sep = " ")) + scale_x_continuous(limits = lims[[v]]))
228 | ps.sams = lapply(1:ncol(sams), function (v) ggplot(data = sams, aes(x = sams[,v])) + geom_density() + ggtitle(paste("sample, mean =", params[[v]][1], ", sd =", params[[v]][2], sep = " ")) + scale_x_continuous(limits = lims[[v]]))
229 | 
230 | do.call("grid.arrange", c(grobs = c(ps.truth, ps.sams), ncol = 2, as.table = FALSE))
231 | 
232 | # ex 3.9
233 | 
234 | depanechnikov = function (x) {
235 |   3/4 * (1 - x^2)
236 | }
237 | 
238 | repanechnikov = function (size) {
239 |   u1 = runif(size, min = -1, max = 1)
240 |   u2 = runif(size, min = -1, max = 1)
241 |   u3 = runif(size, min = -1, max = 1)
242 |   sapply(1:size, function(v)
243 |     if (abs(u3[v]) >= abs(u2[v]) && abs(u3[v]) >= abs(u1[v])) u2[v] else u3[v]
244 |   )
245 | }
246 | 
247 | size = 1000
248 | sam = repanechnikov(size = size)
249 | xlim = range(sam)
250 | xs = seq(from = xlim[1], to = xlim[2], by = 0.01)
251 | 
252 | hist(sam, probability = TRUE)
253 | lines(x = xs, y = depanechnikov(xs))
254 | 
255 | # ex 3.11
256 | 
257 | means = c(0, 3)
258 | sds = c(1, 1)
259 | ps = seq(0.1, 0.9, 0.07)
260 | size = 1000
261 | 
262 | rmix = function (size, p, means, sds) {
263 |   rnorm(size, mean = sample(c(means[1], means[2], replace = TRUE, probs = c(p, 1-p))), sd = sample(c(sds[1], sds[2], replace = TRUE, probs = c(p, 1-p))))
264 | }
265 | 
266 | sams = data.frame(lapply(ps, function (p) rmix(size = size, p = p, means = means, sds = sds)))
267 | colnames(sams) = ps
268 | 
269 | plots = lapply(1:ncol(sams), function (v) ggplot(data = sams, aes(x = sams[,v])) + geom_density() + geom_histogram(aes(y = ..density..), bins = 100) + ggtitle(paste("p =", ps[v], sep = " ")))
270 | 
271 | 
272 | do.call("grid.arrange", c(grobs = c(plots), ncol = 2, as.table = FALSE))
273 | 
274 | # hist(sam, probability = TRUE, ylim = c(0, 0.5))
275 | # lines(density(sam))
276 | 
277 | # ex 3.12
278 | 
279 | size = 1000
280 | r = 4
281 | beta = 2
282 | 
283 | rmix = function (size, r, beta) {
284 |   rexp(n = size, rgamma(n = size, r, beta))
285 | }
286 | 
287 | sam = rmix(size = size, r = r, beta = beta)
288 | 
289 | hist (sam, probability = TRUE, breaks = 100, ylim = c(0, 2))
290 | # ex 3.13
291 | xs = seq(min(sam), max(sam), 0.01)
292 | lines(x = xs, y = dpareto(xs, a = r, b = beta))
293 | 
294 | # ex 3.14
295 | 
296 | mus = c(0, 1, 2)
297 | size = 2000
298 | d = length(mus)
299 | Sigma = matrix(c(1, -0.5, 0.5, -0.5, 2, -0.5, 0.5, -0.5, 3), ncol = 3)
300 | 
301 | Zs = matrix(rnorm(n = d * size), ncol = size, nrow = d)
302 | C = t(chol(Sigma))
303 | 
304 | Ys = t(C %*% Zs + mus)
305 | 
306 | # pairs(Ys)
307 | 
308 | # ex 3.15
309 | 
310 | normalize = function (data) {
311 |   Sigma = cov(data)
312 |   mus = sapply(1:ncol(data), function (v) mean(data[,v]))
313 |   A = chol(solve(Sigma))
314 |   Xs = t(A %*% (t(data)-mus))
315 |   Xs
316 | }
317 | 
318 | 
319 | # ex 3.16
320 | 
321 | library(bootstrap)
322 | norm.scor = scor
323 | m1 = normalize(data.matrix(norm.scor[,1:2]))
324 | m2 = normalize(data.matrix(norm.scor[,3:5]))
325 | norm.scor[,1:2] = m1
326 | norm.scor[,3:5] = m2
327 | cov = round(cov(norm.scor), digits=4)
328 | 
329 | # ex 3.17
330 | 
331 | iter = 30
332 | size = 5000
333 | 
334 | a = 1
335 | b = 5
336 | 
337 | tt1 = 0
338 | tt2 = 0
339 | 
340 | for(i in 1:iter) {
341 |   tt1 = tt1 + system.time(ex3.7(size = size, a = a, b = b))["elapsed"]
342 |   tt2 = tt2 + system.time(rbeta(n = 1000, shape1 = a, shape2 = b))["elapsed"]
343 | }
344 | 
345 | tt1
346 | tt2
347 | 
348 | # ex 3.18
349 | 
350 | my.rwishart = function (Sigma, n) {
351 |   stopifnot(nrow(Sigma) == ncol(Sigma))
352 |   d = ncol(Sigma)
353 |   L = chol(Sigma)
354 |   tmp = matrix(rnorm(d^2), ncol = d, nrow = d)
355 |   tmp[upper.tri(tmp, diag = TRUE)] = 0
356 |   A = diag(sapply(1:d, function (v) rchisq(1, n - v + 1))) + tmp
357 |   t(L) %*% A %*% t(A) %*% L
358 | }
359 | 
360 | Sigma = matrix(c(1, -0.5, 0.5, -0.5, 2, -0.5, 0.5, -0.5, 3), ncol = 3)
361 | n = 4
362 | size = 1000
363 | 
364 | sam = lapply(1:size, function (v) my.rwishart(Sigma, n))
365 | 
366 | (mean = apply(simplify2array(sam), 1:2, mean))
367 | sd = apply(simplify2array(sam), 1:2, sd)
368 | 
369 | (truth.mean = n * Sigma)
370 | 
371 | 


--------------------------------------------------------------------------------
/poisson_process.R:
--------------------------------------------------------------------------------
 1 | lambda = 2
 2 | t0 = 3
 3 | size = 10000
 4 | 
 5 | pp.exp = function (t0) {
 6 |   Tn = rexp(100, lambda)
 7 |   Sn = cumsum(Tn)
 8 |   return(min(which(Sn > t0)) - 1)
 9 | }
10 | 
11 | n1s = sapply(1:size, function () pp.exp(t0))
12 | n2s = rpois(size, t0 * lambda)
13 | 
14 | rbind(summary(n1s), summary(n2s))


--------------------------------------------------------------------------------
/rizzo_11.01.R:
--------------------------------------------------------------------------------
 1 | # useful example value of x.
 2 | x = 77.34599 #runif(1, 1, 100)
 3 | a = log(exp(x))
 4 | b = exp(log(x))
 5 | 
 6 | x == a
 7 | x == b
 8 | a == b
 9 | 
10 | identical(x, a)
11 | identical(x, b)
12 | identical(b, a)
13 | 
14 | all.equal(x, a)
15 | all.equal(x, b)
16 | all.equal(b, a)
17 | 


--------------------------------------------------------------------------------
/rizzo_11.02.R:
--------------------------------------------------------------------------------
 1 | a = 10
 2 | b = 20
 3 | r = 5
 4 | s = 5
 5 | 
 6 | getP = function (a, b, r, s) sum(sapply(max(r-b, 0):(r-1), function(k) choose(r+s-1,k)*choose(a+b-1, a+r-1-k)/choose(a+b+r+s-2,a+r-1)))
 7 | 
 8 | P.numerical = getP(a, b, r, s)
 9 | 
10 | n = 10000
11 | xs = rbeta(n, shape1 = a, shape2 = b)
12 | ys = rbeta(n, shape1 = r, shape2 = s)
13 | P.mc = mean(xs < ys)
14 | 
15 | P.numerical
16 | P.mc


--------------------------------------------------------------------------------
/rizzo_11.03.R:
--------------------------------------------------------------------------------
 1 | a = c(1,2)
 2 | d = length(a)
 3 | 
 4 | getTerm = function (a, k) {
 5 |   d = length(a)
 6 |   return((-1)^k * exp((2*k+2)*log(norm(a, type = "2")) - lgamma(k+1) - k*log(2) - log(2*k + 1) - log(2*k + 2) + lgamma((d+1)/2) + lgamma(k + 3/2) - lgamma(k + d/2 + 1)))
 7 | }
 8 | 
 9 | n = 400
10 | 
11 | getSum = function (a) {
12 |   sum(sapply(0:n, function (k) getTerm(a, k)))
13 | }


--------------------------------------------------------------------------------
/rizzo_11.04.R:
--------------------------------------------------------------------------------
 1 | findIntersection = function (k) {
 2 |   s.k.minus.one = function (a) {
 3 |     1-pt(sqrt(a^2 * (k - 1) / (k - a^2)), df = k-1)
 4 |   }
 5 |   
 6 |   s.k = function (a) {
 7 |     1-pt(sqrt(a^2 * k / (k + 1 - a^2)), df = k)
 8 |   }
 9 |   
10 |   f = function (a) {
11 |     s.k(a) - s.k.minus.one(a)
12 |   }
13 |   
14 |   eps = 1e-2
15 |   return(uniroot(f, interval = c(eps, sqrt(k)-eps))$root)
16 | }
17 | 
18 | rs = sapply(c(4:25, 100, 500, 1000), function (k) {
19 |   findIntersection(k)
20 |   })
21 | 


--------------------------------------------------------------------------------
/rizzo_11.05.R:
--------------------------------------------------------------------------------
 1 | # the two sides of the equation are the same expression for a different k. i try to simplify the task by writing them as functions.
 2 | solve.equation = function (k) {
 3 | 
 4 |   # general integral.  
 5 |   expr.integral = function(u, n) {
 6 |     (1 + u^2/(n-1))^(-n/2)
 7 |   }
 8 |   
 9 |   # general c_k.
10 |   get.c = function (n, a) {
11 |     sqrt(a^2 * n / (n + 1 - a^2))
12 |   }
13 |   
14 |   # left or right side of the equation, depending wheather n = k or n = k + 1.
15 |   expr = function (n, a) {
16 |     
17 |     this.integral = function (u) {
18 |       expr.integral(u, n)
19 |     }
20 |     
21 |     c = get.c(n - 1, a)
22 |     
23 |     2/sqrt(pi*(n-1)) * exp(lgamma(n/2)-lgamma((n-1)/2)) * integrate(this.integral, lower = 0, upper = c)$value
24 |   }
25 |   
26 |   f = function (a) {
27 |     left = expr(k, a)
28 |     right = expr(k + 1, a)
29 |     return (left - right)
30 |   }
31 |   
32 |   eps = 1e-2
33 |   if (f(eps) < 0 && f(sqrt(k) - eps) > 0 || f(eps) > 0 && f(sqrt(k) - eps) < 0) {
34 |     r = uniroot(f, interval = c(eps, sqrt(k)-eps))$root
35 |   } else {
36 |     r = NA
37 |   }
38 |   return(r)
39 | }
40 | 
41 | rs2 = sapply(c(4:25, 100, 500, 1000), function (k) {
42 |   solve.equation(k)
43 | })
44 | 
45 | 
46 | # plot f for a fixed k for debugging.
47 | # xs = seq(eps, sqrt(k)-eps, 0.01)
48 | # plot(xs, sapply(xs, function(x) f(x)), type="l")


--------------------------------------------------------------------------------
/rizzo_11.06.R:
--------------------------------------------------------------------------------
 1 | my.dcauchy = function (x, eta, theta) {
 2 |   stopifnot(theta > 0)
 3 |   return(1/(theta*pi*(1 + ((x - eta)/theta)^2)))
 4 | }
 5 | 
 6 | my.pcauchy = function (x, eta, theta) {
 7 |   stopifnot(theta > 0)
 8 |   
 9 |   integral = function (x) {
10 |     my.dcauchy(x, eta, theta)
11 |   }
12 |   
13 |   return(integrate(integral, lower = -Inf, upper = x)$value)
14 | }
15 | 
16 | eta = 0
17 | theta = 2
18 | xs = seq(-10, 10)
19 | estimate = sapply(xs, function(x) my.pcauchy(x, eta, theta))
20 | truth = sapply(xs, function(x) pcauchy(x, eta, theta))
21 | round(rbind(estimate, truth), 4)


--------------------------------------------------------------------------------
/rizzo_3.19.R:
--------------------------------------------------------------------------------
 1 | # ex 3.19
 2 | p = 0.5
 3 | s.min = 0
 4 | s.max = 20
 5 | s.start = 10
 6 | 
 7 | random.walk = function (s) {
 8 |   u = runif(1)
 9 |   n = s[length(s)]
10 |   if (u > p) {
11 |     n = n -1
12 |   } else {
13 |     n = n + 1
14 |   }
15 |   if (n <= s.min || n >= s.max) {
16 |     return(c(s, n))
17 |   } else {
18 |     return(random.walk(c(s,n))) 
19 |   }
20 | }
21 | 
22 | s = random.walk(s.start)
23 | plot(y = s, x = 1:length(s), type="l")


--------------------------------------------------------------------------------
/rizzo_3.20.R:
--------------------------------------------------------------------------------
 1 | # ex 3.20
 2 | 
 3 | lambda = 3
 4 | shape = 6
 5 | scale = 2
 6 | size = 10000
 7 | eps = 1e-8
 8 | 
 9 | t = 10
10 | 
11 | # with probability 1-eps, n or less gamma distributed random variables are needed.
12 | n = qpois(1-eps, lambda = lambda * t)
13 | # sample from the gamma distribution. Not sure if it's ok to use the same sample every time.
14 | # with this, the mean is of by about 10%.
15 | # ys = c(rgamma(n = n, shape = shape, scale = scale))
16 | 
17 | # the interarrival times are exponentially distributed with rate lambda.
18 | pp.exp = function (t0) {
19 |   # not sure how many Tn are needed :/
20 |   Tn = rexp(1000, lambda)
21 |   Sn = cumsum(Tn)
22 |   return(min(which(Sn > t0)) - 1)
23 | }
24 | 
25 | # generate N(t) which follow the poisson process.
26 | ns = sapply(1:size, function (i) pp.exp(t))
27 | # generate X(t) as in the problem description.
28 | xs = sapply(ns, function (n) {
29 |   ys = c(rgamma(n = n, shape = shape, scale = scale))
30 |   sum(ys[1:n])
31 | })
32 | 
33 | # compare mean and variance of 'size' samples of X(t) for verification.
34 | # sample:
35 | (mean.s = mean(xs))
36 | (var.s = var(xs))
37 | 
38 | # theoretical:
39 | (mean.t = lambda * t * shape * scale)
40 | (var.t = (shape + 1) * shape * scale^2)


--------------------------------------------------------------------------------
/rizzo_5.01.R:
--------------------------------------------------------------------------------
 1 | # change of variables yields integral from 0 to 1 of sin(x pi/3)  pi/3 dx.
 2 | 
 3 | n = 10000
 4 | 
 5 | g = function (x) {
 6 |   sin(pi/3 * x) * pi / 3
 7 | }
 8 | 
 9 | (theta.hat = mean(g(runif(n))))
10 | (theta = 1/2)


--------------------------------------------------------------------------------
/rizzo_5.02.R:
--------------------------------------------------------------------------------
 1 | x = seq(-10, 10, 2)
 2 | n = 10000
 3 | 
 4 | mc.norm = function (x) {
 5 |   g = function (t) {
 6 |     x * exp(-(x*t)^2/2)
 7 |   }
 8 | 
 9 |   gs = g(runif(n))
10 | 
11 |   var = var(gs) / n
12 |   
13 |   se = sqrt(var)
14 |       
15 |   zero.to.x = 1/sqrt(2*pi) * mean(gs)
16 |   
17 |   # use the property that the normal density funciton integrates to 1.
18 |   theta.hat  = 2 * zero.to.x + (1 - 2*zero.to.x) / 2
19 |   
20 |   # E(theta.hat) = theta.
21 |   # 95% CI for N(0,1) is (-1.96, 1.96).
22 |   # by the CLT (theta.hat - theta) / se ~ N(0,1).
23 |   CI = theta.hat + 1.96 * se
24 |   
25 |   return(data.frame(theta.hat, var, CI))
26 | }
27 | 


--------------------------------------------------------------------------------
/rizzo_5.03.R:
--------------------------------------------------------------------------------
 1 | m = 10000
 2 | 
 3 | estimate.unif = function () {
 4 |   g = function (y) {
 5 |     exp(-y/2) * 1/2
 6 |   }
 7 |   xs = g(runif(m))
 8 |   theta.hat = mean(xs)
 9 |   var = var(xs)/m
10 |   return(data.frame(theta.hat, var))
11 | }
12 | 
13 | estimate.unif.min.max = function () {
14 |   g = function (y) {
15 |     exp(-y)
16 |   }
17 |   # divide by 2, since dunif(x, 0, 0.5) = 2 for x in (0, 0.5).
18 |   xs = g(runif(m, min = 0, max = 0.5))
19 |   var = var(xs)/m
20 |   
21 |   theta.hat = mean(xs) * 1/2
22 |   return(data.frame(theta.hat, var))
23 | }
24 | 
25 | estimate.exp = function () {
26 |   # theta.hat = pexp(0.5, rate = 1) - pexp(0, rate = 1)
27 |   g = function (y) {
28 |     1/y
29 |   }
30 |   
31 |   y = rexp(m, rate = 1) <= 0.5
32 |   var = var(y)/m
33 |   
34 |   theta.hat = mean(y)
35 |   return(data.frame(theta.hat, var))
36 | }
37 | 
38 | estimate.unif()
39 | estimate.unif.min.max()
40 | estimate.exp()
41 | 
42 | # exp estimator samples of 0 and 1. therefore, higher variance.


--------------------------------------------------------------------------------
/rizzo_5.04.R:
--------------------------------------------------------------------------------
 1 | n = 10000
 2 | a = 3
 3 | b = 3
 4 | 
 5 | # plot the density to get an idea of the problem.
 6 | xs = rbeta(n, shape1 = a, shape2 = b)
 7 | plot(density(xs))
 8 | 
 9 | mc.cdf.beta = function (p, shape1, shape2) {
10 |   if (p <= 0 || p >= 1) return(0)
11 |   us = runif(n)
12 |   return(mean(dbeta(p*us, a, b)) * p)
13 | }
14 | 
15 | xs = seq(0, 1, 0.1)
16 | 
17 | round(rbind(sapply(xs, function (x) mc.cdf.beta(x, a, b)), sapply(xs, function(x) pbeta(x, a, b))), 3)


--------------------------------------------------------------------------------
/rizzo_5.05.R:
--------------------------------------------------------------------------------
 1 | n = 1000
 2 | 
 3 | mc.pnorm = function (x) {
 4 |   g = function (t) {
 5 |     x * exp(-(x*t)^2/2)
 6 |   }
 7 |   
 8 |   gs = g(runif(n))
 9 |   var = var(gs)/n
10 |   zero.to.x = 1/sqrt(2*pi) * mean(gs)
11 |   # use the property that the normal density funciton integrates to 1.
12 |   theta.hat  = 2 * zero.to.x + (1 - 2*zero.to.x) / 2
13 |   return(data.frame(theta.hat, var))
14 | }
15 | 
16 | hit.miss.pnorm = function (p) {
17 |   xs = rnorm(n) <= p
18 |   var = var(xs)/n
19 |   theta.hat = mean(xs)
20 |   return(data.frame(theta.hat, var))
21 | }
22 | 
23 | ps = seq(0, 1, 0.1)
24 | mc = data.frame(t(sapply(ps, function (p) mc.pnorm(p))))
25 | hit.miss = data.frame(t(sapply(ps, function (p) hit.miss.pnorm(p))))
26 | truth = sapply(ps, function (p) pnorm(p))
27 | 
28 | mc.mean = unlist(mc$theta.hat)
29 | hit.miss.mean = unlist(hit.miss$theta.hat)
30 | round(rbind(ps, mc.mean, hit.miss.mean, truth), 3)
31 | # mc is close to the truth than hit or miss.
32 | 
33 | (var.ratio = round(unlist(mc$var) / unlist(hit.miss$var), 3))
34 | # mc is more efficient than hit miss.


--------------------------------------------------------------------------------
/rizzo_5.13.R:
--------------------------------------------------------------------------------
 1 | g = function (x) {
 2 |   x ^ 2 / sqrt(2*pi) * exp(-x^2/2)
 3 | }
 4 | 
 5 | xs = seq(0,10,0.1)
 6 | 
 7 | ys.g = g(xs)
 8 | ys.rayleigh = drayleigh(xs, sigma = 1.5)
 9 | ys.norm = dnorm(xs, mean = 1.5)
10 | lim = max(c(ys.g, ys.rayleigh, ys.norm))
11 | 
12 | plot(xs, ys.g, type = "l", ylim = c(0, lim))
13 | lines(xs, ys.rayleigh, col="red", ylim = c(0, lim))
14 | lines(xs, ys.norm, col="blue", ylim = c(0, lim))
15 | 
16 | # f1(x) = drayleigh(x, sigma = 1.5)
17 | # f2(x) = dnorm(x, mean = 1.5)
18 | 
19 | # f2 is a little closer to g. should be better.


--------------------------------------------------------------------------------
/rizzo_5.14.R:
--------------------------------------------------------------------------------
 1 | g = function (x) {
 2 |   x ^ 2 / sqrt(2*pi) * exp(-x^2/2) * (x > 1)
 3 | }
 4 | 
 5 | sigma.rayleigh = 1.5
 6 | mean = 1.5
 7 | n = 10000
 8 | 
 9 | f1 = function (x) {
10 |   drayleigh(x, sigma = sigma.rayleigh) * (x > 1)
11 | }
12 | 
13 | f2 = function (x) {
14 |   dnorm(x, mean = mean) * (x > 1)
15 | }
16 | 
17 | rf1 = function () {
18 |   rrayleigh(n, sigma = sigma.rayleigh)
19 | }
20 | 
21 | rf2 = function () {
22 |   rnorm(n, mean = mean)
23 | }
24 | 
25 | is.rayleigh = function () {
26 |   xs = rf1()
27 |   return(mean(g(xs)/f1(xs), na.rm = TRUE))  
28 | }
29 | 
30 | is.norm = function () {
31 |   xs = rf2()
32 |   return(mean(g(xs)/f2(xs), na.rm = TRUE))  
33 | }
34 | 
35 | (theta1 = is.rayleigh())
36 | (theta2 = is.norm())
37 | (truth = 0.400626)


--------------------------------------------------------------------------------
/rizzo_7.03.R:
--------------------------------------------------------------------------------
 1 | library("bootstrap")
 2 | 
 3 | B = 200
 4 | n = nrow(law)
 5 | 
 6 | theta.hat = cor(law$LSAT, law$GPA)
 7 | theta.hats.b = numeric(B)
 8 | 
 9 | ts = numeric(B)
10 | 
11 | for (b in 1:B) {
12 |   i = sample(x = 1:n, size = n, replace = TRUE)
13 |   law.b = law[i,]
14 |   theta.hats.b[b] = cor(law.b$LSAT, law.b$GPA)
15 |   sd.theta.hats.b = numeric(B)
16 |   
17 |   for(b2 in 1:B) {
18 |     i2 = sample(x = 1:n, size = n, replace = TRUE)
19 |     law.b2 = law.b[i2,]
20 |     sd.theta.hats.b[b2] = cor(law.b2$LSAT, law.b2$GPA)
21 |   }
22 |   
23 |   se.b = sd(sd.theta.hats.b)
24 |   
25 |   ts[b] = (theta.hats.b[b] - theta.hat) / se.b
26 | }
27 | 
28 | alpha = 0.05
29 | ts.ordered = sort(ts)
30 | 
31 | qs = quantile(ts.ordered, probs = c(alpha/2, 1-alpha/2))
32 | 
33 | se.hat = sd(theta.hats.b)
34 | 
35 | (CI = c(theta.hat - qs[2]*se.hat, theta.hat - qs[1]*se.hat))
36 | 
37 | hist(ts, breaks = 100, xlim = c(-5, 10))


--------------------------------------------------------------------------------
/rizzo_7.04.R:
--------------------------------------------------------------------------------
 1 | library(boot)
 2 | hours = aircondit$hours
 3 | n = length(hours)
 4 | 
 5 | # MLE yeilds:
 6 | mle.lambda = function (values) {
 7 |   return(length(values)/sum(values))
 8 | }
 9 | 
10 | lambda.hat = mle.lambda(hours)
11 | 
12 | lambda.hats.b = numeric(B)
13 | 
14 | B = 200
15 | for (b in 1:B) {
16 |   i = sample(1:n, n, replace = TRUE)
17 |   hours.b = hours[i]
18 |   lambda.hats.b[b] = mle.lambda(hours.b)
19 | }
20 | 
21 | lambda.hats.b.mean = mean(lambda.hats.b)
22 | 
23 | (bias = lambda.hats.b.mean - lambda.hat)


--------------------------------------------------------------------------------
/rizzo_7.05.R:
--------------------------------------------------------------------------------
 1 | library(boot)
 2 | hours = aircondit$hours
 3 | n = length(hours)
 4 | 
 5 | # MLE yeilds:
 6 | mle.lambda = function (values) {
 7 |   return(length(values)/sum(values))
 8 | }
 9 | 
10 | time.b = numeric(B)
11 | ts = numeric(B)
12 | 
13 | time.hat = 1/mle.lambda(hours)
14 | 
15 | B = 200
16 | for (b in 1:B) {
17 |   i = sample(1:n, n, replace = TRUE)
18 |   hours.b = hours[i]
19 |   time.b[b] = 1/mle.lambda(hours.b)
20 |   
21 |   times.b2 = numeric(B)
22 |   
23 |   # compute bootstrap ts for later use.
24 |   for (b2 in 1:B) {
25 |     i2 = sample(1:n, n, replace = TRUE)
26 |     hours.b2 = hours.b[i2]
27 |     times.b2[b2] = 1/mle.lambda(hours.b2)
28 |   }
29 |   
30 |   ts[b] = (time.b[b] - time.hat) / sd(times.b2)
31 | }
32 | 
33 | se.hat = sd(time.b)
34 | alpha = 0.05;
35 | q.probs = c(alpha/2, 1-alpha/2)
36 | 
37 | setCINames = function (object) {
38 |   return(setNames(object, c(paste((alpha/2)*100, '%'), paste((1-alpha/2)*100, '%'))))
39 | }
40 | 
41 | # plot observed statistic 1/lambda, as well as t values for comparison with the bootstrap t CI.
42 | par(mfrow=c(1,2))
43 | hist(time.b, breaks = 100)
44 | hist(ts, breaks = 100)
45 | 
46 | # standard normal.
47 | q = qnorm(1-alpha/2)
48 | ci.sn = time.hat + c(-1,1)*q*se.hat
49 | (ci.sn = setCINames(ci.sn))
50 | 
51 | # basic boostrap.
52 | qs.time.hat = quantile(x = time.b, p = q.probs)
53 | ci.basic = rev(2*time.hat - qs.time.hat)
54 | (ci.basic = setCINames (object = ci.basic))
55 | 
56 | # percentile.
57 | (ci.percentile = qs.time.hat)
58 | 
59 | # bootstrap t.
60 | qs.t = quantile(x = ts, p = q.probs)
61 | (ci.t = setCINames(rev(time.hat - qs.t*se.hat)))


--------------------------------------------------------------------------------
/rizzo_7.06.R:
--------------------------------------------------------------------------------
 1 | library(bootstrap)
 2 | 
 3 | pairs(scor)
 4 | cor(scor)
 5 | 
 6 | n = nrow(scor)
 7 | B = 200
 8 | 
 9 | ro.12 = numeric(B)
10 | ro.34 = numeric(B)
11 | ro.35 = numeric(B)
12 | ro.45 = numeric(B)
13 | 
14 | for (b in 1:B) {
15 |   i = sample(1:n, n, replace = TRUE)
16 |   scor.b = scor[i,]
17 |   ro.12[b] = cor(scor.b$mec, scor.b$vec)
18 |   ro.34[b] = cor(scor.b$alg, scor.b$ana)
19 |   ro.35[b] = cor(scor.b$alg, scor.b$sta)
20 |   ro.45[b] = cor(scor.b$ana, scor.b$sta)
21 | }
22 | 
23 | (se.12 = sd(ro.12))
24 | (se.34 = sd(ro.34))
25 | (se.35 = sd(ro.35))
26 | (se.45 = sd(ro.45))


--------------------------------------------------------------------------------
/rizzo_7.07.R:
--------------------------------------------------------------------------------
 1 | library(bootstrap)
 2 | 
 3 | 
 4 | par(mfrow=c(5,1))
 5 | breaks = 100
 6 | hist(scor$mec, breaks = breaks)
 7 | hist(scor$vec, breaks = breaks)
 8 | hist(scor$alg, breaks = breaks)
 9 | hist(scor$ana, breaks = breaks)
10 | hist(scor$sta, breaks = breaks)


--------------------------------------------------------------------------------
/rizzo_7.A.R:
--------------------------------------------------------------------------------
 1 | mu = 4
 2 | sigma = 3
 3 | n = 1000
 4 | 
 5 | xs = rnorm(n, mean = mu, sd = sqrt(sigma))
 6 | 
 7 | # sample mean.
 8 | mu.hat = mean(xs)
 9 | 
10 | # compute bootstrap sample of the mean.
11 | B = 200
12 | mu.hats.b = numeric(B)
13 | ts = numeric(B)
14 | 
15 | for (b in 1:B) {
16 |   i = sample(1:n, n, replace = TRUE)
17 |   xs.b = xs[i]
18 |   mu.hats.b[b] = mean(xs.b)
19 |   
20 |   for (b2 in 1:B) {
21 |     i2 = sample(1:n, n, replace = TRUE)
22 |     ts[b] = (mu.hats.b[b] - mu.hat) / sd(xs.b[i])
23 |   }
24 | }
25 | 
26 | se.hat = sd(mu.hats.b)
27 | 
28 | # visualize.
29 | par(mfrow = c(2,1))
30 | hist(mu.hats.b, breaks = 100)
31 | hist(ts, breaks = 100)
32 | 
33 | # compute CIs.
34 | alpha = 0.05
35 | probs = c(alpha/2, 1-alpha/2)
36 | 
37 | names = sapply(probs, function(p) paste(p*100, '%', sep = ''))
38 | setCINames = function (object) {
39 |   return (setNames(object = object, names))
40 | }
41 | 
42 | # standard normal.
43 | qs.norm = qnorm(probs)
44 | ci.sn = setCINames(mu.hat - rev(qs.norm)*se.hat)
45 | 
46 | # basic bootstrap.
47 | qs.mu.hats.b = quantile(x = mu.hats.b, probs = probs)
48 | ci.basic = setCINames(2*mu.hat - rev(qs.mu.hats.b))
49 | 
50 | # percentile.
51 | ci.percentile = setCINames(quantile(mu.hats.b, probs = probs))
52 | 
53 | # bootstrap t.
54 | qs.ts = quantile(ts, probs = probs)
55 | ci.t = setCINames(mu.hat - rev(qs.ts)*se.hat)
56 | 
57 | # set up data for the MC study.
58 | mc.study = data.frame(rbind(ci.sn, ci.basic, ci.percentile, ci.t))
59 | colnames(mc.study) = names
60 | mc.study['miss.left'] = rep.int(0, times = nrow(mc.study))
61 | mc.study['miss.right'] = rep.int(0, times = nrow(mc.study))
62 | 
63 | # compute coverage rates for sample mean when sampling from the normal population xs.
64 | size = n
65 | rep = 10000
66 | miss.l = 0
67 | miss.r = 0
68 | 
69 | for(r in 1:rep) {
70 |   i = sample(1:n, size, replace = TRUE)
71 |   mu.sample = mean (xs[i])
72 |   for(y in 1:nrow(mc.study)) {
73 |     lower = mc.study[y,names[1]]
74 |     upper = mc.study[y,names[2]]
75 |     if (mu.sample < lower) {
76 |       mc.study[y,'miss.left'] = mc.study[y,'miss.left'] + 1
77 |     } else if (mu.sample > upper) {
78 |       mc.study[y,'miss.right'] = mc.study[y,'miss.right'] + 1
79 |     }
80 |   }
81 | }
82 | 
83 | mc.study$miss.left = mc.study$miss.left/rep
84 | mc.study$miss.right = mc.study$miss.right/rep
85 | 
86 | mc.study


--------------------------------------------------------------------------------
/rizzo_7.B.R:
--------------------------------------------------------------------------------
 1 | n = 100
 2 | 
 3 | mc.skewness = function(xs) {
 4 |   
 5 |   # https://en.wikipedia.org/wiki/Skewness#Sample_skewness
 6 |   sample.skewness = function (sample) {
 7 |     mu = mean(sample)
 8 |     n = length(sample)
 9 |     num = 1/n * sum(sapply(sample, function (x) (x - mu)^3))
10 |     denom = sd(sample)^3
11 |     return (num/denom)
12 |   }
13 |   
14 |   theta.hat = sample.skewness(xs)
15 |   
16 |   B = 200
17 |   theta.hats.b = numeric(B)
18 |   
19 |   for (b in 1:B) {
20 |     i = sample(1:n, n, TRUE)
21 |     xs.b = xs[i]
22 |     theta.hats.b[b] = sample.skewness(xs.b)
23 |   }
24 |   
25 |   sd.hat = sd(theta.hats.b)
26 |   
27 |   # visualize.
28 |   par(mfrow = c(1, 1))
29 |   hist(theta.hats.b)
30 |   
31 |   # confidence intervals.
32 |   alpha = 0.05
33 |   probs = c(alpha/2, 1-alpha/2)
34 |   names = sapply(probs, function (p) paste(p*100, '%', sep = ''))
35 |   qs.theta.hats.b = quantile(theta.hats.b, probs)
36 |   
37 |   # standard normal.
38 |   qs.norm = qnorm(probs)
39 |   ci.norm = rev(theta.hat - qs.norm * sd.hat)
40 |   
41 |   # basic bootstrap.
42 |   ci.basic = rev(2*theta.hat - qs.theta.hats.b)
43 |   
44 |   # percentile.
45 |   ci.percentile = qs.theta.hats.b
46 |   
47 |   ci.data = data.frame(rbind(ci.norm, ci.basic, ci.percentile))
48 |   colnames(ci.data) = names
49 |   ci.data['left.miss'] = 0
50 |   ci.data['right.miss'] = 0
51 |   
52 |   # mc study.
53 |   rep = 1000
54 |   
55 |   for (r in 1:rep) {
56 |     i = sample(1:n, n, replace = TRUE)
57 |     skew = sample.skewness(xs[i])
58 |     for (y in 1:nrow(ci.data)) {
59 |       lower = ci.data[y,names[1]]
60 |       upper = ci.data[y,names[2]]
61 |       if (skew < lower) {
62 |         ci.data[y,'left.miss'] = ci.data[y,'left.miss'] + 1
63 |       } else if (skew > upper) {
64 |         ci.data[y,'right.miss'] = ci.data[y,'right.miss'] + 1
65 |       }
66 |     }
67 |   }
68 |   
69 |   ci.data$left.miss = ci.data$left.miss/rep
70 |   ci.data$right.miss = ci.data$right.miss/rep
71 |   
72 |   return(ci.data)
73 | }
74 | 
75 | mean = 3
76 | sd = 4
77 | xs = rnorm(n, mean = mean, sd = sd)
78 | 
79 | mc.skewness(xs)
80 | 
81 | df = 10
82 | xs = rchisq(n, df = df)
83 | 
84 | mc.skewness(xs)


--------------------------------------------------------------------------------
/rizzo_8.1.R:
--------------------------------------------------------------------------------
 1 | attach(chickwts)
 2 | feed1 = "soybean"
 3 | feed2 = "linseed"
 4 | xs = sort(weight[feed == feed1])
 5 | ys = sort(weight[feed == feed2])
 6 | rep = 1000
 7 | 
 8 | library(RVAideMemoire)
 9 | 
10 | zs = c(xs, ys)
11 | n1 = length(xs)
12 | n2 = length(ys)
13 | n = n1 + n2
14 | 
15 | Ts = numeric(rep)
16 | for (i in 1:rep) {
17 |   ks = sample(1:n, n1, replace = FALSE)
18 |   zs1 = zs[ks]
19 |   zs2 = zs[-ks]
20 |   Ts[i] = CvM.test(zs1, zs2)$statistic
21 | }
22 | 
23 | (cvm = CvM.test(x, y))
24 | T.hat = cvm$statistic
25 | (p.hat = mean(abs(T.hat) < abs(Ts)))
26 | 
27 | hist(Ts)


--------------------------------------------------------------------------------
/rizzo_8.2.R:
--------------------------------------------------------------------------------
 1 | soybean = chickwts$weight[chickwts$feed=="soybean"]
 2 | linseed = chickwts$weight[chickwts$feed=="linseed"]
 3 | n = length(soybean)
 4 | m = length(linseed)
 5 | 
 6 | tmp = min(n, m)
 7 | soybean = sort(soybean[1:tmp])
 8 | linseed = sort(linseed[1:tmp])
 9 | 
10 | zs = c(soybean, linseed)
11 | spearman.cor.test = cor.test(x = soybean, y = linseed, method = "spearman")
12 | 
13 | B = 1000
14 | k = length(zs)
15 | 
16 | rhos = numeric(rep)
17 | 
18 | for (b in 1:B) {
19 |   i = sample(1:k, k/2, replace = FALSE)
20 |   xs = zs[i]
21 |   ys = zs[-i]
22 |   rhos[b] = cor(x = xs, y = ys, method = "spearman")
23 | }
24 | 
25 | hist(rhos, breaks = 100)
26 | 
27 | (theta.hat = spearman.cor.test$estimate)
28 | 
29 | spearman.cor.test$p.value
30 | 
31 | (p.hat = mean(abs(rhos) > abs(theta.hat)))
32 | 
33 | (alpha = 0.05)
34 | 
35 | # p.hat < alpha, thus H0 rejected.


--------------------------------------------------------------------------------
/rizzo_8.3.R:
--------------------------------------------------------------------------------
 1 | # WIP: doesn't work.
 2 | 
 3 | n1 = 100
 4 | n2 = 200
 5 | 
 6 | # returns probability for H1, where H0 is equal variances (i.e. sd1 = sd2)
 7 | test.equal.variance = function(xs1, xs2){
 8 |   # let x1 be smaller than x2
 9 |   if (length(x2) < length(x1)) {
10 |     tmp = x1
11 |     x1 = x2
12 |     x2 = tmp
13 |   }
14 |   n1 = length(x1)
15 |   n2 = length(x2)
16 |   
17 |   y = c(x1, x2)
18 |   
19 |   # returns 1 if more than five values from one sample lie outside the range of the other sample.
20 |   # this implies different varianes between the samples.
21 |   count5 = function (x1, x2) {
22 |     stopifnot(length(x1) == length(x2))
23 |     extr1 = sum((x1 < min(x2))) + sum((x1 > max(x2)))
24 |     extr2 = sum((x2 < min(x1))) + sum((x2 > max(x1)))
25 |     
26 |     out = max(extr1, extr2)
27 |     return(as.integer(out > 5))
28 |   }
29 |   
30 |   # run permutation test.
31 |   # if H0 holds, count 5 must return 0 for "most" (depending on desired p-value) sets of 2 equally sized samples from y.
32 |   rep = 1000
33 |   n.min = n1
34 |   n = length(y)
35 |   c5s = numeric(rep)
36 |   size = floor(n / 2)
37 |   
38 |   for (i in 1:rep) {
39 |     k = sample(1:n, size, replace = FALSE)
40 |     y1 = y[k]
41 |     y2 = y[-k]
42 |     # count how many times count 5 test returned 1 (i.e. differend variances)
43 |     c5s[i] = count5(y1, y2)
44 |   }
45 |   
46 |   hist(c5s, probability = TRUE)
47 |   
48 |   # frequency of different variances according to count 5 test.
49 |   return(mean(c5s))
50 | }
51 | 
52 | mean = 0
53 | # test the algorithm for random normally distributed samples of unequal sizes with different chocies of the standard deviation.
54 | test.equal.variance(rnorm(n1, mean, 1), rnorm(n2, mean, 1))
55 | test.equal.variance(rnorm(n1, mean, 10), rnorm(n2, mean, 1))
56 | test.equal.variance(rnorm(n1, mean, 100), rnorm(n2, mean, 1))


--------------------------------------------------------------------------------
/rizzo_8.4.R:
--------------------------------------------------------------------------------
 1 | # the package from the book doesn't seem to exist.
 2 | library(FastKNN)
 3 | library(pdist)
 4 | 
 5 | # returns T.
 6 | rth.nn = function (x, y, r) {
 7 |   n1 = length(x)
 8 |   n2 = length(y)
 9 |   
10 |   z = matrix(c(x,y), ncol = 1)
11 |   n = dim(z)[1]
12 |   
13 |   # distance matrix between z and z.
14 |   distance_matrix = as.matrix(dist(z))
15 |   
16 |   # matrix where row i represents k nearest neighbors of z[i]
17 |   nn = matrix(0, n, r)
18 |   for (i in 1:n) {
19 |     nn[i,] = k.nearest.neighbors(i, distance_matrix, k = r)
20 |   }
21 | 
22 |   block1 = nn[1:n1,]
23 |   block2 = nn[n1+1:n2,]
24 |   
25 |   T = (sum(block1<=n1) + sum(block2>n1))/(n*k)
26 |   
27 |   return (T)
28 | }
29 | 
30 | feed1 = "sunflower"
31 | feed2 = "linseed"
32 | x = chickwts$weight[chickwts$feed == feed1]
33 | y = chickwts$weight[chickwts$feed == feed2]
34 | r = 3
35 | 
36 | rth.nn(x, y, r)


--------------------------------------------------------------------------------
/rizzo_8.A.R:
--------------------------------------------------------------------------------
 1 | # WIP: the description of this exercise makes no sense at all.
 2 | # 499 permutation replicates of what statistics?
 3 | # how can there be 499 replicates when there are 10000 samples?
 4 | # what is a permutation decision?
 5 | 
 6 | library(FastKNN)
 7 | library(pdist)
 8 | library(energy)
 9 | library(mvtnorm)
10 | library(boot)
11 | 
12 | # returns T.
13 | rth.nn = function (dist, xi, sizes, nr.neighbors) {
14 |   n1 = sizes[1]
15 |   n2 = sizes[2]
16 |   n = n1 + n2
17 |   
18 |   perm.dist = dist[xi,]
19 |   
20 |   # matrix where row i represents k nearest neighbors of z[i]
21 |   nn = matrix(0, n, nr.neighbors)
22 |   for (i in 1:n) {
23 |     nn[i,] = k.nearest.neighbors(i, perm.dist, k = nr.neighbors)
24 |   }
25 |   
26 |   block1 = nn[1:n1,]
27 |   block2 = nn[n1+1:n2,]
28 |   
29 |   T = (sum(block1<=n1) + sum(block2>n1))/(n*nr.neighbors)
30 |   
31 |   return (T)
32 | }
33 | 
34 | n1 = 20
35 | n2 = 20
36 | d = 2
37 | a = 2/sqrt(d)
38 | sizes = c(n1, n2)
39 | mean1 = c(0, 0)
40 | mean2 = c(0, 0.5)
41 | Sigma1 = diag(d)
42 | Sigma2 = diag(d)
43 | x = rmvnorm(n = n1, mean = mean1, sigma = Sigma1)
44 | y = rmvnorm(n = n2, mean = mean2, sigma = Sigma2)
45 | z = rbind(x, y)
46 | dist = as.matrix(dist(z))
47 | rep = 999
48 | nr.neighbors = 3
49 | 
50 | e = eqdist.etest(dist, sizes, TRUE)
51 | 
52 | # boot.obj = boot(data = dist, statistic = eqdist.etest, sim = "permutation", R = rep, sizes = sizes, distance = TRUE)
53 | 
54 | boot.obj = boot(data = dist, statistic = rth.nn, sim = "permutation", R = rep, sizes = sizes, nr.neighbors = nr.neighbors)
55 | 
56 | 
57 | if (FALSE){
58 | total.n.x = dim(x)[1]
59 | total.n.y = dim(y)[1]
60 | total.sizes = c(total.n.x, total.n.y)
61 | r = 3
62 | 
63 | T0 = rth.nn(dist, total.sizes, r)
64 | e0 = eqdist.etest(dist, total.sizes, TRUE)$statistic
65 | 
66 | Ts = numeric(rep)
67 | es = numeric(rep)
68 | 
69 | for (i in 1:rep) {
70 |   k1 = sample(1:total.n.x, n1, replace = FALSE)
71 |   k2 = total.n.x + sample(1:total.n.y, n2, replace = FALSE)
72 |   ks = c(k1, k2)
73 |   # shuffle distance matrix according to permutation.
74 |   dist.i = dist[ks,ks]
75 |   Ts[i] = rth.nn(dist.i, sizes, r)
76 |   es[i] = eqdist.etest(dist.i, sizes, TRUE)$statistic
77 | }
78 | 
79 | par(mfrow=c(1,2))
80 | hist(Ts)
81 | hist(es)
82 | 
83 | (p.e = mean(es >= e0))
84 | (p.T = mean(Ts >= T0))
85 | }


--------------------------------------------------------------------------------
/rizzo_9.1.R:
--------------------------------------------------------------------------------
 1 | library(VGAM)
 2 | 
 3 | mc.drayleigh = function (sigma) {
 4 |   
 5 |   dg = function (a, b) {
 6 |     dchisq(a, df = b)
 7 |   }
 8 |   
 9 |   rg = function (df) {
10 |     rchisq(n = 1, df = df)
11 |   }
12 |   
13 |   df = function (x) {
14 |     drayleigh(x, sigma = sigma)
15 |   }
16 |   
17 |   N = 10000
18 |   x = numeric(N)
19 |   x[1] = rg(1)
20 |   u = runif(N)
21 |   k = 0
22 |   for (i in 2:N) {
23 |     xt = x[i-1]
24 |     y = rg(xt)
25 |     r = df(y)*dg(xt, y) / (df(xt)*dg(y, xt))
26 |     if (u[i] <= r) {
27 |       x[i] = y
28 |     } else {
29 |       x[i] = x[i-1]
30 |       k = k + 1
31 |     }
32 |   }
33 |   print(k)
34 |   return(x)
35 | }
36 | 
37 | is = 5000:5500
38 | x1 = mc.drayleigh(4)
39 | x2 = mc.drayleigh(2)
40 | par(mfrow=c(2,1))
41 | plot(is, x1[is], type="l")
42 | plot(is, x2[is], type="l")
43 | par(mfrow=c(1,1))
44 | # less efficient for sigma = 2. longer horizontal paths in the respective plot.


--------------------------------------------------------------------------------
/rizzo_9.10.R:
--------------------------------------------------------------------------------
 1 | library(coda)
 2 | 
 3 | l = 10
 4 | s = c(1,2,3,4)
 5 | k = length(s)
 6 | 
 7 | # rayleigh density.
 8 | sigma = 4
 9 | df = function (y, sigma) {
10 |   if (any(x < 0)) return(0)
11 |   stopifnot(sigma > 0)
12 |   return(y/sigma^2 * exp(-y^2/(2*sigma^2)))
13 | }
14 | 
15 | dg = function (y, xt) {
16 |   dchisq(y, df = xt)
17 | }
18 | 
19 | rg = function (xt) {
20 |   rchisq(1, df = xt)
21 | }
22 | 
23 | mh = function (s, l) {
24 |   x = numeric(l)
25 |   us = runif(l)
26 |   x[1] = s
27 |   for (i in 2:l) {
28 |     xt = x[i-1]
29 |     y = rg(xt)
30 |     res = df(y, sigma)/df(xt, sigma) * dg(xt, y)/dg(y, xt)
31 |     if (us[i] <= res) {
32 |       x[i] = y
33 |     } else {
34 |       x[i] = xt
35 |     }
36 |   }
37 |   return(x)
38 | }
39 | 
40 | xs = matrix(sapply(1:k, function(i) mh(s[i], l)), nrow = k, byrow = TRUE)
41 | 
42 | # visualize the generated samples.
43 | plotHists = function () {
44 |   par(mfrow=c(1,k))
45 |   for (i in 1:k) {
46 |     hist(xs[i,], probability = TRUE, breaks = 100)
47 |     x.axis = seq(min(xs[i,]), max(xs[i,]), by = 0.01)
48 |     lines(x.axis, df(x.axis, sigma))
49 |   }
50 |   par(mfrow=c(1,1))
51 | }
52 | 
53 | plotChains = function () {
54 |   burn = 2000
55 |   is = (burn+1):l
56 |   par(mfrow=c(1,k))
57 |   for (i in 1:k) {
58 |     plot(is, xs[i,is], type="l")
59 |   }
60 |   par(mfrow=c(1,1))
61 | }
62 | 
63 | gelman.rubin = function (psis) {
64 |   psi.means = rowMeans(psis)
65 |   B = n * var(psi.means)
66 |   W = mean(apply(psis, MARGIN = 1, "var"))
67 |   var.hat = (n-1)/n*W + 1/n*B
68 |   return(var.hat/W)
69 | }
70 | 
71 | div = matrix(sapply(1:k, function(i) 1:l), nrow = k, byrow = TRUE)
72 | psis = t(apply(xs, MARGIN = 1, "cumsum")) / div
73 | 
74 | r.hats = sapply(2:l, function(j) gelman.rubin(psis[,1:j]))
75 | 
76 | plot(2:l, r.hats, type="l")
77 | abline(h=1.2)
78 | 
79 | # TODO: use coda library.


--------------------------------------------------------------------------------
/rizzo_9.2.R:
--------------------------------------------------------------------------------
 1 | sigma = 2
 2 | 
 3 | dg = function (a, b) {
 4 |   dgamma(x = a, shape = b, rate = 1)
 5 | }
 6 | 
 7 | rg = function (s) {
 8 |   rgamma(1, shape = s, rate = 1)
 9 | }
10 | 
11 | df = function (x) {
12 |   drayleigh(x, sigma = sigma)
13 | }
14 | 
15 | mh = function(rg, dg, df) {
16 |   N = 10000
17 |   x = numeric(N)
18 |   x[1] = rg(1)
19 |   u = runif(N)
20 |   k = 0
21 |   for (i in 2:N) {
22 |     xt = x[i-1]
23 |     y = rg(xt)
24 |     r = df(y)*dg(xt, y) / (df(xt)*dg(y, xt))
25 |     if (u[i] <= r) {
26 |       x[i] = y
27 |     } else {
28 |       x[i] = x[i-1]
29 |       k = k + 1
30 |     }
31 |   }
32 |   print(k)
33 |   return(x)
34 | }
35 | 
36 | x = mh(rg, dg, df)
37 | is = 5000:5500
38 | plot(is, x[is], type="l")


--------------------------------------------------------------------------------
/rizzo_9.3.R:
--------------------------------------------------------------------------------
 1 | theta = 1
 2 | eta = 0
 3 | N = 10000
 4 | 
 5 | stopifnot(theta > 0)
 6 | 
 7 | df = function(x) {
 8 |   1/(theta*pi*(1+((x-eta)/theta)^2))
 9 | }
10 | 
11 | dg = function(x, df) {
12 |   # dt(x = x, df = df)
13 |   dnorm(x = x, mean = df)
14 | }
15 | 
16 | rg = function(df) {
17 |   rnorm(n = 1, mean = df)
18 |   # rt(n = 1, df = df)
19 | }
20 | 
21 | mh = function (N, df, dg, rg) {
22 |   x = numeric(N)
23 |   x[1] = rg(1)
24 |   k = 0
25 |   u = runif(N)
26 |   for (i in 2:N) {
27 |     xt = x[i-1]
28 |     y = rg(xt)
29 |     r = df(y) * dg(xt, y) / (df(xt) * dg(y, xt))
30 |     if (u[i] <= r) {
31 |       x[i] = y
32 |     } else {
33 |       k = k + 1
34 |       x[i] = xt
35 |     }
36 |   }
37 |   print(k)
38 |   return(x)
39 | }
40 | 
41 | x = mh(N, df, dg, rg)
42 | is = 1001:N
43 | par(mfrow = c(1,2))
44 | plot(is, x[is], type="l")
45 | hist(x, probability = TRUE, breaks = 100)
46 | plot.x = seq(min(x), max(x), 0.01)
47 | lines(plot.x, df(plot.x))
48 | par(mfrow = c(1,1))


--------------------------------------------------------------------------------
/rizzo_9.4.R:
--------------------------------------------------------------------------------
 1 | sds = c(0.5, 1, 2, 4)
 2 | is = 5000:5500
 3 | 
 4 | mc.laplace = function (sd) {
 5 |   N = 10000
 6 |  
 7 |   # standard laplace distribution.
 8 |   df = function (x) {
 9 |     1/2 * exp(-abs(x))
10 |   }
11 |   
12 |   rg = function (mean) {
13 |     rnorm(n = 1, mean = mean, sd = sd)
14 |   }
15 |   
16 |   rw = function (N, df, rg) {
17 |     
18 |     x = numeric(N)
19 |     x[1] = rg(1)
20 |     k = 0
21 |     us = runif(N)
22 |     
23 |     for (i in 2:N) {
24 |       xt = x[i-1]
25 |       y = rg(xt)
26 |       res = df(y) / df(xt)
27 |       if (us[i] <= res) {
28 |         x[i] = y
29 |       } else {
30 |         x[i] = xt
31 |         k = k + 1
32 |       }
33 |     }
34 |     print(k)
35 |     
36 |     return(x)
37 |   }
38 |   
39 |   return(rw(N, df, rg))
40 | }
41 | 
42 | xs = mc.laplace(sd = sds[1])
43 | par(mfrow=c(length(sds), 1))
44 | plot(is, xs[is], type="l", ylab = paste("sd = ", sds[1], sep = ''))
45 | for (i in 2:length(sds)) {
46 |   plot(is, mc.laplace(sd = sds[i])[is], type="l", ylab = paste("sd = ", sds[i], sep = ''))
47 | }
48 | par(mfrow=c(1, 1))


--------------------------------------------------------------------------------
/rizzo_9.5.R:
--------------------------------------------------------------------------------
 1 | b = 0.2
 2 | b.lim = c(0, 0.5)
 3 | days = 250
 4 | secs = 1:5
 5 | m = 5000
 6 | burn = 1000
 7 | ws = c((1:4)/4)
 8 | 
 9 | prob.vector = function (b) {
10 |   return(c(1/3, (1-b)/3, (1-2*b)/3, 2*b/3, b/3))
11 | }
12 | 
13 | i = sample(secs, size = days, prob = ps, replace = TRUE)
14 | win = tabulate(i)
15 | 
16 | ps = prob.vector(b)
17 | 
18 | # attempt to avoid numerical issues when computing the posterior density.
19 | # will break when days is large enough.
20 | posterior = function (x, win) {
21 |   if (x < b.lim[1] || x > b.lim[2]) {
22 |     return(0)
23 |   }
24 |   nums = sapply(split(1:days, rep(1:(length(win)), days/length(win))), function(n) prod(n))
25 |   dens = sapply(win, function(w) factorial(w))
26 |   probs = prob.vector(x) ^ win
27 |   return(prod(nums/dens*probs))
28 | }
29 | 
30 | # try to overcome numerical issues when computing acceptance probability in random walk.
31 | prob.ratio = function (n, d, win) {
32 |   return(prod(prob.vector(n)^win / prob.vector(d)^win))
33 | }
34 | 
35 | rw.b = function (w) {
36 |   x = numeric(m)
37 |   
38 |   u = runif(m)
39 |   v = runif(m, -w, w)
40 |   x[1] = w
41 |   for (i in 2:m) {
42 |     xt = x[i-1]
43 |     y = xt + v[i]
44 |     r = prob.ratio(y, xt, win)
45 |     if (u[i] <= r) {
46 |       x[i] = y
47 |     } else {
48 |       x[i] = xt
49 |     }
50 |   }
51 |   
52 |   return(x)
53 | }
54 | 
55 | xbs = lapply(ws, function(w) rw.b(w))
56 | par(mfrow = c(length(ws), 2))
57 | is = (burn+1):m
58 | for (i in 1:length(ws)) {
59 |   xb = xbs[[i]]
60 |   xb = xb[is]
61 |   xb.seq = seq(min(xb), max(xb), 0.05)
62 |   hist(xb, breaks = 100, probability = TRUE, main = paste('w = ', ws[i], sep = ''))
63 |   # TODO: failed to plot the posterior density.
64 |   lines(xb.seq, sapply(xb.seq, function(x) posterior(x, win)))
65 |   plot(is, xb, type="l")
66 | }
67 | par(mfrow = c(1,1))


--------------------------------------------------------------------------------
/rizzo_9.6.R:
--------------------------------------------------------------------------------
  1 | sizes = c(125, 18, 20, 34)
  2 | size = sum(sizes)
  3 | 
  4 | m = 10000
  5 | burn = 2000
  6 | is = (burn+1):m
  7 | 
  8 | prob.vector = function (theta) {
  9 |   return(c(2 + theta, (1-theta), (1-theta), theta) / 4)
 10 | }
 11 | 
 12 | prob.ratio = function (n, d) {
 13 |   #print(prob.vector(n)^sizes)
 14 |   #print(prob.vector(d)^sizes)
 15 |   return(prod(prob.vector(n)^sizes / prob.vector(d)^sizes))
 16 | }
 17 | 
 18 | # random walk metropolis.
 19 | # using unif(-0.25, 0.25) as step.
 20 | 
 21 | x.rw = numeric(m)
 22 | k.rw = 0
 23 | u = runif(m)
 24 | v = runif(m, -0.25, 0.25)
 25 | x.rw[1] = v[1]
 26 | for (i in 2:m) {
 27 |   xt = x.rw[i-1]
 28 |   y = xt + v[i]
 29 |   r = min(prob.ratio(y, xt), 1)
 30 |   #print('hippedy')
 31 |   #print(xt)
 32 |   #print(y)
 33 |   #print(r)
 34 |   if (!is.nan(r) && u[i] <= r) {
 35 |     x.rw[i] = y
 36 |   } else {
 37 |     k.rw = k.rw + 1
 38 |     x.rw[i] = xt
 39 |   }
 40 | }
 41 | 
 42 | # metropolis hastings
 43 | 
 44 | sd = 0.5
 45 | min = -0.8
 46 | max = 0.8
 47 | 
 48 | rg = function(p) {
 49 |   return(runif(1, min - abs(p), max + abs(p)))
 50 |   # return(rnorm(1, p, sd))
 51 | }
 52 | 
 53 | dg = function(x, p) {
 54 |   return(dunif(x, min - abs(p), max + abs(p)))
 55 |   # return(dnorm(x = x, mean = p, sd = sd))
 56 | }
 57 | 
 58 | x.mh = numeric(m)
 59 | k.mh = 0
 60 | u = runif(m)
 61 | x.mh[1] = rg(0)
 62 | for(i in 2:m) {
 63 |   xt = x.mh[i-1]
 64 |   y = rg(xt)
 65 |   r = min(prob.ratio(y, xt) * dg(xt, y) / dg(y, xt), 1)
 66 |   if (!is.na(r) && u[i] <= r) {
 67 |     x.mh[i] = y
 68 |   } else {
 69 |     x.mh[i] = xt
 70 |     k.mh = k.mh + 1
 71 |   }
 72 | }
 73 | 
 74 | # independence sampler.
 75 | 
 76 | x.i = numeric(m)
 77 | x.i[1] = rg(0)
 78 | k.i = 0
 79 | u = runif(m)
 80 | for (i in 2:m){
 81 |   xt = x.i[i-1]
 82 |   y = rg(0)
 83 |   r = prob.ratio(y, xt) * dg(xt, 0)/dg(y, 0)
 84 |   if (u[i] <= r) {
 85 |     x.i[i] = y
 86 |   } else {
 87 |     x.i[i] = xt
 88 |     k.i = k.i + 1
 89 |   }
 90 | }
 91 | 
 92 | print(k.rw)
 93 | print(k.mh)
 94 | print(k.i)
 95 | 
 96 | par(mfrow = c(3,2))
 97 | 
 98 | xs = as.list(x.rw, x.mh)
 99 | 
100 | x = x.rw[is]
101 | hist(x, probability = TRUE)
102 | plot(is, x, type='l')
103 | 
104 | x = x.mh[is]
105 | hist(x, probability = TRUE)
106 | plot(is, x, type='l')
107 | 
108 | x = x.i[is]
109 | hist(x, probability = TRUE)
110 | plot(is, x, type='l')
111 | 
112 | par(mfrow = c(1,1))


--------------------------------------------------------------------------------
/rizzo_9.7.R:
--------------------------------------------------------------------------------
 1 | m = 5000
 2 | burn = 1000
 3 | 
 4 | x = matrix(0, m, 2)
 5 | 
 6 | rho = 0.9
 7 | mu1 = 0
 8 | mu2 = 0
 9 | sigma1 = 1
10 | sigma2 = 1
11 | s1 = sqrt(1-rho^2)*sigma1
12 | s2 = sqrt(1-rho^2)*sigma2
13 | 
14 | mean12 = function (x2) mu1 + rho*sigma1/sigma2*(x2 - mu2)
15 | mean21 = function (x1) mu2 + rho*sigma2/sigma1*(x1 - mu1)
16 | 
17 | x[1,] = c(mu1, mu2)
18 | 
19 | for (i in 2:m) {
20 |   xt = x[i-1,]
21 |   xt[1] = rnorm(1, mean12(xt[2]), s1)
22 |   xt[2] = rnorm(1, mean21(xt[1]), s2)
23 |   x[i,] = xt
24 | }
25 | 
26 | x = x[(burn+1):m,]
27 | 
28 | x = data.frame(x)
29 | lin.reg = lm(X1 ~ X2, data = x)
30 | 
31 | par(mfrow=c(1,2))
32 | plot(x, cex = 0.5, main = "generated data")
33 | hist(lin.reg$residuals, main = "residuals of linear model")
34 | par(mfrow=c(1,1))


--------------------------------------------------------------------------------
/rizzo_9.8.R:
--------------------------------------------------------------------------------
 1 | n = 100
 2 | a = 30
 3 | b = 60
 4 | 
 5 | df = function (x, y) {
 6 |   # general binomial coefficient
 7 |   gamma(n + 1) / (gamma(x + 1) * gamma(n - x + 1))  * y^(x + a - 1) * (1 - y)^(n - x + b - 1)
 8 | }
 9 | 
10 | m = 10000
11 | d = 2
12 | 
13 | x = matrix(0, nrow = m, ncol = d)
14 | 
15 | for (i in 2:m) {
16 |   xt = x[i-1,]
17 |   xt[1] = rbinom(1, n, xt[2])
18 |   xt[2] = rbeta(1, xt[1] + a, n - xt[1] + b)
19 |   x[i,] = xt
20 | }
21 | 
22 | plot(x, cex = 0.1)
23 | xs = seq(from = min(x[,1]), to = max(x[,1]), length.out = 200)
24 | ys = seq(from = min(x[,2]), to = max(x[,2]), length.out = 200)
25 | zs = t(sapply(xs, function (x) sapply(ys, function (y) df(x, y))))
26 | 
27 | # plot contour of density for verification.
28 | contour(xs, ys, zs, add = TRUE, col = 2)


--------------------------------------------------------------------------------
/rizzo_9.9.R:
--------------------------------------------------------------------------------
 1 | sigmas = c(0.2, 0.5, 2)
 2 | s = c(-10, -5, 5, 10) # starting values.
 3 | l = 15000 # length of one chain.
 4 | k = length(s) # number of chains.
 5 | 
 6 | getMHSample = function (sigma) {
 7 |   
 8 |   # metropolis-hastings sampler to generate a normally distributed sample.
 9 |   mh = function(s, l, sd) {
10 |     df = function(y) {
11 |       return(dnorm(y))
12 |     }
13 |     
14 |     rg = function(xt) {
15 |       return(rnorm(n = 1, mean = xt, sd = sd))
16 |     }
17 |     
18 |     dg = function(y, xt) {
19 |       return(dnorm(x = y, mean = xt, sd = sd))
20 |     }
21 |     
22 |     xs = numeric(l)
23 |     xs[1] = s
24 |     us = runif(l)
25 |     for(i in 2:l) {
26 |       xt = xs[i-1]
27 |       y = rg(xt)
28 |       res = df(y) / df(xt) * dg(xt, y) / dg(y, xt)
29 |       if (us[i] <= res) {
30 |         xs[i] = y
31 |       } else {
32 |         xs[i] = xt
33 |       }
34 |     }
35 |     return(xs)
36 |   }
37 |   
38 |   xs = matrix(sapply(s, function(i) mh(i, l, sigma)), nrow = 4, byrow = TRUE)
39 |   return(xs)
40 | }
41 | 
42 | makePlots = function(sigma) {
43 |   xs = getMHSample(sigma)
44 |   
45 |   # avoid re-computing diagnostic statistics when plotting R-hat.
46 |   getPhis = function (xs) {
47 |     # compute means efficiently.
48 |     phis = t(apply(xs, MARGIN = 1, cumsum))
49 |     div = matrix(sapply(1:nrow(phis), function(r) 1:ncol(phis)), nrow = nrow(phis), byrow = TRUE)
50 |     return(phis/div)
51 |   }
52 |   
53 |   # the rows of X are generated chains.
54 |   gelman.rubin = function (phis) {
55 |     row.means = rowMeans(phis)
56 |     phi.mean = mean(row.means)
57 |     
58 |     B = n * var(row.means)
59 |     
60 |     W = mean(apply(phis, 1, "var"))
61 |     var.hat = (n-1)/n * W + 1/n * B
62 |     return(var.hat/W)
63 |   }
64 |   
65 |   phis = getPhis(xs)
66 |   
67 |   # why omit the plots? i like the plots.
68 |   # plot sequence of phis.
69 |   burn = 2000
70 |   is = (1:l)[(burn+1):l]
71 |   for(i in 1:k) {
72 |     plot(is, phis[i,is], type="l", ylab = paste("sigma = ", sigma, ", x[1] = ", s[i], sep=''))
73 |   }
74 |   
75 |   r.hats = sapply((burn + 1):l, function(j) gelman.rubin(phis[,1:j]))
76 |   
77 |   # plot sequence of r.hats.
78 |   plot(is, r.hats, type="l")
79 |   abline(h = 1.2)
80 | }
81 | 
82 | 
83 | par(mfrow = c(length(sigmas), 5))
84 | sapply(sigmas, function(sigma) makePlots(sigma))
85 | par(mfrow = c(1, 1))
86 | 
87 | 


--------------------------------------------------------------------------------