├── LICENSE ├── README.md ├── compstat_book_ex.R ├── poisson_process.R ├── rizzo_11.01.R ├── rizzo_11.02.R ├── rizzo_11.03.R ├── rizzo_11.04.R ├── rizzo_11.05.R ├── rizzo_11.06.R ├── rizzo_3.19.R ├── rizzo_3.20.R ├── rizzo_5.01.R ├── rizzo_5.02.R ├── rizzo_5.03.R ├── rizzo_5.04.R ├── rizzo_5.05.R ├── rizzo_5.13.R ├── rizzo_5.14.R ├── rizzo_7.03.R ├── rizzo_7.04.R ├── rizzo_7.05.R ├── rizzo_7.06.R ├── rizzo_7.07.R ├── rizzo_7.A.R ├── rizzo_7.B.R ├── rizzo_8.1.R ├── rizzo_8.2.R ├── rizzo_8.3.R ├── rizzo_8.4.R ├── rizzo_8.A.R ├── rizzo_9.1.R ├── rizzo_9.10.R ├── rizzo_9.2.R ├── rizzo_9.3.R ├── rizzo_9.4.R ├── rizzo_9.5.R ├── rizzo_9.6.R ├── rizzo_9.7.R ├── rizzo_9.8.R └── rizzo_9.9.R /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Gerhard Konnerth 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # statistical-computing-with-r 2 | Solution attempts for selected exercises from Rizzo, Maria L. Statistical computing with R. CRC Press, 2007. 3 | 4 | This is work in progress. Proper documentation as well as splitting the code into multiple scripts is planned. 5 | 6 | Please note that parts of the code are inspired from the book. 7 | -------------------------------------------------------------------------------- /compstat_book_ex.R: -------------------------------------------------------------------------------- 1 | #ex 3.1 2 | 3 | two.exp = function (size, lambda, eta) { 4 | r = runif(size) 5 | -1/lambda * log(u / (-exp(lambda * eta)) + 1) 6 | } 7 | 8 | n = 10000 9 | lambda = 3 10 | eta = 0 11 | a = rexp(1000, rate = lambda) 12 | b = two.exp (n, lambda, eta) 13 | 14 | p1 = qplot(a, xlim = c(0,4), binwidth = 0.05) 15 | p2 = qplot(b, xlim = c(0,4), binwidth = 0.05) 16 | 17 | grid.arrange(p1, p2) 18 | 19 | round(x = rbind(quantile(a, probs = seq(0, 1, 0.05)), quantile(b, probs = seq(0, 1, 0.05))), digits = 3) 20 | 21 | # ex 3.2 22 | 23 | library(ggplot2) 24 | library(gridExtra) 25 | library(reshape) 26 | 27 | laplace = function (size) { 28 | u = runif(size) 29 | sapply(u, function (v) if (v > 0.5) { -log(-2*v + 2) } else { log(2*v) }) 30 | } 31 | n = 1000 32 | range = c(-10,10) 33 | step = 0.05 34 | 35 | truth = function (v) { 36 | 1/2*exp(-abs(v)) 37 | } 38 | 39 | x = seq(range[1], range[2], length.out = n) 40 | xy = truth(x) 41 | y = laplace(1000) 42 | 43 | #lines(density(xy)) 44 | 45 | m = cbind(x, xy) 46 | colnames(m) = c("x", "f(x)") 47 | 48 | # qplot(y/n, xlim = range, geom="histogram", binwidth = step) + stat_function(fun = truth) 49 | hist(y, probability = TRUE, breaks = 100, xlim = range) 50 | lines(x, y = xy) 51 | 52 | # ex 3.3 53 | 54 | rpareto = function (size, a, b) { 55 | stopifnot(a > 0 && b > 0) 56 | u = runif(size) 57 | b*(1-u)^(-1/a) 58 | } 59 | 60 | dpareto = function (x, a, b) { 61 | stopifnot(a > 0 && b > 0) 62 | sapply(x, function (v) if (v < b) { 0 } else { a*b^a*v^(-a-1) }) 63 | } 64 | 65 | size = 1000 66 | a = 2 67 | b = 2 68 | sam = rpareto(1000, a, b) 69 | 70 | hist(sam, probability = TRUE, breaks = 100) 71 | ran = range(sam) 72 | x = seq(ran[1], ran[2], 0.01) 73 | y = dpareto(x, a, b) 74 | lines(x = x, y = y) 75 | 76 | # ex 3.4 77 | 78 | rrayleigh = function (size, sigma) { 79 | stopifnot(sigma > 0) 80 | u = runif(size) 81 | sqrt(-2*sigma^2*log(1 - u)) 82 | } 83 | 84 | drayleigh = function (x, sigma) { 85 | stopifnot(sigma > 0) 86 | y = x / (sigma^2) * exp(- x^2 / (2 * sigma^2)) 87 | y[x < 0] = 0 88 | y 89 | } 90 | 91 | size = 1000 92 | library(ggplot2) 93 | library(gridExtra) 94 | 95 | sigmas = 1:5 96 | xs = sapply(sigmas, function (v) rrayleigh(size, v)) 97 | mat = matrix(xs, ncol = length(sigmas), dimnames = list(NULL, sigmas)) 98 | df = data.frame(mat) 99 | ps = lapply(1:ncol(df), function (v) qplot(x = df[,v])) 100 | p1 = qplot(df$X1, main = paste(c("Sigma = ", sigmas[1]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X1)[2])) 101 | p2 = qplot(df$X2, main = paste(c("Sigma = ", sigmas[2]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X2)[2])) 102 | p3 = qplot(df$X3, main = paste(c("Sigma = ", sigmas[3]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X3)[2])) 103 | p4 = qplot(df$X4, main = paste(c("Sigma = ", sigmas[4]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X4)[2])) 104 | p5 = qplot(df$X5, main = paste(c("Sigma = ", sigmas[5]), collapse = " "), binwidth = 0.1) + scale_x_continuous(breaks = c(1:range(df$X5)[2])) 105 | 106 | grid.arrange(p1, p2, p3, p4, p5) 107 | 108 | mode <- function(x) { 109 | ux <- unique(x) 110 | ux[which.max(tabulate(match(x, ux)))] 111 | } 112 | 113 | sapply(df, mode) 114 | 115 | # ex 3.5 116 | 117 | xs = 0:4 118 | ps = c(0.1, 0.2, 0.2, 0.2, 0.3) 119 | size = 100000 120 | 121 | rdisc = function (size, xs, ps) { 122 | u = runif(size) 123 | c = cumsum(ps) 124 | sapply(u, function(v) min(xs[c >= v])) 125 | } 126 | 127 | sam = rdisc(size, xs, ps) 128 | r1 = table(sam) 129 | truth = sample(size = size, x = xs, prob = ps, replace = TRUE) 130 | r2 = table(truth) 131 | # hist(sam, probability = TRUE) 132 | 133 | rel.freq = rbind(r1, r2)/size 134 | 135 | # ex 3.7 136 | 137 | general.acc.rej = function (size, xs, df, dgen, rgen, c) { 138 | acc.rej = function (size, df, rgen, dgen, c) { 139 | ct = 1 140 | n = 1 141 | res = numeric(size) 142 | while(n <= size) { 143 | y = rgen(1) 144 | u = runif(1) 145 | if (u < df(y)/dgen(y)/c) { 146 | res[n] = y 147 | n = n + 1 148 | } 149 | ct = ct + 1 150 | } 151 | #print((n-1)/(ct-1)) 152 | res 153 | } 154 | 155 | 156 | sam = acc.rej(size = size, df = df, rgen = rgen, dgen = dgen, c) 157 | 158 | #ys = df(xs) 159 | 160 | #nrBins = 50 161 | # Compute the maximum value in the histogram for a better visualization. 162 | #bins = seq(min(xs), max(xs), by = dist(range(xs))/nrBins) 163 | #hist.vals = table(cut(x = sam, bins)) 164 | # adapt to a total area of 1 (probability histogram). 165 | #hist.vals = hist.vals/sum(hist.vals) * nrBins 166 | # y limits as maximum of distribution and sample. 167 | #ylim = c(min(c(ys, hist.vals)), max(c(ys, hist.vals))) 168 | 169 | #hist(sam, probability = TRUE, ylim = ylim, breaks = nrBins) 170 | #lines(x = xs, y = ys) 171 | 172 | sam 173 | } 174 | 175 | ex3.7 = function(size, a, b) { 176 | rgen = function (size) { 177 | runif(size, min = u.min, max = u.max) 178 | } 179 | 180 | dgen = function (x) { 181 | dunif(x, min = u.min, max = u.max) 182 | } 183 | 184 | df = function (x) { 185 | dbeta(x = x, shape1 = a, shape2 = b) 186 | } 187 | 188 | xs = seq(0, 1, 0.01) 189 | u.max = max(xs) 190 | u.min = min(xs) 191 | y.max = max(df(xs)) 192 | c = y.max / (u.max - u.min) 193 | 194 | general.acc.rej (size = size, xs = xs, df = df, dgen = dgen, rgen = rgen, c = c) 195 | 196 | } 197 | 198 | a = 3 199 | b = 2 200 | size = 1000 201 | 202 | ex3.7 (size = size, a = a, b = b) 203 | 204 | # ex 3.8 205 | 206 | library(ggplot2) 207 | library(gridExtra) 208 | 209 | lognormal = function (size, mu, sigma) { 210 | exp(rnorm(n = size, mean = mu, sd = sigma)) 211 | } 212 | 213 | mus = c(0, 0, 0, 1, 2, 3) 214 | sigmas = c(0.25, 0.5, 1, 0.25, 0.5, 1) 215 | size = 1000 216 | 217 | params = sapply(1:length(mus), function (v) c(mus[v], sigmas[v])) 218 | params = split(params, rep(1:ncol(params), each = nrow(params))) 219 | 220 | sams = data.frame(lapply(params, function (v) lognormal(size = size, mu = v[1], sigma = v[2]))) 221 | truths = data.frame(lapply(params, function (v) rlnorm(n = size, meanlog = v[1], sdlog = v[2]))) 222 | colnames(sams) = params 223 | colnames(truths) = params 224 | 225 | lims = lapply(1:length(params), function (v) c(min(c(sams[,v], truths[,v])), max(c(sams[,v], truths[,v])))) 226 | 227 | ps.truth = lapply(1:ncol(truths), function (v) ggplot(data = truths, aes(x = truths[,v])) + geom_density() + ggtitle(paste("truth, mean =", params[[v]][1], ", sd =", params[[v]][2], sep = " ")) + scale_x_continuous(limits = lims[[v]])) 228 | ps.sams = lapply(1:ncol(sams), function (v) ggplot(data = sams, aes(x = sams[,v])) + geom_density() + ggtitle(paste("sample, mean =", params[[v]][1], ", sd =", params[[v]][2], sep = " ")) + scale_x_continuous(limits = lims[[v]])) 229 | 230 | do.call("grid.arrange", c(grobs = c(ps.truth, ps.sams), ncol = 2, as.table = FALSE)) 231 | 232 | # ex 3.9 233 | 234 | depanechnikov = function (x) { 235 | 3/4 * (1 - x^2) 236 | } 237 | 238 | repanechnikov = function (size) { 239 | u1 = runif(size, min = -1, max = 1) 240 | u2 = runif(size, min = -1, max = 1) 241 | u3 = runif(size, min = -1, max = 1) 242 | sapply(1:size, function(v) 243 | if (abs(u3[v]) >= abs(u2[v]) && abs(u3[v]) >= abs(u1[v])) u2[v] else u3[v] 244 | ) 245 | } 246 | 247 | size = 1000 248 | sam = repanechnikov(size = size) 249 | xlim = range(sam) 250 | xs = seq(from = xlim[1], to = xlim[2], by = 0.01) 251 | 252 | hist(sam, probability = TRUE) 253 | lines(x = xs, y = depanechnikov(xs)) 254 | 255 | # ex 3.11 256 | 257 | means = c(0, 3) 258 | sds = c(1, 1) 259 | ps = seq(0.1, 0.9, 0.07) 260 | size = 1000 261 | 262 | rmix = function (size, p, means, sds) { 263 | rnorm(size, mean = sample(c(means[1], means[2], replace = TRUE, probs = c(p, 1-p))), sd = sample(c(sds[1], sds[2], replace = TRUE, probs = c(p, 1-p)))) 264 | } 265 | 266 | sams = data.frame(lapply(ps, function (p) rmix(size = size, p = p, means = means, sds = sds))) 267 | colnames(sams) = ps 268 | 269 | plots = lapply(1:ncol(sams), function (v) ggplot(data = sams, aes(x = sams[,v])) + geom_density() + geom_histogram(aes(y = ..density..), bins = 100) + ggtitle(paste("p =", ps[v], sep = " "))) 270 | 271 | 272 | do.call("grid.arrange", c(grobs = c(plots), ncol = 2, as.table = FALSE)) 273 | 274 | # hist(sam, probability = TRUE, ylim = c(0, 0.5)) 275 | # lines(density(sam)) 276 | 277 | # ex 3.12 278 | 279 | size = 1000 280 | r = 4 281 | beta = 2 282 | 283 | rmix = function (size, r, beta) { 284 | rexp(n = size, rgamma(n = size, r, beta)) 285 | } 286 | 287 | sam = rmix(size = size, r = r, beta = beta) 288 | 289 | hist (sam, probability = TRUE, breaks = 100, ylim = c(0, 2)) 290 | # ex 3.13 291 | xs = seq(min(sam), max(sam), 0.01) 292 | lines(x = xs, y = dpareto(xs, a = r, b = beta)) 293 | 294 | # ex 3.14 295 | 296 | mus = c(0, 1, 2) 297 | size = 2000 298 | d = length(mus) 299 | Sigma = matrix(c(1, -0.5, 0.5, -0.5, 2, -0.5, 0.5, -0.5, 3), ncol = 3) 300 | 301 | Zs = matrix(rnorm(n = d * size), ncol = size, nrow = d) 302 | C = t(chol(Sigma)) 303 | 304 | Ys = t(C %*% Zs + mus) 305 | 306 | # pairs(Ys) 307 | 308 | # ex 3.15 309 | 310 | normalize = function (data) { 311 | Sigma = cov(data) 312 | mus = sapply(1:ncol(data), function (v) mean(data[,v])) 313 | A = chol(solve(Sigma)) 314 | Xs = t(A %*% (t(data)-mus)) 315 | Xs 316 | } 317 | 318 | 319 | # ex 3.16 320 | 321 | library(bootstrap) 322 | norm.scor = scor 323 | m1 = normalize(data.matrix(norm.scor[,1:2])) 324 | m2 = normalize(data.matrix(norm.scor[,3:5])) 325 | norm.scor[,1:2] = m1 326 | norm.scor[,3:5] = m2 327 | cov = round(cov(norm.scor), digits=4) 328 | 329 | # ex 3.17 330 | 331 | iter = 30 332 | size = 5000 333 | 334 | a = 1 335 | b = 5 336 | 337 | tt1 = 0 338 | tt2 = 0 339 | 340 | for(i in 1:iter) { 341 | tt1 = tt1 + system.time(ex3.7(size = size, a = a, b = b))["elapsed"] 342 | tt2 = tt2 + system.time(rbeta(n = 1000, shape1 = a, shape2 = b))["elapsed"] 343 | } 344 | 345 | tt1 346 | tt2 347 | 348 | # ex 3.18 349 | 350 | my.rwishart = function (Sigma, n) { 351 | stopifnot(nrow(Sigma) == ncol(Sigma)) 352 | d = ncol(Sigma) 353 | L = chol(Sigma) 354 | tmp = matrix(rnorm(d^2), ncol = d, nrow = d) 355 | tmp[upper.tri(tmp, diag = TRUE)] = 0 356 | A = diag(sapply(1:d, function (v) rchisq(1, n - v + 1))) + tmp 357 | t(L) %*% A %*% t(A) %*% L 358 | } 359 | 360 | Sigma = matrix(c(1, -0.5, 0.5, -0.5, 2, -0.5, 0.5, -0.5, 3), ncol = 3) 361 | n = 4 362 | size = 1000 363 | 364 | sam = lapply(1:size, function (v) my.rwishart(Sigma, n)) 365 | 366 | (mean = apply(simplify2array(sam), 1:2, mean)) 367 | sd = apply(simplify2array(sam), 1:2, sd) 368 | 369 | (truth.mean = n * Sigma) 370 | 371 | -------------------------------------------------------------------------------- /poisson_process.R: -------------------------------------------------------------------------------- 1 | lambda = 2 2 | t0 = 3 3 | size = 10000 4 | 5 | pp.exp = function (t0) { 6 | Tn = rexp(100, lambda) 7 | Sn = cumsum(Tn) 8 | return(min(which(Sn > t0)) - 1) 9 | } 10 | 11 | n1s = sapply(1:size, function () pp.exp(t0)) 12 | n2s = rpois(size, t0 * lambda) 13 | 14 | rbind(summary(n1s), summary(n2s)) -------------------------------------------------------------------------------- /rizzo_11.01.R: -------------------------------------------------------------------------------- 1 | # useful example value of x. 2 | x = 77.34599 #runif(1, 1, 100) 3 | a = log(exp(x)) 4 | b = exp(log(x)) 5 | 6 | x == a 7 | x == b 8 | a == b 9 | 10 | identical(x, a) 11 | identical(x, b) 12 | identical(b, a) 13 | 14 | all.equal(x, a) 15 | all.equal(x, b) 16 | all.equal(b, a) 17 | -------------------------------------------------------------------------------- /rizzo_11.02.R: -------------------------------------------------------------------------------- 1 | a = 10 2 | b = 20 3 | r = 5 4 | s = 5 5 | 6 | getP = function (a, b, r, s) sum(sapply(max(r-b, 0):(r-1), function(k) choose(r+s-1,k)*choose(a+b-1, a+r-1-k)/choose(a+b+r+s-2,a+r-1))) 7 | 8 | P.numerical = getP(a, b, r, s) 9 | 10 | n = 10000 11 | xs = rbeta(n, shape1 = a, shape2 = b) 12 | ys = rbeta(n, shape1 = r, shape2 = s) 13 | P.mc = mean(xs < ys) 14 | 15 | P.numerical 16 | P.mc -------------------------------------------------------------------------------- /rizzo_11.03.R: -------------------------------------------------------------------------------- 1 | a = c(1,2) 2 | d = length(a) 3 | 4 | getTerm = function (a, k) { 5 | d = length(a) 6 | return((-1)^k * exp((2*k+2)*log(norm(a, type = "2")) - lgamma(k+1) - k*log(2) - log(2*k + 1) - log(2*k + 2) + lgamma((d+1)/2) + lgamma(k + 3/2) - lgamma(k + d/2 + 1))) 7 | } 8 | 9 | n = 400 10 | 11 | getSum = function (a) { 12 | sum(sapply(0:n, function (k) getTerm(a, k))) 13 | } -------------------------------------------------------------------------------- /rizzo_11.04.R: -------------------------------------------------------------------------------- 1 | findIntersection = function (k) { 2 | s.k.minus.one = function (a) { 3 | 1-pt(sqrt(a^2 * (k - 1) / (k - a^2)), df = k-1) 4 | } 5 | 6 | s.k = function (a) { 7 | 1-pt(sqrt(a^2 * k / (k + 1 - a^2)), df = k) 8 | } 9 | 10 | f = function (a) { 11 | s.k(a) - s.k.minus.one(a) 12 | } 13 | 14 | eps = 1e-2 15 | return(uniroot(f, interval = c(eps, sqrt(k)-eps))$root) 16 | } 17 | 18 | rs = sapply(c(4:25, 100, 500, 1000), function (k) { 19 | findIntersection(k) 20 | }) 21 | -------------------------------------------------------------------------------- /rizzo_11.05.R: -------------------------------------------------------------------------------- 1 | # the two sides of the equation are the same expression for a different k. i try to simplify the task by writing them as functions. 2 | solve.equation = function (k) { 3 | 4 | # general integral. 5 | expr.integral = function(u, n) { 6 | (1 + u^2/(n-1))^(-n/2) 7 | } 8 | 9 | # general c_k. 10 | get.c = function (n, a) { 11 | sqrt(a^2 * n / (n + 1 - a^2)) 12 | } 13 | 14 | # left or right side of the equation, depending wheather n = k or n = k + 1. 15 | expr = function (n, a) { 16 | 17 | this.integral = function (u) { 18 | expr.integral(u, n) 19 | } 20 | 21 | c = get.c(n - 1, a) 22 | 23 | 2/sqrt(pi*(n-1)) * exp(lgamma(n/2)-lgamma((n-1)/2)) * integrate(this.integral, lower = 0, upper = c)$value 24 | } 25 | 26 | f = function (a) { 27 | left = expr(k, a) 28 | right = expr(k + 1, a) 29 | return (left - right) 30 | } 31 | 32 | eps = 1e-2 33 | if (f(eps) < 0 && f(sqrt(k) - eps) > 0 || f(eps) > 0 && f(sqrt(k) - eps) < 0) { 34 | r = uniroot(f, interval = c(eps, sqrt(k)-eps))$root 35 | } else { 36 | r = NA 37 | } 38 | return(r) 39 | } 40 | 41 | rs2 = sapply(c(4:25, 100, 500, 1000), function (k) { 42 | solve.equation(k) 43 | }) 44 | 45 | 46 | # plot f for a fixed k for debugging. 47 | # xs = seq(eps, sqrt(k)-eps, 0.01) 48 | # plot(xs, sapply(xs, function(x) f(x)), type="l") -------------------------------------------------------------------------------- /rizzo_11.06.R: -------------------------------------------------------------------------------- 1 | my.dcauchy = function (x, eta, theta) { 2 | stopifnot(theta > 0) 3 | return(1/(theta*pi*(1 + ((x - eta)/theta)^2))) 4 | } 5 | 6 | my.pcauchy = function (x, eta, theta) { 7 | stopifnot(theta > 0) 8 | 9 | integral = function (x) { 10 | my.dcauchy(x, eta, theta) 11 | } 12 | 13 | return(integrate(integral, lower = -Inf, upper = x)$value) 14 | } 15 | 16 | eta = 0 17 | theta = 2 18 | xs = seq(-10, 10) 19 | estimate = sapply(xs, function(x) my.pcauchy(x, eta, theta)) 20 | truth = sapply(xs, function(x) pcauchy(x, eta, theta)) 21 | round(rbind(estimate, truth), 4) -------------------------------------------------------------------------------- /rizzo_3.19.R: -------------------------------------------------------------------------------- 1 | # ex 3.19 2 | p = 0.5 3 | s.min = 0 4 | s.max = 20 5 | s.start = 10 6 | 7 | random.walk = function (s) { 8 | u = runif(1) 9 | n = s[length(s)] 10 | if (u > p) { 11 | n = n -1 12 | } else { 13 | n = n + 1 14 | } 15 | if (n <= s.min || n >= s.max) { 16 | return(c(s, n)) 17 | } else { 18 | return(random.walk(c(s,n))) 19 | } 20 | } 21 | 22 | s = random.walk(s.start) 23 | plot(y = s, x = 1:length(s), type="l") -------------------------------------------------------------------------------- /rizzo_3.20.R: -------------------------------------------------------------------------------- 1 | # ex 3.20 2 | 3 | lambda = 3 4 | shape = 6 5 | scale = 2 6 | size = 10000 7 | eps = 1e-8 8 | 9 | t = 10 10 | 11 | # with probability 1-eps, n or less gamma distributed random variables are needed. 12 | n = qpois(1-eps, lambda = lambda * t) 13 | # sample from the gamma distribution. Not sure if it's ok to use the same sample every time. 14 | # with this, the mean is of by about 10%. 15 | # ys = c(rgamma(n = n, shape = shape, scale = scale)) 16 | 17 | # the interarrival times are exponentially distributed with rate lambda. 18 | pp.exp = function (t0) { 19 | # not sure how many Tn are needed :/ 20 | Tn = rexp(1000, lambda) 21 | Sn = cumsum(Tn) 22 | return(min(which(Sn > t0)) - 1) 23 | } 24 | 25 | # generate N(t) which follow the poisson process. 26 | ns = sapply(1:size, function (i) pp.exp(t)) 27 | # generate X(t) as in the problem description. 28 | xs = sapply(ns, function (n) { 29 | ys = c(rgamma(n = n, shape = shape, scale = scale)) 30 | sum(ys[1:n]) 31 | }) 32 | 33 | # compare mean and variance of 'size' samples of X(t) for verification. 34 | # sample: 35 | (mean.s = mean(xs)) 36 | (var.s = var(xs)) 37 | 38 | # theoretical: 39 | (mean.t = lambda * t * shape * scale) 40 | (var.t = (shape + 1) * shape * scale^2) -------------------------------------------------------------------------------- /rizzo_5.01.R: -------------------------------------------------------------------------------- 1 | # change of variables yields integral from 0 to 1 of sin(x pi/3) pi/3 dx. 2 | 3 | n = 10000 4 | 5 | g = function (x) { 6 | sin(pi/3 * x) * pi / 3 7 | } 8 | 9 | (theta.hat = mean(g(runif(n)))) 10 | (theta = 1/2) -------------------------------------------------------------------------------- /rizzo_5.02.R: -------------------------------------------------------------------------------- 1 | x = seq(-10, 10, 2) 2 | n = 10000 3 | 4 | mc.norm = function (x) { 5 | g = function (t) { 6 | x * exp(-(x*t)^2/2) 7 | } 8 | 9 | gs = g(runif(n)) 10 | 11 | var = var(gs) / n 12 | 13 | se = sqrt(var) 14 | 15 | zero.to.x = 1/sqrt(2*pi) * mean(gs) 16 | 17 | # use the property that the normal density funciton integrates to 1. 18 | theta.hat = 2 * zero.to.x + (1 - 2*zero.to.x) / 2 19 | 20 | # E(theta.hat) = theta. 21 | # 95% CI for N(0,1) is (-1.96, 1.96). 22 | # by the CLT (theta.hat - theta) / se ~ N(0,1). 23 | CI = theta.hat + 1.96 * se 24 | 25 | return(data.frame(theta.hat, var, CI)) 26 | } 27 | -------------------------------------------------------------------------------- /rizzo_5.03.R: -------------------------------------------------------------------------------- 1 | m = 10000 2 | 3 | estimate.unif = function () { 4 | g = function (y) { 5 | exp(-y/2) * 1/2 6 | } 7 | xs = g(runif(m)) 8 | theta.hat = mean(xs) 9 | var = var(xs)/m 10 | return(data.frame(theta.hat, var)) 11 | } 12 | 13 | estimate.unif.min.max = function () { 14 | g = function (y) { 15 | exp(-y) 16 | } 17 | # divide by 2, since dunif(x, 0, 0.5) = 2 for x in (0, 0.5). 18 | xs = g(runif(m, min = 0, max = 0.5)) 19 | var = var(xs)/m 20 | 21 | theta.hat = mean(xs) * 1/2 22 | return(data.frame(theta.hat, var)) 23 | } 24 | 25 | estimate.exp = function () { 26 | # theta.hat = pexp(0.5, rate = 1) - pexp(0, rate = 1) 27 | g = function (y) { 28 | 1/y 29 | } 30 | 31 | y = rexp(m, rate = 1) <= 0.5 32 | var = var(y)/m 33 | 34 | theta.hat = mean(y) 35 | return(data.frame(theta.hat, var)) 36 | } 37 | 38 | estimate.unif() 39 | estimate.unif.min.max() 40 | estimate.exp() 41 | 42 | # exp estimator samples of 0 and 1. therefore, higher variance. -------------------------------------------------------------------------------- /rizzo_5.04.R: -------------------------------------------------------------------------------- 1 | n = 10000 2 | a = 3 3 | b = 3 4 | 5 | # plot the density to get an idea of the problem. 6 | xs = rbeta(n, shape1 = a, shape2 = b) 7 | plot(density(xs)) 8 | 9 | mc.cdf.beta = function (p, shape1, shape2) { 10 | if (p <= 0 || p >= 1) return(0) 11 | us = runif(n) 12 | return(mean(dbeta(p*us, a, b)) * p) 13 | } 14 | 15 | xs = seq(0, 1, 0.1) 16 | 17 | round(rbind(sapply(xs, function (x) mc.cdf.beta(x, a, b)), sapply(xs, function(x) pbeta(x, a, b))), 3) -------------------------------------------------------------------------------- /rizzo_5.05.R: -------------------------------------------------------------------------------- 1 | n = 1000 2 | 3 | mc.pnorm = function (x) { 4 | g = function (t) { 5 | x * exp(-(x*t)^2/2) 6 | } 7 | 8 | gs = g(runif(n)) 9 | var = var(gs)/n 10 | zero.to.x = 1/sqrt(2*pi) * mean(gs) 11 | # use the property that the normal density funciton integrates to 1. 12 | theta.hat = 2 * zero.to.x + (1 - 2*zero.to.x) / 2 13 | return(data.frame(theta.hat, var)) 14 | } 15 | 16 | hit.miss.pnorm = function (p) { 17 | xs = rnorm(n) <= p 18 | var = var(xs)/n 19 | theta.hat = mean(xs) 20 | return(data.frame(theta.hat, var)) 21 | } 22 | 23 | ps = seq(0, 1, 0.1) 24 | mc = data.frame(t(sapply(ps, function (p) mc.pnorm(p)))) 25 | hit.miss = data.frame(t(sapply(ps, function (p) hit.miss.pnorm(p)))) 26 | truth = sapply(ps, function (p) pnorm(p)) 27 | 28 | mc.mean = unlist(mc$theta.hat) 29 | hit.miss.mean = unlist(hit.miss$theta.hat) 30 | round(rbind(ps, mc.mean, hit.miss.mean, truth), 3) 31 | # mc is close to the truth than hit or miss. 32 | 33 | (var.ratio = round(unlist(mc$var) / unlist(hit.miss$var), 3)) 34 | # mc is more efficient than hit miss. -------------------------------------------------------------------------------- /rizzo_5.13.R: -------------------------------------------------------------------------------- 1 | g = function (x) { 2 | x ^ 2 / sqrt(2*pi) * exp(-x^2/2) 3 | } 4 | 5 | xs = seq(0,10,0.1) 6 | 7 | ys.g = g(xs) 8 | ys.rayleigh = drayleigh(xs, sigma = 1.5) 9 | ys.norm = dnorm(xs, mean = 1.5) 10 | lim = max(c(ys.g, ys.rayleigh, ys.norm)) 11 | 12 | plot(xs, ys.g, type = "l", ylim = c(0, lim)) 13 | lines(xs, ys.rayleigh, col="red", ylim = c(0, lim)) 14 | lines(xs, ys.norm, col="blue", ylim = c(0, lim)) 15 | 16 | # f1(x) = drayleigh(x, sigma = 1.5) 17 | # f2(x) = dnorm(x, mean = 1.5) 18 | 19 | # f2 is a little closer to g. should be better. -------------------------------------------------------------------------------- /rizzo_5.14.R: -------------------------------------------------------------------------------- 1 | g = function (x) { 2 | x ^ 2 / sqrt(2*pi) * exp(-x^2/2) * (x > 1) 3 | } 4 | 5 | sigma.rayleigh = 1.5 6 | mean = 1.5 7 | n = 10000 8 | 9 | f1 = function (x) { 10 | drayleigh(x, sigma = sigma.rayleigh) * (x > 1) 11 | } 12 | 13 | f2 = function (x) { 14 | dnorm(x, mean = mean) * (x > 1) 15 | } 16 | 17 | rf1 = function () { 18 | rrayleigh(n, sigma = sigma.rayleigh) 19 | } 20 | 21 | rf2 = function () { 22 | rnorm(n, mean = mean) 23 | } 24 | 25 | is.rayleigh = function () { 26 | xs = rf1() 27 | return(mean(g(xs)/f1(xs), na.rm = TRUE)) 28 | } 29 | 30 | is.norm = function () { 31 | xs = rf2() 32 | return(mean(g(xs)/f2(xs), na.rm = TRUE)) 33 | } 34 | 35 | (theta1 = is.rayleigh()) 36 | (theta2 = is.norm()) 37 | (truth = 0.400626) -------------------------------------------------------------------------------- /rizzo_7.03.R: -------------------------------------------------------------------------------- 1 | library("bootstrap") 2 | 3 | B = 200 4 | n = nrow(law) 5 | 6 | theta.hat = cor(law$LSAT, law$GPA) 7 | theta.hats.b = numeric(B) 8 | 9 | ts = numeric(B) 10 | 11 | for (b in 1:B) { 12 | i = sample(x = 1:n, size = n, replace = TRUE) 13 | law.b = law[i,] 14 | theta.hats.b[b] = cor(law.b$LSAT, law.b$GPA) 15 | sd.theta.hats.b = numeric(B) 16 | 17 | for(b2 in 1:B) { 18 | i2 = sample(x = 1:n, size = n, replace = TRUE) 19 | law.b2 = law.b[i2,] 20 | sd.theta.hats.b[b2] = cor(law.b2$LSAT, law.b2$GPA) 21 | } 22 | 23 | se.b = sd(sd.theta.hats.b) 24 | 25 | ts[b] = (theta.hats.b[b] - theta.hat) / se.b 26 | } 27 | 28 | alpha = 0.05 29 | ts.ordered = sort(ts) 30 | 31 | qs = quantile(ts.ordered, probs = c(alpha/2, 1-alpha/2)) 32 | 33 | se.hat = sd(theta.hats.b) 34 | 35 | (CI = c(theta.hat - qs[2]*se.hat, theta.hat - qs[1]*se.hat)) 36 | 37 | hist(ts, breaks = 100, xlim = c(-5, 10)) -------------------------------------------------------------------------------- /rizzo_7.04.R: -------------------------------------------------------------------------------- 1 | library(boot) 2 | hours = aircondit$hours 3 | n = length(hours) 4 | 5 | # MLE yeilds: 6 | mle.lambda = function (values) { 7 | return(length(values)/sum(values)) 8 | } 9 | 10 | lambda.hat = mle.lambda(hours) 11 | 12 | lambda.hats.b = numeric(B) 13 | 14 | B = 200 15 | for (b in 1:B) { 16 | i = sample(1:n, n, replace = TRUE) 17 | hours.b = hours[i] 18 | lambda.hats.b[b] = mle.lambda(hours.b) 19 | } 20 | 21 | lambda.hats.b.mean = mean(lambda.hats.b) 22 | 23 | (bias = lambda.hats.b.mean - lambda.hat) -------------------------------------------------------------------------------- /rizzo_7.05.R: -------------------------------------------------------------------------------- 1 | library(boot) 2 | hours = aircondit$hours 3 | n = length(hours) 4 | 5 | # MLE yeilds: 6 | mle.lambda = function (values) { 7 | return(length(values)/sum(values)) 8 | } 9 | 10 | time.b = numeric(B) 11 | ts = numeric(B) 12 | 13 | time.hat = 1/mle.lambda(hours) 14 | 15 | B = 200 16 | for (b in 1:B) { 17 | i = sample(1:n, n, replace = TRUE) 18 | hours.b = hours[i] 19 | time.b[b] = 1/mle.lambda(hours.b) 20 | 21 | times.b2 = numeric(B) 22 | 23 | # compute bootstrap ts for later use. 24 | for (b2 in 1:B) { 25 | i2 = sample(1:n, n, replace = TRUE) 26 | hours.b2 = hours.b[i2] 27 | times.b2[b2] = 1/mle.lambda(hours.b2) 28 | } 29 | 30 | ts[b] = (time.b[b] - time.hat) / sd(times.b2) 31 | } 32 | 33 | se.hat = sd(time.b) 34 | alpha = 0.05; 35 | q.probs = c(alpha/2, 1-alpha/2) 36 | 37 | setCINames = function (object) { 38 | return(setNames(object, c(paste((alpha/2)*100, '%'), paste((1-alpha/2)*100, '%')))) 39 | } 40 | 41 | # plot observed statistic 1/lambda, as well as t values for comparison with the bootstrap t CI. 42 | par(mfrow=c(1,2)) 43 | hist(time.b, breaks = 100) 44 | hist(ts, breaks = 100) 45 | 46 | # standard normal. 47 | q = qnorm(1-alpha/2) 48 | ci.sn = time.hat + c(-1,1)*q*se.hat 49 | (ci.sn = setCINames(ci.sn)) 50 | 51 | # basic boostrap. 52 | qs.time.hat = quantile(x = time.b, p = q.probs) 53 | ci.basic = rev(2*time.hat - qs.time.hat) 54 | (ci.basic = setCINames (object = ci.basic)) 55 | 56 | # percentile. 57 | (ci.percentile = qs.time.hat) 58 | 59 | # bootstrap t. 60 | qs.t = quantile(x = ts, p = q.probs) 61 | (ci.t = setCINames(rev(time.hat - qs.t*se.hat))) -------------------------------------------------------------------------------- /rizzo_7.06.R: -------------------------------------------------------------------------------- 1 | library(bootstrap) 2 | 3 | pairs(scor) 4 | cor(scor) 5 | 6 | n = nrow(scor) 7 | B = 200 8 | 9 | ro.12 = numeric(B) 10 | ro.34 = numeric(B) 11 | ro.35 = numeric(B) 12 | ro.45 = numeric(B) 13 | 14 | for (b in 1:B) { 15 | i = sample(1:n, n, replace = TRUE) 16 | scor.b = scor[i,] 17 | ro.12[b] = cor(scor.b$mec, scor.b$vec) 18 | ro.34[b] = cor(scor.b$alg, scor.b$ana) 19 | ro.35[b] = cor(scor.b$alg, scor.b$sta) 20 | ro.45[b] = cor(scor.b$ana, scor.b$sta) 21 | } 22 | 23 | (se.12 = sd(ro.12)) 24 | (se.34 = sd(ro.34)) 25 | (se.35 = sd(ro.35)) 26 | (se.45 = sd(ro.45)) -------------------------------------------------------------------------------- /rizzo_7.07.R: -------------------------------------------------------------------------------- 1 | library(bootstrap) 2 | 3 | 4 | par(mfrow=c(5,1)) 5 | breaks = 100 6 | hist(scor$mec, breaks = breaks) 7 | hist(scor$vec, breaks = breaks) 8 | hist(scor$alg, breaks = breaks) 9 | hist(scor$ana, breaks = breaks) 10 | hist(scor$sta, breaks = breaks) -------------------------------------------------------------------------------- /rizzo_7.A.R: -------------------------------------------------------------------------------- 1 | mu = 4 2 | sigma = 3 3 | n = 1000 4 | 5 | xs = rnorm(n, mean = mu, sd = sqrt(sigma)) 6 | 7 | # sample mean. 8 | mu.hat = mean(xs) 9 | 10 | # compute bootstrap sample of the mean. 11 | B = 200 12 | mu.hats.b = numeric(B) 13 | ts = numeric(B) 14 | 15 | for (b in 1:B) { 16 | i = sample(1:n, n, replace = TRUE) 17 | xs.b = xs[i] 18 | mu.hats.b[b] = mean(xs.b) 19 | 20 | for (b2 in 1:B) { 21 | i2 = sample(1:n, n, replace = TRUE) 22 | ts[b] = (mu.hats.b[b] - mu.hat) / sd(xs.b[i]) 23 | } 24 | } 25 | 26 | se.hat = sd(mu.hats.b) 27 | 28 | # visualize. 29 | par(mfrow = c(2,1)) 30 | hist(mu.hats.b, breaks = 100) 31 | hist(ts, breaks = 100) 32 | 33 | # compute CIs. 34 | alpha = 0.05 35 | probs = c(alpha/2, 1-alpha/2) 36 | 37 | names = sapply(probs, function(p) paste(p*100, '%', sep = '')) 38 | setCINames = function (object) { 39 | return (setNames(object = object, names)) 40 | } 41 | 42 | # standard normal. 43 | qs.norm = qnorm(probs) 44 | ci.sn = setCINames(mu.hat - rev(qs.norm)*se.hat) 45 | 46 | # basic bootstrap. 47 | qs.mu.hats.b = quantile(x = mu.hats.b, probs = probs) 48 | ci.basic = setCINames(2*mu.hat - rev(qs.mu.hats.b)) 49 | 50 | # percentile. 51 | ci.percentile = setCINames(quantile(mu.hats.b, probs = probs)) 52 | 53 | # bootstrap t. 54 | qs.ts = quantile(ts, probs = probs) 55 | ci.t = setCINames(mu.hat - rev(qs.ts)*se.hat) 56 | 57 | # set up data for the MC study. 58 | mc.study = data.frame(rbind(ci.sn, ci.basic, ci.percentile, ci.t)) 59 | colnames(mc.study) = names 60 | mc.study['miss.left'] = rep.int(0, times = nrow(mc.study)) 61 | mc.study['miss.right'] = rep.int(0, times = nrow(mc.study)) 62 | 63 | # compute coverage rates for sample mean when sampling from the normal population xs. 64 | size = n 65 | rep = 10000 66 | miss.l = 0 67 | miss.r = 0 68 | 69 | for(r in 1:rep) { 70 | i = sample(1:n, size, replace = TRUE) 71 | mu.sample = mean (xs[i]) 72 | for(y in 1:nrow(mc.study)) { 73 | lower = mc.study[y,names[1]] 74 | upper = mc.study[y,names[2]] 75 | if (mu.sample < lower) { 76 | mc.study[y,'miss.left'] = mc.study[y,'miss.left'] + 1 77 | } else if (mu.sample > upper) { 78 | mc.study[y,'miss.right'] = mc.study[y,'miss.right'] + 1 79 | } 80 | } 81 | } 82 | 83 | mc.study$miss.left = mc.study$miss.left/rep 84 | mc.study$miss.right = mc.study$miss.right/rep 85 | 86 | mc.study -------------------------------------------------------------------------------- /rizzo_7.B.R: -------------------------------------------------------------------------------- 1 | n = 100 2 | 3 | mc.skewness = function(xs) { 4 | 5 | # https://en.wikipedia.org/wiki/Skewness#Sample_skewness 6 | sample.skewness = function (sample) { 7 | mu = mean(sample) 8 | n = length(sample) 9 | num = 1/n * sum(sapply(sample, function (x) (x - mu)^3)) 10 | denom = sd(sample)^3 11 | return (num/denom) 12 | } 13 | 14 | theta.hat = sample.skewness(xs) 15 | 16 | B = 200 17 | theta.hats.b = numeric(B) 18 | 19 | for (b in 1:B) { 20 | i = sample(1:n, n, TRUE) 21 | xs.b = xs[i] 22 | theta.hats.b[b] = sample.skewness(xs.b) 23 | } 24 | 25 | sd.hat = sd(theta.hats.b) 26 | 27 | # visualize. 28 | par(mfrow = c(1, 1)) 29 | hist(theta.hats.b) 30 | 31 | # confidence intervals. 32 | alpha = 0.05 33 | probs = c(alpha/2, 1-alpha/2) 34 | names = sapply(probs, function (p) paste(p*100, '%', sep = '')) 35 | qs.theta.hats.b = quantile(theta.hats.b, probs) 36 | 37 | # standard normal. 38 | qs.norm = qnorm(probs) 39 | ci.norm = rev(theta.hat - qs.norm * sd.hat) 40 | 41 | # basic bootstrap. 42 | ci.basic = rev(2*theta.hat - qs.theta.hats.b) 43 | 44 | # percentile. 45 | ci.percentile = qs.theta.hats.b 46 | 47 | ci.data = data.frame(rbind(ci.norm, ci.basic, ci.percentile)) 48 | colnames(ci.data) = names 49 | ci.data['left.miss'] = 0 50 | ci.data['right.miss'] = 0 51 | 52 | # mc study. 53 | rep = 1000 54 | 55 | for (r in 1:rep) { 56 | i = sample(1:n, n, replace = TRUE) 57 | skew = sample.skewness(xs[i]) 58 | for (y in 1:nrow(ci.data)) { 59 | lower = ci.data[y,names[1]] 60 | upper = ci.data[y,names[2]] 61 | if (skew < lower) { 62 | ci.data[y,'left.miss'] = ci.data[y,'left.miss'] + 1 63 | } else if (skew > upper) { 64 | ci.data[y,'right.miss'] = ci.data[y,'right.miss'] + 1 65 | } 66 | } 67 | } 68 | 69 | ci.data$left.miss = ci.data$left.miss/rep 70 | ci.data$right.miss = ci.data$right.miss/rep 71 | 72 | return(ci.data) 73 | } 74 | 75 | mean = 3 76 | sd = 4 77 | xs = rnorm(n, mean = mean, sd = sd) 78 | 79 | mc.skewness(xs) 80 | 81 | df = 10 82 | xs = rchisq(n, df = df) 83 | 84 | mc.skewness(xs) -------------------------------------------------------------------------------- /rizzo_8.1.R: -------------------------------------------------------------------------------- 1 | attach(chickwts) 2 | feed1 = "soybean" 3 | feed2 = "linseed" 4 | xs = sort(weight[feed == feed1]) 5 | ys = sort(weight[feed == feed2]) 6 | rep = 1000 7 | 8 | library(RVAideMemoire) 9 | 10 | zs = c(xs, ys) 11 | n1 = length(xs) 12 | n2 = length(ys) 13 | n = n1 + n2 14 | 15 | Ts = numeric(rep) 16 | for (i in 1:rep) { 17 | ks = sample(1:n, n1, replace = FALSE) 18 | zs1 = zs[ks] 19 | zs2 = zs[-ks] 20 | Ts[i] = CvM.test(zs1, zs2)$statistic 21 | } 22 | 23 | (cvm = CvM.test(x, y)) 24 | T.hat = cvm$statistic 25 | (p.hat = mean(abs(T.hat) < abs(Ts))) 26 | 27 | hist(Ts) -------------------------------------------------------------------------------- /rizzo_8.2.R: -------------------------------------------------------------------------------- 1 | soybean = chickwts$weight[chickwts$feed=="soybean"] 2 | linseed = chickwts$weight[chickwts$feed=="linseed"] 3 | n = length(soybean) 4 | m = length(linseed) 5 | 6 | tmp = min(n, m) 7 | soybean = sort(soybean[1:tmp]) 8 | linseed = sort(linseed[1:tmp]) 9 | 10 | zs = c(soybean, linseed) 11 | spearman.cor.test = cor.test(x = soybean, y = linseed, method = "spearman") 12 | 13 | B = 1000 14 | k = length(zs) 15 | 16 | rhos = numeric(rep) 17 | 18 | for (b in 1:B) { 19 | i = sample(1:k, k/2, replace = FALSE) 20 | xs = zs[i] 21 | ys = zs[-i] 22 | rhos[b] = cor(x = xs, y = ys, method = "spearman") 23 | } 24 | 25 | hist(rhos, breaks = 100) 26 | 27 | (theta.hat = spearman.cor.test$estimate) 28 | 29 | spearman.cor.test$p.value 30 | 31 | (p.hat = mean(abs(rhos) > abs(theta.hat))) 32 | 33 | (alpha = 0.05) 34 | 35 | # p.hat < alpha, thus H0 rejected. -------------------------------------------------------------------------------- /rizzo_8.3.R: -------------------------------------------------------------------------------- 1 | # WIP: doesn't work. 2 | 3 | n1 = 100 4 | n2 = 200 5 | 6 | # returns probability for H1, where H0 is equal variances (i.e. sd1 = sd2) 7 | test.equal.variance = function(xs1, xs2){ 8 | # let x1 be smaller than x2 9 | if (length(x2) < length(x1)) { 10 | tmp = x1 11 | x1 = x2 12 | x2 = tmp 13 | } 14 | n1 = length(x1) 15 | n2 = length(x2) 16 | 17 | y = c(x1, x2) 18 | 19 | # returns 1 if more than five values from one sample lie outside the range of the other sample. 20 | # this implies different varianes between the samples. 21 | count5 = function (x1, x2) { 22 | stopifnot(length(x1) == length(x2)) 23 | extr1 = sum((x1 < min(x2))) + sum((x1 > max(x2))) 24 | extr2 = sum((x2 < min(x1))) + sum((x2 > max(x1))) 25 | 26 | out = max(extr1, extr2) 27 | return(as.integer(out > 5)) 28 | } 29 | 30 | # run permutation test. 31 | # if H0 holds, count 5 must return 0 for "most" (depending on desired p-value) sets of 2 equally sized samples from y. 32 | rep = 1000 33 | n.min = n1 34 | n = length(y) 35 | c5s = numeric(rep) 36 | size = floor(n / 2) 37 | 38 | for (i in 1:rep) { 39 | k = sample(1:n, size, replace = FALSE) 40 | y1 = y[k] 41 | y2 = y[-k] 42 | # count how many times count 5 test returned 1 (i.e. differend variances) 43 | c5s[i] = count5(y1, y2) 44 | } 45 | 46 | hist(c5s, probability = TRUE) 47 | 48 | # frequency of different variances according to count 5 test. 49 | return(mean(c5s)) 50 | } 51 | 52 | mean = 0 53 | # test the algorithm for random normally distributed samples of unequal sizes with different chocies of the standard deviation. 54 | test.equal.variance(rnorm(n1, mean, 1), rnorm(n2, mean, 1)) 55 | test.equal.variance(rnorm(n1, mean, 10), rnorm(n2, mean, 1)) 56 | test.equal.variance(rnorm(n1, mean, 100), rnorm(n2, mean, 1)) -------------------------------------------------------------------------------- /rizzo_8.4.R: -------------------------------------------------------------------------------- 1 | # the package from the book doesn't seem to exist. 2 | library(FastKNN) 3 | library(pdist) 4 | 5 | # returns T. 6 | rth.nn = function (x, y, r) { 7 | n1 = length(x) 8 | n2 = length(y) 9 | 10 | z = matrix(c(x,y), ncol = 1) 11 | n = dim(z)[1] 12 | 13 | # distance matrix between z and z. 14 | distance_matrix = as.matrix(dist(z)) 15 | 16 | # matrix where row i represents k nearest neighbors of z[i] 17 | nn = matrix(0, n, r) 18 | for (i in 1:n) { 19 | nn[i,] = k.nearest.neighbors(i, distance_matrix, k = r) 20 | } 21 | 22 | block1 = nn[1:n1,] 23 | block2 = nn[n1+1:n2,] 24 | 25 | T = (sum(block1<=n1) + sum(block2>n1))/(n*k) 26 | 27 | return (T) 28 | } 29 | 30 | feed1 = "sunflower" 31 | feed2 = "linseed" 32 | x = chickwts$weight[chickwts$feed == feed1] 33 | y = chickwts$weight[chickwts$feed == feed2] 34 | r = 3 35 | 36 | rth.nn(x, y, r) -------------------------------------------------------------------------------- /rizzo_8.A.R: -------------------------------------------------------------------------------- 1 | # WIP: the description of this exercise makes no sense at all. 2 | # 499 permutation replicates of what statistics? 3 | # how can there be 499 replicates when there are 10000 samples? 4 | # what is a permutation decision? 5 | 6 | library(FastKNN) 7 | library(pdist) 8 | library(energy) 9 | library(mvtnorm) 10 | library(boot) 11 | 12 | # returns T. 13 | rth.nn = function (dist, xi, sizes, nr.neighbors) { 14 | n1 = sizes[1] 15 | n2 = sizes[2] 16 | n = n1 + n2 17 | 18 | perm.dist = dist[xi,] 19 | 20 | # matrix where row i represents k nearest neighbors of z[i] 21 | nn = matrix(0, n, nr.neighbors) 22 | for (i in 1:n) { 23 | nn[i,] = k.nearest.neighbors(i, perm.dist, k = nr.neighbors) 24 | } 25 | 26 | block1 = nn[1:n1,] 27 | block2 = nn[n1+1:n2,] 28 | 29 | T = (sum(block1<=n1) + sum(block2>n1))/(n*nr.neighbors) 30 | 31 | return (T) 32 | } 33 | 34 | n1 = 20 35 | n2 = 20 36 | d = 2 37 | a = 2/sqrt(d) 38 | sizes = c(n1, n2) 39 | mean1 = c(0, 0) 40 | mean2 = c(0, 0.5) 41 | Sigma1 = diag(d) 42 | Sigma2 = diag(d) 43 | x = rmvnorm(n = n1, mean = mean1, sigma = Sigma1) 44 | y = rmvnorm(n = n2, mean = mean2, sigma = Sigma2) 45 | z = rbind(x, y) 46 | dist = as.matrix(dist(z)) 47 | rep = 999 48 | nr.neighbors = 3 49 | 50 | e = eqdist.etest(dist, sizes, TRUE) 51 | 52 | # boot.obj = boot(data = dist, statistic = eqdist.etest, sim = "permutation", R = rep, sizes = sizes, distance = TRUE) 53 | 54 | boot.obj = boot(data = dist, statistic = rth.nn, sim = "permutation", R = rep, sizes = sizes, nr.neighbors = nr.neighbors) 55 | 56 | 57 | if (FALSE){ 58 | total.n.x = dim(x)[1] 59 | total.n.y = dim(y)[1] 60 | total.sizes = c(total.n.x, total.n.y) 61 | r = 3 62 | 63 | T0 = rth.nn(dist, total.sizes, r) 64 | e0 = eqdist.etest(dist, total.sizes, TRUE)$statistic 65 | 66 | Ts = numeric(rep) 67 | es = numeric(rep) 68 | 69 | for (i in 1:rep) { 70 | k1 = sample(1:total.n.x, n1, replace = FALSE) 71 | k2 = total.n.x + sample(1:total.n.y, n2, replace = FALSE) 72 | ks = c(k1, k2) 73 | # shuffle distance matrix according to permutation. 74 | dist.i = dist[ks,ks] 75 | Ts[i] = rth.nn(dist.i, sizes, r) 76 | es[i] = eqdist.etest(dist.i, sizes, TRUE)$statistic 77 | } 78 | 79 | par(mfrow=c(1,2)) 80 | hist(Ts) 81 | hist(es) 82 | 83 | (p.e = mean(es >= e0)) 84 | (p.T = mean(Ts >= T0)) 85 | } -------------------------------------------------------------------------------- /rizzo_9.1.R: -------------------------------------------------------------------------------- 1 | library(VGAM) 2 | 3 | mc.drayleigh = function (sigma) { 4 | 5 | dg = function (a, b) { 6 | dchisq(a, df = b) 7 | } 8 | 9 | rg = function (df) { 10 | rchisq(n = 1, df = df) 11 | } 12 | 13 | df = function (x) { 14 | drayleigh(x, sigma = sigma) 15 | } 16 | 17 | N = 10000 18 | x = numeric(N) 19 | x[1] = rg(1) 20 | u = runif(N) 21 | k = 0 22 | for (i in 2:N) { 23 | xt = x[i-1] 24 | y = rg(xt) 25 | r = df(y)*dg(xt, y) / (df(xt)*dg(y, xt)) 26 | if (u[i] <= r) { 27 | x[i] = y 28 | } else { 29 | x[i] = x[i-1] 30 | k = k + 1 31 | } 32 | } 33 | print(k) 34 | return(x) 35 | } 36 | 37 | is = 5000:5500 38 | x1 = mc.drayleigh(4) 39 | x2 = mc.drayleigh(2) 40 | par(mfrow=c(2,1)) 41 | plot(is, x1[is], type="l") 42 | plot(is, x2[is], type="l") 43 | par(mfrow=c(1,1)) 44 | # less efficient for sigma = 2. longer horizontal paths in the respective plot. -------------------------------------------------------------------------------- /rizzo_9.10.R: -------------------------------------------------------------------------------- 1 | library(coda) 2 | 3 | l = 10 4 | s = c(1,2,3,4) 5 | k = length(s) 6 | 7 | # rayleigh density. 8 | sigma = 4 9 | df = function (y, sigma) { 10 | if (any(x < 0)) return(0) 11 | stopifnot(sigma > 0) 12 | return(y/sigma^2 * exp(-y^2/(2*sigma^2))) 13 | } 14 | 15 | dg = function (y, xt) { 16 | dchisq(y, df = xt) 17 | } 18 | 19 | rg = function (xt) { 20 | rchisq(1, df = xt) 21 | } 22 | 23 | mh = function (s, l) { 24 | x = numeric(l) 25 | us = runif(l) 26 | x[1] = s 27 | for (i in 2:l) { 28 | xt = x[i-1] 29 | y = rg(xt) 30 | res = df(y, sigma)/df(xt, sigma) * dg(xt, y)/dg(y, xt) 31 | if (us[i] <= res) { 32 | x[i] = y 33 | } else { 34 | x[i] = xt 35 | } 36 | } 37 | return(x) 38 | } 39 | 40 | xs = matrix(sapply(1:k, function(i) mh(s[i], l)), nrow = k, byrow = TRUE) 41 | 42 | # visualize the generated samples. 43 | plotHists = function () { 44 | par(mfrow=c(1,k)) 45 | for (i in 1:k) { 46 | hist(xs[i,], probability = TRUE, breaks = 100) 47 | x.axis = seq(min(xs[i,]), max(xs[i,]), by = 0.01) 48 | lines(x.axis, df(x.axis, sigma)) 49 | } 50 | par(mfrow=c(1,1)) 51 | } 52 | 53 | plotChains = function () { 54 | burn = 2000 55 | is = (burn+1):l 56 | par(mfrow=c(1,k)) 57 | for (i in 1:k) { 58 | plot(is, xs[i,is], type="l") 59 | } 60 | par(mfrow=c(1,1)) 61 | } 62 | 63 | gelman.rubin = function (psis) { 64 | psi.means = rowMeans(psis) 65 | B = n * var(psi.means) 66 | W = mean(apply(psis, MARGIN = 1, "var")) 67 | var.hat = (n-1)/n*W + 1/n*B 68 | return(var.hat/W) 69 | } 70 | 71 | div = matrix(sapply(1:k, function(i) 1:l), nrow = k, byrow = TRUE) 72 | psis = t(apply(xs, MARGIN = 1, "cumsum")) / div 73 | 74 | r.hats = sapply(2:l, function(j) gelman.rubin(psis[,1:j])) 75 | 76 | plot(2:l, r.hats, type="l") 77 | abline(h=1.2) 78 | 79 | # TODO: use coda library. -------------------------------------------------------------------------------- /rizzo_9.2.R: -------------------------------------------------------------------------------- 1 | sigma = 2 2 | 3 | dg = function (a, b) { 4 | dgamma(x = a, shape = b, rate = 1) 5 | } 6 | 7 | rg = function (s) { 8 | rgamma(1, shape = s, rate = 1) 9 | } 10 | 11 | df = function (x) { 12 | drayleigh(x, sigma = sigma) 13 | } 14 | 15 | mh = function(rg, dg, df) { 16 | N = 10000 17 | x = numeric(N) 18 | x[1] = rg(1) 19 | u = runif(N) 20 | k = 0 21 | for (i in 2:N) { 22 | xt = x[i-1] 23 | y = rg(xt) 24 | r = df(y)*dg(xt, y) / (df(xt)*dg(y, xt)) 25 | if (u[i] <= r) { 26 | x[i] = y 27 | } else { 28 | x[i] = x[i-1] 29 | k = k + 1 30 | } 31 | } 32 | print(k) 33 | return(x) 34 | } 35 | 36 | x = mh(rg, dg, df) 37 | is = 5000:5500 38 | plot(is, x[is], type="l") -------------------------------------------------------------------------------- /rizzo_9.3.R: -------------------------------------------------------------------------------- 1 | theta = 1 2 | eta = 0 3 | N = 10000 4 | 5 | stopifnot(theta > 0) 6 | 7 | df = function(x) { 8 | 1/(theta*pi*(1+((x-eta)/theta)^2)) 9 | } 10 | 11 | dg = function(x, df) { 12 | # dt(x = x, df = df) 13 | dnorm(x = x, mean = df) 14 | } 15 | 16 | rg = function(df) { 17 | rnorm(n = 1, mean = df) 18 | # rt(n = 1, df = df) 19 | } 20 | 21 | mh = function (N, df, dg, rg) { 22 | x = numeric(N) 23 | x[1] = rg(1) 24 | k = 0 25 | u = runif(N) 26 | for (i in 2:N) { 27 | xt = x[i-1] 28 | y = rg(xt) 29 | r = df(y) * dg(xt, y) / (df(xt) * dg(y, xt)) 30 | if (u[i] <= r) { 31 | x[i] = y 32 | } else { 33 | k = k + 1 34 | x[i] = xt 35 | } 36 | } 37 | print(k) 38 | return(x) 39 | } 40 | 41 | x = mh(N, df, dg, rg) 42 | is = 1001:N 43 | par(mfrow = c(1,2)) 44 | plot(is, x[is], type="l") 45 | hist(x, probability = TRUE, breaks = 100) 46 | plot.x = seq(min(x), max(x), 0.01) 47 | lines(plot.x, df(plot.x)) 48 | par(mfrow = c(1,1)) -------------------------------------------------------------------------------- /rizzo_9.4.R: -------------------------------------------------------------------------------- 1 | sds = c(0.5, 1, 2, 4) 2 | is = 5000:5500 3 | 4 | mc.laplace = function (sd) { 5 | N = 10000 6 | 7 | # standard laplace distribution. 8 | df = function (x) { 9 | 1/2 * exp(-abs(x)) 10 | } 11 | 12 | rg = function (mean) { 13 | rnorm(n = 1, mean = mean, sd = sd) 14 | } 15 | 16 | rw = function (N, df, rg) { 17 | 18 | x = numeric(N) 19 | x[1] = rg(1) 20 | k = 0 21 | us = runif(N) 22 | 23 | for (i in 2:N) { 24 | xt = x[i-1] 25 | y = rg(xt) 26 | res = df(y) / df(xt) 27 | if (us[i] <= res) { 28 | x[i] = y 29 | } else { 30 | x[i] = xt 31 | k = k + 1 32 | } 33 | } 34 | print(k) 35 | 36 | return(x) 37 | } 38 | 39 | return(rw(N, df, rg)) 40 | } 41 | 42 | xs = mc.laplace(sd = sds[1]) 43 | par(mfrow=c(length(sds), 1)) 44 | plot(is, xs[is], type="l", ylab = paste("sd = ", sds[1], sep = '')) 45 | for (i in 2:length(sds)) { 46 | plot(is, mc.laplace(sd = sds[i])[is], type="l", ylab = paste("sd = ", sds[i], sep = '')) 47 | } 48 | par(mfrow=c(1, 1)) -------------------------------------------------------------------------------- /rizzo_9.5.R: -------------------------------------------------------------------------------- 1 | b = 0.2 2 | b.lim = c(0, 0.5) 3 | days = 250 4 | secs = 1:5 5 | m = 5000 6 | burn = 1000 7 | ws = c((1:4)/4) 8 | 9 | prob.vector = function (b) { 10 | return(c(1/3, (1-b)/3, (1-2*b)/3, 2*b/3, b/3)) 11 | } 12 | 13 | i = sample(secs, size = days, prob = ps, replace = TRUE) 14 | win = tabulate(i) 15 | 16 | ps = prob.vector(b) 17 | 18 | # attempt to avoid numerical issues when computing the posterior density. 19 | # will break when days is large enough. 20 | posterior = function (x, win) { 21 | if (x < b.lim[1] || x > b.lim[2]) { 22 | return(0) 23 | } 24 | nums = sapply(split(1:days, rep(1:(length(win)), days/length(win))), function(n) prod(n)) 25 | dens = sapply(win, function(w) factorial(w)) 26 | probs = prob.vector(x) ^ win 27 | return(prod(nums/dens*probs)) 28 | } 29 | 30 | # try to overcome numerical issues when computing acceptance probability in random walk. 31 | prob.ratio = function (n, d, win) { 32 | return(prod(prob.vector(n)^win / prob.vector(d)^win)) 33 | } 34 | 35 | rw.b = function (w) { 36 | x = numeric(m) 37 | 38 | u = runif(m) 39 | v = runif(m, -w, w) 40 | x[1] = w 41 | for (i in 2:m) { 42 | xt = x[i-1] 43 | y = xt + v[i] 44 | r = prob.ratio(y, xt, win) 45 | if (u[i] <= r) { 46 | x[i] = y 47 | } else { 48 | x[i] = xt 49 | } 50 | } 51 | 52 | return(x) 53 | } 54 | 55 | xbs = lapply(ws, function(w) rw.b(w)) 56 | par(mfrow = c(length(ws), 2)) 57 | is = (burn+1):m 58 | for (i in 1:length(ws)) { 59 | xb = xbs[[i]] 60 | xb = xb[is] 61 | xb.seq = seq(min(xb), max(xb), 0.05) 62 | hist(xb, breaks = 100, probability = TRUE, main = paste('w = ', ws[i], sep = '')) 63 | # TODO: failed to plot the posterior density. 64 | lines(xb.seq, sapply(xb.seq, function(x) posterior(x, win))) 65 | plot(is, xb, type="l") 66 | } 67 | par(mfrow = c(1,1)) -------------------------------------------------------------------------------- /rizzo_9.6.R: -------------------------------------------------------------------------------- 1 | sizes = c(125, 18, 20, 34) 2 | size = sum(sizes) 3 | 4 | m = 10000 5 | burn = 2000 6 | is = (burn+1):m 7 | 8 | prob.vector = function (theta) { 9 | return(c(2 + theta, (1-theta), (1-theta), theta) / 4) 10 | } 11 | 12 | prob.ratio = function (n, d) { 13 | #print(prob.vector(n)^sizes) 14 | #print(prob.vector(d)^sizes) 15 | return(prod(prob.vector(n)^sizes / prob.vector(d)^sizes)) 16 | } 17 | 18 | # random walk metropolis. 19 | # using unif(-0.25, 0.25) as step. 20 | 21 | x.rw = numeric(m) 22 | k.rw = 0 23 | u = runif(m) 24 | v = runif(m, -0.25, 0.25) 25 | x.rw[1] = v[1] 26 | for (i in 2:m) { 27 | xt = x.rw[i-1] 28 | y = xt + v[i] 29 | r = min(prob.ratio(y, xt), 1) 30 | #print('hippedy') 31 | #print(xt) 32 | #print(y) 33 | #print(r) 34 | if (!is.nan(r) && u[i] <= r) { 35 | x.rw[i] = y 36 | } else { 37 | k.rw = k.rw + 1 38 | x.rw[i] = xt 39 | } 40 | } 41 | 42 | # metropolis hastings 43 | 44 | sd = 0.5 45 | min = -0.8 46 | max = 0.8 47 | 48 | rg = function(p) { 49 | return(runif(1, min - abs(p), max + abs(p))) 50 | # return(rnorm(1, p, sd)) 51 | } 52 | 53 | dg = function(x, p) { 54 | return(dunif(x, min - abs(p), max + abs(p))) 55 | # return(dnorm(x = x, mean = p, sd = sd)) 56 | } 57 | 58 | x.mh = numeric(m) 59 | k.mh = 0 60 | u = runif(m) 61 | x.mh[1] = rg(0) 62 | for(i in 2:m) { 63 | xt = x.mh[i-1] 64 | y = rg(xt) 65 | r = min(prob.ratio(y, xt) * dg(xt, y) / dg(y, xt), 1) 66 | if (!is.na(r) && u[i] <= r) { 67 | x.mh[i] = y 68 | } else { 69 | x.mh[i] = xt 70 | k.mh = k.mh + 1 71 | } 72 | } 73 | 74 | # independence sampler. 75 | 76 | x.i = numeric(m) 77 | x.i[1] = rg(0) 78 | k.i = 0 79 | u = runif(m) 80 | for (i in 2:m){ 81 | xt = x.i[i-1] 82 | y = rg(0) 83 | r = prob.ratio(y, xt) * dg(xt, 0)/dg(y, 0) 84 | if (u[i] <= r) { 85 | x.i[i] = y 86 | } else { 87 | x.i[i] = xt 88 | k.i = k.i + 1 89 | } 90 | } 91 | 92 | print(k.rw) 93 | print(k.mh) 94 | print(k.i) 95 | 96 | par(mfrow = c(3,2)) 97 | 98 | xs = as.list(x.rw, x.mh) 99 | 100 | x = x.rw[is] 101 | hist(x, probability = TRUE) 102 | plot(is, x, type='l') 103 | 104 | x = x.mh[is] 105 | hist(x, probability = TRUE) 106 | plot(is, x, type='l') 107 | 108 | x = x.i[is] 109 | hist(x, probability = TRUE) 110 | plot(is, x, type='l') 111 | 112 | par(mfrow = c(1,1)) -------------------------------------------------------------------------------- /rizzo_9.7.R: -------------------------------------------------------------------------------- 1 | m = 5000 2 | burn = 1000 3 | 4 | x = matrix(0, m, 2) 5 | 6 | rho = 0.9 7 | mu1 = 0 8 | mu2 = 0 9 | sigma1 = 1 10 | sigma2 = 1 11 | s1 = sqrt(1-rho^2)*sigma1 12 | s2 = sqrt(1-rho^2)*sigma2 13 | 14 | mean12 = function (x2) mu1 + rho*sigma1/sigma2*(x2 - mu2) 15 | mean21 = function (x1) mu2 + rho*sigma2/sigma1*(x1 - mu1) 16 | 17 | x[1,] = c(mu1, mu2) 18 | 19 | for (i in 2:m) { 20 | xt = x[i-1,] 21 | xt[1] = rnorm(1, mean12(xt[2]), s1) 22 | xt[2] = rnorm(1, mean21(xt[1]), s2) 23 | x[i,] = xt 24 | } 25 | 26 | x = x[(burn+1):m,] 27 | 28 | x = data.frame(x) 29 | lin.reg = lm(X1 ~ X2, data = x) 30 | 31 | par(mfrow=c(1,2)) 32 | plot(x, cex = 0.5, main = "generated data") 33 | hist(lin.reg$residuals, main = "residuals of linear model") 34 | par(mfrow=c(1,1)) -------------------------------------------------------------------------------- /rizzo_9.8.R: -------------------------------------------------------------------------------- 1 | n = 100 2 | a = 30 3 | b = 60 4 | 5 | df = function (x, y) { 6 | # general binomial coefficient 7 | gamma(n + 1) / (gamma(x + 1) * gamma(n - x + 1)) * y^(x + a - 1) * (1 - y)^(n - x + b - 1) 8 | } 9 | 10 | m = 10000 11 | d = 2 12 | 13 | x = matrix(0, nrow = m, ncol = d) 14 | 15 | for (i in 2:m) { 16 | xt = x[i-1,] 17 | xt[1] = rbinom(1, n, xt[2]) 18 | xt[2] = rbeta(1, xt[1] + a, n - xt[1] + b) 19 | x[i,] = xt 20 | } 21 | 22 | plot(x, cex = 0.1) 23 | xs = seq(from = min(x[,1]), to = max(x[,1]), length.out = 200) 24 | ys = seq(from = min(x[,2]), to = max(x[,2]), length.out = 200) 25 | zs = t(sapply(xs, function (x) sapply(ys, function (y) df(x, y)))) 26 | 27 | # plot contour of density for verification. 28 | contour(xs, ys, zs, add = TRUE, col = 2) -------------------------------------------------------------------------------- /rizzo_9.9.R: -------------------------------------------------------------------------------- 1 | sigmas = c(0.2, 0.5, 2) 2 | s = c(-10, -5, 5, 10) # starting values. 3 | l = 15000 # length of one chain. 4 | k = length(s) # number of chains. 5 | 6 | getMHSample = function (sigma) { 7 | 8 | # metropolis-hastings sampler to generate a normally distributed sample. 9 | mh = function(s, l, sd) { 10 | df = function(y) { 11 | return(dnorm(y)) 12 | } 13 | 14 | rg = function(xt) { 15 | return(rnorm(n = 1, mean = xt, sd = sd)) 16 | } 17 | 18 | dg = function(y, xt) { 19 | return(dnorm(x = y, mean = xt, sd = sd)) 20 | } 21 | 22 | xs = numeric(l) 23 | xs[1] = s 24 | us = runif(l) 25 | for(i in 2:l) { 26 | xt = xs[i-1] 27 | y = rg(xt) 28 | res = df(y) / df(xt) * dg(xt, y) / dg(y, xt) 29 | if (us[i] <= res) { 30 | xs[i] = y 31 | } else { 32 | xs[i] = xt 33 | } 34 | } 35 | return(xs) 36 | } 37 | 38 | xs = matrix(sapply(s, function(i) mh(i, l, sigma)), nrow = 4, byrow = TRUE) 39 | return(xs) 40 | } 41 | 42 | makePlots = function(sigma) { 43 | xs = getMHSample(sigma) 44 | 45 | # avoid re-computing diagnostic statistics when plotting R-hat. 46 | getPhis = function (xs) { 47 | # compute means efficiently. 48 | phis = t(apply(xs, MARGIN = 1, cumsum)) 49 | div = matrix(sapply(1:nrow(phis), function(r) 1:ncol(phis)), nrow = nrow(phis), byrow = TRUE) 50 | return(phis/div) 51 | } 52 | 53 | # the rows of X are generated chains. 54 | gelman.rubin = function (phis) { 55 | row.means = rowMeans(phis) 56 | phi.mean = mean(row.means) 57 | 58 | B = n * var(row.means) 59 | 60 | W = mean(apply(phis, 1, "var")) 61 | var.hat = (n-1)/n * W + 1/n * B 62 | return(var.hat/W) 63 | } 64 | 65 | phis = getPhis(xs) 66 | 67 | # why omit the plots? i like the plots. 68 | # plot sequence of phis. 69 | burn = 2000 70 | is = (1:l)[(burn+1):l] 71 | for(i in 1:k) { 72 | plot(is, phis[i,is], type="l", ylab = paste("sigma = ", sigma, ", x[1] = ", s[i], sep='')) 73 | } 74 | 75 | r.hats = sapply((burn + 1):l, function(j) gelman.rubin(phis[,1:j])) 76 | 77 | # plot sequence of r.hats. 78 | plot(is, r.hats, type="l") 79 | abline(h = 1.2) 80 | } 81 | 82 | 83 | par(mfrow = c(length(sigmas), 5)) 84 | sapply(sigmas, function(sigma) makePlots(sigma)) 85 | par(mfrow = c(1, 1)) 86 | 87 | --------------------------------------------------------------------------------